人工智能习题第二章

第二章


1、混淆矩阵

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

# Ground-truth and predicted class labels for the same eleven samples.
true_labels = [2, 0, 0, 2, 4, 4, 1, 0, 3, 3, 3]
pred_labels = [2, 1, 0, 2, 4, 3, 1, 0, 1, 3, 3]

# Build the confusion matrix from the two label lists.
confusion_mat = confusion_matrix(true_labels, pred_labels)

# Render the matrix as a grayscale image with a colour bar.
plt.imshow(confusion_mat, interpolation='nearest', cmap=plt.cm.gray)
plt.title('Confusion matrix')
plt.colorbar()

# One tick per class (five classes: 0..4) on both axes.
ticks = np.arange(5)
plt.xticks(ticks, ticks)
plt.yticks(ticks, ticks)
plt.ylabel('True labels')
plt.xlabel('Predicted labels')
plt.show()

# Print the per-class classification report using readable class names.
targets = ['Class-0', 'Class-1', 'Class-2', 'Class-3', 'Class-4']
print('\n', classification_report(true_labels, pred_labels, target_names=targets))

2、数据预处理

import numpy as np
from sklearn import preprocessing

# Sample data: four rows, three features each.
input_data = np.array([[5.1, -2.9, 3.3],
                       [-1.2, 7.8, -6.1],
                       [3.9, 0.4, 2.1],
                       [7.3, -9.9, -4.5]])

# Binarize the data using 2.1 as the threshold.
data_binarized = preprocessing.Binarizer(threshold=2.1).transform(input_data)
print("\nBinarized data:\n", data_binarized)

# Print the per-column mean and standard deviation before scaling.
print("\nBEFORE:")
print("Mean =", input_data.mean(axis=0))
print("Std deviation =", input_data.std(axis=0))

# Scale the data, then print the same per-column statistics again.
data_scaled = preprocessing.scale(input_data)
print("\nAFTER:")
print("Mean =", data_scaled.mean(axis=0))
print("Std deviation =", data_scaled.std(axis=0))

# Min-max scaling into the range [0, 1].
data_scaler_minmax = preprocessing.MinMaxScaler(feature_range=(0, 1))
data_scaled_minmax = data_scaler_minmax.fit_transform(input_data)
print("\nMin max scaled data:\n", data_scaled_minmax)

# Normalize the data with the L1 norm and with the L2 norm.
data_normalized_l1 = preprocessing.normalize(input_data, norm='l1')
data_normalized_l2 = preprocessing.normalize(input_data, norm='l2')
print("\nL1 normalized data:\n", data_normalized_l1)
print("\nL2 normalized data:\n", data_normalized_l2)


3.房价

import numpy as np
from sklearn import datasets
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, explained_variance_score
from sklearn.utils import shuffle

# Load the housing data.
# NOTE: datasets.load_boston() was deprecated in scikit-learn 1.0 and removed
# in 1.2, so this script needs scikit-learn < 1.2 to run as written.
data = datasets.load_boston()

# Shuffle the samples and targets together (fixed seed for repeatability).
X, y = shuffle(data.data, data.target, random_state=7)

# Split the data: first 80% for training, remaining 20% for testing.
num_training = int(0.8 * len(X))
X_train, y_train = X[:num_training], y[:num_training]
X_test, y_test = X[num_training:], y[num_training:]

# Create the Support Vector Regression model with a linear kernel.
sv_regressor = SVR(kernel='linear', C=1.0, epsilon=0.1)

# Train the Support Vector Regressor.
sv_regressor.fit(X_train, y_train)

# Evaluate the regressor on the held-out test split.
y_test_pred = sv_regressor.predict(X_test)
mse = mean_squared_error(y_test, y_test_pred)
evs = explained_variance_score(y_test, y_test_pred)
print("\n#### Performance ####")
print("Mean squared error =", round(mse, 2))
print("Explained variance score =", round(evs, 2))

# Run the regressor on a single hand-written datapoint (13 feature values).
test_data = [3.7, 0, 18.4, 1, 0.87, 5.95, 91, 2.5052, 26, 666, 20.2, 351.34, 15.27]
print("\nPredicted price:", sv_regressor.predict([test_data])[0])

4.收入分类

import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.svm import LinearSVC
from sklearn.multiclass import OneVsOneClassifier
from sklearn import model_selection

# Input file containing the data
input_file = 'income_data.txt'

# Read the data, keeping at most max_datapoints records per income class
X = []
y = []
count_class1 = 0
count_class2 = 0
max_datapoints = 25000

with open(input_file, 'r') as f:
    # Iterate the file lazily instead of loading every line with readlines().
    for line in f:
        # Stop once both classes have reached their quota.
        if count_class1 >= max_datapoints and count_class2 >= max_datapoints:
            break

        # Skip any record containing '?' (presumably the missing-value marker).
        if '?' in line:
            continue

        # Strip only the trailing newline so a final line without one is not
        # truncated, then split into fields.
        data = line.rstrip('\n').split(', ')

        # The last field is the income label; the whole record (label
        # included) is stored and split into features/target further below.
        if data[-1] == '<=50K' and count_class1 < max_datapoints:
            X.append(data)
            count_class1 += 1
        elif data[-1] == '>50K' and count_class2 < max_datapoints:
            X.append(data)
            count_class2 += 1

# Convert to numpy array
X = np.array(X)

# Convert string data to numerical data.
# Columns are classified as numeric or categorical by inspecting the first
# record; each categorical column gets its own LabelEncoder, stored in column
# order so the same encoders can be reused for new datapoints.
label_encoder = []
X_encoded = np.empty(X.shape)
for i, item in enumerate(X[0]):
    if item.isdigit():
        X_encoded[:, i] = X[:, i]
    else:
        label_encoder.append(preprocessing.LabelEncoder())
        X_encoded[:, i] = label_encoder[-1].fit_transform(X[:, i])

# Last column is the encoded income label; everything before it is features.
X = X_encoded[:, :-1].astype(int)
y = X_encoded[:, -1].astype(int)

# Create SVM classifier
classifier = OneVsOneClassifier(LinearSVC(random_state=0))

# Train the classifier on the full dataset
classifier.fit(X, y)

# Hold out 20% of the data, then retrain a fresh classifier on the rest
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.2, random_state=5)
classifier = OneVsOneClassifier(LinearSVC(random_state=0))
classifier.fit(X_train, y_train)
y_test_pred = classifier.predict(X_test)

# Compute the F1 score of the SVM classifier with 3-fold cross validation
f1 = model_selection.cross_val_score(classifier, X, y, scoring='f1_weighted', cv=3)
print("F1 score: " + str(round(100*f1.mean(), 2)) + "%")

# Predict output for a test datapoint
input_data = ['37', 'Private', '215646', 'HS-grad', '9', 'Never-married', 'Handlers-cleaners', 'Not-in-family', 'White', 'Male', '0', '0', '40', 'United-States']

# Encode test datapoint, consuming the categorical encoders in column order
input_data_encoded = [-1] * len(input_data)
count = 0
for i, item in enumerate(input_data):
    if item.isdigit():
        input_data_encoded[i] = int(item)
    else:
        # LabelEncoder.transform expects an array-like of labels, so wrap the
        # single value in a list and take the single encoded result.
        input_data_encoded[i] = int(label_encoder[count].transform([item])[0])
        count += 1

# predict() expects a 2-D array of shape (n_samples, n_features); wrap the
# single sample so it becomes one row.
input_data_encoded = np.array([input_data_encoded])

# Run classifier on encoded datapoint and print the decoded class label
predicted_class = classifier.predict(input_data_encoded)
print(label_encoder[-1].inverse_transform(predicted_class)[0])

5.标签编码

import numpy as np
from sklearn import preprocessing

# Sample input labels
input_labels = ['red', 'black', 'red', 'green', 'black', 'yellow', 'white']

# Create the label encoder and fit it on the sample labels
encoder = preprocessing.LabelEncoder()
encoder.fit(input_labels)

# Print the mapping: each class name and its position in encoder.classes_
print("\nLabel mapping:")
for i, item in enumerate(encoder.classes_):
    print(item, '-->', i)

# Encode a set of labels using the encoder
test_labels = ['green', 'red', 'black']
encoded_values = encoder.transform(test_labels)
print("\nLabels =", test_labels)
print("Encoded values =", list(encoded_values))

# Decode a set of values using the encoder
encoded_values = [3, 0, 4, 1]
decoded_list = encoder.inverse_transform(encoded_values)
print("\nEncoded values =", encoded_values)
print("Decoded labels =", list(decoded_list))

6.逻辑回归

import numpy as np
from sklearn import linear_model
import matplotlib.pyplot as plt

# visualize_classifier comes from a local `utilities` module that is not part
# of this listing; it must be available on the import path.
from utilities import visualize_classifier

# Define sample input data: twelve 2-D points, three per class (classes 0..3)
X = np.array([[3.1, 7.2], [4, 6.7], [2.9, 8], [5.1, 4.5], [6, 5], [5.6, 5], [3.3, 0.4], [3.9, 0.9], [2.8, 1], [0.5, 3.4], [1, 4], [0.6, 4.9]])
y = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3])

# Create the logistic regression classifier
classifier = linear_model.LogisticRegression(solver='liblinear', C=1)
#classifier = linear_model.LogisticRegression(solver='liblinear', C=100)

# Train the classifier
classifier.fit(X, y)

# Visualize the performance of the classifier
visualize_classifier(classifier, X, y)

7.朴素贝叶斯


8.多元回归函数

import numpy as np
from sklearn import linear_model
import sklearn.metrics as sm
from sklearn.preprocessing import PolynomialFeatures

# Input file containing the data
input_file = 'data_multivar_regr.txt'

# Load the comma-separated data; the last column is the target, the rest
# are the input features
data = np.loadtxt(input_file, delimiter=',')
X, y = data[:, :-1], data[:, -1]

# Split data into training (first 80%) and testing (remaining 20%)
num_training = int(0.8 * len(X))
num_test = len(X) - num_training

# Training data
X_train, y_train = X[:num_training], y[:num_training]

# Test data
X_test, y_test = X[num_training:], y[num_training:]

# Create the linear regressor model
linear_regressor = linear_model.LinearRegression()

# Train the model using the training set
linear_regressor.fit(X_train, y_train)

# Predict the output for the test set
y_test_pred = linear_regressor.predict(X_test)

# Measure performance
print("Linear Regressor performance:")
print("Mean absolute error =", round(sm.mean_absolute_error(y_test, y_test_pred), 2))
print("Mean squared error =", round(sm.mean_squared_error(y_test, y_test_pred), 2))
print("Median absolute error =", round(sm.median_absolute_error(y_test, y_test_pred), 2))
print("Explained variance score =", round(sm.explained_variance_score(y_test, y_test_pred), 2))
print("R2 score =", round(sm.r2_score(y_test, y_test_pred), 2))

# Polynomial regression: expand the features to degree-10 polynomial terms
polynomial = PolynomialFeatures(degree=10)
X_train_transformed = polynomial.fit_transform(X_train)

# Single query point with three feature values
datapoint = [[7.75, 6.35, 5.56]]

# Use transform (not fit_transform) here: the expansion was already fitted on
# the training data and must not be re-fitted on the query point.
poly_datapoint = polynomial.transform(datapoint)

poly_linear_model = linear_model.LinearRegression()
poly_linear_model.fit(X_train_transformed, y_train)

print("\nLinear regression:\n", linear_regressor.predict(datapoint))
print("\nPolynomial regression:\n", poly_linear_model.predict(poly_datapoint))

9.单元回归函数

import pickle

import numpy as np
from sklearn import linear_model
import sklearn.metrics as sm
import matplotlib.pyplot as plt

# Input file containing data
input_file = 'data_singlevar_regr.txt'

# Read the comma-separated data; the last column is the target
data = np.loadtxt(input_file, delimiter=',')
X, y = data[:, :-1], data[:, -1]

# Train and test split: first 80% for training, remaining 20% for testing
num_training = int(0.8 * len(X))
num_test = len(X) - num_training

# Training data
X_train, y_train = X[:num_training], y[:num_training]

# Test data
X_test, y_test = X[num_training:], y[num_training:]

# Create linear regressor object
regressor = linear_model.LinearRegression()

# Train the model using the training set
regressor.fit(X_train, y_train)

# Predict the output
y_test_pred = regressor.predict(X_test)

# Plot the test points (green) and the fitted line (black), without axis ticks
plt.scatter(X_test, y_test, color='green')
plt.plot(X_test, y_test_pred, color='black', linewidth=4)
plt.xticks(())
plt.yticks(())
plt.show()

# Compute performance metrics
print("Linear regressor performance:")
print("Mean absolute error =", round(sm.mean_absolute_error(y_test, y_test_pred), 2))
print("Mean squared error =", round(sm.mean_squared_error(y_test, y_test_pred), 2))
print("Median absolute error =", round(sm.median_absolute_error(y_test, y_test_pred), 2))
print("Explain variance score =", round(sm.explained_variance_score(y_test, y_test_pred), 2))
print("R2 score =", round(sm.r2_score(y_test, y_test_pred), 2))

# Model persistence
output_model_file = 'model.pkl'

# Save the model
with open(output_model_file, 'wb') as f:
    pickle.dump(regressor, f)

# Load the model back.
# NOTE: pickle.load executes arbitrary code from the file; only load pickle
# files this script (or another trusted source) has written.
with open(output_model_file, 'rb') as f:
    regressor_model = pickle.load(f)

# Perform prediction on test data with the reloaded model
y_test_pred_new = regressor_model.predict(X_test)
print("\nNew mean absolute error =", round(sm.mean_absolute_error(y_test, y_test_pred_new), 2))

单元回归函数
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容