The code and notes are as follows:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Feb 7 20:53:34 2022
@author: liufengyun
"""
from tensorflow import keras
import tensorflow as tf
import pandas as pd
from tensorflow.keras import layers
from tensorflow.keras import losses
# Download the Auto MPG dataset
dataset_path = keras.utils.get_file("auto-mpg.data", "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data")
# Define the column names
column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight', 'Acceleration', 'Model Year', 'Origin']
raw_dataset = pd.read_csv(dataset_path, names=column_names, na_values = "?", comment = '\t', sep=" ", skipinitialspace=True)
dataset = raw_dataset.copy()
# Inspect the first few rows
#print(dataset.head())
# Count missing values
# print(dataset.isna().sum())
# Drop rows with missing values
dataset = dataset.dropna()
# Replace the categorical 'Origin' tag with 3 one-hot feature columns (an embedding could be used later)
origin = dataset.pop('Origin')
dataset['USA'] = (origin == 1) * 1.0
dataset['Europe'] = (origin == 2) * 1.0
dataset['Japan'] = (origin == 3) * 1.0
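# For reference (a sketch, not part of the original notes): pandas can build the same
# one-hot columns in a single call, e.g.
#   dataset = dataset.join(pd.get_dummies(origin.map({1: 'USA', 2: 'Europe', 3: 'Japan'})))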
#print(dataset.tail())
# Split into training and test sets (80% / 20%)
train_dataset = dataset.sample(frac = 0.8, random_state = 0)
test_dataset = dataset.drop(train_dataset.index)
train_labels = train_dataset.pop('MPG')
test_labels = test_dataset.pop('MPG')
# Compute per-column statistics (mean, std, etc.) on the training set
train_stats = train_dataset.describe()
#train_stats.pop("MPG")
train_stats = train_stats.transpose()
#print(train_stats)
# Normalize features with the training-set mean and std (applied to both splits to avoid leakage)
def norm(x):
    return (x - train_stats['mean']) / train_stats['std']
normed_train_data = norm(train_dataset)
normed_test_data = norm(test_dataset)
# Build a tf.data pipeline from the normalized features and labels
train_db = tf.data.Dataset.from_tensor_slices((normed_train_data.values, train_labels.values))
# Shuffle and batch the training data
train_db = train_db.shuffle(100).batch(32)
# Build the network: a 3-layer fully connected regressor (64-64-1)
class Network(keras.Model):
    def __init__(self):
        super(Network, self).__init__()
        self.fc1 = layers.Dense(64, activation='relu')
        self.fc2 = layers.Dense(64, activation='relu')
        self.fc3 = layers.Dense(1)

    def call(self, inputs, training=None, mask=None):
        x = self.fc1(inputs)
        x = self.fc2(x)
        x = self.fc3(x)
        return x
# Instantiate the network and set up the optimizer
model = Network()
# Build the weights with a dummy input shape: batch = 4 (arbitrary), feature_size = 9
model.build(input_shape=(4, 9))
# Print the network summary
#print(model.summary())
# RMSprop algorithm: https://zhuanlan.zhihu.com/p/34230849
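# For reference (not from the original notes), the basic RMSprop update is roughly:
#   s <- rho * s + (1 - rho) * g^2
#   w <- w - lr * g / sqrt(s + eps)
# with Keras defaults rho = 0.9 and eps = 1e-07; only the learning rate (0.001) is set below.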
optimizer = tf.keras.optimizers.RMSprop(0.001)
# Train for 200 epochs with a manual gradient-descent loop
for epoch in range(200):
    for step, (x, y) in enumerate(train_db):
        with tf.GradientTape() as tape:
            # squeeze the (batch, 1) output to (batch,) so it lines up with the labels
            out = tf.squeeze(model(x), axis=1)
            loss = tf.reduce_mean(losses.MSE(y, out))
            mae_loss = tf.reduce_mean(losses.MAE(y, out))
        if step % 10 == 0:
            print(epoch, step, float(loss), float(mae_loss))
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
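# After training, evaluate on the held-out test set (a minimal sketch; not part of the
# original notes). The test features were already normalized with training statistics above.
test_out = tf.squeeze(model(tf.constant(normed_test_data.values)), axis=1)
test_mse = tf.reduce_mean(losses.MSE(test_labels.values, test_out))
test_mae = tf.reduce_mean(losses.MAE(test_labels.values, test_out))
print('test MSE:', float(test_mse), 'test MAE:', float(test_mae))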