Original article:
https://blog.csdn.net/zzukun/article/details/49968129
Code (Python 3.7 + TensorFlow 2.0):
# -- coding: utf-8 --
import copy
import numpy as np
from time import time


# Sigmoid activation
def sigmoid(x):
    return 1 / (1 + np.exp(-x))


# Derivative of the sigmoid; here output = sigmoid(x),
# so sigmoid'(x) = output * (1 - output)
def sigmoid_out_to_derivative(output):
    return output * (1 - output)


# Build the integer-to-binary lookup table (our data generator)
int2binary = {}                       # dict: integer -> 8-bit array
binary_dim = 8                        # number of binary digits
largest_number = pow(2, binary_dim)   # largest representable value
# np.unpackbits converts each uint8 into its 8-bit binary representation
binary = np.unpackbits(np.array([range(largest_number)], dtype=np.uint8).T, axis=1)
for i in range(largest_number):
    # e.g. {0: array([0,0,0,0,0,0,0,0], dtype=uint8), 1: array([0,0,0,0,0,0,0,1], dtype=uint8), ...}
    int2binary[i] = binary[i]
def main():
    start = time()
    # Learning rate
    alpha = 0.1
    # Input layer size (one bit of a and one bit of b per step)
    input_dim = 2
    # Number of hidden units
    hidden_dim = 16
    # Output layer size (one bit of the sum per step)
    output_dim = 1
    # Weights connecting the input layer to the hidden layer: 2 rows x 16
    # columns, initialized with random values in [-1, 1)
    synapse_0 = 2 * np.random.random((input_dim, hidden_dim)) - 1
    # Weights connecting the hidden layer to the output layer: 16 rows x 1
    # column, initialized with random values in [-1, 1)
    synapse_1 = 2 * np.random.random((hidden_dim, output_dim)) - 1
    # Recurrent weights connecting the previous hidden state to the current
    # one: 16 rows x 16 columns, initialized with random values in [-1, 1)
    synapse_h = 2 * np.random.random((hidden_dim, hidden_dim)) - 1
    # Buffers that accumulate the updates for the three matrices above
    synapse_0_update = np.zeros_like(synapse_0)
    synapse_1_update = np.zeros_like(synapse_1)
    synapse_h_update = np.zeros_like(synapse_h)
    # Train for 10,000 iterations
    for j in range(10000):
        # For each iteration:
        # generate a simple addition problem (a + b = c, with a < max/2 and
        # b < max/2, so that c < max)
        a_int = np.random.randint(largest_number / 2)
        a = int2binary[a_int]
        b_int = np.random.randint(largest_number / 2)
        b = int2binary[b_int]
        # From the random a_int and b_int, compute c_int and look up the
        # correct binary answer c
        c_int = a_int + b_int
        c = int2binary[c_int]
        # Binary array that will hold the predicted value of a + b
        d = np.zeros_like(c)
        # Reset the accumulated error
        overallError = 0
        # Stores the layer_2 deltas at each time step
        layer_2_deltas = list()
        # Stores the hidden states (layer_1 values) at each time step
        layer_1_values = list()
        # Seed the hidden-state history with an all-zero state
        layer_1_values.append(np.zeros(hidden_dim))
        # Walk over each bit of a and b (8-bit arrays)
        for position in range(binary_dim):
            # Build input and target: X holds the current bit of a and b,
            # y the matching bit of c, starting from the rightmost bit
            X = np.array([[a[binary_dim - position - 1], b[binary_dim - position - 1]]])
            y = np.array([[c[binary_dim - position - 1]]]).T
            # Hidden layer (input + prev_hidden)
            # Shapes: (1,2)·(2,16) + (16,)·(16,16) = (1,16) + (16,) = (1,16); sigmoid keeps (1,16)
            layer_1 = sigmoid(np.dot(X, synapse_0) + np.dot(layer_1_values[-1], synapse_h))
            # Output layer (new binary representation)
            # (1,16)·(16,1) = (1,1)
            layer_2 = sigmoid(np.dot(layer_1, synapse_1))
            # Error between target and prediction, shape (1,1)
            layer_2_error = y - layer_2
            # Output delta: the error scaled by the sigmoid derivative at layer_2
            layer_2_deltas.append(layer_2_error * sigmoid_out_to_derivative(layer_2))
            overallError += np.abs(layer_2_error[0])  # accumulated absolute error, shape (1,)
            d[binary_dim - position - 1] = np.round(layer_2[0][0])  # predicted bit
            layer_1_values.append(copy.deepcopy(layer_1))  # save the hidden state
        future_layer_1_delta = np.zeros(hidden_dim)
        # Backpropagation through time: walk over the bits in reverse
        for position in range(binary_dim):
            X = np.array([[a[position], b[position]]])
            layer_1 = layer_1_values[-position - 1]
            prev_layer_1 = layer_1_values[-position - 2]
            layer_2_delta = layer_2_deltas[-position - 1]
            # Hidden delta: error flowing back from the future hidden state plus
            # error from the current output, scaled by the sigmoid derivative
            layer_1_delta = (future_layer_1_delta.dot(synapse_h.T) +
                             layer_2_delta.dot(synapse_1.T)) * sigmoid_out_to_derivative(layer_1)
            # Accumulate the weight updates
            synapse_1_update += np.atleast_2d(layer_1).T.dot(layer_2_delta)
            synapse_h_update += np.atleast_2d(prev_layer_1).T.dot(layer_1_delta)
            synapse_0_update += X.T.dot(layer_1_delta)
            future_layer_1_delta = layer_1_delta
        # Apply the accumulated updates, then clear the buffers
        synapse_0 += synapse_0_update * alpha
        synapse_1 += synapse_1_update * alpha
        synapse_h += synapse_h_update * alpha
        synapse_0_update *= 0
        synapse_1_update *= 0
        synapse_h_update *= 0
        # Print progress every 1000 iterations
        if j % 1000 == 0:
            print('Error:%s' % str(overallError))
            print("Pred:%s" % str(d))
            print("True:%s" % str(c))
            # Convert the predicted binary array back to a decimal integer
            out = 0
            for index, x in enumerate(reversed(d)):
                out += x * pow(2, index)
            print("%s + %s = %s" % (str(a_int), str(b_int), str(out)))
            print("-" * 50)
    # Show the learned weights and the total running time
    print(synapse_0)
    print(synapse_1)
    print(synapse_h)
    print("-x" * 50)
    end = time()
    print('Elapsed: %.2fs' % (end - start))


if __name__ == '__main__':
    main()
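To see concretely what np.unpackbits is doing in the table-building step above, here is a minimal standalone check (the printed values are easy to verify by hand):

import numpy as np

binary = np.unpackbits(np.array([range(256)], dtype=np.uint8).T, axis=1)
print(binary[9])    # [0 0 0 0 1 0 0 1] -> 8 + 1 = 9 (most significant bit first)
print(binary[200])  # [1 1 0 0 1 0 0 0] -> 128 + 64 + 8 = 200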
TensorFlow implementation (Python 3.7 + TensorFlow 2.0):
# -- coding: utf-8 --
import numpy as np
from matplotlib import pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers, models, Sequential

binary_dim = 8
largest_number = pow(2, binary_dim)
# binary = np.unpackbits(np.array([np.arange(largest_number)], dtype=np.uint8).T, axis=1)
# for i in range(largest_number):
#     int2binary[i] = binary[i]
def main():
    # X = np.random.randint(0, largest_number / 2, 4000).reshape(-1, 2)
    # np.save("X", X)
    # Load the fixed dataset (generated once with the two commented lines
    # above): 2000 rows, each holding two random addends
    X = np.load('X.npy')
    X_train_old, X_test_old = X[:1800], X[1800:]
    y = np.sum(X, axis=1).reshape(-1, 1)
    y_train_old, y_test_old = y[:1800], y[1800:]
    # Convert X and y to binary:
    # input:  X_train (1800 x 2 x 8)
    # output: y_train (1800 x 8)
    # each sample maps a 2x8 input X to a 1x8 target y
    X = np.unpackbits(X.astype(np.uint8), axis=1).reshape(-1, 2, 8).astype(np.float32)
    X_train, X_test = X[:1800], X[1800:]
    y = np.unpackbits(y.astype(np.uint8), axis=1).reshape(-1, 8).astype(np.float32)
    y_train, y_test = y[:1800], y[1800:]
    # Build the model
    model = Sequential()
    # LSTM: reads the two addends as a length-2 sequence of 8-bit vectors
    model.add(layers.LSTM(16, activation='relu', input_shape=(2, 8)))
    model.add(layers.Dense(8, activation='relu'))
    model.compile(loss='mse', optimizer='adam')
    h = model.fit(X_train, y_train, batch_size=20, epochs=1000)
    # Plot the training loss
    plt.figure(figsize=(16, 2))
    plt.plot(h.history['loss'])
    plt.show()
    model.save('lstm_model.h5')
    model = models.load_model('lstm_model.h5')
    # Bit weights [128, 64, ..., 1] for converting binary back to decimal
    cc = pow(2, np.arange(8))[::-1]
    y_test_predict = np.round(model.predict(X_test))
    # Convert the model's binary predictions back to decimal
    y_test_predict = np.dot(y_test_predict, cc).reshape(-1, 1)
    print(X_test_old)
    print(y_test_old)
    print(y_test_predict)
    print(y_test_predict - y_test_old)
    # errorAll = np.sum(np.abs(y_test_predict - y_test_old)) / 200
    # print(errorAll)
    print('finish.')


if __name__ == '__main__':
    main()
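For a single prediction, the same pipeline can be exercised by hand. A minimal sketch, assuming the trained model and the bit-weight vector cc from above are in scope (the pair 23 + 51 is just an arbitrary example):

a_int, b_int = 23, 51
# Pack the two addends into the (1, 2, 8) shape the model expects
x = np.unpackbits(np.array([[a_int, b_int]], dtype=np.uint8), axis=1)
x = x.reshape(1, 2, 8).astype(np.float32)
bits = np.round(model.predict(x))    # predicted 8-bit sum, shape (1, 8)
print(int(np.dot(bits, cc).item()))  # 74, if the model has converged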
Training loss curve (Figure_1.png).