import random
import numpy as np
# coding: utf-8
class MulLayer:
    """Multiplication node of a computational graph.

    ``forward`` computes ``x * y`` and caches both operands so that
    ``backward`` can apply the product rule to the upstream gradient.
    """

    def __init__(self):
        # Operands of the most recent forward pass; None until forward() runs.
        self.x = None
        self.y = None

    def forward(self, x, y):
        """Return ``x * y`` and remember both operands for ``backward``."""
        self.x = x
        self.y = y
        return x * y

    def backward(self, dout):
        """Return ``(dx, dy)`` for the upstream gradient ``dout``.

        By the product rule the gradient with respect to one operand is
        ``dout`` multiplied by the *other* operand cached in ``forward``.
        """
        dx = dout * self.y
        dy = dout * self.x
        return dx, dy
class AddLayer:
    """Addition node of a computational graph.

    The node is stateless: the derivative of ``x + y`` with respect to
    either operand is 1, so nothing has to be cached in ``forward``.
    """

    def __init__(self):
        pass

    def forward(self, x, y):
        """Return ``x + y``."""
        return x + y

    def backward(self, dout):
        """Return ``(dx, dy)``; both equal the upstream gradient ``dout``."""
        # Multiplying by 1 spells out the local derivative of the addition.
        dx = dout * 1
        dy = dout * 1
        return dx, dy
class PowerLayer:
    """Power node of a computational graph: ``forward(x) = x ** power``."""

    def __init__(self, power):
        # Input of the most recent forward pass; None until forward() runs.
        self.x = None
        # Exponent applied in forward().
        self.power = power

    def forward(self, x):
        """Return ``x ** power`` and cache ``x`` for ``backward``."""
        self.x = x
        return x ** self.power

    def backward(self, dout):
        """Return the gradient with respect to the input of ``forward``.

        Applies the power rule: ``dout * power * x ** (power - 1)``.
        """
        if isinstance(self.power, (int, float)) and self.power == 0:
            # x ** 0 is the constant 1, so its derivative is 0 everywhere.
            # The general formula would evaluate 0 * x ** -1, which raises
            # ZeroDivisionError (Python numbers) or yields nan (NumPy) at
            # x == 0.  Multiplying by x ** 0 keeps the shape/type of x.
            return dout * 0 * self.x ** 0
        dx = dout * self.power * self.x ** (self.power - 1)
        return dx
def get_data(count):
    """Generate ``count`` noise-free samples of ``t = 5*x1 + 6*x2 + 10.1``.

    ``x1`` is drawn uniformly from [-100, 100) and ``x2`` from [0, 1) with
    the standard-library ``random`` module.

    Returns:
        A tuple ``(x, t)`` of NumPy arrays: ``x`` has shape ``(n, 2)`` and
        ``t`` has shape ``(n,)``, where ``n`` is the number of generated
        samples (``count``, or 0 when ``count`` is not positive).
    """
    x_data = []
    t_data = []
    for _ in range(count):
        x1 = random.random() * 200 - 100
        x2 = random.random()
        x_data.append([x1, x2])
        t_data.append(5 * x1 + 6 * x2 + 10.1)
    # Reshape explicitly so an empty data set is still two-dimensional,
    # (0, 2), instead of collapsing to shape (0,).
    features = np.array(x_data, dtype=float).reshape(len(x_data), 2)
    targets = np.array(t_data, dtype=float)
    return features, targets
# ---------------------------------------------------------------------------
# Fit t = w1 * x1 + w2 * x2 + b to the synthetic data one sample at a time
# and record the parameter trajectory (w1_arr, w2_arr, b_arr).
# The active optimizer is plain SGD; the other update rules are kept below as
# commented-out alternatives that can be swapped in for comparison.
# ---------------------------------------------------------------------------
epoch = 100
train_size = 1000
test_size = 100
learning_rate = 1e-4

x_train, t_train = get_data(train_size)
x_test, t_test = get_data(test_size)

# Parameters start from standard-normal random values.
w1, w2, b = np.random.randn(3)

# Momentum state (velocity per parameter).
momentum = 0.9
v_w1 = 0
v_w2 = 0
v_b = 0

# AdaGrad / RMSprop state (accumulated squared gradients per parameter).
h_w1 = 0
h_w2 = 0
h_b = 0

# RMSprop
decay_rate = 0.95

# Adam state (step counter plus first/second moment per parameter).
idx = 0
beta1 = 0.9
beta2 = 0.999
w1_m = 0
w1_v = 0
w2_m = 0
w2_v = 0
b_m = 0
b_v = 0

# Parameter values sampled every 10th training step.
w1_arr = []
w2_arr = []
b_arr = []

for _ in range(epoch):
    for i in range(train_size):
        x1, x2 = x_train[i]
        t = t_train[i]

        # Forward pass: squared error of the linear prediction.
        z1 = x1 * w1
        z2 = x2 * w2
        z3 = z1 + z2
        z4 = z3 + b
        z5 = z4 - t
        z6 = z5 ** 2
        print(f'循环:{i} , loss:{z6}')

        # Backward pass: gradient of z6 = z5 ** 2 w.r.t. each parameter.
        dw1 = 2 * x1 * z5
        dw2 = 2 * x2 * z5
        db = 2 * z5

        # SGD update (active).
        w1 -= learning_rate * dw1
        w2 -= learning_rate * dw2
        b -= learning_rate * db

        # Momentum update (alternative).
        # v_w1 = momentum * v_w1 - learning_rate * dw1
        # w1 += v_w1
        # v_w2 = momentum * v_w2 - learning_rate * dw2
        # w2 += v_w2
        # v_b = momentum * v_b - learning_rate * db
        # b += v_b

        # AdaGrad update (alternative).
        # h_w1 += dw1 ** 2
        # w1 -= learning_rate * dw1 / (np.sqrt(h_w1) + 1e-7)
        # h_w2 += dw2 ** 2
        # w2 -= learning_rate * dw2 / (np.sqrt(h_w2) + 1e-7)
        # h_b += db ** 2
        # b -= learning_rate * db / (np.sqrt(h_b) + 1e-7)

        # RMSprop update (alternative).
        # NOTE(review): the usual formulation accumulates
        # (1 - decay_rate) * grad ** 2; this variant adds the full squared
        # gradient -- confirm that is intended before enabling it.
        # h_w1 *= decay_rate
        # h_w1 += dw1 ** 2
        # w1 -= learning_rate * dw1 / (np.sqrt(h_w1) + 1e-7)
        #
        # h_w2 *= decay_rate
        # h_w2 += dw2 ** 2
        # w2 -= learning_rate * dw2 / (np.sqrt(h_w2) + 1e-7)
        #
        # h_b *= decay_rate
        # h_b += db ** 2
        # b -= learning_rate * db / (np.sqrt(h_b) + 1e-7)

        # Adam update (alternative).
        # idx += 1
        # lr_t = learning_rate * np.sqrt(1.0 - beta2 ** idx) / (1.0 - beta1 ** idx)
        #
        # w1_m += (1 - beta1) * (dw1 - w1_m)
        # w1_v += (1 - beta2) * (dw1 ** 2 - w1_v)
        # w1 -= lr_t * w1_m / (np.sqrt(w1_v + 1e-7))
        #
        # w2_m += (1 - beta1) * (dw2 - w2_m)
        # w2_v += (1 - beta2) * (dw2 ** 2 - w2_v)
        # w2 -= lr_t * w2_m / (np.sqrt(w2_v + 1e-7))
        #
        # b_m += (1 - beta1) * (db - b_m)
        # b_v += (1 - beta2) * (db ** 2 - b_v)
        # b -= lr_t * b_m / (np.sqrt(b_v + 1e-7))

        # Record the trajectory every 10th sample to keep the plot light.
        if i % 10 == 0:
            w1_arr.append(w1)
            w2_arr.append(w2)
            b_arr.append(b)
import matplotlib as mpl
import numpy as np
import matplotlib.pyplot as plt
# Plot the recorded (w1, w2, b) trajectory as a 3-D scatter chart.
mpl.rcParams['legend.fontsize'] = 10
fig = plt.figure()
# Figure.gca() no longer accepts keyword arguments (deprecated in
# Matplotlib 3.4 and removed afterwards); add_subplot is the supported way
# to create axes with a 3-D projection.
ax = fig.add_subplot(111, projection='3d')
ax.set_xlabel('w1')
ax.set_ylabel('w2')
ax.set_zlabel('b')
ax.scatter(w1_arr, w2_arr, b_arr, label='SGD')
ax.legend()
plt.show()
# Test result figure: