均方误差与交叉熵误差
def mean_squared_error(y, t):
    """Return half the sum of squared element-wise differences of ``y`` and ``t``.

    Args:
        y: Predicted values (NumPy array).
        t: Target values, broadcast-compatible with ``y``.

    Returns:
        ``0.5 * sum((y - t) ** 2)`` as a scalar.
    """
    residual = y - t
    return 0.5 * np.sum(residual ** 2)
def cross_entropy_error(y, t):
    """Return the cross-entropy ``-sum(t * log(y + 1e-7))`` for one sample.

    Args:
        y: Predicted values.
        t: True values, same shape as ``y``.

    Returns:
        The cross-entropy as a scalar.
    """
    # The small offset keeps log() finite when an entry of y is exactly 0.
    eps = 1e-7
    log_pred = np.log(y + eps)
    return -np.sum(t * log_pred)
# One-hot target: the correct class is index 2.
t = np.array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0])
# y1 puts its largest value (0.6) on index 2, y2 puts it on index 7.
y1 = np.array([0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0])
y2 = np.array([0.1, 0.05, 0.1, 0.0, 0.05, 0.1, 0.0, 0.6, 0.0, 0.0])
# Both loss functions are declared as (y, t), i.e. prediction first.  The
# squared error is symmetric in its arguments, so the printed values do not
# depend on the order, but the calls now match the signature.
print(mean_squared_error(y1, t))
print(mean_squared_error(y2, t))
print(cross_entropy_error(y1, t))
print(cross_entropy_error(y2, t))
0.09750000000000003
0.5975
0.510825457099338
2.302584092994546
mini-batch学习
# Load the data set.  load_mnist is defined outside this section; presumably it
# returns (train images, train labels), (test images, test labels) -- confirm.
(x_train, t_train), (x_test, t_test) = load_mnist(
normalize=True, one_hot_label=True)
# Number of training samples (length of the first axis).
train_size = x_train.shape[0]
batch_size = 10
# Draw batch_size random indices from range(train_size); np.random.choice
# samples with replacement by default, so an index can repeat.
batch_mask = np.random.choice(train_size, batch_size) # original note: picks 10 at random from 0-59999
# Select the sampled rows from the inputs and the matching labels.
x_batch = x_train[batch_mask]
t_batch = t_train[batch_mask]
mini-batch版交叉熵误差的实现
# Targets are given in one-hot form.
def cross_entropy_error(y, t):
    """Return the mean cross-entropy over a mini-batch with one-hot targets.

    Args:
        y: Predictions, either one sample of shape ``(classes,)`` or a batch
            of shape ``(batch, classes)``.
        t: One-hot targets with the same shape as ``y``.

    Returns:
        ``-sum(t * log(y + 1e-7))`` divided by the number of samples.
    """
    if y.ndim == 1:
        # Promote a single sample to a batch of one row.  Each array is
        # reshaped with its own size.
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)
    # Always taken from y itself, so a 1-D input cannot fall through to an
    # unrelated module-level ``batch_size``.
    batch_size = y.shape[0]
    return -np.sum(t * np.log(y + 1e-7)) / batch_size
#训练数据不是one-hot形式
#def cross_entropy_error(y, t):
# if y.ndim == 1:
# t = t.reshape(1, t.size)
# y = y.reshape(1, y.size)
# batch_size = y.shape[0]
# return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size
导数的计算
def numerical_diff(f, x):
    """Approximate the derivative of ``f`` at ``x`` with a central difference.

    Args:
        f: Function of one variable.
        x: Point at which to differentiate.

    Returns:
        ``(f(x + h) - f(x - h)) / (2 * h)`` with ``h = 1e-4``.
    """
    step = 1e-4
    ahead = f(x + step)
    behind = f(x - step)
    return (ahead - behind) / (2 * step)
def square(x):
    """Return ``x`` multiplied by itself."""
    squared = x * x
    return squared
# Differentiate square numerically at x = 2; the analytic derivative 2*x is 4.
func = square
print(numerical_diff(func, 2))
4.000000000004
定义函数 $f(x_0, x_1) = x_0^2 + x_1^2$
def function_2(x):
    """Return ``x[0]**2 + x[1]**2``.

    Args:
        x: Indexable holding at least two numbers.
    """
    x0, x1 = x[0], x[1]
    return x0 ** 2 + x1 ** 2
求 $f$ 在 $x_0 = 3, x_1 = 4$ 时的偏导数 $\partial f / \partial x_0$ 和 $\partial f / \partial x_1$
def function_tmp1(x0):
    """Return ``x0 * x0 + 4**2``: the two-variable sum of squares with x1 held at 4."""
    x1_fixed = 4
    return x0 * x0 + x1_fixed ** 2
def function_tmp2(x1):
    """Return ``x1 * x1 + 3**2``: the two-variable sum of squares with x0 held at 3."""
    x0_fixed = 3
    return x1 * x1 + x0_fixed ** 2
# Numerical partial derivatives at (x0, x1) = (3, 4): each helper fixes one
# variable, so the analytic values are 2*x0 = 6 and 2*x1 = 8.
print(numerical_diff(function_tmp1, 3))
print(numerical_diff(function_tmp2, 4))
6.00000000000378
7.999999999999119
由全部变量的偏导数汇总而成的向量称为梯度
def numerical_gradient(f, x):
    """Estimate the gradient of ``f`` at the 1-D point ``x`` by central differences.

    Args:
        f: Function that takes the array and returns a number.
        x: 1-D NumPy array.  A floating-point array is perturbed in place one
            element at a time and restored before returning.  An integer or
            boolean array is left untouched and a float copy is perturbed
            instead.

    Returns:
        Array of the same shape as ``x`` holding
        ``(f(x + h*e_i) - f(x - h*e_i)) / (2*h)`` for each index ``i``,
        with ``h = 1e-4``.
    """
    h = 1e-4
    if not np.issubdtype(x.dtype, np.inexact):
        # Integer storage would truncate ``x[idx] +/- h`` and the gradient
        # itself, producing silently wrong numbers; work on a float copy.
        x = x.astype(float)
    grad = np.zeros_like(x)  # holds the result
    for idx in range(x.size):
        tmp_val = x[idx]
        x[idx] = tmp_val + h
        fxh1 = f(x)  # f(x + h)
        x[idx] = tmp_val - h
        fxh2 = f(x)  # f(x - h)
        grad[idx] = (fxh1 - fxh2) / (2 * h)
        x[idx] = tmp_val  # restore the original value
    return grad
# Gradient of function_2 at (0, 2); analytically (2*x0, 2*x1) = (0, 4).
print(numerical_gradient(function_2, np.array([0.0, 2.0])))
[0. 4.]
梯度下降
def gradient_descent(f, init_x, lr=0.01, step_num=100):
    """Run plain gradient descent on ``f`` and return the final point.

    Args:
        f: Function passed to ``numerical_gradient`` together with the
            current point.
        init_x: Starting point.  It is copied, so the caller's array is not
            modified and can be reused for another run.
        lr: Learning rate that scales each gradient step.
        step_num: Number of update steps.

    Returns:
        A new array holding the point reached after ``step_num`` updates.
    """
    # np.array copies by default.  Without the copy, the in-place update
    # below would overwrite the caller's ``init_x``.
    x = np.array(init_x)
    if not np.issubdtype(x.dtype, np.inexact):
        # ``x -= lr * grad`` cannot store float steps in integer storage.
        x = x.astype(float)
    for _ in range(step_num):
        grad = numerical_gradient(f, x)
        x -= lr * grad
    return x
# Minimise function_2 with a moderate (0.1) and a very large (10) learning rate.
# NOTE(review): both calls receive the same init_x object, so the second run
# starts from (2, 3) only if gradient_descent does not modify its argument
# in place.
init_x=np.array([2.0,3.0])
print(gradient_descent(function_2,init_x,lr=0.1))
print(gradient_descent(function_2,init_x,lr=10))
[4.07407195e-10 6.11110793e-10]
[-2.39906967e+12 -2.76179331e+12]
神经网络的梯度
def softmax(x):
    """Return the softmax of ``x``.

    Args:
        x: NumPy array.  A 2-D array is normalised row by row; any other
            rank is normalised over all of its elements together.

    Returns:
        Array of the same shape as ``x`` with the exponentials normalised
        to sum to 1 (per row in the 2-D case).
    """
    if x.ndim != 2:
        shifted = x - np.max(x)  # overflow guard
        exps = np.exp(shifted)
        return exps / np.sum(exps)

    # Work on the transpose so each sample is a column, subtract the
    # per-sample maximum (overflow guard), then transpose back.
    columns = x.T
    columns = columns - np.max(columns, axis=0)
    exps = np.exp(columns)
    normalised = exps / np.sum(exps, axis=0)
    return normalised.T
def cross_entropy_error(y, t):
    """Return the mean cross-entropy of predictions ``y`` against targets ``t``.

    Args:
        y: Predictions, one sample ``(classes,)`` or a batch
            ``(batch, classes)``.
        t: Targets, either one-hot (same number of elements as ``y``) or
            class indices.

    Returns:
        ``-sum(log(y[i, t[i]] + 1e-7))`` divided by the number of samples.
    """
    if y.ndim == 1:
        # Treat a single sample as a batch of one row.
        t = t.reshape(1, -1)
        y = y.reshape(1, -1)

    # One-hot targets have as many elements as y; convert them to the index
    # of the correct class.
    if t.size == y.size:
        t = t.argmax(axis=1)

    n_samples = y.shape[0]
    picked = y[np.arange(n_samples), t]
    return -np.sum(np.log(picked + 1e-7)) / n_samples
class simpleNet:
    """Single-layer network with a 2x3 weight matrix ``W`` and no bias."""

    def __init__(self):
        # Weights are drawn from a standard normal (Gaussian) distribution.
        self.W = np.random.randn(2, 3)

    def predict(self, x):
        """Return the scores ``np.dot(x, W)`` for input ``x``."""
        scores = np.dot(x, self.W)
        return scores

    def loss(self, x, t):
        """Return the cross-entropy of ``softmax(predict(x))`` against ``t``."""
        probabilities = softmax(self.predict(x))
        return cross_entropy_error(probabilities, t)
# Build a network with random 2x3 weights and show them.
net = simpleNet()
print(net.W)
# Forward pass for one 2-element input: np.dot(x, W) gives 3 scores.
x = np.array([0.6, 0.9])
p = net.predict(x)
print(p)
# One-hot target marking index 2 as the correct class, then the resulting loss.
t = np.array([0, 0, 1])
print(net.loss(x, t))
[[ 0.92716354 -0.14222582 0.29493579]
[-1.09513484 -0.03646633 1.0450259 ]]
[-0.42932323 -0.11815519 1.11748478]
0.4078456178864742
写一个2 层神经网络的类
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))``, element-wise for arrays."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator
def numerical_gradient(f, x):
    """Estimate the gradient of ``f`` at ``x`` by central differences, for any array rank.

    Each element of ``x`` is nudged in place by +1e-4 and then -1e-4,
    ``f(x)`` is evaluated after each nudge, and the element is restored
    before moving on.

    Args:
        f: Function called as ``f(x)`` that returns a number.
        x: NumPy array; modified temporarily in place and restored.

    Returns:
        Array shaped like ``x`` holding the central-difference estimate for
        every element.
    """
    step = 1e-4  # 0.0001
    gradient = np.zeros_like(x)

    walker = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    for _ in walker:
        pos = walker.multi_index
        saved = x[pos]

        x[pos] = float(saved) + step
        f_plus = f(x)  # f(x+h)

        x[pos] = saved - step
        f_minus = f(x)  # f(x-h)

        gradient[pos] = (f_plus - f_minus) / (2 * step)
        x[pos] = saved  # put the original value back

    return gradient
class TwoLayerNet:
    """Two-layer fully connected network: affine -> sigmoid -> affine -> softmax.

    The parameters are stored in ``self.params`` under the keys ``'W1'``,
    ``'b1'``, ``'W2'`` and ``'b2'``.
    """

    def __init__(self,
                 input_size,
                 hidden_size,
                 output_size,
                 weight_init_std=0.01):
        """Create the parameters.

        Args:
            input_size: Number of input features (rows of ``W1``).
            hidden_size: Number of hidden units.
            output_size: Number of output classes (columns of ``W2``).
            weight_init_std: Factor applied to the standard-normal samples
                used for the weights.  Biases start at zero.
        """
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(
            input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(
            hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def predict(self, x):
        """Return the softmax output of the network for input ``x``."""
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']

        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        return softmax(a2)

    def loss(self, x, t):
        """Return the cross-entropy of ``predict(x)`` against the targets ``t``."""
        y = self.predict(x)
        return cross_entropy_error(y, t)

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` whose predicted class matches ``t``.

        Args:
            x: Batch of inputs, one sample per row.
            t: Targets, either one-hot rows or a 1-D array of class indices.
        """
        predicted = np.argmax(self.predict(x), axis=1)
        # Accept class indices as well as one-hot rows, in line with what
        # cross_entropy_error accepts.
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        return np.sum(predicted == t) / float(x.shape[0])

    # The method was originally published under this misspelled name; keep
    # it so existing callers continue to work.
    accurary = accuracy

    def numerical_gradient(self, x, t):
        """Return numerical gradients of the loss for every parameter.

        Args:
            x: Batch of inputs.
            t: Matching targets.

        Returns:
            Dict with the keys ``'W1'``, ``'b1'``, ``'W2'``, ``'b2'``, each
            holding the value returned by the module-level
            ``numerical_gradient`` for that parameter array.
        """
        def loss_W(_):
            # The argument is ignored on purpose: the parameter array handed
            # to the module-level numerical_gradient is the very object stored
            # in self.params, so perturbing it there changes what self.loss
            # computes here.
            return self.loss(x, t)

        grads = {}
        for key in ('W1', 'b1', 'W2', 'b2'):
            # Inside a method this name resolves to the module-level function,
            # not to this method.
            grads[key] = numerical_gradient(loss_W, self.params[key])
        return grads
# Smoke test on random data: 100 samples with 784 features each.
net=TwoLayerNet(input_size=784,hidden_size=100,output_size=10)
x=np.random.rand(100,784)
y=net.predict(x)
# Random 100x10 targets; accurary takes the argmax of each row as the label,
# so the labels are random and the score should be roughly 1 in 10.
t=np.random.rand(100,10)
print(net.accurary(x,t))
0.1
mini-batch的实现
# load_mnist is defined outside this section.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

train_loss_list = []
train_acc_list = []
test_acc_list = []

# Hyperparameters
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

# Iterations per epoch.  Integer division keeps this an int, so the
# `i % iter_per_epoch == 0` check below still fires once per epoch when
# train_size is not an exact multiple of batch_size.  With true division the
# remainder would only be exactly 0 at i == 0.
iter_per_epoch = max(train_size // batch_size, 1)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

for i in range(iters_num):
    # Progress indicator: the iteration index, printed without a newline.
    print(i, end='')

    # Sample a mini-batch of batch_size random rows.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Numerical gradient of the loss for every parameter.
    grad = network.numerical_gradient(x_batch, t_batch)

    # Gradient-descent step, applied in place to each parameter array.
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Record the loss on the current mini-batch after the update.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Evaluate accuracy once per epoch.
    if i % iter_per_epoch == 0:
        train_acc = network.accurary(x_train, t_train)
        test_acc = network.accurary(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))
这个没有运行结果,大约一分钟一次迭代,1W次循环要七天七夜,撑不住……