# 自己记录的一些实验,没有排版;如果看不懂,请移步其他文章。
import torch
import torch.nn as nn
# lstm = nn.LSTM(3, 5,bias=False)
# for name,param in lstm.named_parameters():
# print(name,param)
# inputs = [torch.randn(2, 3) for _ in range(5)]
# inputs = torch.cat(inputs).view(len(inputs), 2, -1)
# print("inputs维度为:",inputs.size())
# print("inputs的数据为:",inputs)
# hidden = (torch.randn(1, 2, 5), torch.randn(1, 2, 5)) # clean out hidden state
# print('hidden为:',hidden)
# out, (h,c) = lstm(inputs, hidden)
# print('out2',out)
# print('h:',h)
# print('c:',c)
# 直接用LSTM的输出
# out2 tensor([[[ 1.7645e-01, 1.3992e-01, -7.5099e-03, 2.1061e-01, -1.4658e-01],
# [ 6.9992e-01, 1.2492e-01, 1.9404e-01, -4.7846e-01, -1.5412e-01]],
#
# [[ 5.1047e-02, 1.7753e-01, 1.1094e-01, 3.6848e-01, -1.4002e-01],
# [ 1.9628e-01, -1.0030e-02, -1.4725e-01, -1.4024e-01, -1.4341e-01]],
#
# [[ 1.3596e-02, -3.1841e-02, -1.2336e-02, 1.1213e-01, 7.7684e-02],
# [ 1.9534e-01, 1.0617e-05, -1.2793e-01, -8.9563e-02, -4.4747e-02]],
#
# [[ 2.1918e-02, -1.2519e-01, -5.9217e-02, 7.3104e-02, 1.7281e-01],
# [ 7.7996e-03, 2.3978e-01, -2.9749e-02, -1.8220e-01, -1.5852e-01]],
#
# [[ 7.3694e-02, -1.9780e-01, -4.0656e-02, 7.4626e-02, 2.4339e-01],
# [ 2.6004e-02, 1.8576e-01, 1.2329e-01, -1.0394e-02, -2.8678e-02]]],
# grad_fn=<StackBackward>)
# h: tensor([[[ 0.0737, -0.1978, -0.0407, 0.0746, 0.2434],
# [ 0.0260, 0.1858, 0.1233, -0.0104, -0.0287]]],
# grad_fn=<StackBackward>)
# c: tensor([[[ 0.1478, -0.3483, -0.0767, 0.2050, 0.4492],
# [ 0.0360, 0.2471, 0.1619, -0.0297, -0.0431]]],
# grad_fn=<StackBackward>)
#########################################################################################
###### layer的层数为1 ###########
###### 验证通过 ###########
#########################################################################################
### 1 layer层为1验证通过
# import numpy as np
# inputs = np.array(
# [
# [[-0.1402, -2.4375, -0.8766],
# [ 3.5149, -1.2214, 0.0610]],
#
# [[ 1.0977, -0.2523, -0.2009],
# [-1.2237, -0.0940, 0.4024]],
#
# [[-0.4103, 1.4780, -0.7300],
# [ 0.2257, -0.0224, -0.1822]],
#
# [[-0.4267, 0.9405, -0.6343],
# [ 1.3214, -0.9563, 1.3815]],
#
# [[-0.7112, 0.8102, -1.2747],
# [ 1.1957, 1.4434, -3.5762]]
# ])
#
# w_x = np.array([[-0.3811, -0.3605, 0.2842],
# [ 0.3064, -0.3209, 0.4006],
# [-0.3390, 0.2509, 0.3408],
# [-0.1451, -0.3244, 0.3356],
# [-0.0164, -0.0209, 0.2757],
# [-0.0803, -0.3522, -0.1263],
# [-0.0135, -0.2249, -0.1965],
# [ 0.4439, 0.0303, -0.3635],
# [-0.1867, -0.3799, 0.0870],
# [ 0.1317, 0.3124, -0.0685],
# [-0.1750, -0.2223, -0.2423],
# [ 0.3572, -0.3671, 0.3487],
# [ 0.4386, -0.3053, -0.3875],
# [-0.0912, -0.1647, -0.2231],
# [ 0.0080, 0.3188, -0.3798],
# [ 0.3676, -0.4088, -0.3687],
# [ 0.0090, -0.0859, -0.3587],
# [ 0.3566, 0.3903, -0.0683],
# [ 0.2433, -0.1288, 0.2222],
# [-0.0706, 0.2519, -0.0899]])
#
# w_x_i = w_x[:5] # [5,3]
# w_x_f = w_x[5:10]
# w_x_g = w_x[10:15]
# w_x_o = w_x[15:]
#
#
# w_h = np.array([[-0.1880, 0.1977, -0.0799, 0.3464, -0.3551],
# [ 0.1164, -0.2267, 0.1575, 0.3174, 0.3518],
# [-0.0636, 0.4399, 0.2831, -0.3300, 0.1058],
# [ 0.0496, -0.4445, -0.0328, 0.1129, 0.3615],
# [-0.1430, 0.2036, 0.1565, 0.1253, 0.4222],
# [ 0.1469, 0.2363, 0.1311, -0.2609, -0.3295],
# [-0.3303, -0.0722, 0.0833, -0.2050, 0.3797],
# [ 0.1544, -0.0672, 0.0824, -0.3180, 0.3094],
# [-0.2804, -0.0722, -0.1767, 0.4304, 0.1947],
# [ 0.1885, -0.2188, -0.3247, 0.2962, 0.0254],
# [ 0.0597, 0.0725, -0.0304, 0.3348, 0.3484],
# [ 0.0966, -0.2528, 0.1068, 0.2571, -0.1851],
# [-0.4085, -0.1144, 0.4259, 0.4036, 0.1017],
# [ 0.1953, -0.1137, 0.0321, 0.3066, 0.4427],
# [ 0.3902, 0.2454, 0.1280, 0.2274, 0.1296],
# [-0.1686, -0.3899, 0.0491, 0.2731, 0.3999],
# [ 0.0209, -0.1487, -0.1902, -0.0968, -0.2197],
# [-0.0352, -0.2620, 0.1145, 0.1402, -0.3478],
# [-0.2530, 0.3749, -0.3157, -0.2548, 0.4453],
# [ 0.4071, 0.1028, 0.4016, -0.3845, 0.0030]])
#
# w_h_i =w_h[:5] #[5,5]
# w_h_f =w_h[5:10]
# w_h_g = w_h[10:15]
# w_h_o = w_h[15:]
#
#
# h0 = np.array([[[ 0.4961, -2.5106, -0.4681, 0.9342, -0.3996],
# [-0.5589, 0.6913, -0.3324, -0.1190, -0.5952]]])
# c0 = np.array([[[-0.4121, -0.8200, -0.1645, 1.4230, -0.8661],
# [ 2.7055, -0.8133, 0.1079, -1.1382, -0.6441]]])
# h = h0
# c_pre = torch.from_numpy(c0)
#
# for i in range(5):
# f_t = torch.sigmoid(torch.from_numpy(np.dot(inputs[i],w_x_f.T)+np.dot(h,w_h_f.T)))
# i_t = torch.sigmoid(torch.from_numpy(np.dot(inputs[i],w_x_i.T)+np.dot(h,w_h_i.T)))
# o_t = torch.sigmoid(torch.from_numpy(np.dot(inputs[i],w_x_o.T)+np.dot(h,w_h_o.T)))
# g_t = torch.tanh(torch.from_numpy(np.dot(inputs[i],w_x_g.T)+np.dot(h,w_h_g.T)))
# c_cur = torch.mul(f_t,c_pre) + torch.mul(i_t,g_t)
# h_t = torch.mul(o_t,torch.tanh(c_cur))
# print(h_t)
# h = h_t
# c_pre = c_cur
#************************************************************************************************
#**************************** 两层LSTM的验证 ******************************************
#************************************************************************************************
# import torch
# import torch.nn as nn
# lstm = nn.LSTM(3,5,2 ,bias=False)
# for name,param in lstm.named_parameters():
# print(name,param)
# inputs = [torch.randn(2, 3) for _ in range(5)]
# inputs = torch.cat(inputs).view(len(inputs), 2, -1)
# print("inputs维度为:",inputs.size())
# print("inputs的数据为:",inputs)
# hidden = (torch.randn(2, 2, 5), torch.randn(2, 2, 5)) # clean out hidden state
# print('hidden为:',hidden)
# out, (h,c) = lstm(inputs, hidden)
# print('out2',out)
# print('h:',h)
# print('c:',c)
"""
weight_ih_l0 Parameter containing:
tensor([[ 0.2379, -0.1477, -0.1843],
[-0.0606, -0.0339, 0.0669],
[ 0.3392, -0.4067, 0.1124],
[ 0.0649, -0.3140, -0.3768],
[-0.0533, 0.4317, -0.3409],
[-0.3619, -0.2457, -0.0266],
[-0.2505, 0.4028, -0.4456],
[ 0.4241, 0.2539, 0.4287],
[-0.1459, 0.0555, 0.3333],
[ 0.4154, -0.2894, 0.1728],
[ 0.3442, 0.0007, -0.3678],
[-0.2690, -0.0487, 0.1894],
[ 0.1186, -0.4336, 0.1892],
[ 0.2791, -0.3747, 0.0476],
[ 0.1883, 0.1262, 0.0340],
[ 0.4470, -0.2271, -0.1851],
[ 0.2339, 0.0431, 0.2807],
[ 0.2340, 0.2959, -0.2408],
[-0.3248, 0.4052, 0.2561],
[-0.2844, 0.4153, 0.0404]], requires_grad=True)
weight_hh_l0 Parameter containing:
tensor([[-2.0698e-01, -1.7755e-01, -3.0658e-01, -1.1588e-01, 4.5766e-02],
[ 3.2853e-01, 2.0137e-01, 3.6208e-01, -1.2821e-01, -1.7739e-03],
[-2.5506e-01, 9.9625e-02, -8.5663e-02, 3.6996e-01, 2.1595e-01],
[ 1.2179e-01, -1.6107e-01, -2.1954e-01, -2.9739e-01, -1.0122e-01],
[-3.0208e-01, -4.0644e-02, -6.3823e-02, -1.4546e-01, 1.3178e-01],
[-4.0934e-02, -1.6815e-01, -1.3157e-01, 9.9637e-02, 4.4259e-01],
[ 2.7418e-01, 1.4329e-01, -4.2061e-01, 4.1228e-01, -3.4112e-01],
[ 2.1834e-03, 3.2520e-01, -1.1582e-01, -2.9629e-01, -3.5685e-01],
[-3.3427e-01, -4.3912e-01, 2.6091e-01, -2.0448e-01, 2.5485e-02],
[-3.6938e-01, 3.2074e-01, -1.9377e-04, 1.6672e-01, 2.7508e-01],
[ 4.0973e-01, -1.9294e-01, 3.9284e-01, -5.4646e-02, 9.9561e-02],
[ 4.1741e-01, 1.5363e-02, 4.4413e-02, -3.1710e-01, 2.7422e-01],
[ 1.0180e-01, 2.4242e-01, 2.2364e-01, 3.6231e-01, 4.4573e-01],
[-1.4557e-01, -4.2360e-01, -2.6369e-01, 1.6559e-01, -5.2838e-02],
[ 4.3591e-01, 2.0227e-01, 3.5351e-03, 1.5808e-01, -2.3753e-01],
[-1.1144e-01, 4.3910e-01, -3.3235e-01, -1.2088e-01, -1.1441e-02],
[ 2.2494e-01, 1.1863e-01, -1.9057e-01, 1.9746e-01, 4.4242e-01],
[-1.0328e-02, 4.1831e-01, -4.8495e-02, 1.2776e-01, -4.3230e-01],
[ 2.0142e-01, -2.2626e-01, -3.4504e-02, -3.0516e-01, 2.6764e-02],
[ 2.2718e-01, -8.7050e-02, -1.2573e-01, 4.3948e-02, 2.9863e-01]],
requires_grad=True)
weight_ih_l1 Parameter containing:
tensor([[-0.3528, -0.4353, -0.4401, -0.3159, 0.0082],
[-0.0848, 0.1217, -0.4255, -0.1539, -0.4296],
[-0.3387, 0.2427, -0.3002, -0.0539, 0.1885],
[ 0.0592, 0.1616, 0.1149, -0.0716, 0.1982],
[ 0.4273, 0.3076, 0.0771, -0.0066, 0.3962],
[ 0.3060, 0.1231, 0.1049, -0.4156, -0.0435],
[ 0.2834, -0.4361, 0.1660, -0.2228, 0.2367],
[-0.0329, 0.2670, 0.2993, 0.1508, 0.3634],
[ 0.1463, -0.1507, -0.2782, -0.0661, -0.1345],
[-0.0224, -0.0984, 0.2000, 0.3648, -0.2722],
[ 0.0839, -0.2911, -0.2318, -0.1145, -0.2996],
[ 0.2876, 0.2281, 0.0653, 0.2732, 0.4271],
[-0.3735, -0.0143, 0.1642, -0.4130, 0.0443],
[-0.0360, 0.3639, -0.4225, 0.1759, -0.1124],
[ 0.0942, -0.3258, 0.3204, 0.1402, -0.1736],
[-0.0498, -0.2998, -0.2023, -0.1453, -0.1235],
[ 0.3115, -0.0031, 0.1426, -0.3733, -0.3755],
[-0.0982, 0.3129, 0.3575, -0.2197, 0.0030],
[-0.2150, 0.1534, 0.1993, -0.0530, -0.2424],
[ 0.0172, -0.2191, -0.2814, 0.1751, 0.3594]], requires_grad=True)
weight_hh_l1 Parameter containing:
tensor([[-0.3267, -0.0857, 0.0359, 0.4258, -0.0517],
[-0.3176, 0.1830, 0.3462, 0.4365, -0.2537],
[-0.4418, 0.2146, 0.3160, 0.1854, 0.0255],
[ 0.2646, -0.2706, -0.2983, -0.4163, 0.0241],
[ 0.2339, -0.0814, -0.0498, 0.1904, -0.0759],
[-0.1623, 0.1069, -0.0383, 0.2229, 0.2808],
[ 0.2908, -0.2432, -0.1614, 0.1363, 0.2924],
[-0.0738, -0.0246, 0.1532, -0.1271, 0.2876],
[-0.1851, -0.0708, -0.2438, 0.0712, -0.2138],
[ 0.3970, 0.3470, 0.2226, 0.1775, 0.3772],
[ 0.0505, -0.1404, 0.1426, -0.2838, 0.1350],
[ 0.4238, 0.3953, 0.2783, -0.0293, -0.3007],
[-0.3083, -0.4351, -0.3104, -0.3497, 0.1436],
[-0.0940, 0.2734, -0.3675, -0.3928, -0.2005],
[-0.0658, 0.1678, -0.1753, -0.3809, -0.4431],
[ 0.1146, 0.0413, -0.1114, -0.3802, -0.3555],
[ 0.0110, -0.3575, -0.0855, 0.4055, 0.0019],
[ 0.3016, 0.0619, 0.4235, 0.3673, -0.4325],
[-0.3618, -0.1637, -0.3951, 0.0520, -0.2395],
[ 0.3593, -0.3131, -0.1856, 0.1042, -0.3982]], requires_grad=True)
inputs维度为: torch.Size([5, 2, 3])
inputs的数据为: tensor([[[-1.1312, 1.0970, 1.2848],
[-0.6020, -0.2539, 0.0824]],
[[ 1.1160, -0.7939, 1.0617],
[-0.1893, -0.5105, -0.7088]],
[[-0.5781, -0.3050, 1.1445],
[ 0.7122, -0.2542, 1.1061]],
[[ 1.0484, 3.1398, 0.0259],
[ 0.6194, 1.3969, 1.1159]],
[[-0.1472, 1.2514, -0.4664],
[ 0.9148, -1.0041, 1.0280]]])
hidden为: (tensor([[[ 0.2570, 1.1255, 0.1157, -2.8301, 1.4929],
[ 0.7979, -1.6010, -0.6993, 1.9225, 0.2169]],
[[ 1.0473, -0.5031, -0.8567, 0.8341, 0.2007],
[ 0.4666, 0.2677, -0.2282, -1.3530, -0.0826]]]), tensor([[[-0.8367, 0.5181, 0.9831, -2.6134, -0.5054],
[ 0.1692, 0.1029, -1.0233, -0.7694, 1.0741]],
[[ 0.4925, -0.1189, 1.9970, 1.4383, 0.6084],
[-0.4519, 0.9721, -0.3155, 0.9260, 0.8702]]]))
out2 tensor([[[ 0.0896, -0.1895, 0.4685, 0.2865, 0.0076],
[-0.0713, 0.1772, -0.0267, 0.2940, 0.3037]],
[[ 0.0414, -0.0971, 0.3060, 0.0193, -0.0450],
[-0.0497, 0.1586, -0.0531, 0.1542, 0.1165]],
[[ 0.0330, -0.0681, 0.1953, -0.0433, -0.0419],
[-0.0564, 0.0931, -0.0509, 0.0660, 0.0563]],
[[ 0.0194, -0.0230, 0.1382, -0.0718, -0.0363],
[-0.0550, 0.0580, -0.0245, 0.0235, 0.0221]],
[[ 0.0285, -0.0092, 0.0896, -0.0506, -0.0334],
[-0.0510, 0.0415, -0.0142, -0.0022, 0.0262]]],
grad_fn=<StackBackward>)
h: tensor([[[ 0.0570, 0.0205, -0.0600, -0.3310, 0.0754],
[-0.0198, 0.0158, 0.2073, 0.1036, 0.0679]],
[[ 0.0285, -0.0092, 0.0896, -0.0506, -0.0334],
[-0.0510, 0.0415, -0.0142, -0.0022, 0.0262]]],
grad_fn=<StackBackward>)
c: tensor([[[ 0.1319, 0.0435, -0.1047, -0.5754, 0.1184],
[-0.0328, 0.0254, 0.5701, 0.2715, 0.1948]],
[[ 0.0552, -0.0177, 0.1720, -0.1056, -0.0677],
[-0.1070, 0.0854, -0.0279, -0.0044, 0.0540]]],
grad_fn=<StackBackward>)
"""
import numpy as np


def lstm_layer_forward(layer_inputs, w_ih, w_hh, h0, c0):
    """Run one bias-free LSTM layer over a whole sequence by hand.

    The gate weights are expected in the row-stacked layout used throughout
    this file (input, forget, cell, output), i.e. rows ``[0:H]`` are the input
    gate, ``[H:2H]`` the forget gate, ``[2H:3H]`` the cell candidate and
    ``[3H:4H]`` the output gate, where ``H`` is the hidden size.

    All arithmetic is done in float64 so that intermediate layer outputs are
    not silently truncated when they are fed into the next layer.

    Args:
        layer_inputs: Array or tensor of shape (seq_len, batch, input_size).
        w_ih: Input-to-hidden weights of shape (4 * H, input_size).
        w_hh: Hidden-to-hidden weights of shape (4 * H, H).
        h0: Initial hidden state of shape (batch, H).
        c0: Initial cell state of shape (batch, H).

    Returns:
        A tuple ``(outputs, h_n, c_n)`` of float64 tensors: the hidden state
        at every time step with shape (seq_len, batch, H), and the final
        hidden and cell states, each of shape (batch, H).

    Raises:
        ValueError: If the weight matrices do not have ``4 * H`` rows.
    """
    seq = torch.as_tensor(layer_inputs, dtype=torch.float64)
    w_ih = torch.as_tensor(w_ih, dtype=torch.float64)
    w_hh = torch.as_tensor(w_hh, dtype=torch.float64)
    h_prev = torch.as_tensor(h0, dtype=torch.float64)
    c_prev = torch.as_tensor(c0, dtype=torch.float64)

    hidden_size = w_hh.shape[1]
    if w_ih.shape[0] != 4 * hidden_size or w_hh.shape[0] != 4 * hidden_size:
        raise ValueError(
            "w_ih and w_hh must both have 4 * hidden_size rows, got "
            f"{tuple(w_ih.shape)} and {tuple(w_hh.shape)}"
        )

    outputs = []
    for x_t in seq:
        # One matmul yields the pre-activations of all four gates at once;
        # chunking the last axis recovers them in (i, f, g, o) order.
        gates = x_t @ w_ih.T + h_prev @ w_hh.T
        i_pre, f_pre, g_pre, o_pre = gates.chunk(4, dim=-1)
        c_prev = (torch.sigmoid(f_pre) * c_prev
                  + torch.sigmoid(i_pre) * torch.tanh(g_pre))
        h_prev = torch.sigmoid(o_pre) * torch.tanh(c_prev)
        outputs.append(h_prev)

    if not outputs:
        # Empty sequence: nothing to stack, states are returned unchanged.
        return seq.new_zeros((0, *h_prev.shape)), h_prev, c_prev
    return torch.stack(outputs), h_prev, c_prev


# Inputs, weights and initial states copied from the recorded nn.LSTM run
# (3 input features, hidden size 5, 2 layers, no bias, seq_len 5, batch 2).
inputs = np.array([
    [[-1.1312, 1.0970, 1.2848],
     [-0.6020, -0.2539, 0.0824]],
    [[ 1.1160, -0.7939, 1.0617],
     [-0.1893, -0.5105, -0.7088]],
    [[-0.5781, -0.3050, 1.1445],
     [ 0.7122, -0.2542, 1.1061]],
    [[ 1.0484, 3.1398, 0.0259],
     [ 0.6194, 1.3969, 1.1159]],
    [[-0.1472, 1.2514, -0.4664],
     [ 0.9148, -1.0041, 1.0280]],
])

# weight_ih_l0
w_x_0 = np.array([
    [ 0.2379, -0.1477, -0.1843],
    [-0.0606, -0.0339, 0.0669],
    [ 0.3392, -0.4067, 0.1124],
    [ 0.0649, -0.3140, -0.3768],
    [-0.0533, 0.4317, -0.3409],
    [-0.3619, -0.2457, -0.0266],
    [-0.2505, 0.4028, -0.4456],
    [ 0.4241, 0.2539, 0.4287],
    [-0.1459, 0.0555, 0.3333],
    [ 0.4154, -0.2894, 0.1728],
    [ 0.3442, 0.0007, -0.3678],
    [-0.2690, -0.0487, 0.1894],
    [ 0.1186, -0.4336, 0.1892],
    [ 0.2791, -0.3747, 0.0476],
    [ 0.1883, 0.1262, 0.0340],
    [ 0.4470, -0.2271, -0.1851],
    [ 0.2339, 0.0431, 0.2807],
    [ 0.2340, 0.2959, -0.2408],
    [-0.3248, 0.4052, 0.2561],
    [-0.2844, 0.4153, 0.0404],
])

# weight_hh_l0
w_h_0 = np.array([
    [-2.0698e-01, -1.7755e-01, -3.0658e-01, -1.1588e-01, 4.5766e-02],
    [ 3.2853e-01, 2.0137e-01, 3.6208e-01, -1.2821e-01, -1.7739e-03],
    [-2.5506e-01, 9.9625e-02, -8.5663e-02, 3.6996e-01, 2.1595e-01],
    [ 1.2179e-01, -1.6107e-01, -2.1954e-01, -2.9739e-01, -1.0122e-01],
    [-3.0208e-01, -4.0644e-02, -6.3823e-02, -1.4546e-01, 1.3178e-01],
    [-4.0934e-02, -1.6815e-01, -1.3157e-01, 9.9637e-02, 4.4259e-01],
    [ 2.7418e-01, 1.4329e-01, -4.2061e-01, 4.1228e-01, -3.4112e-01],
    [ 2.1834e-03, 3.2520e-01, -1.1582e-01, -2.9629e-01, -3.5685e-01],
    [-3.3427e-01, -4.3912e-01, 2.6091e-01, -2.0448e-01, 2.5485e-02],
    [-3.6938e-01, 3.2074e-01, -1.9377e-04, 1.6672e-01, 2.7508e-01],
    [ 4.0973e-01, -1.9294e-01, 3.9284e-01, -5.4646e-02, 9.9561e-02],
    [ 4.1741e-01, 1.5363e-02, 4.4413e-02, -3.1710e-01, 2.7422e-01],
    [ 1.0180e-01, 2.4242e-01, 2.2364e-01, 3.6231e-01, 4.4573e-01],
    [-1.4557e-01, -4.2360e-01, -2.6369e-01, 1.6559e-01, -5.2838e-02],
    [ 4.3591e-01, 2.0227e-01, 3.5351e-03, 1.5808e-01, -2.3753e-01],
    [-1.1144e-01, 4.3910e-01, -3.3235e-01, -1.2088e-01, -1.1441e-02],
    [ 2.2494e-01, 1.1863e-01, -1.9057e-01, 1.9746e-01, 4.4242e-01],
    [-1.0328e-02, 4.1831e-01, -4.8495e-02, 1.2776e-01, -4.3230e-01],
    [ 2.0142e-01, -2.2626e-01, -3.4504e-02, -3.0516e-01, 2.6764e-02],
    [ 2.2718e-01, -8.7050e-02, -1.2573e-01, 4.3948e-02, 2.9863e-01],
])

# weight_ih_l1
w_x_1 = np.array([
    [-0.3528, -0.4353, -0.4401, -0.3159, 0.0082],
    [-0.0848, 0.1217, -0.4255, -0.1539, -0.4296],
    [-0.3387, 0.2427, -0.3002, -0.0539, 0.1885],
    [ 0.0592, 0.1616, 0.1149, -0.0716, 0.1982],
    [ 0.4273, 0.3076, 0.0771, -0.0066, 0.3962],
    [ 0.3060, 0.1231, 0.1049, -0.4156, -0.0435],
    [ 0.2834, -0.4361, 0.1660, -0.2228, 0.2367],
    [-0.0329, 0.2670, 0.2993, 0.1508, 0.3634],
    [ 0.1463, -0.1507, -0.2782, -0.0661, -0.1345],
    [-0.0224, -0.0984, 0.2000, 0.3648, -0.2722],
    [ 0.0839, -0.2911, -0.2318, -0.1145, -0.2996],
    [ 0.2876, 0.2281, 0.0653, 0.2732, 0.4271],
    [-0.3735, -0.0143, 0.1642, -0.4130, 0.0443],
    [-0.0360, 0.3639, -0.4225, 0.1759, -0.1124],
    [ 0.0942, -0.3258, 0.3204, 0.1402, -0.1736],
    [-0.0498, -0.2998, -0.2023, -0.1453, -0.1235],
    [ 0.3115, -0.0031, 0.1426, -0.3733, -0.3755],
    [-0.0982, 0.3129, 0.3575, -0.2197, 0.0030],
    [-0.2150, 0.1534, 0.1993, -0.0530, -0.2424],
    [ 0.0172, -0.2191, -0.2814, 0.1751, 0.3594],
])

# weight_hh_l1
w_h_1 = np.array([
    [-0.3267, -0.0857, 0.0359, 0.4258, -0.0517],
    [-0.3176, 0.1830, 0.3462, 0.4365, -0.2537],
    [-0.4418, 0.2146, 0.3160, 0.1854, 0.0255],
    [ 0.2646, -0.2706, -0.2983, -0.4163, 0.0241],
    [ 0.2339, -0.0814, -0.0498, 0.1904, -0.0759],
    [-0.1623, 0.1069, -0.0383, 0.2229, 0.2808],
    [ 0.2908, -0.2432, -0.1614, 0.1363, 0.2924],
    [-0.0738, -0.0246, 0.1532, -0.1271, 0.2876],
    [-0.1851, -0.0708, -0.2438, 0.0712, -0.2138],
    [ 0.3970, 0.3470, 0.2226, 0.1775, 0.3772],
    [ 0.0505, -0.1404, 0.1426, -0.2838, 0.1350],
    [ 0.4238, 0.3953, 0.2783, -0.0293, -0.3007],
    [-0.3083, -0.4351, -0.3104, -0.3497, 0.1436],
    [-0.0940, 0.2734, -0.3675, -0.3928, -0.2005],
    [-0.0658, 0.1678, -0.1753, -0.3809, -0.4431],
    [ 0.1146, 0.0413, -0.1114, -0.3802, -0.3555],
    [ 0.0110, -0.3575, -0.0855, 0.4055, 0.0019],
    [ 0.3016, 0.0619, 0.4235, 0.3673, -0.4325],
    [-0.3618, -0.1637, -0.3951, 0.0520, -0.2395],
    [ 0.3593, -0.3131, -0.1856, 0.1042, -0.3982],
])

# Initial hidden / cell states, indexed as [layer, batch, hidden].
h = np.array([
    [[ 0.2570, 1.1255, 0.1157, -2.8301, 1.4929],
     [ 0.7979, -1.6010, -0.6993, 1.9225, 0.2169]],
    [[ 1.0473, -0.5031, -0.8567, 0.8341, 0.2007],
     [ 0.4666, 0.2677, -0.2282, -1.3530, -0.0826]],
])
c = np.array([
    [[-0.8367, 0.5181, 0.9831, -2.6134, -0.5054],
     [ 0.1692, 0.1029, -1.0233, -0.7694, 1.0741]],
    [[ 0.4925, -0.1189, 1.9970, 1.4383, 0.6084],
     [-0.4519, 0.9721, -0.3155, 0.9260, 0.8702]],
])

w_x = [w_x_0, w_x_1]
w_h = [w_h_0, w_h_1]

# Sanity-print the operand shapes of one gate's matmuls for layer 0.
print(inputs[0].shape, w_x[0][5:10].shape)
print(h[0].shape, w_h[0][5:10].shape)

# Layer 0 consumes the raw inputs; its per-step hidden states become the
# input sequence of layer 1.  `h` and `c` are never rebound, so no defensive
# copy of the initial states is needed.
hid_inp, _, _ = lstm_layer_forward(inputs, w_x[0], w_h[0], h[0], c[0])
out, h_n, c_n = lstm_layer_forward(hid_inp, w_x[1], w_h[1], h[1], c[1])

# The top layer's hidden state at each step should match nn.LSTM's `out`.
for h_t in out:
    print(h_t)