Verifying the LSTM forward pass in PyTorch

These are notes from my own experiments, with no real formatting; if they are hard to follow, other write-ups may be a better starting point.

import torch
import torch.nn as nn
# lstm = nn.LSTM(3, 5,bias=False)
# for name,param in lstm.named_parameters():
#     print(name,param)
# inputs = [torch.randn(2, 3) for _ in range(5)]
# inputs = torch.cat(inputs).view(len(inputs), 2, -1)
# print("inputs维度为:",inputs.size())
# print("inputs的数据为:",inputs)
# hidden = (torch.randn(1, 2, 5), torch.randn(1, 2, 5))  # clean out hidden state
# print('hidden:', hidden)
# out, (h,c) = lstm(inputs, hidden)
# print('out2',out)
# print('h:',h)
# print('c:',c)
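
# One detail the manual verification relies on: nn.LSTM stores the four gate weight
# matrices packed row-wise in the order input, forget, cell (candidate), output gate,
# i.e. weight_ih_l0 has shape [4*hidden_size, input_size] = [20, 3] here.
# A minimal sketch of how the per-gate blocks can be recovered (variable names are my own):
#
# import torch.nn as nn
# lstm = nn.LSTM(3, 5, bias=False)
# w_ih = lstm.weight_ih_l0                        # packed as [W_ii | W_if | W_ig | W_io], shape [20, 3]
# w_ii, w_if, w_ig, w_io = w_ih.split(5, dim=0)   # hidden_size = 5 rows per gate
# w_hh = lstm.weight_hh_l0                        # same packing for the recurrent weights, shape [20, 5]
#
# This is exactly the slicing ([:5], [5:10], [10:15], [15:]) used in the hand-rolled loops below.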


# Output taken directly from the LSTM
# out2 tensor([[[ 1.7645e-01,  1.3992e-01, -7.5099e-03,  2.1061e-01, -1.4658e-01],
#          [ 6.9992e-01,  1.2492e-01,  1.9404e-01, -4.7846e-01, -1.5412e-01]],
#
#         [[ 5.1047e-02,  1.7753e-01,  1.1094e-01,  3.6848e-01, -1.4002e-01],
#          [ 1.9628e-01, -1.0030e-02, -1.4725e-01, -1.4024e-01, -1.4341e-01]],
#
#         [[ 1.3596e-02, -3.1841e-02, -1.2336e-02,  1.1213e-01,  7.7684e-02],
#          [ 1.9534e-01,  1.0617e-05, -1.2793e-01, -8.9563e-02, -4.4747e-02]],
#
#         [[ 2.1918e-02, -1.2519e-01, -5.9217e-02,  7.3104e-02,  1.7281e-01],
#          [ 7.7996e-03,  2.3978e-01, -2.9749e-02, -1.8220e-01, -1.5852e-01]],
#
#         [[ 7.3694e-02, -1.9780e-01, -4.0656e-02,  7.4626e-02,  2.4339e-01],
#          [ 2.6004e-02,  1.8576e-01,  1.2329e-01, -1.0394e-02, -2.8678e-02]]],
#        grad_fn=<StackBackward>)
# h: tensor([[[ 0.0737, -0.1978, -0.0407,  0.0746,  0.2434],
#          [ 0.0260,  0.1858,  0.1233, -0.0104, -0.0287]]],
#        grad_fn=<StackBackward>)
# c: tensor([[[ 0.1478, -0.3483, -0.0767,  0.2050,  0.4492],
#          [ 0.0360,  0.2471,  0.1619, -0.0297, -0.0431]]],
#        grad_fn=<StackBackward>)




#########################################################################################
######                     num_layers = 1                                      ###########
######                     verification passed                                 ###########
#########################################################################################

### 1: single-layer case, verification passed
# import numpy as np
# inputs = np.array(
#     [
#         [[-0.1402, -2.4375, -0.8766],
#          [ 3.5149, -1.2214,  0.0610]],
#
#         [[ 1.0977, -0.2523, -0.2009],
#          [-1.2237, -0.0940,  0.4024]],
#
#         [[-0.4103,  1.4780, -0.7300],
#          [ 0.2257, -0.0224, -0.1822]],
#
#         [[-0.4267,  0.9405, -0.6343],
#          [ 1.3214, -0.9563,  1.3815]],
#
#         [[-0.7112,  0.8102, -1.2747],
#          [ 1.1957,  1.4434, -3.5762]]
#     ])
#
# w_x = np.array([[-0.3811, -0.3605,  0.2842],
#         [ 0.3064, -0.3209,  0.4006],
#         [-0.3390,  0.2509,  0.3408],
#         [-0.1451, -0.3244,  0.3356],
#         [-0.0164, -0.0209,  0.2757],
#         [-0.0803, -0.3522, -0.1263],
#         [-0.0135, -0.2249, -0.1965],
#         [ 0.4439,  0.0303, -0.3635],
#         [-0.1867, -0.3799,  0.0870],
#         [ 0.1317,  0.3124, -0.0685],
#         [-0.1750, -0.2223, -0.2423],
#         [ 0.3572, -0.3671,  0.3487],
#         [ 0.4386, -0.3053, -0.3875],
#         [-0.0912, -0.1647, -0.2231],
#         [ 0.0080,  0.3188, -0.3798],
#         [ 0.3676, -0.4088, -0.3687],
#         [ 0.0090, -0.0859, -0.3587],
#         [ 0.3566,  0.3903, -0.0683],
#         [ 0.2433, -0.1288,  0.2222],
#         [-0.0706,  0.2519, -0.0899]])
#
# w_x_i = w_x[:5] # [5,3]
# w_x_f = w_x[5:10]
# w_x_g = w_x[10:15]
# w_x_o = w_x[15:]
#
#
# w_h = np.array([[-0.1880,  0.1977, -0.0799,  0.3464, -0.3551],
#         [ 0.1164, -0.2267,  0.1575,  0.3174,  0.3518],
#         [-0.0636,  0.4399,  0.2831, -0.3300,  0.1058],
#         [ 0.0496, -0.4445, -0.0328,  0.1129,  0.3615],
#         [-0.1430,  0.2036,  0.1565,  0.1253,  0.4222],
#         [ 0.1469,  0.2363,  0.1311, -0.2609, -0.3295],
#         [-0.3303, -0.0722,  0.0833, -0.2050,  0.3797],
#         [ 0.1544, -0.0672,  0.0824, -0.3180,  0.3094],
#         [-0.2804, -0.0722, -0.1767,  0.4304,  0.1947],
#         [ 0.1885, -0.2188, -0.3247,  0.2962,  0.0254],
#         [ 0.0597,  0.0725, -0.0304,  0.3348,  0.3484],
#         [ 0.0966, -0.2528,  0.1068,  0.2571, -0.1851],
#         [-0.4085, -0.1144,  0.4259,  0.4036,  0.1017],
#         [ 0.1953, -0.1137,  0.0321,  0.3066,  0.4427],
#         [ 0.3902,  0.2454,  0.1280,  0.2274,  0.1296],
#         [-0.1686, -0.3899,  0.0491,  0.2731,  0.3999],
#         [ 0.0209, -0.1487, -0.1902, -0.0968, -0.2197],
#         [-0.0352, -0.2620,  0.1145,  0.1402, -0.3478],
#         [-0.2530,  0.3749, -0.3157, -0.2548,  0.4453],
#         [ 0.4071,  0.1028,  0.4016, -0.3845,  0.0030]])
#
# w_h_i =w_h[:5] #[5,5]
# w_h_f =w_h[5:10]
# w_h_g = w_h[10:15]
# w_h_o = w_h[15:]
#
#
# h0 = np.array([[[ 0.4961, -2.5106, -0.4681,  0.9342, -0.3996],
#          [-0.5589,  0.6913, -0.3324, -0.1190, -0.5952]]])
# c0 = np.array([[[-0.4121, -0.8200, -0.1645,  1.4230, -0.8661],
#          [ 2.7055, -0.8133,  0.1079, -1.1382, -0.6441]]])
# h = h0
# c_pre = torch.from_numpy(c0)
#
# for i in range(5):
#     f_t = torch.sigmoid(torch.from_numpy(np.dot(inputs[i],w_x_f.T)+np.dot(h,w_h_f.T)))
#     i_t = torch.sigmoid(torch.from_numpy(np.dot(inputs[i],w_x_i.T)+np.dot(h,w_h_i.T)))
#     o_t = torch.sigmoid(torch.from_numpy(np.dot(inputs[i],w_x_o.T)+np.dot(h,w_h_o.T)))
#     g_t = torch.tanh(torch.from_numpy(np.dot(inputs[i],w_x_g.T)+np.dot(h,w_h_g.T)))
#     c_cur = torch.mul(f_t,c_pre) + torch.mul(i_t,g_t)
#     h_t = torch.mul(o_t,torch.tanh(c_cur))
#     print(h_t)
#     h = h_t
#     c_pre = c_cur
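
# For reference, with bias=False the (commented-out) loop above is just the standard
# LSTM cell update that nn.LSTM computes internally, in LaTeX notation:
#
#   i_t = \sigma(W_{xi} x_t + W_{hi} h_{t-1})
#   f_t = \sigma(W_{xf} x_t + W_{hf} h_{t-1})
#   g_t = \tanh(W_{xg} x_t + W_{hg} h_{t-1})
#   o_t = \sigma(W_{xo} x_t + W_{ho} h_{t-1})
#   c_t = f_t \odot c_{t-1} + i_t \odot g_t
#   h_t = o_t \odot \tanh(c_t)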



#************************************************************************************************
#****************************    Two-layer LSTM verification    **********************************
#************************************************************************************************
# import torch
# import torch.nn as nn
# lstm = nn.LSTM(3,5,2 ,bias=False)
# for name,param in lstm.named_parameters():
#     print(name,param)
# inputs = [torch.randn(2, 3) for _ in range(5)]
# inputs = torch.cat(inputs).view(len(inputs), 2, -1)
# print("inputs维度为:",inputs.size())
# print("inputs的数据为:",inputs)
# hidden = (torch.randn(2, 2, 5), torch.randn(2, 2, 5))  # clean out hidden state
# print('hidden:', hidden)
# out, (h,c) = lstm(inputs, hidden)
# print('out2',out)
# print('h:',h)
# print('c:',c)
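
# For the stacked case, the recurrence that the two manual loops further below reproduce is
# (a sketch of the standard two-layer LSTM semantics: hidden[k] and cell[k] are the initial
# states of layer k, and layer 1 reads layer 0's hidden-state sequence as its input):
#
#   h_t^{(0)}, c_t^{(0)} = \mathrm{LSTMCell}^{(0)}(x_t,\; h_{t-1}^{(0)}, c_{t-1}^{(0)})
#   h_t^{(1)}, c_t^{(1)} = \mathrm{LSTMCell}^{(1)}(h_t^{(0)},\; h_{t-1}^{(1)}, c_{t-1}^{(1)})
#   \mathrm{out}_t = h_t^{(1)}, \qquad h_n = (h_T^{(0)}, h_T^{(1)}), \qquad c_n = (c_T^{(0)}, c_T^{(1)})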

"""
weight_ih_l0 Parameter containing:
tensor([[ 0.2379, -0.1477, -0.1843],
        [-0.0606, -0.0339,  0.0669],
        [ 0.3392, -0.4067,  0.1124],
        [ 0.0649, -0.3140, -0.3768],
        [-0.0533,  0.4317, -0.3409],
        [-0.3619, -0.2457, -0.0266],
        [-0.2505,  0.4028, -0.4456],
        [ 0.4241,  0.2539,  0.4287],
        [-0.1459,  0.0555,  0.3333],
        [ 0.4154, -0.2894,  0.1728],
        [ 0.3442,  0.0007, -0.3678],
        [-0.2690, -0.0487,  0.1894],
        [ 0.1186, -0.4336,  0.1892],
        [ 0.2791, -0.3747,  0.0476],
        [ 0.1883,  0.1262,  0.0340],
        [ 0.4470, -0.2271, -0.1851],
        [ 0.2339,  0.0431,  0.2807],
        [ 0.2340,  0.2959, -0.2408],
        [-0.3248,  0.4052,  0.2561],
        [-0.2844,  0.4153,  0.0404]], requires_grad=True)
weight_hh_l0 Parameter containing:
tensor([[-2.0698e-01, -1.7755e-01, -3.0658e-01, -1.1588e-01,  4.5766e-02],
        [ 3.2853e-01,  2.0137e-01,  3.6208e-01, -1.2821e-01, -1.7739e-03],
        [-2.5506e-01,  9.9625e-02, -8.5663e-02,  3.6996e-01,  2.1595e-01],
        [ 1.2179e-01, -1.6107e-01, -2.1954e-01, -2.9739e-01, -1.0122e-01],
        [-3.0208e-01, -4.0644e-02, -6.3823e-02, -1.4546e-01,  1.3178e-01],
        [-4.0934e-02, -1.6815e-01, -1.3157e-01,  9.9637e-02,  4.4259e-01],
        [ 2.7418e-01,  1.4329e-01, -4.2061e-01,  4.1228e-01, -3.4112e-01],
        [ 2.1834e-03,  3.2520e-01, -1.1582e-01, -2.9629e-01, -3.5685e-01],
        [-3.3427e-01, -4.3912e-01,  2.6091e-01, -2.0448e-01,  2.5485e-02],
        [-3.6938e-01,  3.2074e-01, -1.9377e-04,  1.6672e-01,  2.7508e-01],
        [ 4.0973e-01, -1.9294e-01,  3.9284e-01, -5.4646e-02,  9.9561e-02],
        [ 4.1741e-01,  1.5363e-02,  4.4413e-02, -3.1710e-01,  2.7422e-01],
        [ 1.0180e-01,  2.4242e-01,  2.2364e-01,  3.6231e-01,  4.4573e-01],
        [-1.4557e-01, -4.2360e-01, -2.6369e-01,  1.6559e-01, -5.2838e-02],
        [ 4.3591e-01,  2.0227e-01,  3.5351e-03,  1.5808e-01, -2.3753e-01],
        [-1.1144e-01,  4.3910e-01, -3.3235e-01, -1.2088e-01, -1.1441e-02],
        [ 2.2494e-01,  1.1863e-01, -1.9057e-01,  1.9746e-01,  4.4242e-01],
        [-1.0328e-02,  4.1831e-01, -4.8495e-02,  1.2776e-01, -4.3230e-01],
        [ 2.0142e-01, -2.2626e-01, -3.4504e-02, -3.0516e-01,  2.6764e-02],
        [ 2.2718e-01, -8.7050e-02, -1.2573e-01,  4.3948e-02,  2.9863e-01]],
       requires_grad=True)
weight_ih_l1 Parameter containing:
tensor([[-0.3528, -0.4353, -0.4401, -0.3159,  0.0082],
        [-0.0848,  0.1217, -0.4255, -0.1539, -0.4296],
        [-0.3387,  0.2427, -0.3002, -0.0539,  0.1885],
        [ 0.0592,  0.1616,  0.1149, -0.0716,  0.1982],
        [ 0.4273,  0.3076,  0.0771, -0.0066,  0.3962],
        [ 0.3060,  0.1231,  0.1049, -0.4156, -0.0435],
        [ 0.2834, -0.4361,  0.1660, -0.2228,  0.2367],
        [-0.0329,  0.2670,  0.2993,  0.1508,  0.3634],
        [ 0.1463, -0.1507, -0.2782, -0.0661, -0.1345],
        [-0.0224, -0.0984,  0.2000,  0.3648, -0.2722],
        [ 0.0839, -0.2911, -0.2318, -0.1145, -0.2996],
        [ 0.2876,  0.2281,  0.0653,  0.2732,  0.4271],
        [-0.3735, -0.0143,  0.1642, -0.4130,  0.0443],
        [-0.0360,  0.3639, -0.4225,  0.1759, -0.1124],
        [ 0.0942, -0.3258,  0.3204,  0.1402, -0.1736],
        [-0.0498, -0.2998, -0.2023, -0.1453, -0.1235],
        [ 0.3115, -0.0031,  0.1426, -0.3733, -0.3755],
        [-0.0982,  0.3129,  0.3575, -0.2197,  0.0030],
        [-0.2150,  0.1534,  0.1993, -0.0530, -0.2424],
        [ 0.0172, -0.2191, -0.2814,  0.1751,  0.3594]], requires_grad=True)
weight_hh_l1 Parameter containing:
tensor([[-0.3267, -0.0857,  0.0359,  0.4258, -0.0517],
        [-0.3176,  0.1830,  0.3462,  0.4365, -0.2537],
        [-0.4418,  0.2146,  0.3160,  0.1854,  0.0255],
        [ 0.2646, -0.2706, -0.2983, -0.4163,  0.0241],
        [ 0.2339, -0.0814, -0.0498,  0.1904, -0.0759],
        [-0.1623,  0.1069, -0.0383,  0.2229,  0.2808],
        [ 0.2908, -0.2432, -0.1614,  0.1363,  0.2924],
        [-0.0738, -0.0246,  0.1532, -0.1271,  0.2876],
        [-0.1851, -0.0708, -0.2438,  0.0712, -0.2138],
        [ 0.3970,  0.3470,  0.2226,  0.1775,  0.3772],
        [ 0.0505, -0.1404,  0.1426, -0.2838,  0.1350],
        [ 0.4238,  0.3953,  0.2783, -0.0293, -0.3007],
        [-0.3083, -0.4351, -0.3104, -0.3497,  0.1436],
        [-0.0940,  0.2734, -0.3675, -0.3928, -0.2005],
        [-0.0658,  0.1678, -0.1753, -0.3809, -0.4431],
        [ 0.1146,  0.0413, -0.1114, -0.3802, -0.3555],
        [ 0.0110, -0.3575, -0.0855,  0.4055,  0.0019],
        [ 0.3016,  0.0619,  0.4235,  0.3673, -0.4325],
        [-0.3618, -0.1637, -0.3951,  0.0520, -0.2395],
        [ 0.3593, -0.3131, -0.1856,  0.1042, -0.3982]], requires_grad=True)
inputs size: torch.Size([5, 2, 3])
inputs data: tensor([[[-1.1312,  1.0970,  1.2848],
         [-0.6020, -0.2539,  0.0824]],

        [[ 1.1160, -0.7939,  1.0617],
         [-0.1893, -0.5105, -0.7088]],

        [[-0.5781, -0.3050,  1.1445],
         [ 0.7122, -0.2542,  1.1061]],

        [[ 1.0484,  3.1398,  0.0259],
         [ 0.6194,  1.3969,  1.1159]],

        [[-0.1472,  1.2514, -0.4664],
         [ 0.9148, -1.0041,  1.0280]]])
hidden: (tensor([[[ 0.2570,  1.1255,  0.1157, -2.8301,  1.4929],
         [ 0.7979, -1.6010, -0.6993,  1.9225,  0.2169]],

        [[ 1.0473, -0.5031, -0.8567,  0.8341,  0.2007],
         [ 0.4666,  0.2677, -0.2282, -1.3530, -0.0826]]]), tensor([[[-0.8367,  0.5181,  0.9831, -2.6134, -0.5054],
         [ 0.1692,  0.1029, -1.0233, -0.7694,  1.0741]],

        [[ 0.4925, -0.1189,  1.9970,  1.4383,  0.6084],
         [-0.4519,  0.9721, -0.3155,  0.9260,  0.8702]]]))
out2 tensor([[[ 0.0896, -0.1895,  0.4685,  0.2865,  0.0076],
         [-0.0713,  0.1772, -0.0267,  0.2940,  0.3037]],

        [[ 0.0414, -0.0971,  0.3060,  0.0193, -0.0450],
         [-0.0497,  0.1586, -0.0531,  0.1542,  0.1165]],

        [[ 0.0330, -0.0681,  0.1953, -0.0433, -0.0419],
         [-0.0564,  0.0931, -0.0509,  0.0660,  0.0563]],

        [[ 0.0194, -0.0230,  0.1382, -0.0718, -0.0363],
         [-0.0550,  0.0580, -0.0245,  0.0235,  0.0221]],

        [[ 0.0285, -0.0092,  0.0896, -0.0506, -0.0334],
         [-0.0510,  0.0415, -0.0142, -0.0022,  0.0262]]],
       grad_fn=<StackBackward>)
h: tensor([[[ 0.0570,  0.0205, -0.0600, -0.3310,  0.0754],
         [-0.0198,  0.0158,  0.2073,  0.1036,  0.0679]],

        [[ 0.0285, -0.0092,  0.0896, -0.0506, -0.0334],
         [-0.0510,  0.0415, -0.0142, -0.0022,  0.0262]]],
       grad_fn=<StackBackward>)
c: tensor([[[ 0.1319,  0.0435, -0.1047, -0.5754,  0.1184],
         [-0.0328,  0.0254,  0.5701,  0.2715,  0.1948]],

        [[ 0.0552, -0.0177,  0.1720, -0.1056, -0.0677],
         [-0.1070,  0.0854, -0.0279, -0.0044,  0.0540]]],
       grad_fn=<StackBackward>)
"""
import numpy as np
inputs = np.array([[[-1.1312,  1.0970,  1.2848],
         [-0.6020, -0.2539,  0.0824]],

        [[ 1.1160, -0.7939,  1.0617],
         [-0.1893, -0.5105, -0.7088]],

        [[-0.5781, -0.3050,  1.1445],
         [ 0.7122, -0.2542,  1.1061]],

        [[ 1.0484,  3.1398,  0.0259],
         [ 0.6194,  1.3969,  1.1159]],

        [[-0.1472,  1.2514, -0.4664],
         [ 0.9148, -1.0041,  1.0280]]])

w_x_0 = np.array([[ 0.2379, -0.1477, -0.1843],
        [-0.0606, -0.0339,  0.0669],
        [ 0.3392, -0.4067,  0.1124],
        [ 0.0649, -0.3140, -0.3768],
        [-0.0533,  0.4317, -0.3409],
        [-0.3619, -0.2457, -0.0266],
        [-0.2505,  0.4028, -0.4456],
        [ 0.4241,  0.2539,  0.4287],
        [-0.1459,  0.0555,  0.3333],
        [ 0.4154, -0.2894,  0.1728],
        [ 0.3442,  0.0007, -0.3678],
        [-0.2690, -0.0487,  0.1894],
        [ 0.1186, -0.4336,  0.1892],
        [ 0.2791, -0.3747,  0.0476],
        [ 0.1883,  0.1262,  0.0340],
        [ 0.4470, -0.2271, -0.1851],
        [ 0.2339,  0.0431,  0.2807],
        [ 0.2340,  0.2959, -0.2408],
        [-0.3248,  0.4052,  0.2561],
        [-0.2844,  0.4153,  0.0404]])

w_h_0 = np.array([[-2.0698e-01, -1.7755e-01, -3.0658e-01, -1.1588e-01,  4.5766e-02],
        [ 3.2853e-01,  2.0137e-01,  3.6208e-01, -1.2821e-01, -1.7739e-03],
        [-2.5506e-01,  9.9625e-02, -8.5663e-02,  3.6996e-01,  2.1595e-01],
        [ 1.2179e-01, -1.6107e-01, -2.1954e-01, -2.9739e-01, -1.0122e-01],
        [-3.0208e-01, -4.0644e-02, -6.3823e-02, -1.4546e-01,  1.3178e-01],
        [-4.0934e-02, -1.6815e-01, -1.3157e-01,  9.9637e-02,  4.4259e-01],
        [ 2.7418e-01,  1.4329e-01, -4.2061e-01,  4.1228e-01, -3.4112e-01],
        [ 2.1834e-03,  3.2520e-01, -1.1582e-01, -2.9629e-01, -3.5685e-01],
        [-3.3427e-01, -4.3912e-01,  2.6091e-01, -2.0448e-01,  2.5485e-02],
        [-3.6938e-01,  3.2074e-01, -1.9377e-04,  1.6672e-01,  2.7508e-01],
        [ 4.0973e-01, -1.9294e-01,  3.9284e-01, -5.4646e-02,  9.9561e-02],
        [ 4.1741e-01,  1.5363e-02,  4.4413e-02, -3.1710e-01,  2.7422e-01],
        [ 1.0180e-01,  2.4242e-01,  2.2364e-01,  3.6231e-01,  4.4573e-01],
        [-1.4557e-01, -4.2360e-01, -2.6369e-01,  1.6559e-01, -5.2838e-02],
        [ 4.3591e-01,  2.0227e-01,  3.5351e-03,  1.5808e-01, -2.3753e-01],
        [-1.1144e-01,  4.3910e-01, -3.3235e-01, -1.2088e-01, -1.1441e-02],
        [ 2.2494e-01,  1.1863e-01, -1.9057e-01,  1.9746e-01,  4.4242e-01],
        [-1.0328e-02,  4.1831e-01, -4.8495e-02,  1.2776e-01, -4.3230e-01],
        [ 2.0142e-01, -2.2626e-01, -3.4504e-02, -3.0516e-01,  2.6764e-02],
        [ 2.2718e-01, -8.7050e-02, -1.2573e-01,  4.3948e-02,  2.9863e-01]])

w_x_1 = np.array([[-0.3528, -0.4353, -0.4401, -0.3159,  0.0082],
        [-0.0848,  0.1217, -0.4255, -0.1539, -0.4296],
        [-0.3387,  0.2427, -0.3002, -0.0539,  0.1885],
        [ 0.0592,  0.1616,  0.1149, -0.0716,  0.1982],
        [ 0.4273,  0.3076,  0.0771, -0.0066,  0.3962],
        [ 0.3060,  0.1231,  0.1049, -0.4156, -0.0435],
        [ 0.2834, -0.4361,  0.1660, -0.2228,  0.2367],
        [-0.0329,  0.2670,  0.2993,  0.1508,  0.3634],
        [ 0.1463, -0.1507, -0.2782, -0.0661, -0.1345],
        [-0.0224, -0.0984,  0.2000,  0.3648, -0.2722],
        [ 0.0839, -0.2911, -0.2318, -0.1145, -0.2996],
        [ 0.2876,  0.2281,  0.0653,  0.2732,  0.4271],
        [-0.3735, -0.0143,  0.1642, -0.4130,  0.0443],
        [-0.0360,  0.3639, -0.4225,  0.1759, -0.1124],
        [ 0.0942, -0.3258,  0.3204,  0.1402, -0.1736],
        [-0.0498, -0.2998, -0.2023, -0.1453, -0.1235],
        [ 0.3115, -0.0031,  0.1426, -0.3733, -0.3755],
        [-0.0982,  0.3129,  0.3575, -0.2197,  0.0030],
        [-0.2150,  0.1534,  0.1993, -0.0530, -0.2424],
        [ 0.0172, -0.2191, -0.2814,  0.1751,  0.3594]])

w_h_1 = np.array([[-0.3267, -0.0857,  0.0359,  0.4258, -0.0517],
        [-0.3176,  0.1830,  0.3462,  0.4365, -0.2537],
        [-0.4418,  0.2146,  0.3160,  0.1854,  0.0255],
        [ 0.2646, -0.2706, -0.2983, -0.4163,  0.0241],
        [ 0.2339, -0.0814, -0.0498,  0.1904, -0.0759],
        [-0.1623,  0.1069, -0.0383,  0.2229,  0.2808],
        [ 0.2908, -0.2432, -0.1614,  0.1363,  0.2924],
        [-0.0738, -0.0246,  0.1532, -0.1271,  0.2876],
        [-0.1851, -0.0708, -0.2438,  0.0712, -0.2138],
        [ 0.3970,  0.3470,  0.2226,  0.1775,  0.3772],
        [ 0.0505, -0.1404,  0.1426, -0.2838,  0.1350],
        [ 0.4238,  0.3953,  0.2783, -0.0293, -0.3007],
        [-0.3083, -0.4351, -0.3104, -0.3497,  0.1436],
        [-0.0940,  0.2734, -0.3675, -0.3928, -0.2005],
        [-0.0658,  0.1678, -0.1753, -0.3809, -0.4431],
        [ 0.1146,  0.0413, -0.1114, -0.3802, -0.3555],
        [ 0.0110, -0.3575, -0.0855,  0.4055,  0.0019],
        [ 0.3016,  0.0619,  0.4235,  0.3673, -0.4325],
        [-0.3618, -0.1637, -0.3951,  0.0520, -0.2395],
        [ 0.3593, -0.3131, -0.1856,  0.1042, -0.3982]])

h = np.array([[[ 0.2570,  1.1255,  0.1157, -2.8301,  1.4929],
         [ 0.7979, -1.6010, -0.6993,  1.9225,  0.2169]],

        [[ 1.0473, -0.5031, -0.8567,  0.8341,  0.2007],
         [ 0.4666,  0.2677, -0.2282, -1.3530, -0.0826]]])

c = np.array([[[-0.8367,  0.5181,  0.9831, -2.6134, -0.5054],
         [ 0.1692,  0.1029, -1.0233, -0.7694,  1.0741]],

        [[ 0.4925, -0.1189,  1.9970,  1.4383,  0.6084],
         [-0.4519,  0.9721, -0.3155,  0.9260,  0.8702]]])

h_another = np.array(h, copy=True)  # keep the initial hidden states: the name h is reassigned below, but layer 1 still needs h[1]


hiddle = inputs                                      # layer-0 input sequence, shape [5, 2, 3]
hid_inp = torch.zeros(5, 2, 5, dtype=torch.float64)  # layer-0 hidden states per step, fed to layer 1 (float64 to match the numpy weights)
w_x = [w_x_0, w_x_1]
w_h = [w_h_0, w_h_1]

h = h[0]                          # initial hidden state of layer 0
c_pre = torch.from_numpy(c[0])    # initial cell state of layer 0
print(hiddle[0].shape, w_x[0][5:10].shape)
print(h.shape, w_h[0][5:10].shape)
for t in range(5):
    f_t = torch.sigmoid(torch.from_numpy(np.dot(hiddle[t],w_x[0][5:10].T)+np.dot(h,w_h[0][5:10].T)))
    i_t = torch.sigmoid(torch.from_numpy(np.dot(hiddle[t],w_x[0][:5].T)+np.dot(h,w_h[0][:5].T)))
    o_t = torch.sigmoid(torch.from_numpy(np.dot(hiddle[t],w_x[0][15:].T)+np.dot(h,w_h[0][15:].T)))
    g_t = torch.tanh(torch.from_numpy(np.dot(hiddle[t],w_x[0][10:15].T)+np.dot(h,w_h[0][10:15].T)))
    c_cur = torch.mul(f_t,c_pre) + torch.mul(i_t,g_t)
    h_t = torch.mul(o_t,torch.tanh(c_cur))
    h = h_t.numpy()
    hid_inp[t] = h_t
    c_pre = c_cur
# print(h_t)
# print(hid_inp)

h = h_another[1]                  # initial hidden state of layer 1
c_pre = torch.from_numpy(c[1])    # initial cell state of layer 1
for t in range(5):
    f_t = torch.sigmoid(torch.from_numpy(np.dot(hid_inp[t],w_x[1][5:10].T)+np.dot(h,w_h[1][5:10].T)))
    i_t = torch.sigmoid(torch.from_numpy(np.dot(hid_inp[t],w_x[1][:5].T)+np.dot(h,w_h[1][:5].T)))
    o_t = torch.sigmoid(torch.from_numpy(np.dot(hid_inp[t],w_x[1][15:].T)+np.dot(h,w_h[1][15:].T)))
    g_t = torch.tanh(torch.from_numpy(np.dot(hid_inp[t],w_x[1][10:15].T)+np.dot(h,w_h[1][10:15].T)))
    c_cur = torch.mul(f_t,c_pre) + torch.mul(i_t,g_t)
    h_t = torch.mul(o_t,torch.tanh(c_cur))
    print(h_t)
    h = h_t.numpy()
    c_pre = c_cur

# print(h_t)



