tf.keras.layers.Embedding
tf.keras.layers.Embedding(
    input_dim, # size of the vocabulary (number of distinct tokens)
    output_dim, # dimension of the output embeddings
embeddings_initializer='uniform',
embeddings_regularizer=None,
activity_regularizer=None,
embeddings_constraint=None,
mask_zero=False,
    input_length=None # length of the input sequences
)
import tensorflow as tf
import numpy as np
data = np.array([[0,1,2],[3,4,5]])
emb = tf.keras.layers.Embedding(input_dim=6, output_dim=8, input_length=3)
emb_data = emb(data)
$ data
array([[0, 1, 2],
[3, 4, 5]])
$ data.shape # (batch_size, input_length)
(2, 3)
$ emb_data # each value in data is mapped to a vector whose length equals output_dim
tf.Tensor(
[[[ 0.04417955 0.03951569 -0.04671072 -0.01689724 -0.0274343
0.01672404 -0.02383183 0.00011552]
[-0.04286272 -0.02671162 0.02381554 -0.0092687 0.00082551
-0.02382222 0.01143166 0.02674634]
[ 0.01257071 -0.00645797 -0.00185542 -0.02564175 0.01768965
-0.02935628 -0.01977453 0.02767775]]
[[-0.04021711 -0.01675881 -0.02166536 0.01848916 -0.02698034
0.01760058 -0.04472467 -0.02368132]
[ 0.00224058 0.0078318 -0.00974486 -0.00347499 -0.0094027
0.01286327 -0.03330473 -0.04883292]
[-0.03280728 -0.03111702 -0.01845707 0.02144312 -0.00158714
0.02110559 -0.01083742 0.04108325]]], shape=(2, 3, 8), dtype=float32)
$ emb_data.shape # (batch_size, input_length, output_dim)
TensorShape([2, 3, 8])
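With mask_zero=True, index 0 is reserved for padding and the layer emits a mask that downstream mask-aware layers can consume (input_dim must then be vocabulary size + 1). A minimal sketch of the generated mask:
emb = tf.keras.layers.Embedding(input_dim=6, output_dim=8, mask_zero=True)
data = np.array([[1, 2, 0], [3, 0, 0]])
$ emb.compute_mask(data)
<tf.Tensor: shape=(2, 3), dtype=bool, numpy=
array([[ True,  True, False],
       [ True, False, False]])>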
tf.keras.layers.Input
tf.keras.layers.Input(
    shape=None, # shape=(32,) means the expected input will be batches of 32-dimensional vectors
    batch_shape=None, # batch_shape=(10,32) means the expected input will be batches of ten 32-dimensional vectors
    name=None,
    dtype=K.floatx(), # expected dtype of the input data
sparse=False,
tensor=None
)
inputs = tf.keras.layers.Input(shape=(32,)) # the network's input layer
$ inputs
<KerasTensor: shape=(None, 32) dtype=float32 (created by layer 'input_3')>
tf.keras.layers.Dense
tf.keras.layers.Dense(
    units, # dimensionality of the output vectors
    activation=None, # activation function; None means linear activation a(x) = x
    use_bias=True, # Boolean, whether to use a bias vector
    kernel_initializer='glorot_uniform', # initializer for the weight matrix
    bias_initializer='zeros', # initializer for the bias vector
    kernel_regularizer=None, # regularizer for the weight matrix
    bias_regularizer=None, # regularizer for the bias vector
    activity_regularizer=None,
    kernel_constraint=None, # constraint for the weight matrix
    bias_constraint=None, # constraint for the bias vector
**kwargs
)
Input: (batch_size, input_dim)
Computes: output = activation(dot(input, kernel) + bias)
kernel: the weight matrix created by the layer
Output: (batch_size, units)
model = tf.keras.models.Sequential()
model.add(tf.keras.Input(shape=(128,)))
model.add(tf.keras.layers.Dense(16, activation='relu'))
$ model.input_shape
(None, 128)
$ model.output_shape
(None, 16)
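The formula above is easy to verify by hand; a minimal sketch comparing a fresh layer's output with an explicit matrix product (layer sizes are illustrative):
layer = tf.keras.layers.Dense(2, activation='relu')
x = tf.ones((1, 4))
y = layer(x) # the first call builds the layer; kernel.shape = (4, 2)
kernel, bias = layer.get_weights()
$ np.allclose(y.numpy(), np.maximum(x.numpy() @ kernel + bias, 0))
True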
tf.keras.layers.Activation
tf.keras.layers.Activation(
activation, **kwargs
)
layer = tf.keras.layers.Activation('relu') # tf.nn.relu
output = layer([-3.0, -1.0, 0.0, 2.0])
$ list(output.numpy())
[0.0, 0.0, 0.0, 2.0]
layer = tf.keras.layers.Activation(tf.nn.sigmoid)
output = layer([-3.0, -1.0, 0.0, 2.0])
$ list(output.numpy())
[0.047425866, 0.26894143, 0.5, 0.8807971]
tf.keras.layers.BatchNormalization
tf.keras.layers.BatchNormalization(
    axis=-1, # the axis of the input to normalize along
    momentum=0.99, # momentum for the moving averages of the mean and variance
    epsilon=0.001,
    center=True, # beta: whether to add a learned offset
    scale=True, # gamma: whether to apply a learned scale
    beta_initializer='zeros', # initializer for the beta weight
    gamma_initializer='ones', # initializer for the gamma weight
    moving_mean_initializer='zeros',
    moving_variance_initializer='ones', # i.e. the initial moving mean and variance are those of a standard normal distribution
    beta_regularizer=None, # regularizer for the beta weight, rarely used
    gamma_regularizer=None, # regularizer for the gamma weight, rarely used
    beta_constraint=None, # constraint for the beta weight, rarely used
    gamma_constraint=None, # constraint for the gamma weight, rarely used
**kwargs
)
# call arguments
inputs: input tensor (of any rank)
training: boolean indicating whether the layer should behave in training mode or in inference mode.
training=True: The layer will normalize its inputs using the mean and variance of the current batch of inputs.
training=False: The layer will normalize its inputs using the mean and variance of its moving statistics, learned during training.
model = tf.keras.models.Sequential()
model.add(tf.keras.Input(shape=(32,)))
model.add(tf.keras.layers.Dense(16, activation='relu'))
model.add(tf.keras.layers.BatchNormalization(momentum=0.9))
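The effect of the training flag is easy to see on a fresh layer: with training=False it normalizes with the moving statistics (still at their initial mean 0 and variance 1, so it is approximately the identity), while with training=True it normalizes with the batch statistics. A minimal sketch, outputs rounded:
layer = tf.keras.layers.BatchNormalization()
data = np.array([[1.], [2.], [3.], [4.]], dtype=np.float32)
$ layer(data, training=False).numpy().round(2).ravel() # moving statistics
array([1., 2., 3., 4.], dtype=float32)
$ layer(data, training=True).numpy().round(2).ravel() # batch statistics
array([-1.34, -0.45,  0.45,  1.34], dtype=float32)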
tf.keras.layers.Dropout
tf.keras.layers.Dropout(
    rate, # fraction of the input units to drop
noise_shape=None,
seed=None,
**kwargs
)
tf.random.set_seed(0)
layer = tf.keras.layers.Dropout(.2, input_shape=(2,))
data = np.arange(10).reshape(5, 2).astype(np.float32)
$ data
array([[0., 1.],
[2., 3.],
[4., 5.],
[6., 7.],
[8., 9.]], dtype=float32)
outputs = layer(data, training=True)
$ outputs
<tf.Tensor: shape=(5, 2), dtype=float32, numpy=
array([[ 0. , 1.25],
[ 2.5 , 3.75],
[ 5. , 6.25],
[ 7.5 , 8.75],
[10. , 0. ]], dtype=float32)>
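The surviving activations are scaled up by 1/(1 - rate), here 1/0.8 = 1.25, so the expected value of each unit is unchanged; zeros mark dropped units (only the 9 was dropped above). With training=False (the default during inference) Dropout is the identity:
$ layer(data, training=False)
<tf.Tensor: shape=(5, 2), dtype=float32, numpy=
array([[0., 1.],
       [2., 3.],
       [4., 5.],
       [6., 7.],
       [8., 9.]], dtype=float32)>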
tf.keras.layers.Add
tf.keras.layers.Add(
**kwargs
)
input_shape = (2, 3, 4)
x1 = tf.random.normal(input_shape)
x2 = tf.random.normal(input_shape)
y = tf.keras.layers.Add()([x1, x2])
$ x1, x2
(<tf.Tensor: shape=(2, 3, 4), dtype=float32, numpy=
array([[[ 0.8328295 , 0.76248366, -0.30527893, -0.753745 ],
[ 0.2836065 , 1.0222927 , 0.34153125, -1.3051203 ],
[ 0.49992284, 0.2483008 , -0.4567157 , 0.9952715 ]],
[[-1.9321301 , 0.6314686 , -0.9774864 , 1.425028 ],
[ 0.6635869 , -2.0512059 , -1.098715 , -0.76539195],
[-1.9133846 , -0.75865966, 0.9069262 , 2.8084674 ]]],
dtype=float32)>,
<tf.Tensor: shape=(2, 3, 4), dtype=float32, numpy=
array([[[ 1.3492554 , 0.47617865, 0.9979069 , 2.502786 ],
[-0.74634784, -0.79241073, -0.08082506, -0.548672 ],
[ 0.5416235 , 0.4636011 , 0.17938277, -0.8119523 ]],
[[-2.0378642 , -0.29241782, 0.666593 , 0.3068891 ],
[-2.027362 , 0.8497227 , -0.19519807, -0.8135654 ],
[ 0.61839324, 1.8079637 , 0.03657307, -1.0492609 ]]],
dtype=float32)>)
$ y
<tf.Tensor: shape=(2, 3, 4), dtype=float32, numpy=
array([[[ 2.182085 , 1.2386622 , 0.692628 , 1.7490408 ],
[-0.46274135, 0.229882 , 0.2607062 , -1.8537924 ],
[ 1.0415463 , 0.7119019 , -0.27733293, 0.18331921]],
[[-3.9699943 , 0.33905077, -0.31089336, 1.731917 ],
[-1.3637753 , -1.2014832 , -1.293913 , -1.5789573 ],
[-1.2949913 , 1.049304 , 0.94349927, 1.7592065 ]]],
dtype=float32)>
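Add is commonly used for residual (skip) connections, where a block's input is added back onto its output; a minimal functional-API sketch (layer sizes are illustrative):
inputs = tf.keras.Input(shape=(16,))
x = tf.keras.layers.Dense(16, activation='relu')(inputs)
x = tf.keras.layers.Dense(16)(x)
outputs = tf.keras.layers.Add()([inputs, x]) # skip connection: inputs + x
model = tf.keras.Model(inputs=inputs, outputs=outputs)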
tf.keras.layers.Reshape
tf.keras.layers.Reshape(
target_shape,
**kwargs
)
model = tf.keras.Sequential()
model.add(tf.keras.layers.Reshape((3, 4), input_shape=(12,)))
$ model.output_shape
(None, 3, 4) # None is the batch size
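One entry of target_shape may be -1, in which case that dimension is inferred from the total number of elements; continuing the model above:
model.add(tf.keras.layers.Reshape((-1, 2, 2)))
$ model.output_shape
(None, 3, 2, 2)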
tf.keras.models.Model
tf.keras.Model(
*args,
**kwargs
)
inputs: the model's input(s), keras.Input object(s)
outputs: the model's output(s)
name: the model's name
# 1. Using the Functional API, starting from an Input
inputs = tf.keras.Input(shape=(3,))
layer1 = tf.keras.layers.Dense(4, activation=tf.nn.relu) # kernel.shape = (3, 4)
output1_tensor = layer1(inputs)
layer2 = tf.keras.layers.Dense(5, activation=tf.nn.softmax) # kernel.shape = (4, 5)
output2_tensor = layer2(output1_tensor)
model = tf.keras.Model(inputs=inputs, outputs=output2_tensor)
$ layer1.get_weights(), layer2.get_weights()
([array([[ 0.1579181 , -0.91244614, 0.29514194, -0.61988306],
[ 0.6677849 , -0.05998987, -0.16426378, 0.6446078 ],
[ 0.39074814, -0.16210169, -0.68291634, 0.7450665 ]],
dtype=float32),
array([0., 0., 0., 0.], dtype=float32)],
[array([[ 0.08803588, 0.34079218, 0.19313765, -0.04077142, -0.23308784],
[ 0.2253282 , -0.2840523 , 0.15559089, 0.41640854, 0.13909006],
[ 0.0896551 , -0.46805245, 0.7265818 , 0.45355165, -0.701867 ],
[-0.63146317, 0.3386103 , 0.5843165 , -0.43221 , 0.41194856]],
dtype=float32),
array([0., 0., 0., 0., 0.], dtype=float32)])
# 2. By subclassing Model; in this case, define your layers in __init__ and implement the model's forward pass in call
class MyModel(tf.keras.Model):
    def __init__(self): # define the layers
super(MyModel, self).__init__()
self.dense1 = tf.keras.layers.Dense(4, activation=tf.nn.relu)
self.dense2 = tf.keras.layers.Dense(5, activation=tf.nn.softmax)
    def call(self, inputs): # the model's forward pass
x = self.dense1(inputs)
return self.dense2(x)
model = MyModel()
# If you subclass Model, you can add a training argument to call, used to specify different behavior in training and inference
class MyModel(tf.keras.Model):
def __init__(self):
super(MyModel, self).__init__()
self.dense1 = tf.keras.layers.Dense(4, activation=tf.nn.relu)
self.dense2 = tf.keras.layers.Dense(5, activation=tf.nn.softmax)
self.dropout = tf.keras.layers.Dropout(0.5)
def call(self, inputs, training=False):
x = self.dense1(inputs)
if training:
x = self.dropout(x, training=training)
return self.dense2(x)
model = MyModel()
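A minimal usage sketch: dropout is active only when training=True is passed (fit/evaluate/predict set the flag automatically):
x = tf.ones((2, 3))
train_out = model(x, training=True) # dropout applied between the two Dense layers
infer_out = model(x, training=False) # dropout skipped
$ train_out.shape
TensorShape([2, 5])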
# Compiling, training, and predicting with a model
inputs = tf.keras.layers.Input(shape=(3,))
outputs = tf.keras.layers.Dense(1)(inputs)
model = tf.keras.models.Model(inputs=inputs, outputs=outputs)
model.compile(optimizer="Adam", loss="mse", metrics=["mae"]) # configure loss and metrics
x = np.random.random((4, 3))
y = np.random.randint(0, 2, 4)
model.fit(x, y) # train the model
$ model.metrics_names
['loss', 'mae']
$ x, y
(array([[0.50672765, 0.63972061, 0.69257209],
        [0.04628472, 0.14436413, 0.35030561],
        [0.60128111, 0.32340873, 0.48343087],
        [0.20454429, 0.71204994, 0.04399014]]),
 array([0, 1, 0, 1]))
$ model.predict(x) # model prediction
array([[ 0.4953674 ],
[ 0.18611424],
[ 0.5847541 ],
[-0.17252052]], dtype=float32)
tf.keras.layers.concatenate
tf.keras.layers.concatenate(
inputs,
axis=-1,
**kwargs
)
x = np.arange(20).reshape(2, 2, 5)
y = np.arange(20, 30).reshape(2, 1, 5)
$ tf.keras.layers.concatenate([x, y], axis=1)
<tf.Tensor: shape=(2, 3, 5), dtype=int64, numpy=
array([[[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[20, 21, 22, 23, 24]],
[[10, 11, 12, 13, 14],
[15, 16, 17, 18, 19],
[25, 26, 27, 28, 29]]])>
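concatenate is often used to merge parallel branches of a network along the feature axis (the default axis=-1); a minimal sketch with illustrative layer sizes:
x = tf.keras.Input(shape=(8,))
branch1 = tf.keras.layers.Dense(4)(x)
branch2 = tf.keras.layers.Dense(6)(x)
merged = tf.keras.layers.concatenate([branch1, branch2]) # feature dims add up: 4 + 6 = 10
$ merged.shape
TensorShape([None, 10])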
tf.keras.layers.Flatten
tf.keras.layers.Flatten(
data_format=None,
**kwargs
)
model = tf.keras.Sequential()
model.add(tf.keras.layers.Conv2D(64, 3, 3, input_shape=(3, 32, 32))) # filters=64, kernel_size=3, strides=3
$ model.output_shape
(None, 1, 10, 64)
model.add(tf.keras.layers.Flatten())
$ model.output_shape
(None, 640)
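Flatten collapses all non-batch dimensions into one (here 1 * 10 * 64 = 640) and leaves the batch axis untouched; a minimal sketch:
x = tf.ones((2, 1, 10, 64))
$ tf.keras.layers.Flatten()(x).shape
TensorShape([2, 640])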