tf.constant
创建一个常数张量。
tf.constant(
value,
dtype=None,
shape=None,
name='Const',
verify_shape=False
)
# Constant 1-D Tensor populated with value list.
tensor = tf.constant([1, 2, 3, 4, 5, 6, 7]) #=> [1 2 3 4 5 6 7]
# Constant 2-D tensor populated with scalar value -1.
tensor = tf.constant(-1.0, shape=[2, 3]) #=> [[-1. -1. -1.]
#[-1. -1. -1.]]
tf.Variable 和 tf.get_variable
- tf.Variable 用来创建一个变量。
- tf.get_variable 用来创建或者获取变量。
- tf.Variable
# Create a variable.
w = tf.Variable(<initial-value>, name=<optional-name>)
- tf.get_variable
tf.get_variable(
name,
shape=None,
dtype=None,
initializer=None,
regularizer=None,
trainable=None,
collections=None,
caching_device=None,
partitioner=None,
validate_shape=True,
use_resource=None,
custom_getter=None,
constraint=None,
synchronization=tf.VariableSynchronization.AUTO,
aggregation=tf.VariableAggregation.NONE
)
def foo():
with tf.variable_scope("foo", reuse=tf.AUTO_REUSE):
v = tf.get_variable("v", [1])
return v
v1 = foo() # Creates v.
v2 = foo() # Gets the same, existing v.
assert v1 == v2
- 创建变量
#下面两个定义是等价的
v = tf.Variable(tf.constant(1.0, shape=[1]), name="v")
v = tf.get_variable("v", shape=[1], initializer=tf.constant_initializer(1.0))
对于tf.Variable,变量名称是一个可选的参数,通过 name="v" 的形式给出。
对于tf.get_variable,变量名称是一个必填的参数。
- tf.get_variable 获取变量
通过 tf.get_variable 获取一个已经创建的变量,需要通过 tf.variable_scope 函数来生成一个上下文管理器,并通过 reuse=True 参数来控制。
如果 reuse=False 或者 None,那么 tf.get_variable 只能用于创建一个不存在的变量;如果同名变量已经存在,则会报错。
with tf.variable_scope("foo"):
v = tf.get_variable("v", [1], initializer=tf.constant_initializer(1.0))
with tf.variable_scope("foo", reuse=True):
v1 = tf.get_variable("v", [1])
print(v == v1) #True
tf.random.normal
输出符合正态分布的随机值。
tf.random.normal(
shape,
mean=0.0, #平均值
stddev=1.0, #标准差
dtype=tf.float32,
seed=None,
name=None
)
w1 = tf.Variable(tf.random.normal([2, 3], stddev=1, seed=1))
tf.truncated_normal_initializer
Initializer that generates a truncated normal distribution.
Returns:
A JSON-serializable Python dict.
__init__(
mean=0.0,
stddev=1.0,
seed=None,
dtype=tf.float32
)
tf.placeholder
placeholder() 函数用于在构建神经网络的计算图(graph)时在模型中占位:此时并没有把要输入的数据传入模型,它只会分配必要的内存。建立会话(session)之后,在运行模型时通过 sess.run() 的 feed_dict 参数向占位符填充数据。
tf.placeholder(
dtype,
shape=None,
name=None
)
x = tf.placeholder(tf.float32, shape=(1024, 1024))
y = tf.matmul(x, x)
with tf.Session() as sess:
rand_array = np.random.rand(1024, 1024)
print(sess.run(y, feed_dict={x: rand_array}))
tf.sigmoid
- Aliases:
tf.math.sigmoid
tf.nn.sigmoid
tf.sigmoid
y = 1 / (1 + exp(-x))
tf.math.sigmoid(
x,
name=None
)
tf.nn.sigmoid_cross_entropy_with_logits
计算sigmoid的交叉熵。
tf.nn.sigmoid_cross_entropy_with_logits(
_sentinel=None,
labels=None,
logits=None,
name=None
)
#For brevity, let x = logits, z = labels. The logistic loss is
z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
= z * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x)))
= z * log(1 + exp(-x)) + (1 - z) * (-log(exp(-x)) + log(1 + exp(-x)))
= z * log(1 + exp(-x)) + (1 - z) * (x + log(1 + exp(-x)))
= (1 - z) * x + log(1 + exp(-x))
= x - x * z + log(1 + exp(-x))
#For x < 0, to avoid overflow in exp(-x), we reformulate the above
x - x * z + log(1 + exp(-x))
= log(exp(x)) - x * z + log(1 + exp(-x))
= - x * z + log(1 + exp(x))
#Hence, to ensure stability and avoid overflow, the implementation uses this equivalent formulation
max(x, 0) - x * z + log(1 + exp(-abs(x)))
- 每个节点的交叉熵公式
tf.train.AdamOptimizer
构建一个Adam优化器。
__init__(
learning_rate=0.001,
beta1=0.9,
beta2=0.999,
epsilon=1e-08,
use_locking=False,
name='Adam'
)
- minimize
This method simply combines calls to compute_gradients() and apply_gradients(). If you want to process the gradients before applying them, call compute_gradients() and apply_gradients() explicitly instead of using this function.
minimize(
loss,
global_step=None,
var_list=None,
gate_gradients=GATE_OP,
aggregation_method=None,
colocate_gradients_with_ops=False,
name=None,
grad_loss=None
)
tf.add_to_collection
- tf.add_to_collection("list_name", element):将元素element添加到列表list_name中
- tf.get_collection("list_name"):返回名称为list_name的列表
- tf.add_n(list):将列表元素相加并返回
import tensorflow as tf
tf.add_to_collection('losses', tf.constant(2.2))
tf.add_to_collection('losses', tf.constant(3.))
with tf.Session() as sess:
print(sess.run(tf.get_collection('losses'))) #[2.2, 3.0]
print(sess.run(tf.add_n(tf.get_collection('losses')))) #5.2
trainable=False
设定trainable=False 可以防止该变量被数据流图的 GraphKeys.TRAINABLE_VARIABLES 收集,
这样我们就不会在训练的时候尝试更新它的值。
import tensorflow as tf
train = tf.Variable(0, name="v1")
no_train = tf.Variable(1, name="v2", trainable=False)
print(tf.trainable_variables()) #[<tf.Variable 'v1:0' shape=() dtype=int32_ref>]
tf.ConfigProto
config = tf.ConfigProto(allow_soft_placement=True,
log_device_placement=True)
sess = tf.Session(config=config)