TensorFlow2 张量与变量2

1 简单函数与张量

可以先简单地将 TensorFlow 视为一个科学计算库（类似于 Python 下的 NumPy）。它提供了一些简单函数，与 NumPy 极为类似，比如：

import tensorflow as tf

# Draw a single random number: shape=() requests a scalar.
random_float = tf.random.uniform(shape=())

# Build a zero vector with 2 elements. (2,) is a genuine one-element tuple,
# whereas (2) is only the integer 2 wrapped in parentheses.
zero_vector = tf.zeros(shape=(2,))

tf.random.uniform、tf.zeros 即 DAG 上的节点，它们的输出均为张量（即 tf.Tensor 对象），比如：

zero_vector

输出：

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([0., 0.], dtype=float32)>

张量一般分为常量 tf.constant 与变量 tf.Variable。常量的值在计算图中不可以被重新赋值，变量可以在计算图中用 assign 等算子重新赋值。

1.1 常量

与 NumPy 极为相似：

import numpy as np
import tensorflow as tf

i = tf.constant(1)  # tf.int32 constant
l = tf.constant(1, dtype=tf.int64)  # tf.int64 constant
f = tf.constant(1.23)  # tf.float32 constant
d = tf.constant(3.14, dtype=tf.double)  # tf.double constant
s = tf.constant("hello world")  # tf.string constant
b = tf.constant(True)  # tf.bool constant


# Compare TensorFlow dtypes with their NumPy / Python counterparts.
# np.bool and np.unicode were deprecated aliases of the builtins bool and str
# and were removed in NumPy 1.24, so use np.bool_ and str instead.
print(tf.int64 == np.int64)
print(tf.bool == np.bool_)
print(tf.double == np.float64)
print(tf.string == str)  # tf.string is not equivalent to str (the type np.unicode used to alias)

输出：

True
True
True
False

我们也可以查看张量的维度：

scalar = tf.constant(True)  # a scalar, i.e. a 0-dimensional tensor

rank_of_scalar = tf.rank(scalar)
print(rank_of_scalar)
# tf.rank plays the same role as the ndim attribute of a NumPy array
scalar_as_array = scalar.numpy()
print(scalar_as_array.ndim)

显示：

tf.Tensor(0, shape=(), dtype=int32)
0

可以用 tf.cast 改变张量的数据类型。
可以用 numpy 方法将 TensorFlow 中的张量转化成 NumPy 数组。
可以用 shape 属性查看张量的尺寸。

A = tf.constant([[1., 2.], [3., 4.]])
# Print, in order: the shape (2, 2), the dtype <dtype: 'float32'>,
# and the values of matrix A as a NumPy array.
for matrix_property in (A.shape, A.dtype, A.numpy()):
    print(matrix_property)

显示：

(2, 2)
<dtype: 'float32'>
[[1. 2.]
 [3. 4.]]

还有：

# Create an int32 tensor, cast it to float32, and show both dtypes.
h = tf.constant([123, 456], dtype=tf.int32)
f = tf.cast(h, dtype=tf.float32)
print(h.dtype, f.dtype)

显示：

<dtype: 'int32'> <dtype: 'float32'>

1.2 变量

模型中需要被训练的参数一般被设置成变量。

常量值不可以改变，常量的重新赋值相当于创造新的内存空间

# A constant cannot be modified in place: "re-assigning" it binds the name
# to a brand-new tensor, which the differing id() values make visible.
c = tf.constant([1.0, 2.0])
print(c, id(c), sep="\n")
c = tf.add(c, tf.constant([1.0, 1.0]))
print(c, id(c), sep="\n")

显示：

tf.Tensor([1. 2.], shape=(2,), dtype=float32)
222632776
tf.Tensor([2. 3.], shape=(2,), dtype=float32)
222631192

变量的值可以改变，可以通过assign, assign_add等方法给变量重新赋值：

# A variable is updated in place: after assign_add the name still refers to
# the same object, so id() prints the same value before and after.
v = tf.Variable([1.0, 2.0], name="v")
print(v, id(v), sep="\n")
v.assign_add([1.0, 1.0])
print(v, id(v), sep="\n")

显示：

<tf.Variable 'v:0' shape=(2,) dtype=float32, numpy=array([1., 2.], dtype=float32)>
271494792
<tf.Variable 'v:0' shape=(2,) dtype=float32, numpy=array([2., 3.], dtype=float32)>
271494792

2 张量的创建

张量创建的许多方法和 numpy 中创建 array 的方法很像。下面使用 tf.print 函数打印输出。

import tensorflow as tf
import numpy as np
# Constant tensor with an explicit dtype.
a = tf.constant([1, 2, 3], dtype=tf.float32)
tf.print(a)

# Integer range from 1 up to (but excluding) 10, in steps of 2.
b = tf.range(1, 10, delta=2)
tf.print(b)

# 100 evenly spaced values from 0.0 to 2 * 3.14.
c = tf.linspace(0.0, 2 * 3.14, 100)
tf.print(c)

# 3x3 matrix of zeros.
d = tf.zeros([3, 3])
tf.print(d)

# 3x3 matrix of ones, and a zero tensor with the same shape.
a = tf.ones([3, 3])
b = tf.zeros_like(a, dtype=tf.float32)
tf.print(a)
tf.print(b)

# 3x2 tensor with every element set to 5.
b = tf.fill([3, 2], 5)
tf.print(b)

# Uniformly distributed random values.
# tf.random.set_seed is documented to take an integer seed.
tf.random.set_seed(1)
a = tf.random.uniform([5], minval=0, maxval=10)
tf.print(a)

# Normally distributed random values.
b = tf.random.normal([3, 3], mean=0.0, stddev=1.0)
tf.print(b)

# Truncated normal: samples more than 2 standard deviations (not variances)
# away from the mean are discarded and drawn again.
c = tf.random.truncated_normal((5, 5), mean=0.0, stddev=1.0, dtype=tf.float32)
tf.print(c)

# Special matrices.
I = tf.eye(3, 3)  # identity matrix
tf.print(I)
tf.print(" ")
t = tf.linalg.diag([1, 2, 3])  # diagonal matrix
tf.print(t)

显示：

[1 2 3]
[1 3 5 7 9]
[0 0.0634343475 0.126868695 ... 6.15313148 6.21656609 6.28]
[[0 0 0]
 [0 0 0]
 [0 0 0]]
[[1 1 1]
 [1 1 1]
 [1 1 1]]
[[0 0 0]
 [0 0 0]
 [0 0 0]]
[[5 5]
 [5 5]
 [5 5]]
[1.65130854 9.01481247 6.30974197 4.34546089 2.9193902]
[[0.403087884 -1.0880208 -0.0630953535]
 [1.33655667 0.711760104 -0.489286453]
 [-0.764221311 -1.03724861 -1.25193381]]
[[-0.457012236 -0.406867266 0.728577733 -0.892977774 -0.369404584]
 [0.323488563 1.19383323 0.888299048 1.25985599 -1.95951891]
 [-0.202244401 0.294496894 -0.468728036 1.29494202 1.48142183]
 [0.0810953453 1.63843894 0.556645 0.977199793 -1.17777884]
 [1.67368948 0.0647980496 -0.705142677 -0.281972528 0.126546144]]
[[1 0 0]
 [0 1 0]
 [0 0 1]]
 
[[1 0 0]
 [0 2 0]
 [0 0 3]]

3 索引切片

张量的索引切片方式和 numpy 几乎是一样的。切片时支持缺省参数和省略号。对于 tf.Variable,可以通过索引和切片对部分元素进行修改。对于提取张量的连续子区域，也可以使用 tf.slice。

tf.random.set_seed(3)
t = tf.random.uniform([5, 5], minval=0, maxval=10, dtype=tf.int32)
tf.print(t)
# Row 0
tf.print(t[0])
# Last row
tf.print(t[-1])
# Element at row 1, column 3 (two equivalent spellings)
tf.print(t[1, 3])
tf.print(t[1][3])
# Rows 1 through 3
tf.print(t[1:4, :])
tf.print(tf.slice(t, [1, 0], [3, 5]))  # tf.slice(input, begin_vector, size_vector)
# Row 1 through the last row, every second column from column 0 through the last.
# Open-ended bounds are needed for this: 1:4 and :4 would stop before the
# last row and the last column of this 5x5 tensor.
tf.print(t[1:, ::2])
# For a variable, indexing and slicing can also be used to modify some elements
x = tf.Variable([[1, 2], [3, 4]], dtype=tf.float32)
x[1, :].assign(tf.constant([0.0, 0.0]))
tf.print(x)
a = tf.random.uniform([3, 3, 3], minval=0, maxval=10, dtype=tf.int32)
tf.print(a)
# An ellipsis stands in for any number of full-range colons
tf.print(a[..., 1])

此外，对于不规则的切片提取,可以使用 tf.gather，tf.gather_nd，tf.boolean_mask。

# Grade-book example: 4 classes, 10 students per class, 7 subjects per
# student, represented as a 4x10x7 tensor.
scores = tf.random.uniform((4, 10, 7), minval=0, maxval=100, dtype=tf.int32)
tf.print(scores)

student_ids = [0, 5, 9]
subject_ids = [1, 3, 6]

# All scores of students 0, 5 and 9 in every class.
p = tf.gather(scores, student_ids, axis=1)
tf.print(p)

# Subjects 1, 3 and 6 of those same students: gather a second time, along
# the subject axis of the tensor selected above.
q = tf.gather(p, subject_ids, axis=2)
tf.print(q)

# All scores of (class 0, student 0), (class 2, student 4), (class 3, student 6).
# indices holds one coordinate per sample, so its length is the sample count.
s = tf.gather_nd(scores, indices=[(0, 0), (2, 4), (3, 6)])
tf.print(s)

tf.boolean_mask 功能最为强大，它可以实现 tf.gather，tf.gather_nd 的功能，并且 tf.boolean_mask 还可以实现布尔索引。

# All scores of students 0, 5 and 9 in every class: a 10-element mask along
# the student axis that is True only at those three positions.
student_mask = [student in (0, 5, 9) for student in range(10)]
p = tf.boolean_mask(scores, student_mask, axis=1)
tf.print(p)

# All scores of (class 0, student 0), (class 2, student 4), (class 3, student 6):
# a 4x10 mask that is True only at those (class, student) coordinates.
wanted = {(0, 0), (2, 4), (3, 6)}
pair_mask = [[(cls, student) in wanted for student in range(10)]
             for cls in range(4)]
s = tf.boolean_mask(scores, pair_mask)
tf.print(s)

# tf.boolean_mask also provides boolean indexing.
# Example: find the elements of a matrix that are less than 0.
c = tf.constant([[-1, 1, -1], [2, 2, -2], [3, -3, 3]], dtype=tf.float32)
tf.print(c, "\n")

is_negative = c < 0
tf.print(tf.boolean_mask(c, is_negative), "\n")
tf.print(c[is_negative])  # boolean indexing, syntactic sugar for boolean_mask

以上这些方法仅能提取张量的部分元素值，但不能更改张量的部分元素值得到新的张量。如果要通过修改张量的某些元素得到新的张量，可以使用 tf.where，tf.scatter_nd。

tf.where 可以理解为 if 的张量版本，此外它还可以用于找到满足条件的所有元素的位置坐标。tf.scatter_nd 的作用和 tf.gather_nd 有些相反，tf.gather_nd 用于收集张量的给定位置的元素，而 tf.scatter_nd 可以将某些值插入到一个给定 shape 的全 0 的张量的指定位置处。

# tf.where behaves like np.where: an element-wise "if" over tensors.
c = tf.constant([[-1, 1, -1], [2, 2, -2], [3, -3, 3]], dtype=tf.float32)
is_negative = c < 0

# Replace every element below 0 with np.nan, producing a new tensor.
d = tf.where(is_negative, tf.fill(c.shape, np.nan), c)

# Called with the condition alone, tf.where returns the coordinates of all
# positions where the condition holds.
indices = tf.where(is_negative)

# Zero out positions [0, 0] and [2, 1]: scatter the current values of those
# two positions into an all-zero tensor of the same shape, then subtract it.
positions = [[0, 0], [2, 1]]
d = c - tf.scatter_nd(positions, tf.gather_nd(c, positions), c.shape)

# scatter_nd is roughly the reverse of gather_nd: it inserts the given values
# at the given positions of an all-zero tensor with the requested shape.
tf.scatter_nd(indices, tf.gather_nd(c, indices), c.shape)

4 维度变换与数据合成

维度变换相关函数主要有 tf.reshape, tf.squeeze, tf.expand_dims, tf.transpose。

tf.reshape 可以改变张量的形状，但是其本质上不会改变张量元素的存储顺序，所以，该操作实际上非常迅速，并且是可逆的。
tf.squeeze 可以减少维度。
tf.expand_dims 可以增加维度。
tf.transpose 可以交换维度。

维度变换相关函数主要有 tf.reshape, tf.squeeze, tf.expand_dims, tf.transpose。

tf.reshape 可以改变张量的形状，但是其本质上不会改变张量元素的存储顺序，所以，该操作实际上非常迅速，并且是可逆的。
tf.squeeze 可以减少维度。如果张量在某个维度上只有一个元素，利用 tf.squeeze 可以消除这个维度。
tf.expand_dims 可以增加维度。
tf.transpose 可以交换维度。与 tf.reshape 不同，它会改变张量元素的存储顺序。常用于图片存储格式的变换上。

张量的各个元素在内存中是线性存储的，其一般规律是，同一层级中的相邻元素的物理地址也相邻。

和 numpy 类似，可以用 tf.concat 和 tf.stack 方法对多个张量进行合并，可以用 tf.split 方法把一个张量分割成多个张量。tf.concat 和 tf.stack 有略微的区别，tf.concat 是连接，不会增加维度，而 tf.stack 是堆叠，会增加维度。tf.split 是 tf.concat 的逆运算，可以指定分割份数平均分割，也可以通过指定每份的记录数量进行分割。

5 数学运算

5.1 标量运算

加减乘除乘方，以及三角函数，指数，对数等常见函数，逻辑比较运算符等都是标量运算符。标量运算符的特点是对张量实施逐元素运算。有些标量运算符对常用的数学运算符进行了重载。并且支持类似 numpy 的广播特性。许多标量运算符都在 tf.math 模块下。

import tensorflow as tf
import numpy as np
# Element-wise ("scalar") operators on two 2x2 float tensors. Each bare
# expression below demonstrates one operator; its value is not stored.
a = tf.constant([[1.0, 2], [-3, 4.0]])
b = tf.constant([[5.0, 6], [7.0, 8.0]])
a + b  # operator overloading
a - b
a * b
a / b
a ** 2
a ** (3.5)
a % 3  # overloaded modulo operator, equivalent to tf.math.mod(a, 3)
a//3  # floor division
a >= 2
(a >= 2) & (a <= 3)
(a >= 2) | (a <= 3)
a == 5  # tf.equal(a,5)
tf.sqrt(a)

# Functions that combine several tensors.
a = tf.constant([1.0, 8.0])
b = tf.constant([5.0, 6.0])
c = tf.constant([6.0, 7.0])
tf.add_n([a, b, c])  # sum of several tensors
tf.maximum(a, b)  # maximum
tf.minimum(a, b)  # minimum

# Join two string tensors with a single space between them.
a = tf.constant("hello")
b = tf.constant("tensorflow2")
c = tf.strings.join([a, b], separator=" ")
tf.print(c)

5.2 向量运算

向量运算符只在一个特定轴上运算，将一个向量映射到一个标量或者另外一个向量。许多向量运算符都以 reduce 开头。

# Reductions over a whole vector: sum, mean, max, min, product (in that order).
a = tf.range(1, 10)
for reduce_fn in (tf.reduce_sum, tf.reduce_mean, tf.reduce_max,
                  tf.reduce_min, tf.reduce_prod):
    tf.print(reduce_fn(a))

# Reductions along one chosen axis of a tensor: first axis 1, then axis 0.
b = tf.reshape(a, (3, 3))
for axis in (1, 0):
    tf.print(tf.reduce_sum(b, axis=axis, keepdims=True))

# Reductions over boolean tensors.
p = tf.constant([True, False, False])
q = tf.constant([False, False, True])
tf.print(tf.reduce_all(p))
tf.print(tf.reduce_any(q))

# tf.reduce_sum re-implemented with tf.foldr.
s = tf.foldr(lambda acc, elem: acc + elem, tf.range(10))
tf.print(s)

# Cumulative scans.
tf.print(tf.math.cumsum(a))
tf.print(tf.math.cumprod(a))

# Index of the largest / smallest element.
tf.print(tf.argmax(a))
tf.print(tf.argmin(a))

# tf.math.top_k can be used to sort a tensor.
a = tf.constant([1, 3, 7, 5, 4, 8])

values, indices = tf.math.top_k(a, k=3, sorted=True)
tf.print(values)
tf.print(indices)

利用 tf.math.top_k 可以在 TensorFlow 中实现 KNN 算法。

5.3 矩阵运算

矩阵必须是二维的。矩阵运算包括：矩阵乘法，矩阵转置，矩阵逆，矩阵求迹，矩阵范数，矩阵行列式，矩阵求特征值，矩阵分解等运算。除了一些常用的运算外，大部分和矩阵有关的运算都在 tf.linalg 子包中。

# Matrix multiplication
a = tf.constant([[1, 2], [3, 4]])
b = tf.constant([[2, 0], [0, 2]])
a@b  # equivalent to tf.matmul(a,b)

# Matrix transpose
a = tf.constant([[1.0, 2], [3, 4]])
tf.transpose(a)

# Matrix inverse; the input must be of type tf.float32 or tf.double
a = tf.constant([[1.0, 2], [3.0, 4]], dtype=tf.float32)
tf.linalg.inv(a)

# Matrix trace
a = tf.constant([[1.0, 2], [3, 4]])
tf.linalg.trace(a)

# Matrix norm (called with its default arguments)
a = tf.constant([[1.0, 2], [3, 4]])
tf.linalg.norm(a)

# Matrix determinant
a = tf.constant([[1.0, 2], [3, 4]])
tf.linalg.det(a)

# Matrix eigenvalues. `a` is not symmetric, so the general solver is needed:
# tf.linalg.eigvalsh is only valid for self-adjoint (symmetric/Hermitian)
# matrices and would not return the eigenvalues of this matrix.
# The result of tf.linalg.eigvals is complex-valued.
tf.linalg.eigvals(a)

# QR decomposition: print q, then r, then their product q·r.
a = tf.constant([[1.0, 2.0], [3.0, 4.0]], dtype=tf.float32)
q, r = tf.linalg.qr(a)
for factor in (q, r, tf.matmul(q, r)):
    tf.print(factor)

# Singular value decomposition.
# tf.linalg.svd returns (s, u, v): the singular values first, then the left
# and right singular vectors. The input is recovered as u · diag(s) · vᴴ, so
# the last factor must be the adjoint of v, not v itself.
a = tf.constant([[1.0, 2.0], [3.0, 4.0]], dtype=tf.float32)
s, u, v = tf.linalg.svd(a)
tf.matmul(tf.matmul(u, tf.linalg.diag(s)), v, adjoint_b=True)

利用 svd 分解可以在 TensorFlow 中实现主成分分析降维。