# Notes: while processing large data I realized I did not know how to use multiprocessing, so here is a quick write-up.
import multiprocessing as mp
import numpy as np
### Create the sample data: a 200000 x 5 array of random integers in [0, 10).
data = np.random.randint(low=0, high=10, size=(200000, 5))
###
def count_num(arr, minnum=5, maxnum=10):
    """Count the elements of ``arr`` that lie in ``[minnum, maxnum]``.

    The count is printed (so it is visible when the function runs inside a
    child process) and also returned, so callers such as ``Pool.map`` can
    collect the per-chunk results.

    Args:
        arr: Array-like of numbers; any shape is accepted, including empty.
        minnum: Inclusive lower bound of the range.
        maxnum: Inclusive upper bound of the range.

    Returns:
        The number of elements ``x`` with ``minnum <= x <= maxnum``.
    """
    values = np.asarray(arr)
    # Vectorized range test: avoids a Python-level loop over every element
    # and works for arrays of any dimensionality.
    in_range = (values >= minnum) & (values <= maxnum)
    counts = int(np.count_nonzero(in_range))
    print(counts)
    return counts
if __name__ == "__main__":
    # The guard is required so that child processes created with the "spawn"
    # start method can import this module without re-running the
    # process-launching code below.

    ## Single process: run count_num over the whole array in one child process.
    p1 = mp.Process(target=count_num, args=(data,))
    p1.start()
    p1.join()
    # close() must come after join(): calling it while the child process is
    # still running raises ValueError.
    p1.close()

    ## Multiple processes: split the rows into one chunk per CPU core.
    cpu_num = mp.cpu_count()
    chunks = np.array_split(data, cpu_num)
    p2 = mp.Pool(cpu_num)
    try:
        p2.map(count_num, chunks)
    finally:
        # close() + join() lets the workers exit normally (flushing their
        # printed output) and guarantees cleanup even if map() raises.
        p2.close()
        p2.join()