操作
np.rot90(df, 1) #矩阵,逆时针,90度
np.rot90(df, -1) #矩阵,顺时针,90度
对矩阵/数组逐元素使用循环(如 for 嵌套 + iloc 赋值)速度特别慢,应尽量使用向量化运算
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
# Row-wise normalization of a tab-separated table with pandas:
# every value in row i of "final.head" is divided by the value in the first
# data column of row i of "final.length"; the result goes to "final.norm".
import os
import re
import sys

import pandas as pd


def normalize_by_length(df, length):
    """Divide every row of *df* by the matching row of *length*.

    Rows are paired by position (row i with row i), not by index label,
    which is what the element-wise ``iloc`` loop this replaces did. Only the
    first column of *length* is used, and rows of *length* beyond the number
    of rows in *df* are ignored.

    Args:
        df: DataFrame of numeric values (one row per item to normalize).
        length: DataFrame whose first column holds one divisor per row.

    Returns:
        A new float DataFrame with the same index and columns as *df*.
        *df* itself is not modified.

    Raises:
        ValueError: if *length* has fewer rows than *df*.
    """
    # Use a plain ndarray so pandas pairs rows positionally instead of
    # aligning on index labels.
    divisors = length.iloc[: len(df.index), 0].to_numpy()
    # One vectorized division instead of a Python loop over every cell.
    return df.div(divisors, axis=0)


if __name__ == "__main__":
    df = pd.read_csv("final.head", index_col=0, header=0, sep="\t")
    length = pd.read_csv("final.length", index_col=0, header=0, sep="\t")
    normalize_by_length(df, length).to_csv("final.norm", sep="\t", index=True)
numpy 不用循环即可实现“矩阵除以数组”,类似 R 语言中的按行相除:
即矩阵第 i 行的每个元素都除以数组的第 i 个元素。
array[:, None] 把一维数组变成列向量(形状为 (n, 1)),借助广播实现按行相除;
也可以用 np.rot90(df, k) 旋转矩阵后再相除,但不推荐,容易转晕。
#!/usr/bin/env python3
# Divide every row of a tab-separated integer matrix by a per-row value.
# Usage: script.py <matrix.tsv> <length.txt> <outfile>
import os
import re
import sys

import numpy as np


def divide_rows(matrix, row_divisors):
    """Divide row i of *matrix* by element i of *row_divisors*.

    Args:
        matrix: 2-D array-like of shape (rows, columns).
        row_divisors: one divisor per row; a scalar / 0-d array is accepted
            for a single-row matrix.

    Returns:
        A float ndarray with the same shape as *matrix*.

    Raises:
        ValueError: if *matrix* is not 2-D. A 1-D matrix would silently
            broadcast against the column of divisors into a square result.
    """
    matrix = np.asarray(matrix)
    if matrix.ndim != 2:
        raise ValueError("matrix must be 2-D, got %d-D" % matrix.ndim)
    # [:, None] stands the divisors up as a column so broadcasting divides
    # each row by its own value; atleast_1d keeps a lone value indexable.
    column = np.atleast_1d(np.asarray(row_divisors))[:, None]
    return matrix / column


def main(argv):
    """Load the matrix and divisors named in *argv*, divide, and save."""
    if len(argv) != 4:
        sys.exit("usage: %s <matrix.tsv> <length.txt> <outfile>" % argv[0])
    _, matrix_path, length_path, outfile = argv
    # ndmin keeps single-row / single-column files from collapsing to a
    # lower-dimensional array, which would break the row-wise broadcast.
    matrix = np.loadtxt(matrix_path, dtype=np.int32, delimiter='\t', ndmin=2)
    length = np.loadtxt(length_path, dtype=np.int32, ndmin=1)
    out = divide_rows(matrix, length)
    # Use fmt='%.20f' instead to keep 20 digits after the decimal point.
    np.savetxt(outfile, out, fmt='%.8f', delimiter='\t')


if __name__ == "__main__":
    main(sys.argv)
计算 RPKM(Reads Per Kilobase per Million mapped reads):RPKM = 10^9 × 读段数 / (基因长度 × 该样本总比对读段数)
#!/usr/bin/env python3
# Compute RPKM from a gene-by-sample read-count matrix.
# Usage: script.py <counts.tsv> <length.txt> <mapped_reads.txt> <outfile>
import os
import re
import sys

import numpy as np


def compute_rpkm(counts, length, mapped_reads):
    """Return 1e3 * 1e6 * counts / (length per row * mapped_reads per column).

    Args:
        counts: 2-D array-like indexed as [gene, sample].
        length: one value per gene (row); a scalar / 0-d array is accepted
            for a single-gene matrix.
        mapped_reads: one value per sample (column), or a scalar for a
            single-sample matrix.

    Returns:
        A float ndarray with the same shape as *counts*.

    Raises:
        ValueError: if *counts* is not 2-D. A 1-D input would silently
            broadcast against the column of lengths into a square result.
    """
    counts = np.asarray(counts, dtype=float)
    if counts.ndim != 2:
        raise ValueError("counts must be 2-D, got %d-D" % counts.ndim)
    # Lengths become a column (genes x 1) and mapped_reads stays a row, so
    # their product broadcasts to one denominator per [gene, sample] cell.
    gene_length = np.atleast_1d(np.asarray(length, dtype=float))[:, None]
    denominator = gene_length * np.asarray(mapped_reads, dtype=float)
    return 1e3 * 1e6 * counts / denominator


def main(argv):
    """Load the inputs named in *argv*, compute RPKM, and save the table."""
    if len(argv) != 5:
        sys.exit(
            "usage: %s <counts.tsv> <length.txt> <mapped_reads.txt> <outfile>"
            % argv[0]
        )
    _, counts_path, length_path, mapped_path, outfile = argv
    # Values are loaded as floats (the loadtxt default); an int32 dtype is
    # not suitable here. ndmin keeps single-gene / single-sample files from
    # collapsing to a lower-dimensional array.
    counts = np.loadtxt(counts_path, delimiter='\t', ndmin=2)
    length = np.loadtxt(length_path, ndmin=1)
    mapped_reads = np.loadtxt(mapped_path, ndmin=1)
    out = compute_rpkm(counts, length, mapped_reads)
    # The same result can be obtained by rotating the matrix with np.rot90
    # before and after dividing, but that is needlessly convoluted.
    np.savetxt(outfile, out, fmt='%.8f', delimiter='\t')


if __name__ == "__main__":
    main(sys.argv)
行列求和
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
# Sum each column of the tab-separated table "test.df" and write the totals
# to "test.total".
import pandas as pd


def column_totals(df):
    """Return a Series holding the sum of every column of *df*.

    Args:
        df: DataFrame whose columns are to be summed.

    Returns:
        A Series indexed by the column labels of *df*.
    """
    # Built-in column reduction; no per-column Python lambda needed.
    return df.sum(axis=0)


if __name__ == "__main__":
    # A nested list can be turned into a DataFrame for quick experiments:
    #   df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    df = pd.read_csv("test.df", index_col=0, header=0, sep="\t")
    total = column_totals(df)
    total.to_csv('test.total', sep='\t', index=True)
    # Related recipes:
    #   df['Row_sum'] = df.sum(axis=1)    # sum across each row, add as a new column
    #   df.loc['Col_sum'] = df.sum()      # sum down each column, add as a new row