当一个探针对应多个基因名时,取第一个
# Use the first gene name of column 1 (the text before the first "|") as the
# row name; entries that contain no "|" are kept unchanged.
rownames(RNA) <- sub("\\|.*", "", RNA[, 1])
当多个探针对应一个基因名时,取平均值
# Collapse probes that share a gene name: group the rows by column 1 and take
# the mean of columns 2-8 within each group. aggregate() names the grouping
# column of the result "Group.1" because the `by` list is unnamed.
# Fixes: `by` must be a list built with list(...), not `list[...]`, and the
# summary function is passed as FUN, not as a second `by`.
rpkm <- aggregate(rpkm[, 2:8], by = list(rpkm[, 1]), FUN = mean)
按照某个字段中以"_"分隔的第二段字符串进行分组
# Build the grouping factor from the second "_"-separated field of every
# element of x. The helper lives inside local() so it does not leak into the
# workspace.
group_list <- local({
  second_field <- function(label) {
    pieces <- strsplit(as.character(label), "_")[[1]]
    pieces[2]
  }
  factor(unlist(lapply(x, second_field)))
})
取count为0的样本比例低于20%的基因
# Keep the rows whose number of exactly-zero entries is strictly below 20% of
# the number of columns.
RNAseq <- methy[rowSums(methy == 0) < ncol(methy) * 0.2, ]
批量算pearson相关系数和P值
library(Hmisc)
# Pairwise Pearson correlations between the rows of `combine`: rcorr()
# correlates columns, hence the transpose.
qw <- rcorr(as.matrix(t(combine)), type = "pearson")
# Correlation coefficients
write.csv(qw[["r"]], file = "correlation_pearson.csv", quote = FALSE)
# Matching P values
write.csv(qw[["P"]], file = "correlation_Pvalue.csv", quote = FALSE)
将list的每一个元素按"-"拆分,取前三段后再用"-"拼接(比如得到hsa-miR-105)
# For every element of x, split on "-", keep the first three fields and join
# them back with "-". Elements with fewer than three fields get NA in the
# missing positions, exactly as 1:3 indexing produces.
unlist(lapply(x, function(item) {
  fields <- strsplit(as.character(item), "-")[[1]]
  paste(fields[1:3], collapse = "-")
}))
永久设置清华镜像和中科大镜像,在~/.Rprofile添加:
## CRAN packages: Tsinghua (TUNA) mirror
options(repos = c(CRAN = "https://mirrors.tuna.tsinghua.edu.cn/CRAN/"))
## Bioconductor packages: USTC mirror. This belongs in BioC_mirror, not repos:
## the /bioc/ URL is not a CRAN repository, and a second options(repos = ...)
## call would overwrite the Tsinghua CRAN setting above.
options(BioC_mirror = "https://mirrors.ustc.edu.cn/bioc/")
If you want to put y in some other order, say order of decreasing x, do this:
# Set the levels of Species in order of decreasing x so that the y axis is
# drawn in that order.
df$Species <- factor(
  df$Species,
  levels = df$Species[order(df$x, decreasing = TRUE)]
)
# One red point of size 3 per row.
ggplot(df) +
  geom_point(aes(x = x, y = Species), size = 3, color = "red")
read.table 报错 "line 1 did not have 12 elements" 时,试试这几个参数
# Arguments to add to the read.table() call: pad short rows, turn off quote
# handling, and keep blank lines.
fill = TRUE, quote = "", blank.lines.skip = FALSE
# Declare the encoding of the input file.
fileEncoding = "GBK"
metadata
library(GEOquery)
# Download series GSE49536 into the current directory, without fetching the
# platform (GPL) annotation.
eSet <- getGEO("GSE49536", destdir = "./", getGPL = FALSE)
# Expression matrix of the first element of the download.
eset <- exprs(eSet[[1]])
# Sample metadata (phenotype data) of the same element.
metadata <- pData(eSet[[1]])