library(ggplot2)
library(reshape2)
library(scatterplot3d)
library(openxlsx)
library(corrplot)
library(ggrepel)
读入数据:
exp_data_chongyi <- read.table(paste(path_in,file="exp_matrix_chongyi.txt",sep="/"),sep='\t',header=T,row.names=1)
exp_data_shanghai <- read.table(paste(path_in,file="exp_matrix_shanghai.txt",sep="/"),sep='\t',header=T,row.names=1)
exp_matrix_chongyi <- exp_data_chongyi[,c(2:19)]
exp_matrix_shanghai <- exp_data_shanghai[,c(2:19)]
sample_id <- colnames(exp_matrix_chongyi)
sample_pheno <- data.frame(sample_id = sample_id,sample = rep(c("A1","A2","A3","A4","A5","A6"),each=3),group = rep(c("control","OE"),each=9))
表达数据的质量检测
#par(cex = 0.7)
n_sample <- ncol(exp_data)
cols <- rainbow(n_sample*1.2)
png(file=paste(out_path,"plot_QC.png",sep='/'),width = 1000, height = 1000)
boxplot(exp_data, col = cols, main="expression value",las=2)
dev.off()
绘制表达量的密度分布图
PlotExpDensity <- function(exp.data,path.out){
exp.data <- log(exp.data,10)
form <- melt(exp.data)
plot <- ggplot(form,aes(value,color=variable)) + xlab("Expression level") + geom_density(alpha = 0.6) +geom_rug() + theme_bw()
ggsave("density.pdf",plot,path=path.out)
}
PlotExpDensity(exp.data=exp_matrix_chongyi,path.out=path_out_chongyi)
PlotExpDensity(exp.data=exp_matrix_shanghai,path.out=path_out_shanghai)
PCA分析
PlotPCA <- function(exp.data,sample.pheno,path.out) {
exp.data <- scale(exp.data, center=T, scale=T)
#PCA分析
pca <- prcomp(t(exp.data),scale=F)
sample.pheno <- data.frame(sample.pheno)
pca_result <- as.data.frame(pca$x)[,c(1,2)]
pca_result <- cbind(pca_result,sample.pheno)
pca_result$sample_id <- as.factor(pca_result$sample_id)
print(head(pca_result))
#利用ggplot2画图
plot <- ggplot(pca_result)+
geom_point(aes(x=pca_result[,1],y=pca_result[,2],color=sample.pheno$sample_id,shape=sample.pheno$group),size=5)+
theme(legend.title =element_blank())+
labs(x="PCA1",y="PCA2")
ggsave("plot_pca.pdf",plot,path=path.out)
}
PlotPCA(exp.data=countData,sample.pheno=sample_pheno,path.out=path_out)
三维PCA
pdf(file=paste(path_out,"PCA_3D.pdf",sep='/'),width = 15, height = 15)
s3d <- scatterplot3d(pca_result[,1:3],
pch = 16, # 点形状
color=c(rep("#66C2A5FF",7),rep("#FC8D62FF",7)), # 点颜色
cex.symbols = 4 # 点大小
)
# 设置图例
legend("topright",title="Sample",
legend = unique(sample_pheno$sample_id),
col = c(rep("#66C2A5FF",7),rep("#FC8D62FF",7)),
pch = 16,
inset = -0.1,
xpd = TRUE,
horiz = TRUE)
# 设置文字标注
text(s3d$xyz.convert(pca_result[,1:3] + 2),
labels = sample_pheno$sample_id,
cex = 1.8,col = "black")
dev.off()
表达相关性分析
PlorCor <- function(exp.data,path.out) {
corr <- cor(exp.data, method = 'spearman')#输入为RPKM
pdf(paste(path.out,"plot_sample_cor.pdf",sep="/"),width=13,height=10)
corrplot(corr, type = 'upper', tl.col = 'black', order = 'hclust', tl.srt = 45, addrect=2,tl.pos="tp",col.lim=c(0.7,1),is.corr = FALSE) #addrect=2人为分成两组
corrplot(corr,add=TRUE, type="lower", method="number",order="hclust", col="black",diag=FALSE,tl.pos="n", cl.pos="n")
dev.off()
}
PlorCor(exp.data=exp_matrix_chongyi,path.out=path_out_chongyi)