通过lncRNA获得结合的miRNA(3个miRNA预测数据库中都存在),再获得miRNA作用的靶基因mRNA,靶基因mRNA与任一肿瘤差异mRNA取交集,交集mRNA与其相关miRNA,lncRNA作互作图,并将交集mRNA作GO和KEGG富集分析。
通过lncRNA获得结合的miRNA
1.lncRNA与miRNA的区别
2.mircode数据库
3.这可能是最轻松易懂的lncRNA与miRNA介绍了
4.LncRNA与miRNA结合预测
进入主页点击download
解压修改文件名为mircode.txt
将差异分析后的lncRNA复制粘贴到新建diff_lncRNA.txt文件中
输入文件为:
perl代码:
use strict;
use warnings;
my %hash=();
open(RF,"diff_lncRNA.txt") or die $!;
while(my $line=<RF>){
chomp($line);
$hash{$line}=1;
}
close(RF);
my %rep=();
open(RF,"mircode.txt") or die $!;
open(WF,">lncRNA_mircode.txt") or die $!;
while(my $line=<RF>){
if($.==1){
print WF "lncRNA\tmiRNA\n";
next;
}
my @arr=split(/\t/,$line);
my @oneArr=split(/\./,$arr[1]);
if(exists $hash{$oneArr[0]}){
my @threeArr=split(/\//,$arr[3]);
foreach my $mir(@threeArr){
if($mir=~/^miR/){
$mir="hsa-$mir";
}
else{
$mir="hsa-miR-$mir";
}
my $out="$oneArr[0]\t$mir";
unless(exists $rep{$out}){
print WF "$out\n";
$rep{$out}=1;
}
}
}
}
close(WF);
close(RF);
输出文件为
获取在三个数据库共有的miRNA及其结合的mRNA
use strict;
use warnings;
my %miHash=();
open(RF,"lncRNA_mircode.txt") or die $!;
while(my $line=<RF>){
chomp($line);
my @arr=split(/\t/,$line);
$arr[1]=~s/^\s+|\s+$//g;
if($arr[1]=~/hsa/){
$miHash{$arr[1]}=1;
}
else{
$miHash{"hsa-$arr[1]"}=1;
}
}
close(RF);
my %hash=();
my @files=glob("*.tsv");
my @dbs=();
foreach my $file(@files){
my $db=$file;
$db=~s/\.tsv//g;
push(@dbs,$db);
open(RF,"$file") or die $!;
while(my $line=<RF>){
chomp($line);
my @arr=split(/\t/,$line);
$arr[0]=~s/mir/miR/g;
if(exists $miHash{$arr[0]}){
my $mirnaGene="$arr[0]\t$arr[1]";
${$hash{$mirnaGene}}{$db}=1;
}
}
close(RF);
}
open(WF,">target.txt") or die $!;
print WF "miRNA\tGene\t" . join("\t",@dbs) . "\tSum\n";
foreach my $key(keys %hash){
my $outLine=$key;
my $sum=0;
foreach my $db(@dbs){
if(exists ${$hash{$key}}{$db}){
$sum++;
$outLine=$outLine . "\t1";
}
else{
$outLine=$outLine . "\t0";
}
}
if($sum>=3){
print WF $outLine . "\t$sum\n";
}
}
close(WF);
输出文件为:
Cytoscape输入文件准备
将之前作差异分析后得到的mRNA和logFC复制粘贴到新建gene.txt文件中
输入文件为:
perl代码如下:
use strict;
use warnings;
my %geneHash=();
open (RF,"gene.txt") or die $!;
while(my $line=<RF>){
chomp($line);
my @arr=split(/\t/,$line);
$geneHash{$arr[0]}=$arr[1];
}
close(RF);
my %miHash=();
open (RF,"target.txt") or die $!;
while(my $line=<RF>){
next if($.==1);
chomp($line);
my @arr=split(/\t/,$line);
if(exists $geneHash{$arr[1]}){
$miHash{$arr[0]}=1;
}
}
close(RF);
my %miHash2=();
my %repHash=();
open(NET,">network.txt") or die $!;
print NET "Node1\tNode2\tLine\n";
open(TYPE,">type.txt") or die $!;
print TYPE "Name\tType\n";
open(MRNA,">mRNA.txt") or die $!;
open(LNC,">lncRNA.txt") or die $!;
open(MI,">miRNA.txt") or die $!;
open (SYMBOL,">symbol.txt") or die $!;
print SYMBOL "gene\tlogFC\n";
open (RF,"lncRNA_mircode.txt") or die $!;
while(my $line=<RF>){
next if($.==1);
chomp($line);
my @arr=split(/\t/,$line);
if(exists $miHash{$arr[1]}){
print NET "$arr[0]\t$arr[1]\tlncRNA\n";
unless($repHash{$arr[0]}){
print TYPE "$arr[0]\tlncRNA\n";
$repHash{$arr[0]}=1;
print LNC "$arr[0]\n";
}
$miHash2{$arr[1]}=1;
}
}
close(RF);
open (RF,"target.txt") or die $!;
while(my $line=<RF>){
next if($.==1);
chomp($line);
my @arr=split(/\t/,$line);
if( (exists $geneHash{$arr[1]}) && (exists $miHash2{$arr[0]}) ){
print NET "$arr[0]\t$arr[1]\tmRNA\n";
unless($repHash{$arr[0]}){
print TYPE "$arr[0]\tmiRNA\n";
$repHash{$arr[0]}=1;
print MI "$arr[0]\n";
}
unless($repHash{$arr[1]}){
print TYPE "$arr[1]\tmRNA\n";
$repHash{$arr[1]}=1;
print MRNA "$arr[1]\n";
print SYMBOL "$arr[1]\t$geneHash{$arr[1]}\n";
}
}
}
close(SYMBOL);
close(RF);
close(MRNA);
close(LNC);
close(MI);
close(NET);
close(TYPE);
ceRNA网络构建
下载Java jre版本,下载Cytoscape软件,两软件下载在同一文件夹下
导入network.txt
导入type.txt
根据喜好选择颜色,形状等
鼠标在空白处点一下,进行重复操作
导出图片
基因名字转换为基因id
setwd("E:\\research")
library("org.Hs.eg.db")
rt=read.table("symbol.txt",sep="\t",check.names=F,header=T)
genes=as.vector(rt[,1])
entrezIDs <- mget(genes, org.Hs.egSYMBOL2EG, ifnotfound=NA)
entrezIDs <- as.character(entrezIDs)
out=cbind(rt,entrezID=entrezIDs)
write.table(out,file="id.txt",sep="\t",quote=F,row.names=F)
GO富集分析
setwd("E:\\research")
library("clusterProfiler")
library("org.Hs.eg.db")
rt=read.table("id.txt",sep="\t",header=T,check.names=F)
rt=rt[is.na(rt[,"entrezID"])==F,]
geneFC=rt$logFC
gene=rt$entrezID
names(geneFC)=gene
#GO富集分析
kk <- enrichGO(gene = gene,OrgDb = org.Hs.eg.db, pvalueCutoff =0.05, qvalueCutoff = 0.05) #若筛选出的结果太少,可将qvalueCutoff = 0.05调为1
write.table(kk,file="GO.txt",sep="\t",quote=F,row.names = F)
#柱状图
tiff(file="barplot.tiff",width = 35,height = 20,units ="cm",compression="lzw",bg="white",res=600 #调节分辨率)
barplot(kk, drop = TRUE, showCategory = 100 #显示前100个通路)
dev.off()
#点图
tiff(file="dotplot.tiff",width = 35,height = 20,units ="cm",compression="lzw",bg="white",res=600)
dotplot(kk,showCategory = 100)
dev.off()
将id转换回基因名,使可读性更好,输入文件GO.txt
perl代码
use strict;
use warnings;
my %hash=();
open(RF,"id.txt") or die $!;
while(my $line=<RF>){
chomp($line);
my @arr=split(/\t/,$line);
$hash{$arr[$#arr]}="$arr[0]";
}
close(RF);
open(KEGG,"GO.txt") or die $!;
open(WF,">GO.xls") or die $!;
while(my $line=<KEGG>){
if($.==1){
print WF $line;
next;
}
chomp($line);
my @arr=split(/\t/,$line);
my @idArr=split(/\//,$arr[$#arr-1]);
my @symbols=();
foreach my $id(@idArr){
if(exists $hash{$id}){
push(@symbols,$hash{$id});
}
}
$arr[$#arr-1]=join("/",@symbols);
print WF join("\t",@arr) . "\n";
}
close(WF);
close(KEGG);
KEGG富集分析
setwd("C:\\Users")
library("clusterProfiler")
rt=read.table("id.txt",sep="\t",header=T,check.names=F)
rt=rt[is.na(rt[,"entrezID"])==F,]
geneFC=rt$logFC
gene=rt$entrezID
names(geneFC)=gene
#kegg富集分析,定义物种类型人就是hsa
kk <- enrichKEGG(gene = gene, organism = "hsa" , pvalueCutoff =0.05, qvalueCutoff =0.05)
write.table(kk,file="KEGG.txt",sep="\t",quote=F,row.names = F)
#柱状图
tiff(file="barplot.tiff",width = 30,height = 20,units ="cm",compression="lzw",bg="white",res=600)
barplot(kk, drop = TRUE, showCategory = 100)
dev.off()
#点图
tiff(file="dotplot.tiff",width = 30,height = 20,units ="cm",compression="lzw",bg="white",res=600)
dotplot(kk, showCategory = 100)
dev.off()
#通路图
library("pathview")
keggxls=read.table("KEGG.txt",sep="\t",header=T)
#gene.data = geneFC基因颜色定义
for(i in keggxls$ID){
pv.out <- pathview(gene.data = geneFC, pathway.id = i, species = "hsa", out.suffix = "pathview")
}
将id转换回基因名,使可读性更好,输入文件KEGG.txt
perl代码
use strict;
use warnings;
my %hash=();
open(RF,"id.txt") or die $!;
while(my $line=<RF>){
chomp($line);
my @arr=split(/\t/,$line);
$hash{$arr[$#arr]}="$arr[0]";
}
close(RF);
open(KEGG,"KEGG.txt") or die $!;
open(WF,">KEGG.xls") or die $!;
while(my $line=<KEGG>){
if($.==1){
print WF $line;
next;
}
chomp($line);
my @arr=split(/\t/,$line);
my @idArr=split(/\//,$arr[$#arr-1]);
my @symbols=();
foreach my $id(@idArr){
if(exists $hash{$id}){
push(@symbols,$hash{$id});
}
}
$arr[$#arr-1]=join("/",@symbols);
print WF join("\t",@arr) . "\n";
}
close(WF);
close(KEGG);