题目链接:http://www.bio-info-trainee.com/3793.html
题目比较基础,我这里就不附加详细的注释了
> rm(list = ls())
> options(stringsAsFactors = F)
> a=read.table('SraRunTable.txt',sep = '\t',header = T)
> b=read.csv('sample.csv')
> colnames(a)
[1] "BioSample" "Experiment" "MBases" "MBytes" "Run"
[6] "SRA_Sample" "Sample_Name" "Assay_Type" "AssemblyName" "AvgSpotLen"
[11] "BioProject" "Center_Name" "Consent" "DATASTORE_filetype" "DATASTORE_provider"
[16] "InsertSize" "Instrument" "LibraryLayout" "LibrarySelection" "LibrarySource"
[21] "LoadDate" "Organism" "Platform" "ReleaseDate" "SRA_Study"
[26] "age" "cell_type" "marker_genes" "source_name" "strain"
[31] "tissue"
> colnames(b)
[1] "Accession" "Title" "Sample.Type" "Taxonomy" "Channels"
[6] "Platform" "Series" "Supplementary.Types" "Supplementary.Links" "SRA.Accession"
[11] "Contact" "Release.Date"
> d=merge(a,b,by.x = 'Sample_Name',by.y = 'Accession')
> e=d[,c("MBases","Title")]
> save(e,file = 'input.Rdata')
> rm(list = ls())
> options(stringsAsFactors = F)
> load(file = 'input.Rdata')
> e[,2]
[1] "SS2_15_0048_A1" "SS2_15_0048_A2" "SS2_15_0048_A3" "SS2_15_0048_A4" "SS2_15_0048_A5" "SS2_15_0048_A6"
[7] "SS2_15_0048_A7" "SS2_15_0048_A8" "SS2_15_0048_A9" "SS2_15_0048_A10" "SS2_15_0048_A11" "SS2_15_0048_A12"
[13] "SS2_15_0048_A13" "SS2_15_0048_A14" "SS2_15_0048_A15" "SS2_15_0048_A16" "SS2_15_0048_A17" "SS2_15_0048_A18"
···
> plate=unlist(lapply(e[,2],function(x){
+ # x=e[1,2]
+ x
+ strsplit(x,'_')[[1]][3]
+
+ }))
> table(plate)
plate
0048 0049
384 384
> boxplot(e[,1]~plate)
> t.test(e[,1]~plate)
Welch Two Sample t-test
data: e[, 1] by plate
t = 2.3019, df = 728.18, p-value = 0.02162
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
0.1574805 1.9831445
sample estimates:
mean in group 0048 mean in group 0049
13.08854 12.01823
> e$plate=plate
> library(ggplot2)
> colnames(e)
[1] "MBases" "Title" "plate"
> ggplot(e,aes(x=plate,y=MBases))+geom_boxplot()
> library(ggpubr)
载入需要的程辑包:magrittr
> p <- ggboxplot(e, x = "plate", y = "MBases",
+ color = "plate", palette = "jco",
+ add = "jitter")
>
>
> # Add p-value
> p + stat_compare_means(method = 't.test')
1.png
2.png
3.png