更换数据集跑Seurat流程(smartseq2)
几个函数
1. DimPlot():
Graphs the output of a dimensional reduction technique on a 2D scatter plot where each point is a cell and it's positioned based on the cell embeddings determined by the reduction technique. By default, cells are colored by their identity class (can be changed with the group.by parameter). 画一个2D降维图,点代表细胞,可以根据其分组用不同的颜色表示。
Arguments:
DimPlot(object, dims = c(1, 2), cells = NULL, cols = NULL, pt.size = NULL, reduction = NULL, group.by = NULL, split.by = NULL, shape.by = NULL, order = NULL, label = FALSE, label.size = 4, repel = FALSE, cells.highlight = NULL, cols.highlight = "#DE2D26", sizes.highlight = 1, na.value = "grey50", ncol = NULL, combine = TRUE)
dims: Dimensions to plot, must be a two-length numeric vector specifying x- and y-dimensions. PCA图的维度,必须是两维向量
reduction:Which dimensionality reduction to use. If not specified, first searches for umap, then tsne, then pca.采用的降维方法,首选umap->tsne->pca
DimPlot(brc, dims = c(1, 3), reduction = "pca")
PC1及PC3
group.by:Name of one or more metadata columns to group (color) cells by (for example, orig.ident); pass 'ident' to group by identity class.应用metadata中的某列数据作为分组标准,按不同颜色显示
DimPlot(brc, dims = c(1, 3), reduction = "pca", group.by = "plate")
按照批次分组绘图split.by: Name of a metadata column to split plot by,按照metadata中的某个属性(可以是批次信息,生物学状态,不同组织病理类型等等)将plot图分为几列
DimPlot(brc, dims = c(1, 3), reduction = "pca", split.by = "plate")
按照批次绘图,PC1及PC3
DimPlot(brc, dims = c(1, 3), reduction = "pca", split.by = "g")
按照分组绘图,PC1及PC3
2. DimHeatmap()
DimHeatmap(brc, dims = 1, cells = 500, balanced = TRUE)
object: brc
dims: 显示哪个维度的基因及细胞的热图(一般可根据下述步骤确定多少个维度后,将重要的维度都绘制出来)
cells: 绘图所用的细胞,500表示前500个细胞,其他参数一般不重要
代码运行
rm(list = ls())
packageVersion("Seurat")
library(Seurat)
library(dplyr)
library(patchwork)
library(ggsci)###改变一下配色
load(file='../input.Rdata')
#?CreateSeuratObject()
brc.data = a
brc <- CreateSeuratObject(counts = brc.data, project = "brc7", min.cells = 3, min.features = 200, meta.data = df)
brc
brc.data[c("Jak1", "Jak2", "Jak3"), 1:5]
brc[["percent.mt"]] <- PercentageFeatureSet(brc, pattern = "^MT-")
brc[["percent.ercc"]] <- PercentageFeatureSet(brc, pattern = "^ERCC-")
head(brc@meta.data)
#展示数据中每个细胞UMI,基因数,线粒体及ERCC比例
VlnPlot(brc, features = c("nFeature_RNA", "nCount_RNA", "percent.ercc","percent.mt"), ncol = 4)
#展示相关特征之间的关系
plot1 <- FeatureScatter(brc, feature1 = "nCount_RNA", feature2 = "percent.ercc")
plot2 <- FeatureScatter(brc, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plot3 <- FeatureScatter(brc, feature1 = "percent.ercc", feature2 = "nFeature_RNA")
plot1 + plot2 + plot3
#将unique基因count数超过7000,或者小于1000的细胞过滤掉,把ERCCcount数占50%以上的细胞过滤掉
brc <- subset(brc, subset = nFeature_RNA > 1000 & nFeature_RNA < 7000 & percent.ercc < 50)
#标准化:表达量*10000后取log,结果存储在pbmc[["RNA"]]@data
brc <- NormalizeData(brc, normalization.method = "LogNormalize", scale.factor = 10000)
#选取高变异基因,只选取前2000个
brc <- FindVariableFeatures(brc, selection.method = "vst", nfeatures = 2000)
#?FindVariableFeatures
top10 <- head(VariableFeatures(brc), 10)
plot1 <- VariableFeaturePlot(brc)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
plot1 + plot2
#scale线性转换
all.genes <- rownames(brc)
brc <- ScaleData(brc, features = all.genes)
brc <- ScaleData(brc, vars.to.regress = "percent.ercc")
#降维 scale后PCA
brc <- RunPCA(brc, features = VariableFeatures(object = brc))
VizDimLoadings(brc, dims = 1:2, reduction = "pca")
DimPlot(brc, reduction = "pca")
#?DimPlot metadata中分组列名一定要加引号,参数可调整
DimPlot(brc, dims = c(1, 2), reduction = "pca", split.by = "g")
DimHeatmap(brc, dims = 1, cells = 500, balanced = TRUE)
?DimHeatmap
#合适维度的判定
brc <- JackStraw(brc, num.replicate = 100)
?JackStraw()
brc <- ScoreJackStraw(brc, dims = 1:20)
JackStrawPlot(brc, dims = 1:15)
brc <- FindNeighbors(brc, dims = 1:10)
brc <- FindClusters(brc, resolution = 0.5)
head(Idents(brc))
#降维可视化
brc <- RunUMAP(brc, dims = 1:10)
DimPlot(brc, reduction = "umap")
#寻找差异基因
cluster1.markers <- FindMarkers(brc, ident.1 = 1, min.pct = 0.25)
head(cluster1.markers, n = 5)
brc.markers <- FindAllMarkers(brc, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
table(brc.markers$cluster == 2)
cluster1.markers <- FindMarkers(brc, ident.1 = 1, logfc.threshold = 0.25, test.use = "roc", only.pos = TRUE)
VlnPlot(brc, features = c("Fbln1", "Mfap5"))
top10 = brc.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
DoHeatmap(brc, features = top10$gene) + NoLegend()