加载R包:
readfile2seurat <- function(file_path, samplename) {
mtx <- fread(file_path)
rownames(mtx) <- mtx$V1
mtx$V1 <- NULL
obj <- CreateSeuratObject(counts = mtx, min.cell = 0)
obj$orig.ident <- samplename
return(obj)
}
gzFiles = list.files("GSE129516_RAW_mouse/")
gzFiles
[1] "GSM3714747_chow1_filtered_gene_bc_matrices.csv.gz"
[2] "GSM3714748_chow2_filtered_gene_bc_matrices.csv.gz"
[3] "GSM3714749_chow3_filtered_gene_bc_matrices.csv.gz"
[4] "GSM3714750_nash1_filtered_gene_bc_matrices.csv.gz"
[5] "GSM3714751_nash2_filtered_gene_bc_matrices.csv.gz"
[6] "GSM3714752_nash3_filtered_gene_bc_matrices.csv.gz"
for(i in 1:6){
input = paste0('GSE129516_RAW_mouse/', gzFiles[i])
samplename = strsplit(gzFiles[i], '_')[[1]][2]
cat('Loading sample', samplename, '\n')
assign(samplename, readfile2seurat(input, samplename))
}
Loading sample chow1
Loading sample chow2
Loading sample chow3
Loading sample nash1
Loading sample nash2
Loading sample nash3
mouse_all <- merge(x = chow1, y = c(chow2, chow3, nash1, nash2, nash3), project = 'GSE129516_mouse')
mouse_all
An object of class Seurat
19349 features across 33168 samples within 1 assay
Active assay: RNA (19349 features, 0 variable features)
6 layers present: counts.1, counts.2, counts.3, counts.4, counts.5, counts.6
mouse_all[[]] %>% head
orig.ident nCount_RNA nFeature_RNA
AAACCTGAGAGTAATC_1 chow1 2337 1228
AAACCTGGTAAATGTG_1 chow1 1306 777
AAACGGGAGAAACGAG_1 chow1 3786 1784
AAACGGGAGGAGTACC_1 chow1 1726 1046
AAACGGGCACCACCAG_1 chow1 1345 826
AAACGGGTCAAGGCTT_1 chow1 1785 1008
mouse_all[[]] = mouse_all[[]] %>% mutate(sample = gsub('[1-3]', '', orig.ident))
mouse_all <- mouse_all %>% NormalizeData %>% FindVariableFeatures(nfeatures = 1000) %>%
ScaleData(features = rownames(mouse_all)) %>% RunPCA %>% FindNeighbors(dims = 1:25, verbose = F) %>%
FindClusters(resolution = 0.1, verbose = F) %>% RunTSNE(dims = 1:25, seed.use=2)
mouse_all[[]] %>% head
orig.ident nCount_RNA nFeature_RNA sample
AAACCTGAGAGTAATC_1 chow1 2337 1228 chow
AAACCTGGTAAATGTG_1 chow1 1306 777 chow
AAACGGGAGAAACGAG_1 chow1 3786 1784 chow
AAACGGGAGGAGTACC_1 chow1 1726 1046 chow
AAACGGGCACCACCAG_1 chow1 1345 826 chow
AAACGGGTCAAGGCTT_1 chow1 1785 1008 chow
RNA_snn_res.0.1 seurat_clusters
AAACCTGAGAGTAATC_1 0 0
AAACCTGGTAAATGTG_1 0 0
AAACGGGAGAAACGAG_1 0 0
AAACGGGAGGAGTACC_1 0 0
AAACGGGCACCACCAG_1 0 0
AAACGGGTCAAGGCTT_1 0 0
# 展示合并以后得分类结果
DimPlot(mouse_all, label = T, pt.size = 0.0001, cols = col_palatte)
# 也可分开展示两种状态
DimPlot(mouse_all, label = T, split.by= 'sample', pt.size = 0.0001, cols = col_palatte)
FeaturePlot(mouse_all, features = general_marker, ncol=4, cols=c('grey95', 'red3'), label=T, pt.size=0.001)
根据marker细胞的表达进行分类:
new.cluster.ids <- c('Endo', 'T cell', 'Macrophage', 'B-cell', 'DC', 'Macrophage',
'Cholangiocyte', 'Hepatocyte', 'Dividing cell', 'Plasma cell', 'HSC')
names(new.cluster.ids) = levels(mouse_all)
mouse_all <- RenameIdents(mouse_all, new.cluster.ids)
mouse_all[["cellType"]] <- Idents(object = mouse_all)
head(mouse_all[[]])
orig.ident nCount_RNA nFeature_RNA sample
AAACCTGAGAGTAATC_1 chow1 2337 1228 chow
AAACCTGGTAAATGTG_1 chow1 1306 777 chow
AAACGGGAGAAACGAG_1 chow1 3786 1784 chow
AAACGGGAGGAGTACC_1 chow1 1726 1046 chow
AAACGGGCACCACCAG_1 chow1 1345 826 chow
AAACGGGTCAAGGCTT_1 chow1 1785 1008 chow
RNA_snn_res.0.1 seurat_clusters cellType
AAACCTGAGAGTAATC_1 0 0 Endo
AAACCTGGTAAATGTG_1 0 0 Endo
AAACGGGAGAAACGAG_1 0 0 Endo
AAACGGGAGGAGTACC_1 0 0 Endo
AAACGGGCACCACCAG_1 0 0 Endo
AAACGGGTCAAGGCTT_1 0 0 Endo
# 展示分类后的结果
DimPlot(mouse_all, label = F, split.by= 'sample', pt.size = 0.0001, cols = col_palatte)
harmony能够对PCA分析的PC embeddings进行后续处理,从而整合不同批次的数据。在Seurat中能够方便的使用harmony。
mouse_harmony <- RunHarmony(mouse_all, "orig.ident") # 指定批次
mouse_harmony <- mouse_harmony %>% RunTSNE(dims = 1:25, reduction='harmony', seed.use=2)
# 展示校正后的结果
DimPlot(mouse_harmony, label = T, pt.size = 0.0001, cols = col_palatte)
# 显示所有样本
DimPlot(mouse_harmony, group.by = 'cellType', split.by= 'orig.ident', pt.size = 0.0001, cols = col_palatte)
FeaturePlot(mouse_harmony, features = general_marker, ncol=4, cols=c('grey95', 'red3'), label=F, pt.size=0.001)
saveRDS(mouse_harmony, "output/mouse_harmony.RDS")