monocle | Notion

library(monocle)
##--------------------------传输data----------------------------------------
mycds<-Seurat::as.CellDataSet(mycds)
mycds <- importCDS(PPdata8,import_all = TRUE)
##选择基因簇
id<-c("3","4","7")
Cell.sub0 <- subset([email protected],seurat_clusters==0)
Cell.sub1 <- subset([email protected],seurat_clusters==1)
Cell.sub2 <- subset([email protected],seurat_clusters==2)
Cell.sub4 <- subset([email protected],seurat_clusters==4)
Cell.sub3 <- subset([email protected],seurat_clusters==3)
Cell.sub5 <- subset([email protected],seurat_clusters==5)
Cell.sub6 <- subset([email protected],seurat_clusters==6)
Cell.sub7 <- subset([email protected],seurat_clusters==7)
Cell.sub8<- subset([email protected],seurat_clusters==8)
Cell.sub9 <- subset([email protected],seurat_clusters==9)
cll <- c(row.names(Cell.sub3),row.names(Cell.sub4),row.names(Cell.sub7))
mycds <- subset(PPdata8, cells=cll)
data <- as(as.matrix(mycds@assays$RNA@counts), 'sparseMatrix')
pd <- new('AnnotatedDataFrame', data = [email protected])
fData <- data.frame(gene_short_name = row.names(data), row.names = row.names(data))
fd <- new('AnnotatedDataFrame', data = fData)
mycds <- newCellDataSet(data,
                        phenoData = pd,
                        featureData = fd,
                        expressionFamily = negbinomial.size())
##标准化和归一化
mycds <- estimateSizeFactors(mycds)
mycds <- estimateDispersions(mycds)
##使用seurat选择的高变基因⚠️
express_genes <- VariableFeatures(PPdata8)
mycds <- setOrderingFilter(mycds, express_genes)
mycds <- setOrderingFilter(mycds, disp.genes)
plot_ordering_genes(mycds)
##使用clusters差异表达基因
deg.cluster <- FindAllMarkers(PPdata8)
expression <- subset(deg.cluster,p_val_adj<0.05)$gene
mycds <- setOrderingFilter(mycds, expression)
plot_ordering_genes(mycds)
##使用monocle选择的高变基因⚠️
disp_table <- dispersionTable(mycds)
disp.genes <- subset(disp_table, mean_expression >= 0.1&dispersion_empirical >= 1*dispersion_fit)$gene_id
mycds <- setOrderingFilter(mycds, disp.genes)
                     plot_ordering_genes(mycds)
##降维
mycds <- reduceDimension(mycds, max_components = 2, method = 'DDRTree')
##细胞排序
mycds <- orderCells(mycds)
mycds <- orderCells(mycds,reverse = T)
mycds <- orderCells(mycds,root_state = 1)##选择根

##画图
#State轨迹分布图
plot1 <- plot_cell_trajectory(mycds, color_by = "State")
ggsave("pseudotime/State.pdf", plot = plot1, width = 6, height = 5)
ggsave("pseudotime/State.png", plot = plot1, width = 6, height = 5)
##Cluster轨迹分布图
plot2 <- plot_cell_trajectory(mycds, color_by = "seurat_clusters")
##把每一簇都画出来
plot_cell_trajectory(mycds,color_by = "seurat_clusters")+facet_wrap(~seurat_clusters, nrow = 1)
ggsave("pseudotime/Cluster.pdf", plot = plot2, width = 6, height = 5)
ggsave("pseudotime/Cluster.png", plot = plot2, width = 6, height = 5)
##Pseudotime轨迹图
plot3 <- plot_cell_trajectory(mycds, color_by = "Pseudotime")
ggsave("pseudotime/Pseudotime.pdf", plot = plot3, width = 6, height = 5)
ggsave("pseudotime/Pseudotime.png", plot = plot3, width = 6, height = 5)
##合并作图
plotc <- plot1|plot2|plot3
ggsave("D:/347-Combination.pdf", plot = plotc, width = 10, height = 3.5)
ggsave("D:/347-Combination.png", plot = plotc, width = 10, height = 3.5)
##基因作图
p1 <- plot_genes_in_pseudotime(mycds["Pp3c14-9940",],color_by = "seurat_clusters")
p2 <- plot_genes_in_pseudotime(mycds["Pp3c10-7030",],color_by = "seurat_clusters")

plot_genes_in_pseudotime(mycds["Pp3c21-15370",],color_by = "seurat_clusters")
plot_genes_in_pseudotime(mycds["Pp3c17-15380",],color_by = "Pseudotime")
plot_genes_in_pseudotime(mycds["Pp3c17-6500",],color_by = "State")
##单个基因作图
pData(mycds)$Pp3c3_26770 = log2( exprs(mycds)['Pp3c3-26770',]+1)
p1=plot_cell_trajectory(mycds, color_by = "Pp3c3_26770") + scale_color_gradient(na.value = "grey",low="blue",high="red")
##寻找拟时差异基因
Time_diff <- differentialGeneTest(mycds, cores = 1,
                                  fullModelFormulaStr = "~sm.ns(Pseudotime)")
Time_diff <- Time_diff[,c(5,2,3,4,1,6,7)] #把gene放前面，也可以不改
write.csv(Time_diff, "Time_diff_all.csv", row.names = F)
Time_genes <- Time_diff %>% pull(gene_short_name) %>% as.character()
p=plot_pseudotime_heatmap(mycds[Time_genes,], num_clusters=4, show_rownames=T, return_heatmap=T)
ggsave("Time_heatmapAll.pdf", p, width = 5, height = 10)
##热图
marker_genes <- row.names(subset(fData(mycds),
                                 gene_short_name %in% c("MEF2C", "MEF2D", "MYF5",
                                                        "ANPEP", "PDGFRA","MYOG",
                                                        "TPM1", "TPM2", "MYH2",
                                                        "MYH3", "NCAM1", "TNNT1",
                                                        "TNNT2", "TNNC1", "CDK1",
                                                        "CDK2", "CCNB1", "CCNB2",
                                                        "CCND1", "CCNA1", "ID1")))
Time_diff <- differentialGeneTest(mycds[disp.genes,],
                                      fullModelFormulaStr = "~sm.ns(Pseudotime)")
sig_gene_names <- row.names(subset(Time_diff, qval < 0.1))
##提取前100个
Time_genes <- top_n(Time_diff, n = 100, desc(qval)) %>% pull(gene_short_name) %>% as.character()
p = plot_pseudotime_heatmap(mycds[disp.genes,], num_clusters=4, show_rownames=F, return_heatmap=T)
ggsave("Time_heatmapTop100.pdf", p, width = 5, height = 10)
pheat <- plot_pseudotime_heatmap(mycds[sig_gene_names,],
                        num_clusters = 6,
                        cores = 1,
                        show_rownames = T)
ggsave("Time_heatmapTop100.pdf", p, width = 5, height = 10)
all_cluster <- data.frame(cutree(p$tree_row,k = 6))
##分支点基因寻找
BEAM_res <- BEAM(mycds[disp.genes,], branch_point = 1) 
p_branch <- plot_genes_branched_heatmap(mycds[row.names(subset(BEAM_res,
                                                 qval < 1e-5)),],
                            branch_point = 1, #绘制的是哪个分支
                            num_clusters = 4, #分成几个cluster，根据需要调整
                            use_gene_short_name = T,
                            show_rownames = T,
                            return_heatmap = T)#有632个gene，太多了
branch <- row.names(subset(BEAM_res,qval < 1e-5))
all_branch_cluster <- data.frame(cutree(p_branch$tree_row,k = 4))
ggsave("branch.png", p_branch, width = 5, height = 10)
##单基因作图
library(Seurat)
exprData <- mycds@assayData$exprs
exprData <- LogNormalize(exprData) #对数据normaliza一下，会比较显著，记得引用seurat包
mycds$Pp3c17_6500 <- exprData["Pp3c17-6500",] #输入想看的基因，cds是monocle的对象
plot_cell_trajectory(mycds, color_by = "Pp3c17_6500",cell_size=1,show_backbone=TRUE)+scale_color_gradient(low = "grey",high = "#DE045B",limits=c(1,2.5)) #为了让颜色好看
scale_color_gradient(low = "blue",high = "red",limits=c(1,2.5))
##保存结果
##--------------------------monocle3----------------------------------------
library(Seurat)
library(monocle3)
library(tidyverse)
library(patchwork)
##--------------------------3/4/7簇----------------------------------------
##导入cds文件
Cell.sub0 <- subset([email protected],seurat_clusters==0)
Cell.sub1 <- subset([email protected],seurat_clusters==1)
Cell.sub2 <- subset([email protected],seurat_clusters==2)
Cell.sub4 <- subset([email protected],seurat_clusters==4)
Cell.sub3 <- subset([email protected],seurat_clusters==3)
Cell.sub5 <- subset([email protected],seurat_clusters==5)
Cell.sub6 <- subset([email protected],seurat_clusters==6)
Cell.sub7 <- subset([email protected],seurat_clusters==7)
Cell.sub8<- subset([email protected],seurat_clusters==8)
Cell.sub9 <- subset([email protected],seurat_clusters==9)
cll <- c(row.names(Cell.sub3),row.names(Cell.sub4),row.names(Cell.sub7))
cds <- subset(PPdata8, cells=cll)
data <- cds@assays$RNA@counts
cell_metadata <- [email protected]
gene_annotation <- data.frame(gene_short_name = rownames(data))
rownames(gene_annotation) <- rownames(data)
cds <- new_cell_data_set(data,
                         cell_metadata = cell_metadata,
                         gene_metadata = gene_annotation)
##继承seurat的降维数据
cds.embed <- cds@int_colData$reducedDims$UMAP
int.embed <- Embeddings(PPdata8, reduction = "umap")
int.embed <- int.embed[rownames(cds.embed),]
cds@int_colData$reducedDims$UMAP <- int.embed 
plot_cells(cds, reduction_method="UMAP", color_cells_by="seurat_clusters") 
##NormalizeData+ScaleData+RunPCA
cds <- preprocess_cds(cds, num_dim = 50)
plot_pc_variance_explained(cds)
##去除批次
cds <- align_cds(cds,alignment_group = "plate")
##降维
cds <- reduce_dimension(cds, preprocess_method = "PCA",reduction_method = c("UMAP"))
##聚类
cds <- cluster_cells(cds)   
##轨迹推断
cds<- learn_graph(cds) 
##画图
plot_cells(cds,
          color_cells_by = "cluster",
          label_groups_by_cluster=FALSE,
          label_leaves=TRUE,
          label_branch_points=TRUE,
          group_label_size=10,
          cell_size=1.5)                   
##选择根
cds = order_cells(cds) 
##拟时序的 图
plot_cells(cds,
           color_cells_by = "pseudotime",
           label_cell_groups=FALSE,
           label_leaves=TRUE,
           label_branch_points=TRUE,
           graph_label_size=1.5,
           group_label_size=4,cell_size=1.5)

plot_cells(cds,
           color_cells_by = "cluster",
           label_groups_by_cluster=FALSE,
           label_leaves=FALSE,
           label_branch_points=FALSE)
gene_module_df <- find_gene_modules(cds[pr_deg_ids,], resolution=c(0,10^seq(-6,-1)))
##单基因
plot_cells(cds, reduction_method="UMAP", color_cells_by="seurat_clusters",label_groups_by_cluster=FALSE,
           label_leaves=TRUE,
           label_branch_points=TRUE,
           group_label_size=10,
           cell_size=1.5,genes = "Pp3c1-19190") +scale_color_gradient(low = "grey",high = "red",limits=c(0,2.5))

data <- PPdata8@assays$RNA@counts
cell_metadata <- [email protected]
gene_annotation <- data.frame(gene_short_name = rownames(data))
rownames(gene_annotation) <- rownames(data)
cds <- new_cell_data_set(data,
                             cell_metadata = cell_metadata,
                             gene_metadata = gene_annotation)