WGCNA | Notion

0) printFlush(paste("Removing samples:", paste(rownames(datExpr00)[!gsg$goodSamples], collapse = ", "))); # Remove the offending genes and samples from the data: datExpr00 = datExpr00[gsg$goodSamples, gsg$goodGenes]} ##运行完后再次运行gsg以及检验gsg ##聚类所有样本，观察是否有离群值或异常值。 sampleTree = hclust(dist(datExpr00), method = "average") plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2) ##删除离群样本 abline(h = 15, col = "red") #划定需要"> 0) printFlush(paste("Removing samples:", paste(rownames(datExpr00)[!gsg$goodSamples], collapse = ", "))); # Remove the offending genes and samples from the data: datExpr00 = datExpr00[gsg$goodSamples, gsg$goodGenes]} ##运行完后再次运行gsg以及检验gsg ##聚类所有样本，观察是否有离群值或异常值。 sampleTree = hclust(dist(datExpr00), method = "average") plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2) ##删除离群样本 abline(h = 15, col = "red") #划定需要"> 0) printFlush(paste("Removing samples:", paste(rownames(datExpr00)[!gsg$goodSamples], collapse = ", "))); # Remove the offending genes and samples from the data: datExpr00 = datExpr00[gsg$goodSamples, gsg$goodGenes]} ##运行完后再次运行gsg以及检验gsg ##聚类所有样本，观察是否有离群值或异常值。 sampleTree = hclust(dist(datExpr00), method = "average") plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2) ##删除离群样本 abline(h = 15, col = "red") #划定需要">

##WGCNA
library(WGCNA)
##写入表达量数据
datExpr00 = as.data.frame(t(df))##转置表格
datExpr00[] <- lapply(datExpr00, as.numeric)
gsg = goodSamplesGenes(datExpr00, verbose = 3) ##检查缺失值和识别离群值（异常值）
gsg$allOK
##如果gsg$allOK的结果为TRUE，证明没有缺失值，可以直接下一步。如果为FALSE，则需要用以下函数进行删除缺失值。
if (!gsg$allOK)
{
  # Optionally, print the gene and sample names that were removed:
  if (sum(!gsg$goodGenes)>0)
    printFlush(paste("Removing genes:", paste(names(datExpr00)[!gsg$goodGenes], collapse = ", ")));
  if (sum(!gsg$goodSamples)>0)
    printFlush(paste("Removing samples:", paste(rownames(datExpr00)[!gsg$goodSamples], collapse = ", ")));
  # Remove the offending genes and samples from the data:
  datExpr00 = datExpr00[gsg$goodSamples, gsg$goodGenes]} ##运行完后再次运行gsg以及检验gsg
##聚类所有样本，观察是否有离群值或异常值。
sampleTree = hclust(dist(datExpr00), method = "average") 

plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,
     cex.axis = 1.5, cex.main = 2)
##删除离群样本
abline(h = 15, col = "red") #划定需要剪切的枝长
clust = cutreeStatic(sampleTree, cutHeight = 15, minSize = 10)
##这时候会从高度为15这里横切，把离群样本分开
table(clust)   
keepSamples = (clust==1)  #保留非离群(clust==1)的样本
datExpr0 = datExpr00[keepSamples, ]  #去除离群值后的数据
nGenes = ncol(datExpr0)
nSamples = nrow(datExpr0)
##表型数据
##载入数据
traitData = read.csv("ClinicalTraits.csv");
dim(traitData)  #每行是一个样本，每列是一种信息
##将临床表征数据和表达数据进行匹配（用样本名字进行匹配）
femaleSamples = rownames(datExpr00);
traitRows = match(femaleSamples, df4$sample);
datTraits = df4[traitRows, -1];
rownames(datTraits) = df4[traitRows, 1];
collectGarbage()
##可视化表型数据与基因表达量数据的联系，重构样本聚类树
sampleTree2 = hclust(dist(datExpr00), method = "average")
traitColors = numbers2colors(datTraits, signed = T) #用颜色代表关联度
plotDendroAndColors(sampleTree2, traitColors,
                    groupLabels = names(datTraits),
                    main = "Sample dendrogram and trait heatmap")

##power选择
powers <- c(seq(1,10),seq(12,20,by=2))
powers= 1:40
##软阈值的选取
sft <- pickSoftThreshold(datExpr00,powerVector = powers,verbose = 3)  
sft$powerEstimate ##可以查看最佳软阈值，如果没有则需要手动选取，进行画图
par(mfrow = c(1, 2))
cex1 <- 0.9
plot(sft$fitIndices[, 1], -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
     xlab = "Soft Threshold (power)", ylab = "SFT, signed R^2", type = "n", main = paste("Scale independence"))
text(sft$fitIndices[, 1], -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
     labels = powers, col = "red")
abline(h =0.85, col = "red")
plot(sft$fitIndices[, 1], sft$fitIndices[, 5], type = "n", xlab = "Soft Threshold (power)",
     ylab = "Mean Connectivity", main = paste("Mean connectivity"))
text(sft$fitIndices[, 1], sft$fitIndices[, 5], labels = power, col = "red")
abline(h =0, col = "red")

##sft$powerEstimate为NA时可运行下面
cex1=0.9 ##power阈值，可减到0.8
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",
     type="n",main = paste("Scale independence")); #画图 
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],labels=power,cex=cex1,col="red")
abline(h=0.90,col="red") 

if (is.na(power)){ 
  power = ifelse(nSamples<20, ifelse(type == "unsigned", 9, 18), 
                 ifelse(nSamples<30, ifelse(type == "unsigned", 8, 16), 
                        ifelse(nSamples<40, ifelse(type == "unsigned", 7, 14), 
                               ifelse(type == "unsigned", 6, 12))        
                 ) 
  ) 
}

softPower =4#参数可改，在上面那power图里曲线平滑时的第一个数，一般不大于12
##一步法
datExpr00[] <- lapply(datExpr00, as.numeric)
net = blockwiseModules(
                        datExpr00,
                        power = softPower,
                        maxBlockSize = 6000,
                        TOMType = "signed", minModuleSize = 30,
                        reassignThreshold = 0, mergeCutHeight = 0.25,
                        numericLabels = TRUE, pamRespectsDendro = FALSE,
                        saveTOMs = F, 
                        verbose = 3)
##标记颜色
mergedColors = labels2colors(net$colors)
##画图
plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],
                    "Module colors",dendroLabels = FALSE, 
                    hang = 0.03,addGuide = TRUE, guideHang = 0.05)
##模块-表型数据关联
nGenes = ncol(datExpr0);
nSamples = nrow(datExpr0);
# 重新计算带有颜色标签的模块
MEs0 = moduleEigengenes(datExpr0, moduleColors)$eigengenes
MEs = orderMEs(MEs0)
moduleTraitCor = cor(MEs, datTraits, use = "p");
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples);
# 通过相关值对每个关联进行颜色编码
sizeGrWindow(10,6)
# 展示模块与表型数据的相关系数和 P值
textMatrix = paste(signif(moduleTraitCor, 2), "\\n(",
                   signif(moduleTraitPvalue, 1), ")", sep = "");
dim(textMatrix) = dim(moduleTraitCor)
par(mar = c(6, 8.5, 3, 3));##c(6, 8, 4, 4)表示边距的大小分别为6英寸（下方）、8英寸（左侧）、4英寸（上方）和4英寸（右侧）
# 用热图的形式展示相关系数
par(mar = c(10, 10, 3, 3))
labeledHeatmap(Matrix = moduleTraitCor,
               xLabels = names(datTraits),
               yLabels = names(MEs),
               ySymbols = names(MEs),
               colorLabels = FALSE,
               colors = greenWhiteRed(50),
               textMatrix = textMatrix,
               setStdMargins = FALSE, ##是否自定义边界
               cex.text = 0.5,
               zlim = c(-1,1),
               main = paste("Module-trait relationships"))

#colors = greenWhiteRed(50)不适用于红绿色盲患者，建议用 blueWhiteRed代替.
#该分析确定了几个重要的模块-特征关联。我们将体重作为感兴趣的特征来研究。

fpkmSamples = rownames(datExpr00) 
traitSamples =rownames(df2) 
traitRows = match(fpkmSamples, traitSamples) 
datTraits = df2[traitRows,] 
rownames(datTraits)
## 模块与数量性状间的相关性 
# 
moduleColors = mergedColors 
# Construct numerical labels corresponding to the colors 
colorOrder = c("grey", standardColors(50)) 
moduleLabels = match(moduleColors, colorOrder)-1 
MEs = mergedMEs 

nGenes = ncol(datExpr00) 
nSamples = nrow(datExpr00) 
moduleTraitCor = cor(MEs, datTraits, use = "p") 
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples) 

pdf(file="8_Module-trait relationships.pdf",width=10,height=10) 

textMatrix = paste(signif(moduleTraitCor, 2), "\\n(", 
                   signif(moduleTraitPvalue, 1), ")", sep = "") 

dim(textMatrix) = dim(moduleTraitCor) 
par(mar = c(6, 8.5, 3, 3)) 

tairtheat <- labeledHeatmap(Matrix = moduleTraitCor, 
               xLabels = names(datTraits), 
               yLabels = names(MEs), 
               ySymbols = names(MEs), 
               colorLabels = FALSE, 
               colors = greenWhiteRed(50), 
               textMatrix = textMatrix, 
               setStdMargins = FALSE, 
               cex.text = 0.5, 
               zlim = c(-1,1), 
               main = paste("Module-trait relationships")) 
library(gplots)
myheatcol = colorpanel(250,'#00bfc4',"white",'#f9766d')##修改热图颜色

net$TOMFiles

moduleLabels = net$colors
moduleColors = labels2colors(net$colors)
MEs = net$MEs;
geneTree = net$dendrograms[[1]];
save(MEs, moduleLabels, moduleColors, geneTree, file = "networkConstruction-auto.RData")
##输出
save(net,file="net.wgcna.RData")
test=data.frame(color=net$colors)
test$gene=rownames(test)

write.table(test,file="f01.total_module_gene.record.txt",sep="\\t",row.names=F,col.names=T,quote=F)
##多步法
Connectivity=softConnectivity(datExpr00,corFnc = "cor", corOptions = "use ='p'",power=softPower,type = "signed")

scaleFreePlot(Connectivity,nBreaks = 10,truncated = FALSE,removeFirst = FALSE, main = "")

adjacency = adjacency(datExpr00,corFnc = "cor", corOptions = "use ='p'",type = "signed", power = softPower)

TOM = TOMsimilarity(adjacency,TOMType="signed");

dissTOM = 1-TOM

geneTree = hclust(as.dist(dissTOM), method = "average")

minModuleSize = 200
dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM, deepSplit = 1, 
                            pamRespectsDendro = FALSE, minClusterSize = minModuleSize) 
# 查看模块数
table(dynamicMods)
# 转换数字标签为颜色
dynamicColors = labels2colors(dynamicMods)
# 查看颜色
table(dynamicColors)
# 绘图
plotDendroAndColors(geneTree, dynamicColors, "Dynamic Tree Cut", 
                    dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05, main = "cluster dendrogram")
##合并
# Calculate eigengenes
MEList = moduleEigengenes(datExpr00, colors = dynamicColors)
MEs = MEList$eigengenes
#  Calculate dissimilarity of module eigengenes
MEDiss = 1-cor(MEs)
#  Cluster module eigengenes
METree = hclust(as.dist(MEDiss), method = "average")
plot(METree, main = "Clustering of module eigengenes", xlab = "", sub = "")

MEDissThres = 0.3
# 在树状图中加入切割线
abline(h=MEDissThres, col = "red")
# 调用自动归并函数
merge = mergeCloseModules(datExpr00, dynamicColors, cutHeight = MEDissThres, verbose = 3)
# 合并模块的颜色
mergedColors = merge$colors
# 合并后新模块
mergedMEs = merge$newMEs
# 对模块颜色重命名
moduleColors = mergedColors
# 将颜色转换为数据标签
 colorOrder = c("grey", standardColors(50))
moduleLabels = match(moduleColors, colorOrder)-1
MEs = mergedMEs
# 保存模块颜色和标签，以供后续部分使用
save(MEs, moduleLabels, moduleColors, geneTree, file = "networkConstruction-stepByStep.RData")
##合并后绘图
plotDendroAndColors(geneTree, cbind(dynamicColors, mergedColors), 
                    c("Dynamic Tree Cut", "Merged dynamic"), 
                    dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05)
##模块-表型数据关联
nGenes = ncol(datExpr00);
nSamples = nrow(datExpr00)
# 重新计算带有颜色标签的模块
MEs0 = moduleEigengenes(datExpr00, moduleColors)$eigengenes
MEs = orderMEs(MEs0)
moduleTraitCor = cor(MEs, datTraits, use = "p");
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples);
# 通过相关值对每个关联进行颜色编码
sizeGrWindow(10,6)
# 展示模块与表型数据的相关系数和 P值
textMatrix = paste(signif(moduleTraitCor, 2), "\\n(",
                   signif(moduleTraitPvalue, 1), ")", sep = "");
dim(textMatrix) = dim(moduleTraitCor)
par(mar = c(6, 8.5, 3, 3));
# 用热图的形式展示相关系数
labeledHeatmap(Matrix = moduleTraitCor,
               xLabels = names(datTraits), ##X轴
               yLabels = names(MEs), ##Y轴
               ySymbols = names(MEs),
               colorLabels = FALSE,
               colors = greenWhiteRed(50),
               textMatrix = textMatrix,
               setStdMargins = T,
               cex.text = 0.5,
               zlim = c(-1,1),
               main = paste("Module-trait relationships"))
myheatcol = colorpanel(250,'red',"orange",'lemonchiffon')
#colors = greenWhiteRed(50)不适用于红绿色盲患者，建议用 blueWhiteRed代替.
#该分析确定了几个重要的模块-特征关联。我们将体重作为感兴趣的特征来研究。
modules=c("darkorange")

probes= colnames(datExpr00)#获得基因名
inModule =is.finite(match(moduleColors,modules));
modProbes=probes[inModule]
filtered_df_brown <- fpkm[fpkm$rownames   %in% modProbes, ]

modTOM =TOM[inModule, inModule];

dimnames(modTOM) = list(modProbes, modProbes)##命名行列名
nTop = 30
IMConn = softConnectivity(datExpr00[, modProbes])
 top = (rank(-IMConn) <= nTop)
 cyt = exportNetworkToCytoscape(modTOM[top, top],
                                edgeFile = paste("CytoscapeInput-edges-", paste(modules, collapse="-"), ".txt", sep=""),
                                nodeFile = paste("CytoscapeInput-nodes-", paste(modules, collapse="-"), ".txt", sep=""),
                                weighted = TRUE)
##筛选top
ntop=50
IMConn = softConnectivity(datExpr00[,modProbes])
top = (rank(-IMConn) <= ntop)
cyt = exportNetworkToCytoscape(modTOM[top, top],
                               edgeFile = paste("CytoscapeInput-edges-", paste(modules, collapse="-"), ".txt", sep=""),
                               nodeFile = paste("CytoscapeInput-nodes-", paste(modules, collapse="-"), ".txt", sep=""),
                               weighted = TRUE)
wgc <- exportNetworkToCytoscape(modTOM[top,top],
                                edgeFile = paste("edges_top50", paste(modules, collapse="-"), ".txt", sep=""),
                                nodeFile = paste("nodes_top50", paste(modules, collapse="-"), ".txt", sep=""), 
                                weighted = TRUE,threshold = 0.1,nodeNames = modProbes, 
                                nodeAttr = mergedColors[inModule])
cyt = exportNetworkToCytoscape(
  modTOM,
  edgeFile = paste("magenta-edges-", paste(modules, collapse="-"), ".txt", sep=""),
  nodeFile = paste("magenta-nodes-", paste(modules, collapse="-"), ".txt", sep=""),
  weighted = TRUE,
  threshold = 0.02,
  nodeNames = modProbes, 
  nodeAttr = moduleColors[inModule]
);

#输出模块基因
module ="magenta"
column =match(module, modNames);
moduleGenes = moduleColors==module;

/home/liuli/geneomic/BRAKER-master/new/BRAKER-master/lh_5-1_Aligned.sortedByCoord.out.bam,/home/liuli/geneomic/BRAKER-master/new/BRAKER-master/lh_1-1_Aligned.sortedByCoord.out.bam,/home/liuli/geneomic/BRAKER-master/new/BRAKER-master/lh_1-2_Aligned.sortedByCoord.out.bam,/home/liuli/geneomic/BRAKER-master/new/BRAKER-master/lh_1-3_Aligned.sortedByCoord.out.bam,/home/liuli/geneomic/BRAKER-master/new/BRAKER-master/lh_2-1_Aligned.sortedByCoord.out.bam,/home/liuli/geneomic/BRAKER-master/new/BRAKER-master/lh_2-2_Aligned.sortedByCoord.out.bam,/home/liuli/geneomic/BRAKER-master/new/BRAKER-master/lh_2-3_Aligned.sortedByCoord.out.bam,/home/liuli/geneomic/BRAKER-master/new/BRAKER-master/lh_3-1_Aligned.sortedByCoord.out.bam,/home/liuli/geneomic/BRAKER-master/new/BRAKER-master/lh_3-2_Aligned.sortedByCoord.out.bam,/home/liuli/geneomic/BRAKER-master/new/BRAKER-master/lh_3-3_Aligned.sortedByCoord.out.bam,/home/liuli/geneomic/BRAKER-master/new/BRAKER-master/lh_4-1_Aligned.sortedByCoord.out.bam,/home/liuli/geneomic/BRAKER-master/new/BRAKER-master/lh_4-2_Aligned.sortedByCoord.out.bam,/home/liuli/geneomic/BRAKER-master/new/BRAKER-master/lh_4-3_Aligned.sortedByCoord.out.bam,/home/liuli/geneomic/BRAKER-master/new/BRAKER-master/lh_5-2_Aligned.sortedByCoord.out.bam,/home/liuli/geneomic/BRAKER-master/new/BRAKER-master/lh_5-3_Aligned.sortedByCoord.out.bam