0) printFlush(paste("Removing samples:", paste(rownames(datExpr00)[!gsg$goodSamples], collapse = ", "))); # Remove the offending genes and samples from the data: datExpr00 = datExpr00[gsg$goodSamples, gsg$goodGenes]} ##运行完后再次运行gsg以及检验gsg ##聚类所有样本,观察是否有离群值或异常值。 sampleTree = hclust(dist(datExpr00), method = "average") plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2) ##删除离群样本 abline(h = 15, col = "red") #划定需要"> 0) printFlush(paste("Removing samples:", paste(rownames(datExpr00)[!gsg$goodSamples], collapse = ", "))); # Remove the offending genes and samples from the data: datExpr00 = datExpr00[gsg$goodSamples, gsg$goodGenes]} ##运行完后再次运行gsg以及检验gsg ##聚类所有样本,观察是否有离群值或异常值。 sampleTree = hclust(dist(datExpr00), method = "average") plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2) ##删除离群样本 abline(h = 15, col = "red") #划定需要"> 0) printFlush(paste("Removing samples:", paste(rownames(datExpr00)[!gsg$goodSamples], collapse = ", "))); # Remove the offending genes and samples from the data: datExpr00 = datExpr00[gsg$goodSamples, gsg$goodGenes]} ##运行完后再次运行gsg以及检验gsg ##聚类所有样本,观察是否有离群值或异常值。 sampleTree = hclust(dist(datExpr00), method = "average") plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2) ##删除离群样本 abline(h = 15, col = "red") #划定需要">
##WGCNA
library(WGCNA)
##写入表达量数据
datExpr00 = as.data.frame(t(df))##转置表格
datExpr00[] <- lapply(datExpr00, as.numeric)
gsg = goodSamplesGenes(datExpr00, verbose = 3) ##检查缺失值和识别离群值(异常值)
gsg$allOK
##如果gsg$allOK的结果为TRUE,证明没有缺失值,可以直接下一步。如果为FALSE,则需要用以下函数进行删除缺失值。
if (!gsg$allOK)
{
# Optionally, print the gene and sample names that were removed:
if (sum(!gsg$goodGenes)>0)
printFlush(paste("Removing genes:", paste(names(datExpr00)[!gsg$goodGenes], collapse = ", ")));
if (sum(!gsg$goodSamples)>0)
printFlush(paste("Removing samples:", paste(rownames(datExpr00)[!gsg$goodSamples], collapse = ", ")));
# Remove the offending genes and samples from the data:
datExpr00 = datExpr00[gsg$goodSamples, gsg$goodGenes]} ##运行完后再次运行gsg以及检验gsg
##聚类所有样本,观察是否有离群值或异常值。
sampleTree = hclust(dist(datExpr00), method = "average")
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,
cex.axis = 1.5, cex.main = 2)
##删除离群样本
abline(h = 15, col = "red") #划定需要剪切的枝长
clust = cutreeStatic(sampleTree, cutHeight = 15, minSize = 10)
##这时候会从高度为15这里横切,把离群样本分开
table(clust)
keepSamples = (clust==1) #保留非离群(clust==1)的样本
datExpr0 = datExpr00[keepSamples, ] #去除离群值后的数据
nGenes = ncol(datExpr0)
nSamples = nrow(datExpr0)
##表型数据
##载入数据
traitData = read.csv("ClinicalTraits.csv");
dim(traitData) #每行是一个样本,每列是一种信息
##将临床表征数据和表达数据进行匹配(用样本名字进行匹配)
femaleSamples = rownames(datExpr00);
traitRows = match(femaleSamples, df4$sample);
datTraits = df4[traitRows, -1];
rownames(datTraits) = df4[traitRows, 1];
collectGarbage()
##可视化表型数据与基因表达量数据的联系,重构样本聚类树
sampleTree2 = hclust(dist(datExpr00), method = "average")
traitColors = numbers2colors(datTraits, signed = T) #用颜色代表关联度
plotDendroAndColors(sampleTree2, traitColors,
groupLabels = names(datTraits),
main = "Sample dendrogram and trait heatmap")
##power选择
powers <- c(seq(1,10),seq(12,20,by=2))
powers= 1:40
##软阈值的选取
sft <- pickSoftThreshold(datExpr00,powerVector = powers,verbose = 3)
sft$powerEstimate ##可以查看最佳软阈值,如果没有则需要手动选取,进行画图
par(mfrow = c(1, 2))
cex1 <- 0.9
plot(sft$fitIndices[, 1], -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
xlab = "Soft Threshold (power)", ylab = "SFT, signed R^2", type = "n", main = paste("Scale independence"))
text(sft$fitIndices[, 1], -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
labels = powers, col = "red")
abline(h =0.85, col = "red")
plot(sft$fitIndices[, 1], sft$fitIndices[, 5], type = "n", xlab = "Soft Threshold (power)",
ylab = "Mean Connectivity", main = paste("Mean connectivity"))
text(sft$fitIndices[, 1], sft$fitIndices[, 5], labels = power, col = "red")
abline(h =0, col = "red")
##sft$powerEstimate为NA时可运行下面
cex1=0.9 ##power阈值,可减到0.8
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",
type="n",main = paste("Scale independence")); #画图
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],labels=power,cex=cex1,col="red")
abline(h=0.90,col="red")
if (is.na(power)){
power = ifelse(nSamples<20, ifelse(type == "unsigned", 9, 18),
ifelse(nSamples<30, ifelse(type == "unsigned", 8, 16),
ifelse(nSamples<40, ifelse(type == "unsigned", 7, 14),
ifelse(type == "unsigned", 6, 12))
)
)
}
softPower =4#参数可改,在上面那power图里曲线平滑时的第一个数,一般不大于12
##一步法
datExpr00[] <- lapply(datExpr00, as.numeric)
net = blockwiseModules(
datExpr00,
power = softPower,
maxBlockSize = 6000,
TOMType = "signed", minModuleSize = 30,
reassignThreshold = 0, mergeCutHeight = 0.25,
numericLabels = TRUE, pamRespectsDendro = FALSE,
saveTOMs = F,
verbose = 3)
##标记颜色
mergedColors = labels2colors(net$colors)
##画图
plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],
"Module colors",dendroLabels = FALSE,
hang = 0.03,addGuide = TRUE, guideHang = 0.05)
##模块-表型数据关联
nGenes = ncol(datExpr0);
nSamples = nrow(datExpr0);
# 重新计算带有颜色标签的模块
MEs0 = moduleEigengenes(datExpr0, moduleColors)$eigengenes
MEs = orderMEs(MEs0)
moduleTraitCor = cor(MEs, datTraits, use = "p");
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples);
# 通过相关值对每个关联进行颜色编码
sizeGrWindow(10,6)
# 展示模块与表型数据的相关系数和 P值
textMatrix = paste(signif(moduleTraitCor, 2), "\\n(",
signif(moduleTraitPvalue, 1), ")", sep = "");
dim(textMatrix) = dim(moduleTraitCor)
par(mar = c(6, 8.5, 3, 3));##c(6, 8, 4, 4)表示边距的大小分别为6英寸(下方)、8英寸(左侧)、4英寸(上方)和4英寸(右侧)
# 用热图的形式展示相关系数
par(mar = c(10, 10, 3, 3))
labeledHeatmap(Matrix = moduleTraitCor,
xLabels = names(datTraits),
yLabels = names(MEs),
ySymbols = names(MEs),
colorLabels = FALSE,
colors = greenWhiteRed(50),
textMatrix = textMatrix,
setStdMargins = FALSE, ##是否自定义边界
cex.text = 0.5,
zlim = c(-1,1),
main = paste("Module-trait relationships"))
#colors = greenWhiteRed(50)不适用于红绿色盲患者,建议用 blueWhiteRed代替.
#该分析确定了几个重要的模块-特征关联。我们将体重作为感兴趣的特征来研究。
fpkmSamples = rownames(datExpr00)
traitSamples =rownames(df2)
traitRows = match(fpkmSamples, traitSamples)
datTraits = df2[traitRows,]
rownames(datTraits)
## 模块与数量性状间的相关性
#
moduleColors = mergedColors
# Construct numerical labels corresponding to the colors
colorOrder = c("grey", standardColors(50))
moduleLabels = match(moduleColors, colorOrder)-1
MEs = mergedMEs
nGenes = ncol(datExpr00)
nSamples = nrow(datExpr00)
moduleTraitCor = cor(MEs, datTraits, use = "p")
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)
pdf(file="8_Module-trait relationships.pdf",width=10,height=10)
textMatrix = paste(signif(moduleTraitCor, 2), "\\n(",
signif(moduleTraitPvalue, 1), ")", sep = "")
dim(textMatrix) = dim(moduleTraitCor)
par(mar = c(6, 8.5, 3, 3))
tairtheat <- labeledHeatmap(Matrix = moduleTraitCor,
xLabels = names(datTraits),
yLabels = names(MEs),
ySymbols = names(MEs),
colorLabels = FALSE,
colors = greenWhiteRed(50),
textMatrix = textMatrix,
setStdMargins = FALSE,
cex.text = 0.5,
zlim = c(-1,1),
main = paste("Module-trait relationships"))
library(gplots)
myheatcol = colorpanel(250,'#00bfc4',"white",'#f9766d')##修改热图颜色
net$TOMFiles
moduleLabels = net$colors
moduleColors = labels2colors(net$colors)
MEs = net$MEs;
geneTree = net$dendrograms[[1]];
save(MEs, moduleLabels, moduleColors, geneTree, file = "networkConstruction-auto.RData")
##输出
save(net,file="net.wgcna.RData")
test=data.frame(color=net$colors)
test$gene=rownames(test)
write.table(test,file="f01.total_module_gene.record.txt",sep="\\t",row.names=F,col.names=T,quote=F)
##多步法
Connectivity=softConnectivity(datExpr00,corFnc = "cor", corOptions = "use ='p'",power=softPower,type = "signed")
scaleFreePlot(Connectivity,nBreaks = 10,truncated = FALSE,removeFirst = FALSE, main = "")
adjacency = adjacency(datExpr00,corFnc = "cor", corOptions = "use ='p'",type = "signed", power = softPower)
TOM = TOMsimilarity(adjacency,TOMType="signed");
dissTOM = 1-TOM
geneTree = hclust(as.dist(dissTOM), method = "average")
minModuleSize = 200
dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM, deepSplit = 1,
pamRespectsDendro = FALSE, minClusterSize = minModuleSize)
# 查看模块数
table(dynamicMods)
# 转换数字标签为颜色
dynamicColors = labels2colors(dynamicMods)
# 查看颜色
table(dynamicColors)
# 绘图
plotDendroAndColors(geneTree, dynamicColors, "Dynamic Tree Cut",
dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05, main = "cluster dendrogram")
##合并
# Calculate eigengenes
MEList = moduleEigengenes(datExpr00, colors = dynamicColors)
MEs = MEList$eigengenes
# Calculate dissimilarity of module eigengenes
MEDiss = 1-cor(MEs)
# Cluster module eigengenes
METree = hclust(as.dist(MEDiss), method = "average")
plot(METree, main = "Clustering of module eigengenes", xlab = "", sub = "")
MEDissThres = 0.3
# 在树状图中加入切割线
abline(h=MEDissThres, col = "red")
# 调用自动归并函数
merge = mergeCloseModules(datExpr00, dynamicColors, cutHeight = MEDissThres, verbose = 3)
# 合并模块的颜色
mergedColors = merge$colors
# 合并后新模块
mergedMEs = merge$newMEs
# 对模块颜色重命名
moduleColors = mergedColors
# 将颜色转换为数据标签
colorOrder = c("grey", standardColors(50))
moduleLabels = match(moduleColors, colorOrder)-1
MEs = mergedMEs
# 保存模块颜色和标签,以供后续部分使用
save(MEs, moduleLabels, moduleColors, geneTree, file = "networkConstruction-stepByStep.RData")
##合并后绘图
plotDendroAndColors(geneTree, cbind(dynamicColors, mergedColors),
c("Dynamic Tree Cut", "Merged dynamic"),
dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05)
##模块-表型数据关联
nGenes = ncol(datExpr00);
nSamples = nrow(datExpr00)
# 重新计算带有颜色标签的模块
MEs0 = moduleEigengenes(datExpr00, moduleColors)$eigengenes
MEs = orderMEs(MEs0)
moduleTraitCor = cor(MEs, datTraits, use = "p");
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples);
# 通过相关值对每个关联进行颜色编码
sizeGrWindow(10,6)
# 展示模块与表型数据的相关系数和 P值
textMatrix = paste(signif(moduleTraitCor, 2), "\\n(",
signif(moduleTraitPvalue, 1), ")", sep = "");
dim(textMatrix) = dim(moduleTraitCor)
par(mar = c(6, 8.5, 3, 3));
# 用热图的形式展示相关系数
labeledHeatmap(Matrix = moduleTraitCor,
xLabels = names(datTraits), ##X轴
yLabels = names(MEs), ##Y轴
ySymbols = names(MEs),
colorLabels = FALSE,
colors = greenWhiteRed(50),
textMatrix = textMatrix,
setStdMargins = T,
cex.text = 0.5,
zlim = c(-1,1),
main = paste("Module-trait relationships"))
myheatcol = colorpanel(250,'red',"orange",'lemonchiffon')
#colors = greenWhiteRed(50)不适用于红绿色盲患者,建议用 blueWhiteRed代替.
#该分析确定了几个重要的模块-特征关联。我们将体重作为感兴趣的特征来研究。
modules=c("darkorange")
probes= colnames(datExpr00)#获得基因名
inModule =is.finite(match(moduleColors,modules));
modProbes=probes[inModule]
filtered_df_brown <- fpkm[fpkm$rownames %in% modProbes, ]
modTOM =TOM[inModule, inModule];
dimnames(modTOM) = list(modProbes, modProbes)##命名行列名
nTop = 30
IMConn = softConnectivity(datExpr00[, modProbes])
top = (rank(-IMConn) <= nTop)
cyt = exportNetworkToCytoscape(modTOM[top, top],
edgeFile = paste("CytoscapeInput-edges-", paste(modules, collapse="-"), ".txt", sep=""),
nodeFile = paste("CytoscapeInput-nodes-", paste(modules, collapse="-"), ".txt", sep=""),
weighted = TRUE)
##筛选top
ntop=50
IMConn = softConnectivity(datExpr00[,modProbes])
top = (rank(-IMConn) <= ntop)
cyt = exportNetworkToCytoscape(modTOM[top, top],
edgeFile = paste("CytoscapeInput-edges-", paste(modules, collapse="-"), ".txt", sep=""),
nodeFile = paste("CytoscapeInput-nodes-", paste(modules, collapse="-"), ".txt", sep=""),
weighted = TRUE)
wgc <- exportNetworkToCytoscape(modTOM[top,top],
edgeFile = paste("edges_top50", paste(modules, collapse="-"), ".txt", sep=""),
nodeFile = paste("nodes_top50", paste(modules, collapse="-"), ".txt", sep=""),
weighted = TRUE,threshold = 0.1,nodeNames = modProbes,
nodeAttr = mergedColors[inModule])
cyt = exportNetworkToCytoscape(
modTOM,
edgeFile = paste("magenta-edges-", paste(modules, collapse="-"), ".txt", sep=""),
nodeFile = paste("magenta-nodes-", paste(modules, collapse="-"), ".txt", sep=""),
weighted = TRUE,
threshold = 0.02,
nodeNames = modProbes,
nodeAttr = moduleColors[inModule]
);
#输出模块基因
module ="magenta"
column =match(module, modNames);
moduleGenes = moduleColors==module;
/home/liuli/geneomic/BRAKER-master/new/BRAKER-master/lh_5-1_Aligned.sortedByCoord.out.bam,/home/liuli/geneomic/BRAKER-master/new/BRAKER-master/lh_1-1_Aligned.sortedByCoord.out.bam,/home/liuli/geneomic/BRAKER-master/new/BRAKER-master/lh_1-2_Aligned.sortedByCoord.out.bam,/home/liuli/geneomic/BRAKER-master/new/BRAKER-master/lh_1-3_Aligned.sortedByCoord.out.bam,/home/liuli/geneomic/BRAKER-master/new/BRAKER-master/lh_2-1_Aligned.sortedByCoord.out.bam,/home/liuli/geneomic/BRAKER-master/new/BRAKER-master/lh_2-2_Aligned.sortedByCoord.out.bam,/home/liuli/geneomic/BRAKER-master/new/BRAKER-master/lh_2-3_Aligned.sortedByCoord.out.bam,/home/liuli/geneomic/BRAKER-master/new/BRAKER-master/lh_3-1_Aligned.sortedByCoord.out.bam,/home/liuli/geneomic/BRAKER-master/new/BRAKER-master/lh_3-2_Aligned.sortedByCoord.out.bam,/home/liuli/geneomic/BRAKER-master/new/BRAKER-master/lh_3-3_Aligned.sortedByCoord.out.bam,/home/liuli/geneomic/BRAKER-master/new/BRAKER-master/lh_4-1_Aligned.sortedByCoord.out.bam,/home/liuli/geneomic/BRAKER-master/new/BRAKER-master/lh_4-2_Aligned.sortedByCoord.out.bam,/home/liuli/geneomic/BRAKER-master/new/BRAKER-master/lh_4-3_Aligned.sortedByCoord.out.bam,/home/liuli/geneomic/BRAKER-master/new/BRAKER-master/lh_5-2_Aligned.sortedByCoord.out.bam,/home/liuli/geneomic/BRAKER-master/new/BRAKER-master/lh_5-3_Aligned.sortedByCoord.out.bam