15. July 2020

基因加权网络共表达

1
2
3
4


#BiocManager::install(c("AnnotationDbi", "impute","GO.db", "preprocessCore"))
#install.packages(c("matrixStats", "Hmisc","foreach", "doParallel", "fastcluster", "dynamicTreeCut", "survival"))
#install.packages(c("WGCNA", "stringr", "reshape2"))

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13


library(WGCNA)
setwd("/storage2/wangchao/jupyter/WGCNA")
options(stringsAsFactors = FALSE)
femData = read.csv("LTE.csv")
dim(femData)
head(femData)

datExpr0 = as.data.frame(t(femData[, -c(1:2)]))
head(datExpr0)

names(datExpr0) = femData$Symbol;
rownames(datExpr0) = names(femData)[-c(1:2)];
head(datExpr0)

数据清洗

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15


gsg = goodSamplesGenes(datExpr0, verbose = 3)
gsg$allOK;
if (!gsg$allOK)
{
    # Optionally, print the gene and sample names that were removed:
    # 打印删除的基因和样本名称
    if (sum(!gsg$goodGenes)>0)
        printFlush(paste("Removing genes:", paste(names(datExpr0)[!gsg$goodGenes], collapse = ", ")));
    if (sum(!gsg$goodSamples)>0)
        printFlush(paste("Removing samples:", paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", ")));
    # Remove the offending genes and samples from the data:
    # 从数据中删除有问题的基因和样本
    datExpr0 = datExpr0[gsg$goodSamples, gsg$goodGenes]
}
head(datExpr0)

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14


# 接下来我们对样本进行聚类（与随后的基因聚类相比），看看是否有明显的异常值。
# hclusts聚类算法, dist计算基因之间的距离
sampleTree = hclust(dist(datExpr0), method = "average");
# Plot the sample tree: Open a graphic output window of size 12 by 9 inches
# The user should change the dimensions if the window is too large or too small.
# 绘制聚类树，sizeGrWindow设置绘图窗口大小
# sizeGrWindow(16,9)

# 设置文字大小
par(cex = 0.5);
# 设置图像边距c(bottom, left, top, right) 
# par(mar = c(0,4,2,0))
# 画图 main标题，sub子标题，xlab x轴标题，cex.lab标题字体大小，cex.axis坐标轴刻度大小，cex.main主标题字体
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2)

确定power

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54


powers = c(c(1:10), seq(from = 12, to=20, by=2))
# Call the network topology analysis function
sft = pickSoftThreshold(datExpr0, powerVector = powers, verbose = 5)
#设置网络构建参数选择范围，计算无尺度分布拓扑矩阵

  # Plot the results:
  ##sizeGrWindow(9, 5)
  par(mfrow = c(1,2));
  cex1 = 0.9;
  # Scale-free topology fit index as a function of the soft-thresholding power
  plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
       xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
       main = paste("Scale independence"));
  text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
       labels=powers,cex=cex1,col="red");
  # this line corresponds to using an R^2 cut-off of h
  abline(h=0.9,col="red")
  # Mean connectivity as a function of the soft-thresholding power
  plot(sft$fitIndices[,1], sft$fitIndices[,5],
       xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
       main = paste("Mean connectivity"))
  text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
  sft
  
  cor <- WGCNA::cor
net = blockwiseModules(
				 datExpr0,
				 power = 6,
				 maxBlockSize = 6000,
				 TOMType = "unsigned", minModuleSize = 30,
				 reassignThreshold = 0, mergeCutHeight = 0.25,
				 numericLabels = TRUE, pamRespectsDendro = FALSE,
				 saveTOMs = F, 
				 verbose = 3
 )
 table(net$colors) 
 
 #绘画结果展示### open a graphics window
#sizeGrWindow(12, 30)
# Convert labels to colors for plotting
mergedColors = labels2colors(net$colors)
# Plot the dendrogram and the module colors underneath
plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],"Module colors",
                    dendroLabels = FALSE, hang = 0.03,
                    addGuide = TRUE, guideHang = 0.05)
					
##结果保存###
moduleLabels = net$colors
moduleColors = labels2colors(net$colors)
table(moduleColors)
MEs = net$MEs;
geneTree = net$dendrograms[[1]];
save(MEs, moduleLabels, moduleColors, geneTree,
     file = "changbai.RData")

提取模块内基因

 1
 2
 3
 4
 5
 6
 7
 8
 9
10


color<-moduleColors[!duplicated(moduleColors)]
for (i in color){
  # Select module
  module = i;
  # Select module probes
  probes = colnames(datExpr0) ## 我们例子里面的probe就是基因
  inModule = (moduleColors==i);
  modProbes = probes[inModule]; 
summary(modProbes)
write.csv(modProbes,paste0("/storage2/wangchao/jupyter/WGCNA/LTE.module/",i,".csv"))}

模块内基因GO注释

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20


#BiocManager::install("topGO")
#BiocManager::install("clusterProfiler")
#BiocManager::install("org.Ss.eg.db")
library('topGO')
library(clusterProfiler)
library( org.Ss.eg.db)
setwd("/storage2/wangchao/jupyter/WGCNA/LTE.module")
temp = list.files(pattern="*.csv")
temp
for (i in temp){
    data<-read.csv(paste0("/storage2/wangchao/jupyter/WGCNA/LTE.module/",i))
gene<-data[,2]
head(gene)
gene = bitr(gene, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Ss.eg.db")
X<-gene$ENTREZID
head(X)
go <- enrichGO(X, OrgDb = org.Ss.eg.db, ont='ALL',pAdjustMethod = 'BH',pvalueCutoff = 0.05, 
               qvalueCutoff = 0.05,keyType = 'ENTREZID')
write.csv(go,paste0("/storage2/wangchao/jupyter/WGCNA/LTE.module/GO/",i))
}

The LatestT