Workflow(4) GO&Pathway&PPI

Workflow(4) GO&Pathway&PPI

准备工作:

1.GO网站

https://david.ncifcrf.gov/tools.jsp

2.Pathway网站

https://david.ncifcrf.gov/tools.jsp
https://www.kegg.jp/

3.PPI网站

https://string-db.org/cgi/input?sessionId=b4AQOkQXeIzj&input_page_show_search=on

4.数据

数据为找到的差异蛋白的ACCESSION号

P05362
P10909
P11279
上述网站都要求一行一个登录号,为避免出错,建议直接从Excel里复制整列登录号。

5.sublime

https://www.sublimetext.com/
这是一个文本编辑工具,类似记事本,可用于整理数据格式。GO等结果如果不经过处理,直接粘贴到txt或者excel里,列会错乱。

步骤

1.GO 和 Pathway等数据获取

  1. 上传数据
  2. 选择数据格式,uniprot accession
  3. 列表内容,gene list
  4. 上传

GO

  1. 结果
    result1

result2
BP,CC,MF分别呈现,点击chart进入数据页面
6. 下载数据,会弹出一个网页
result3
全选复制,粘贴到sublime里(注意全程不要乱点,否则会导致数据格式错乱)
7. 如何把数据格式改成可处理的
result4
选中一处空格,按ctrl+f,出现如图页面,点击右下角Find all(选中所有相同的空格),然后按Tab键,把它们一次性替换成制表符
呼!现在可以贴到excel里面查看筛选啦!

2.泡泡图

代码如下,第一块是基础绘图,后面有参数修改说明。
其余问题会持续更新,请耐心等待

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# Bubble chart for a single GO table: one point per term, positioned by
# GeneRatio, sized by Count and coloured by PValue, saved to GOMFbubble.pdf.
library(ggplot2)
library(stats)
library(dplyr)
setwd("F:/Taogroup/ZZ/20210130phos+full/GO/")
data <- read.table("phos.txt", header = TRUE, sep = "\t")
# Keep the row order of the input file as the y-axis order. unique() stops
# factor() from failing when the same term appears on more than one row.
data$Term <- factor(data$Term, levels = unique(data$Term))


pdf("GOMFbubble.pdf", width = 8, height = 6)
# Every layer and scale has to be chained with `+`; without it the plot
# expression ends early and the following scale is never applied.
# print() makes the plot render even when this script is run via source().
print(
  ggplot(data, aes(x = GeneRatio, y = Term, size = Count, color = PValue)) +
    geom_point(alpha = 0.7) +
    scale_color_gradient(low = "red") +
    scale_size(range = c(8, 12))
)
dev.off()
#单个GO作图
1
2
3
4
5
6
7
8
9
10
# Bubble chart of terms against -log2(FDR), saved to phos.pdf. Point size
# follows Count and colour follows Classification (presumably the GO
# category -- confirm against the input table). Uses the `data` table that is
# already loaded at this point.
pdf(file = "phos.pdf", width = 8, height = 6)
ggplot(
  data = data,
  mapping = aes(
    x = -log2(FDR),
    y = Term,
    size = Count,
    color = Classification
  )
) +
  geom_point(alpha = 0.5) +
  scale_size(name = "Count", range = c(1, 10)) +
  labs(x = "-log2(FDR)", y = "Term") +
  theme(
    axis.text = element_text(size = 7),
    axis.title = element_text(size = 12, face = "bold")
  )
# Optional title, e.g. labs(title = "CC")
dev.off()
#三种GO放在一个图上

3.条形图(绘制pathway信息)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
library(ggplot2)
setwd("F:/")

# Draw one horizontal bar chart (Count per Term, filled by PValue) from a
# tab-separated table and save it as an 8 x 6 inch PDF.
#
# input_file:  path of the tab-separated table; it must provide the columns
#              Term, Count and PValue.
# output_file: path of the PDF to write.
# Returns the table that was plotted, invisibly.
plot_term_bar <- function(input_file, output_file) {
  # Read before opening the device, so a missing or malformed table does not
  # leave an empty PDF behind or a graphics device open.
  term_table <- read.table(input_file, header = TRUE, sep = "\t")
  # Keep the file's row order on the y axis. unique() avoids the
  # duplicated-level error when a term occurs more than once.
  term_table$Term <- factor(term_table$Term, levels = unique(term_table$Term))

  pdf(output_file, width = 8, height = 6)
  # Close the device even if plotting fails.
  on.exit(dev.off(), add = TRUE)
  # A ggplot object is not auto-printed inside a function, so print it.
  print(
    ggplot(term_table, aes(x = Count, y = Term, fill = PValue)) +
      geom_col(alpha = 0.7) +
      scale_fill_gradient(low = "red", high = "white")
  )
  invisible(term_table)
}

# Input table -> output PDF, in the order the charts are produced.
bar_jobs <- c(
  "KEGG.txt" = "keggbar.pdf",
  "GOBP.txt" = "GOBPbar.pdf",
  "GOCC.txt" = "GOCCbar.pdf",
  "GOMF.txt" = "GOMFbar.pdf"
)
for (input_file in names(bar_jobs)) {
  # `data` is reassigned on every pass so that it ends up holding the last
  # table read, as it did when the four charts were written out one by one.
  data <- plot_term_bar(input_file, bar_jobs[[input_file]])
}

4.美化

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# Bubble chart with custom colours: set them with scale_color_manual(values = c(...)).
# Terms are plotted against -log2(PValue), sized by Count and coloured by
# SampleGroup, and the chart is saved to BPGO.pdf.
library(ggplot2)
library(stats)
library(dplyr)
setwd("F:/Taogroup/People/HSY/MM/Gly/FullandGlycombine/addfulltogly/20210728/")
data <- read.table("BPforBubble.txt", header = TRUE, sep = "\t")

pdf("BPGO.pdf", width = 10, height = 6)
# Every component has to be chained with `+`; without it the plot expression
# ends early and the theme settings are never applied.
# print() makes the plot render even when this script is run via source().
print(
  ggplot(data, aes(x = -log2(PValue), y = Term, size = Count, color = SampleGroup)) +
    geom_point(alpha = 0.8) +
    scale_size(range = c(3, 15), name = "Count") +
    # Three manual colours; assumes SampleGroup has at most three groups.
    scale_color_manual(values = c("#00BA38", "#F8766D", "#619CFF")) +
    xlab("-log2(PValue)") +
    theme(
      axis.text = element_text(size = 7.5),
      axis.title = element_text(size = 12)
    )
)
dev.off()


# Bubble chart of Sample against ProbabilitySet, saved to GO.pdf at the
# default page size. Point size follows PhosProtein and colour follows
# SearchMethod.
# NOTE(review): uses whatever `data` is loaded at this point -- confirm that
# table has the columns ProbabilitySet, Sample, PhosProtein and SearchMethod.
pdf(file = "GO.pdf")
ggplot(
  data = data,
  mapping = aes(
    x = ProbabilitySet,
    y = Sample,
    size = PhosProtein,
    color = SearchMethod
  )
) +
  geom_point(alpha = 0.8) +
  scale_size(name = "PhosProtein", range = c(5, 15)) +
  labs(x = "ProbabilitySet", y = "Sample") +
  theme(
    axis.text = element_text(size = 8),
    axis.title = element_text(size = 12)
  )
dev.off()

# Adjusting ticks, grid lines and similar details.
# Bubble chart of Sample against ProbabilitySet (size = PhosProtein,
# colour = SearchMethod) with the panel grid removed, saved to GO.pdf.
library(ggplot2)
library(stats)
library(dplyr)
setwd("E:/Datasetlocalize/People/ZGY/20210817_new_version/Result/")
data <- read.table("81Protein.txt", header = TRUE, sep = "\t")

pdf("GO.pdf", width = 10, height = 8)
# The last component must NOT end with `+`: a trailing `+` makes R continue
# the expression into dev.off(), which closes the device and then fails, so
# nothing is drawn.
# To hide the x-axis tick labels as well, append
#   + theme(axis.text.x = element_blank())
# to the chain below.
# print() makes the plot render even when this script is run via source().
print(
  ggplot(data, aes(x = ProbabilitySet, y = Sample, size = PhosProtein, color = SearchMethod)) +
    geom_point(alpha = 0.8) +
    scale_size(range = c(5, 15), name = "PhosProtein") +
    xlab("ProbabilitySet") +
    ylab("Sample") +
    theme(axis.text = element_text(size = 8), axis.title = element_text(size = 12)) +
    theme(panel.grid = element_blank()) # hide the grid lines
)
dev.off()

# KEGG bar chart: Count per Term, filled by PValue on a salmon-to-white
# gradient, with bold axis text, saved to KEGG.pdf.
pdf(file = "KEGG.pdf", width = 8, height = 6)
data <- read.table(file = "KEGG.txt", header = TRUE, sep = "\t")
# Use the row order of the input file as the y-axis order.
data[["Term"]] <- factor(data[["Term"]], levels = c(data[["Term"]]))
ggplot(data = data, mapping = aes(x = Count, y = Term, fill = PValue)) +
  geom_col(alpha = 0.7) +
  # NOTE(review): no size aesthetic is mapped above, so this scale looks
  # inert -- confirm before removing.
  scale_size(range = c(5, 15)) +
  scale_fill_gradient(low = "#FA8072", high = "white") +
  # Intended to give whole-number tick marks on the x axis.
  scale_x_continuous(breaks = scales::pretty_breaks()) +
  theme(
    axis.text = element_text(size = 15, face = "bold"),
    axis.title = element_text(size = 12, face = "bold")
  )
dev.off()