Workflow(4) GO&Pathway&PPI

Posted on 2023-02-21 Edited on 2023-02-22 In Workflow

Workflow(4) GO&Pathway&PPI

准备工作：

1.GO网站

https://david.ncifcrf.gov/tools.jsp

2.Pathway网站

https://david.ncifcrf.gov/tools.jsp
https://www.kegg.jp/

3.PPI网站

https://string-db.org/cgi/input?input_page_show_search=on

4.数据

数据为找到的差异蛋白的ACCESSION号
如
P05362
P10909
P11279
上述网站都要求每行只填一个登录号（ACCESSION）；为避免格式出错，建议直接从Excel里复制整列登录号后粘贴。

5.sublime

https://www.sublimetext.com/
这是一个文本编辑工具，类似记事本（txt），可用于整理数据格式；GO等结果如果不经过处理就直接粘贴到txt或者Excel里，列会错位、格式会混乱。

步骤

1.GO 和 Pathway等数据获取

上传数据
选择数据格式，uniprot accession
列表内容，gene list
上传

结果

result2
BP，CC，MF分别呈现，点击chart进入数据页面
6. 下载数据，会弹出一个网页
result3
全选复制，粘贴到sublime里（注意全程不要乱点，会导致数据格式乱）
7. 如何把数据格式改成可处理的
result4
选中一处用来分隔各列的空格，按 Ctrl+F 打开查找栏（如图），点击右下角的 Find All 选中所有相同的分隔空格，然后按 Tab 键，把它们一次性全部替换成制表符。
呼！现在可以贴到excel里面查看筛选啦！

2.泡泡图

代码如下，第一块是基础绘图，后面有参数修改说明。
其余问题会持续更新，请耐心等待

library(ggplot2)
library(stats)
library(dplyr)
# Single-GO bubble plot: one point per Term, positioned by GeneRatio,
# sized by Count and coloured by PValue, written to GOMFbubble.pdf.
# NOTE(review): assumes phos.txt is tab-separated with a header row that
# contains Term, GeneRatio, Count and PValue columns — confirm.
setwd("F:/Taogroup/ZZ/20210130phos+full/GO/")
data <- read.table("phos.txt", header = TRUE, sep = "\t")
# Keep terms on the y axis in file order rather than alphabetical order;
# unique() avoids a "duplicated level" error if a term appears twice.
data$Term <- factor(data$Term, levels = unique(data$Term))


pdf("GOMFbubble.pdf", width = 8, height = 6)
ggplot(data, aes(x = GeneRatio, y = Term, size = Count, color = PValue)) +
  geom_point(alpha = 0.7) +
  # Every component must be chained with `+`; without it the next line is
  # evaluated as a separate statement and never reaches the plot.
  scale_color_gradient(low = "red") +
  scale_size(range = c(8, 12))
dev.off()
# Plot for a single GO category

# Combined bubble plot written to phos.pdf: x is -log2(FDR), y is Term,
# point size follows Count and colour follows Classification.
# Relies on the `data` table that is already loaded at this point.
pdf("phos.pdf", width = 8, height = 6)
ggplot(
  data,
  aes(x = -log2(FDR), y = Term, size = Count, color = Classification)
) +
  geom_point(alpha = 0.5) +
  scale_size(name = "Count", range = c(1, 10)) +
  labs(x = "-log2(FDR)", y = "Term") +
  theme(
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 7)
  )
# labs(title = "CC")
dev.off()
# All three GO categories in one figure

3.条形图（绘制pathway信息）

library(ggplot2)
# Draw a horizontal bar chart of enrichment terms and save it as a PDF.
#
# input_file:    tab-separated table with a header row; the Term, Count and
#                PValue columns are used.
# output_pdf:    path of the PDF file to write.
# width, height: page size passed to pdf() (inches).
# Returns output_pdf invisibly.
save_term_barplot <- function(input_file, output_pdf, width = 8, height = 6) {
  # Read first, so a missing or malformed file does not leave an open
  # graphics device and an empty PDF behind.
  data <- read.table(input_file, header = TRUE, sep = "\t")
  # Keep terms in file order on the y axis; unique() guards against a
  # "duplicated level" error when a term is repeated.
  data$Term <- factor(data$Term, levels = unique(data$Term))

  pdf(output_pdf, width = width, height = height)
  # Close the device even if building or drawing the plot fails.
  on.exit(dev.off(), add = TRUE)

  bar_plot <- ggplot(data, aes(x = Count, y = Term, fill = PValue)) +
    geom_col(alpha = 0.7) +
    scale_fill_gradient(low = "red", high = "white")
  # ggplot objects are drawn only when printed, and there is no
  # auto-printing inside a function.
  print(bar_plot)
  invisible(output_pdf)
}

setwd("F:/")
save_term_barplot("KEGG.txt", "keggbar.pdf")
save_term_barplot("GOBP.txt", "GOBPbar.pdf")
save_term_barplot("GOCC.txt", "GOCCbar.pdf")
save_term_barplot("GOMF.txt", "GOMFbar.pdf")

4.美化

#气泡图换颜色scale_color_manual(values=c())
library(ggplot2)
library(stats)  
library(dplyr)
# Bubble plot with hand-picked group colours, written to BPGO.pdf:
# x is -log2(PValue), y is Term, size follows Count, colour follows
# SampleGroup.
# NOTE(review): scale_color_manual() is given three colours, so SampleGroup
# is presumably expected to have at most three levels — confirm.
setwd("F:/Taogroup/People/HSY/MM/Gly/FullandGlycombine/addfulltogly/20210728/")
data <- read.table("BPforBubble.txt", header = TRUE, sep = "\t")

pdf("BPGO.pdf", width = 10, height = 6)
ggplot(data, aes(x = -log2(PValue), y = Term, size = Count, color = SampleGroup)) +
  geom_point(alpha = 0.8) +
  scale_size(range = c(3, 15), name = "Count") +
  scale_color_manual(values = c("#00BA38", "#F8766D", "#619CFF")) +
  # The `+` after xlab() is required; without it the theme() call below is a
  # separate statement and the text sizes are never applied.
  xlab("-log2(PValue)") +
  theme(axis.text = element_text(size = 7.5), axis.title = element_text(size = 12))
dev.off()


# Bubble plot of ProbabilitySet against Sample, with point size taken from
# PhosProtein and colour from SearchMethod; written to GO.pdf at pdf()'s
# default page size.
# NOTE(review): these columns must exist in whatever `data` currently
# holds — confirm the intended table is loaded before running this.
pdf("GO.pdf")
ggplot(
  data,
  aes(x = ProbabilitySet, y = Sample, size = PhosProtein, color = SearchMethod)
) +
  geom_point(alpha = 0.8) +
  scale_size(name = "PhosProtein", range = c(5, 15)) +
  labs(x = "ProbabilitySet", y = "Sample") +
  theme(
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 8)
  )
dev.off()

#修改刻度等
library(ggplot2)
library(stats)  
library(dplyr)
# Bubble plot of ProbabilitySet against Sample (size = PhosProtein,
# colour = SearchMethod) with the panel grid removed, written to GO.pdf.
setwd("E:/Datasetlocalize/People/ZGY/20210817_new_version/Result/")
data <- read.table("81Protein.txt", header = TRUE, sep = "\t")

pdf("GO.pdf", width = 10, height = 8)
ggplot(data, aes(x = ProbabilitySet, y = Sample, size = PhosProtein, color = SearchMethod)) +
  geom_point(alpha = 0.8) +
  scale_size(range = c(5, 15), name = "PhosProtein") +
  xlab("ProbabilitySet") +
  ylab("Sample") +
  theme(axis.text = element_text(size = 8), axis.title = element_text(size = 12)) +
  # Hide the grid lines. This is the last element of the chain, so it must
  # NOT end with `+`: a trailing `+` would continue the expression into
  # dev.off(), closing the device before the plot is drawn and then failing.
  theme(panel.grid = element_blank())
# Optional: to also hide the x-axis tick labels, append
#   + theme(axis.text.x = element_blank())
# to the last line of the chain above.
dev.off()

# KEGG bar chart with bold, enlarged axis text, written to KEGG.pdf:
# bar length is Count, one bar per Term, fill colour follows PValue.
# The table is read before the device is opened so that a failed read does
# not leave an open device and an empty PDF behind.
data <- read.table("KEGG.txt", header = TRUE, sep = "\t")
# Keep terms in file order on the y axis; unique() guards against a
# "duplicated level" error when a term is repeated.
data$Term <- factor(data$Term, levels = unique(data$Term))

pdf("KEGG.pdf", width = 8, height = 6)
# The former scale_size() call was dropped: no size aesthetic is mapped in
# this plot, so it had no effect.
ggplot(data, aes(x = Count, y = Term, fill = PValue)) +
  geom_col(alpha = 0.7) +
  scale_fill_gradient(low = "#FA8072", high = "white") +
  # Pick "pretty" tick positions on the x axis (intended to give integer
  # ticks for counts).
  scale_x_continuous(breaks = scales::pretty_breaks()) +
  theme(
    axis.text = element_text(size = 15, face = "bold"),
    axis.title = element_text(size = 12, face = "bold")
  )
dev.off()

0%