PCA&AUC&ROC

PCA

1
2
3
4
5
6
7
8
9
10
11
12
13
library(factoextra)
setwd("F://Taogroup/People/ZH/20210806Pca_7sample/PCA/")
df <- read.table("intact_pca.txt",row.names = 1,header = T)
phos_pca <- prcomp(df[,1:2788],center = T,scale. = T)
fviz_eig(phos_pca,addlabels = T)
fviz_pca_ind(phos_pca,
col.ind = df$Group,
addEllipses = T,
geom = ("point"))

summary(phos_pca)
#rotation is the coefficient of peptides
phos_pca$rotation

ROC

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
setwd("F://Taogroup/People/SJ/20220301PRM/ROC/")
protein <- read.table("proteomeandphos.txt",row.names = 1,header = T)
as.factor(protein$Group)
train_sub = sample(nrow(protein),7/10*nrow(protein))
train_Data = protein[train_sub,]
test_data = protein[-train_sub,]

protein_logistic <- glm(Group~H15+PGM1+ACLY+KPCD+AHNK+UBP24+SEPT2+HEP2+RUXF+NHRF2+MMP25,
data = train_Data,
family = binomial("logit"))

summary(protein_logistic)
#p <- predict(protein_logistic, type = 'response')
#plot(seq(-2,2,length=80),sort(p),col='blue')
library(pROC)
#对测试集进行预测
pre_logistic <- as.numeric(predict(protein_logistic,newdata = test_data,type = "response")>0.5)
#将测试集计算所得概率与观测本身取值整合到一起
obs_p_logistic = data.frame(prob=pre_logistic,obs=test_data$Group)
#输出混淆矩阵
table(test_data$Group,pre_logistic,dnn = c("actual value","predict value"))
logistic_roc <- roc(test_data$Group,pre_logistic)
plot(logistic_roc,print.auc=TRUE,auc.polygon=TRUE,grid=c(0.1,0.2),
grid.col=c("green","red"),max.auc.polygon=TRUE,
auc.polygon.col="skyblue",print.thres=TRUE)