R语言画图-曼哈顿图-来源网络

#安装方法:
#====设置安装源为清华大学TUNA镜像站点=====================
options("repos" = c(CRAN="https://mirrors.tuna.tsinghua.edu.cn/CRAN/"))
options(BioC_mirror="https://mirrors.tuna.tsinghua.edu.cn/bioconductor")
install.packages('qqman')
#==========================================================

#if(!require("qqman"))install.packages("qqman")
#if(!require("CMplot"))install.packages("CMplot")
library(ggplot2)
#library(qqman)
library(eoffice)
library(CMplot)
library(dplyr)
library(tidyr)
setwd('G:/私有云/data/GWAS-cold351/')

a=getwd()

#输入文件格式如下:
#  SNP   CHR   BP   P
# 其中SNP列为SNP名称,这一列可以没有
# CHR,BP分别为SNP所在的染色体和位置
# P表示该SNP的关联显著性p值
data1<-readxl::read_xlsx("by_conditon.xlsx",sheet = 2)
#use search() to determine whether all the packages have loaded
# 取bonferroni矫正阈值
#fdr=0.01/3352885
#cm_data = data %>% select("CHROM","POS","SRCS","SLCS","RSLCS","RSLCA")
#data0 = data %>% gather(key = "Trait",value = "pvalue",-c("CHROM","POS"))
data1<-data[,c(8,1,2,7)]
#CMplot(data1,plot.type = "m",multracks = TRUE,threshold = fdr,dpi = 300,file = "jpg")
#CMplot(cm_data, plot.type="m",multracks=TRUE,threshold=c(1e-4,1e-3),threshold.lty=c(1,2), 
#       threshold.lwd=c(1,1), threshold.col=c("black","grey"),bin.size=1e6)

#CMplot(data, plot.type="m",multracks=TRUE,threshold=c(1e-4,1e-3),threshold.lty=c(1,2), 
#       threshold.lwd=c(1,1), threshold.col=c("black","grey"), amplify=TRUE,bin.size=1e6,
#       chr.den.col=c("darkgreen", "yellow", "red"), signal.col=c("red","green","blue"),
#       signal.cex=1, file="jpg",memo="",dpi=300,file.output=TRUE,verbose=TRUE,
#       highlight.text.cex=1.4)
#data(package="CMplot")#查看R包带的数据集
#view("pig60k")查看R包示例数据

#计算染色体长度
names(data1)<-c("SNP","CHR","BP","P")
chr_len <-data1 %>% group_by(CHR) %>%
  summarise(chr_len=max(BP))
#计算每条chr的初始位置
chr_pos<- chr_len %>%
  mutate(total=cumsum(chr_len)-chr_len) %>%
  select(-chr_len)
#计算累计SNP的位置
Snp_pos<- chr_pos %>%
  left_join(data1, .,by="CHR")%>%
  arrange(CHR,BP) %>%
  mutate(BPcum=BP+total)
head(Snp_pos,2)

X_axis <-  Snp_pos %>% group_by(CHR) %>% summarize(center=( max(BPcum) +min(BPcum) ) / 2 )
#scale_color_manual(values = c("#0073C2FF", "#EFC000FF"))
p <- ggplot(Snp_pos, aes(x=BPcum, y=-log10(P),color=SNP)) +
  #设置点的大小,透明度
  #geom_point(aes(color=as.factor(SNP)), alpha=0.8, size=1.3) +
  geom_point()+
  #设置颜色
  scale_color_manual(values = c(ca ="#f8766d",cs ="#33c960")) +
  #scale_fill_manual(values = c(ca ="red",cs ="green")) +
  #scale_color_brewer(values = c(ca ="red",cs ="green"))+
  #设定X轴
  scale_x_continuous( label = X_axis$CHR, breaks= X_axis$center ) +
  #去除绘图区和X轴之间的gap
  scale_y_continuous(expand = c(0, 0) )+
  labs(x="Chromosomes",y="-log10(Pvalue)",title = "")+
  theme(legend.title=element_blank())+
  theme(legend.text = element_text(face = "bold",family = "serif"))+
  theme(axis.text.x = element_text(size = 10,colour = "black",face = "bold",family = "serif"))+
  theme(axis.text.y = element_text(size = 10,colour = "black",face = "bold",family = "serif"))+
  theme(axis.title.x = element_text(size = 12,face = "bold",family = "serif"))+
  theme(axis.title.y = element_text(size = 12,face = "bold",family = "serif"))+
  theme_bw() + theme(panel.grid=element_blank())+
  theme(axis.line = element_line(colour = "black"))
ggsave(p,filename = "different_condition.png")
toffice(figure = p,format = "pptx",filename = "condition.pptx",height = 6,width =6.5,append = TRUE)
上一篇:linux之shell的正则表达式


下一篇:C++中string转为字符数组