ggplot2分组箱线图添加均值

ggplot2分组箱线图添加均值

一、不分组的简单箱线图加均值

代码如下(示例):

# Library
library(ggplot2)

# Create example data: four groups (A-D) of unequal size with integer values.
# The seed is fixed so the figure is reproducible from run to run.
set.seed(123)
names <- rep(c("A", "B", "C", "D"), times = c(20, 8, 30, 80))
value <- c(
  sample(2:5, 20, replace = TRUE),
  sample(4:10, 8, replace = TRUE),
  sample(1:7, 30, replace = TRUE),
  sample(3:8, 80, replace = TRUE)
)
data <- data.frame(names, value)

# Plot: one box per group, with the group mean overlaid as a red point.
p <- ggplot(data, aes(x = names, y = value, fill = names)) +
  geom_boxplot(alpha = 0.7) +
  # stat_summary() applies `fun` to the y values at each x and draws the result.
  stat_summary(
    fun = mean, geom = "point", shape = 20, size = 2.5,
    color = "red", fill = "red", alpha = 0.7
  ) +
  theme(legend.position = "none") +
  scale_fill_brewer(palette = "Accent")
p

ggplot2分组箱线图添加均值

stat_summary

上面的例子用到了 stat_summary 函数来计算并绘制均值,下面具体介绍一下它的用法。

代码如下(示例):(图片略,直接跑可以跑出来)

library(tidyverse)
library(gapminder)
# ---- Mean life expectancy per year: points joined by a line ----
ggplot(gapminder, aes(x = year, y = lifeExp)) +
  # yearly mean of lifeExp, drawn as points
  stat_summary(geom = "point", fun = "mean") +
  # the same yearly means, connected by a line
  stat_summary(geom = "line", fun = "mean")

# ---- Median life expectancy per year, drawn as bars ----
ggplot(gapminder, aes(x = year, y = lifeExp)) +
  stat_summary(geom = "bar", fun = "median")

# ---- Mean life expectancy per year: filled area with points on top ----
ggplot(
  mutate(gapminder, year = as.integer(year)),
  aes(x = year, y = lifeExp)
) +
  # yearly mean shown as a semi-transparent filled area
  stat_summary(
    geom = "area", fun = "mean",
    alpha = .5, fill = "#EB5286"
  ) +
  # yearly mean shown as points
  stat_summary(geom = "point", fun = "mean", color = "#6F213F")

# ---- Mean as the point, minimum-to-maximum as the range line ----
ggplot(gapminder, aes(x = year, y = lifeExp)) +
  stat_summary(
    geom = "pointrange",  # a point with a vertical range line through it
    fun = mean,           # point position: the mean
    fun.max = max,        # upper end of the line: the maximum
    fun.min = min         # lower end of the line: the minimum
  )

# ---- Minimum-to-maximum range per year as classic error bars ----
ggplot(gapminder, aes(x = year, y = lifeExp)) +
  stat_summary(
    geom = "errorbar",
    fun.max = max,
    fun.min = min,
    width = 1  # `fun` is deliberately omitted: only the bar ends are supplied
  )

# ---- Mean +/- one standard deviation per year ----
ggplot(gapminder, aes(x = year, y = lifeExp)) +
  stat_summary(
    geom = "pointrange",
    fun = mean,
    fun.min = function(v) mean(v) - sd(v),
    fun.max = function(v) mean(v) + sd(v)
  )

   # Mean +/- 1 SD expressed through `fun.data` instead of fun/fun.min/fun.max.
   # The original passed "mean_sd", which lives in ggpubr; ggpubr is not loaded
   # by this script, so that name cannot be resolved unless it happens to be
   # attached. ggplot2's own mean_sdl() with mult = 1 returns the same
   # y / ymin / ymax (it relies on Hmisc being installed, which this script
   # already uses).
gapminder %>%
  ggplot(aes(x = year, y = lifeExp)) +
  stat_summary(
    fun.data = "mean_sdl",
    fun.args = list(mult = 1),  # one SD on each side
    geom = "pointrange"
  )
   # This plot is equivalent to the explicit mean(x) -/+ sd(x) version.

# Bars at the yearly mean, with red error bars spanning mean +/- 1 SD.
# mean_sdl(mult = 1) is used instead of ggpubr's "mean_sd", because ggpubr is
# not loaded by this script and the name would otherwise fail to resolve.
ggplot(gapminder, aes(x = year, y = lifeExp)) +
  stat_summary(fun = mean, geom = "bar") +
  stat_summary(
    fun.data = "mean_sdl",
    fun.args = list(mult = 1),  # one SD on each side
    geom = "errorbar",
    color = "red"
  )


# ---- Mean +/- one standard error of the mean (sd / sqrt(n)) per year ----
ggplot(gapminder, aes(x = year, y = lifeExp)) +
  stat_summary(
    geom = "pointrange",
    fun = mean,
    fun.min = function(v) mean(v) - sd(v) / sqrt(length(v)),
    fun.max = function(v) mean(v) + sd(v) / sqrt(length(v))
  )

#----------------------
library(Hmisc)
# Yearly mean with a 99% confidence interval, drawn with stat_summary's
# default geom.
ggplot(gapminder, aes(x = year, y = lifeExp)) +
  stat_summary(
    fun.args = list(conf.int = .99),  # request a 99% confidence interval
    fun.data = "mean_cl_normal"       # mean plus confidence limits
  )

    # Confidence interval drawn as error bars, with the mean marked by a point.
ggplot(gapminder, aes(x = year, y = lifeExp)) +
  stat_summary(geom = "errorbar", fun.data = "mean_cl_normal", width = .4) +
  stat_summary(geom = "point", fun = "mean")

# ---- 2007 only: mean bar, mean point and confidence interval per continent ----
ggplot(
  filter(gapminder, year == 2007),
  aes(x = continent, y = lifeExp)
) +
  stat_summary(geom = "bar", fun = "mean", alpha = .7) +
  stat_summary(geom = "point", fun = "mean", size = 1) +
  stat_summary(geom = "errorbar", fun.data = "mean_cl_normal", width = .2)

# ----------------------
# NOTE(review): the original note here describes applying mean_cl_boot
# (misspelled "mean_cl_bool") to `mpg`, which does not appear in this script.
# Per the ggplot2 documentation it returns y, ymin and ymax; confirm before
# relying on the original wording ("mean, max, min").
# ----------------------
# position = position_dodge() places multiple bars side by side.
# The original note contrasts a first plot using position = "dodge" with a
# second plot using position = position_dodge(): "dodge" is shorthand for
# position_dodge() with its default width of 0.9, so the full call is needed
# whenever that width has to be set explicitly.

# Manual fill palette: 33 hex colours, used in order by scale_fill_manual().
colors <- c(
  "#E41A1C", "#1E90FF", "#FF8C00", "#4DAF4A", "#984EA3", "#40E0D0",
  "#FFC0CB", "#00BFFF", "#FFDEAD", "#90EE90", "#EE82EE", "#00FFFF",
  "#F0A3FF", "#0075DC", "#993F00", "#4C005C", "#2BCE48", "#FFCC99",
  "#808080", "#94FFB5", "#8F7C00", "#9DCC00", "#C20088", "#003380",
  "#FFA405", "#FFA8BB", "#426600", "#FF0010", "#5EF1F2", "#00998F",
  "#740AFF", "#990000", "#FFFF00"
)
# Mean life expectancy per continent, with one dodged bar per year.
# Each layer uses the same dodge width so bars, points and error bars line up.
ggplot(
  mutate(gapminder, year = as.factor(year)),  # year as a factor: one fill per year
  aes(x = continent, y = lifeExp, fill = year)
) +
  stat_summary(
    geom = "bar", fun = "mean",
    position = position_dodge(0.95),  # spacing between bars within a group
    alpha = .7
  ) +
  stat_summary(
    geom = "point", fun = "mean",
    position = position_dodge(0.95),
    size = 1
  ) +
  stat_summary(
    geom = "errorbar", fun.data = "mean_cl_normal",
    position = position_dodge(0.95),
    width = .2
  ) +
  scale_fill_manual(values = colors) +  # custom fill colours from `colors`
  theme_minimal() +                     # drops the default grey background
  scale_y_continuous(expand = c(0, 5)) +
  scale_x_discrete(expand = c(0.2, 0))
# The `expand` argument of scale_x_discrete() / scale_y_discrete() (and the
# continuous equivalents) adjusts the padding at both ends of the axis.

参考: 通过ggplot2中stat_summary函数快速进行数据统计.

以及 stat_summary.


分组箱线图

所需数据格式(长数据):每行一条观测,至少包含“组别”“状态”“值”三列(示例图:ggplot2分组箱线图添加均值)。


# Grouped box plot, faceted by the 状态 column.
# `baidata` is not defined in this script; the code reads its columns
# 组别 (x axis and fill), 值 (y axis) and 状态 (facet).
p1 <- ggplot(baidata, aes(x = 组别, y = 值, fill = 组别)) +
  geom_boxplot(
    color = "azure4",
    outlier.colour = "red",
    outlier.fill = "red",
    outlier.size = 1,
    outlier.alpha = 0  # fully transparent, so outliers are not visible
    # notch = TRUE, notchwidth = 0.8
  ) +
  # one panel per 状态 level (baseline / day 7 / day 14 per the original note)
  facet_wrap(~状态) +
  scale_fill_manual(values = c("lightgoldenrod1", "lavender")) +
  labs(y = "白细胞") +
  ggtitle("不同组别不同时间点白细胞变化") +
  theme(plot.title = element_text(hjust = 0.5, size = 10)) +  # centred title
  theme(text = element_text(family = "wqy-microhei"))

# outlier.alpha = 0 sets outlier transparency to zero, i.e. outliers are hidden.

# Show the box plot on its own.
p1

# Show it again with each box's mean overlaid as a semi-transparent red point.
p1 + stat_summary(geom = "point", fun = mean, size = 1, alpha = 0.5, color = "red")

ggplot2分组箱线图添加均值

上一篇:OpenCasCade —— 分割一条曲线并返回分割点的坐标及参数


下一篇:A Beginner’s Guide to Decentralized Autonomous Organizations