人口金字塔

title: "pyramid"
author: "wintryheart"
date: "2019/12/2"
output: html_document

knitr::opts_chunk$set(echo = TRUE)

金字塔的画法

library(tidyverse)
library(ggplot2)
library(gganimate)
population <- read.csv("c:/users/liang/desktop/total.csv")

Index	X2017年	X2016年	X2015年	X2014年	X2013年	X2012年	X2011年	X2009年
总体男性人口数(人口抽样调查)(人)	586072	593087	10917046	576011	573428	576354	587039	591871
0-4岁男性人口数(人口抽样调查)(人)	36468	36703	668449	34484	34273	34694	35247	33140
5-9岁男性人口数(人口抽样调查)(人)	34344	34666	638535	34326	33890	33252	33242	34705
10-14岁男性人口数(人口抽样调查)(人)	32929	32773	598685	31616	31141	32370	33709	39749
15-19岁男性人口数(人口抽样调查)(人)	32034	33199	626249	34584	36177	38909	42066	44170
20-24岁男性人口数(人口抽样调查)(人)	38496	41366	809143	46891	50961	52033	55243	44001

pop2 <- gather(population, key = "year", value = "population", -Index)
pop2$year <- str_sub(pop2$year, 2, 5)
pop2$Index <- str_remove_all(pop2$Index, "[人口数抽样调查]")
pop2$Index <- str_remove_all(pop2$Index, "[()]")
pop2 <- mutate(pop2, sex=str_extract(pop2$Index, "[男,女]"))
pop2 <- mutate(pop2, age=str_remove_all(pop2$Index, "[男女性]"))
pop2 <- pop2[, 2:5]

year	population	sex	age
2017	586072	男	总体
2017	36468	男	0-4岁
2017	34344	男	5-9岁
2017	32929	男	10-14岁
2017	32034	男	15-19岁
2017	38496	男	20-24岁

pop3 <- filter(pop2, age!="总体")
# 计算分性别分年龄段的人口比例
pop3 <- pop3 %>%
  group_by(year) %>%
  mutate(per=population/sum(population)*100)

pop3$year <- as.numeric(pop3$year)

pop3$age <- as.factor(pop3$age)
#这样做有问题，5-9岁年龄组错位。
levels(pop3$age)
#观察到5-9排在第10位，要调整到第2位。
#使用levels函数来纠正因子排序
levels(pop3$age) <- levels(pop3$age)[c(1, 10, 2:9, 11:length(levels(pop3$age)))]


#先做2017年的人口金字塔图
# 利用subset()抽取2017年的数据
pop2017 <- subset(pop3, year==2017)

# 利用subset()做分性别的条形图，然后旋转坐标轴。
# 利用aex(y=per*(-1))做对称轴。
# 利用scale_y_continuous()和abs()将负值标签调整为正。 
ggplot(data=pop2017, aes(x=age, y=per, fill=sex)) +
  geom_bar(data = subset(pop2017, sex=="女"), stat="identity") +
  geom_bar(data = subset(pop2017,sex=="男"), aes(y=per*(-1)), stat="identity") +
  scale_y_continuous(breaks = seq(-5,5,1), labels=abs(seq(-5, 5,1))) +
  coord_flip()

Rplot16.png

年龄段中5-9岁因子排序问题另一种解决方案


pop4 <-  filter(pop2, age!="总体")

# 先提取出age唯一值
age3 <- unique(pop4$age)
age3
# 然后按原字符顺序转成因子变量
age4 <- factor(1:20, labels=age3)
age4
# 最后，按age4的排序赋给数据集pop4中的age变量
pop4$age <- factor(pop4$age, levels=age4)
pop4$age


# 重新作图
pop2017 <- subset(pop4, year==2017)

ggplot(data=pop2017, aes(x=age, y=per, fill=sex)) +
  geom_bar(data = subset(pop2017, sex=="女"), stat="identity") +
  geom_bar(data = subset(pop2017,sex=="男"), aes(y=per*(-1)), stat="identity") +
  scale_y_continuous(breaks = seq(-6,6,1), labels=abs(seq(-6, 6,1))) +
  labs(x=NULL, y=NULL, title = "2017年中国人口金字塔\n", fill="", caption = "数据来源：国家统计局\n制作：wintryheart") +
  theme(plot.title = element_text(hjust=0.5), legend.position = c(.9, .9), legend.background = element_blank())+
  coord_flip()

Rplot17.png

利用gganimate包制做历年人口金字塔动图

调用 transition_time()函数制作动图。
调用{fram_time}，在标题中显示对应时间（年份）。
由于动图中时间点是带小数位的，用 round()取整，确保标题中年份显示时为整数。

ggplot(data=pop3, aes(x=age, y=per, fill=sex)) +
  geom_bar(data=subset(pop3, sex=="女"), stat="identity") +
  geom_bar(data = subset(pop3,sex=="男"), aes(y=per*(-1)), stat="identity") +
  scale_y_continuous(breaks = seq(-5,5,1), labels=abs(seq(-5, 5,1))) +
  labs(x=NULL, y=NULL, title = "中国人口金字塔: {round(frame_time)}\n", fill="", caption = "数据来源：国家统计局\n制作：wintryheart") +
  theme(plot.title = element_text(hjust=0.5), legend.position = c(.9, .9), legend.background = element_blank())+
  coord_flip() +
  transition_time(year)

filefe464637466.gif

人口金字塔

金字塔的画法

利用gganimate包制做历年人口金字塔动图

参考帖子：