2021-01-20 Introduction to the Tidyverse Chapter1

How to use filter:

library(gapminder)
library(dplyr)

# Keep only the rows of gapminder recorded in 1957 (result is printed)
gapminder %>%
  filter(year == 1957)
library(gapminder)
library(dplyr)

# Keep only China's 2002 row; comma-separated conditions must both hold
gapminder %>%
  filter(country == "China", year == 2002)

Note: the dataset must be passed to each verb with the pipe operator %>%.

how to use arrange:

library(gapminder)
library(dplyr)

# Order the rows from the lowest lifeExp to the highest
gapminder %>%
  arrange(lifeExp)

# Order the rows from the highest lifeExp to the lowest
gapminder %>%
  arrange(desc(lifeExp))

Combine filter with arrange: put %>% at the end of each statement so its result is passed on to the next verb.

library(gapminder)
library(dplyr)

# Keep the 1957 rows, then order them from the largest pop to the smallest
gapminder %>%
  filter(year == 1957) %>%
  arrange(desc(pop))

How to use mutate to change a column or add a new column based on the existing data:

library(gapminder)
library(dplyr)

# Overwrite lifeExp with its value multiplied by 12 (years to months)
gapminder %>%
  mutate(lifeExp = lifeExp * 12)

# Leave lifeExp untouched and append the monthly value as lifeExpMonths
gapminder %>%
  mutate(lifeExpMonths = lifeExp * 12)

How to combine filter, arrange, mutate

library(gapminder)
library(dplyr)

# Keep the 2007 rows, add lifeExpMonths (lifeExp times 12), then order the
# rows from the highest lifeExpMonths to the lowest
gapminder %>%
  filter(year == 2007) %>%
  mutate(lifeExpMonths = lifeExp * 12) %>%
  arrange(desc(lifeExpMonths))

visualizing with ggplot2

# Load the ggplot2 package as well
library(gapminder)
library(dplyr)
library(ggplot2)
# Store the 1952 observations in gapminder_1952
gapminder_1952 <- gapminder %>%
  filter(year == 1952)

# Assignment prints nothing, so evaluate the name to display the result
gapminder_1952

how to use ggplot to visualize the data:

library(gapminder)
library(dplyr)
library(ggplot2)

# Subset to the 1952 observations
gapminder_1952 <- gapminder %>% filter(year == 1952)

# Scatter plot with pop on the x-axis and gdpPercap on the y-axis
ggplot(data = gapminder_1952, mapping = aes(x = pop, y = gdpPercap)) +
  geom_point()

how to scale x or y to log scale:

library(gapminder)
library(dplyr)
library(ggplot2)

# Subset to the 1952 observations
gapminder_1952 <- gapminder %>%
  filter(year == 1952)

# Scatter plot of lifeExp against pop with the x-axis on a log10 scale.
# Every ggplot2 component must be joined to the previous one with `+`;
# without the `+` after geom_point(), scale_x_log10() would be evaluated as
# a separate statement and the plot would keep its linear x-axis.
ggplot(gapminder_1952, aes(x = pop, y = lifeExp)) +
  geom_point() +
  scale_x_log10()
library(gapminder)
library(dplyr)
library(ggplot2)

# Subset to the 1952 observations
gapminder_1952 <- gapminder %>%
  filter(year == 1952)

# Scatter plot comparing pop and gdpPercap, with both axes on a log10 scale.
# Each scale is attached with `+`; a scale call that is not chained onto the
# plot is a separate statement and has no effect on it.
ggplot(gapminder_1952, aes(x = pop, y = gdpPercap)) +
  geom_point() +
  scale_x_log10() +
  scale_y_log10()

Remember: ggplot() takes the data frame as its first argument, so there is no need to write gapminder %>% in front of it.

how to color the data based on category(continent):

library(gapminder)
library(dplyr)
library(ggplot2)

# Subset to the 1952 observations
gapminder_1952 <- gapminder %>% filter(year == 1952)

# Scatter plot of lifeExp against pop (log10 x-axis); the point color is
# mapped to continent
ggplot(
  data = gapminder_1952,
  mapping = aes(x = pop, y = lifeExp, color = continent)
) +
  geom_point() +
  scale_x_log10()

How to map the size of the points to a variable (here pop):

# gapminder_2007 is not created anywhere else in these notes, so build it
# here (the 2007 observations) to make the example runnable on its own.
gapminder_2007 <- gapminder %>%
  filter(year == 2007)

# Scatter plot of lifeExp against gdpPercap (log10 x-axis); the point color
# is mapped to continent and the point size to pop
ggplot(
  gapminder_2007,
  aes(x = gdpPercap, y = lifeExp, color = continent, size = pop)
) +
  geom_point() +
  scale_x_log10()

Use facet_wrap: compare pop and lifeExp within each continent, one panel per continent:

library(gapminder)
library(dplyr)
library(ggplot2)

# Subset to the 1952 observations
gapminder_1952 <- gapminder %>% filter(year == 1952)

# Scatter plot of lifeExp against pop (log10 x-axis), split into one panel
# per continent
ggplot(data = gapminder_1952, mapping = aes(x = pop, y = lifeExp)) +
  geom_point() +
  scale_x_log10() +
  facet_wrap(~ continent)
library(gapminder)
library(dplyr)
library(ggplot2)

# Scatter plot of lifeExp against gdpPercap (log10 x-axis) on the full
# dataset: color is mapped to continent, size to pop, one panel per year
ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp, color = continent, size = pop)
) +
  geom_point() +
  scale_x_log10() +
  facet_wrap(~ year)

summarize and median:

library(gapminder)
library(dplyr)

# Collapse the whole dataset into one row holding the median of lifeExp
gapminder %>%
  summarize(
    medianLifeExp = median(lifeExp)
  )

Filter the year

library(gapminder)
library(dplyr)

# Keep the 1957 rows, then reduce them to the median of lifeExp
gapminder %>%
  filter(year == 1957) %>%
  summarize(medianLifeExp = median(lifeExp))

use median and max:

library(gapminder)
library(dplyr)

# Keep the 1957 rows, then compute two summaries in one call: the median of
# lifeExp and the maximum of gdpPercap
gapminder %>%
  filter(year == 1957) %>%
  summarise(
    medianLifeExp = median(lifeExp),
    maxGdpPercap = max(gdpPercap)
  )

How to use group by to summarize data:

library(gapminder)
library(dplyr)
# For the 1957 rows, compute the median of lifeExp and the maximum of
# gdpPercap separately for each continent
gapminder %>%
  filter(year == 1957) %>%
  group_by(continent) %>%
  summarize(
    medianLifeExp = median(lifeExp),
    maxGdpPercap = max(gdpPercap)
  )

Group by two variables: year and continent

library(gapminder)
library(dplyr)

# Compute the median of lifeExp and the maximum of gdpPercap for every
# combination of year and continent
gapminder %>%
  group_by(year, continent) %>%
  summarize(
    medianLifeExp = median(lifeExp),
    maxGdpPercap = max(gdpPercap)
  )

visualize the group by table:
Use the by_year dataset to create a scatter plot showing the change of median life expectancy over time, with year on the x-axis and medianLifeExp on the y-axis. Be sure to add expand_limits(y = 0) to make sure the plot's y-axis includes zero.

library(gapminder)
library(dplyr)
library(ggplot2)

# One row per year with the median of lifeExp and the maximum of gdpPercap
by_year <- gapminder %>%
  group_by(year) %>%
  summarize(medianLifeExp = median(lifeExp),
            maxGdpPercap = max(gdpPercap))

# Scatter plot showing the change in medianLifeExp over time.
# expand_limits() is a plot component, not an aesthetic mapping: it has to be
# added with `+` rather than passed to aes(), otherwise the y-axis is not
# extended to include zero.
ggplot(by_year, aes(x = year, y = medianLifeExp)) +
  geom_point() +
  expand_limits(y = 0)

how to use the ggplot to visualize data:
Notice the position of expand_limits(y = 0): it is added to the plot with +, outside of aes().

library(gapminder)
library(dplyr)
library(ggplot2)

# One row per year/continent combination with the median of gdpPercap
by_year_continent <- gapminder %>%
  group_by(year, continent) %>%
  summarize(medianGdpPercap = median(gdpPercap))

# Scatter plot of medianGdpPercap over time, colored by continent, with the
# y-axis extended to include zero
ggplot(
  data = by_year_continent,
  mapping = aes(x = year, y = medianGdpPercap, color = continent)
) +
  geom_point() +
  expand_limits(y = 0)
library(gapminder)
library(dplyr)
library(ggplot2)

# For the 2007 rows, compute the median of lifeExp and the median of
# gdpPercap for each continent
by_continent_2007 <- gapminder %>%
  filter(year == 2007) %>%
  group_by(continent) %>%
  summarize(
    medianLifeExp = median(lifeExp),
    medianGdpPercap = median(gdpPercap)
  )

# Scatter plot of medianLifeExp against medianGdpPercap, one colored point
# per continent
ggplot(
  data = by_continent_2007,
  mapping = aes(x = medianGdpPercap, y = medianLifeExp, color = continent)
) +
  geom_point()

construct a line graph:

library(gapminder)
library(dplyr)
library(ggplot2)

# One row per year with the median of gdpPercap, stored as by_year
by_year <- gapminder %>%
  group_by(year) %>%
  summarize(medianGdpPercap = median(gdpPercap))

# Line plot of medianGdpPercap over time, with the y-axis extended to
# include zero
ggplot(data = by_year, mapping = aes(x = year, y = medianGdpPercap)) +
  geom_line() +
  expand_limits(y = 0)
library(gapminder)
library(dplyr)
library(ggplot2)

# One row per year/continent combination with the median of gdpPercap,
# stored as by_year_continent
by_year_continent <- gapminder %>%
  group_by(year, continent) %>%
  summarize(medianGdpPercap = median(gdpPercap))

# Line plot of medianGdpPercap over time, one colored line per continent,
# with the y-axis extended to include zero
ggplot(
  data = by_year_continent,
  mapping = aes(x = year, y = medianGdpPercap, color = continent)
) +
  geom_line() +
  expand_limits(y = 0)

how to create bar chart:

library(gapminder)
library(dplyr)
library(ggplot2)

# For the 1952 rows, compute the median of gdpPercap for each continent
by_continent <- gapminder %>%
  filter(year == 1952) %>%
  group_by(continent) %>%
  summarize(medianGdpPercap = median(gdpPercap))

# Bar plot with one bar per continent whose height is medianGdpPercap
ggplot(data = by_continent, mapping = aes(x = continent, y = medianGdpPercap)) +
  geom_col()
library(gapminder)
library(dplyr)
library(ggplot2)

# Keep the rows that are both from 1952 and from the Oceania continent
oceania_1952 <- gapminder %>%
  filter(continent == "Oceania", year == 1952)

# Bar plot with one bar per country whose height is gdpPercap
ggplot(data = oceania_1952, mapping = aes(x = country, y = gdpPercap)) +
  geom_col()

how to use histogram :

library(gapminder)
library(dplyr)
library(ggplot2)

# Subset to the 1952 observations and add pop_by_mil, the population
# divided by one million
gapminder_1952 <- gapminder %>%
  filter(year == 1952) %>%
  mutate(pop_by_mil = pop / 1000000)

# Histogram of pop_by_mil using 50 bins
ggplot(data = gapminder_1952, mapping = aes(x = pop_by_mil)) +
  geom_histogram(bins = 50)

boxplot:

library(gapminder)
library(dplyr)
library(ggplot2)

# Subset to the 1952 observations
gapminder_1952 <- gapminder %>% filter(year == 1952)

# One box per continent showing the distribution of gdpPercap, with the
# y-axis on a log10 scale
ggplot(data = gapminder_1952, mapping = aes(x = continent, y = gdpPercap)) +
  geom_boxplot() +
  scale_y_log10()

Use ggtitle() to add a title to your graph:

library(gapminder)
library(dplyr)
library(ggplot2)

# Subset to the 1952 observations
gapminder_1952 <- gapminder %>% filter(year == 1952)

# Boxplot of gdpPercap per continent (log10 y-axis) with a plot title
ggplot(data = gapminder_1952, mapping = aes(x = continent, y = gdpPercap)) +
  geom_boxplot() +
  scale_y_log10() +
  ggtitle("Comparing GDP per capita across continents")
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容