How to use filter:
library(gapminder)
library(dplyr)
# Keep only the rows of gapminder whose year is 1957
gapminder %>%
  filter(year == 1957)
library(gapminder)
library(dplyr)
# Keep only the rows where both conditions hold: year 2002 and country China
gapminder %>%
  filter(
    year == 2002,
    country == "China"
  )
Remember: the dataset must be passed to filter() with the %>% pipe operator.
how to use arrange:
library(gapminder)
library(dplyr)
# Order rows from the lowest to the highest lifeExp
gapminder %>%
  arrange(lifeExp)

# Order rows from the highest to the lowest lifeExp by wrapping it in desc()
gapminder %>%
  arrange(desc(lifeExp))
Combine filter with arrange: put %>% at the end of each step so that the next verb continues the chain.
library(gapminder)
library(dplyr)
# Restrict to 1957, then order the result from largest to smallest pop
gapminder %>%
  filter(year == 1957) %>%
  arrange(desc(pop))
How to use mutate to change an existing column or add a new column computed from the existing data:
library(gapminder)
library(dplyr)
# Overwrite lifeExp in place, converting it from years to months
gapminder %>%
  mutate(lifeExp = lifeExp * 12)

# Leave lifeExp untouched and add the month value as a new column instead
gapminder %>%
  mutate(lifeExpMonths = 12 * lifeExp)
How to combine filter, arrange, mutate
library(gapminder)
library(dplyr)
# Chain three verbs: keep 2007, add lifeExpMonths, then sort by it descending
gapminder %>%
  filter(year == 2007) %>%
  mutate(lifeExpMonths = 12 * lifeExp) %>%
  arrange(desc(lifeExpMonths))
visualizing with ggplot2
# Load the ggplot2 package as well
library(gapminder)
library(dplyr)
library(ggplot2)
# Store the 1952 subset of gapminder under its own name
gapminder_1952 <- gapminder %>%
  filter(year == 1952)

# Typing the name on its own line prints the stored subset
gapminder_1952
how to use ggplot to visualize the data:
library(gapminder)
library(dplyr)
library(ggplot2)
# Subset gapminder to the 1952 observations
gapminder_1952 <- gapminder %>%
  filter(year == 1952)

# Scatter plot with pop on the x-axis and gdpPercap on the y-axis
ggplot(gapminder_1952, aes(x = pop, y = gdpPercap)) +
  geom_point()
how to scale x or y to log scale:
library(gapminder)
library(dplyr)
library(ggplot2)
# Subset gapminder to the 1952 observations
gapminder_1952 <- gapminder %>%
  filter(year == 1952)

# Scatter plot of lifeExp against pop with the x-axis on a log10 scale.
# Every plot component must be joined with `+`: without it after geom_point(),
# scale_x_log10() is evaluated as a separate statement and is never applied
# to the plot.
ggplot(gapminder_1952, aes(x = pop, y = lifeExp)) +
  geom_point() +
  scale_x_log10()
library(gapminder)
library(dplyr)
library(ggplot2)
# Subset gapminder to the 1952 observations
gapminder_1952 <- gapminder %>%
  filter(year == 1952)

# Scatter plot of gdpPercap against pop with both axes on a log10 scale.
# Each scale is chained onto the plot with `+`; a scale call on a line of its
# own (with no `+` before it) would run separately and leave the axis linear.
ggplot(gapminder_1952, aes(x = pop, y = gdpPercap)) +
  geom_point() +
  scale_x_log10() +
  scale_y_log10()
Remember: ggplot() takes the data frame as its first argument, so there is no need to write gapminder %>% in front of it.
how to color the data based on category(continent):
library(gapminder)
library(dplyr)
library(ggplot2)
# Subset gapminder to the 1952 observations
gapminder_1952 <- gapminder %>%
  filter(year == 1952)

# Scatter plot of lifeExp against pop (log10 x-axis); mapping color to
# continent inside aes() gives each continent its own point color
ggplot(
  gapminder_1952,
  aes(x = pop, y = lifeExp, color = continent)
) +
  geom_point() +
  scale_x_log10()
How to make the size of each point represent a variable (here pop):
# gapminder_2007 is not created anywhere in the visible notes, so build it
# here to keep the snippet runnable on its own.
# NOTE(review): assumes it is meant to be the 2007 subset of gapminder,
# following the gapminder_1952 naming pattern used elsewhere — confirm.
gapminder_2007 <- gapminder %>%
  filter(year == 2007)

# Scatter plot of lifeExp against gdpPercap (log10 x-axis), with point color
# mapped to continent and point size mapped to pop
ggplot(
  gapminder_2007,
  aes(x = gdpPercap, y = lifeExp, color = continent, size = pop)
) +
  geom_point() +
  scale_x_log10()
Use facet_wrap to draw a separate pop vs. lifeExp panel for every continent:
library(gapminder)
library(dplyr)
library(ggplot2)
# Subset gapminder to the 1952 observations
gapminder_1952 <- gapminder %>%
  filter(year == 1952)

# Scatter plot of lifeExp against pop (log10 x-axis), split into one panel
# per continent by facet_wrap()
ggplot(gapminder_1952, aes(x = pop, y = lifeExp)) +
  geom_point() +
  scale_x_log10() +
  facet_wrap(~ continent)
library(gapminder)
library(dplyr)
library(ggplot2)
# Scatter plot of lifeExp against gdpPercap (log10 x-axis) on the full
# dataset: color shows continent, point size shows pop, and facet_wrap()
# draws one panel per year
ggplot(
  gapminder,
  aes(x = gdpPercap, y = lifeExp, color = continent, size = pop)
) +
  geom_point() +
  scale_x_log10() +
  facet_wrap(~ year)
summarize and median:
library(gapminder)
library(dplyr)
# Collapse the whole dataset into one row holding the median of lifeExp
gapminder %>%
  summarize(
    medianLifeExp = median(lifeExp)
  )
Filter the year
library(gapminder)
library(dplyr)
# Restrict to 1957 first, then reduce to the median lifeExp of that year
gapminder %>%
  filter(year == 1957) %>%
  summarize(medianLifeExp = median(lifeExp))
use median and max:
library(gapminder)
library(dplyr)
# Restrict to 1957, then compute two summaries in a single summarize() call:
# the median of lifeExp and the maximum of gdpPercap
gapminder %>%
  filter(year == 1957) %>%
  summarize(
    medianLifeExp = median(lifeExp),
    maxGdpPercap = max(gdpPercap)
  )
How to use group by to summarize data:
library(gapminder)
library(dplyr)
# Restrict to 1957, then group by continent so that summarize() returns one
# row per continent with its median lifeExp and maximum gdpPercap
gapminder %>%
  filter(year == 1957) %>%
  group_by(continent) %>%
  summarize(
    medianLifeExp = median(lifeExp),
    maxGdpPercap = max(gdpPercap)
  )
Group by two variables: year and continent
library(gapminder)
library(dplyr)
# One row per year/continent combination with its median lifeExp and maximum
# gdpPercap.
# After grouping by two variables, summarize() by default keeps the result
# grouped by the first one (year) and prints a regrouping message;
# .groups = "drop" returns a plain ungrouped table instead.
gapminder %>%
  group_by(year, continent) %>%
  summarize(
    medianLifeExp = median(lifeExp),
    maxGdpPercap = max(gdpPercap),
    .groups = "drop"
  )
visualize the group by table:
Use the by_year dataset to create a scatter plot showing the change of median life expectancy over time, with year on the x-axis and medianLifeExp on the y-axis. Be sure to add expand_limits(y = 0) to make sure the plot's y-axis includes zero.
library(gapminder)
library(dplyr)
library(ggplot2)
# Median life expectancy and maximum GDP per capita for each year
by_year <- gapminder %>%
  group_by(year) %>%
  summarize(
    medianLifeExp = median(lifeExp),
    maxGdpPercap = max(gdpPercap)
  )

# Scatter plot of medianLifeExp over time.
# expand_limits() is a plot component that is added with `+`; it is not an
# aesthetic, so it must sit outside aes() for the y-axis to include zero.
ggplot(by_year, aes(x = year, y = medianLifeExp)) +
  geom_point() +
  expand_limits(y = 0)
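How to use ggplot to visualize grouped summaries:
Notice the position of expand_limits(y = 0): it is added to the plot as its own component with +, after geom_point(), and is not placed inside aes().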
library(gapminder)
library(dplyr)
library(ggplot2)
# Median gdpPercap for every year/continent combination.
# .groups = "drop" stores an ungrouped table; by default summarize() would
# leave it grouped by year and print a regrouping message.
by_year_continent <- gapminder %>%
  group_by(year, continent) %>%
  summarize(
    medianGdpPercap = median(gdpPercap),
    .groups = "drop"
  )

# Scatter plot of medianGdpPercap over time, one color per continent;
# expand_limits(y = 0) stretches the y-axis so that it includes zero
ggplot(
  by_year_continent,
  aes(x = year, y = medianGdpPercap, color = continent)
) +
  geom_point() +
  expand_limits(y = 0)
library(gapminder)
library(dplyr)
library(ggplot2)
# For 2007 only: median lifeExp and median gdpPercap of each continent
by_continent_2007 <- gapminder %>%
  filter(year == 2007) %>%
  group_by(continent) %>%
  summarize(
    medianLifeExp = median(lifeExp),
    medianGdpPercap = median(gdpPercap)
  )

# Scatter plot of the two medians against each other, one colored point per
# continent
ggplot(
  by_continent_2007,
  aes(x = medianGdpPercap, y = medianLifeExp, color = continent)
) +
  geom_point()
construct a line graph:
library(gapminder)
library(dplyr)
library(ggplot2)
# Median gdpPercap for each year, stored as by_year
by_year <- gapminder %>%
  group_by(year) %>%
  summarize(medianGdpPercap = median(gdpPercap))

# Line plot of medianGdpPercap over time; geom_line() connects the yearly
# values and expand_limits(y = 0) makes the y-axis include zero
ggplot(by_year, aes(x = year, y = medianGdpPercap)) +
  geom_line() +
  expand_limits(y = 0)
library(gapminder)
library(dplyr)
library(ggplot2)
# Median gdpPercap for every year/continent combination.
# .groups = "drop" stores an ungrouped table; by default summarize() would
# leave it grouped by year and print a regrouping message.
by_year_continent <- gapminder %>%
  group_by(year, continent) %>%
  summarize(
    medianGdpPercap = median(gdpPercap),
    .groups = "drop"
  )

# Line plot of medianGdpPercap over time; mapping color to continent draws a
# separate colored line per continent, and expand_limits(y = 0) makes the
# y-axis include zero
ggplot(
  by_year_continent,
  aes(x = year, y = medianGdpPercap, color = continent)
) +
  geom_line() +
  expand_limits(y = 0)
how to create bar chart:
library(gapminder)
library(dplyr)
library(ggplot2)
# For 1952 only: median gdpPercap of each continent
by_continent <- gapminder %>%
  filter(year == 1952) %>%
  group_by(continent) %>%
  summarize(medianGdpPercap = median(gdpPercap))

# Bar plot with one bar per continent; geom_col() uses the y value as the
# bar height
ggplot(by_continent, aes(x = continent, y = medianGdpPercap)) +
  geom_col()
library(gapminder)
library(dplyr)
library(ggplot2)
# Keep only the 1952 rows whose continent is Oceania
oceania_1952 <- gapminder %>%
  filter(
    year == 1952,
    continent == "Oceania"
  )

# Bar plot with one bar per country, bar height given by gdpPercap
ggplot(oceania_1952, aes(x = country, y = gdpPercap)) +
  geom_col()
how to use histogram :
library(gapminder)
library(dplyr)
library(ggplot2)
# 1952 subset with an extra column, pop_by_mil, holding pop divided by
# one million
gapminder_1952 <- gapminder %>%
  filter(year == 1952) %>%
  mutate(pop_by_mil = pop / 1000000)

# Histogram of pop_by_mil; only x is mapped, and the values are split into
# 50 bins
ggplot(gapminder_1952, aes(x = pop_by_mil)) +
  geom_histogram(bins = 50)
boxplot:
library(gapminder)
library(dplyr)
library(ggplot2)
# Subset gapminder to the 1952 observations
gapminder_1952 <- gapminder %>%
  filter(year == 1952)

# One box per continent showing the distribution of gdpPercap, with the
# y-axis on a log10 scale
ggplot(gapminder_1952, aes(x = continent, y = gdpPercap)) +
  geom_boxplot() +
  scale_y_log10()
Use ggtitle to add a title to your graph:
library(gapminder)
library(dplyr)
library(ggplot2)
# Subset gapminder to the 1952 observations
gapminder_1952 <- gapminder %>%
  filter(year == 1952)

# Boxplot of gdpPercap per continent (log10 y-axis); ggtitle() is added as a
# further component with `+` and sets the plot title
ggplot(gapminder_1952, aes(x = continent, y = gdpPercap)) +
  geom_boxplot() +
  scale_y_log10() +
  ggtitle("Comparing GDP per capita across continents")