学会使用和操作时间数据 —— 课程目录
Chapter1. R里的时间和数据
Chapter2. 操作和剖析时间数据
Chapter3. 对时间数据进行计算
Chapter4. 问题实践
Chapter2. 操作和剖析时间数据
使用 lubridate 包
lubridate 是一个很便利的包,可以用来解析和操作各种格式的时间数据。它的解析函数按日期各部分出现的顺序命名:比方说 ymd() 用于解析"年、月、日"顺序的日期,dmy() 用于解析"日、月、年"顺序的日期。
library(lubridate)
# Pick the lubridate parser whose name matches the order of the components
# in the string. Lines starting with [1] are the console output of the call
# directly above them.
# Parse x: year, then month, then day -> ymd()
x <- "2010 September 20th" # 2010-09-20
ymd(x)
[1] "2010-09-20"
# Parse y: day, then month, then year -> dmy()
y <- "02.01.2010" # 2010-01-02
dmy(y)
[1] "2010-01-02"
# Parse z: month, day, year followed by hours and minutes -> mdy_hm()
z <- "Sep, 12th 2010 14:00" # 2010-09-12T14:00
mdy_hm(z)
[1] "2010-09-12 14:00:00 UTC"
# Specify order to include both "mdy" and "dmy"
# The first three strings are written month-day-year and the last two
# day-month-year, so both orders are passed to parse_date_time().
two_orders <- c("October 7, 2001", "October 13, 2002", "April 13, 2003",
"17 April 2005", "23 April 2017")
parse_date_time(two_orders, orders = c("mdy","dmy"))
[1] "2001-10-07 UTC" "2002-10-13 UTC" "2003-04-13 UTC" "2005-04-17 UTC"
[5] "2017-04-23 UTC"
# Specify order to include "dOmY", "OmY" and "Y"
# "Om" is lubridate's alternative month specifier (numeric months as well as
# English month names); "Y" is the year with century. As the printed result
# shows, a missing day or month comes back as 1 / January.
short_dates <- c("11 December 1282", "May 1372", "1253")
parse_date_time(short_dates, orders = c("dOmY","OmY","Y"))
[1] "1282-12-11 UTC" "1372-05-01 UTC" "1253-01-01 UTC"
# Specify an order string to parse x
# Order letters used here (see the lubridate parse_date_time() docs):
#   A = weekday name, m = month, d = day, y = year,
#   I = hour on the 12-hour clock, p = AM/PM indicator.
# The literal word "at" in the input has no letter in the order string;
# the printed result shows the string still parses to 16:00.
x <- "Monday June 1st 2010 at 4pm"
parse_date_time(x, orders = "AmdyIp")
[1] "2010-06-01 16:00:00 UTC"
# NOTE(review): the two examples below are a verbatim repeat of the
# two_orders / short_dates examples that appear earlier in this document.
# Specify order to include both "mdy" and "dmy"
two_orders <- c("October 7, 2001", "October 13, 2002", "April 13, 2003",
"17 April 2005", "23 April 2017")
parse_date_time(two_orders, orders = c("mdy","dmy"))
[1] "2001-10-07 UTC" "2002-10-13 UTC" "2003-04-13 UTC" "2005-04-17 UTC"
[5] "2017-04-23 UTC"
# Specify order to include "dOmY", "OmY" and "Y"
short_dates <- c("11 December 1282", "May 1372", "1253")
parse_date_time(short_dates, orders = c("dOmY","OmY","Y"))
[1] "1282-12-11 UTC" "1372-05-01 UTC" "1253-01-01 UTC"
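这里总结一下常用的字母符号代表的意思(详见 lubridate 中 parse_date_time() 的文档):
- d:日(数字)
- m:月(数字;在 parse_date_time() 中也能匹配英文月份名)
- b / B:英文月份名的缩写 / 全称
- y:不含世纪的年份(两位);Y:含世纪的年份(四位)
- a / A:星期名的缩写 / 全称
- H:小时(24小时制);I:小时(12小时制);p:AM/PM 标记
- M:分钟;S:秒
- z:时区偏移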
再来一个练习:读取一个 csv 文件,用 ymd() 将其中的 date 列解析为日期格式,然后组合其他变量进行可视化。
library(lubridate)
library(readr)
library(dplyr)
library(ggplot2)
# Import the daily weather data first: akl_daily_raw must exist before it can
# be piped into mutate().
# NOTE(review): the file name is assumed by analogy with the hourly example
# ("akl_weather_hourly_2016.csv") - adjust the path to your copy of the data.
akl_daily_raw <- read_csv("akl_weather_daily.csv")
# Parse date: convert the date column to Date values with ymd()
akl_daily <- akl_daily_raw %>%
  mutate(date = ymd(date))
# Print akl_daily
akl_daily
# A tibble: 3,661 x 7
date max_temp min_temp mean_temp mean_rh events cloud_cover
<date> <int> <int> <int> <int> <chr> <int>
1 2007-09-01 60 51 56 75 <NA> 4
2 2007-09-02 60 53 56 82 Rain 4
3 2007-09-03 57 51 54 78 <NA> 6
4 2007-09-04 64 50 57 80 Rain 6
5 2007-09-05 53 48 50 90 Rain 7
6 2007-09-06 57 42 50 69 <NA> 1
7 2007-09-07 59 41 50 77 <NA> 4
8 2007-09-08 59 46 52 80 <NA> 5
9 2007-09-09 55 50 52 88 Rain 7
10 2007-09-10 59 50 54 82 Rain 4
# ... with 3,651 more rows
# Visual check of the parsed dates: line plot of max_temp against date
ggplot(data = akl_daily, mapping = aes(x = date, y = max_temp)) +
  geom_line()
Warning message: Removed 1 row(s) containing missing values (geom_path).
library(lubridate)
library(readr)
library(dplyr)
library(ggplot2)
# Read "akl_weather_hourly_2016.csv" and print the raw table
akl_hourly_raw <- read_csv(file = "akl_weather_hourly_2016.csv")
akl_hourly_raw
# Build all derived columns in one mutate() call (later columns may use
# earlier ones):
#   date            - make_date() combining the year, month and mday columns
#   datetime_string - date and time pasted together with a "T" separator
#   datetime        - datetime_string parsed by ymd_hms()
akl_hourly <- akl_hourly_raw %>%
  mutate(
    date = make_date(year = year, month = month, day = mday),
    datetime_string = paste(date, time, sep = "T"),
    datetime = ymd_hms(datetime_string)
  )
# Show only the date, time and datetime columns of akl_hourly
select(akl_hourly, date, time, datetime)
# Visual check of the parsed datetimes: line plot of temperature against
# datetime
ggplot(data = akl_hourly, mapping = aes(x = datetime, y = temperature)) +
  geom_line()