获得赛题原始数据(用电数据yu):
CREATE TABLE IF NOT EXISTS fyd_tianchi_power2
AS
SELECT *
FROM odps_tc_257100_f673506e024.tianchi_power2;
CREATE TABLE IF NOT EXISTS fyd_tianchi_weather_data
AS
SELECT *
FROM odps_tc_257100_f673506e024.tianchi_weather_data;
将原数据表中的日期为字符串格式,将其中‘2015/1/1’形式改为‘2015/01/01’格式再转为时间戳格式:
CREATE TABLE IF NOT EXISTS fyd_tianchi_power_data2
AS
SELECT TO_DATE(CASE
WHEN record_date LIKE '____/_/_' THEN concat(regexp_extract(record_date, '(.+/)(.+/)(.+)', 1), '0', regexp_extract(record_date, '(.+/)(.+/)(.+)', 2), '0', regexp_extract(record_date, '(.+/)(.+/)(.+)', 3))
WHEN record_date LIKE '____/__/_' THEN concat(regexp_extract(record_date, '(.+/)(.+/)(.+)', 1), regexp_extract(record_date, '(.+/)(.+/)(.+)', 2), '0', regexp_extract(record_date, '(.+/)(.+/)(.+)', 3))
WHEN record_date LIKE '____/_/__' THEN concat(regexp_extract(record_date, '(.+/)(.+/)(.+)', 1), '0', regexp_extract(record_date, '(.+/)(.+/)(.+)', 2), regexp_extract(record_date, '(.+/)(.+/)(.+)', 3))
ELSE record_date
END, 'yyyy/mm/dd') AS record_date, user_id, power_consumption
FROM fyd_tianchi_power2
将相同日期的数据求和汇总:
CREATE TABLE IF NOT EXISTS fyd_sum_power_data2
AS
SELECT record_date, SUM(power_consumption) AS sum_consumption
FROM fyd_tianchi_power_data2
GROUP BY record_date
输出显示刚才的数据:
select * from fyd_sum_power_data2
查看选取某一用户的数据:
SELECT record_date, power_consumption
FROM fyd_tianchi_power_data2
WHERE user_id = 175
ORDER BY record_date
LIMIT 700