1. First, develop a Java class that extends UDF and overloads the evaluate method
package com.wlw.udf;

import com.alibaba.fastjson.JSON;
import org.apache.hadoop.hive.ql.exec.UDF;

/**
 * Parses a JSON line into a MovieRateBean and returns it as a tab-separated string
 * @author Tomas
 */
public class JsonPraser extends UDF {
    public String evaluate(String str) {
        MovieRateBean movieRateBean = JSON.parseObject(str, MovieRateBean.class);
        return movieRateBean.toString();
    }
}
package com.wlw.udf;

/**
 * JavaBean class holding one rating record
 * @author Tomas
 */
public class MovieRateBean {
    private String movie;
    private String rate;
    private String timeStamp;
    private String uid;

    public String getMovie() {
        return movie;
    }
    public void setMovie(String movie) {
        this.movie = movie;
    }
    public String getRate() {
        return rate;
    }
    public void setRate(String rate) {
        this.rate = rate;
    }
    public String getTimeStamp() {
        return timeStamp;
    }
    public void setTimeStamp(String timeStamp) {
        this.timeStamp = timeStamp;
    }
    public String getUid() {
        return uid;
    }
    public void setUid(String uid) {
        this.uid = uid;
    }

    @Override
    public String toString() {
        return movie + "\t" + rate + "\t" + timeStamp + "\t" + uid;
    }
}
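Before packaging, the UDF can be sanity-checked locally with a plain main method. A minimal sketch, assuming the sample JSON line and the JsonPraserTest class name (both are illustrative, not part of the original project):
package com.wlw.udf;

public class JsonPraserTest {
    public static void main(String[] args) {
        JsonPraser udf = new JsonPraser();
        // assumed sample record whose keys match the MovieRateBean fields
        String sample = "{\"movie\":\"1193\",\"rate\":\"5\",\"timeStamp\":\"978300760\",\"uid\":\"1\"}";
        // expected output: the four values joined by tabs
        System.out.println(udf.evaluate(sample));
    }
}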
2. Package the class files into a jar and upload it to the server
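One possible way to do this, assuming the compiled classes sit under the current directory and the target path matches the one used in step 4 (the host name is a placeholder):
jar -cvf udf.jar com/wlw/udf/*.class
scp udf.jar hadoop@<hive-server>:/home/hadoop/udf.jar
Note that the UDF depends on fastjson, so the fastjson jar must also be made available to Hive (for example with another add JAR in step 4).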
3. Create the source table and load the data
create table rat_json(line string) row format delimited;
load data local inpath '/home/hadoop/rating.json' into table rat_json;
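Each line of rating.json is expected to be a single JSON record whose keys match the MovieRateBean fields; an illustrative line (the values are made up) and a quick check of the load:
{"movie":"1193","rate":"5","timeStamp":"978300760","uid":"1"}
select line from rat_json limit 3;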
4. Add the jar to Hive's classpath
hive> add JAR /home/hadoop/udf.jar;
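add JAR only takes effect for the current session; the registration can be confirmed with:
hive> list jars;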
5. Create a custom function and associate it with the developed Java class
create temporary function jsonpar as 'com.wlw.udf.JsonPraser';
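A quick sanity check that the function works: select jsonpar(line) from rat_json limit 3;
The insert in step 6 below writes into a table t_rating that is not created anywhere above; a minimal sketch of its DDL (all columns assumed to be string, names matching the aliases used in step 6):
create table t_rating(movieid string, rate string, timestring string, uid string)
row format delimited fields terminated by '\t';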
6. Use the custom function
insert overwrite table t_rating
select split(jsonpar(line),'\t')[0] as movieid,
       split(jsonpar(line),'\t')[1] as rate,
       split(jsonpar(line),'\t')[2] as timestring,
       split(jsonpar(line),'\t')[3] as uid
from rat_json limit 10;
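The query above calls jsonpar four times per row; an equivalent form that parses each line once inside a subquery (the alias parsed is purely illustrative):
insert overwrite table t_rating
select split(parsed,'\t')[0] as movieid,
       split(parsed,'\t')[1] as rate,
       split(parsed,'\t')[2] as timestring,
       split(parsed,'\t')[3] as uid
from (select jsonpar(line) as parsed from rat_json limit 10) t;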
7. Use Hive's built-in function instead
select get_json_object(line,'$.movie') as movie,
       get_json_object(line,'$.rate') as rate
from rat_json limit 10;
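Besides get_json_object, Hive ships json_tuple, which extracts several keys in a single call; a sketch pulling all four fields, assuming the JSON keys match the bean fields above:
select t.movie, t.rate, t.timestring, t.uid
from rat_json
lateral view json_tuple(line,'movie','rate','timeStamp','uid') t as movie, rate, timestring, uid
limit 10;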