虽然Hive提供了很多内置函数,但是在实际工作中,还是经常需要根据具体的业务需求实现自己的函数,这就是用户自定义函数(UDF)。
1.编写代码:
我们将编写一个根据日期返回对应星座的函数,具体实现代码如下:
package com.pptb.bigdata.hive.udf;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import java.text.SimpleDateFormat;
import java.util.Date;
/**
 * Hive UDF that maps a birth date to the (Chinese) name of its Western zodiac sign.
 *
 * <p>Three overloads are exposed to Hive: a {@link Date}, a {@code yyyy-MM-dd} date
 * string, or a separate month / day-of-month pair. Every overload returns {@code null}
 * (SQL NULL) when the input is null, cannot be parsed, or is not a valid calendar day.
 */
@Description(name = "zodiac",
        value = "_FUNC_(date) from the input date string or separate month and day arugments,returns the sign of the Zodiac.",
        extended = "Example:\n"
                + ">SELECT _FUNC_(date string) from src;\n"
                + ">SELECT _FUNC_(month,day) from src;")
public class UDFZodiacSign extends UDF {

    /**
     * Sign that is in effect on the first day of each month (index 0 = January).
     * The sign that takes over later in the month is the next entry, wrapping
     * from December back to index 0.
     */
    private static final String[] SIGN_AT_MONTH_START = {
        "摩羯座", "水瓶座", "双鱼座", "白羊座", "金牛座", "双子座",
        "巨蟹座", "狮子座", "处女座", "天秤座", "天蝎座", "射手座"
    };

    /** Last day of each month (index 0 = January) that still belongs to the month's first sign. */
    private static final int[] LAST_DAY_OF_FIRST_SIGN = {20, 19, 20, 20, 21, 21, 22, 23, 23, 23, 22, 22};

    /** Largest valid day-of-month per month; February allows 29 because no year is supplied. */
    private static final int[] MAX_DAY_OF_MONTH = {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};

    private SimpleDateFormat dateFormat;

    /** Creates the UDF with a strict {@code yyyy-MM-dd} parser. */
    public UDFZodiacSign() {
        this.dateFormat = new SimpleDateFormat("yyyy-MM-dd");
        // Strict parsing: without this, "2017-02-30" silently rolls over to March 2nd
        // and the function would report the sign of a different day.
        this.dateFormat.setLenient(false);
    }

    /**
     * Returns the zodiac sign for the given date.
     *
     * @param birthDay the date to classify; may be null
     * @return the sign name, or null when {@code birthDay} is null
     */
    public String evaluate(Date birthDay) {
        if (birthDay == null) {
            return null;
        }
        return signOf(birthDay);
    }

    /**
     * Returns the zodiac sign for a date given as a {@code yyyy-MM-dd} string.
     *
     * @param birthDay the date string to classify; may be null
     * @return the sign name, or null when the string is null or is not a valid date
     */
    public String evaluate(String birthDay) {
        if (birthDay == null) {
            return null;
        }
        Date parsed;
        try {
            parsed = this.dateFormat.parse(birthDay);
        } catch (java.text.ParseException ex) {
            // Malformed input maps to SQL NULL instead of failing the whole query.
            return null;
        }
        return signOf(parsed);
    }

    /**
     * Returns the zodiac sign for a month and day-of-month.
     *
     * @param month month of the year, 1-12; may be null
     * @param day day of the month, starting at 1; may be null
     * @return the sign name, or null when either argument is null or out of range
     */
    public String evaluate(Integer month, Integer day) {
        if (month == null || day == null) {
            return null;
        }
        if (month < 1 || month > 12) {
            return null;
        }
        int index = month - 1;
        if (day < 1 || day > MAX_DAY_OF_MONTH[index]) {
            return null;
        }
        if (day <= LAST_DAY_OF_FIRST_SIGN[index]) {
            return SIGN_AT_MONTH_START[index];
        }
        // Late in the month the following sign applies; December wraps to the January entry.
        return SIGN_AT_MONTH_START[(index + 1) % SIGN_AT_MONTH_START.length];
    }

    /** Extracts month and day from a non-null date and delegates to the month/day overload. */
    @SuppressWarnings("deprecation")
    private String signOf(Date date) {
        // Date#getMonth() is zero-based, hence the +1.
        return this.evaluate(date.getMonth() + 1, date.getDate());
    }
}
编写Hive UDF函数需要引用如下两个jar:
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>2.0.1</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.7.3</version>
</dependency>
2.打包代码:
代码写好后,我们将函数打包,建议将所有引用的jar打包到一起,也就是打包成fat jar,本地打包的jar地址:
/Users/wesley/rework/bigdata/target/bigdata-1.0-SNAPSHOT-jar-with-dependencies.jar
3.在Hive中使用:
在Hive CLI中通过命令add jar 把我们打包的jar加入到类路径下:
hive > add jar /Users/wesley/rework/bigdata/target/bigdata-1.0-SNAPSHOT-jar-with-dependencies.jar;
然后通过create function命令定义我们的函数:
hive > create temporary function zodiac as 'com.pptb.bigdata.hive.udf.UDFZodiacSign';
现在我们可以使用这个函数了:
hive >select zodiac('2017-06-05') from dual;