Hive 自定义UDF函数实现日期格式化和字段AES加密
自定义日期格式化UDF函数
- 首先自定义日期解析转换的工具类
import org.apache.commons.lang3.StringUtils;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
/**
 * Date parsing and conversion helpers.
 *
 * <p>An input string is matched against a fixed list of known patterns and re-rendered
 * in a single target pattern. Parsing is lenient (the {@link SimpleDateFormat} default),
 * so out-of-range fields roll over instead of being rejected, but the pattern must
 * consume the whole input for it to count as a match.
 *
 * @author root
 */
public class DateUtils {

    /** Input patterns, tried in this order; the first one that consumes the whole input wins. */
    private static final String[] PARSE_PATTERNS = {"yyyy-MM-dd", "yyyy年MM月dd日",
            "yyyy-MM-dd HH:mm:ss", "yyyy-MM-dd HH:mm", "yyyy/MM/dd",
            "yyyy/MM/dd HH:mm:ss", "yyyy/MM/dd HH:mm", "yyyyMMdd", "yyyyMMdd HH:mm:ss"};

    /** Output pattern used by {@link #unionformDate(String)}. */
    private static final String DEFAULT_RESULT_PATTERN = "yyyy-MM-dd";

    /**
     * Checks whether {@code string} can be parsed completely with {@code pattern}.
     *
     * @param string  the text to test; may be {@code null}
     * @param pattern a {@link SimpleDateFormat} pattern
     * @return {@code pattern} itself when the whole string matches it, otherwise {@code null}
     *         (also {@code null} when {@code string} is {@code null})
     * @throws IllegalArgumentException if {@code pattern} is not a valid date pattern
     */
    public static String parseDate(String string, String pattern) {
        return tryParse(string, pattern) == null ? null : pattern;
    }

    /**
     * Converts a date string in any of the known input patterns to {@code yyyy-MM-dd}.
     *
     * @param rawDate the date text to convert; may be {@code null}
     * @return the reformatted date, or an empty string when no known pattern matches
     */
    public static String unionformDate(String rawDate) {
        return unionformDateWithPattern(rawDate, DEFAULT_RESULT_PATTERN);
    }

    /**
     * Converts a date string in any of the known input patterns to {@code resultPattern}.
     *
     * @param rawDate       the date text to convert; may be {@code null}
     * @param resultPattern the {@link SimpleDateFormat} pattern of the output
     * @return the reformatted date, or an empty string when no known pattern matches
     *         or {@code resultPattern} is {@code null} or invalid
     */
    public static String unionformDateWithPattern(String rawDate, String resultPattern) {
        if (rawDate == null || resultPattern == null) {
            return "";
        }
        for (String parsePattern : PARSE_PATTERNS) {
            Date parsed = tryParse(rawDate, parsePattern);
            if (parsed == null) {
                continue;
            }
            try {
                // Stop at the first matching pattern; the Date from that single parse is reused.
                return new SimpleDateFormat(resultPattern).format(parsed);
            } catch (IllegalArgumentException e) {
                // An invalid output pattern cannot succeed for any input pattern: report once.
                e.printStackTrace();
                return "";
            }
        }
        return "";
    }

    /**
     * Parses {@code text} with {@code pattern}, requiring the pattern to consume the entire text.
     * A new formatter is created per call because {@link SimpleDateFormat} is not thread-safe.
     *
     * @return the parsed date, or {@code null} when {@code text} is {@code null} or does not match
     */
    private static Date tryParse(String text, String pattern) {
        if (text == null) {
            return null;
        }
        java.text.ParsePosition position = new java.text.ParsePosition(0);
        Date parsed = new SimpleDateFormat(pattern).parse(text, position);
        boolean consumedAll = position.getIndex() == text.length();
        return (parsed != null && consumedAll) ? parsed : null;
    }
}
- 自定义日期转换UDF函数
import com.miminglamp.utils.DateUtils;
import org.apache.hadoop.hive.ql.exec.UDF;
/**
 * Hive UDF that rewrites date strings of several known layouts into one target format.
 *
 * <p>Inputs that carry no date (SQL NULL, the empty string, or the literal text
 * {@code "null"} / {@code "NULL"}) yield the placeholder {@code "9999-99-99"}.
 */
public class DateUnionUDF extends UDF {

    /** Placeholder returned when the input carries no date. */
    private static final String MISSING_DATE = "9999-99-99";

    /** Output format used by the single-argument overload. */
    private static final String DEFAULT_FORMAT = "yyyy-MM-dd";

    /**
     * Converts {@code singleDate} to the given output format.
     *
     * @param singleDate the raw date text; may be {@code null}
     * @param format     the {@code SimpleDateFormat} pattern of the output
     * @return the reformatted date, {@code "9999-99-99"} when the input carries no date,
     *         or whatever {@code DateUtils.unionformDateWithPattern} returns for text it
     *         cannot parse
     */
    public String evaluate(String singleDate, String format) {
        // Return the placeholder directly. Passing "9999-99-99 00:00:00" through the
        // lenient date parser would roll month 99 / day 99 over into a real, wrong date.
        if (isMissing(singleDate)) {
            return MISSING_DATE;
        }
        return DateUtils.unionformDateWithPattern(singleDate, format);
    }

    /**
     * Converts {@code singleDate} to the default {@code yyyy-MM-dd} format.
     *
     * @param singleDate the raw date text; may be {@code null}
     * @return same as {@link #evaluate(String, String)} with format {@code yyyy-MM-dd}
     */
    public String evaluate(String singleDate) {
        return evaluate(singleDate, DEFAULT_FORMAT);
    }

    /** Tells whether the value is NULL, empty, or the literal text "null" / "NULL". */
    private static boolean isMissing(String value) {
        return value == null
                || value.isEmpty()
                || "null".equals(value)
                || "NULL".equals(value);
    }

    /** Manual smoke test: prints one conversion per overload. */
    public static void main(String[] args) {
        DateUnionUDF dateUnionUDF = new DateUnionUDF();
        System.out.println(dateUnionUDF.evaluate("2020/12/12"));
        System.out.println(dateUnionUDF.evaluate("2020/12/12", "yyyy-MM-dd HH:mm:ss"));
    }
}
自定义字段AES加密函数
- 首先创建AES加密的工具类
import sun.misc.BASE64Decoder;
import sun.misc.BASE64Encoder;
import javax.crypto.*;
import javax.crypto.spec.SecretKeySpec;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.security.InvalidKeyException;
import java.security.NoSuchAlgorithmException;
import java.security.SecureRandom;
import java.util.Scanner;
/*
 * AES symmetric encryption and decryption helpers.
 *
 * The key is derived by seeding a SHA1PRNG SecureRandom with the caller's rule string
 * and letting a 128-bit AES KeyGenerator draw from it. Ciphertext is exchanged as
 * single-line Base64 text.
 *
 * NOTE(review): the derivation scheme and the bare "AES" transformation are kept
 * unchanged so that previously produced ciphertext still decrypts. Both are weak
 * choices (the key depends on the provider's SHA1PRNG implementation, and "AES" with
 * no mode/padding uses provider defaults, presumably ECB/PKCS5Padding) - confirm
 * whether a migration is wanted.
 */
public class AES {

    /*
     * Derives the 128-bit AES key from the rule string.
     * The seed bytes are taken as UTF-8 so the key does not depend on the platform
     * default charset (identical to the previous behaviour for ASCII rule strings).
     */
    private static SecretKey deriveKey(String encodeRules) throws NoSuchAlgorithmException {
        KeyGenerator keygen = KeyGenerator.getInstance("AES");
        SecureRandom random = SecureRandom.getInstance("SHA1PRNG");
        // Seeding before the first draw makes the generated key a function of the rule string.
        random.setSeed(encodeRules.getBytes(StandardCharsets.UTF_8));
        keygen.init(128, random);
        byte[] raw = keygen.generateKey().getEncoded();
        return new SecretKeySpec(raw, "AES");
    }

    /*
     * Encrypts content with the key derived from encodeRules.
     *
     * Returns the ciphertext as Base64 text without line breaks, or null when
     * encryption fails (the failure is printed to stderr).
     * Throws NullPointerException if either argument is null.
     */
    public static String AESEncode(String encodeRules, String content) {
        try {
            Cipher cipher = Cipher.getInstance("AES");
            cipher.init(Cipher.ENCRYPT_MODE, deriveKey(encodeRules));
            // UTF-8 keeps mixed Chinese/English content intact across platforms.
            byte[] encrypted = cipher.doFinal(content.getBytes(StandardCharsets.UTF_8));
            // The basic encoder never inserts line separators, so the value stays on one
            // line however long the input is (sun.misc.BASE64Encoder wrapped at 76 chars).
            return java.util.Base64.getEncoder().encodeToString(encrypted);
        } catch (NoSuchAlgorithmException | NoSuchPaddingException | InvalidKeyException
                | IllegalBlockSizeException | BadPaddingException e) {
            e.printStackTrace();
        }
        // Any failure is reported as null.
        return null;
    }

    /*
     * Decrypts Base64 ciphertext produced by AESEncode with the same encodeRules.
     *
     * Returns the plaintext, or null when the text is not valid Base64 or cannot be
     * decrypted with this key (the failure is printed to stderr).
     * Throws NullPointerException if either argument is null.
     */
    public static String AESDncode(String encodeRules, String content) {
        try {
            Cipher cipher = Cipher.getInstance("AES");
            cipher.init(Cipher.DECRYPT_MODE, deriveKey(encodeRules));
            // The MIME decoder skips line separators, so line-wrapped values written by
            // the old sun.misc encoder are still accepted.
            byte[] encrypted = java.util.Base64.getMimeDecoder().decode(content);
            byte[] decrypted = cipher.doFinal(encrypted);
            return new String(decrypted, StandardCharsets.UTF_8);
        } catch (NoSuchAlgorithmException | NoSuchPaddingException | InvalidKeyException
                | IllegalBlockSizeException | BadPaddingException | IllegalArgumentException e) {
            e.printStackTrace();
        }
        // Any failure is reported as null.
        return null;
    }

    /* Interactive demo: reads a plaintext and then a ciphertext from stdin and prints the results. */
    public static void main(String[] args) {
        Scanner scanner = new Scanner(System.in);
        /*
         * Encrypt
         */
        String encodeRules = "cdp2021";
        System.out.println("使用AES对称加密,请输入加密的规则");
        System.out.println("请输入要加密的内容:");
        String content = scanner.next();
        System.out.println("根据输入的规则"+encodeRules+"加密后的密文是:"+ AESEncode(encodeRules, content));
        /*
         * Decrypt
         */
        System.out.println("使用AES对称解密,请输入加密的规则:(须与加密相同)");
        System.out.println("请输入要解密的内容(密文):");
        content = scanner.next();
        System.out.println("根据输入的规则"+encodeRules+"解密后的明文是:"+ AESDncode(encodeRules, content));
    }
}
- 创建AES UDF函数
import com.miminglamp.aes.AES;
import org.apache.hadoop.hive.ql.exec.UDF;
/**
 * Hive UDF that AES-encrypts or AES-decrypts a string value through {@code AES}.
 */
public class AESUDF extends UDF {
    // NOTE(review): the key material is hard-coded in source and therefore ships inside
    // the jar - confirm whether it should come from configuration instead.
    String password = "CDP2021";

    /**
     * Encrypts or decrypts {@code content} depending on {@code type}.
     *
     * @param type    {@code "encode"} to encrypt, {@code "decode"} to decrypt
     * @param content the plaintext (encode) or Base64 ciphertext (decode); may be {@code null}
     * @return {@code null} when {@code content} is {@code null}; otherwise the value returned
     *         by {@code AES.AESEncode} / {@code AES.AESDncode}
     * @throws IllegalArgumentException if {@code content} is non-null and {@code type} is
     *         neither {@code "encode"} nor {@code "decode"} (including {@code null})
     * @throws Exception kept in the signature for compatibility with existing callers
     */
    public String evaluate(String type, String content) throws Exception {
        if (content == null) {
            return null;
        }
        // Constant-first equals: a NULL type is rejected with the message below
        // instead of a bare NullPointerException.
        if ("encode".equals(type)) {
            return AES.AESEncode(password, content);
        }
        if ("decode".equals(type)) {
            return AES.AESDncode(password, content);
        }
        throw new IllegalArgumentException("Parmeter one is needed encode/decode");
    }

    /** Manual smoke test: encrypts a sample value and decrypts a sample ciphertext. */
    public static void main(String[] args) {
        try {
            System.out.println(new AESUDF().evaluate("encode", "test"));
            System.out.println(new AESUDF().evaluate("decode", "2YDc41+Y7OIKBgfqNMwDD9koGoxAy52jGiRdJC8A/+o="));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
函数的临时注册和永久注册
- 永久注册:
上传Jar包到HDFS的指定路径
CREATE FUNCTION 数据库名.函数名 AS '包名.类名' USING JAR 'hdfs:///path/xxxx.jar';
- 删除永久函数:
drop function dev.dateunion;
临时注册:
add jar /path/xx.jar(存储在本地磁盘)
- 临时注册UDF函数(hive会话生效)
create temporary function 函数名 as '包名.类名';
- 删除临时函数:
drop temporary function 函数名;(临时函数不属于任何数据库,函数名前不能加数据库名)
测试UDF函数使用
# 注册永久UDF函数:日期格式化
CREATE FUNCTION dev.dateunion AS 'com.miminglamp.udf.DateUnionUDF' USING JAR 'hdfs:///opt/kezhen/hive/udf/HiveUDF-1.0.jar';
# 注册永久UDF函数:AES加密/解密
CREATE FUNCTION dev.aesencodes AS 'com.miminglamp.udf.AESUDF' USING JAR 'hdfs:///opt/kezhen/hive/udf/HiveUDF-1.0.jar';
# 使用默认的 yyyy-MM-dd 格式
select id,dev.dateunion(times) times from app_tmp.hive_udf;
# 传入目标格式
select id,dev.dateunion(times,'yyyy-MM-dd HH:mm:ss') times from app_tmp.hive_udf;
# 使用AES加密
select id,dev.aesencodes('encode',times) times from app_tmp.hive_udf;
# 使用AES解密
select id, dev.aesencodes('decode',times) times from app_tmp.aesencode;
项目pom.xml
创建普通的IDEA的maven-quickstart工程即可,创建工程参考:https://blog.csdn.net/qq_43081842/article/details/105294308
最后附上Pom.xml
<!-- Build-wide settings: source encoding, compiler levels and the platform versions
     that the dependency declarations reference through ${...} placeholders. -->
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<!-- NOTE(review): source/target are 1.7 while jdk.version below says 1.8; confirm which level is intended. -->
<maven.compiler.source>1.7</maven.compiler.source>
<maven.compiler.target>1.7</maven.compiler.target>
<jdk.version>1.8</jdk.version>
<!-- NOTE(review): the two Scala properties are not referenced anywhere in the visible part of this POM. -->
<scala.version>2.11.8</scala.version>
<scala.compat.version>2.11</scala.compat.version>
<!-- CDH 6.0.1 builds of Hadoop, Spark and Hive. -->
<hadoop.version>3.0.0-cdh6.0.1</hadoop.version>
<spark.version>2.2.0-cdh6.0.1</spark.version>
<hive.version>2.1.1-cdh6.0.1</hive.version>
</properties>
<!-- Remote repositories. All URLs use https: Maven 3.8.1 and later block plain-http
     repositories by default, so http URLs fail to resolve there. -->
<repositories>
<!-- Hosts the *-cdh6.0.1 artifacts referenced by the version properties. -->
<repository>
<id>cloudera</id>
<url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
</repository>
<repository>
<id>aliyun</id>
<url>https://maven.aliyun.com/nexus/content/groups/public/</url>
</repository>
<repository>
<id>jboss</id>
<url>https://repository.jboss.org/nexus/content/groups/public/</url>
</repository>
</repositories>
<!-- Project dependencies.
     NOTE(review): the UDF classes shown with this POM import only commons-lang3 and
     hive-exec classes (plus the JDK); jedis, spark-*, fastjson, httpcore/httpclient,
     the logging artifacts, mysql-connector-java and the hadoop-* artifacts are presumably
     left over from another project. Confirm before trimming. -->
<dependencies>
<!-- Unit testing only. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>redis.clients</groupId>
<artifactId>jedis</artifactId>
<version>3.0.1</version>
<!-- <scope>provided</scope>-->
</dependency>
<!-- Spark artifacts; versions come from the spark.version property. -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>${spark.version}</version>
<!-- <scope>provided</scope>-->
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
<version>${spark.version}</version>
<!-- <scope>provided</scope>-->
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.8.sec06</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_2.11</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<!-- Imported by DateUtils (StringUtils and the commons-lang3 date parser). -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.3.2</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
<version>4.4.3</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.1</version>
</dependency>
<!-- Logging: SLF4J API with the log4j 1.2 binding. -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.12</version>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.17</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.12</version>
</dependency>
<!-- <dependency>-->
<!-- <groupId>org.apache.spark</groupId>-->
<!-- <artifactId>spark-hive-thriftserver_2.11</artifactId>-->
<!-- <version>2.2.0</version>-->
<!-- </dependency>-->
<!-- MySQL JDBC driver jar -->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.35</version>
</dependency>
<!-- Hadoop artifacts; versions come from the hadoop.version property. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>${hadoop.version}</version>
</dependency>
<!-- Hive dependency: the UDF classes import org.apache.hadoop.hive.ql.exec.UDF.
     NOTE(review): no scope is set, so it is bundled by default; presumably the cluster
     already provides it and "provided" scope would do. Confirm. -->
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>${hive.version}</version>
</dependency>
</dependencies>