最近尝试使用flink的table-sql,发现没有from_unixtime函数,只能自定义该udf。
原始kafka消息日志
{"action":"exposure","itemId":"16c65063e51d4d834722bf1a4b1d6378@TT@1576","rankIndex":14,"time":"1563641998","unionId":"ohmdTtymqiQw5aSxIt3ejxeAqpgs"}
{"action":"exposure","itemId":"15af1bc74e1ce2d7d0c12a0968618f1c@TT@16","rankIndex":10,"time":"1563641998","unionId":"ohmdTt_gZk2UkbbWsXBARMsTl1mI"}
{"action":"exposure","itemId":"15af1bc74e1ce2d7d0c12a0968618f1c@FT@287","rankIndex":21,"time":"1563641998","unionId":"ohmdTt_gZk2UkbbWsXBARMsTl1mI"}
{"action":"exposure","itemId":"15af1bc74e1ce2d7d0c12a0968618f1c@TT@12","rankIndex":22,"time":"1563641998","unionId":"ohmdTt_gZk2UkbbWsXBARMsTl1mI"}
{"action":"exposure","itemId":"b6f42135e217f70e97e214faf818ff07@TT@1523","rankIndex":10,"time":"1563641998","unionId":"ohmdTtzivXuT9u3oWFO5daAxziI0"}
{"action":"exposure","itemId":"b6f42135e217f70e97e214faf818ff07@TT@2759","rankIndex":25,"time":"1563641998","unionId":"ohmdTtzivXuT9u3oWFO5daAxziI0"}
{"action":"exposure","itemId":"15af1bc74e1ce2d7d0c12a0968618f1c@TT@68","rankIndex":2,"time":"1563641998","unionId":"ohmdTt_gZk2UkbbWsXBARMsTl1mI"}
{"action":"exposure","itemId":"16c65063e51d4d834722bf1a4b1d6378@TT@2045","rankIndex":13,"time":"1563641998","unionId":"ohmdTtymqiQw5aSxIt3ejxeAqpgs"}
{"action":"exposure","itemId":"16c65063e51d4d834722bf1a4b1d6378@TT@982","rankIndex":17,"time":"1563641998","unionId":"ohmdTtymqiQw5aSxIt3ejxeAqpgs"}
{"action":"exposure","itemId":"b6f42135e217f70e97e214faf818ff07@TT@1498","rankIndex":28,"time":"1563641998","unionId":"ohmdTtzivXuT9u3oWFO5daAxziI0"}
我们要格式化的就是 time 字段。
自定义udf函数
import org.apache.flink.table.functions.ScalarFunction;

import java.text.SimpleDateFormat;
import java.time.Instant;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.Date;
public class FromUnixTimeUDF extends ScalarFunction {
public String DATE_FORMAT;
public FromUnixTimeUDF() {
this.DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";
}
public FromUnixTimeUDF(String dateFormat) {
this.DATE_FORMAT = dateFormat;
}
public String eval(String longTime) {
try {
SimpleDateFormat sdf = new SimpleDateFormat(DATE_FORMAT);
Date date = new Date(Long.parseLong(longTime) * 1000);
return sdf.format(date);
} catch (Exception e) {
return null;
}
}
}
主程序main函数
// Local streaming environment plus its Table API wrapper.
final StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
StreamTableEnvironment tEnv = TableEnvironment.getTableEnvironment(env);
// Register the UDF under the familiar Hive/MySQL name "from_unixtime".
tEnv.registerFunction("from_unixtime", new FromUnixTimeUDF());
// Wire the Kafka source: JSON format, schema derived from initSchema(),
// append-only stream registered as table "transfer_plan_show".
tEnv.connect(initKafkaDescriptor()).withFormat(new Json().failOnMissingField(true).deriveSchema())
.withSchema(initSchema()).inAppendMode().registerTableSource("transfer_plan_show");
// `time` is a SQL keyword, so it is escaped with backticks in the query.
Table result = tEnv.sqlQuery("select unionId,itemId,action,from_unixtime(`time`) as creat_time,rankIndex as rank_index from transfer_plan_show");
result.printSchema();
// Convert the Table back to a DataStream of Rows and print it to stdout.
tEnv.toAppendStream(result, Row.class).print();
env.execute();
相关函数
// Kafka connector descriptor: broker list from KafkaConfig, topic
// "transfer_plan_show", consuming from the latest offsets.
private Kafka initKafkaDescriptor(){
return new Kafka()
        .version("0.11")
        .topic("transfer_plan_show")
        .startFromLatest()
        .property("bootstrap.servers", KafkaConfig.KAFKA_BROKER_LIST)
        .property("group.id", "trafficwisdom-streaming");
}
// Table schema mirroring the JSON payload: four string fields plus an int rank.
private Schema initSchema(){
return new Schema()
        .field("action", Types.STRING())
        .field("itemId", Types.STRING())
        .field("time", Types.STRING())
        .field("unionId", Types.STRING())
        .field("rankIndex", Types.INT());
}
说明:因为 SQL 中 time 是关键字,所以要用一对反引号 `` ` `` 将其包起来。