Flink-Sql自定义UDF

最近尝试使用flink的table-sql,发现没有from_unixtime函数,只能自定义该udf。
原始kafka消息日志

{"action":"exposure","itemId":"16c65063e51d4d834722bf1a4b1d6378@TT@1576","rankIndex":14,"time":"1563641998","unionId":"ohmdTtymqiQw5aSxIt3ejxeAqpgs"}
{"action":"exposure","itemId":"15af1bc74e1ce2d7d0c12a0968618f1c@TT@16","rankIndex":10,"time":"1563641998","unionId":"ohmdTt_gZk2UkbbWsXBARMsTl1mI"}
{"action":"exposure","itemId":"15af1bc74e1ce2d7d0c12a0968618f1c@FT@287","rankIndex":21,"time":"1563641998","unionId":"ohmdTt_gZk2UkbbWsXBARMsTl1mI"}
{"action":"exposure","itemId":"15af1bc74e1ce2d7d0c12a0968618f1c@TT@12","rankIndex":22,"time":"1563641998","unionId":"ohmdTt_gZk2UkbbWsXBARMsTl1mI"}
{"action":"exposure","itemId":"b6f42135e217f70e97e214faf818ff07@TT@1523","rankIndex":10,"time":"1563641998","unionId":"ohmdTtzivXuT9u3oWFO5daAxziI0"}
{"action":"exposure","itemId":"b6f42135e217f70e97e214faf818ff07@TT@2759","rankIndex":25,"time":"1563641998","unionId":"ohmdTtzivXuT9u3oWFO5daAxziI0"}
{"action":"exposure","itemId":"15af1bc74e1ce2d7d0c12a0968618f1c@TT@68","rankIndex":2,"time":"1563641998","unionId":"ohmdTt_gZk2UkbbWsXBARMsTl1mI"}
{"action":"exposure","itemId":"16c65063e51d4d834722bf1a4b1d6378@TT@2045","rankIndex":13,"time":"1563641998","unionId":"ohmdTtymqiQw5aSxIt3ejxeAqpgs"}
{"action":"exposure","itemId":"16c65063e51d4d834722bf1a4b1d6378@TT@982","rankIndex":17,"time":"1563641998","unionId":"ohmdTtymqiQw5aSxIt3ejxeAqpgs"}
{"action":"exposure","itemId":"b6f42135e217f70e97e214faf818ff07@TT@1498","rankIndex":28,"time":"1563641998","unionId":"ohmdTtzivXuT9u3oWFO5daAxziI0"}

我们要格式化的就是time字段。
自定义udf函数

import org.apache.flink.table.functions.ScalarFunction;

import java.text.SimpleDateFormat;
import java.time.Instant;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.Date;

public class FromUnixTimeUDF extends ScalarFunction {
    public String DATE_FORMAT;

    public FromUnixTimeUDF() {
        this.DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";
    }

    public FromUnixTimeUDF(String dateFormat) {
        this.DATE_FORMAT = dateFormat;
    }

    public String eval(String longTime) {
        try {
            SimpleDateFormat sdf = new SimpleDateFormat(DATE_FORMAT);
            Date date = new Date(Long.parseLong(longTime) * 1000);
            return sdf.format(date);
        } catch (Exception e) {
            return null;
        }
    }
}

主程序main函数

		// Local execution environment plus its Table API wrapper.
		final StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
    	StreamTableEnvironment tEnv = TableEnvironment.getTableEnvironment(env);
        // Register the UDF under the SQL name "from_unixtime" so it is callable in queries.
        tEnv.registerFunction("from_unixtime", new FromUnixTimeUDF());
        // Kafka source with JSON format; schema is derived from initSchema(), append-only stream.
        tEnv.connect(initKafkaDescriptor()).withFormat(new Json().failOnMissingField(true).deriveSchema())
        .withSchema(initSchema()).inAppendMode().registerTableSource("transfer_plan_show");
        // `time` is a SQL keyword, hence the backtick quoting in the query.
        Table result = tEnv.sqlQuery("select unionId,itemId,action,from_unixtime(`time`) as creat_time,rankIndex as rank_index from transfer_plan_show");
        result.printSchema();
        // Convert the result table back to a DataStream<Row> and print it.
        tEnv.toAppendStream(result, Row.class).print();
        env.execute();

相关函数

	// Kafka connector descriptor: consume topic "transfer_plan_show" from the latest offset.
    private Kafka initKafkaDescriptor(){
        return new Kafka()
                .version("0.11")
                .topic("transfer_plan_show")
                .startFromLatest()
                .property("bootstrap.servers", KafkaConfig.KAFKA_BROKER_LIST)
                .property("group.id", "trafficwisdom-streaming");
    }
	// Table schema mirroring the fields of the JSON kafka message.
    private Schema initSchema(){
        return new Schema()
                .field("action", Types.STRING())
                .field("itemId", Types.STRING())
                .field("time", Types.STRING())
                .field("unionId", Types.STRING())
                .field("rankIndex", Types.INT());
    }

说明:因为sql中time是关键字,所以加上两个反引号 `` 转义。

上一篇:独孤九剑-Spark面试80连击(下)


下一篇:大数据Hive框架——自定义函数