【Flink】Flink基础数据类型和自定义Sink

Flink基础数据类型和自定义Sink

基础数据类型

  • Flink 支持所有的 Java 和 Scala 基础数据类型,Int, Double, Long, String……
// Stream of four boxed integers; the map step doubles every element.
DataStream<Integer> numberStream = env.fromElements(1, 2, 3, 4);
numberStream.map(value -> 2 * value);
  • Java 和 Scala 元组(Tuples)
// Stream of (name, age) tuples. The diamond operator keeps the tuples
// parameterized as Tuple2<String, Integer> instead of the raw Tuple2 type,
// which would only compile through an unchecked conversion.
DataStream<Tuple2<String, Integer>> personStream = env.fromElements(
 new Tuple2<>("Adam", 17),
 new Tuple2<>("Sarah", 23) );
// f1 is the second tuple field (the age); keep only entries above 18.
personStream.filter(p -> p.f1 > 18);
  • Scala 样例类(case classes)
// Case class used as the element type of the stream below.
case class Person(name: String, age: Int)

// Two sample elements; the filter keeps only persons older than 18.
val persons: DataStream[Person] =
	env.fromElements(Person("Adam", 17), Person("Sarah", 23))
persons.filter(_.age > 18)

跳转顶部


  • Java 简单对象(POJOs)
/**
 * Plain data holder with public fields and a public no-argument constructor,
 * used as a stream element type in the surrounding example.
 */
public class Person {
    /** Name of the person; {@code null} after the no-argument constructor. */
    public String name;

    /** Age of the person; {@code 0} after the no-argument constructor. */
    public int age;

    /** Creates an instance whose fields keep their Java default values. */
    public Person() {
        // intentionally empty
    }

    /**
     * Creates a fully initialized person.
     *
     * @param name value stored in {@link #name}
     * @param age  value stored in {@link #age}
     */
    public Person(String name, int age) {
        this.age = age;
        this.name = name;
    }
}
	// Build the two sample elements first, then turn them into a stream.
	Person alex = new Person("Alex", 42);
	Person wendy = new Person("Wendy", 23);
	DataStream<Person> persons = env.fromElements(alex, wendy);
  • 其它(Arrays, Lists, Maps, Enums, 等等)

    Flink 对 Java 和 Scala 中的一些特殊目的的类型也都是支持的,比如 Java 的ArrayList,HashMap,Enum 等等。

自定义Sink

  • Flink 没有类似于 Spark 中 foreach 的方法来让用户进行迭代操作。所有对外的输出操作都要利用 Sink 完成。最后通过类似如下的方式完成整个任务的最终输出操作。
    stream.addSink(new MySink(xxxx))

  • 官方提供了一部分的框架的 sink。除此以外,需要用户自定义实现 sink。

kafka

  • 编写一个程序:先作为 Kafka 的消费者读取数据,对数据进行处理后,再作为 Kafka 的生产者输出,也就是把从 Kafka 中传入的数据处理之后再写回 Kafka
package sink;

import beans.SenSorReading;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStreamSink;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011;

import java.util.Properties;

/**
 * Reads comma-separated records from Kafka, converts each one into a
 * {@code SenSorReading}, and writes the reading's string form back to Kafka.
 */
public class SinkTest01 {
    /**
     * Builds and runs the Kafka-to-Kafka job.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Settings handed to the Kafka consumer.
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "a:9092");
        properties.setProperty("group.id", "consumer-group");
        properties.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        properties.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        properties.setProperty("auto.offset.reset", "latest");

        DataStreamSource<String> inputStream = env.addSource(
                new FlinkKafkaConsumer011<String>("first", new SimpleStringSchema(), properties));

        // Each line is expected to look like "id,timestamp,temperature".
        // toString() is applied right here so the stream already carries Strings
        // and the Kafka sink below can simply use SimpleStringSchema.
        SingleOutputStreamOperator<String> dataStream = inputStream.map(line -> {
            String[] fields = line.split(",");
            // valueOf replaces the deprecated new Long(String) / new Double(String) constructors.
            return new SenSorReading(fields[0], Long.valueOf(fields[1]), Double.valueOf(fields[2])).toString();
        });

        // NOTE(review): the sink topic "first" is the same topic the source reads from,
        // so the job's own output is consumed again as input — confirm this is intended
        // or switch the producer to a separate output topic.
        dataStream.addSink(new FlinkKafkaProducer011<String>("a:9092", "first", new SimpleStringSchema()));

        env.execute();
    }
}

跳转顶部


Redis

  • 需要引用依赖
<!-- Redis connector for Flink, published under the org.apache.bahir group.
     NOTE(review): the _2.11 suffix is presumably the Scala binary version; keep it
     consistent with the other Flink artifacts of the project (TODO confirm). -->
<dependency>
	 <groupId>org.apache.bahir</groupId>
	 <artifactId>flink-connector-redis_2.11</artifactId>
	 <version>1.0</version>
</dependency>
  • 自定义 RedisMapper 类
public static class MyRedisMapper implements RedisMapper<SensorReading>{
 // 保存到 redis 的命令,存成哈希表
	 public RedisCommandDescription getCommandDescription() {
	 return new RedisCommandDescription(RedisCommand.HSET, "sensor_tempe");
   }
	 public String getKeyFromData(SensorReading data) {
	 return data.getId();
   }
	 public String getValueFromData(SensorReading data) {
	 return data.getTemperature().toString();
   }
}
  • 在主方法里调用
// Connection settings for the Redis server at localhost:6379.
FlinkJedisPoolConfig.Builder poolBuilder = new FlinkJedisPoolConfig.Builder();
poolBuilder.setHost("localhost");
poolBuilder.setPort(6379);
FlinkJedisPoolConfig config = poolBuilder.build();

// Attach a Redis sink that stores every reading through MyRedisMapper.
RedisSink<SensorReading> redisSink = new RedisSink<SensorReading>(config, new MyRedisMapper());
dataStream.addSink(redisSink);

跳转顶部


Elasticsearch

  • 引用依赖
<!-- Flink connector for Elasticsearch 6 (per the artifactId).
     NOTE(review): the _2.12 suffix is presumably the Scala binary version; keep it
     consistent with the other Flink artifacts of the project (TODO confirm). -->
<dependency>
	 <groupId>org.apache.flink</groupId>
	 <artifactId>flink-connector-elasticsearch6_2.12</artifactId>
	 <version>1.10.1</version>
</dependency>
  • 自定义类
/**
 * Turns each {@code SensorReading} into an Elasticsearch index request and
 * hands it to the indexer.
 */
public static class MyEsSinkFunction implements ElasticsearchSinkFunction<SensorReading>{
	 /**
	  * Builds the document for one reading and queues it for indexing.
	  *
	  * @param element reading to write
	  * @param ctx     runtime context (not used here)
	  * @param indexer receives the index request
	  */
	 @Override
	 public void process(SensorReading element, RuntimeContext ctx, RequestIndexer indexer) {
		 // Document body: all three attributes are stored as strings.
		 HashMap<String, String> document = new HashMap<>();
		 document.put("id", element.getId());
		 document.put("ts", element.getTimestamp().toString());
		 document.put("temp", element.getTemperature().toString());

		 // Target index "sensor", document type "readingData".
		 IndexRequest request = Requests.indexRequest();
		 request.index("sensor");
		 request.type("readingData");
		 request.source(document);

		 indexer.add(request);
	 }
}
  • 在主方法里调用
// Elasticsearch nodes to connect to; here a single local node on port 9200.
ArrayList<HttpHost> httpHosts = new ArrayList<>();
HttpHost localNode = new HttpHost("localhost", 9200);
httpHosts.add(localNode);

// Build the sink from the host list and the custom sink function, then attach it.
ElasticsearchSink.Builder<SensorReading> esSinkBuilder =
        new ElasticsearchSink.Builder<SensorReading>(httpHosts, new MyEsSinkFunction());
dataStream.addSink(esSinkBuilder.build());

跳转顶部


MySQL

  • 依赖
<!-- MySQL JDBC driver; needed for the jdbc:mysql:// connection URL opened by the sink code below. -->
<dependency>
	 <groupId>mysql</groupId>
	 <artifactId>mysql-connector-java</artifactId>
	 <version>5.1.44</version>
</dependency>
  • 具体代码
package sink;

import beans.SenSorReading;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;

/**
 * Reads sensor records from a text file and upserts the temperature of each
 * sensor into the MySQL table {@code sensor_temp} through a custom sink.
 */
public class MyJDBCSink {
    /**
     * Builds and runs the file-to-MySQL job.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> inputStream = env.readTextFile("src/main/resources/sensor.txt");

        // Each line is expected to look like "id,timestamp,temperature".
        SingleOutputStreamOperator<SenSorReading> dataStream = inputStream.map(line -> {
            String[] fields = line.split(",");
            // valueOf replaces the deprecated new Long(String) / new Double(String) constructors.
            return new SenSorReading(fields[0], Long.valueOf(fields[1]), Double.valueOf(fields[2]));
        });

        dataStream.addSink(new MyJdbcSink());
        env.execute();
    }

    /**
     * Sink that keeps one row per sensor id: it first tries to update the
     * existing row and inserts a new one only when nothing was updated.
     */
    public static class MyJdbcSink extends RichSinkFunction<SenSorReading> {
        Connection conn = null;
        PreparedStatement insertStmt = null;
        PreparedStatement updateStmt = null;

        /**
         * Opens the database connection and prepares both statements.
         *
         * @param parameters configuration passed in by the caller (unused)
         * @throws Exception if the connection or a statement cannot be created
         */
        @Override
        public void open(Configuration parameters) throws Exception {
            // NOTE(review): credentials are hard-coded for the demo; externalize them for real use.
            conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/test",
                    "root", "123456");
            // Precompiled statements with placeholders that are filled per record.
            insertStmt = conn.prepareStatement("INSERT INTO sensor_temp (id, temp) VALUES (?, ?)");
            updateStmt = conn.prepareStatement("UPDATE sensor_temp SET temp = ? WHERE id = ?");
        }

        /**
         * Writes one reading: update first, insert if no row was updated.
         * Note: do not leave a {@code super.invoke(...)} call in this method.
         *
         * @param value   reading to store
         * @param context sink context (unused)
         * @throws Exception if a statement fails
         */
        @Override
        public void invoke(SenSorReading value, Context context) throws Exception {
            updateStmt.setDouble(1, value.getTemperature());
            updateStmt.setString(2, value.getId());
            // executeUpdate returns the affected row count directly.
            int updatedRows = updateStmt.executeUpdate();
            // No existing row for this id: insert a new one.
            if (updatedRows == 0) {
                insertStmt.setString(1, value.getId());
                insertStmt.setDouble(2, value.getTemperature());
                insertStmt.executeUpdate();
            }
        }

        /**
         * Releases the statements and the connection. Each resource is
         * null-checked (open may have failed part-way) and closed in its own
         * try/finally step so a failure in one does not leak the others.
         *
         * @throws Exception if closing a resource fails
         */
        @Override
        public void close() throws Exception {
            try {
                if (insertStmt != null) {
                    insertStmt.close();
                }
            } finally {
                try {
                    if (updateStmt != null) {
                        updateStmt.close();
                    }
                } finally {
                    if (conn != null) {
                        conn.close();
                    }
                }
            }
        }
    }
}

跳转顶部


上一篇:【转载】利用 IDEA HTTP 请求文件访问 API 接口


下一篇:Springboot中WebMvcConfigurer接口详解