Chaining the big data components Flume, Kafka, and HDFS

Data source on the Win10 side (console input is appended to a file in the VMware shared folder):
package com.atguigu.KafkaToHdfs;

import java.io.*;
import java.util.Scanner;

public class IOProducer {
    public static void main(String[] args) throws IOException {
        Scanner sc = new Scanner(System.in);
        // Append to the file in the shared folder; the VM sees the same file as /mnt/hgfs/Share-Virtual/file.txt
        FileOutputStream fos = new FileOutputStream(new File("E:/Share-Virtual/file.txt"), true);
        String line;
        while (true) {
            System.out.println("Please enter a message:");
            line = sc.nextLine();
            // Write one line per message so the Flume TAILDIR source picks it up as a single event
            fos.write((line + "\n").getBytes());
            fos.flush();
        }
    }
}
Flume configuration: the agent tails /mnt/hgfs/Share-Virtual/file.txt (the Win10 shared folder mounted in the VM) and forwards each new line to the Kafka topic first.
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.type = TAILDIR
a1.sources.r1.positionFile = /opt/module/flume/position/tail_dir3.json
a1.sources.r1.filegroups = f1
a1.sources.r1.filegroups.f1 = /mnt/hgfs/Share-Virtual/file.txt
# Describe the sink
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.kafka.bootstrap.servers = hadoop102:9092,hadoop103:9092,hadoop104:9092
a1.sinks.k1.kafka.topic = first
a1.sinks.k1.kafka.flumeBatchSize = 20
a1.sinks.k1.kafka.producer.acks = 1
a1.sinks.k1.kafka.producer.linger.ms = 1
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
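
To run the pipeline, the agent defined above has to be started, and the topic can optionally be checked before wiring in the HDFS consumer. A minimal sketch, assuming the configuration is saved as job/file-to-kafka.conf under the Flume installation directory and that the two commands are run from the Flume and Kafka home directories respectively (both file names and locations are assumptions):

# Start agent a1 with the configuration above (config path is an assumption)
bin/flume-ng agent --conf conf --conf-file job/file-to-kafka.conf --name a1 -Dflume.root.logger=INFO,console

# Quick check that events are actually reaching the topic 'first'
bin/kafka-console-consumer.sh --bootstrap-server hadoop102:9092 --topic first --from-beginning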
Kafka consumer side (reads from the topic first and appends each record as a line to a file on HDFS):
package com.atguigu.KafkaToHdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.net.URI;
import java.util.Arrays;
import java.util.Properties;

public class MyConsumer {
    private static String KafkaHost;
    private static String KafkaGroup;
    private static String KafkaTopic;
    private static String HdfsURI;
    private static String HdfsDir;
    private static String hadoopUser;
    private static Configuration hdfsConf;
    private static FileSystem hadoopFS;

    public static void main(String[] args) throws IOException, InterruptedException {
        hadoopUser = "hadoop";
        init();
        System.out.println("Starting the service...");
        // Connect to HDFS as the given user and make sure the target directory exists
        hdfsConf = new Configuration();
        hadoopFS = FileSystem.get(URI.create(HdfsURI), hdfsConf, hadoopUser);
        if (!hadoopFS.exists(new Path("/" + HdfsDir))) {
            hadoopFS.mkdirs(new Path("/" + HdfsDir));
        }
        System.out.println("Service started, listening for messages");
        run();
    }

    public static void run() {
        // Standard Kafka consumer configuration: string deserializers, offsets auto-committed every 100 ms
        Properties properties = new Properties();
        properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KafkaHost);
        properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true");
        properties.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "100");
        properties.put(ConsumerConfig.GROUP_ID_CONFIG, KafkaGroup);
        properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
        properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");

        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(properties);
        consumer.subscribe(Arrays.asList(KafkaTopic));

        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(100);
            for (ConsumerRecord<String, String> record : records) {
                String s = record.value();
                System.out.println(s);
                // Wrap the message in a stream and write it as one line to the HDFS file
                ByteArrayInputStream fis = new ByteArrayInputStream((s + "\n").getBytes());
                FSDataOutputStream fos = null;
                try {
                    Path target = new Path("/" + HdfsDir + "/tmp1.txt");
                    if (!hadoopFS.exists(target)) {
                        // First message: create the file
                        fos = hadoopFS.create(target, false);
                    } else {
                        // Subsequent messages: append (requires append support to be enabled on the cluster)
                        fos = hadoopFS.append(target);
                    }
                    IOUtils.copyBytes(fis, fos, hdfsConf);
                } catch (IOException e) {
                    e.printStackTrace();
                } finally {
                    IOUtils.closeStream(fos);
                    IOUtils.closeStream(fis);
                }
            }
        }
    }

    private static void init() {
        KafkaHost = "hadoop102:9092";
        KafkaGroup = "test";
        KafkaTopic = "first";
        HdfsURI = "hdfs://hadoop102:9000";
        HdfsDir = "kafka-hdfs";
    }
}
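
Once the Win10 producer, the Flume agent, and MyConsumer are all running, every message typed on the Win10 side should end up as a line in HDFS. A quick way to verify, assuming the defaults used above (directory kafka-hdfs, file tmp1.txt):

# List the output directory and print the collected messages
hdfs dfs -ls /kafka-hdfs
hdfs dfs -cat /kafka-hdfs/tmp1.txt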