大数据处理框架之Storm:Kafka 与 Storm 整合

Storm 可以使用 Kafka 做数据源,此外还可以使用文件、Redis、JDBC、Hive、HDFS、HBase、Netty 等做数据源。

新建一个maven 工程:


<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion> <groupId>storm06</groupId>
<packaging>jar</packaging> <name>storm07</name>
<!-- Repository where we can find the Storm dependencies -->
<!-- Storm & Kafka spout -->
<!-- 单元测试 -->
<!-- 绑定到特定的生命周期之后,运行 maven-source-plugin,运行目标为 jar-no-fork -->


package bhz.storm.kafka.example;

import storm.kafka.KafkaSpout;
import storm.kafka.SpoutConfig;
import storm.kafka.StringScheme;
import storm.kafka.ZkHosts;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.spout.SchemeAsMultiScheme;
import backtype.storm.topology.TopologyBuilder; public class KafkaTopology {
public static void main(String[] args) throws
AlreadyAliveException, InvalidTopologyException {
// zookeeper hosts for the Kafka cluster
ZkHosts zkHosts = new ZkHosts(",,"); // Create the KafkaSpout configuartion
// Second argument is the topic name
// Third argument is the zookeeper root for Kafka
// Fourth argument is consumer group id
SpoutConfig kafkaConfig = new SpoutConfig(zkHosts,"words_topic", "", "id7"); // Specify that the kafka messages are String
kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme()); // We want to consume all the first messages in the topic everytime
// we run the topology to help in debugging. In production, this
// property should be false
kafkaConfig.forceFromStart = true; // Now we create the topology
TopologyBuilder builder = new TopologyBuilder(); // set the kafka spout class
builder.setSpout("KafkaSpout", new KafkaSpout(kafkaConfig), 1); // configure the bolts
builder.setBolt("SentenceBolt", new SentenceBolt(), 1).globalGrouping("KafkaSpout");
builder.setBolt("PrinterBolt", new PrinterBolt(), 1).globalGrouping("SentenceBolt"); // create an instance of LocalCluster class for executing topology in local mode.
LocalCluster cluster = new LocalCluster();
Config conf = new Config(); // Submit topology for execution
cluster.submitTopology("KafkaToplogy", conf, builder.createTopology()); try {
// Wait for some time before exiting
System.out.println("Waiting to consume from kafka");
} catch (Exception exception) {
System.out.println("Thread interrupted exception : " + exception);
} // kill the KafkaTopology
cluster.killTopology("KafkaToplogy"); // shut down the storm test cluster
package bhz.storm.kafka.example;

import java.util.ArrayList;
import java.util.List; import org.apache.commons.lang.StringUtils; import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple; import com.google.common.collect.ImmutableList; public class SentenceBolt extends BaseBasicBolt { // list used for aggregating the words
private List<String> words = new ArrayList<String>(); public void execute(Tuple input, BasicOutputCollector collector) {
// Get the word from the tuple
String word = input.getString(0); if(StringUtils.isBlank(word)){
// ignore blank lines
} System.out.println("Received Word:" + word); // add word to current list of words
words.add(word); if (word.endsWith(".")) {
// word ends with '.' which means this is the end of
// the sentence publishes a sentence tuple
(Object) StringUtils.join(words, ' '))); // and reset the words list.
} public void declareOutputFields(OutputFieldsDeclarer declarer) {
// here we declare we will be emitting tuples with
// a single field called "sentence"
declarer.declare(new Fields("sentence"));
package bhz.storm.kafka.example;

import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Tuple; public class PrinterBolt extends BaseBasicBolt { public void execute(Tuple input, BasicOutputCollector collector) {
// get the sentence from the tuple and print it
String sentence = input.getString(0);
System.out.println("Received Sentence:" + sentence);
} public void declareOutputFields(OutputFieldsDeclarer declarer) {
// we don't emit anything
