大数据平台技术:Storm

文章目录

Storm的下载与安装

参考林子雨教案安装,成功后显示:
大数据平台技术:Storm

基于Storm的wordcount应用

实现原理

先来回忆一下storm的基本组成:
大数据平台技术:Storm
wordcount的topo结构:
大数据平台技术:Storm

代码

首先更新配置文件pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.zchi</groupId>
    <artifactId>storm_study</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <dependency>
            <groupId>org.apache.storm</groupId>
            <artifactId>storm-core</artifactId>
            <version>0.9.7</version>
        </dependency>
    </dependencies>

</project>

WordCount.java 文件

import java.util.*;

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;

        import java.util.*;

/*
 ** WordCountTopolopgyAllInJava类(单词计数)
 ** @author zhangchi
 */
public class wordcount {

    // 定义一个喷头,用于产生数据。该类继承自BaseRichSpout
    public static class RandomSentenceSpout extends BaseRichSpout {
        private SpoutOutputCollector _collector;
        private Random _rand;
        private Map<String,Values> pending;

        @Override
        public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
            _collector = collector;
            _rand = new Random();
            pending=new HashMap<String, Values>();
        }

        @Override
        public void nextTuple() {

            // 睡眠一段时间后再产生一个数据
            Utils.sleep(100);
            // 句子数组
            String[] sentences = new String[]{"the cow jumped over the moon", "an apple a day keeps the doctor away",
                    "four score and seven years ago", "snow white and the seven dwarfs", "i am at two with nature"};
            // 随机选择一个句子
            String sentence = sentences[_rand.nextInt(sentences.length)];
            Values tmpValues=new Values(sentence);
            String msgID= UUID.randomUUID().toString();
            pending.put(msgID,tmpValues);
            // 发射该句子给Bolt,每个tuple都有一个唯一标识
            _collector.emit(tmpValues,msgID);
        }

        // 确认函数:成功处理的tuple,其id会从pending列表中删除
        @Override
        public void ack(Object id) {
            System.out.println("Msg:"+id+" send successful!");
            pending.remove(id);

        }

        // 失败处理函数:处理失败的时候重新发送一次tuple,
        @Override
        public void fail(Object id) {
            System.out.println("Msg:"+id+" send failed,will try again!");
            Values failedMsg=pending.get(id);
            _collector.emit(failedMsg,id);
        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            // 定义一个字段word
            declarer.declare(new Fields("word"));
        }
    }

    // 定义个Bolt,用于将句子切分为单词
    public static class SplitSentence extends BaseRichBolt {
        private OutputCollector collector;

        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            // 定义一个字段
            declarer.declare(new Fields("word"));
        }

        @Override
        public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
            collector=outputCollector;
        }

        @Override
        public void execute(Tuple tuple) {
            // 接收到一个句子
            String sentence = tuple.getString(0);
            // 把句子切割为单词
            StringTokenizer iter = new StringTokenizer(sentence);
            // 发送每一个单词
            while (iter.hasMoreElements()) {
                collector.emit(new Values(iter.nextToken()));
            }
            // 确认对数据进行处理
            collector.ack(tuple);
        }
    }



    // 定义一个Bolt,用于单词计数
    public static class WordCount extends BaseBasicBolt {
        Map<String, Long> counts = new HashMap<String, Long>();


        @Override
        public void execute(Tuple tuple, BasicOutputCollector collector) {
            // 接收一个单词
            String word = tuple.getString(0);
            // 获取该单词对应的计数
            Long count = counts.get(word);
            if (count == null)
                count = 0l;
            // 计数增加
            count++;
            // 将单词和对应的计数加入map中
            counts.put(word, count);
            collector.emit(new Values(word, count));
        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            // 定义两个字段word和count
            declarer.declare(new Fields("word", "count"));
        }
    }

    //定义全局Bolt,用于统计最终结果以及所有的单词数统计

    public static class GlobalWordCount extends BaseBasicBolt{
        Map<String,Long> result=new HashMap<String, Long>();
        @Override
        public void execute(Tuple tuple, BasicOutputCollector basicOutputCollector) {
            String word=tuple.getStringByField("word");
            Long count=tuple.getLongByField("count");
            result.put(word,count);
        }

        @Override
        public void cleanup(){
            System.out.println("---------------------------------Final Result----------------------------------------------");
            long totalCount=0;
            for (String key:result.keySet()){
                long count=result.get(key);
                System.out.println("---------------------------------Word:"+key+"  Count:"+count);
                totalCount+=count;
            }
            System.out.println("---------------------------------TotalCount:"+totalCount);

        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {

        }
    }

    public static void main(String[] args) throws Exception {
        // 创建一个拓扑
        TopologyBuilder builder = new TopologyBuilder();
        // 设置Spout,这个Spout的名字叫做"Spout",设置并行度为5
        builder.setSpout("spout", new RandomSentenceSpout(), 2);
        // 设置slot——“split”,并行度为8,它的数据来源是spout的
        builder.setBolt("split", new SplitSentence(), 8).shuffleGrouping("spout");
        // 设置slot——“count”,你并行度为12,它的数据来源是split的word字段
        builder.setBolt("count", new WordCount(), 12).fieldsGrouping("split", new Fields("word"));
        //设置slot--------"globalcount" ,数据来源是spout
        builder.setBolt("globalcount",new GlobalWordCount()).globalGrouping("count");

        Config conf = new Config();
        conf.setDebug(false);

        //if(args != null && args.length > 0){
        //if(false){
        //  conf.setNumWorkers(3);
        //  StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
        //}else{
        conf.setMaxTaskParallelism(3);

        // 本地伪集群模式运行
        LocalCluster cluster = new LocalCluster();
        // 集群运行
//        StormSubmitter.submitTopology("word-count", conf, builder.createTopology() );
        //本地伪集群 提交拓扑(该拓扑的名字叫word-count)
        cluster.submitTopology("word-count", conf, builder.createTopology());
        Thread.sleep(30000);
        cluster.killTopology("word-count");
        cluster.shutdown();
        //}
    }
}

最后运行的结果为:
大数据平台技术:Storm

---------------------------------Final Result----------------------------------------------
---------------------------------Word:away  Count:111
---------------------------------Word:ago  Count:102
---------------------------------Word:jumped  Count:114
---------------------------------Word:seven  Count:199
---------------------------------Word:cow  Count:114
---------------------------------Word:two  Count:106
---------------------------------Word:years  Count:102
---------------------------------Word:dwarfs  Count:97
---------------------------------Word:score  Count:102
---------------------------------Word:apple  Count:111
---------------------------------Word:white  Count:97
---------------------------------Word:four  Count:102
---------------------------------Word:and  Count:199
---------------------------------Word:keeps  Count:111
---------------------------------Word:day  Count:111
---------------------------------Word:over  Count:114
---------------------------------Word:a  Count:111
---------------------------------Word:nature  Count:106
---------------------------------Word:i  Count:106
---------------------------------Word:am  Count:106
---------------------------------Word:an  Count:111
---------------------------------Word:the  Count:436
---------------------------------Word:doctor  Count:111
---------------------------------Word:with  Count:106
---------------------------------Word:moon  Count:114
---------------------------------Word:at  Count:106
---------------------------------Word:snow  Count:97
---------------------------------TotalCount:3402

运行注意事项:

  1. 运行前记得启动storm(启动nimbus和supervisor)
cd /usr/local/storm
./bin/storm nimbus
/usr/local/storm/bin/storm supervisor

将storm写入HDFS

2 be con…

上一篇:大数据技术


下一篇:大数据相关面试题