nimbus UK [ˈnɪmbəs] US [ˈnɪmbəs] n. (large) rain cloud; halo
Storm: a distributed, real-time stream-processing framework.
As shown on the right of the figure below, Storm processes each record as it arrives. The volume of data per unit of time must stay moderate so that the topology keeps running normally, but once started, it runs continuously.
Batch processing is different: Spark is a micro-batch computing framework, and micro-batching can also achieve near-real-time latency.
MapReduce cannot achieve real-time processing; it targets TB- and PB-scale data, hits the disk frequently, and repeatedly starts and stops jobs.
ETL (extract, transform, load): data cleaning.
Spout UK [spaʊt] US [spaʊt] n. spout (of a kettle); jet; nozzle; waterspout
bolt UK [bəʊlt] US [boʊlt] n. (door or window) bolt; v. to bolt shut; to fasten (one thing to another) with bolts; (of a horse) to bolt; adv. suddenly; like an arrow; bolt upright
Nimbus is analogous to a master; a Supervisor is analogous to a slave. Each Supervisor launches Worker (JVM) processes, and each Worker executes one or more Tasks.
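A minimal sketch of how these roles map onto a topology's configuration (using the MySpout and MyBolt classes shown below): the parallelism hint sets the number of executors per component, setNumTasks sets the number of task instances, and setNumWorkers sets the number of Worker JVMs that host them. The component names here are illustrative, not from this project.

```java
// Sketch: how parallelism settings map onto workers, executors, and tasks.
TopologyBuilder builder = new TopologyBuilder();
builder.setSpout("mySpout", new MySpout(), 2);   // 2 executors for the spout
builder.setBolt("myBolt", new MyBolt(), 4)       // 4 executors for the bolt
       .setNumTasks(8)                           // 8 tasks shared among those 4 executors
       .shuffleGrouping("mySpout");

Config conf = new Config();
conf.setNumWorkers(2); // spread all executors across 2 Worker (JVM) processes
```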
The ack mechanism cannot guarantee that data is never recomputed, but it does guarantee that every tuple is correctly processed at least once. (A failure can cause tuples that did not themselves fail to be replayed and computed a second time.)

```java
package com.sxt.storm.ack;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.Map;

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichSpout;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;

public class MySpout implements IRichSpout {

    private static final long serialVersionUID = 1L;

    int index = 0;
    FileInputStream fis;
    InputStreamReader isr;
    BufferedReader br;
    SpoutOutputCollector collector = null;
    String str = null;

    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        try {
            this.collector = collector;
            this.fis = new FileInputStream("track.log");
            this.isr = new InputStreamReader(fis, "UTF-8");
            this.br = new BufferedReader(isr);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    @Override
    public void nextTuple() {
        try {
            if ((str = this.br.readLine()) != null) {
                // filtering could be done here
                index++;
                // emit with a message id so the tuple can be acked or failed
                collector.emit(new Values(str), index);
                // collector.emit(new Values(str)); // unanchored: no ack tracking
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    @Override
    public void close() {
        try {
            br.close();
            isr.close();
            fis.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("log"));
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }

    @Override
    public void ack(Object msgId) {
        System.err.println(" [" + Thread.currentThread().getName() + "] spout ack:" + msgId.toString());
    }

    @Override
    public void fail(Object msgId) {
        System.err.println(" [" + Thread.currentThread().getName() + "] spout fail:" + msgId.toString());
    }

    @Override
    public void activate() {
    }

    @Override
    public void deactivate() {
    }
}
```

```java
package com.sxt.storm.ack;

import java.util.Map;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

public class MyBolt implements IRichBolt {

    private static final long serialVersionUID = 1L;

    OutputCollector collector = null;
    int num = 0;
    String valueString = null;

    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    @Override
    public void execute(Tuple input) {
        try {
            valueString = input.getStringByField("log");
            if (valueString != null) {
                num++;
                System.err.println(Thread.currentThread().getName() + " lines:" + num
                        + " session_id:" + valueString.split("\t")[1]);
            }
            // anchored emit: ties the new tuple to the input tuple in the tuple tree
            collector.emit(input, new Values(valueString));
            // collector.emit(new Values(valueString)); // unanchored variant
            collector.ack(input);
            Thread.sleep(2000);
        } catch (Exception e) {
            collector.fail(input);
            e.printStackTrace();
        }
    }

    @Override
    public void cleanup() {
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("session_id"));
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
```

```java
package com.sxt.storm.ack;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.topology.TopologyBuilder;

public class Main {

    public static void main(String[] args) {
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", new MySpout(), 1);
        builder.setBolt("bolt", new MyBolt(), 2).shuffleGrouping("spout");

        // Map conf = new HashMap();
        // conf.put(Config.TOPOLOGY_WORKERS, 4);
        Config conf = new Config();
        conf.setDebug(true);
        conf.setMessageTimeoutSecs(100); // fail tuples not fully processed within 100 s
        conf.setNumAckers(4);

        if (args.length > 0) {
            try {
                StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
            } catch (AlreadyAliveException e) {
                e.printStackTrace();
            } catch (InvalidTopologyException e) {
                e.printStackTrace();
            }
        } else {
            LocalCluster localCluster = new LocalCluster();
            localCluster.submitTopology("mytopology", conf, builder.createTopology());
        }
    }
}
```
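Two details in MyBolt carry the reliability semantics. Emitting with collector.emit(input, new Values(...)) anchors the new tuple to the input tuple, so a failure anywhere downstream propagates back to the spout's fail() with the original message id and triggers a replay; collector.ack(input) then marks this bolt's step in the tuple tree as complete. If the unanchored collector.emit(new Values(...)) variant were used instead, downstream failures would never reach the spout, and the at-least-once guarantee described above would be lost.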
Single point of failure: addressed by Flume HA. Single-point bottleneck: addressed by load balancing. http://flume.apache.org/FlumeUserGuide.html#scribe-source
Meituan's log-collection system architecture: https://tech.meituan.com/2013/12/09/meituan-flume-log-system-architecture-and-design.html
Example: phone call-drop rate (abnormal hang-ups: the audio goes silent, or the phone leaves the service area).
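For reference, both behaviors come from a sink group in a Flume agent's properties file: processor.type = failover gives HA, while processor.type = load_balance spreads events across sinks. This is only a sketch with hypothetical agent/sink names (a1, k1, k2), not the Meituan configuration:

```
# assumes agent a1 already defines two sinks, k1 and k2
a1.sinkgroups = g1
a1.sinkgroups.g1.sinks = k1 k2
# load balancing: removes the single-point bottleneck
a1.sinkgroups.g1.processor.type = load_balance
a1.sinkgroups.g1.processor.selector = round_robin
a1.sinkgroups.g1.processor.backoff = true
# alternatively, failover for HA (higher priority is preferred):
# a1.sinkgroups.g1.processor.type = failover
# a1.sinkgroups.g1.processor.priority.k1 = 10
# a1.sinkgroups.g1.processor.priority.k2 = 5
```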
China Mobile (CMCC) project architecture diagram: (figure not reproduced here; judging from the code below, the pipeline is a simulated CDR producer feeding Kafka, with Storm consuming and results written to HBase)
Create the Kafka topic:

```
./kafka-topics.sh --zookeeper node2:2181,node3:2181,node4:2181 --create --replication-factor 2 --partitions 3 --topic mylog_cmcc
```

Start a console consumer:

```
./kafka-console-consumer.sh --zookeeper node2:2181,node3:2181,node4:2181 --from-beginning --topic mylog_cmcc
```

The program below generates the test data:

```java
package kafka.productor;

import java.util.Properties;
import java.util.Random;

import backtype.storm.utils.Utils;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;
import tools.DateFmt;

/**
 * Simulates sending call-detail records (CDRs) to Kafka.
 *
 * CDR schema: record_time, imei, cell, ph_num, call_num, drop_num,
 *             duration, drop_rate, net_type, erl
 * e.g. 2011-06-28 14:24:59.867,356966,29448-37062,0,0,0,0,0,G,0
 */
public class CellProducer extends Thread {

    private final kafka.javaapi.producer.Producer<Integer, String> producer;
    private final String topic;
    private final Properties props = new Properties();

    public CellProducer(String topic) {
        props.put("serializer.class", "kafka.serializer.StringEncoder"); // plain string messages
        props.put("metadata.broker.list", KafkaProperties.broker_list);
        producer = new kafka.javaapi.producer.Producer<Integer, String>(new ProducerConfig(props));
        this.topic = topic;
    }

    @Override
    public void run() {
        Random random = new Random();
        // sample cell ids (unused in this variant; ids are generated below)
        String[] cell_num = { "29448-37062", "29448-51331", "29448-51331", "29448-51333", "29448-51343" };
        // 0 = normal; 1 = dropped call (intermittent signal); 2 = cut off (fully disconnected)
        String[] drop_num = { "0", "1", "2" };
        int i = 0;
        while (true) {
            i++;
            String testStr = String.format("%06d", random.nextInt(10) + 1);
            // messageStr example: 2494  29448-000003  2016-01-05 10:25:17  1
            String messageStr = i + "\t" + ("29448-" + testStr) + "\t"
                    + DateFmt.getCountDate(null, DateFmt.date_long) + "\t"
                    + drop_num[random.nextInt(drop_num.length)];
            System.out.println("product:" + messageStr);
            producer.send(new KeyedMessage<Integer, String>(topic, messageStr));
            Utils.sleep(1000);
        }
    }

    public static void main(String[] args) {
        // set the topic and start the data-generating thread
        CellProducer producerThread = new CellProducer(KafkaProperties.Cell_Topic);
        producerThread.start();
    }
}
```

```java
package cmcc.constant;

public class Constants {
    public static final String HBASE_ZOOKEEPER_LIST = "node4:2181";
    public static final String KAFKA_ZOOKEEPER_LIST = "node2:2181,node3:2181,node4:2181";
    public static final String BROKER_LIST = "node2:9092,node3:9092,node4:9092";
    public static final String ZOOKEEPERS = "node2,node3,node4";
}
```
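The notes stop short of the consuming topology. Assuming the storm-kafka module from the same backtype.storm era, wiring the mylog_cmcc topic into a topology would look roughly like this; the topology class, the ZK offset root, the consumer id, and the commented-out CdrBolt are hypothetical:

```java
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.spout.SchemeAsMultiScheme;
import backtype.storm.topology.TopologyBuilder;
import storm.kafka.KafkaSpout;
import storm.kafka.SpoutConfig;
import storm.kafka.StringScheme;
import storm.kafka.ZkHosts;
import cmcc.constant.Constants;

public class CmccTopology {
    public static void main(String[] args) {
        // point the spout at the ZooKeeper ensemble that Kafka registers brokers in
        ZkHosts zkHosts = new ZkHosts(Constants.KAFKA_ZOOKEEPER_LIST);
        // topic, ZK root for storing consumer offsets, and a consumer id
        SpoutConfig spoutConfig = new SpoutConfig(zkHosts, "mylog_cmcc", "/kafka-offsets", "cmcc-consumer");
        spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme()); // decode messages as UTF-8 strings

        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("kafka-spout", new KafkaSpout(spoutConfig), 3);
        // a bolt that parses CDR lines and computes the drop rate would attach here, e.g.:
        // builder.setBolt("cdr-bolt", new CdrBolt(), 3).shuffleGrouping("kafka-spout");

        Config conf = new Config();
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("cmcc-topology", conf, builder.createTopology());
    }
}
```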