storm

nimbus
英 [ˈnɪmbəs]   美 [ˈnɪmbəs]  
n.
(大片的)雨云;光环

 

Storm:分布式、实时的流式计算框架

Storm 如下图右侧所示:来一条数据就处理一条;单位时间内处理的数据量不能太大,以保证它正常运行;但拓扑一旦启动就会一直运行。
批处理则不同。Spark 是微批处理的计算框架,也能够达到(准)实时性。
MR 做不到实时性:数据量级是 TB、PB 级的,需要频繁操作磁盘、频繁启停 job。

 storm

 

 

 

 

 

 

storm

storm

 

 

 storm

 

 

 storm

 

 

 

 

 

 storm

 

 

 

ETL(数据清洗):Extract, Transform, Load(抽取、转换、加载)
Spout
英 [spaʊt]   美 [spaʊt]  
壶嘴;喷出;喷口;管口;龙卷
bolt
英 [bəʊlt]   美 [boʊlt]  
n.
(门窗的)闩,插销
v.
用插销闩上;能被闩上;用螺栓把(甲和乙)固定在一起;(马等受惊)脱缰
adv.
突然地;像箭似地;直立地
Nimbus 类似于  master 
supervisor 类似于 slave 
worker task

  

 storm

 

 

storm

 

 

 storm

 

 

 

 storm

 

 

 storm

 

 

 storm

 

 

 storm

 

 

 storm

 

 

 storm

 

 

 storm

 

 

 storm

 

 

 storm

 

 

 

ack 机制无法保证数据不被重复计算,但可以保证每条数据至少被正确处理一次(at-least-once)。(例如:某条数据处理失败或超时后会被重发,重发时已经成功处理过的数据可能被再次计算。)

package com.sxt.storm.ack;


import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.Map;

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichSpout;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;

/**
 * Spout that reads the file {@code track.log} one line per {@link #nextTuple()}
 * call and emits each line as a single-field tuple named {@code "log"}.
 *
 * <p>Every tuple is emitted together with an increasing integer message id,
 * which is the value later handed to {@link #ack(Object)} or
 * {@link #fail(Object)}. Both callbacks only log the id; failed tuples are
 * not replayed by this class.
 */
public class MySpout implements IRichSpout{

	private static final long serialVersionUID = 1L;

	/** Message id of the most recently emitted tuple (starts at 0, first emit uses 1). */
	int index = 0;
	
	FileInputStream fis;
	InputStreamReader isr;
	BufferedReader br;			
	SpoutOutputCollector collector = null;
	/** Last line read from the file, or {@code null} once the end was reached. */
	String str = null;

	/**
	 * Reads the next line of the file, if any, and emits it with a fresh message id.
	 * Does nothing when the reader was never opened or the file is exhausted.
	 */
	@Override
	public void nextTuple() {
		if (br == null) {
			// open() could not create the reader; there is nothing to read from.
			return;
		}
		try {
			str = br.readLine();
			if (str != null) {
				index++;
				// The second argument is the message id reported back via ack()/fail().
				collector.emit(new Values(str), index);
			}
		} catch (Exception e) {
			// Report instead of silently swallowing, but keep the spout alive.
			e.printStackTrace();
		}
	}

	/**
	 * Closes the reader chain. Each stream is closed independently so that a
	 * partially initialised chain (or a failing close) cannot leak the others.
	 */
	@Override
	public void close() {
		closeAndReport(br);
		closeAndReport(isr);
		closeAndReport(fis);
	}

	/** Closes {@code resource} if it is non-null, printing any failure. */
	private static void closeAndReport(java.io.Closeable resource) {
		if (resource == null) {
			return;
		}
		try {
			resource.close();
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Stores the collector and opens {@code track.log} (relative to the working
	 * directory) as a UTF-8 reader. Failures are printed; the reader then stays
	 * {@code null} and {@link #nextTuple()} becomes a no-op.
	 *
	 * @param conf      topology configuration (unused)
	 * @param context   topology context (unused)
	 * @param collector collector used to emit tuples
	 */
	@Override
	public void open(Map conf, TopologyContext context,
			SpoutOutputCollector collector) {
		try {
			this.collector = collector;
			this.fis = new FileInputStream("track.log");
			this.isr = new InputStreamReader(fis, "UTF-8");
			this.br = new BufferedReader(isr);
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/** Declares the single output field {@code "log"}. */
	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		declarer.declare(new Fields("log"));
	}

	/** @return {@code null}; no component-specific configuration is provided */
	@Override
	public Map<String, Object> getComponentConfiguration() {
		return null;
	}
	
	/** Logs the id of a tuple that was acknowledged. */
	@Override
	public void ack(Object msgId) {
		System.err.println(" [" + Thread.currentThread().getName() + "] "+ " spout ack:"+msgId.toString());
	}

	@Override
	public void activate() {
		// nothing to do
	}

	@Override
	public void deactivate() {
		// nothing to do
	}

	/** Logs the id of a tuple that was reported as failed; the tuple is not re-emitted. */
	@Override
	public void fail(Object msgId) {
		System.err.println(" [" + Thread.currentThread().getName() + "] "+ " spout fail:"+msgId.toString());
	}

}

package com.sxt.storm.ack;

import java.util.Map;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

/**
 * Bolt that reads the {@code "log"} field of each incoming tuple, prints a
 * running line count plus the second tab-separated column of the line, then
 * emits an anchored tuple and acks the input. Any exception during that work
 * fails the input tuple instead.
 */
public class MyBolt implements IRichBolt {

	private static final long serialVersionUID = 1L;

	OutputCollector collector = null;
	/** Number of non-null lines seen by this bolt instance. */
	int num = 0;
	/** Value of the {@code "log"} field of the tuple currently being processed. */
	String valueString = null;

	@Override
	public void cleanup() {
		// nothing to release
	}

	/**
	 * Processes one tuple: logs it, emits it anchored to {@code input} and acks.
	 * On any exception (for example a line without a second tab-separated
	 * column) the input is failed and the pause below is skipped.
	 *
	 * @param input tuple carrying a string field named {@code "log"}
	 */
	@Override
	public void execute(Tuple input) {
		try {
			valueString = input.getStringByField("log") ;
			
			if(valueString != null) {
				num ++ ;
				System.err.println(Thread.currentThread().getName()+"   lines  :"+num +"   session_id:"+valueString.split("\t")[1]);
			}
			// Anchoring the new tuple to the input keeps it in the input's tuple tree.
			// NOTE(review): the declared output field is "session_id" but the whole
			// log line is emitted here - confirm which one is intended.
			collector.emit(input, new Values(valueString));
			collector.ack(input);
		} catch (Exception e) {
			collector.fail(input);
			e.printStackTrace();
			return;
		}

		// The pause is kept outside the ack/fail handling above: an interrupt while
		// sleeping must not fail a tuple that has already been acked.
		try {
			Thread.sleep(2000);
		} catch (InterruptedException e) {
			Thread.currentThread().interrupt();
			e.printStackTrace();
		}
	}

	/**
	 * Stores the collector used for emitting, acking and failing tuples.
	 *
	 * @param stormConf topology configuration (unused)
	 * @param context   topology context (unused)
	 * @param collector output collector for this bolt
	 */
	@Override
	public void prepare(Map stormConf, TopologyContext context,
			OutputCollector collector) {
		this.collector = collector ;
	}

	/** Declares the single output field {@code "session_id"}. */
	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		declarer.declare(new Fields("session_id")) ;
	}

	/** @return {@code null}; no component-specific configuration is provided */
	@Override
	public Map<String, Object> getComponentConfiguration() {
		return null;
	}

}

package com.sxt.storm.ack;


import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.topology.TopologyBuilder;

/**
 * Entry point that wires {@code MySpout} to {@code MyBolt} and submits the
 * resulting topology.
 */
public class Main {

	/**
	 * Builds the topology (one spout executor, two bolt executors connected by
	 * shuffle grouping) and submits it.
	 *
	 * @param args if non-empty, {@code args[0]} is used as the topology name and
	 *             the topology is submitted through {@code StormSubmitter};
	 *             otherwise it runs in a {@code LocalCluster} as {@code "mytopology"}
	 */
	public static void main(String[] args) {

		TopologyBuilder builder = new TopologyBuilder();

		builder.setSpout("spout", new MySpout(), 1);
		builder.setBolt("bolt", new MyBolt(), 2).shuffleGrouping("spout");
		
		Config conf = new Config() ;
		conf.setDebug(true);
		// Use the instance overload rather than calling the static
		// setMessageTimeoutSecs(Map, int) through an instance reference.
		conf.setMessageTimeoutSecs(100);
		conf.setNumAckers(4);
		
		if (args.length > 0) {
			try {
				StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
			} catch (AlreadyAliveException e) {
				e.printStackTrace();
			} catch (InvalidTopologyException e) {
				e.printStackTrace();
			}
		}else {
			LocalCluster localCluster = new LocalCluster();
			localCluster.submitTopology("mytopology", conf, builder.createTopology());
		}
		
	}

}

  

storm

 

 

 

单点故障, flume ha 
单点瓶颈,  load balance

http://flume.apache.org/FlumeUserGuide.html#scribe-source

美团日志收集系统架构
https://tech.meituan.com/2013/12/09/meituan-flume-log-system-architecture-and-design.html


实例:电话掉话率(掉话即非正常挂断,例如:没有声音了、不在服务区)
中国移动项目架构图:

 storm

 

 

 

kafka 创建topic
./kafka-topics.sh --zookeeper node2:2181,node3:2181,node4:2181 --create --replication-factor 2 --partitions 3 --topic mylog_cmcc

## 启动消费
./kafka-console-consumer.sh --zookeeper node2:2181,node3:2181,node4:2181 --from-beginning --topic mylog_cmcc



// 下边程序用于生成生产数据。

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package kafka.productor;

import java.util.Properties;
import java.util.Random;

import backtype.storm.utils.Utils;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;
import tools.DateFmt;

/**
 * Thread that simulates call records and sends them to a Kafka topic, one
 * message per second, until the thread dies.
 *
 * <p>Each message is a tab-separated line:
 * {@code <sequence>\t29448-<6-digit number in 000001..000010>\t<timestamp>\t<drop flag>},
 * for example {@code 2494 29448-000003 2016-01-05 10:25:17 1}.
 *
 * @author hadoop
 */
public class CellProducer extends Thread {

	// Example topic creation:
	// bin/kafka-topics.sh --create --zookeeper localhost:2181
	// --replication-factor 3 --partitions 5 --topic cmcccdr
	private final kafka.javaapi.producer.Producer<Integer, String> producer;
	private final String topic;
	private final Properties props = new Properties();

	/**
	 * Creates a producer that sends string messages to the brokers listed in
	 * {@code KafkaProperties.broker_list}.
	 *
	 * @param topic name of the Kafka topic the generated messages are sent to
	 */
	public CellProducer(String topic) {
		props.put("serializer.class", "kafka.serializer.StringEncoder");// string messages
		props.put("metadata.broker.list", KafkaProperties.broker_list);
		producer = new kafka.javaapi.producer.Producer<Integer, String>(new ProducerConfig(props));
		this.topic = topic;
	}

	/**
	 * Generates and sends one message per second in an endless loop. The
	 * producer is closed if the loop is ever left through an exception.
	 */
	@Override
	public void run() {
		Random random = new Random();
		// 0 = normal; 1 = dropped call (intermittent signal); 2 = disconnected (completely cut off)
		String[] drop_num = { "0", "1", "2" };
		int i = 0;
		try {
			while (true) {
				i++;
				// Zero-padded cell suffix in the range 000001..000010.
				String testStr = String.format("%06d", random.nextInt(10) + 1);

				// messageStr: 2494 29448-000003 2016-01-05 10:25:17 1
				String messageStr = i + "\t" + ("29448-" + testStr) + "\t" + DateFmt.getCountDate(null, DateFmt.date_long)
						+ "\t" + drop_num[random.nextInt(drop_num.length)];
				System.out.println("product:" + messageStr);
				producer.send(new KeyedMessage<Integer, String>(topic, messageStr));
				Utils.sleep(1000);
			}
		} finally {
			// Only reached when send/sleep throws; release the broker connections.
			producer.close();
		}
	}

	/**
	 * Starts a producer thread for the topic {@code KafkaProperties.Cell_Topic}.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		// Select the target topic.
		CellProducer producerThread = new CellProducer(KafkaProperties.Cell_Topic);

		// Start the thread that generates the data.
		producerThread.start();

	}
}


package cmcc.constant;

/**
 * Host lists shared by the project. All hosts are the cluster nodes
 * {@code node2}..{@code node4}; the names below indicate the intended consumer
 * of each list.
 */
public class Constants {

	/** Bare host names without ports, comma separated. */
	public static final String ZOOKEEPERS = "node2,node3,node4";

	/** Comma-separated {@code host:port} pairs on port 9092 (named as the broker list). */
	public static final String BROKER_LIST = "node2:9092,node3:9092,node4:9092";

	/** Comma-separated {@code host:port} pairs on port 2181 (named for Kafka's ZooKeeper). */
	public static final String KAFKA_ZOOKEEPER_LIST = "node2:2181,node3:2181,node4:2181";

	/** Single {@code host:port} entry on port 2181 (named for HBase's ZooKeeper). */
	public static final String HBASE_ZOOKEEPER_LIST = "node4:2181";
}

  

 

  

上一篇:大数据-Storm


下一篇:大数据经典学习路线以及各阶段所发挥的作用