Spark Streaming mapWithState

Purpose

Keep the running sum of all received numbers as state, and output the cumulative total.

Code

package com.yy.udf

import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.MapWithStateDStream
import org.apache.spark.streaming.{Seconds, State, StateSpec, StreamingContext}

object MapWithStateLearn {
    def main(args: Array[String]): Unit = {
        val conf = new SparkConf().setMaster("local[2]").setAppName("NetworkWordCount")
        val ssc = new StreamingContext(conf, Seconds(5))
        val stream = ssc.socketTextStream("127.0.0.1", 9999) // numbers are sent with: nc -lk 9999
        ssc.checkpoint("C:\\yy\\coding_project\\yy\\data")
        ssc.sparkContext.setLogLevel("ERROR")

        val stateSpec = StateSpec.function(stateFunc _)
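        // The wrapped stateFunc (defined at the bottom of this object) is invoked once
        // per (key, value) pair in each batch, together with the state Spark keeps for that key.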

        // mapWithState keeps a running sum of the numbers you type
        val value: MapWithStateDStream[String, Long, Long, Option[(String, Long)]] = stream.map { record =>
            ("sum_number:", record.toLong)
        }.mapWithState(stateSpec)

        value.print()

        ssc.start()
        ssc.awaitTermination()
    }

    // Mapping function: a minimal sketch matching the stated purpose. It adds each
    // incoming number to the sum kept in state, updates the state, and emits the
    // running total for the key.
    def stateFunc(key: String, value: Option[Long], state: State[Long]): Option[(String, Long)] = {
        val sum = value.getOrElse(0L) + state.getOption().getOrElse(0L)
        state.update(sum)
        Some((key, sum))
    }
}
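To try it out, start nc -lk 9999 in a terminal before launching the job, then type one number per line. With the mapping function sketched above, entering 1, 2 and 3 should make value.print() emit Some((sum_number:,1)), Some((sum_number:,3)) and Some((sum_number:,6)), since the state carries the running sum across records and micro-batches.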