import Utils.SparkUtils
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

object Demo {
  def main(args: Array[String]): Unit = {
    val sc: SparkContext = SparkUtils.getSparkContext()

    // Word count followed by a second aggregation on the upper-cased words;
    // each reduceByKey introduces a shuffle and therefore a new stage.
    val rdd: RDD[(String, Int)] = sc.textFile("data/cc.txt")
      .flatMap(_.split("\\s+"))   // split each line into words
      .map((_, 1))                // pair every word with a count of 1
      .reduceByKey(_ + _)         // first shuffle: count per word
      .map(_._1.toUpperCase)      // keep only the word, upper-cased
      .map((_, 1))
      .reduceByKey(_ + _)         // second shuffle: count per upper-cased word

    // Print the RDD lineage: the dependency chain and its stage boundaries.
    println(rdd.toDebugString)

    sc.stop()
  }
}
(2) ShuffledRDD[7] at reduceByKey at Demo.scala:16 []
 +-(2) MapPartitionsRDD[6] at map at Demo.scala:15 []
    |  MapPartitionsRDD[5] at map at Demo.scala:14 []
    |  ShuffledRDD[4] at reduceByKey at Demo.scala:13 []
    +-(2) MapPartitionsRDD[3] at map at Demo.scala:12 []
       |  MapPartitionsRDD[2] at flatMap at Demo.scala:11 []
       |  data/cc.txt MapPartitionsRDD[1] at textFile at Demo.scala:10 []
       |  data/cc.txt HadoopRDD[0] at textFile at Demo.scala:10 []
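The SparkUtils.getSparkContext() helper imported above is not shown in the source. A minimal sketch, assuming a local master with two cores (which matches the (2) partition counts in the lineage output), could look like this:

package Utils

import org.apache.spark.{SparkConf, SparkContext}

object SparkUtils {
  // Builds a SparkContext running locally on two cores; the master URL and
  // application name are assumptions for illustration, not taken from the original code.
  def getSparkContext(): SparkContext = {
    val conf = new SparkConf().setMaster("local[2]").setAppName("Demo")
    new SparkContext(conf)
  }
}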