package sparkcore
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Demo of `RDD.union` followed by `RDD.distinct` on two small integer RDDs
 * whose elements partially overlap.
 */
object Demo09Union {

  /**
   * Creates two RDDs, unions them, removes duplicate elements and prints
   * every remaining element.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setAppName("Union").setMaster("local")
    val sc = new SparkContext(conf)

    // Ensure the SparkContext is released even if one of the RDD operations fails.
    try {
      val rdd1: RDD[Int] = sc.parallelize(List(1, 2, 3, 4, 5, 6))
      val rdd2: RDD[Int] = sc.parallelize(List(4, 5, 6, 7, 8, 9))

      // union: merges two RDDs; both must have the same element type.
      // Duplicates are NOT removed by union.
      val unionRDD: RDD[Int] = rdd1.union(rdd2)

      // distinct: drops the duplicate elements left over from the union.
      val distinctRDD: RDD[Int] = unionRDD.distinct()

      // Print each element of the de-duplicated RDD.
      distinctRDD.foreach(println)
    } finally {
      sc.stop()
    }
  }
}