Secondary sort in Spark

Implementing secondary sort in Scala: records are ordered by a primary field and, when the primary fields are equal, ties are broken with a secondary field.

package _core.SortAndTopN

import org.apache.spark.{SparkConf, SparkContext}

/**
  * Author Mr. Guo
  * Create 2018/9/29 - 22:00
  */

// Composite sort key: compare by `first`, and fall back to `second` when `first` is equal.
// It must be Serializable because Spark ships key objects across the cluster.
class SecondarySort(val first: Int, val second: Int) extends Ordered[SecondarySort] with Serializable {
  override def compare(that: SecondarySort): Int = {
    if (this.first - that.first != 0) {
      this.first - that.first
    } else {
      this.second - that.second
    }
  }
}

object SecondarySortApp {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("SecondarySortApp").setMaster("local[2]")
    val sc = new SparkContext(conf)

    // Each input line is expected to hold two space-separated integers, e.g. "3 5"
    val lines = sc.textFile("file:///E:\\工作\\test_data\\secondSorted.txt")

    // Wrap each line in a (SecondarySort, line) pair so sortByKey can use the composite key
    val pairWithSortkey = lines.map(line => {
      val fields = line.split(" ")
      (new SecondarySort(fields(0).toInt, fields(1).toInt), line)
    })

    // false => sort in descending order of the composite key
    val sorted = pairWithSortkey.sortByKey(false)

    // Drop the key and keep only the original line
    val sortResult = sorted.map(sortedline => sortedline._2)
    sortResult.collect.foreach(println)
  }
}
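
To make the sort direction concrete, here is a small walk-through with a hypothetical input file (the values below are made up for illustration; the real secondSorted.txt is not shown here). Each line holds two space-separated integers, and sortByKey(false) orders the lines in descending order by the first column, breaking ties with the second column.

// Hypothetical contents of secondSorted.txt:
//   3 5
//   1 2
//   3 1
//   1 9
//
// Expected output of SecondarySortApp (descending by first column, then second):
//   3 5
//   3 1
//   1 9
//   1 2

For reference, the same ordering can also be expressed without a custom key class, because Scala already provides an implicit Ordering for tuples of integers. This is only a sketch, not part of the original program; it assumes the same input format and reuses the `lines` RDD defined above.

    // Alternative sketch: use an (Int, Int) tuple as the key instead of SecondarySort.
    val sortedAlt = lines
      .map { line =>
        val f = line.split(" ")
        ((f(0).toInt, f(1).toInt), line) // (Int, Int) has a built-in Ordering
      }
      .sortByKey(ascending = false)      // descending by first column, then second
      .map(_._2)
    sortedAlt.collect().foreach(println)

A dedicated key class like SecondarySort pays off when the comparison logic is more involved than a plain field-by-field tuple comparison, for example when one field should sort ascending and another descending.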

  
