# cd training/spark/bin
# ./spark-shell --master spark://192.168.56.21:7077   # standalone (single-machine) mode
# ./spark-shell --master spark://192.168.56.21:7077 --executor-memory 512m --total-executor-cores 2   # with explicit executor memory and core count
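# word count in the spark-shell REPL: split each line into words, map every word to (word, 1), then sum the counts per key with reduceByKey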
scala> sc.parallelize(List("hello scala","hello java","hello scala")).flatMap(_.split(" ")).map((_,1)).reduceByKey(_+_)
res0: org.apache.spark.rdd.RDD[(String, Int)] = ShuffledRDD[3] at reduceByKey at <console>:25
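# reduceByKey is a lazy transformation, so res0 is only the RDD lineage; nothing is computed until an action such as collect runs, as in the next step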
scala> sc.parallelize(List("hello scala","hello java","hello scala")).flatMap(_.split(" ")).map((_,1)).reduceByKey(_+_).collect
res1: Array[(String, Int)] = Array((scala,2), (hello,3), (java,1))
scala> sc.parallelize(List("hello scala","hello java","hello scala")).flatMap(_.split(" ")).map((_,1)).reduceByKey(_+_).sortBy(_._2).collect
res2: Array[(String, Int)] = Array((java,1), (scala,2), (hello,3))
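# sortBy sorts ascending by default; a minimal sketch of a descending variant of the same pipeline (not run above) passes false as the ascending flag:
sc.parallelize(List("hello scala","hello java","hello scala")).flatMap(_.split(" ")).map((_,1)).reduceByKey(_+_).sortBy(_._2, false).collect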