Spark的reduceByKey使用时注意:如果某个key只出现一次,那么它对应的value会原样输出,不经过任何聚合。
原因是reduceByKey基于combineByKey实现:key唯一时只会执行createCombiner(对reduceByKey而言是恒等转换),不会触发mergeValue/mergeCombiners步骤,也就不会调用传给reduceByKey的聚合函数。
代码实例:
/**
 * Demonstrates combineByKey semantics: for a key that occurs only once,
 * neither mergeValue nor mergeCombiners is invoked — only createCombiner
 * runs, so its result is emitted as-is. Keys with several values show
 * the "@" (mergeValue) and "$" (mergeCombiners) markers in their output.
 */
object Test {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    val sc = new SparkContext(conf)
    try {
      // Three records share the key ("10001085", "51"); the key
      // ("10001085", "01") is unique, so only createCombiner touches it.
      val rdd = sc.parallelize(List(
        (("10001085", "51"), List(6, 5, 4)),
        (("10001085", "51"), List(5, 4, 3)),
        (("10001085", "51"), List(4, 3, 2)),
        (("10001085", "01"), List(3, 2, 1))))

      val ret = rdd.combineByKey(
        // createCombiner: runs once for the first value of a key in a
        // partition. v.sum replaces the original manual var/for summation.
        (v: List[Int]) => v.sum + "-",
        // mergeValue: runs only when a key sees a 2nd+ value in the same
        // partition; tags the result with "@".
        (c: String, v: List[Int]) => v.sum + "@" + c,
        // mergeCombiners: runs only when a key appears in multiple
        // partitions; tags the result with "$".
        (c1: String, c2: String) => c1 + "$" + c2
      )

      // Explicit tuple instead of the deprecated auto-tupling call
      // println(a._1, a._2); printed text is identical: (key,value).
      for (a <- ret.collect()) {
        println((a._1, a._2))
      }
    } finally {
      // Always release the SparkContext, even if the job throws.
      sc.stop()
    }
  }
}
转载请注明原文地址:https://tech.qufami.com/read-26691.html