Java Spark operators: count and countByKey



Java Spark Operators



import org.apache.spark.SparkConf import org.apache.spark.api.java.JavaPairRDD import org.apache.spark.api.java.JavaSparkContext import scala.Tuple2 import java.util.Arrays import java.util.List import java.util.Map /** * count operator: * Find the number of elements in an RDD, the return value is long. * * countByKey operator: * Find the same number of keys in an RDD, the return value is map type. * * The output result of the following code is: * 4 * {d=2, t=2} */ public class CountByKeyAndCountDemo { public static void main(String[] args) { SparkConf conf = new SparkConf().setMaster('local').setAppName('spark') JavaSparkContext sc = new JavaSparkContext(conf) List<Tuple2<String, Integer>> list1 = Arrays.asList( new Tuple2<>('t', 1), new Tuple2<>('t', 2), new Tuple2<>('d', 1), new Tuple2<>('d', 2) ) JavaPairRDD<String,Integer> javaPairRDD = sc.parallelizePairs(list1) //count operator, find the number of elements long count = javaPairRDD.count() System.err.println(count) //countByKey operator, find the number of a key Map map = javaPairRDD.countByKey() System.err.println(map.toString()) } }