package com.hyzs.spark.streaming

import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.{SparkConf, TaskContext}
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka010._

/**
  * Created by xk on 2018/11/23.
  */
object DirectKafkaWordCount {
  def main(args: Array[String]) {

    val Array(brokers, groupId, topics) = Array("111.230.17.36:9094", "testGroup02", "jd_data01")

    // Create context with a 10 second batch interval
    val sparkConf = new SparkConf().setAppName("DirectKafkaWordCount").setMaster("local")
    val ssc = new StreamingContext(sparkConf, Seconds(10))
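    // Each micro-batch covers 10 seconds of records. The direct Kafka stream
    // has no long-running receiver, so a single-core "local" master suffices
    // here; receiver-based streams would need "local[n]" with n > 1.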

    // Create direct kafka stream with brokers and topics
    val topicsSet = topics.split(",").toSet
    val kafkaParams = Map[String, Object](
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> brokers,
      ConsumerConfig.GROUP_ID_CONFIG -> groupId,
      ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "earliest",
      ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
      ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer])
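    // "earliest" makes a consumer group with no committed offsets start from
    // the beginning of each partition; the String deserializers decode both
    // key and value as UTF-8 text.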
    val messages = KafkaUtils.createDirectStream[String, String](
      ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](topicsSet, kafkaParams))
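    // PreferConsistent spreads Kafka partitions evenly across the available
    // executors; Subscribe consumes a fixed set of topics.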

    // Get the lines, split them into words, count the words and print
    val lines = messages.map(_.value)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L)).reduceByKey(_ + _)
    wordCounts.print()
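    // print() writes the first ten elements of each batch to the driver's stdout.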

    /* messages.foreachRDD { rdd =>
      val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
      rdd.foreachPartition { item =>
        val o: OffsetRange = offsetRanges(TaskContext.get.partitionId)

        println(s"The records from topic [${o.topic}] are in partition ${o.partition}, with offsets from ${o.fromOffset} to ${o.untilOffset}")
        println(s"The record content is ${item.toList.mkString}")
      }
      rdd.count()
    } */
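    // A minimal sketch (left commented out, like the block above) of committing
    // the processed offsets back to Kafka so this group resumes where it left
    // off after a restart. CanCommitOffsets comes from the kafka010 package;
    // when committing manually, "enable.auto.commit" should be set to false in
    // kafkaParams.
    /* messages.foreachRDD { rdd =>
      val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
      // ... process the batch here, then commit its offsets ...
      messages.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
    } */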

    // Start the computation
    ssc.start()
    ssc.awaitTermination()
  }
}