Kotlin
diff --git a/‎jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/Integration.kt‎
Lines changed: 6 additions & 0 deletions b/‎jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/Integration.kt‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/SparkIntegration.kt‎
Lines changed: 4 additions & 0 deletions b/‎jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/SparkIntegration.kt‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/SparkStreamingIntegration.kt‎
Lines changed: 79 additions & 101 deletions b/‎jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/SparkStreamingIntegration.kt‎
Lines changed: 79 additions & 101 deletions
diff --git a/‎jupyter/src/test/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/JupyterTests.kt‎
Lines changed: 38 additions & 25 deletions b/‎jupyter/src/test/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/JupyterTests.kt‎
Lines changed: 38 additions & 25 deletions
@@ -39,6 +39,8 @@ abstract class Integration : JupyterIntegration() {
  */
  abstract fun KotlinKernelHost.onLoaded()
 
+ abstract fun KotlinKernelHost.onShutdown()
+
  abstract fun KotlinKernelHost.afterCellExecution(snippetInstance: Any, result: FieldValue)
 
  open val dependencies: Array<String> = arrayOf(
@@ -93,6 +95,10 @@ abstract class Integration : JupyterIntegration() {
  afterCellExecution(snippetInstance, result)
  }
 
+ onShutdown {
+ onShutdown()
+ }
+
  // Render Dataset
  render<Dataset<*>> {
  HTML(it.toHtml())
 
@@ -70,5 +70,9 @@ internal class SparkIntegration : Integration() {
  ).map(::execute)
  }
 
+ override fun KotlinKernelHost.onShutdown() {
+ execute("""spark.stop()""")
+ }
+
  override fun KotlinKernelHost.afterCellExecution(snippetInstance: Any, result: FieldValue) = Unit
 }
@@ -19,29 +19,10 @@
  */
 package org.jetbrains.kotlinx.spark.api.jupyter
 
-import kotlinx.html.*
-import kotlinx.html.stream.appendHTML
-import org.apache.spark.api.java.JavaRDDLike
-import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.Dataset
-import org.apache.spark.unsafe.array.ByteArrayMethods
-import org.intellij.lang.annotations.Language
-import org.jetbrains.kotlinx.jupyter.api.HTML
-import org.jetbrains.kotlinx.jupyter.api.libraries.JupyterIntegration
-import org.jetbrains.kotlinx.spark.api.*
-import java.io.InputStreamReader
-
 
-import org.apache.spark.*
-import org.apache.spark.streaming.api.java.JavaStreamingContext
+import org.intellij.lang.annotations.Language
 import org.jetbrains.kotlinx.jupyter.api.FieldValue
 import org.jetbrains.kotlinx.jupyter.api.KotlinKernelHost
-import scala.collection.*
-import org.jetbrains.kotlinx.spark.api.SparkSession
-import scala.Product
-import java.io.Serializable
-import scala.collection.Iterable as ScalaIterable
-import scala.collection.Iterator as ScalaIterator
 
 /**
  * %use spark-streaming
@@ -60,90 +41,87 @@ internal class SparkStreamingIntegration : Integration() {
 
  @Language("kts")
  val _1 = listOf(
- """
- val sscCollection = mutableSetOf<JavaStreamingContext>()
- """.trimIndent(),
- """
- @JvmOverloads
- fun withSparkStreaming(
- batchDuration: Duration = Durations.seconds(1L),
- checkpointPath: String? = null,
- hadoopConf: Configuration = SparkHadoopUtil.get().conf(),
- createOnError: Boolean = false,
- props: Map<String, Any> = emptyMap(),
- master: String = SparkConf().get("spark.master", "local[*]"),
- appName: String = "Kotlin Spark Sample",
- timeout: Long = -1L,
- startStreamingContext: Boolean = true,
- func: KSparkStreamingSession.() -> Unit,
- ) {
- var ssc: JavaStreamingContext? = null
- try {
-
- // will only be set when a new context is created
- var kSparkStreamingSession: KSparkStreamingSession? = null
-
- val creatingFunc = {
- val sc = SparkConf()
- .setAppName(appName)
- .setMaster(master)
- .setAll(
- props
- .map { (key, value) -> key X value.toString() }
- .asScalaIterable()
- )
-
- val ssc1 = JavaStreamingContext(sc, batchDuration)
- ssc1.checkpoint(checkpointPath)
-
- kSparkStreamingSession = KSparkStreamingSession(ssc1)
- func(kSparkStreamingSession!!)
-
- ssc1
- }
-
- ssc = when {
- checkpointPath != null ->
- JavaStreamingContext.getOrCreate(checkpointPath, creatingFunc, hadoopConf, createOnError)
-
- else -> creatingFunc()
- }
- 
- sscCollection += ssc!!
-
- if (startStreamingContext) {
- ssc!!.start()
- kSparkStreamingSession?.invokeRunAfterStart()
- }
- ssc!!.awaitTerminationOrTimeout(timeout)
- } finally {
- ssc?.stop()
- println("stopping ssc")
- ssc?.awaitTermination()
- println("ssc stopped")
- ssc?.let(sscCollection::remove) 
- }
- }
- """.trimIndent(),
+// For when onInterrupt is implemented in the Jupyter kernel
+// """
+// val sscCollection = mutableSetOf<JavaStreamingContext>()
+// """.trimIndent(),
+// """
+// @JvmOverloads
+// fun withSparkStreaming(
+// batchDuration: Duration = Durations.seconds(1L),
+// checkpointPath: String? = null,
+// hadoopConf: Configuration = SparkHadoopUtil.get().conf(),
+// createOnError: Boolean = false,
+// props: Map<String, Any> = emptyMap(),
+// master: String = SparkConf().get("spark.master", "local[*]"),
+// appName: String = "Kotlin Spark Sample",
+// timeout: Long = -1L,
+// startStreamingContext: Boolean = true,
+// func: KSparkStreamingSession.() -> Unit,
+// ) {
+//
+// // will only be set when a new context is created
+// var kSparkStreamingSession: KSparkStreamingSession? = null
+//
+// val creatingFunc = {
+// val sc = SparkConf()
+// .setAppName(appName)
+// .setMaster(master)
+// .setAll(
+// props
+// .map { (key, value) -> key X value.toString() }
+// .asScalaIterable()
+// )
+//
+// val ssc = JavaStreamingContext(sc, batchDuration)
+// ssc.checkpoint(checkpointPath)
+//
+// kSparkStreamingSession = KSparkStreamingSession(ssc)
+// func(kSparkStreamingSession!!)
+//
+// ssc
+// }
+//
+// val ssc = when {
+// checkpointPath != null ->
+// JavaStreamingContext.getOrCreate(checkpointPath, creatingFunc, hadoopConf, createOnError)
+//
+// else -> creatingFunc()
+// }
+// sscCollection += ssc
+//
+// if (startStreamingContext) {
+// ssc.start()
+// kSparkStreamingSession?.invokeRunAfterStart()
+// }
+// ssc.awaitTerminationOrTimeout(timeout)
+// ssc.stop()
+// }
+// """.trimIndent(),
  """
  println("To start a spark streaming session, simply use `withSparkStreaming { }` inside a cell. To use Spark normally, use `withSpark { }` in a cell, or use `%use spark` to start a Spark session for the whole notebook.")""".trimIndent(),
  ).map(::execute)
  }
 
- override fun KotlinKernelHost.afterCellExecution(snippetInstance: Any, result: FieldValue) {
-
- @Language("kts")
- val _1 = listOf(
- """
- while (sscCollection.isNotEmpty())
- sscCollection.first().let {
- it.stop()
- sscCollection.remove(it)
- }
- """.trimIndent(),
- """
- println("afterCellExecution cleanup!")
- """.trimIndent()
- ).map(::execute)
- }
+ override fun KotlinKernelHost.onShutdown() = Unit
+
+ override fun KotlinKernelHost.afterCellExecution(snippetInstance: Any, result: FieldValue) = Unit
+
+// For when this feature is implemented in the Jupyter kernel
+// override fun KotlinKernelHost.onInterrupt() {
+//
+// @Language("kts")
+// val _1 = listOf(
+// """
+// while (sscCollection.isNotEmpty())
+// sscCollection.first().let {
+// it.stop()
+// sscCollection.remove(it)
+// }
+// """.trimIndent(),
+// """
+// println("onInterrupt cleanup!")
+// """.trimIndent()
+// ).map(::execute)
+// }
 }
@@ -21,15 +21,13 @@ package org.jetbrains.kotlinx.spark.api.jupyter
 
 import io.kotest.assertions.throwables.shouldThrowAny
 import io.kotest.core.spec.style.ShouldSpec
-import io.kotest.matchers.collections.shouldBeIn
 import io.kotest.matchers.nulls.shouldNotBeNull
 import io.kotest.matchers.shouldBe
 import io.kotest.matchers.shouldNotBe
 import io.kotest.matchers.string.shouldContain
 import io.kotest.matchers.types.shouldBeInstanceOf
 import jupyter.kotlin.DependsOn
 import org.apache.spark.api.java.JavaSparkContext
-import org.apache.spark.streaming.Duration
 import org.apache.spark.streaming.api.java.JavaStreamingContext
 import org.intellij.lang.annotations.Language
 import org.jetbrains.kotlinx.jupyter.EvalRequestData
@@ -41,11 +39,8 @@ import org.jetbrains.kotlinx.jupyter.libraries.EmptyResolutionInfoProvider
 import org.jetbrains.kotlinx.jupyter.repl.EvalResultEx
 import org.jetbrains.kotlinx.jupyter.testkit.ReplProvider
 import org.jetbrains.kotlinx.jupyter.util.PatternNameAcceptanceRule
-import org.jetbrains.kotlinx.spark.api.tuples.*
-import org.jetbrains.kotlinx.spark.api.*
-import scala.Tuple2
+import org.jetbrains.kotlinx.spark.api.SparkSession
 import java.io.Serializable
-import java.util.*
 import kotlin.script.experimental.jvm.util.classpathFromClassloader
 
 class JupyterTests : ShouldSpec({
@@ -269,7 +264,8 @@ class JupyterStreamingTests : ShouldSpec({
  context("Jupyter") {
  withRepl {
 
- should("Have sscCollection instance") {
+ // For when onInterrupt is implemented in the Jupyter kernel
+ xshould("Have sscCollection instance") {
 
  @Language("kts")
  val sscCollection = exec("""sscCollection""")
@@ -292,29 +288,46 @@ class JupyterStreamingTests : ShouldSpec({
  }
  }
 
- should("stream") {
- val input = listOf("aaa", "bbb", "aaa", "ccc")
- val counter = Counter(0)
+ xshould("stream") {
 
- withSparkStreaming(Duration(10), timeout = 1000) {
-
- val (counterBroadcast, queue) = withSpark(ssc) {
- spark.broadcast(counter) X LinkedList(listOf(sc.parallelize(input)))
- }
-
- val inputStream = ssc.queueStream(queue)
-
- inputStream.foreachRDD { rdd, _ ->
- withSpark(rdd) {
- rdd.toDS().forEach {
- it shouldBeIn input
- counterBroadcast.value.value++
+ @Language("kts")
+ val value = exec(
+ """
+ import java.util.LinkedList
+ import org.apache.spark.api.java.function.ForeachFunction
+ import org.apache.spark.util.LongAccumulator
+ 
+
+ val input = arrayListOf("aaa", "bbb", "aaa", "ccc")
+ 
+ @Volatile
+ var counter: LongAccumulator? = null
+ 
+ withSparkStreaming(Duration(10), timeout = 1_000) {
+ 
+ val queue = withSpark(ssc) {
+ LinkedList(listOf(sc.parallelize(input)))
+ }
+ 
+ val inputStream = ssc.queueStream(queue)
+ 
+ inputStream.foreachRDD { rdd, _ ->
+ withSpark(rdd) {
+ if (counter == null)
+ counter = sc.sc().longAccumulator()
+
+ rdd.toDS().showDS().forEach {
+ if (it !in input) error(it + " should be in input")
+ counter!!.add(1L)
+ }
  }
  }
  }
- }
+ counter!!.sum()
+ """.trimIndent()
+ ) as Long
 
- counter.value shouldBe input.size
+ value shouldBe 4L
  }
 
  }
Original file line number	Diff line number	Diff line change
`@@ -70,5 +70,9 @@ internal class SparkIntegration : Integration() {`
`70`	`70`	`).map(::execute)`
`71`	`71`	`}`
`72`	`72`
	`73`	`+ override fun KotlinKernelHost.onShutdown() {`
	`74`	`+ execute("""spark.stop()""")`
	`75`	`+ }`
	`76`	`+`
`73`	`77`	`override fun KotlinKernelHost.afterCellExecution(snippetInstance: Any, result: FieldValue) = Unit`
`74`	`78`	`}`