Intel-bigdata
diff --git a/bin/functions/hibench_prop_env_mapping.py b/bin/functions/hibench_prop_env_mapping.py
Lines changed: 1 addition & 0 deletions
diff --git a/bin/workloads/ml/svm/spark/run.sh b/bin/workloads/ml/svm/spark/run.sh
Lines changed: 1 addition & 1 deletion
diff --git a/conf/workloads/ml/svm.conf b/conf/workloads/ml/svm.conf
Lines changed: 2 additions & 0 deletions
diff --git a/sparkbench/ml/src/main/scala/com/intel/sparkbench/ml/SVMWithSGDExample.scala b/sparkbench/ml/src/main/scala/com/intel/sparkbench/ml/SVMWithSGDExample.scala
Lines changed: 8 additions & 2 deletions
@@ -112,6 +112,7 @@
  NUM_FEATURES_SVM="hibench.svm.features",
  NUM_ITERATIONS_SVM="hibench.svm.numIterations",
  STEPSIZE_SVM="hibench.svm.stepSize",
+ SVM_STORAGE_LEVEL="hibench.svm.storage.level",
  REGPARAM_SVM="hibench.svm.regParam",
  # For ALS
  NUM_USERS_ALS="hibench.als.users",
 
@@ -26,7 +26,7 @@ rmr_hdfs $OUTPUT_HDFS || true
 
 SIZE=`dir_size $INPUT_HDFS`
 START_TIME=`timestamp`
-run_spark_job com.intel.hibench.sparkbench.ml.SVMWithSGDExample --numIterations $NUM_ITERATIONS_SVM --stepSize $STEPSIZE_SVM --regParam $REGPARAM_SVM $INPUT_HDFS
+run_spark_job com.intel.hibench.sparkbench.ml.SVMWithSGDExample --numIterations $NUM_ITERATIONS_SVM --storageLevel $SVM_STORAGE_LEVEL --stepSize $STEPSIZE_SVM --regParam $REGPARAM_SVM $INPUT_HDFS
 END_TIME=`timestamp`
 
 gen_report ${START_TIME} ${END_TIME} ${SIZE}
 
@@ -22,3 +22,5 @@ hibench.svm.regParam 0.01
 
 hibench.workload.input ${hibench.hdfs.data.dir}/SVM/Input
 hibench.workload.output ${hibench.hdfs.data.dir}/SVM/Output
+
+hibench.svm.storage.level MEMORY_ONLY
@@ -22,6 +22,7 @@ import org.apache.spark.mllib.classification.{SVMModel, SVMWithSGD}
 import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
 import org.apache.spark.rdd.RDD
 import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.storage.StorageLevel
 
 import scopt.OptionParser
 
@@ -31,7 +32,8 @@ object SVMWithSGDExample {
  numIterations: Int = 100,
  stepSize: Double = 1.0,
  regParam: Double = 0.01,
- dataPath: String = null
+ dataPath: String = null,
+ storageLevel: String= "MEMORY_ONLY"
  )
 
  def main(args: Array[String]): Unit = {
@@ -48,6 +50,9 @@ object SVMWithSGDExample {
  opt[Double]("regParam")
  .text(s"regParam, default: ${defaultParams.regParam}")
  .action((x,c) => c.copy(regParam = x))
+ opt[String]("storageLevel")
+ .text(s"storage level, default: ${defaultParams.storageLevel}")
+ .action((x, c) => c.copy(storageLevel = x))
  arg[String]("<dataPath>")
  .required()
  .text("data path of SVM")
@@ -68,12 +73,13 @@ object SVMWithSGDExample {
  val numIterations = params.numIterations
  val stepSize = params.stepSize
  val regParam = params.regParam
+ val storageLevel = StorageLevel.fromString(params.storageLevel)
 
  val data: RDD[LabeledPoint] = sc.objectFile(dataPath)
 
  // Split data into training (60%) and test (40%).
  val splits = data.randomSplit(Array(0.6, 0.4), seed = 11L)
- val training = splits(0).cache()
+ val training = splits(0).persist(storageLevel)
  val test = splits(1)
 
  // Run training algorithm to build the model