Skip to content

Commit fe3738e

Browse files
authored
[HIBENCH-648] Make storage level configurable in SVM (#649)
* add svm pmem storage level
  add svm pmem storage level
* add storage level from svm
* Add storage level config to SVM workload
* Create svm.conf
1 parent 0ee1e59 commit fe3738e

File tree

4 files changed

+12
-3
lines changed

4 files changed

+12
-3
lines changed

bin/functions/hibench_prop_env_mapping.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@
112112
NUM_FEATURES_SVM="hibench.svm.features",
113113
NUM_ITERATIONS_SVM="hibench.svm.numIterations",
114114
STEPSIZE_SVM="hibench.svm.stepSize",
115+
SVM_STORAGE_LEVEL="hibench.svm.storage.level",
115116
REGPARAM_SVM="hibench.svm.regParam",
116117
# For ALS
117118
NUM_USERS_ALS="hibench.als.users",

bin/workloads/ml/svm/spark/run.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ rmr_hdfs $OUTPUT_HDFS || true
2626

2727
SIZE=`dir_size $INPUT_HDFS`
2828
START_TIME=`timestamp`
29-
run_spark_job com.intel.hibench.sparkbench.ml.SVMWithSGDExample --numIterations $NUM_ITERATIONS_SVM --stepSize $STEPSIZE_SVM --regParam $REGPARAM_SVM $INPUT_HDFS
29+
run_spark_job com.intel.hibench.sparkbench.ml.SVMWithSGDExample --numIterations $NUM_ITERATIONS_SVM --storageLevel $SVM_STORAGE_LEVEL --stepSize $STEPSIZE_SVM --regParam $REGPARAM_SVM $INPUT_HDFS
3030
END_TIME=`timestamp`
3131

3232
gen_report ${START_TIME} ${END_TIME} ${SIZE}

conf/workloads/ml/svm.conf

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,5 @@ hibench.svm.regParam 0.01
2222

2323
hibench.workload.input ${hibench.hdfs.data.dir}/SVM/Input
2424
hibench.workload.output ${hibench.hdfs.data.dir}/SVM/Output
25+
26+
hibench.svm.storage.level MEMORY_ONLY

sparkbench/ml/src/main/scala/com/intel/sparkbench/ml/SVMWithSGDExample.scala

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import org.apache.spark.mllib.classification.{SVMModel, SVMWithSGD}
2222
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
2323
import org.apache.spark.rdd.RDD
2424
import org.apache.spark.mllib.regression.LabeledPoint
25+
import org.apache.spark.storage.StorageLevel
2526

2627
import scopt.OptionParser
2728

@@ -31,7 +32,8 @@ object SVMWithSGDExample {
3132
numIterations: Int = 100,
3233
stepSize: Double = 1.0,
3334
regParam: Double = 0.01,
34-
dataPath: String = null
35+
dataPath: String = null,
36+
storageLevel: String= "MEMORY_ONLY"
3537
)
3638

3739
def main(args: Array[String]): Unit = {
@@ -48,6 +50,9 @@ object SVMWithSGDExample {
4850
opt[Double]("regParam")
4951
.text(s"regParam, default: ${defaultParams.regParam}")
5052
.action((x,c) => c.copy(regParam = x))
53+
opt[String]("storageLevel")
54+
.text(s"storage level, default: ${defaultParams.storageLevel}")
55+
.action((x, c) => c.copy(storageLevel = x))
5156
arg[String]("<dataPath>")
5257
.required()
5358
.text("data path of SVM")
@@ -68,12 +73,13 @@ object SVMWithSGDExample {
6873
val numIterations = params.numIterations
6974
val stepSize = params.stepSize
7075
val regParam = params.regParam
76+
val storageLevel = StorageLevel.fromString(params.storageLevel)
7177

7278
val data: RDD[LabeledPoint] = sc.objectFile(dataPath)
7379

7480
// Split data into training (60%) and test (40%).
7581
val splits = data.randomSplit(Array(0.6, 0.4), seed = 11L)
76-
val training = splits(0).cache()
82+
val training = splits(0).persist(storageLevel)
7783
val test = splits(1)
7884

7985
// Run training algorithm to build the model

0 commit comments

Comments
 (0)