Skip to content

Commit 32472f2

Browse files
minmingzhu and minmingzhu authored
Fix correlation issues (#691)
* add correlation CI
* fix building error and submit correlation.conf

Signed-off-by: minmingzhu <minmingzhu@intel.com>

* Update correlation.conf

Co-authored-by: minmingzhu <minmingzhu@intel.com>
1 parent 576a0db commit 32472f2

File tree

4 files changed

+34
-17
lines changed

4 files changed

+34
-17
lines changed

bin/run_all.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,9 @@ for benchmark in `cat $root_dir/conf/benchmarks.lst`; do
8989
if [ $benchmark == "ml/gmm" ] && [ $framework == "hadoop" ]; then
9090
continue
9191
fi
92+
if [ $benchmark == "ml/correlation" ] && [ $framework == "hadoop" ]; then
93+
continue
94+
fi
9295

9396
echo -e "${UYellow}${BYellow}Run ${Yellow}${UYellow}${benchmark}/${framework}${Color_Off}"
9497
echo -e "${BCyan}Exec script: ${Cyan}$WORKLOAD/${framework}/run.sh${Color_Off}"

conf/workloads/ml/correlation.conf

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
hibench.correlation.tiny.examples 50000
2+
hibench.correlation.tiny.features 10000
3+
4+
hibench.correlation.small.examples 100000
5+
hibench.correlation.small.features 20000
6+
7+
hibench.correlation.large.examples 200000
8+
hibench.correlation.large.features 30000
9+
10+
hibench.correlation.huge.examples 300000
11+
hibench.correlation.huge.features 50000
12+
13+
hibench.correlation.gigantic.examples 500000
14+
hibench.correlation.gigantic.features 80000
15+
16+
hibench.correlation.bigdata.examples 2000000
17+
hibench.correlation.bigdata.features 20000
18+
19+
hibench.correlation.corrType pearson
20+
21+
hibench.correlation.examples ${hibench.correlation.${hibench.scale.profile}.examples}
22+
hibench.correlation.features ${hibench.correlation.${hibench.scale.profile}.features}
23+
hibench.correlation.partitions ${hibench.default.map.parallelism}
24+
25+
hibench.workload.input ${hibench.hdfs.data.dir}/Correlation/Input
26+
hibench.workload.output ${hibench.hdfs.data.dir}/Correlation/Output

conf/workloads/ml/correlation.info

Lines changed: 0 additions & 8 deletions
This file was deleted.

sparkbench/ml/src/main/scala/com/intel/sparkbench/ml/CorrelationExample.scala

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,17 @@
11
package com.intel.hibench.sparkbench.ml
22

33
import org.apache.spark.ml.feature.LabeledPoint
4-
import scopt.OptionParser
5-
import org.apache.spark.rdd.RDD
6-
import org.apache.spark.sql.{Row, SparkSession}
74
import org.apache.spark.ml.linalg.{Matrix, Vector}
85
import org.apache.spark.ml.stat.Correlation
6+
import org.apache.spark.rdd.RDD
7+
import org.apache.spark.sql.{Row, SparkSession}
8+
import scopt.OptionParser
99

1010
object CorrelationExample {
1111

1212
case class Params(
13-
input: String = null,
14-
corrType: String = "pearson",
15-
)
13+
input: String = null,
14+
corrType: String = "pearson")
1615

1716
def main(args: Array[String]): Unit = {
1817

@@ -46,20 +45,17 @@ object CorrelationExample {
4645
import spark.implicits._
4746
val training = data.toDF().cache()
4847

49-
5048
val numTraining = training.count()
5149

5250
val numFeatures = training.select("features").first().getAs[Vector](0).size
5351
println(s" numTraining = $numTraining")
5452
println(s" numFeatures = $numFeatures")
5553

56-
5754
println(s"Correlation ${params.corrType} between label and each feature")
5855
val df = training.toDF("label", "features")
5956
val Row(coeff1: Matrix) = Correlation.corr(df, "features", params.corrType).head()
6057
println(s"Pearson correlation matrix:\n $coeff1.")
6158

6259
spark.stop()
6360
}
64-
6561
}

0 commit comments

Comments
 (0)