delphi-xk
diff --git a/src/main/scala/com/hyzs/spark/ml/ConvertLibsvm_v2.scala b/src/main/scala/com/hyzs/spark/ml/ConvertLibsvm_v2.scala
Lines changed: 2 additions & 5 deletions
diff --git a/src/main/scala/com/hyzs/spark/ml/MatrixOpsInSpark.scala b/src/main/scala/com/hyzs/spark/ml/MatrixOpsInSpark.scala
Lines changed: 30 additions & 0 deletions
@@ -9,17 +9,13 @@ import com.hyzs.spark.utils.SparkUtils._
 import com.hyzs.spark.utils.{BaseUtil, Params}
 import org.apache.spark.ml.Pipeline
 import org.apache.spark.ml.feature.{StringIndexer, StringIndexerModel, VectorAssembler}
-import org.apache.spark.mllib.linalg.Vector
-import org.apache.spark.mllib.regression.LabeledPoint
-import org.apache.spark.mllib.util.MLUtils
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql._
 import org.apache.spark.sql.functions.{col, _}
 import org.apache.spark.sql.types._
-
 import scala.collection.mutable.{ArrayBuffer, ListBuffer}
 import com.hyzs.spark.sql.JDDataProcess
-import org.apache.spark.sql.expressions.UserDefinedFunction
+
 
 /**
  * Created by XIANGKUN on 2018/4/24.
@@ -171,6 +167,7 @@ object ConvertLibsvm_v2 {
  }
 
 
+
  def convertLibsvm(args:Array[String]): Unit ={
  //TODO: switch libsvm with or without label table
  //val args = Array("train")
 
@@ -3,10 +3,12 @@ package com.hyzs.spark.ml
 import com.hyzs.spark.utils.SparkUtils._
 import com.hyzs.spark.utils.BaseUtil._
 import org.apache.spark.ml.clustering.KMeans
+import org.apache.spark.ml.feature.LabeledPoint
 import org.apache.spark.ml.linalg.Vectors
 import org.apache.spark.ml.linalg.SQLDataTypes.VectorType
 import org.apache.spark.sql.types.{StructField, StructType}
 import org.apache.spark.sql.{Dataset, Row}
+import org.apache.spark.ml.classification.{BinaryLogisticRegressionSummary, LogisticRegression, LogisticRegressionModel}
 /**
  * Created by xk on 2018/5/8.
  */
@@ -47,5 +49,33 @@ object MatrixOpsInSpark {
  model.clusterCenters.foreach(println)
  }
 
+  /**
+   * Fits a logistic regression (maxIter = 10, regParam = 0.3,
+   * elasticNetParam = 0.8) on the `kddcup_vector` table and prints the
+   * fitted coefficients and intercept. When the training summary is a
+   * binary one, the ROC curve and the area under it are printed as well.
+   */
+  def logisticRegressionTest(): Unit = {
+    val training = spark.table("kddcup_vector")
+    val lr = new LogisticRegression()
+      .setMaxIter(10)
+      .setRegParam(0.3)
+      .setElasticNetParam(0.8)
+
+    // Fit the model
+    val lrModel: LogisticRegressionModel = lr.fit(training)
+    // Print the coefficients and intercept for logistic regression
+    println(s"Coefficients: ${lrModel.coefficients} Intercept: ${lrModel.intercept}")
+
+    // ROC metrics are only exposed by the binary summary. Match on the runtime
+    // type instead of casting, so any other summary kind is reported rather
+    // than surfacing as a bare ClassCastException.
+    lrModel.summary match {
+      case binarySummary: BinaryLogisticRegressionSummary =>
+        binarySummary.roc.show()
+        println(s"areaUnderROC: ${binarySummary.areaUnderROC}")
+      case other =>
+        println(s"ROC metrics unavailable for summary type: ${other.getClass.getName}")
+    }
+  }
+
+  /**
+   * Turns every row of `dataSet` into a [[LabeledPoint]]: each column value is
+   * passed through `toDoubleDynamic`, the first resulting value becomes the
+   * label and the remaining values form a dense feature vector.
+   *
+   * @param dataSet rows to convert; the first column is used as the label
+   * @return one labeled point per input row
+   */
+  def convertDataSetToLabeledPoint(dataSet:Dataset[Row]): Dataset[LabeledPoint] =
+    dataSet.map { row =>
+      val values: Array[Double] = row.toSeq.map(toDoubleDynamic).toArray
+      // Label is read first, so an empty row fails on the same index access as before.
+      LabeledPoint(values(0), Vectors.dense(values.tail))
+    }
 
 }