@@ -3,10 +3,12 @@ package com.hyzs.spark.ml
33import com .hyzs .spark .utils .SparkUtils ._
44import com .hyzs .spark .utils .BaseUtil ._
55import org .apache .spark .ml .clustering .KMeans
6+ import org .apache .spark .ml .feature .LabeledPoint
67import org .apache .spark .ml .linalg .Vectors
78import org .apache .spark .ml .linalg .SQLDataTypes .VectorType
89import org .apache .spark .sql .types .{StructField , StructType }
910import org .apache .spark .sql .{Dataset , Row }
11+ import org .apache .spark .ml .classification .{BinaryLogisticRegressionSummary , LogisticRegression , LogisticRegressionModel }
1012/**
1113 * Created by xk on 2018/5/8.
1214 */
@@ -47,5 +49,33 @@ object MatrixOpsInSpark {
4749 model.clusterCenters.foreach(println)
4850 }
4951
/**
 * Fits an elastic-net regularised logistic regression on the `kddcup_vector`
 * table and prints the learned parameters plus, for binary models, the ROC
 * curve and the area under it.
 *
 * The estimator is configured with 10 iterations, a regularisation strength of
 * 0.3 and an elastic-net mixing parameter of 0.8. Column names are left at the
 * estimator defaults, so the table must already expose the label/features
 * columns LogisticRegression expects.
 *
 * ROC metrics are only reported when the fitted model carries a binary
 * training summary; otherwise a short notice is printed instead of failing.
 */
def logisticRegressionTest(): Unit = {
  val training = spark.table("kddcup_vector")
  val lr = new LogisticRegression()
    .setMaxIter(10)
    .setRegParam(0.3)
    .setElasticNetParam(0.8)

  // Fit the model
  val lrModel: LogisticRegressionModel = lr.fit(training)

  // `coefficients` / `intercept` are only defined for binomial models; with more
  // than two classes the model exposes a matrix and a vector instead.
  if (lrModel.numClasses > 2) {
    println(s"Coefficients: ${lrModel.coefficientMatrix} Intercept: ${lrModel.interceptVector}")
  } else {
    println(s"Coefficients: ${lrModel.coefficients} Intercept: ${lrModel.intercept}")
  }

  // `summary` throws when no training summary is attached, so check first.
  if (lrModel.hasSummary) {
    // Match on the runtime type instead of casting: a non-binary summary has no ROC.
    lrModel.summary match {
      case binarySummary: BinaryLogisticRegressionSummary =>
        binarySummary.roc.show()
        println(s"areaUnderROC: ${binarySummary.areaUnderROC}")
      case other =>
        println(s"ROC metrics skipped: training summary is ${other.getClass.getSimpleName}, not binary")
    }
  } else {
    println("ROC metrics skipped: model has no training summary")
  }
}
71+
/**
 * Turns every row of `dataSet` into a [[LabeledPoint]].
 *
 * Each cell of the row is converted to a Double with `toDoubleDynamic`
 * (a helper defined outside this section). The first converted value becomes
 * the label and all remaining values, in order, become a dense feature vector.
 *
 * @param dataSet rows whose first column holds the label
 * @return one LabeledPoint per input row
 */
def convertDataSetToLabeledPoint(dataSet: Dataset[Row]): Dataset[LabeledPoint] =
  dataSet.map { record =>
    val values: Array[Double] = record.toSeq.map(toDoubleDynamic).toArray
    // Column 0 is the label; everything after it is the feature vector.
    val label = values(0)
    val features = Vectors.dense(values.drop(1))
    LabeledPoint(label, features)
  }
5080
5181}
0 commit comments