dotnet
diff --git a/‎docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassification.cs‎
Lines changed: 41 additions & 0 deletions b/‎docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassification.cs‎
Lines changed: 41 additions & 0 deletions
diff --git a/‎docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassificationWithOptions.cs‎
Lines changed: 53 additions & 0 deletions b/‎docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassificationWithOptions.cs‎
Lines changed: 53 additions & 0 deletions
diff --git a/‎docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassification.cs‎
Lines changed: 85 additions & 0 deletions b/‎docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassification.cs‎
Lines changed: 85 additions & 0 deletions
diff --git a/‎docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassificationWithOptions.cs‎
Lines changed: 96 additions & 0 deletions b/‎docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassificationWithOptions.cs‎
Lines changed: 96 additions & 0 deletions
diff --git a/‎docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegression.cs‎
Lines changed: 65 additions & 0 deletions b/‎docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegression.cs‎
Lines changed: 65 additions & 0 deletions
@@ -0,0 +1,41 @@
+using Microsoft.ML.Transforms.Categorical;
+
+namespace Microsoft.ML.Samples.Dynamic
+{
+ public class LightGbmBinaryClassification
+ {
+ // Trains a LightGBM binary classifier on the featurized adult dataset and prints its test metrics. Requires the additional NuGet package <a href="https://www.nuget.org/packages/Microsoft.ML.LightGBM/">Microsoft.ML.LightGBM</a>.
+ public static void Example()
+ {
+ // Create the MLContext, the entry point used below for splitting the data, creating the trainer and evaluating the model.
+ var mlContext = new MLContext();
+
+ // Download and featurize the dataset.
+ var dataview = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);
+
+ // Leave out 10% of data for testing.
+ var split = mlContext.BinaryClassification.TrainTestSplit(dataview, testFraction: 0.1);
+
+ // Create the LightGbm estimator, using "IsOver50K" as the label column and "Features" as the feature column.
+ var pipeline = mlContext.BinaryClassification.Trainers.LightGbm("IsOver50K", "Features");
+
+ // Fit this pipeline to the training data only; the test data stays unseen until evaluation.
+ var model = pipeline.Fit(split.TrainSet);
+
+ // Evaluate how the model is doing on the test data.
+ var dataWithPredictions = model.Transform(split.TestSet);
+
+ var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions, "IsOver50K");
+ SamplesUtils.ConsoleUtils.PrintMetrics(metrics);
+
+ // Expected output:
+ // Accuracy: 0.88
+ // AUC: 0.93
+ // F1 Score: 0.71
+ // Negative Precision: 0.90
+ // Negative Recall: 0.94
+ // Positive Precision: 0.76
+ // Positive Recall: 0.66
+ }
+ }
+}
@@ -0,0 +1,53 @@
+using Microsoft.ML.LightGBM;
+using Microsoft.ML.Transforms.Categorical;
+using static Microsoft.ML.LightGBM.Options;
+
+namespace Microsoft.ML.Samples.Dynamic
+{
+ class LightGbmBinaryClassificationWithOptions
+ {
+ // Trains a LightGBM binary classifier configured through an Options object and prints its test metrics. Requires the additional NuGet package <a href="https://www.nuget.org/packages/Microsoft.ML.LightGBM/">Microsoft.ML.LightGBM</a>.
+ public static void Example()
+ {
+ // Create the MLContext, the entry point used below for splitting the data, creating the trainer and evaluating the model.
+ var mlContext = new MLContext();
+
+ // Download and featurize the dataset.
+ var dataview = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);
+
+ // Leave out 10% of data for testing.
+ var split = mlContext.BinaryClassification.TrainTestSplit(dataview, testFraction: 0.1);
+
+ // Create the LightGbm estimator using advanced options: explicit label/feature columns and a GOSS booster (TopRate = 0.3, OtherRate = 0.2).
+ var pipeline = mlContext.BinaryClassification.Trainers.LightGbm(
+ new Options
+ {
+ LabelColumn = "IsOver50K",
+ FeatureColumn = "Features",
+ Booster = new GossBooster.Options
+ {
+ TopRate = 0.3,
+ OtherRate = 0.2
+ }
+ });
+
+ // Fit this pipeline to the training data only; the test data stays unseen until evaluation.
+ var model = pipeline.Fit(split.TrainSet);
+
+ // Evaluate how the model is doing on the test data.
+ var dataWithPredictions = model.Transform(split.TestSet);
+
+ var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions, "IsOver50K");
+ SamplesUtils.ConsoleUtils.PrintMetrics(metrics);
+
+ // Expected output:
+ // Accuracy: 0.88
+ // AUC: 0.93
+ // F1 Score: 0.71
+ // Negative Precision: 0.90
+ // Negative Recall: 0.94
+ // Positive Precision: 0.76
+ // Positive Recall: 0.67
+ }
+ }
+}
@@ -0,0 +1,85 @@
+using System;
+using System.Linq;
+using Microsoft.ML.Data;
+using Microsoft.ML.SamplesUtils;
+
+namespace Microsoft.ML.Samples.Dynamic
+{
+ class LightGbmMulticlassClassification
+ {
+ // Trains a LightGBM multiclass classifier on generated examples, prints accuracy metrics and inspects one prediction. Requires the additional NuGet package <a href="https://www.nuget.org/packages/Microsoft.ML.LightGBM/">Microsoft.ML.LightGBM</a>.
+ public static void Example()
+ {
+ // Create a general context for ML.NET operations. It can be used for exception tracking and logging,
+ // as a catalog of available operations and as the source of randomness.
+ var mlContext = new MLContext();
+
+ // Create in-memory examples as C# native class.
+ var examples = DatasetUtils.GenerateRandomMulticlassClassificationExamples(1000);
+
+ // Convert native C# class to IDataView, a consumable format to ML.NET functions.
+ var dataView = mlContext.Data.ReadFromEnumerable(examples);
+
+ //////////////////// Data Preview ////////////////////
+ // Label Features
+ // AA 0.7262433,0.8173254,0.7680227,0.5581612,0.2060332,0.5588848,0.9060271,0.4421779,0.9775497,0.2737045
+ // BB 0.4919063,0.6673147,0.8326591,0.6695119,1.182151,0.230367,1.06237,1.195347,0.8771811,0.5145918
+ // CC 1.216908,1.248052,1.391902,0.4326252,1.099942,0.9262842,1.334019,1.08762,0.9468155,0.4811099
+ // DD 0.7871246,1.053327,0.8971719,1.588544,1.242697,1.362964,0.6303943,0.9810045,0.9431419,1.557455
+
+ // Create a pipeline.
+ // - Convert the string labels into key types.
+ // - Apply LightGbm multiclass trainer, then expose its outputs under the names "PredictedLabelIndex" and "Scores" read further below.
+ var pipeline = mlContext.Transforms.Conversion.MapValueToKey("LabelIndex", "Label")
+ .Append(mlContext.MulticlassClassification.Trainers.LightGbm(labelColumn: "LabelIndex"))
+ .Append(mlContext.Transforms.Conversion.MapValueToKey("PredictedLabelIndex", "PredictedLabel"))
+ .Append(mlContext.Transforms.CopyColumns("Scores", "Score"));
+
+ // Split the data into training and test sets (half each). Only the training set is used in fitting
+ // the created pipeline. Metrics are computed on the test set.
+ var split = mlContext.MulticlassClassification.TrainTestSplit(dataView, testFraction: 0.5);
+
+ // Train the model.
+ var model = pipeline.Fit(split.TrainSet);
+
+ // Do prediction on the test set.
+ var dataWithPredictions = model.Transform(split.TestSet);
+
+ // Evaluate the trained model using the test set.
+ var metrics = mlContext.MulticlassClassification.Evaluate(dataWithPredictions, label: "LabelIndex");
+
+ // Check if metrics are reasonable.
+ Console.WriteLine($"Macro accuracy: {metrics.AccuracyMacro:F4}, Micro accuracy: {metrics.AccuracyMicro:F4}.");
+ // Console output:
+ // Macro accuracy: 0.8655, Micro accuracy: 0.8651.
+
+ // Convert the IDataView with predictions to a list of DatasetUtils.MulticlassClassificationExample.
+ var nativePredictions = mlContext.CreateEnumerable<DatasetUtils.MulticlassClassificationExample>(dataWithPredictions, false).ToList();
+
+ // Get schema object out of the prediction. It contains metadata such as the mapping from predicted label index
+ // (e.g., 1) to its actual label (e.g., "AA").
+ // The metadata can be used to get all the unique labels used during training.
+ var labelBuffer = new VBuffer<ReadOnlyMemory<char>>();
+ dataWithPredictions.Schema["PredictedLabelIndex"].GetKeyValues(ref labelBuffer);
+ // nativeLabels is { "AA" , "BB", "CC", "DD" }
+ var nativeLabels = labelBuffer.DenseValues().ToArray(); // nativeLabels[nativePrediction.PredictedLabelIndex - 1] is the original label indexed by nativePrediction.PredictedLabelIndex.
+
+
+ // Show prediction result for the 3rd example.
+ var nativePrediction = nativePredictions[2];
+ // Console output:
+ // Our predicted label to this example is AA with probability 0.9257.
+ Console.WriteLine($"Our predicted label to this example is {nativeLabels[(int)nativePrediction.PredictedLabelIndex - 1]} " +
+ $"with probability {nativePrediction.Scores[(int)nativePrediction.PredictedLabelIndex - 1]:F4}.");
+
+ // Scores and nativeLabels are two parallel attributes; that is, Scores[i] is the probability of being nativeLabels[i].
+ // Console output:
+ // The probability of being class AA is 0.9257.
+ // The probability of being class BB is 0.0739.
+ // The probability of being class CC is 0.0002.
+ // The probability of being class DD is 0.0001.
+ for (int i = 0; i < nativeLabels.Length; ++i)
+ Console.WriteLine($"The probability of being class {nativeLabels[i]} is {nativePrediction.Scores[i]:F4}.");
+ }
+ }
+}
@@ -0,0 +1,96 @@
+using System;
+using System.Linq;
+using Microsoft.ML.Data;
+using Microsoft.ML.LightGBM;
+using Microsoft.ML.SamplesUtils;
+using static Microsoft.ML.LightGBM.Options;
+
+namespace Microsoft.ML.Samples.Dynamic
+{
+ class LightGbmMulticlassClassificationWithOptions
+ {
+ // Trains a LightGBM multiclass classifier configured through an Options object, prints accuracy metrics and inspects one prediction. Requires the additional NuGet package <a href="https://www.nuget.org/packages/Microsoft.ML.LightGBM/">Microsoft.ML.LightGBM</a>.
+ public static void Example()
+ {
+ // Create a general context for ML.NET operations. It can be used for exception tracking and logging,
+ // as a catalog of available operations and as the source of randomness (seeded with 0 here).
+ var mlContext = new MLContext(seed: 0);
+
+ // Create in-memory examples as C# native class.
+ var examples = DatasetUtils.GenerateRandomMulticlassClassificationExamples(1000);
+
+ // Convert native C# class to IDataView, a consumable format to ML.NET functions.
+ var dataView = mlContext.Data.ReadFromEnumerable(examples);
+
+ //////////////////// Data Preview ////////////////////
+ // Label Features
+ // AA 0.7262433,0.8173254,0.7680227,0.5581612,0.2060332,0.5588848,0.9060271,0.4421779,0.9775497,0.2737045
+ // BB 0.4919063,0.6673147,0.8326591,0.6695119,1.182151,0.230367,1.06237,1.195347,0.8771811,0.5145918
+ // CC 1.216908,1.248052,1.391902,0.4326252,1.099942,0.9262842,1.334019,1.08762,0.9468155,0.4811099
+ // DD 0.7871246,1.053327,0.8971719,1.588544,1.242697,1.362964,0.6303943,0.9810045,0.9431419,1.557455
+
+ // Create a pipeline.
+ // - Convert the string labels into key types.
+ // - Apply LightGbm multiclass trainer with advanced options: a DART booster (DropRate = 0.15, XgboostDartMode = false).
+ var pipeline = mlContext.Transforms.Conversion.MapValueToKey("LabelIndex", "Label")
+ .Append(mlContext.MulticlassClassification.Trainers.LightGbm(new Options
+ {
+ LabelColumn = "LabelIndex",
+ FeatureColumn = "Features",
+ Booster = new DartBooster.Options
+ {
+ DropRate = 0.15,
+ XgboostDartMode = false
+ }
+ }))
+ .Append(mlContext.Transforms.Conversion.MapValueToKey("PredictedLabelIndex", "PredictedLabel"))
+ .Append(mlContext.Transforms.CopyColumns("Scores", "Score"));
+
+ // Split the data into training and test sets (half each). Only the training set is used in fitting
+ // the created pipeline. Metrics are computed on the test set.
+ var split = mlContext.MulticlassClassification.TrainTestSplit(dataView, testFraction: 0.5);
+
+ // Train the model.
+ var model = pipeline.Fit(split.TrainSet);
+
+ // Do prediction on the test set.
+ var dataWithPredictions = model.Transform(split.TestSet);
+
+ // Evaluate the trained model using the test set.
+ var metrics = mlContext.MulticlassClassification.Evaluate(dataWithPredictions, label: "LabelIndex");
+
+ // Check if metrics are reasonable.
+ Console.WriteLine($"Macro accuracy: {metrics.AccuracyMacro:F4}, Micro accuracy: {metrics.AccuracyMicro:F4}.");
+ // Console output:
+ // Macro accuracy: 0.8619, Micro accuracy: 0.8611.
+
+ // Convert the IDataView with predictions to a list of DatasetUtils.MulticlassClassificationExample.
+ var nativePredictions = mlContext.CreateEnumerable<DatasetUtils.MulticlassClassificationExample>(dataWithPredictions, false).ToList();
+
+ // Get schema object out of the prediction. It contains metadata such as the mapping from predicted label index
+ // (e.g., 1) to its actual label (e.g., "AA").
+ // The metadata can be used to get all the unique labels used during training.
+ var labelBuffer = new VBuffer<ReadOnlyMemory<char>>();
+ dataWithPredictions.Schema["PredictedLabelIndex"].GetKeyValues(ref labelBuffer);
+ // nativeLabels is { "AA" , "BB", "CC", "DD" }
+ var nativeLabels = labelBuffer.DenseValues().ToArray(); // nativeLabels[nativePrediction.PredictedLabelIndex - 1] is the original label indexed by nativePrediction.PredictedLabelIndex.
+
+
+ // Show prediction result for the 3rd example.
+ var nativePrediction = nativePredictions[2];
+ // Console output:
+ // Our predicted label to this example is AA with probability 0.8986.
+ Console.WriteLine($"Our predicted label to this example is {nativeLabels[(int)nativePrediction.PredictedLabelIndex - 1]} " +
+ $"with probability {nativePrediction.Scores[(int)nativePrediction.PredictedLabelIndex - 1]:F4}.");
+
+ // Scores and nativeLabels are two parallel attributes; that is, Scores[i] is the probability of being nativeLabels[i].
+ // Console output:
+ // The probability of being class AA is 0.8986.
+ // The probability of being class BB is 0.0961.
+ // The probability of being class CC is 0.0050.
+ // The probability of being class DD is 0.0003.
+ for (int i = 0; i < nativeLabels.Length; ++i)
+ Console.WriteLine($"The probability of being class {nativeLabels[i]} is {nativePrediction.Scores[i]:F4}.");
+ }
+ }
+}
@@ -0,0 +1,65 @@
+using System;
+using System.Linq;
+using Microsoft.ML.Data;
+
+namespace Microsoft.ML.Samples.Dynamic
+{
+ class LightGbmRegression
+ {
+ // Trains a LightGBM regressor on the housing dataset, prints two feature weights and the test metrics. Requires the additional NuGet package <a href="https://www.nuget.org/packages/Microsoft.ML.LightGBM/">Microsoft.ML.LightGBM</a>.
+ public static void Example()
+ {
+ // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
+ // as well as the source of randomness.
+ var mlContext = new MLContext();
+
+ // Download and load the housing dataset into an IDataView.
+ var dataView = SamplesUtils.DatasetUtils.LoadHousingRegressionDataset(mlContext);
+
+ //////////////////// Data Preview ////////////////////
+ // Only 8 of the columns are displayed here.
+ // MedianHomeValue CrimesPerCapita PercentResidental PercentNonRetail CharlesRiver NitricOxides RoomsPerDwelling PercentPre40s ...
+ // 24.00 0.00632 18.00 2.310 0 0.5380 6.5750 65.20 ...
+ // 21.60 0.02731 00.00 7.070 0 0.4690 6.4210 78.90 ...
+ // 34.70 0.02729 00.00 7.070 0 0.4690 7.1850 61.10 ...
+
+ var split = mlContext.Regression.TrainTestSplit(dataView, testFraction: 0.1);
+
+ // Create the estimator: concatenate every column except the label into "Features",
+ // then apply the LightGbm trainer; no other preprocessing step is added to the pipeline.
+ var labelName = "MedianHomeValue";
+ var featureNames = dataView.Schema
+ .Select(column => column.Name) // Get the column names
+ .Where(name => name != labelName) // Drop the Label
+ .ToArray();
+ var pipeline = mlContext.Transforms.Concatenate("Features", featureNames)
+ .Append(mlContext.Regression.Trainers.LightGbm(
+ labelColumn: labelName,
+ numLeaves: 4,
+ minDataPerLeaf: 6,
+ learningRate: 0.001));
+
+ // Fit this pipeline to the training data.
+ var model = pipeline.Fit(split.TrainSet);
+
+ // Get the feature importance based on the information gain used during training.
+ VBuffer<float> weights = default;
+ model.LastTransformer.Model.GetFeatureWeights(ref weights);
+ var weightsValues = weights.DenseValues().ToArray();
+ Console.WriteLine($"weight 0 - {weightsValues[0]}"); // CrimesPerCapita (weight 0) = 0.1898361
+ Console.WriteLine($"weight 5 - {weightsValues[5]}"); // RoomsPerDwelling (weight 5) = 1
+
+ // Evaluate how the model is doing on the test data.
+ var dataWithPredictions = model.Transform(split.TestSet);
+ var metrics = mlContext.Regression.Evaluate(dataWithPredictions, label: labelName);
+ SamplesUtils.ConsoleUtils.PrintMetrics(metrics);
+
+ // Output:
+ // L1: 4.97
+ // L2: 51.37
+ // LossFunction: 51.37
+ // RMS: 7.17
+ // RSquared: 0.08
+ }
+ }
+}