Skip to content
18 changes: 18 additions & 0 deletions src/Microsoft.ML.AutoML/ColumnInference/ColumnInformationUtil.cs
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,24 @@ public static IEnumerable<string> GetColumnNames(ColumnInformation columnInforma
return columnNames;
}

public static IDictionary<ColumnPurpose, int> CountColumnsByPurpose(ColumnInformation columnInformation)
{
var result = new Dictionary<ColumnPurpose, int>();
var columnNames = GetColumnNames(columnInformation);
foreach (var columnName in columnNames)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks to me like this could be implemented more tersely using LINQ grouping. Are we anti-LINQ in this repo? :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I love LINQ! Here, I think the existing way is more readable, but I see where you're coming from. Style is so idiosyncratic

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is my version; do you think it isn't that readable? Fair warning: I haven't tested whether it works :)

 var columnNames = ColumnInformationUtil.GetColumnNames(this.ColumnInformation); var columnPurposes = columnNames.Select(c => (Name: c, Purpose: ColumnInformation.GetColumnPurpose(c))).Where(c => c.Purpose != null); var purposeCounts = columnPurposes.GroupBy(c => c.Purpose).ToDictionary(c => c.Key, c => c.Count()); 
{
var purpose = columnInformation.GetColumnPurpose(columnName);
if (purpose == null)
{
continue;
}

result.TryGetValue(purpose.Value, out int count);
result[purpose.Value] = ++count;
}
return result;
}

private static void AddStringsToListIfNotNull(List<string> list, IEnumerable<string> strings)
{
foreach (var str in strings)
Expand Down
29 changes: 29 additions & 0 deletions src/Microsoft.ML.AutoML/TrainerExtensions/SweepableParams.cs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ private static IEnumerable<SweepableParam> BuildLbfgsArgsParams()
};
}

/// <summary>
/// The names of every hyperparameter swept across all trainers.
/// Populated once, when the type's static fields are initialized, from
/// <see cref="GetAllSweepableParameterNames"/>. The field is readonly so the
/// shared reference cannot be swapped out by callers.
/// </summary>
public static readonly ISet<string> AllHyperparameterNames = GetAllSweepableParameterNames();

public static IEnumerable<SweepableParam> BuildAveragePerceptronParams()
{
return BuildAveragedLinearArgsParams().Concat(BuildOnlineLinearArgsParams());
Expand Down Expand Up @@ -172,5 +177,29 @@ public static IEnumerable<SweepableParam> BuildSymSgdLogisticRegressionParams()
new SweepableDiscreteParam("UpdateFrequency", new object[] { "<Auto>", 5, 20 })
};
}

/// <summary>
/// Gets the name of every hyperparameter swept across all trainers.
/// </summary>
/// <returns>
/// A new set holding the <c>Name</c> of each sweepable parameter produced by the
/// parameter builders called below. Names that occur for more than one trainer
/// appear once, because the result is a <see cref="HashSet{T}"/>.
/// </returns>
public static ISet<string> GetAllSweepableParameterNames()
{
    var sweepableParams = new List<SweepableParam>();

    // Each builder is called exactly once; the set below de-duplicates names
    // shared between trainers.
    sweepableParams.AddRange(BuildAveragePerceptronParams());
    sweepableParams.AddRange(BuildFastForestParams());
    sweepableParams.AddRange(BuildFastTreeParams());
    sweepableParams.AddRange(BuildFastTreeTweedieParams());
    sweepableParams.AddRange(BuildLightGbmParamsMulticlass());
    sweepableParams.AddRange(BuildLightGbmParams());
    sweepableParams.AddRange(BuildLinearSvmParams());
    sweepableParams.AddRange(BuildLbfgsLogisticRegressionParams());
    sweepableParams.AddRange(BuildOnlineGradientDescentParams());
    sweepableParams.AddRange(BuildLbfgsPoissonRegressionParams());
    sweepableParams.AddRange(BuildSdcaParams());
    sweepableParams.AddRange(BuildOlsParams());
    sweepableParams.AddRange(BuildSgdParams());
    sweepableParams.AddRange(BuildSymSgdLogisticRegressionParams());

    return new HashSet<string>(sweepableParams.Select(p => p.Name));
}
}
}
9 changes: 9 additions & 0 deletions src/mlnet/CodeGenerator/CodeGenerationHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
using Microsoft.ML.CLI.CodeGenerator.CSharp;
using Microsoft.ML.CLI.Data;
using Microsoft.ML.CLI.ShellProgressBar;
using Microsoft.ML.CLI.Telemetry.Events;
using Microsoft.ML.CLI.Utilities;
using Microsoft.ML.Data;
using NLog;
Expand Down Expand Up @@ -51,7 +52,9 @@ public void GenerateCode()
{
inputColumnInformation.IgnoredColumnNames.Add(value);
}
var inferColumnsStopwatch = Stopwatch.StartNew();
columnInference = _automlEngine.InferColumns(context, inputColumnInformation);
InferColumnsEvent.TrackEvent(columnInference.ColumnInformation, inferColumnsStopwatch.Elapsed);
}
catch (Exception)
{
Expand All @@ -74,6 +77,9 @@ public void GenerateCode()
// The reason why we are doing this way of defining 3 different results is because of the AutoML API
// i.e there is no common class/interface to handle all three tasks together.

// Start a timer for the experiment
var stopwatch = Stopwatch.StartNew();

List<RunDetail<BinaryClassificationMetrics>> completedBinaryRuns = new List<RunDetail<BinaryClassificationMetrics>>();
List<RunDetail<MulticlassClassificationMetrics>> completedMulticlassRuns = new List<RunDetail<MulticlassClassificationMetrics>>();
List<RunDetail<RegressionMetrics>> completedRegressionRuns = new List<RunDetail<RegressionMetrics>>();
Expand Down Expand Up @@ -236,6 +242,7 @@ public void GenerateCode()
{
var binaryMetric = new BinaryExperimentSettings().OptimizingMetric;
var bestBinaryIteration = BestResultUtil.GetBestRun(completedBinaryRuns, binaryMetric);
ExperimentCompletedEvent.TrackEvent(bestBinaryIteration, completedBinaryRuns, TaskKind.BinaryClassification, stopwatch.Elapsed);
bestPipeline = bestBinaryIteration.Pipeline;
bestModel = bestBinaryIteration.Model;
ConsolePrinter.ExperimentResultsHeader(LogLevel.Info, _settings.MlTask, _settings.Dataset.Name, columnInformation.LabelColumnName, elapsedTime.ToString("F2"), completedBinaryRuns.Count());
Expand All @@ -253,6 +260,7 @@ public void GenerateCode()
{
var regressionMetric = new RegressionExperimentSettings().OptimizingMetric;
var bestRegressionIteration = BestResultUtil.GetBestRun(completedRegressionRuns, regressionMetric);
ExperimentCompletedEvent.TrackEvent(bestRegressionIteration, completedRegressionRuns, TaskKind.Regression, stopwatch.Elapsed);
bestPipeline = bestRegressionIteration.Pipeline;
bestModel = bestRegressionIteration.Model;
ConsolePrinter.ExperimentResultsHeader(LogLevel.Info, _settings.MlTask, _settings.Dataset.Name, columnInformation.LabelColumnName, elapsedTime.ToString("F2"), completedRegressionRuns.Count());
Expand All @@ -270,6 +278,7 @@ public void GenerateCode()
{
var muliclassMetric = new MulticlassExperimentSettings().OptimizingMetric;
var bestMulticlassIteration = BestResultUtil.GetBestRun(completedMulticlassRuns, muliclassMetric);
ExperimentCompletedEvent.TrackEvent(bestMulticlassIteration, completedMulticlassRuns, TaskKind.MulticlassClassification, stopwatch.Elapsed);
bestPipeline = bestMulticlassIteration.Pipeline;
bestModel = bestMulticlassIteration.Model;
ConsolePrinter.ExperimentResultsHeader(LogLevel.Info, _settings.MlTask, _settings.Dataset.Name, columnInformation.LabelColumnName, elapsedTime.ToString("F2"), completedMulticlassRuns.Count());
Expand Down
4 changes: 3 additions & 1 deletion src/mlnet/Commands/CommandDefinitions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@ namespace Microsoft.ML.CLI.Commands
{
internal static class CommandDefinitions
{
public const string AutoTrainCommandName = "auto-train";

internal static System.CommandLine.Command AutoTrain(ICommandHandler handler)
{
var newCommand = new System.CommandLine.Command("auto-train", "Create a new .NET project using ML.NET to train and run a model", handler: handler)
var newCommand = new System.CommandLine.Command(AutoTrainCommandName, "Create a new .NET project using ML.NET to train and run a model", handler: handler)
{
MlTask(),
Dataset(),
Expand Down
7 changes: 1 addition & 6 deletions src/mlnet/Commands/New/NewCommandHandler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Microsoft.DotNet.Cli.Telemetry;
using Microsoft.ML.CLI.CodeGenerator;
using Microsoft.ML.CLI.Data;

Expand All @@ -11,18 +10,14 @@ namespace Microsoft.ML.CLI.Commands.New
internal class NewCommand : ICommand
{
private readonly NewCommandSettings _settings;
private readonly MlTelemetry _telemetry;

internal NewCommand(NewCommandSettings settings, MlTelemetry telemetry)
internal NewCommand(NewCommandSettings settings)
{
_settings = settings;
_telemetry = telemetry;
}

public void Execute()
{
_telemetry.LogAutoTrainMlCommand(_settings.Dataset.Name, _settings.MlTask.ToString(), _settings.Dataset.Length);

CodeGenerationHelper codeGenerationHelper = new CodeGenerationHelper(new AutoMLEngine(_settings), _settings); // Needs to be improved.
codeGenerationHelper.GenerateCode();
}
Expand Down
33 changes: 26 additions & 7 deletions src/mlnet/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@
using System;
using System.CommandLine.Builder;
using System.CommandLine.Invocation;
using System.Diagnostics;
using System.IO;
using System.Linq;
using Microsoft.DotNet.Cli.Telemetry;
using Microsoft.ML.CLI.Commands;
using Microsoft.ML.CLI.Commands.New;
using Microsoft.ML.CLI.Data;
using Microsoft.ML.CLI.Telemetry.Events;
using Microsoft.ML.CLI.Utilities;
using NLog;
using NLog.Targets;
Expand All @@ -20,24 +21,33 @@ namespace Microsoft.ML.CLI
public class Program
{
private static Logger _logger = LogManager.GetCurrentClassLogger();

public static void Main(string[] args)
{
var telemetry = new MlTelemetry();
Telemetry.Telemetry.Initialize();
int exitCode = 1;
Exception ex = null;
var stopwatch = Stopwatch.StartNew();

var mlNetCommandEvent = new MLNetCommandEvent();

// Create handler outside so that commandline and the handler is decoupled and testable.
var handler = CommandHandler.Create<NewCommandSettings>(
(options) =>
{
try
{
// Send telemetry event for command issued
mlNetCommandEvent.AutoTrainCommandSettings = options;
mlNetCommandEvent.TrackEvent();

// Map the verbosity to internal levels
var verbosity = Utils.GetVerbosity(options.Verbosity);

// Build the output path
string outputBaseDir = string.Empty;
if (options.Name == null)
{

options.Name = "Sample" + Utils.GetTaskKind(options.MlTask).ToString();
outputBaseDir = Path.Combine(options.OutputPath.FullName, options.Name);
}
Expand All @@ -50,7 +60,7 @@ public static void Main(string[] args)
options.OutputPath = new DirectoryInfo(outputBaseDir);

// Instantiate the command
var command = new NewCommand(options, telemetry);
var command = new NewCommand(options);

// Override the Logger Configuration
var logconsole = LogManager.Configuration.FindTargetByName("logconsole");
Expand All @@ -67,6 +77,7 @@ public static void Main(string[] args)
}
catch (Exception e)
{
ex = e;
_logger.Log(LogLevel.Error, e.Message);
_logger.Log(LogLevel.Debug, e.ToString());
_logger.Log(LogLevel.Info, Strings.LookIntoLogFile);
Expand All @@ -82,7 +93,8 @@ public static void Main(string[] args)

var parseResult = parser.Parse(args);

if (parseResult.Errors.Count == 0)
var commandParseSucceeded = !parseResult.Errors.Any();
if (commandParseSucceeded)
{
if (parseResult.RootCommandResult.Children.Count > 0)
{
Expand All @@ -95,13 +107,20 @@ public static void Main(string[] args)

var explicitlySpecifiedOptions = options.Where(opt => !opt.IsImplicit).Select(opt => opt.Name);

telemetry.SetCommandAndParameters(command.Name, explicitlySpecifiedOptions);
mlNetCommandEvent.CommandLineParametersUsed = explicitlySpecifiedOptions;
}
}
}

// Send system info telemetry
SystemInfoEvent.TrackEvent();

parser.InvokeAsync(parseResult).Wait();
// Send exit telemetry
ApplicationExitEvent.TrackEvent(exitCode, commandParseSucceeded, stopwatch.Elapsed, ex);
// Flush pending telemetry logs
Telemetry.Telemetry.Flush(TimeSpan.FromSeconds(3));
Environment.Exit(exitCode);
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
// Licensed under the MIT license. See LICENSE file in the project root for full license information.

using System.IO;
using Microsoft.DotNet.AutoML;
using Microsoft.Extensions.EnvironmentAbstractions;
using Microsoft.ML.CLI.Telemetry;

namespace Microsoft.DotNet.Configurer
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
using System;
using System.Collections.Generic;
using System.IO;
using Microsoft.DotNet.AutoML;
using Microsoft.DotNet.Configurer;
using Microsoft.ML.CLI.Telemetry;
using RuntimeEnvironment = Microsoft.DotNet.PlatformAbstractions.RuntimeEnvironment;
using RuntimeInformation = System.Runtime.InteropServices.RuntimeInformation;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@

using System;
using System.IO;
using Microsoft.DotNet.AutoML;
using Microsoft.Extensions.EnvironmentAbstractions;
using Microsoft.ML.CLI.Telemetry;

namespace Microsoft.DotNet.Configurer
{
Expand Down
28 changes: 28 additions & 0 deletions src/mlnet/Telemetry/Events/ApplicationExitEvent.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using System.Diagnostics;

namespace Microsoft.ML.CLI.Telemetry.Events
{
/// <summary>
/// Telemetry event for CLI application exit.
/// </summary>
internal class ApplicationExitEvent
{
    /// <summary>
    /// Sends the "application-exit" telemetry event.
    /// </summary>
    /// <param name="exitCode">Value reported under the "ExitCode" property.</param>
    /// <param name="commandParseSucceeded">Value reported under the "CommandParseSucceeded" property.</param>
    /// <param name="duration">Duration forwarded unchanged to the telemetry call.</param>
    /// <param name="ex">Exception forwarded unchanged to the telemetry call.</param>
    public static void TrackEvent(int exitCode, bool commandParseSucceeded, TimeSpan duration, Exception ex)
    {
        // Telemetry values are machine-read, so format numbers independently of
        // the user's current culture.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        // Process is IDisposable; read the peak working set and release the
        // handle instead of leaving it for the finalizer.
        string peakMemory;
        using (var currentProcess = Process.GetCurrentProcess())
        {
            peakMemory = currentProcess.PeakWorkingSet64.ToString(invariant);
        }

        Telemetry.TrackEvent("application-exit",
            new Dictionary<string, string>
            {
                { "CommandParseSucceeded", commandParseSucceeded.ToString() },
                { "ExitCode", exitCode.ToString(invariant) },
                { "PeakMemory", peakMemory },
            },
            duration, ex);
    }
}
}
36 changes: 36 additions & 0 deletions src/mlnet/Telemetry/Events/ExperimentCompletedEvent.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using Microsoft.ML.AutoML;

namespace Microsoft.ML.CLI.Telemetry.Events
{
/// <summary>
/// Telemetry event for AutoML experiment completion.
/// </summary>
internal static class ExperimentCompletedEvent
{
    /// <summary>
    /// Sends the "experiment-completed" telemetry event.
    /// </summary>
    /// <typeparam name="TMetrics">Metrics type carried by the run details.</typeparam>
    /// <param name="bestRun">
    /// The run reported as best. Its pipeline (sanitized) and trainer name are sent.
    /// </param>
    /// <param name="allRuns">
    /// All runs of the experiment. "BestIterationNum" is the 1-based position of
    /// <paramref name="bestRun"/> in this list, or 0 when the list does not
    /// contain it (<c>IndexOf</c> returns -1). "NumIterations" is the list's count.
    /// </param>
    /// <param name="machineLearningTask">Task kind reported under "MachineLearningTask".</param>
    /// <param name="duration">Duration forwarded unchanged to the telemetry call.</param>
    public static void TrackEvent<TMetrics>(RunDetail<TMetrics> bestRun,
        List<RunDetail<TMetrics>> allRuns,
        TaskKind machineLearningTask,
        TimeSpan duration)
    {
        // Telemetry values are machine-read, so format numbers independently of
        // the user's current culture.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        // Process is IDisposable; read the peak working set and release the
        // handle instead of leaving it for the finalizer.
        string peakMemory;
        using (var currentProcess = Process.GetCurrentProcess())
        {
            peakMemory = currentProcess.PeakWorkingSet64.ToString(invariant);
        }

        Telemetry.TrackEvent("experiment-completed",
            new Dictionary<string, string>()
            {
                { "BestIterationNum", (allRuns.IndexOf(bestRun) + 1).ToString(invariant) },
                { "BestPipeline", Telemetry.GetSanitizedPipelineStr(bestRun.Pipeline) },
                { "BestTrainer", bestRun.TrainerName },
                { "MachineLearningTask", machineLearningTask.ToString() },
                // List<T>.Count is a property; no need for the LINQ Count() extension.
                { "NumIterations", allRuns.Count.ToString(invariant) },
                { "PeakMemory", peakMemory },
            },
            duration);
    }
}
}
Loading