- Notifications
You must be signed in to change notification settings - Fork 1.9k
Add Cluster evaluator #316
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Ivanidzo4ka merged 5 commits into dotnet:master from Ivanidzo4ka:ivanidze/ClusterEvaluator Jun 6, 2018
Merged
Changes from 2 commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,71 @@ | ||
| // Licensed to the .NET Foundation under one or more agreements. | ||
| // The .NET Foundation licenses this file to you under the MIT license. | ||
| // See the LICENSE file in the project root for more information. | ||
| | ||
| using Microsoft.ML.Runtime; | ||
| using Microsoft.ML.Runtime.Data; | ||
| using Microsoft.ML.Transforms; | ||
| | ||
| namespace Microsoft.ML.Models | ||
| { | ||
| public sealed partial class ClusterEvaluator | ||
| { | ||
|     /// <summary> | ||
|     /// Scores a test data set with a trained model and evaluates the result as a clustering task. | ||
|     /// </summary> | ||
|     /// <param name="model"> | ||
|     /// The trained PredictionModel whose predictor model is used to score the test data. | ||
|     /// </param> | ||
|     /// <param name="testData"> | ||
|     /// The loader that supplies the data to be scored and evaluated. | ||
|     /// </param> | ||
|     /// <returns> | ||
|     /// The first ClusterMetrics entry built from the overall metrics produced by the experiment. | ||
|     /// </returns> | ||
|     public ClusterMetrics Evaluate(PredictionModel model, ILearningPipelineLoader testData) | ||
|     { | ||
|         using (var env = new TlcEnvironment()) | ||
|         { | ||
|             env.CheckValue(model, nameof(model)); | ||
|             env.CheckValue(testData, nameof(testData)); | ||
| | ||
|             Experiment experiment = env.CreateExperiment(); | ||
| | ||
|             // The loader must yield a data step; any other kind of step cannot feed the scorer. | ||
|             ILearningPipelineStep loaderStep = testData.ApplyStep(previousStep: null, experiment); | ||
|             var dataStep = loaderStep as ILearningPipelineDataStep; | ||
|             if (dataStep == null) | ||
|             { | ||
|                 throw env.Except($"The {nameof(ILearningPipelineLoader)} did not return a {nameof(ILearningPipelineDataStep)} from ApplyStep."); | ||
|             } | ||
| | ||
|             // Graph wiring: loader output -> scorer -> this evaluator. | ||
|             var scorer = new DatasetTransformScorer(); | ||
|             scorer.Data = dataStep.Data; | ||
|             DatasetTransformScorer.Output scored = experiment.Add(scorer); | ||
| | ||
|             Data = scored.ScoredData; | ||
|             Output evaluatorOutput = experiment.Add(this); | ||
| | ||
|             experiment.Compile(); | ||
| | ||
|             // Inputs are bound only after the experiment has been compiled. | ||
|             experiment.SetInput(scorer.TransformModel, model.PredictorModel); | ||
|             testData.SetInput(env, experiment); | ||
| | ||
|             experiment.Run(); | ||
| | ||
|             IDataView overall = experiment.GetOutput(evaluatorOutput.OverallMetrics); | ||
|             if (overall == null) | ||
|             { | ||
|                 throw env.Except($"Could not find OverallMetrics in the results returned in {nameof(ClusterEvaluator)} Evaluate."); | ||
|             } | ||
| | ||
|             var metric = ClusterMetrics.FromOverallMetrics(env, overall); | ||
|             Contracts.Assert(metric.Count == 1, $"Exactly one metric set was expected but found {metric.Count} metrics"); | ||
| | ||
|             return metric[0]; | ||
|         } | ||
|     } | ||
| } | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,94 @@ | ||
| // Licensed to the .NET Foundation under one or more agreements. | ||
| // The .NET Foundation licenses this file to you under the MIT license. | ||
| // See the LICENSE file in the project root for more information. | ||
| | ||
| using Microsoft.ML.Runtime; | ||
| using Microsoft.ML.Runtime.Api; | ||
| using Microsoft.ML.Runtime.Data; | ||
| using System; | ||
| using System.Collections.Generic; | ||
| | ||
| namespace Microsoft.ML.Models | ||
| { | ||
| /// <summary> | ||
| /// This class contains the overall metrics computed by cluster evaluators. | ||
| /// </summary> | ||
| public sealed class ClusterMetrics | ||
| { | ||
|     // Instances are only created through FromOverallMetrics. | ||
|     private ClusterMetrics() | ||
|     { | ||
|     } | ||
| | ||
|     /// <summary> | ||
|     /// Builds one ClusterMetrics instance per row of the overall-metrics data view. | ||
|     /// </summary> | ||
|     /// <param name="env">The host environment used for assertions and for creating exceptions.</param> | ||
|     /// <param name="overallMetrics">The data view holding the Dbi, Nmi and AvgMinScore columns.</param> | ||
|     /// <returns>A non-empty list with one entry per row of <paramref name="overallMetrics"/>.</returns> | ||
|     /// <remarks>Throws the exception created by <c>env.Except</c> when the data view has no rows.</remarks> | ||
|     internal static List<ClusterMetrics> FromOverallMetrics(IHostEnvironment env, IDataView overallMetrics) | ||
|     { | ||
|         Contracts.AssertValue(env); | ||
|         env.AssertValue(overallMetrics); | ||
| | ||
|         var metricsEnumerable = overallMetrics.AsEnumerable<SerializationClass>(env, true, ignoreMissingColumns: true); | ||
| | ||
|         // Read the rows in a single pass. foreach disposes its enumerator, so no separate, | ||
|         // undisposed enumerator is created just to probe for emptiness. | ||
|         var metrics = new List<ClusterMetrics>(); | ||
|         foreach (var metric in metricsEnumerable) | ||
|         { | ||
|             metrics.Add(new ClusterMetrics() | ||
|             { | ||
|                 AvgMinScore = metric.AvgMinScore, | ||
|                 Nmi = metric.Nmi, | ||
|                 Dbi = metric.Dbi, | ||
|             }); | ||
|         } | ||
| | ||
|         if (metrics.Count == 0) | ||
|         { | ||
|             throw env.Except("The overall ClusteringMetrics didn't have any rows."); | ||
|         } | ||
| | ||
|         return metrics; | ||
|     } | ||
| | ||
|     /// <summary> | ||
|     /// Davies-Bouldin Index. | ||
|     /// </summary> | ||
|     /// <remarks> | ||
|     /// DBI is a measure of how much scatter is in the cluster and the cluster separation. | ||
|     /// </remarks> | ||
|     public double Dbi { get; private set; } | ||
| | ||
|     /// <summary> | ||
|     /// Normalized Mutual Information | ||
|     /// </summary> | ||
|     /// <remarks> | ||
|     /// NMI is a measure of the mutual dependence of the variables. | ||
|     /// Normalized variants work on data that already has cluster labels. | ||
|     /// It returns values from 0 to 1, where higher numbers are better. | ||
|     /// </remarks> | ||
|     public double Nmi { get; private set; } | ||
| | ||
|     /// <summary> | ||
|     /// Average minimum score. | ||
|     /// </summary> | ||
|     /// <remarks> | ||
|     /// This makes sense for K-Means algorithm, where the 'score' is the distance from the centroid to the example. | ||
|     /// The average score is, therefore, a measure of proximity of the examples to cluster centroids. | ||
|     /// In other words, it's the 'cluster tightness' measure. | ||
|     /// Note however, that this metric will only decrease if the number of clusters is increased, and in the extreme case (where each distinct example is its own cluster) it will be equal to zero. | ||
|     /// </remarks> | ||
|     public double AvgMinScore { get; private set; } | ||
| | ||
|     /// <summary> | ||
|     /// This class contains the public fields necessary to deserialize from IDataView. | ||
|     /// </summary> | ||
|     private sealed class SerializationClass | ||
|     { | ||
| #pragma warning disable 649 // never assigned | ||
|         [ColumnName(Runtime.Data.ClusteringEvaluator.Dbi)] | ||
|         public Double Dbi; | ||
| | ||
|         [ColumnName(Runtime.Data.ClusteringEvaluator.Nmi)] | ||
|         public Double Nmi; | ||
| | ||
|         [ColumnName(Runtime.Data.ClusteringEvaluator.AvgMinScore)] | ||
|         public Double AvgMinScore; | ||
| | ||
| #pragma warning restore 649 // never assigned | ||
|     } | ||
| } | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -140,6 +140,12 @@ public TrainTestEvaluatorOutput<TInput, TOutput> TrainTestEvaluate<TInput, TOutp | |
| environment, | ||
| experiment.GetOutput(crossValidateOutput.OverallMetrics)).FirstOrDefault(); | ||
| } | ||
| else if (Kind==MacroUtilsTrainerKinds.SignatureClusteringTrainer) | ||
| ||
| { | ||
| trainTestOutput.ClusterMetrics = ClusterMetrics.FromOverallMetrics( | ||
| environment, | ||
| experiment.GetOutput(crossValidateOutput.OverallMetrics)).FirstOrDefault(); | ||
| } | ||
| else | ||
| { | ||
| //Implement metrics for ranking, clustering and anomaly detection. | ||
| | @@ -171,6 +177,7 @@ public class TrainTestEvaluatorOutput<TInput, TOutput> | |
| public BinaryClassificationMetrics BinaryClassificationMetrics; | ||
| public ClassificationMetrics ClassificationMetrics; | ||
| public RegressionMetrics RegressionMetrics; | ||
| public ClusterMetrics ClusterMetrics; | ||
| public PredictionModel<TInput, TOutput> PredictorModels; | ||
| | ||
| //REVIEW: Add warnings and per instance results and implement | ||
| | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we rephrase it like this?