@@ -12,71 +12,73 @@ public static void Example()
1212 // as well as the source of randomness.
1313 var mlContext = new MLContext ( ) ;
1414
15- // Get a small dataset as an IEnumerable and them read it as ML.NET's data type.
16- IEnumerable < Microsoft . ML . SamplesUtils . DatasetUtils . SampleInfertData > data = Microsoft . ML . SamplesUtils . DatasetUtils . GetInfertData ( ) ;
17- var trainData = mlContext . Data . LoadFromEnumerable ( data ) ;
15+ // Create a small dataset as an IEnumerable.
16+ var samples = new List < InputData > ( )
17+ {
18+ new InputData ( ) { Age = 21 , Gender = "Male" , Education = "BS" , ExtraColumn = 1 } ,
19+ new InputData ( ) { Age = 23 , Gender = "Female" , Education = "MBA" , ExtraColumn = 2 } ,
20+ new InputData ( ) { Age = 28 , Gender = "Male" , Education = "PhD" , ExtraColumn = 3 } ,
21+ new InputData ( ) { Age = 22 , Gender = "Male" , Education = "BS" , ExtraColumn = 4 } ,
22+ new InputData ( ) { Age = 23 , Gender = "Female" , Education = "MS" , ExtraColumn = 5 } ,
23+ new InputData ( ) { Age = 27 , Gender = "Female" , Education = "PhD" , ExtraColumn = 6 } ,
24+ } ;
1825
19- // Preview of the data.
20- //
21- // Age Case Education Induced Parity Pooled.stratum Row_num ...
22- // 26.0 1.0 0-5yrs 1.0 6.0 3.0 1.0 ...
23- // 42.0 1.0 0-5yrs 1.0 1.0 1.0 2.0 ...
24- // 39.0 1.0 0-5yrs 2.0 6.0 4.0 3.0 ...
25- // 34.0 1.0 0-5yrs 2.0 4.0 2.0 4.0 ...
26- // 35.0 1.0 6-11yrs 1.0 3.0 32.0 5.0 ...
26+ // Convert training data to IDataView.
27+ var dataview = mlContext . Data . LoadFromEnumerable ( samples ) ;
2728
28- // Drop the Age and Education columns from the dataset.
29- var pipeline = mlContext . Transforms . DropColumns ( "Age" , "Education ") ;
29+ // Drop the ExtraColumn from the dataset.
30+ var pipeline = mlContext . Transforms . DropColumns ( "ExtraColumn " ) ;
3031
3132 // Now we can transform the data and look at the output.
3233 // Don't forget that this operation doesn't actually operate on data until we perform an action that requires
3334 // the data to be materialized.
34- var transformedData = pipeline . Fit ( trainData ) . Transform ( trainData ) ;
35+ var transformedData = pipeline . Fit ( dataview ) . Transform ( dataview ) ;
3536
3637 // Now let's take a look at what the DropColumns operation did.
37- // We can extract the transformed data as an IEnumerable of SampleInfertDataNonExistentColumns, the class we define below.
38- // When we try to pull out the Age and Education columns, ML.NET will raise an exception on the first non-existent column
39- // that it tries to access.
38+ // We can extract the transformed data as an IEnumerable of InputData, the class we define below.
39+ // When we try to pull out the Age, Gender, Education and ExtraColumn columns, ML.NET will raise an exception because ExtraColumn was dropped and no longer exists in the transformed data.
4040 try
4141 {
42- var failingRowEnumerable = mlContext . Data . CreateEnumerable < SampleInfertDataNonExistentColumns > ( transformedData , reuseRowObject : false ) ;
43- } catch ( ArgumentOutOfRangeException exception )
42+ var failingRowEnumerable = mlContext . Data . CreateEnumerable < InputData > ( transformedData , reuseRowObject : false ) ;
43+ }
44+ catch ( ArgumentOutOfRangeException exception )
4445 {
45- Console . WriteLine ( $ "Age and Education were not available, so an exception was thrown: { exception . Message } .") ;
46+ Console . WriteLine ( $ "ExtraColumn is not available, so an exception is thrown: { exception . Message } .") ;
4647 }
4748
4849 // Expected output:
49- // Age and Education were not available, so an exception was thrown: Could not find column 'Age '.
50+ // ExtraColumn is not available, so an exception is thrown: Could not find column 'ExtraColumn '.
5051 // Parameter name: Schema
5152
5253 // And we can write a few columns out to see that the rest of the data is still available.
53- var rowEnumerable = mlContext . Data . CreateEnumerable < SampleInfertDataTransformed > ( transformedData , reuseRowObject : false ) ;
54+ var rowEnumerable = mlContext . Data . CreateEnumerable < TransformedData > ( transformedData , reuseRowObject : false ) ;
5455 Console . WriteLine ( $ "The columns we didn't drop are still available.") ;
5556 foreach ( var row in rowEnumerable )
56- {
57- Console . WriteLine ( $ "Case: { row . Case } Induced: { row . Induced } Parity: { row . Parity } ") ;
58- }
57+ Console . WriteLine ( $ "Age: { row . Age } Gender: { row . Gender } Education: { row . Education } ") ;
5958
6059 // Expected output:
6160 // The columns we didn't drop are still available.
62- // Case: 1 Induced: 1 Parity: 6
63- // Case: 1 Induced: 1 Parity: 1
64- // Case: 1 Induced: 2 Parity: 6
65- // Case: 1 Induced: 2 Parity: 4
66- // Case: 1 Induced: 1 Parity: 3
61+ // Age: 21 Gender: Male Education: BS
62+ // Age: 23 Gender: Female Education: MBA
63+ // Age: 28 Gender: Male Education: PhD
64+ // Age: 22 Gender: Male Education: BS
65+ // Age: 23 Gender: Female Education: MS
66+ // Age: 27 Gender: Female Education: PhD
6767 }
6868
69- private class SampleInfertDataNonExistentColumns
69+ private class InputData
7070 {
71- public float Age { get ; set ; }
72- public float Education { get ; set ; }
71+ public int Age { get ; set ; }
72+ public string Gender { get ; set ; }
73+ public string Education { get ; set ; }
74+ public float ExtraColumn { get ; set ; }
7375 }
7476
75- private class SampleInfertDataTransformed
77+ private class TransformedData
7678 {
77- public float Case { get ; set ; }
78- public float Induced { get ; set ; }
79- public float Parity { get ; set ; }
79+ public int Age { get ; set ; }
80+ public string Gender { get ; set ; }
81+ public string Education { get ; set ; }
8082 }
8183 }
8284}
0 commit comments