A way to store and manipulate data
The library exposes in-memory storage for dynamically typed data. The storage is represented by the DataFrame class.
- Usage example
- DataFrame API
- Get the header
- Get the rows
- Get the series
- Get the shape
- Add a series
- Drop a series by a name
- Drop a series by an index
- Sample a dataframe from rows
- Sample a dataframe from series indices
- Sample a dataframe from series names
- Save a dataframe
- Shuffle rows of a dataframe
- Get a JSON representation
- Convert to Matrix
- Get a series by name
- Get a series by index
- Map values
- Map values of a series
- Ways to create a dataframe
- Prefilled dataframes
- Contacts
import 'package:ml_dataframe/ml_dataframe.dart'; void main() { final data = [ ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'Species'], [ 1, 5.1, 3.5, 1.4, 0.2, 'Iris-setosa'], [ 2, 4.9, 3.0, 1.4, 0.2, 'Iris-setosa'], [ 89, 5.6, 3.0, 4.1, 1.3, 'Iris-versicolor'], [ 90, 5.5, 2.5, 4.0, 1.3, 'Iris-versicolor'], [ 91, 5.5, 2.6, 4.4, 1.2, 'Iris-versicolor'], ]; final dataframe = DataFrame(data); print(dataframe); // DataFrame (5 x 6) // Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species // 1 5.1 3.5 1.4 0.2 Iris-setosa // 2 4.9 3.0 1.4 0.2 Iris-setosa // 89 5.6 3.0 4.1 1.3 Iris-versicolor // 90 5.5 2.5 4.0 1.3 Iris-versicolor // 91 5.5 2.6 4.4 1.2 Iris-versicolor }By default, the very first row is considered a header, unless one specifies a custom header or requests an autogenerated one. More on this is here
import 'package:ml_dataframe/ml_dataframe.dart'; void main() { final dataframe = DataFrame([ ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'Species'], [ 1, 5.1, 3.5, 1.4, 0.2, 'Iris-setosa'], [ 2, 4.9, 3.0, 1.4, 0.2, 'Iris-setosa'], [ 89, 5.6, 3.0, 4.1, 1.3, 'Iris-versicolor'], [ 90, 5.5, 2.5, 4.0, 1.3, 'Iris-versicolor'], [ 91, 5.5, 2.6, 4.4, 1.2, 'Iris-versicolor'], ]); final header = dataframe.header; print(header); // ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'Species'] }import 'package:ml_dataframe/ml_dataframe.dart'; void main() { final dataframe = DataFrame([ ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'Species'], [ 1, 5.1, 3.5, 1.4, 0.2, 'Iris-setosa'], [ 2, 4.9, 3.0, 1.4, 0.2, 'Iris-setosa'], [ 89, 5.6, 3.0, 4.1, 1.3, 'Iris-versicolor'], [ 90, 5.5, 2.5, 4.0, 1.3, 'Iris-versicolor'], [ 91, 5.5, 2.6, 4.4, 1.2, 'Iris-versicolor'], ]); final rows = dataframe.rows; print(rows); // [ // [1, 5.1, 3.5, 1.4, 0.2, 'Iris-setosa'], // [2, 4.9, 3.0, 1.4, 0.2, 'Iris-setosa'], // [89, 5.6, 3.0, 4.1, 1.3, 'Iris-versicolor'], // [90, 5.5, 2.5, 4.0, 1.3, 'Iris-versicolor'], // [91, 5.5, 2.6, 4.4, 1.2, 'Iris-versicolor'], // ], }import 'package:ml_dataframe/ml_dataframe.dart'; void main() { final dataframe = DataFrame([ ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'Species'], [ 1, 5.1, 3.5, 1.4, 0.2, 'Iris-setosa'], [ 2, 4.9, 3.0, 1.4, 0.2, 'Iris-setosa'], [ 89, 5.6, 3.0, 4.1, 1.3, 'Iris-versicolor'], [ 90, 5.5, 2.5, 4.0, 1.3, 'Iris-versicolor'], [ 91, 5.5, 2.6, 4.4, 1.2, 'Iris-versicolor'], ]); final series = dataframe.series; print(series); // [ // 'Id': [1, 2, 89, 90, 91], // 'SepalLengthCm': [5.1, 4.9, 5.6, 5.5, 5.5], // 'SepalWidthCm': [3.5, 3.0, 3.0, 2.5, 2.6], // 'PetalLengthCm': [1.4, 1.4, 4.1, 4.0, 4.4], // 'PetalWidthCm': [0.2, 0.2, 1.3, 1.3, 1.2], // 'Species': ['Iris-setosa', 'Iris-setosa', 'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor'], // 
], }import 'package:ml_dataframe/ml_dataframe.dart'; void main() { final dataframe = DataFrame([ ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'Species'], [ 1, 5.1, 3.5, 1.4, 0.2, 'Iris-setosa'], [ 2, 4.9, 3.0, 1.4, 0.2, 'Iris-setosa'], [ 89, 5.6, 3.0, 4.1, 1.3, 'Iris-versicolor'], [ 90, 5.5, 2.5, 4.0, 1.3, 'Iris-versicolor'], [ 91, 5.5, 2.6, 4.4, 1.2, 'Iris-versicolor'], ]); final shape = dataframe.shape; print(shape); // [5, 6] - 5 rows, 6 columns }import 'package:ml_dataframe/ml_dataframe.dart'; void main() { final firstSeries = Series('super_series', [1, 2, 3, 4, 5, 6]); final dataframe = DataFrame([ ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'Species'], [ 1, 5.1, 3.5, 1.4, 0.2, 'Iris-setosa'], [ 2, 4.9, 3.0, 1.4, 0.2, 'Iris-setosa'], [ 89, 5.6, 3.0, 4.1, 1.3, 'Iris-versicolor'], [ 90, 5.5, 2.5, 4.0, 1.3, 'Iris-versicolor'], [ 91, 5.5, 2.6, 4.4, 1.2, 'Iris-versicolor'], ]); final modifiedDataframe = dataframe.addSeries([firstSeries]); // The method doesn't mutate the original dataframe print(modifiedDataframe.series.first); // 'super_series': [1, 2, 3, 4, 5, 6] }import 'package:ml_dataframe/ml_dataframe.dart'; void main() { final dataframe = DataFrame([ ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'Species'], [ 1, 5.1, 3.5, 1.4, 0.2, 'Iris-setosa'], [ 2, 4.9, 3.0, 1.4, 0.2, 'Iris-setosa'], [ 89, 5.6, 3.0, 4.1, 1.3, 'Iris-versicolor'], [ 90, 5.5, 2.5, 4.0, 1.3, 'Iris-versicolor'], [ 91, 5.5, 2.6, 4.4, 1.2, 'Iris-versicolor'], ]); print(dataframe.shape); // [5, 6] - 5 rows, 6 columns final modifiedDataframe = dataframe.dropSeries(names: ['Id']); // The method doesn't mutate the original dataframe print(modifiedDataframe.shape); // [5, 5] - after the series has been dropped, the number of columns decreased by one } import 'package:ml_dataframe/ml_dataframe.dart'; void main() { final dataframe = DataFrame([ ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 
'Species'], [ 1, 5.1, 3.5, 1.4, 0.2, 'Iris-setosa'], [ 2, 4.9, 3.0, 1.4, 0.2, 'Iris-setosa'], [ 89, 5.6, 3.0, 4.1, 1.3, 'Iris-versicolor'], [ 90, 5.5, 2.5, 4.0, 1.3, 'Iris-versicolor'], [ 91, 5.5, 2.6, 4.4, 1.2, 'Iris-versicolor'], ]); print(dataframe.shape); // [5, 6] - 5 rows, 6 columns final modifiedDataframe = dataframe.dropSeries(indices: [0]); // The method doesn't mutate the original dataframe print(modifiedDataframe.shape); // [5, 5] - after the series has been dropped, the number of columns decreased by one } import 'package:ml_dataframe/ml_dataframe.dart'; void main() { final dataframe = DataFrame([ ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'Species'], [ 1, 5.1, 3.5, 1.4, 0.2, 'Iris-setosa'], [ 2, 4.9, 3.0, 1.4, 0.2, 'Iris-setosa'], [ 89, 5.6, 3.0, 4.1, 1.3, 'Iris-versicolor'], [ 90, 5.5, 2.5, 4.0, 1.3, 'Iris-versicolor'], [ 91, 5.5, 2.6, 4.4, 1.2, 'Iris-versicolor'], ]); final sampled = dataframe.sampleFromRows([0, 4]); print(sampled); // DataFrame (2 x 6) // Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species // 1 5.1 3.5 1.4 0.2 Iris-setosa // 91 5.5 2.6 4.4 1.2 Iris-versicolor } import 'package:ml_dataframe/ml_dataframe.dart'; void main() { final dataframe = DataFrame([ ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'Species'], [ 1, 5.1, 3.5, 1.4, 0.2, 'Iris-setosa'], [ 2, 4.9, 3.0, 1.4, 0.2, 'Iris-setosa'], [ 89, 5.6, 3.0, 4.1, 1.3, 'Iris-versicolor'], [ 90, 5.5, 2.5, 4.0, 1.3, 'Iris-versicolor'], [ 91, 5.5, 2.6, 4.4, 1.2, 'Iris-versicolor'], ]); final sampled = dataframe.sampleFromSeries(indices: [0, 1]); print(sampled); // DataFrame (5 x 2) // Id SepalLengthCm // 1 5.1 // 2 4.9 // 89 5.6 // 90 5.5 // 91 5.5 }import 'package:ml_dataframe/ml_dataframe.dart'; void main() { final dataframe = DataFrame([ ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'Species'], [ 1, 5.1, 3.5, 1.4, 0.2, 'Iris-setosa'], [ 2, 4.9, 3.0, 1.4, 0.2, 'Iris-setosa'], [ 89, 5.6, 3.0, 
4.1, 1.3, 'Iris-versicolor'], [ 90, 5.5, 2.5, 4.0, 1.3, 'Iris-versicolor'], [ 91, 5.5, 2.6, 4.4, 1.2, 'Iris-versicolor'], ]); final sampled = dataframe.sampleFromSeries(names: ['Id', 'SepalLengthCm']); print(sampled); // DataFrame (5 x 2) // Id SepalLengthCm // 1 5.1 // 2 4.9 // 89 5.6 // 90 5.5 // 91 5.5 }import 'package:ml_dataframe/ml_dataframe.dart'; void main() async { final dataframe = DataFrame([ ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'Species'], [ 1, 5.1, 3.5, 1.4, 0.2, 'Iris-setosa'], [ 2, 4.9, 3.0, 1.4, 0.2, 'Iris-setosa'], [ 89, 5.6, 3.0, 4.1, 1.3, 'Iris-versicolor'], [ 90, 5.5, 2.5, 4.0, 1.3, 'Iris-versicolor'], [ 91, 5.5, 2.6, 4.4, 1.2, 'Iris-versicolor'], ]); await dataframe.saveAsJson('path/to/json/file.json'); }import 'package:ml_dataframe/ml_dataframe.dart'; void main() { final dataframe = DataFrame([ ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'Species'], [ 1, 5.1, 3.5, 1.4, 0.2, 'Iris-setosa'], [ 2, 4.9, 3.0, 1.4, 0.2, 'Iris-setosa'], [ 89, 5.6, 3.0, 4.1, 1.3, 'Iris-versicolor'], [ 90, 5.5, 2.5, 4.0, 1.3, 'Iris-versicolor'], [ 91, 5.5, 2.6, 4.4, 1.2, 'Iris-versicolor'], ]); print(dataframe); // DataFrame (5 x 6) // Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species // 1 5.1 3.5 1.4 0.2 Iris-setosa // 2 4.9 3.0 1.4 0.2 Iris-setosa // 89 5.6 3.0 4.1 1.3 Iris-versicolor // 90 5.5 2.5 4.0 1.3 Iris-versicolor // 91 5.5 2.6 4.4 1.2 Iris-versicolor final shuffled = dataframe.shuffle(); // keep in mind that `shuffle` like other methods returns a new dataframe, the method doesn't mutate the source dataframe print(shuffled); // DataFrame (5 x 6) // Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species // 89 5.6 3.0 4.1 1.3 Iris-versicolor // 1 5.1 3.5 1.4 0.2 Iris-setosa // 91 5.5 2.6 4.4 1.2 Iris-versicolor // 2 4.9 3.0 1.4 0.2 Iris-setosa // 90 5.5 2.5 4.0 1.3 Iris-versicolor }One can use the seed parameter to get the same row order regardless of the number of shuffle calls:
dataframe.shuffle(seed: 10);import 'package:ml_dataframe/ml_dataframe.dart'; void main() { final dataframe = DataFrame([ ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'Species'], [ 1, 5.1, 3.5, 1.4, 0.2, 'Iris-setosa'], [ 2, 4.9, 3.0, 1.4, 0.2, 'Iris-setosa'], [ 89, 5.6, 3.0, 4.1, 1.3, 'Iris-versicolor'], [ 90, 5.5, 2.5, 4.0, 1.3, 'Iris-versicolor'], [ 91, 5.5, 2.6, 4.4, 1.2, 'Iris-versicolor'], ]); final json = dataframe.toJson(); // json contains a serializable map }import 'package:ml_dataframe/ml_dataframe.dart'; void main() { final dataframe = DataFrame([ ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm'], [ 1, 5.1, 3.5, 1.4, 0.2], [ 2, 4.9, 3.0, 1.4, 0.2], [ 89, 5.6, 3.0, 4.1, 1.3], [ 90, 5.5, 2.5, 4.0, 1.3], [ 91, 5.5, 2.6, 4.4, 1.2], ]); final matrix = dataframe.toMatrix(); print(matrix); // because of internal representation of Float32 numbers there are some round-off errors in the output // Matrix 5 x 5: // (1.0, 5.099999904632568, 3.5, 1.399999976158142, 0.20000000298023224) // (2.0, 4.900000095367432, 3.0, 1.399999976158142, 0.20000000298023224) // (89.0, 5.599999904632568, 3.0, 4.099999904632568, 1.2999999523162842) // (90.0, 5.5, 2.5, 4.0, 1.2999999523162842) // (91.0, 5.5, 2.5999999046325684, 4.400000095367432, 1.2000000476837158) }The method throws an error if the dataframe contains values that cannot be converted to a number.
import 'package:ml_dataframe/ml_dataframe.dart'; void main() { final dataframe = DataFrame([ ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'Species'], [ 1, 5.1, 3.5, 1.4, 0.2, 'Iris-setosa'], [ 2, 4.9, 3.0, 1.4, 0.2, 'Iris-setosa'], [ 89, 5.6, 3.0, 4.1, 1.3, 'Iris-versicolor'], [ 90, 5.5, 2.5, 4.0, 1.3, 'Iris-versicolor'], [ 91, 5.5, 2.6, 4.4, 1.2, 'Iris-versicolor'], ]); final series = dataframe[0]; print(series); // Id: [1, 2, 89, 90, 91] }import 'package:ml_dataframe/ml_dataframe.dart'; void main() { final dataframe = DataFrame([ ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'Species'], [ 1, 5.1, 3.5, 1.4, 0.2, 'Iris-setosa'], [ 2, 4.9, 3.0, 1.4, 0.2, 'Iris-setosa'], [ 89, 5.6, 3.0, 4.1, 1.3, 'Iris-versicolor'], [ 90, 5.5, 2.5, 4.0, 1.3, 'Iris-versicolor'], [ 91, 5.5, 2.6, 4.4, 1.2, 'Iris-versicolor'], ]); final series = dataframe['Id']; print(series); // Id: [1, 2, 89, 90, 91] }import 'package:ml_dataframe/ml_dataframe.dart'; void main() { final data = DataFrame([ ['col_1', 'col_2', 'col_3'], [ 2, 20, 200], [ 3, 30, 300], [ 4, 40, 400], ]); // the first generic type is a type of the source value, the second generic type is a type of the mapped value final modifiedData = data.map<num, num>((value) => value * 2); print(modifiedData); // DataFrame (3 x 3) // col_1 col_2 col_3 // 4 40 400 // 6 60 600 // 8 80 800 }import 'package:ml_dataframe/ml_dataframe.dart'; void main() { final data = DataFrame([ ['col_1', 'col_2', 'col_3'], [ 2, 20, 200], [ 3, 30, 300], [ 4, 40, 400], ]); // the first generic type is a type of the source value, the second generic type is a type of the mapped value final modifiedData = data.mapSeries<num, num>((value) => value * 2, name: 'col_2'); print(modifiedData); // DataFrame (3 x 3) // col_1 col_2 col_3 // 2 40 200 // 3 60 300 // 4 80 400 }import 'package:ml_dataframe/ml_dataframe.dart'; void main() { final data = [ ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 
'Species'], [ 1, 5.1, 3.5, 1.4, 0.2, 'Iris-setosa'], [ 2, 4.9, 3.0, 1.4, 0.2, 'Iris-setosa'], [ 89, 5.6, 3.0, 4.1, 1.3, 'Iris-versicolor'], [ 90, 5.5, 2.5, 4.0, 1.3, 'Iris-versicolor'], [ 91, 5.5, 2.6, 4.4, 1.2, 'Iris-versicolor'], ]; final dataframe = DataFrame(data); }By default, the very first row is considered a header. If the data does not have a header, one can use an autogenerated header by passing the headerExists: false option to the constructor:
import 'package:ml_dataframe/ml_dataframe.dart'; void main() { final data = [ [1, 5.1, 3.5, 1.4, 0.2, 'Iris-setosa'], [2, 4.9, 3.0, 1.4, 0.2, 'Iris-setosa'], [89, 5.6, 3.0, 4.1, 1.3, 'Iris-versicolor'], [90, 5.5, 2.5, 4.0, 1.3, 'Iris-versicolor'], [91, 5.5, 2.6, 4.4, 1.2, 'Iris-versicolor'], ]; final dataframe = DataFrame(data, headerExists: false); print(dataframe.header); }It outputs ['col_1', 'col_2', 'col_3', 'col_4', 'col_5', 'col_6']. col_ is a default prefix for the autogenerated columns.
Also, if there is no header row in the data, one can use a predefined header:
import 'package:ml_dataframe/ml_dataframe.dart'; void main() { final data = [ [1, 5.1, 3.5, 1.4, 0.2, 'Iris-setosa'], [2, 4.9, 3.0, 1.4, 0.2, 'Iris-setosa'], [89, 5.6, 3.0, 4.1, 1.3, 'Iris-versicolor'], [90, 5.5, 2.5, 4.0, 1.3, 'Iris-versicolor'], [91, 5.5, 2.6, 4.4, 1.2, 'Iris-versicolor'], ]; final dataframe = DataFrame(data, header: ['feature_1', 'feature_2', 'feature_3', 'feature_4', 'feature_5', 'feature_6']); }import 'package:ml_dataframe/ml_dataframe.dart'; void main() async { final data = await fromCsv('path/to/csv/file.csv'); }If the CSV file does not have a header row, the corresponding flag must be provided:
import 'package:ml_dataframe/ml_dataframe.dart'; void main() async { final data = await fromCsv('path/to/csv/file.csv', headerExists: false); }import 'package:ml_dataframe/ml_dataframe.dart'; void main() async { final data = await fromJson('path/to/json/file.json'); }This function works in conjunction with the DataFrame saveAsJson method.
In order to test data processing algorithms, one can use "toy" datasets. The library exposes several of them:
One can create a dataframe filled with Iris data:
import 'package:ml_dataframe/ml_dataframe.dart'; void main() { final data = getIrisDataFrame(); print(data); // DataFrame (150 x 6) // Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species // ... }One can create a dataframe filled with Pima Indians diabetes data:
import 'package:ml_dataframe/ml_dataframe.dart'; void main() { final data = getPimaIndiansDiabetesDataFrame(); print(data); // DataFrame (768 x 9) // Pregnancies Glucose BloodPressure SkinThickness Insulin BMI DiabetesPedigreeFunction Age Outcome // ... }One can create a dataframe filled with Red wine quality data:
import 'package:ml_dataframe/ml_dataframe.dart'; void main() { final data = getWineQualityDataframe(); print(data); // DataFrame (1599 x 12) // fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality // ... }One can create a dataframe filled with Boston housing data:
import 'package:ml_dataframe/ml_dataframe.dart'; void main() { final data = getHousingDataframe(); print(data); // DataFrame (506 x 14) // CRIM ZN INDUS CHAS NOX RM ... MEDV // 0.00632 18.0 2.31 0 0.538 6.575 ... 24.0 // 0.02731 0.0 7.07 0 0.469 6.421 ... 21.6 // 0.02729 0.0 7.07 0 0.469 7.185 ... 34.7 // 0.03237 0.0 2.18 0 0.458 6.998 ... 33.4 // 0.06905 0.0 2.18 0 0.458 7.147 ... 36.2 // ... ... ... ... ... ... ... ... // 0.06263 0.0 11.93 0 0.573 6.593 ... 22.4 // 0.04527 0.0 11.93 0 0.573 6.12 ... 20.6 // 0.06076 0.0 11.93 0 0.573 6.976 ... 23.9 // 0.10959 0.0 11.93 0 0.573 6.794 ... 22.0 // 0.04741 0.0 11.93 0 0.573 6.03 ... 11.9 }If you have questions, feel free to text me on