DynaML is a Scala & JVM Machine Learning toolbox for research, education & industry.
- Interactive. Don't want to create Maven/sbt project skeletons every time you want to try out ideas? Create and execute Scala worksheets in the DynaML shell. DynaML comes packaged with a customized version of the Ammonite REPL, with auto-complete, file operations, and scripting capabilities.
- End to End. Create complex pre-processing pipelines with the data pipes API (see the short sketch after this list), train models (deep nets, Gaussian processes, linear models and more), optimize over hyper-parameters, evaluate model predictions and visualise results.
- Enterprise Friendly. Take advantage of the JVM and Scala ecosystem: use Apache Spark to write scalable data analysis jobs and TensorFlow for deep learning, all in the same toolbox.
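As a small taste of the pipes API, the sketch below composes two hand-written pipes into a new one. The individual steps (mean-centring and an L2 norm) are illustrative rather than library built-ins; `DataPipe(...)`, the `>` composition combinator, and `run` come from the pipes API.

```scala
import _root_.io.github.tailhq.dynaml.pipes.DataPipe

// Subtract the mean from every element of a sequence.
val centre = DataPipe[Seq[Double], Seq[Double]](xs => {
  val mean = xs.sum / xs.length
  xs.map(_ - mean)
})

// Compute the L2 norm of a sequence.
val l2Norm =
  DataPipe[Seq[Double], Double](xs => math.sqrt(xs.map(x => x * x).sum))

// Pipes compose into new pipes with the `>` combinator.
val centredNorm = centre > l2Norm

centredNorm.run(Seq(1.0, 2.0, 3.0)) // == sqrt(2.0) for this toy input
```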
Currently, only *nix and OSX platforms are supported.
DynaML is compatible with Scala 2.12.
The easiest way to install DynaML is to clone and compile it from the GitHub repository. Please take a look at the installation instructions in the user guide to make sure that you have the prerequisites and to configure your installation.
Below is a sample script that trains a neural network of stacked Inception cells on the CIFAR-10 image classification task.
```scala
{
  import _root_.ammonite.ops._
  import _root_.io.github.tailhq.dynaml.pipes.DataPipe
  import _root_.io.github.tailhq.dynaml.tensorflow.{
    dtflearn,
    dtfutils,
    dtfdata,
    dtfpipe
  }
  import _root_.org.platanios.tensorflow.api._
  import _root_.org.platanios.tensorflow.api.learn.layers.Activation
  import _root_.org.platanios.tensorflow.data.image.CIFARLoader
  import _root_.java.nio.file.Paths

  // Load the CIFAR-10 data set into a temporary directory.
  val tempdir = home / "tmp"

  val dataSet =
    CIFARLoader.load(Paths.get(tempdir.toString()), CIFARLoader.CIFAR_10)

  // Wrap the training and test splits in DynaML's TF data set abstraction.
  val dtf_cifar_data = dtfdata.tf_dataset(
    dtfdata.supervised_dataset(
      dataSet.trainImages.unstack(axis = 0),
      dataSet.trainLabels.castTo[Long].unstack(axis = -1)
    ),
    dtfdata.supervised_dataset(
      dataSet.testImages.unstack(axis = 0),
      dataSet.testLabels.castTo[Long].unstack(axis = -1)
    )
  )

  println("Building the model.")

  val relu_act =
    DataPipe[String, Activation[Float]]((x: String) => tf.learn.ReLU[Float](x))

  // Two stacked Inception cells followed by two feed-forward layers.
  val architecture =
    tf.learn.Cast[UByte, Float]("Input/Cast") >>
      dtflearn.inception_unit[Float](channels = 3, Seq.fill(4)(10), relu_act)(
        layer_index = 1
      ) >>
      dtflearn.inception_unit[Float](channels = 40, Seq.fill(4)(5), relu_act)(
        layer_index = 2
      ) >>
      tf.learn.Flatten[Float]("Layer_3/Flatten") >>
      dtflearn.feedforward[Float](256)(id = 4) >>
      tf.learn.ReLU[Float]("Layer_4/ReLU", 0.1f) >>
      dtflearn.feedforward[Float](10)(id = 5)

  val loss =
    tf.learn.SparseSoftmaxCrossEntropy[Float, Long, Float](
      "Loss/CrossEntropy"
    ) >>
      tf.learn.Mean("Loss/Mean") >>
      tf.learn.ScalarSummary("Loss/Summary", "Loss")

  val optimizer = tf.train.Adam(0.1f)

  val cifar_model =
    dtflearn.model[
      Output[UByte], Output[Long], Output[Float], Float,
      Tensor[UByte], UINT8, Shape,
      Tensor[Long], INT64, Shape,
      Tensor[Float], FLOAT32, Shape](
      architecture,
      (UINT8, dataSet.trainImages.shape(1 ::)),
      (INT64, Shape()),
      loss
    )

  // Mini-batching, shuffling and prefetching behaviour of the input pipeline.
  val data_ops = dtflearn.model.data_ops[(Output[UByte], Output[Long])](
    shuffleBuffer = 5000,
    batchSize = 128,
    prefetchSize = 10
  )

  // Training configuration: summary directory, input pipeline ops, optimizer,
  // stopping criterion and training hooks.
  val train_config = dtflearn.model.trainConfig(
    tempdir / "cifar_summaries",
    data_ops,
    optimizer,
    dtflearn.rel_loss_change_stop(0.05, 500),
    Some(
      dtflearn.model._train_hooks(
        tempdir / "cifar_summaries",
        stepRateFreq = 100,
        summarySaveFreq = 100,
        checkPointFreq = 100
      )
    )
  )

  // Collate a sequence of (image, label) pairs into stacked tensors.
  val pattern_to_tensor =
    DataPipe[Seq[(Tensor[UByte], Tensor[Long])], (Tensor[UByte], Tensor[Long])](
      ds => {
        val (xs, ys) = ds.unzip

        (
          dtfpipe.EagerStack[UByte](axis = 0).run(xs),
          dtfpipe.EagerStack[Long](axis = 0).run(ys)
        )
      }
    )

  val data_handle_ops = dtflearn.model.tf_data_handle_ops[
    (Tensor[UByte], Tensor[Long]),
    (Tensor[UByte], Tensor[Long]),
    Tensor[Float],
    (Output[UByte], Output[Long])
  ](
    bufferSize = 500,
    patternToTensor = Some(pattern_to_tensor),
    concatOpO = Some(dtfpipe.EagerConcatenate[Float]())
  )

  val data_handle_ops_infer =
    dtflearn.model.tf_data_handle_ops[
      Tensor[UByte], Tensor[UByte], Tensor[Float], Output[UByte]](
      bufferSize = 1000,
      patternToTensor = Some(dtfpipe.EagerStack[UByte](axis = 0)),
      concatOpO = Some(dtfpipe.EagerConcatenate[Float]())
    )

  // Train the model.
  cifar_model.train(
    dtf_cifar_data.training_dataset,
    train_config,
    data_handle_ops
  )

  // Fraction of examples whose arg-max prediction matches the label.
  def accuracy(predictions: Tensor[Long], labels: Tensor[Long]): Float =
    tfi
      .equal(predictions.argmax[Long](1), labels)
      .castTo[Float]
      .mean()
      .scalar
      .asInstanceOf[Float]

  // Run inference over the training and test images.
  val (trainingPreds, testPreds): (Tensor[Float], Tensor[Float]) = (
    cifar_model
      .infer_batch(
        dtf_cifar_data.training_dataset.map(p => p._1),
        data_handle_ops_infer
      )
      .left
      .get,
    cifar_model
      .infer_batch(
        dtf_cifar_data.test_dataset.map(p => p._1),
        data_handle_ops_infer
      )
      .left
      .get
  )

  val (trainAccuracy, testAccuracy) = (
    accuracy(trainingPreds.castTo[Long], dataSet.trainLabels.castTo[Long]),
    accuracy(testPreds.castTo[Long], dataSet.testLabels.castTo[Long])
  )

  print("Train accuracy = ")
  pprint.pprintln(trainAccuracy)

  print("Test accuracy = ")
  pprint.pprintln(testAccuracy)
}
```
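A note on the two data-handle values in the script: `data_handle_ops` drives training, using `pattern_to_tensor` to collate buffered `(image, label)` pairs into stacked mini-batch tensors, while `data_handle_ops_infer` stacks bare image tensors for `infer_batch` and joins the per-batch predictions back into a single `Tensor[Float]` via `EagerConcatenate`.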

