
Commit e08028d: Release first version

1 parent: 518a4da

71 files changed: +5200 -3 lines

README.md

Lines changed: 92 additions & 3 deletions
# EmbML

EmbML is a tool written in Python that automatically converts off-board-trained models into C++ source code files that can be compiled and executed on low-power microcontrollers. The main goal of EmbML is to produce classifier source code that runs on resource-constrained hardware, using bare-metal programming.

The tool takes as input a classification model that was trained on a desktop or server computer using the WEKA or scikit-learn libraries. EmbML converts the input model into carefully crafted C++ code with support for embedded hardware, such as avoiding unnecessary use of main memory and implementing fixed-point operations for non-integer numbers.

# Input Models

EmbML accepts a trained model through a file that contains its serialized object. For instance, a classification model built with WEKA should be serialized to a file using the _ObjectOutputStream_ and _FileOutputStream_ classes (available in Java). scikit-learn models should be saved using the _dump_ function from the _pickle_ module.
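On the scikit-learn side, serialization is a single `pickle.dump` call. A minimal sketch, using a stand-in object in place of a trained classifier (any picklable estimator is handled the same way; the file path here is illustrative):

```python
import os
import pickle
import tempfile

# Stand-in for a trained scikit-learn classifier, e.g.
# clf = DecisionTreeClassifier().fit(X, y)
clf = {"model": "stand-in"}

path = os.path.join(tempfile.gettempdir(), "model.pkl")

# Serialize the model object to a file that EmbML can read back.
with open(path, "wb") as f:
    pickle.dump(clf, f)

# EmbML (or any other consumer) can later recover the object:
with open(path, "rb") as f:
    restored = pickle.load(f)
```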
# Supported Classification Models

`embml` supports off-board-trained classifiers from the following classes:

* From WEKA:
  * _MultilayerPerceptron_ for MLP classifiers;
  * _Logistic_ for logistic regression classifiers;
  * _SMO_ for SVM classifiers -- with linear, polynomial, and RBF kernels;
  * _J48_ for decision tree classifiers.
* From scikit-learn:
  * _MLPClassifier_ for MLP classifiers;
  * _LogisticRegression_ for logistic regression classifiers;
  * _LinearSVC_ for SVM classifiers with a linear kernel;
  * _SVC_ for SVM classifiers -- with polynomial and RBF kernels;
  * _DecisionTreeClassifier_ for decision tree models.
# Installation

You can install `embml` from PyPI:

```
pip install embml
```

This tool supports Python 2.7 and Python 3.5, and depends on the `javaobj` library.
# How To Use

```python
import embml

# For scikit-learn models:
embml.sklearnModel(inputModel, outputFile, opts)

# For WEKA models:
embml.wekaModel(inputModel, outputFile, opts)

# opts can include:
#   -rules: generate decision tree classifier code in if-then-else format.
#   -fxp <n> <m>: generate classifier code that uses a fixed-point format for
#     real-number operations. <n> is the number of integer bits and <m> the
#     number of fractional bits in the Qn.m format. Note that n + m + 1 must
#     equal 32, 16, or 8, since one bit is used to represent the sign.
#   -approx: generate MLP classifier code that replaces the sigmoid
#     activation function in the neurons with an approximation.
#   -pwl <x>: generate MLP classifier code that replaces the sigmoid
#     activation function with a piecewise-linear (PWL) approximation.
#     <x> must be 2 (2-point PWL) or 4 (4-point PWL).

# Examples: decision tree classifier code in if-then-else format.
embml.wekaModel(inputDecisionTreeModel, outputFile, opts='-rules')
embml.sklearnModel(inputDecisionTreeModel, outputFile, opts='-rules')

# Examples: classifier code using fixed-point formats.
embml.wekaModel(inputModel, outputFile, opts='-fxp 21 10')    # Q21.10
embml.sklearnModel(inputModel, outputFile, opts='-fxp 21 10') # Q21.10
embml.wekaModel(inputModel, outputFile, opts='-fxp 11 4')     # Q11.4
embml.sklearnModel(inputModel, outputFile, opts='-fxp 11 4')  # Q11.4
embml.wekaModel(inputModel, outputFile, opts='-fxp 5 2')      # Q5.2
embml.sklearnModel(inputModel, outputFile, opts='-fxp 5 2')   # Q5.2

# Examples: MLP classifier code using an approximation function.
embml.wekaModel(inputMlpModel, outputFile, opts='-approx')
embml.sklearnModel(inputMlpModel, outputFile, opts='-approx')

# Examples: MLP classifier code using PWL approximations.
embml.wekaModel(inputMlpModel, outputFile, opts='-pwl 2')
embml.sklearnModel(inputMlpModel, outputFile, opts='-pwl 2')
embml.wekaModel(inputMlpModel, outputFile, opts='-pwl 4')
embml.sklearnModel(inputMlpModel, outputFile, opts='-pwl 4')

# Some options can also be combined:
embml.wekaModel(inputMlpModel, outputFile, opts='-fxp 21 10 -pwl 2')
embml.sklearnModel(inputMlpModel, outputFile, opts='-fxp 21 10 -pwl 2')
embml.wekaModel(inputDecisionTreeModel, outputFile, opts='-fxp 21 10 -rules')
embml.sklearnModel(inputDecisionTreeModel, outputFile, opts='-fxp 21 10 -rules')
```
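The Qn.m layout selected by `-fxp` can be illustrated with plain integer arithmetic: a real number is stored as round(x * 2^m), so Q21.10 keeps 10 fractional bits in a 32-bit signed word. A minimal sketch (the helper names are hypothetical, not part of `embml`):

```python
def to_fixed(x, frac_bits):
    """Encode a real number in Qn.m: scale by 2**m and round."""
    return int(round(x * (1 << frac_bits)))

def to_float(v, frac_bits):
    """Decode a Qn.m integer back to a real number."""
    return v / float(1 << frac_bits)

# Q21.10: 21 integer bits + 10 fractional bits + 1 sign bit = 32 bits.
v = to_fixed(1.5, 10)   # 1.5 * 1024 = 1536
x = to_float(v, 10)     # back to 1.5

# Multiplying two Qn.m values doubles the fractional bits, so the raw
# product must be shifted right by m to stay in the same format.
p = (to_fixed(1.5, 10) * to_fixed(2.0, 10)) >> 10
```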
# Fixed-point library

If you generate classifier code that uses a fixed-point format, you need to include the `FixedNum.h` library, available at [https://github.com/lucastsutsui/EmbML](https://github.com/lucastsutsui/EmbML).
# Citation

If you use this tool in a scientific work, we kindly ask you to cite the following reference:

```tex
@inproceedings{da2019embml,
  title={EmbML Tool: supporting the use of supervised learning algorithms in low-cost embedded systems},
  author={da Silva, Lucas Tsutsui and Souza, Vinicius MA and Batista, Gustavo EAPA},
  booktitle={2019 IEEE 31st International Conference on Tools with Artificial Intelligence (ICTAI)},
  pages={1633--1637},
  year={2019},
  organization={IEEE}
}
```

embml/__init__.py

Lines changed: 2 additions & 0 deletions

```python
from .embml import *
```

embml/__init__.pyc (140 Bytes) and four other compiled binaries (179 Bytes, 2.2 KB, 1 KB, 1.37 KB): binary files not shown.

embml/embml.py

Lines changed: 68 additions & 0 deletions
```python
from __future__ import print_function
from .embmlWeka import recoverWeka
from .embmlSklearn import recoverSklearn
import javaobj
import pickle
import sys

def processOptions(opt):
    opts = dict()

    opts['useFxp'] = ('-fxp' in opt.split())
    if opts['useFxp']:
        # -fxp must be followed by the integer and fractional bit counts.
        if len(opt.split()) <= opt.split().index('-fxp') + 2:
            print("Error: define numbers of integer and fractional bits")
            sys.exit(1)

        opts['fracBits'] = int(opt.split()[opt.split().index('-fxp') + 2])
        opts['totalBits'] = int(opt.split()[opt.split().index('-fxp') + 1]) + opts['fracBits'] + 1

        # One extra bit holds the sign, so n + m must be 7, 15, or 31.
        if opts['totalBits'] not in (8, 16, 32):
            print("Error: <integer bits> + <fractional bits> must equal 7, 15, or 31")
            sys.exit(1)

    opts['rules'] = ('-rules' in opt.split())

    opts['sigApprox'] = ('-sigApprox' in opt.split())
    opts['pwl'] = ('-sigPwl' in opt.split())
    if opts['pwl']:
        opts['nPoints'] = int(opt.split()[opt.split().index('-sigPwl') + 1])
        if opts['nPoints'] == 2:
            # 2-point PWL approximation of the sigmoid.
            opts['pwlPoints'] = [-2.60060859307396, 2.60060859307396]
            opts['pwlCoefs'] = [[0.19226268856129256, 0.5]]
        else:
            # 4-point PWL approximation of the sigmoid.
            opts['pwlPoints'] = [-3.96049288887045136676, -1.6379627182375, 1.6379627182375, 3.96049288887045136676]
            opts['pwlCoefs'] = [[0.0588394235821312, 0.23303311868226695], [0.2218265772854816, 0.5], [0.0588394235821312, 0.766966881317733]]

    return opts

def wekaModel(inputFileName, outputFileName, opts=''):
    # WEKA models are Java serialized objects; javaobj unmarshals them.
    with open(inputFileName, "rb") as modelFile:
        marshaller = javaobj.JavaObjectUnmarshaller(modelFile)
        model = marshaller.readObject()
    opts = processOptions(opts)

    with open(outputFileName, "w") as output:
        output.write(recoverWeka(model, opts))

def sklearnModel(inputFileName, outputFileName, opts=''):
    with open(inputFileName, "rb") as modelFile:
        if sys.version_info[0] == 2:
            model = pickle.load(modelFile)
        else:
            # Python 3 needs an explicit encoding to read pickles
            # written by Python 2.
            model = pickle.load(modelFile, encoding='latin1')

    opts = processOptions(opts)

    with open(outputFileName, "w") as output:
        output.write(recoverSklearn(model, opts))
```
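The 2-point PWL branch above stores a single segment y = a*x + b between its two breakpoints; outside that interval the sigmoid saturates to 0 and 1. A minimal sketch of how generated code might evaluate it (the helper is hypothetical, but the constants are the ones from `processOptions`):

```python
import math

# Breakpoints and [slope, intercept] from the 2-point branch of processOptions.
PWL_POINTS = [-2.60060859307396, 2.60060859307396]
PWL_COEFS = [[0.19226268856129256, 0.5]]

def pwl_sigmoid(x):
    """2-point piecewise-linear approximation of the logistic sigmoid."""
    if x <= PWL_POINTS[0]:
        return 0.0
    if x >= PWL_POINTS[1]:
        return 1.0
    a, b = PWL_COEFS[0]
    return a * x + b

# Compare against the exact sigmoid at x = 1.
true_sig = 1.0 / (1.0 + math.exp(-1.0))
approx = pwl_sigmoid(1.0)
```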

embml/embml.pyc

2.36 KB
Binary file not shown.

embml/embml.py~

Lines changed: 69 additions & 0 deletions

(Editor backup file; its contents duplicate embml/embml.py above.)
