
Commit 56fb4a6

Merge branch 'master' of https://github.com/ddbourgin/numpy-ml
2 parents: 8961cf3 + 4f37707

37 files changed: +1515 −1176 lines

MANIFEST.in

Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
+include README.md
+include requirements*.txt
+include docs/*.rst
+include docs/img/*.png

README.md

Lines changed: 170 additions & 2 deletions
@@ -1,9 +1,177 @@
 # numpy-ml
 Ever wish you had an inefficient but somewhat legible collection of machine
-learning algorithms implemented exclusively in numpy? No?
+learning algorithms implemented exclusively in NumPy? No?
+
+## Installation
+
+### For rapid experimentation
+To use this code as a starting point for ML prototyping / experimentation, just clone the repository, create a new [virtualenv](https://pypi.org/project/virtualenv/), and start hacking:
+
+```sh
+$ git clone https://github.com/ddbourgin/numpy-ml.git
+$ cd numpy-ml && virtualenv npml && source npml/bin/activate
+$ pip3 install -r requirements-dev.txt
+```
+
+### As a package
+If you don't plan to modify the source, you can also install numpy-ml as a
+Python package: `pip3 install -u numpy_ml`.
+
+The reinforcement learning agents train on environments defined in the [OpenAI
+gym](https://github.com/openai/gym). To install these alongside numpy-ml, you
+can use `pip3 install -u 'numpy_ml[rl]'`.
 
 ## Documentation
-To see all of the available models, take a look at the [project documentation](https://numpy-ml.readthedocs.io/) or see [here](https://github.com/ddbourgin/numpy-ml/blob/master/numpy_ml/README.md).
+For more details on the available models, see the [project documentation](https://numpy-ml.readthedocs.io/).
+
+## Available models
+1. **Gaussian mixture model**
+    - EM training
+
+2. **Hidden Markov model**
+    - Viterbi decoding
+    - Likelihood computation
+    - MLE parameter estimation via Baum-Welch/forward-backward algorithm
+
+3. **Latent Dirichlet allocation** (topic model)
+    - Standard model with MLE parameter estimation via variational EM
+    - Smoothed model with MAP parameter estimation via MCMC
+
+4. **Neural networks**
+    * Layers / Layer-wise ops
+        - Add
+        - Flatten
+        - Multiply
+        - Softmax
+        - Fully-connected/Dense
+        - Sparse evolutionary connections
+        - LSTM
+        - Elman-style RNN
+        - Max + average pooling
+        - Dot-product attention
+        - Embedding layer
+        - Restricted Boltzmann machine (w. CD-n training)
+        - 2D deconvolution (w. padding and stride)
+        - 2D convolution (w. padding, dilation, and stride)
+        - 1D convolution (w. padding, dilation, stride, and causality)
+    * Modules
+        - Bidirectional LSTM
+        - ResNet-style residual blocks (identity and convolution)
+        - WaveNet-style residual blocks with dilated causal convolutions
+        - Transformer-style multi-headed scaled dot product attention
+    * Regularizers
+        - Dropout
+    * Normalization
+        - Batch normalization (spatial and temporal)
+        - Layer normalization (spatial and temporal)
+    * Optimizers
+        - SGD w/ momentum
+        - AdaGrad
+        - RMSProp
+        - Adam
+    * Learning Rate Schedulers
+        - Constant
+        - Exponential
+        - Noam/Transformer
+        - Dlib scheduler
+    * Weight Initializers
+        - Glorot/Xavier uniform and normal
+        - He/Kaiming uniform and normal
+        - Standard and truncated normal
+    * Losses
+        - Cross entropy
+        - Squared error
+        - Bernoulli VAE loss
+        - Wasserstein loss with gradient penalty
+        - Noise contrastive estimation loss
+    * Activations
+        - ReLU
+        - Tanh
+        - Affine
+        - Sigmoid
+        - Leaky ReLU
+        - ELU
+        - SELU
+        - Exponential
+        - Hard Sigmoid
+        - Softplus
+    * Models
+        - Bernoulli variational autoencoder
+        - Wasserstein GAN with gradient penalty
+        - word2vec encoder with skip-gram and CBOW architectures
+    * Utilities
+        - `col2im` (MATLAB port)
+        - `im2col` (MATLAB port)
+        - `conv1D`
+        - `conv2D`
+        - `deconv2D`
+        - `minibatch`
+
+5. **Tree-based models**
+    - Decision trees (CART)
+    - [Bagging] Random forests
+    - [Boosting] Gradient-boosted decision trees
+
+6. **Linear models**
+    - Ridge regression
+    - Logistic regression
+    - Ordinary least squares
+    - Bayesian linear regression w/ conjugate priors
+        - Unknown mean, known variance (Gaussian prior)
+        - Unknown mean, unknown variance (Normal-Gamma / Normal-Inverse-Wishart prior)
+
+7. **n-Gram sequence models**
+    - Maximum likelihood scores
+    - Additive/Lidstone smoothing
+    - Simple Good-Turing smoothing
+
+8. **Multi-armed bandit models**
+    - UCB1
+    - LinUCB
+    - Epsilon-greedy
+    - Thompson sampling w/ conjugate priors
+        - Beta-Bernoulli sampler
+
+9. **Reinforcement learning models**
+    - Cross-entropy method agent
+    - First-visit on-policy Monte Carlo agent
+    - Weighted incremental importance sampling Monte Carlo agent
+    - Expected SARSA agent
+    - TD(0) Q-learning agent
+    - Dyna-Q / Dyna-Q+ with prioritized sweeping
+
+10. **Nonparametric models**
+    - Nadaraya-Watson kernel regression
+    - k-Nearest neighbors classification and regression
+    - Gaussian process regression
+
+11. **Matrix factorization**
+    - Regularized alternating least-squares
+    - Non-negative matrix factorization
+
+12. **Preprocessing**
+    - Discrete Fourier transform (1D signals)
+    - Discrete cosine transform (type-II) (1D signals)
+    - Bilinear interpolation (2D signals)
+    - Nearest neighbor interpolation (1D and 2D signals)
+    - Autocorrelation (1D signals)
+    - Signal windowing
+    - Text tokenization
+    - Feature hashing
+    - Feature standardization
+    - One-hot encoding / decoding
+    - Huffman coding / decoding
+    - Term frequency-inverse document frequency (TF-IDF) encoding
+    - MFCC encoding
+
+13. **Utilities**
+    - Similarity kernels
+    - Distance metrics
+    - Priority queue
+    - Ball tree
+    - Discrete sampler
+    - Graph processing and generators
 
 ## Contributing
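For orientation, the sketch below shows what prototyping against the installed package might look like. It is a hypothetical example and not part of this commit: the `numpy_ml.gmm` import path, the `GMM` class, its `C` argument, and `fit` are assumptions made purely for illustration.

```python
# Hypothetical usage sketch (assumes `pip3 install numpy_ml` succeeded).
# The import path, the GMM class, its `C` (component count) argument, and
# `fit` are assumptions for illustration; they do not appear in this diff.
import numpy as np

from numpy_ml.gmm import GMM  # assumed import path

X = np.random.randn(500, 2)  # toy data: 500 samples, 2 features
gmm = GMM(C=3)               # 3-component Gaussian mixture (EM training, per the README)
gmm.fit(X)                   # maximum-likelihood parameter estimates
```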

numpy_ml/bandits/bandits.py

Lines changed: 27 additions & 10 deletions
@@ -4,7 +4,7 @@
 
 import numpy as np
 
-from ..utils.testing import random_one_hot_matrix, is_number
+from numpy_ml.utils.testing import random_one_hot_matrix, is_number
 
 
 class Bandit(ABC):
@@ -104,6 +104,7 @@ def __init__(self, payoffs, payoff_probs):
         self.payoff_probs = payoff_probs
         self.arm_evs = np.array([sum(p * v) for p, v in zip(payoff_probs, payoffs)])
         self.best_ev = np.max(self.arm_evs)
+        self.best_arm = np.argmax(self.arm_evs)
 
     @property
     def hyperparameters(self):
@@ -127,8 +128,10 @@ def oracle_payoff(self, context=None):
         -------
         optimal_rwd : float
             The expected reward under an optimal policy.
+        optimal_arm : float
+            The arm ID with the largest expected reward.
         """
-        return self.best_ev
+        return self.best_ev, self.best_arm
 
     def _pull(self, arm_id, context):
         payoffs = self.payoffs[arm_id]
@@ -159,6 +162,7 @@ def __init__(self, payoff_probs):
 
         self.arm_evs = self.payoff_probs
         self.best_ev = np.max(self.arm_evs)
+        self.best_arm = np.argmax(self.arm_evs)
 
     @property
     def hyperparameters(self):
@@ -181,8 +185,10 @@ def oracle_payoff(self, context=None):
         -------
         optimal_rwd : float
             The expected reward under an optimal policy.
+        optimal_arm : float
+            The arm ID with the largest expected reward.
         """
-        return self.best_ev
+        return self.best_ev, self.best_arm
 
     def _pull(self, arm_id, context):
         return int(np.random.rand() <= self.payoff_probs[arm_id])
@@ -217,6 +223,7 @@ def __init__(self, payoff_dists, payoff_probs):
         self.payoff_probs = payoff_probs
         self.arm_evs = np.array([mu for (mu, var) in payoff_dists])
         self.best_ev = np.max(self.arm_evs)
+        self.best_arm = np.argmax(self.arm_evs)
 
     @property
     def hyperparameters(self):
@@ -249,8 +256,10 @@ def oracle_payoff(self, context=None):
         -------
         optimal_rwd : float
             The expected reward under an optimal policy.
+        optimal_arm : float
+            The arm ID with the largest expected reward.
         """
-        return self.best_ev
+        return self.best_ev, self.best_arm
 
 
 class ShortestPathBandit(Bandit):
@@ -282,6 +291,7 @@ def __init__(self, G, start_vertex, end_vertex):
 
         self.arm_evs = self._calc_arm_evs()
         self.best_ev = np.max(self.arm_evs)
+        self.best_arm = np.argmax(self.arm_evs)
 
         placeholder = [None] * len(self.paths)
         super().__init__(placeholder, placeholder)
@@ -309,8 +319,10 @@ def oracle_payoff(self, context=None):
         -------
         optimal_rwd : float
             The expected reward under an optimal policy.
+        optimal_arm : float
+            The arm ID with the largest expected reward.
         """
-        return self.best_ev
+        return self.best_ev, self.best_arm
 
     def _calc_arm_evs(self):
         I2V = self.G.get_vertex
@@ -353,7 +365,8 @@ def __init__(self, context_probs):
 
         self.context_probs = context_probs
         self.arm_evs = self.context_probs
-        self.best_ev = self.arm_evs.max(axis=1)
+        self.best_evs = self.arm_evs.max(axis=1)
+        self.best_arms = self.arm_evs.argmax(axis=1)
 
     @property
     def hyperparameters(self):
@@ -386,15 +399,17 @@ def oracle_payoff(self, context):
         Parameters
         ----------
        context : :py:class:`ndarray <numpy.ndarray>` of shape `(D, K)` or None
-            The current context matrix for each of the bandit arms, if
-            applicable. Default is None.
+            The current context matrix for each of the bandit arms.
 
         Returns
         -------
         optimal_rwd : float
             The expected reward under an optimal policy.
+        optimal_arm : float
+            The arm ID with the largest expected reward.
         """
-        return context[:, 0] @ self.best_ev
+        context_id = context[:, 0].argmax()
+        return self.best_evs[context_id], self.best_arms[context_id]
 
     def _pull(self, arm_id, context):
         D, K = self.context_probs.shape
@@ -499,9 +514,11 @@ def oracle_payoff(self, context):
         -------
         optimal_rwd : float
             The expected reward under an optimal policy.
+        optimal_arm : float
+            The arm ID with the largest expected reward.
         """
         best_arm = np.argmax(self.arm_evs)
-        return self.arm_evs[best_arm]
+        return self.arm_evs[best_arm], best_arm
 
     def _pull(self, arm_id, context):
         K, thetas = self.K, self.thetas
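To make the revised `oracle_payoff` contract concrete: it now returns an `(expected_reward, arm_id)` pair instead of a bare reward. The sketch below assumes a `BernoulliBandit` class; only the `__init__(self, payoff_probs)` signature appears in the diff above, so that class name is an assumption.

```python
# Minimal sketch of the new oracle_payoff return contract: (reward, arm ID).
# `BernoulliBandit` is an assumed class name; only __init__(self, payoff_probs)
# is visible in the diff above.
from numpy_ml.bandits.bandits import BernoulliBandit

bandit = BernoulliBandit(payoff_probs=[0.1, 0.5, 0.8])

optimal_rwd, optimal_arm = bandit.oracle_payoff()
print(optimal_rwd)  # 0.8 -- the largest expected payoff across arms
print(optimal_arm)  # 2   -- index of the arm that attains it
```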
