ddbourgin
diff --git a/‎numpy_ml/linear_models/README.md‎
Lines changed: 1 addition & 1 deletion b/‎numpy_ml/linear_models/README.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎numpy_ml/linear_models/lm.py‎
Lines changed: 109 additions & 17 deletions b/‎numpy_ml/linear_models/lm.py‎
Lines changed: 109 additions & 17 deletions
diff --git a/‎numpy_ml/tests/test_linear_regression.py‎
Lines changed: 58 additions & 0 deletions b/‎numpy_ml/tests/test_linear_regression.py‎
Lines changed: 58 additions & 0 deletions
@@ -1,7 +1,7 @@
 # Linear Models
 The `lm.py` module implements:
 
-1. [OLS linear regression](https://en.wikipedia.org/wiki/Ordinary_least_squares) with maximum likelihood parameter estimates via the normal equation.
+1. [OLS linear regression](https://en.wikipedia.org/wiki/Ordinary_least_squares) with maximum likelihood parameter estimates via the normal equation. For both (Online and Batch mode)
 2. [Ridge regression / Tikhonov regularization](https://en.wikipedia.org/wiki/Tikhonov_regularization)
  with maximum likelihood parameter estimates via the normal equation.
 2. [Logistic regression](https://en.wikipedia.org/wiki/Logistic_regression) with maximum likelihood parameter estimates via gradient descent.
 
@@ -11,23 +11,113 @@ def __init__(self, fit_intercept=True):
 
  Notes
  -----
- Given data matrix *X* and target vector *y*, the maximum-likelihood estimate
- for the regression coefficients, :math:`\\beta`, is:
+ Given data matrix **X** and target vector **y**, the maximum-likelihood
+ estimate for the regression coefficients, :math:`\beta`, is:
 
  .. math::
 
- \hat{\beta} =
- \left(\mathbf{X}^\top \mathbf{X}\right)^{-1} \mathbf{X}^\top \mathbf{y}
+ \hat{\beta} = \Sigma^{-1} \mathbf{X}^\top \mathbf{y}
+
+ where :math:`\Sigma^{-1} = (\mathbf{X}^\top \mathbf{X})^{-1}`.
 
  Parameters
  ----------
  fit_intercept : bool
- Whether to fit an additional intercept term in addition to the
- model coefficients. Default is True.
+ Whether to fit an intercept term in addition to the model
+ coefficients. Default is True.
  """
  self.beta = None
+ self.sigma_inv = None
  self.fit_intercept = fit_intercept
 
+ self._is_fit = False
+
+ def update(self, X, y):
+ r"""
+ Incrementally update the least-squares coefficients for a set of new
+ examples.
+
+ Notes
+ -----
+ The recursive least-squares algorithm [1]_ [2]_ is used to efficiently
+ update the regression parameters as new examples become available. For
+ a single new example :math:`(\mathbf{x}_{t+1}, \mathbf{y}_{t+1})`, the
+ parameter updates are
+
+ .. math::
+
+ \beta_{t+1} = \left(
+ \mathbf{X}_{1:t}^\top \mathbf{X}_{1:t} +
+ \mathbf{x}_{t+1}\mathbf{x}_{t+1}^\top \right)^{-1}
+ \mathbf{X}_{1:t}^\top \mathbf{Y}_{1:t} +
+ \mathbf{x}_{t+1}^\top \mathbf{y}_{t+1}
+
+ where :math:`\beta_{t+1}` are the updated regression coefficients,
+ :math:`\mathbf{X}_{1:t}` and :math:`\mathbf{Y}_{1:t}` are the set of
+ examples observed from timestep 1 to *t*.
+
+ In the single-example case, the RLS algorithm uses the Sherman-Morrison
+ formula [3]_ to avoid re-inverting the covariance matrix on each new
+ update. In the multi-example case (i.e., where :math:`\mathbf{X}_{t+1}`
+ and :math:`\mathbf{y}_{t+1}` are matrices of `N` examples each), we use
+ the generalized Woodbury matrix identity [4]_ to update the inverse
+ covariance. This comes at a performance cost, but is still more
+ performant than doing multiple single-example updates if *N* is large.
+
+ References
+ ----------
+ .. [1] Gauss, C. F. (1821) _Theoria combinationis observationum
+ erroribus minimis obnoxiae_, Werke, 4. Gottinge
+ .. [2] https://en.wikipedia.org/wiki/Recursive_least_squares_filter
+ .. [3] https://en.wikipedia.org/wiki/Sherman%E2%80%93Morrison_formula
+ .. [4] https://en.wikipedia.org/wiki/Woodbury_matrix_identity
+
+ Parameters
+ ----------
+ X : :py:class:`ndarray <numpy.ndarray>` of shape `(N, M)`
+ A dataset consisting of `N` examples, each of dimension `M`
+ y : :py:class:`ndarray <numpy.ndarray>` of shape `(N, K)`
+ The targets for each of the `N` examples in `X`, where each target
+ has dimension `K`
+ """
+ if not self._is_fit:
+ raise RuntimeError("You must call the `fit` method before calling `update`")
+
+ X, y = np.atleast_2d(X), np.atleast_2d(y)
+
+ X1, Y1 = X.shape[0], y.shape[0]
+ self._update1D(X, y) if X1 == Y1 == 1 else self._update2D(X, y)
+
+ def _update1D(self, x, y):
+ """Sherman-Morrison update for a single example"""
+ beta, S_inv = self.beta, self.sigma_inv
+
+ # convert x to a design vector if we're fitting an intercept
+ if self.fit_intercept:
+ x = np.c_[1, x]
+
+ # update the inverse of the covariance matrix via Sherman-Morrison
+ S_inv -= (S_inv @ x.T @ x @ S_inv) / (1 + x @ S_inv @ x.T)
+
+ # update the model coefficients
+ beta += S_inv @ x.T @ (y - x @ beta)
+
+ def _update2D(self, X, y):
+ """Woodbury update for multiple examples"""
+ beta, S_inv = self.beta, self.sigma_inv
+
+ # convert X to a design matrix if we're fitting an intercept
+ if self.fit_intercept:
+ X = np.c_[np.ones(X.shape[0]), X]
+
+ I = np.eye(X.shape[0])
+
+ # update the inverse of the covariance matrix via Woodbury identity
+ S_inv -= S_inv @ X.T @ np.linalg.pinv(I + X @ S_inv @ X.T) @ X @ S_inv
+
+ # update the model coefficients
+ beta += S_inv @ X.T @ (y - X @ beta)
+
  def fit(self, X, y):
  """
  Fit the regression coefficients via maximum likelihood.
@@ -44,8 +134,10 @@ def fit(self, X, y):
  if self.fit_intercept:
  X = np.c_[np.ones(X.shape[0]), X]
 
- pseudo_inverse = np.linalg.inv(X.T @ X) @ X.T
- self.beta = np.dot(pseudo_inverse, y)
+ self.sigma_inv = np.linalg.pinv(X.T @ X)
+ self.beta = np.atleast_2d(self.sigma_inv @ X.T @ y)
+
+ self._is_fit = True
 
  def predict(self, X):
  """
@@ -166,22 +258,22 @@ def __init__(self, penalty="l2", gamma=0, fit_intercept=True):
  \left(
  \sum_{i=0}^N y_i \log(\hat{y}_i) +
  (1-y_i) \log(1-\hat{y}_i)
- \right) - R(\mathbf{b}, \gamma) 
+ \right) - R(\mathbf{b}, \gamma)
  \right]
- 
+
  where
- 
+
  .. math::
- 
+
  R(\mathbf{b}, \gamma) = \left\{
  \begin{array}{lr}
  \frac{\gamma}{2} ||\mathbf{beta}||_2^2 & :\texttt{ penalty = 'l2'}\\
  \gamma ||\beta||_1 & :\texttt{ penalty = 'l1'}
  \end{array}
  \right.
- 
- is a regularization penalty, :math:`\gamma` is a regularization weight, 
- `N` is the number of examples in **y**, and **b** is the vector of model 
+
+ is a regularization penalty, :math:`\gamma` is a regularization weight,
+ `N` is the number of examples in **y**, and **b** is the vector of model
  coefficients.
 
  Parameters
@@ -251,10 +343,10 @@ def _NLL(self, X, y, y_pred):
  \right]
  """
  N, M = X.shape
- beta, gamma = self.beta, self.gamma 
+ beta, gamma = self.beta, self.gamma
  order = 2 if self.penalty == "l2" else 1
  norm_beta = np.linalg.norm(beta, ord=order)
- 
+
  nll = -np.log(y_pred[y == 1]).sum() - np.log(1 - y_pred[y == 0]).sum()
  penalty = (gamma / 2) * norm_beta ** 2 if order == 2 else gamma * norm_beta
  return (penalty + nll) / N
 
@@ -0,0 +1,58 @@
+# flake8: noqa
+import numpy as np
+
+from sklearn.linear_model import LinearRegression as LinearRegressionGold
+
+from numpy_ml.linear_models.lm import LinearRegression
+from numpy_ml.utils.testing import random_tensor
+
+
+def test_linear_regression(N=10):
+ np.random.seed(12345)
+ N = np.inf if N is None else N
+
+ i = 1
+ while i < N + 1:
+ train_samples = np.random.randint(2, 30)
+ update_samples = np.random.randint(1, 30)
+ n_samples = train_samples + update_samples
+
+ # ensure n_feats < train_samples, otherwise multiple solutions are
+ # possible
+ n_feats = np.random.randint(1, train_samples)
+ target_dim = np.random.randint(1, 10)
+
+ fit_intercept = np.random.choice([True, False])
+
+ X = random_tensor((n_samples, n_feats), standardize=True)
+ y = random_tensor((n_samples, target_dim), standardize=True)
+
+ X_train, X_update = X[:train_samples], X[train_samples:]
+ y_train, y_update = y[:train_samples], y[train_samples:]
+
+ # Fit gold standard model on the entire dataset
+ lr_gold = LinearRegressionGold(fit_intercept=fit_intercept, normalize=False)
+ lr_gold.fit(X, y)
+
+ # Fit our model on just (X_train, y_train)...
+ lr = LinearRegression(fit_intercept=fit_intercept)
+ lr.fit(X_train, y_train)
+
+ do_single_sample_update = np.random.choice([True, False])
+
+ # ...then update our model on the examples (X_update, y_update)
+ if do_single_sample_update:
+ for x_new, y_new in zip(X_update, y_update):
+ lr.update(x_new, y_new)
+ else:
+ lr.update(X_update, y_update)
+
+ # check that model predictions match
+ np.testing.assert_almost_equal(lr.predict(X), lr_gold.predict(X), decimal=5)
+
+ # check that model coefficients match
+ beta = lr.beta.T[:, 1:] if fit_intercept else lr.beta.T
+ np.testing.assert_almost_equal(beta, lr_gold.coef_, decimal=6)
+
+ print("\tPASSED")
+ i += 1