Skip to content

Commit 8a16373

Browse files
committed
add glm, split linear models into separate files
1 parent f1c43e0 commit 8a16373

File tree

12 files changed

+1130
-693
lines changed

12 files changed

+1130
-693
lines changed

numpy_ml/linear_models/__init__.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,11 @@
1-
from .lm import *
2-
from .naive_bayes import *
1+
"""A module containing assorted linear models."""
2+
3+
from .ridge import RidgeRegression
4+
from .glm import GeneralizedLinearModel
5+
from .logistic import LogisticRegression
6+
from .bayesian_regression import (
7+
BayesianLinearRegressionKnownVariance,
8+
BayesianLinearRegressionUnknownVariance,
9+
)
10+
from .naive_bayes import GaussianNBClassifier
11+
from .linear_regression import LinearRegression
Lines changed: 297 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,297 @@
1+
"""A module of Bayesian linear regression models."""
2+
import numpy as np
3+
import scipy.stats as stats
4+
5+
from numpy_ml.utils.testing import is_number, is_symmetric_positive_definite
6+
7+
8+
class BayesianLinearRegressionUnknownVariance:
9+
def __init__(self, alpha=1, beta=2, mu=0, V=None, fit_intercept=True):
10+
r"""
11+
Bayesian linear regression model with unknown variance. Assumes a
12+
conjugate normal-inverse-gamma joint prior on the model parameters and
13+
error variance.
14+
15+
Notes
16+
-----
17+
The current model uses a conjugate normal-inverse-gamma joint prior on
18+
model parameters **b** and error variance :math:`\sigma^2`. The joint
19+
and marginal posteriors over each are:
20+
21+
.. math::
22+
23+
\mathbf{b}, \sigma^2 &\sim
24+
\text{N-\Gamma^{-1}}(\mu, \mathbf{V}^{-1}, \alpha, \beta) \\
25+
\sigma^2 &\sim \text{InverseGamma}(\alpha, \beta) \\
26+
\mathbf{b} \mid \sigma^2 &\sim \mathcal{N}(\mu, \sigma^2 \mathbf{V})
27+
28+
Parameters
29+
----------
30+
alpha : float
31+
The shape parameter for the Inverse-Gamma prior on
32+
:math:`\sigma^2`. Must be strictly greater than 0. Default is 1.
33+
beta : float
34+
The scale parameter for the Inverse-Gamma prior on
35+
:math:`\sigma^2`. Must be strictly greater than 0. Default is 1.
36+
mu : :py:class:`ndarray <numpy.ndarray>` of shape `(M,)` or float
37+
The mean of the Gaussian prior on `b`. If a float, assume `mu`
38+
is ``np.ones(M) * mu``. Default is 0.
39+
V : :py:class:`ndarray <numpy.ndarray>` of shape `(N, N)` or `(N,)` or None
40+
A symmetric positive definite matrix that when multiplied
41+
element-wise by :math:`\sigma^2` gives the covariance matrix for
42+
the Gaussian prior on `b`. If a list, assume ``V = diag(V)``. If
43+
None, assume `V` is the identity matrix. Default is None.
44+
fit_intercept : bool
45+
Whether to fit an intercept term in addition to the coefficients in
46+
b. If True, the estimates for b will have `M + 1` dimensions, where
47+
the first dimension corresponds to the intercept. Default is True.
48+
49+
Attributes
50+
----------
51+
posterior : dict or None
52+
Frozen random variables for the posterior distributions
53+
:math:`P(\sigma^2 \mid X)` and :math:`P(b \mid X, \sigma^2)`.
54+
posterior_predictive : dict or None
55+
Frozen random variable for the posterior predictive distribution,
56+
:math:`P(y \mid X)`. This value is only set following a call to
57+
:meth:`numpy_ml.linear_models.BayesianLinearRegressionUnknownVariance.predict`.
58+
""" # noqa: E501
59+
# this is a placeholder until we know the dimensions of X
60+
V = 1.0 if V is None else V
61+
62+
if isinstance(V, list):
63+
V = np.array(V)
64+
65+
if isinstance(V, np.ndarray):
66+
if V.ndim == 1:
67+
V = np.diag(V)
68+
elif V.ndim == 2:
69+
fstr = "V must be symmetric positive definite"
70+
assert is_symmetric_positive_definite(V), fstr
71+
72+
self.V = V
73+
self.mu = mu
74+
self.beta = beta
75+
self.alpha = alpha
76+
self.fit_intercept = fit_intercept
77+
78+
self.posterior = None
79+
self.posterior_predictive = None
80+
81+
def fit(self, X, y):
82+
"""
83+
Compute the posterior over model parameters using the data in `X` and
84+
`y`.
85+
86+
Parameters
87+
----------
88+
X : :py:class:`ndarray <numpy.ndarray>` of shape `(N, M)`
89+
A dataset consisting of `N` examples, each of dimension `M`.
90+
y : :py:class:`ndarray <numpy.ndarray>` of shape `(N, K)`
91+
The targets for each of the `N` examples in `X`, where each target
92+
has dimension `K`.
93+
"""
94+
# convert X to a design matrix if we're fitting an intercept
95+
if self.fit_intercept:
96+
X = np.c_[np.ones(X.shape[0]), X]
97+
98+
N, M = X.shape
99+
alpha, beta, V, mu = self.alpha, self.beta, self.V, self.mu
100+
101+
if is_number(V):
102+
V *= np.eye(M)
103+
104+
if is_number(mu):
105+
mu *= np.ones(M)
106+
107+
# sigma
108+
I = np.eye(N) # noqa: E741
109+
a = y - (X @ mu)
110+
b = np.linalg.inv(X @ V @ X.T + I)
111+
c = y - (X @ mu)
112+
113+
shape = N + alpha
114+
sigma = (1 / shape) * (alpha * beta ** 2 + a @ b @ c)
115+
scale = sigma ** 2
116+
117+
# sigma is the mode of the inverse gamma prior on sigma^2
118+
sigma = scale / (shape - 1)
119+
120+
# mean
121+
V_inv = np.linalg.inv(V)
122+
L = np.linalg.inv(V_inv + X.T @ X)
123+
R = V_inv @ mu + X.T @ y
124+
125+
mu = L @ R
126+
cov = L * sigma
127+
128+
# posterior distribution for sigma^2 and b
129+
self.posterior = {
130+
"sigma**2": stats.distributions.invgamma(a=shape, scale=scale),
131+
"b | sigma**2": stats.multivariate_normal(mean=mu, cov=cov),
132+
}
133+
134+
def predict(self, X):
135+
"""
136+
Return the MAP prediction for the targets associated with `X`.
137+
138+
Parameters
139+
----------
140+
X : :py:class:`ndarray <numpy.ndarray>` of shape `(Z, M)`
141+
A dataset consisting of `Z` new examples, each of dimension `M`.
142+
143+
Returns
144+
-------
145+
y_pred : :py:class:`ndarray <numpy.ndarray>` of shape `(Z, K)`
146+
The model predictions for the items in `X`.
147+
"""
148+
# convert X to a design matrix if we're fitting an intercept
149+
if self.fit_intercept:
150+
X = np.c_[np.ones(X.shape[0]), X]
151+
152+
I = np.eye(X.shape[0]) # noqa: E741
153+
mu = X @ self.posterior["b | sigma**2"].mean
154+
cov = X @ self.posterior["b | sigma**2"].cov @ X.T + I
155+
156+
# MAP estimate for y corresponds to the mean of the posterior
157+
# predictive
158+
self.posterior_predictive = stats.multivariate_normal(mu, cov)
159+
return mu
160+
161+
162+
class BayesianLinearRegressionKnownVariance:
163+
def __init__(self, mu=0, sigma=1, V=None, fit_intercept=True):
164+
r"""
165+
Bayesian linear regression model with known error variance and
166+
conjugate Gaussian prior on model parameters.
167+
168+
Notes
169+
-----
170+
Uses a conjugate Gaussian prior on the model coefficients **b**. The
171+
posterior over model coefficients is then
172+
173+
.. math::
174+
175+
\mathbf{b} \mid \mu, \sigma^2, \mathbf{V}
176+
\sim \mathcal{N}(\mu, \sigma^2 \mathbf{V})
177+
178+
Ridge regression is a special case of this model where :math:`\mu =
179+
\mathbf{0}`, :math:`\sigma = 1` and :math:`\mathbf{V} = \mathbf{I}`
180+
(ie., the prior on the model coefficients **b** is a zero-mean, unit
181+
covariance Gaussian).
182+
183+
Parameters
184+
----------
185+
mu : :py:class:`ndarray <numpy.ndarray>` of shape `(M,)` or float
186+
The mean of the Gaussian prior on `b`. If a float, assume `mu` is
187+
``np.ones(M) * mu``. Default is 0.
188+
sigma : float
189+
The square root of the scaling term for covariance of the Gaussian
190+
prior on `b`. Default is 1.
191+
V : :py:class:`ndarray <numpy.ndarray>` of shape `(N,N)` or `(N,)` or None
192+
A symmetric positive definite matrix that when multiplied
193+
element-wise by ``sigma ** 2`` gives the covariance matrix for the
194+
Gaussian prior on `b`. If a list, assume ``V = diag(V)``. If None,
195+
assume `V` is the identity matrix. Default is None.
196+
fit_intercept : bool
197+
Whether to fit an intercept term in addition to the coefficients in
198+
`b`. If True, the estimates for `b` will have `M + 1` dimensions, where
199+
the first dimension corresponds to the intercept. Default is True.
200+
201+
Attributes
202+
----------
203+
posterior : dict or None
204+
Frozen random variable for the posterior distribution :math:`P(b
205+
\mid X, \sigma^2)`.
206+
posterior_predictive : dict or None
207+
Frozen random variable for the posterior predictive distribution,
208+
:math:`P(y \mid X)`. This value is only set following a call to
209+
:meth:`numpy_ml.linear_models.BayesianLinearRegressionKnownVariance.predict`.
210+
""" # noqa: E501
211+
# this is a placeholder until we know the dimensions of X
212+
V = 1.0 if V is None else V
213+
214+
if isinstance(V, list):
215+
V = np.array(V)
216+
217+
if isinstance(V, np.ndarray):
218+
if V.ndim == 1:
219+
V = np.diag(V)
220+
elif V.ndim == 2:
221+
fstr = "V must be symmetric positive definite"
222+
assert is_symmetric_positive_definite(V), fstr
223+
224+
self.posterior = {}
225+
self.posterior_predictive = {}
226+
227+
self.V = V
228+
self.mu = mu
229+
self.sigma = sigma
230+
self.fit_intercept = fit_intercept
231+
232+
def fit(self, X, y):
233+
"""
234+
Compute the posterior over model parameters using the data in `X` and
235+
`y`.
236+
237+
Parameters
238+
----------
239+
X : :py:class:`ndarray <numpy.ndarray>` of shape `(N, M)`
240+
A dataset consisting of `N` examples, each of dimension `M`.
241+
y : :py:class:`ndarray <numpy.ndarray>` of shape `(N, K)`
242+
The targets for each of the `N` examples in `X`, where each target
243+
has dimension `K`.
244+
"""
245+
# convert X to a design matrix if we're fitting an intercept
246+
if self.fit_intercept:
247+
X = np.c_[np.ones(X.shape[0]), X]
248+
249+
N, M = X.shape
250+
251+
if is_number(self.V):
252+
self.V *= np.eye(M)
253+
254+
if is_number(self.mu):
255+
self.mu *= np.ones(M)
256+
257+
V = self.V
258+
mu = self.mu
259+
sigma = self.sigma
260+
261+
V_inv = np.linalg.inv(V)
262+
L = np.linalg.inv(V_inv + X.T @ X)
263+
R = V_inv @ mu + X.T @ y
264+
265+
mu = L @ R
266+
cov = L * sigma ** 2
267+
268+
# posterior distribution over b conditioned on sigma
269+
self.posterior["b"] = stats.multivariate_normal(mu, cov)
270+
271+
def predict(self, X):
272+
"""
273+
Return the MAP prediction for the targets associated with `X`.
274+
275+
Parameters
276+
----------
277+
X : :py:class:`ndarray <numpy.ndarray>` of shape `(Z, M)`
278+
A dataset consisting of `Z` new examples, each of dimension `M`.
279+
280+
Returns
281+
-------
282+
y_pred : :py:class:`ndarray <numpy.ndarray>` of shape `(Z, K)`
283+
The MAP predictions for the targets associated with the items in
284+
`X`.
285+
"""
286+
# convert X to a design matrix if we're fitting an intercept
287+
if self.fit_intercept:
288+
X = np.c_[np.ones(X.shape[0]), X]
289+
290+
I = np.eye(X.shape[0]) # noqa: E741
291+
mu = X @ self.posterior["b"].mean
292+
cov = X @ self.posterior["b"].cov @ X.T + I
293+
294+
# MAP estimate for y corresponds to the mean/mode of the gaussian
295+
# posterior predictive distribution
296+
self.posterior_predictive = stats.multivariate_normal(mu, cov)
297+
return mu

0 commit comments

Comments
 (0)