Commit 17a483b

Implemented Gaussian Process Regression div-bargali#809

2 parents 4d9b5c5 + 23e1a4b commit 17a483b

File tree

1 file changed: +193 -0 lines changed

# Gaussian Process Regression

import numpy as np
import scipy.optimize


class GaussianProcessRegression:
    """
    Gaussian Process Regression with squared exponential kernel
    """

    def __init__(self, X, Y, noise=1., sigma=1., ell=1.):
        """
        Initializes Gaussian process regression.

        :param X: Data, (N, 1) numpy array
        :param Y: Function values, (N, 1) numpy array
        :param noise: Measurement noise, float
        :param sigma: Gain, float
        :param ell: Length scale, float
        """
        self.X = X
        self.Y = Y
        self.noise = noise
        self.kernel = SquareExponential(sigma, ell)
        self.L, self.alpha = self.invert_gram_matrix()

    def invert_gram_matrix(self):
        """
        Calculates and inverts the Gram matrix with a Cholesky decomposition.

        :return: Lower triangular Cholesky factor of the Gram matrix, (N, N) numpy array
                 Inverted Gram matrix multiplied with the function values Y, (N, 1) numpy array
        """
        kXX = self.kernel.k(self.X, self.X)
        gram_matrix = kXX + self.noise ** 2 * np.eye(self.X.shape[0])
        L = np.linalg.cholesky(gram_matrix)
        alpha = np.linalg.solve(L.T, np.linalg.solve(L, self.Y))
        return L, alpha

    def neg_log_marginal_likelihood(self):
        """
        Calculates the negative logarithm of the marginal likelihood. The marginal likelihood is a multivariate
        Gaussian with zero mean and the Gram matrix as covariance matrix.

        :return: Negative log marginal likelihood up to an additive constant, float
        """
        return 0.5 * (self.Y.T @ self.alpha)[0, 0] + np.sum(np.log(np.diag(self.L)))

    def dml_dtheta(self, theta):
        """
        Calculates the derivative of the negative log marginal likelihood w.r.t. the parameters of the kernel.

        :param theta: Parameters of the kernel [sigma, ell], (2,) numpy array
        :return: Negative log marginal likelihood, float
                 Derivative, (2,) numpy array
        """
        self.kernel = SquareExponential(theta[0], theta[1])
        self.L, self.alpha = self.invert_gram_matrix()

        dml_dtheta = []
        for dk_dtheta in [self.kernel.dk_dsigma(self.X), self.kernel.dk_dell(self.X)]:
            dml_dtheta.append(
                -0.5 * (self.alpha.T @ dk_dtheta @ self.alpha)[0, 0]
                + 0.5 * np.trace(np.linalg.solve(self.L.T, np.linalg.solve(self.L, dk_dtheta)))
            )
        return self.neg_log_marginal_likelihood(), np.array(dml_dtheta)

    def train(self):
        """
        Trains the Gaussian process regression by optimizing the negative log marginal likelihood w.r.t. the
        parameters of the kernel.
        """
        theta = np.array([self.kernel.sigma, self.kernel.ell])
        res = scipy.optimize.minimize(self.dml_dtheta, theta, jac=True, options={'maxiter': 25})
        # Re-fit at the optimum: the last theta evaluated inside minimize() is
        # not necessarily res.x, so refresh the kernel and the Cholesky factors.
        self.kernel = SquareExponential(res.x[0], res.x[1])
        self.L, self.alpha = self.invert_gram_matrix()
        print('Optimized values: sigma = %.3f, ell = %.3f' % (res.x[0], res.x[1]))

    def predict(self, x):
        """
        Predicts function values for unseen data x using Gaussian process regression.

        :param x: Unseen data, (M, 1) numpy array
        :return: Point estimates of the function values, (M, 1) numpy array
                 Covariance matrix of the function values, (M, M) numpy array
        """
        kxx = self.kernel.k(x, x)
        kXx = self.kernel.k(self.X, x)
        gain = np.linalg.solve(self.L.T, np.linalg.solve(self.L, kXx)).T
        mu_post = gain @ self.Y
        cov_post = kxx - gain @ kXx
        return mu_post, cov_post


class SquareExponential:
    """
    Squared exponential kernel
    """

    def __init__(self, sigma=1., ell=1.):
        """
        Initializes the squared exponential kernel.

        :param sigma: Gain, float
        :param ell: Length scale, float
        """
        self.sigma = sigma
        self.ell = ell

    def k(self, A, B):
        """
        Calculates the kernel matrix.

        :param A: Input matrix, (N, 1) numpy array
        :param B: Input matrix, (M, 1) numpy array
        :return: Kernel matrix, (N, M) numpy array
        """
        return self.sigma ** 2 * np.exp(-squared_l2_norm(A, B) / (2. * self.ell ** 2))

    def dk_dsigma(self, X):
        """
        Derivative of the kernel matrix with respect to sigma.

        :param X: Data, (N, 1) numpy array
        :return: Derivative w.r.t. sigma, (N, N) numpy array
        """
        return 2 * self.k(X, X) / self.sigma

    def dk_dell(self, X):
        """
        Derivative of the kernel matrix with respect to ell.

        :param X: Data, (N, 1) numpy array
        :return: Derivative w.r.t. ell, (N, N) numpy array
        """
        return self.k(X, X) * squared_l2_norm(X, X) / self.ell ** 3


def squared_l2_norm(A, B):
    """
    Helper function to calculate the squared l2 norm between each possible combination of the rows of A and B.

    :param A: Input matrix, (N, 1) numpy array
    :param B: Input matrix, (M, 1) numpy array
    :return: Matrix of squared l2 norms, (N, M) numpy array
    """
    return np.array([[np.sum((A[i, :] - B[j, :]) ** 2)
                      for j in range(B.shape[0])]
                     for i in range(A.shape[0])])


def f(x):
    """
    Example function to test GP regression.

    :param x: Data, (N, 1) numpy array
    :return: Function values, (N, 1) numpy array
    """
    return (6 * x - 2) ** 2 * np.sin(12 * x - 4)


# Execute GP regression with a sample dataset
if __name__ == "__main__":
    import matplotlib.pyplot as plt

    # Create a sample dataset with random noise
    noise = 0.4
    X = np.linspace(0, 1.0, 10).reshape(-1, 1)
    Y = f(X) + noise * np.random.randn(*X.shape)

    # Fit GP regression to the data
    GP = GaussianProcessRegression(X, Y, noise=noise)
    GP.train()

    # Predict unseen data
    x = np.linspace(-0.2, 1.2, 100).reshape(-1, 1)
    y_pred, cov_pred = GP.predict(x)

    # Plot the data
    plt.figure(1, figsize=(8, 4))
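    # Shade mean +/- 1.96 posterior standard deviations, a ~95% credible
    # interval for a Gaussian posterior.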
    plt.fill_between(
        x.ravel(),
        y_pred.ravel() - 1.96 * np.sqrt(np.diag(cov_pred)),
        y_pred.ravel() + 1.96 * np.sqrt(np.diag(cov_pred)),
        color='C1',
        alpha=0.3,
        label='95%'
    )
    plt.plot(x, y_pred, 'C1', label='Prediction')
    plt.plot(x, f(x), 'k', label='Real function')
    plt.scatter(X, Y, alpha=1, marker='o', color='C0', label='Data')
    plt.legend()
    plt.show()
