
Commit 8060ed0

exercise 11
1 parent c097bca commit 8060ed0

File tree

4 files changed: +885 -0 lines


4_machine_learning/data_loader.py

Lines changed: 94 additions & 0 deletions
@@ -0,0 +1,94 @@
# Copyright (c) 2012-2018 M. Nielsen (under MIT license)
# modified version of mnist_loader.py from http://neuralnetworksanddeeplearning.com
"""
data_loader
~~~~~~~~~~~

A library to load the MNIST image data. For details of the data
structures that are returned, see the doc strings for ``load_data``
and ``load_data_wrapper``. In practice, ``load_data_wrapper`` is the
function usually called by our neural network code.
"""

#### Libraries
# Standard library
import pickle
import gzip

# Third-party libraries
import numpy as np

def load_data(filename="mnist.pkl.gz"):
    """Return the MNIST data as a tuple containing the training data,
    the validation data, and the test data.

    The ``training_data`` is returned as a tuple with two entries.
    The first entry contains the actual training images. This is a
    numpy ndarray with 50,000 entries. Each entry is, in turn, a
    numpy ndarray with 784 values, representing the 28 * 28 = 784
    pixels in a single MNIST image.

    The second entry in the ``training_data`` tuple is a numpy ndarray
    containing 50,000 entries. Those entries are just the digit
    values (0...9) for the corresponding images contained in the first
    entry of the tuple.

    The ``validation_data`` and ``test_data`` are similar, except
    each contains only 10,000 images.

    This is a nice data format, but for use in neural networks it's
    helpful to modify the format of the ``training_data`` a little.
    That's done in the wrapper function ``load_data_wrapper()``, see
    below.
    """
    with gzip.open(filename, 'rb') as f:
        training_data, validation_data, test_data = pickle.load(f, encoding="latin1")
    return (training_data, validation_data, test_data)

def load_data_wrapper(data="mnist.pkl.gz"):
    """Return a tuple containing ``(training_data, validation_data,
    test_data)``. Based on ``load_data``, but the format is more
    convenient for use in our implementation of neural networks.

    In particular, ``training_data`` is a list containing 50,000
    2-tuples ``(x, y)``. ``x`` is a 784-dimensional numpy.ndarray
    containing the input image. ``y`` is a 10-dimensional
    numpy.ndarray representing the unit vector corresponding to the
    correct digit for ``x``.

    ``validation_data`` and ``test_data`` are lists containing 10,000
    2-tuples ``(x, y)``. In each case, ``x`` is a 784-dimensional
    numpy.ndarray containing the input image, and ``y`` is the
    corresponding classification, i.e., the digit value (an integer)
    corresponding to ``x``.

    This means we're using slightly different formats for the
    training data and the validation / test data. These formats
    turn out to be the most convenient for use in our neural network
    code.

    The function takes an argument ``data``, which can be a string
    giving the filename of a data file, or the data directly.
    """
    if isinstance(data, str):
        data = load_data(data)
    tr_d, va_d, te_d = data
    N_labels = 1 + np.max(tr_d[1])
    print("number of different labels for the output:", N_labels)
    training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]]
    training_results = [vectorized_result(y, N_labels) for y in tr_d[1]]
    training_data = list(zip(training_inputs, training_results))
    validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]]
    validation_data = list(zip(validation_inputs, va_d[1]))
    test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]]
    test_data = list(zip(test_inputs, te_d[1]))
    return (training_data, validation_data, test_data)

def vectorized_result(j, N=10):
    """Return an N-dimensional unit vector with a 1.0 in the jth
    position and zeroes elsewhere. This is used to convert a digit
    (0...9) into a corresponding desired output from the neural
    network."""
    e = np.zeros((N, 1))
    e[j] = 1.0
    return e
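
For orientation, the data format described in the docstrings above can be checked with a short script like the following. This is an illustrative sketch, not part of the commit; it assumes ``mnist.pkl.gz`` and ``data_loader.py`` are both in the working directory.

# Illustrative sanity check of the data format (assumes mnist.pkl.gz is present).
import data_loader

training_data, validation_data, test_data = data_loader.load_data_wrapper("mnist.pkl.gz")

x, y = training_data[0]
print(x.shape, y.shape)      # expected: (784, 1) (10, 1) -- image column vector, one-hot label

xv, yv = validation_data[0]
print(xv.shape, yv)          # expected: (784, 1) and a plain integer label in 0...9

print(len(training_data), len(validation_data), len(test_data))   # 50000 10000 10000

print(data_loader.vectorized_result(3).ravel())   # [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]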

4_machine_learning/hw11.pdf

197 KB
Binary file not shown.

4_machine_learning/network.py

Lines changed: 136 additions & 0 deletions
@@ -0,0 +1,136 @@
# Copyright (c) 2012-2018 M. Nielsen (under MIT license)
# modified version of network.py from http://neuralnetworksanddeeplearning.com
"""
network.py
~~~~~~~~~~

A module to implement the stochastic gradient descent learning
algorithm for a feedforward neural network. Gradients are calculated
using backpropagation. Note that I have focused on making the code
simple, easily readable, and easily modifiable. It is not optimized,
and omits many desirable features.
"""

#### Libraries
# Standard library
import random

# Third-party libraries
import numpy as np

class Network:

    def __init__(self, sizes):
        """The list ``sizes`` contains the number of neurons in the
        respective layers of the network. For example, if the list
        was [2, 3, 1] then it would be a three-layer network, with the
        first layer containing 2 neurons, the second layer 3 neurons,
        and the third layer 1 neuron. The biases and weights for the
        network are initialized randomly, using a Gaussian
        distribution with mean 0, and variance 1. Note that the first
        layer is assumed to be an input layer, and by convention we
        won't set any biases for those neurons, since biases are only
        ever used in computing the outputs from later layers."""
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, a):
        """Return the output of the network if ``a`` is input."""
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a)+b)
        return a

    def SGD(self, training_data, epochs, mini_batch_size, eta,
            test_data=None):
        """Train the neural network using mini-batch stochastic
        gradient descent. The ``training_data`` is a list of tuples
        ``(x, y)`` representing the training inputs and the desired
        outputs. The other non-optional parameters are
        self-explanatory. If ``test_data`` is provided then the
        network will be evaluated against the test data after each
        epoch, and partial progress printed out. This is useful for
        tracking progress, but slows things down substantially."""
        n = len(training_data)
        for j in range(epochs):
            random.shuffle(training_data)
            mini_batches = [
                training_data[k:k+mini_batch_size]
                for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta)
            if test_data:
                eval_correct = self.evaluate(test_data)
                n_test = len(test_data)
                print("Epoch {0: 2d}: {1:8d} / {2} = {3:.1f}%".format(
                    j, eval_correct, n_test, 100*eval_correct/n_test))
            else:
                print("Epoch {0: 2d} complete".format(j))

    def update_mini_batch(self, mini_batch, eta):
        """Update the network's weights and biases by applying
        gradient descent using backpropagation to a single mini batch.
        The ``mini_batch`` is a list of tuples ``(x, y)``, and ``eta``
        is the learning rate."""
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        self.weights = [w-(eta/len(mini_batch))*nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b-(eta/len(mini_batch))*nb
                       for b, nb in zip(self.biases, nabla_b)]

    def backprop(self, x, y):
        """Return a tuple ``(nabla_b, nabla_w)`` representing the
        gradient for the cost function C_x. ``nabla_b`` and
        ``nabla_w`` are layer-by-layer lists of numpy arrays, similar
        to ``self.biases`` and ``self.weights``."""
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]

        #################
        raise NotImplementedError("TODO: implementing this is an exercise")
        #################

        return (nabla_b, nabla_w)

    def evaluate(self, test_data):
        """Return the number of test inputs for which the neural
        network outputs the correct result. Note that the neural
        network's output is assumed to be the index of whichever
        neuron in the final layer has the highest activation."""
        test_results = [(np.argmax(self.feedforward(x)), y)
                        for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)

    def cost_derivative(self, output_activations, y):
        r"""Return the vector of partial derivatives \partial C_x /
        \partial a for the output activations."""
        return (output_activations-y)


#### Miscellaneous functions
def sigmoid(z):
    """The sigmoid function."""
    return 1.0/(1.0+np.exp(-z))


def sigmoid_prime(z):
    """Derivative of the sigmoid function."""
    return sigmoid(z)*(1-sigmoid(z))


if __name__ == "__main__":
    # example usage: train a network to recognise digits using the mnist data
    # first load the data
    import data_loader
    training_data, _, test_data = data_loader.load_data_wrapper("mnist.pkl.gz")
    # then generate the neural network
    net = Network([784, 30, 10])
    # and train it for 15 epochs
    net.SGD(training_data, 15, 10, 0.5, test_data=test_data)
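
The ``backprop`` body above is deliberately left as the exercise (it raises ``NotImplementedError``); the worked solution lives in ``sol11_neural_networks.ipynb`` below. Purely for orientation, the textbook backpropagation pass for a quadratic cost, with the same ``(self, x, y)`` signature and return convention as ``Network.backprop``, could be sketched as follows. This is an illustration of the standard algorithm, not the commit's solution.

# Sketch only: standard backpropagation for a quadratic cost, written as a
# stand-alone function with the same (self, x, y) signature as Network.backprop.
def backprop_sketch(self, x, y):
    nabla_b = [np.zeros(b.shape) for b in self.biases]
    nabla_w = [np.zeros(w.shape) for w in self.weights]
    # forward pass: keep every activation and weighted input z, layer by layer
    activation = x
    activations = [x]
    zs = []
    for b, w in zip(self.biases, self.weights):
        z = np.dot(w, activation) + b
        zs.append(z)
        activation = sigmoid(z)
        activations.append(activation)
    # backward pass: error at the output layer, then propagate it backwards
    delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
    nabla_b[-1] = delta
    nabla_w[-1] = np.dot(delta, activations[-2].transpose())
    for l in range(2, self.num_layers):
        delta = np.dot(self.weights[-l+1].transpose(), delta) * sigmoid_prime(zs[-l])
        nabla_b[-l] = delta
        nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
    return (nabla_b, nabla_w)

Binding such a function via ``Network.backprop = backprop_sketch`` would let ``SGD`` run end to end, but the point of the exercise is to derive and implement it yourself.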

4_machine_learning/sol11_neural_networks.ipynb

Lines changed: 655 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments