""" Implements the two-layer network defined in basics.py
using TensorFlow, to illustrate the difference between a
dynamic graph (PyTorch) and a static graph (TensorFlow).
"""

import tensorflow as tf
import numpy as np

# ################################################################### #
# Implementing a 2-layer network with 1 hidden layer using TensorFlow #
# ################################################################### #

N = 64  # batch size
D_in = 1000  # input dimension
H = 100  # hidden dimension
D_out = 10  # output dimension

# define the placeholders for the input and target data;
# these will be filled with real data when the graph is executed.
x = tf.placeholder(dtype=tf.float32, shape=(None, D_in), name="inputs")
y = tf.placeholder(dtype=tf.float32, shape=(None, D_out), name="targets")

# create the weights and initialize them with random data.
# A TensorFlow Variable persists its value across executions of the graph.
w1 = tf.Variable(tf.random_normal((D_in, H)), name="w1")
w2 = tf.Variable(tf.random_normal((H, D_out)), name="w2")

# forward pass
# this doesn't perform any numeric operations;
# it merely sets up the computational graph that we will execute later.
h = tf.matmul(x, w1)
h_relu = tf.maximum(h, tf.zeros(1))  # ReLU: the scalar zero broadcasts over h
y_pred = tf.matmul(h_relu, w2)
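
# For example, printing y_pred at this point shows only a symbolic Tensor
# (its shape and dtype), not numbers; concrete values exist only once the
# graph is run inside a Session. The exact tensor name below may differ:
#   print(y_pred)   # -> Tensor("MatMul_1:0", shape=(?, 10), dtype=float32)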

# loss: sum of squared errors over the whole batch
loss = tf.reduce_sum((y_pred - y) ** 2.0)

# backward propagation:
# compute the gradients of the loss with respect to w1 and w2
w1_grad, w2_grad = tf.gradients(loss, [w1, w2])
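
# For reference, tf.gradients adds graph ops that compute what standard
# backprop for a two-layer ReLU net would derive by hand:
#   grad_y_pred = 2.0 * (y_pred - y)
#   w2_grad     = h_relu.T @ grad_y_pred
#   grad_h      = (grad_y_pred @ w2.T), zeroed where h < 0
#   w1_grad     = x.T @ grad_h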

learning_rate = 1e-6

# update the weights with gradient descent.
# To update the weights we need to fetch new_w1 and new_w2 when executing the graph.
# In TensorFlow, updating the weights is part of the computational graph;
# in PyTorch it happens outside the graph (see the sketch at the end of this file).
new_w1 = w1.assign(w1 - (learning_rate * w1_grad))
new_w2 = w2.assign(w2 - (learning_rate * w2_grad))
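
# Note: the same gradient + assign ops can be generated by a built-in
# optimizer instead of being wired up by hand. A minimal sketch using the
# TF 1.x optimizer API:
#   train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
#   # ... and later: sess.run(train_step, feed_dict={x: x_value, y: y_value})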

print('-------------------------------------')
print("Training the network using tensorflow")
print('-------------------------------------')

with tf.Session() as sess:
    # initialize all the variables with their initializers.
    sess.run(tf.global_variables_initializer())

    # create the (fixed) random training data
    x_value = np.random.randn(N, D_in)
    y_value = np.random.randn(N, D_out)

    for epoch in range(500):
        # feed x_value and y_value to the x, y placeholders, and fetch the
        # loss along with the two assign ops (which apply the weight updates).
        loss_value, _, _ = sess.run([loss, new_w1, new_w2], feed_dict={x: x_value, y: y_value})
        if epoch % 50 == 0:
            print(f"epoch : {epoch}, loss : {loss_value}")
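
# For contrast with the static graph above: in PyTorch the graph is built
# dynamically, operation by operation, as the forward pass runs, and the
# weight update happens outside any graph. A minimal sketch of one training
# step (assumes torch is installed and x, y are torch tensors, mirroring
# the network in basics.py):
#
#   import torch
#   w1 = torch.randn(D_in, H, requires_grad=True)
#   w2 = torch.randn(H, D_out, requires_grad=True)
#   y_pred = x.mm(w1).clamp(min=0).mm(w2)   # executes immediately, eagerly
#   loss = (y_pred - y).pow(2).sum()
#   loss.backward()                         # autograd records/uses the graph on the fly
#   with torch.no_grad():                   # update is plain tensor math, not graph ops
#       w1 -= learning_rate * w1.grad
#       w2 -= learning_rate * w2.grad
#       w1.grad.zero_(); w2.grad.zero_()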