@@ -3,21 +3,23 @@ program quadratic_fit
-  ! stochastic gradient descent, batch gradient descent, and minibatch gradient
-  ! descent.
+  ! stochastic gradient descent, batch gradient descent, minibatch gradient
+  ! descent, and RMSprop.
   use nf, only: dense, input, network
+  use nf_dense_layer, only: dense_layer
 
   implicit none
-  type(network) :: net_sgd, net_batch_sgd, net_minibatch_sgd
+  type(network) :: net_sgd, net_batch_sgd, net_minibatch_sgd, net_rms_prop
 
   ! Training parameters
   integer, parameter :: num_epochs = 1000
   integer, parameter :: train_size = 1000
   integer, parameter :: test_size = 30
   integer, parameter :: batch_size = 10
   real, parameter :: learning_rate = 0.01
+  real, parameter :: decay_rate = 0.9
 
   ! Input and output data
   real, allocatable :: x(:), y(:) ! training data
   real, allocatable :: xtest(:), ytest(:) ! testing data
-  real, allocatable :: ypred_sgd(:), ypred_batch_sgd(:), ypred_minibatch_sgd(:)
+  real, allocatable :: ypred_sgd(:), ypred_batch_sgd(:), ypred_minibatch_sgd(:), ypred_rms_prop(:)
 
   integer :: i, n
 
@@ -42,6 +44,7 @@ program quadratic_fit
   net_sgd = network([input(1), dense(3), dense(1)])
   net_batch_sgd = network([input(1), dense(3), dense(1)])
   net_minibatch_sgd = network([input(1), dense(3), dense(1)])
+  net_rms_prop = network([input(1), dense(3), dense(1)])
 
-  ! Print network info to stdout; this will be the same for all three networks.
+  ! Print network info to stdout; this will be the same for all four networks.
   call net_sgd % print_info()
@@ -55,15 +58,20 @@ program quadratic_fit
   ! Mini-batch SGD optimizer
   call minibatch_gd_optimizer(net_minibatch_sgd, x, y, learning_rate, num_epochs, batch_size)
 
+  ! RMSProp optimizer
+  call rmsprop_optimizer(net_rms_prop, x, y, learning_rate, num_epochs, decay_rate)
+
   ! Calculate predictions on the test set
   ypred_sgd = [(net_sgd % predict([xtest(i)]), i = 1, test_size)]
   ypred_batch_sgd = [(net_batch_sgd % predict([xtest(i)]), i = 1, test_size)]
   ypred_minibatch_sgd = [(net_minibatch_sgd % predict([xtest(i)]), i = 1, test_size)]
+  ypred_rms_prop = [(net_rms_prop % predict([xtest(i)]), i = 1, test_size)]
 
   ! Print the mean squared error
   print '("Stochastic gradient descent MSE:", f9.6)', sum((ypred_sgd - ytest)**2) / size(ytest)
   print '(" Batch gradient descent MSE: ", f9.6)', sum((ypred_batch_sgd - ytest)**2) / size(ytest)
   print '(" Minibatch gradient descent MSE: ", f9.6)', sum((ypred_minibatch_sgd - ytest)**2) / size(ytest)
+  print '(" RMSProp MSE: ", f9.6)', sum((ypred_rms_prop - ytest)**2) / size(ytest)
 
 contains
 
@@ -161,6 +169,84 @@ subroutine minibatch_gd_optimizer(net, x, y, learning_rate, num_epochs, batch_si
     end do
   end subroutine minibatch_gd_optimizer
 
+  subroutine rmsprop_optimizer(net, x, y, learning_rate, num_epochs, decay_rate)
+    ! RMSprop optimizer: updates weights using a running root mean square of the gradients.
+    type(network), intent(inout) :: net
+    real, intent(in) :: x(:), y(:)
+    real, intent(in) :: learning_rate, decay_rate
+    integer, intent(in) :: num_epochs
+    integer :: i, j, n
+    real, parameter :: epsilon = 1e-8 ! Small constant to avoid division by zero
+
+    ! Define a dedicated type to store the RMSprop gradients.
+    ! This is needed because array sizes vary between layers and we need to
+    ! keep track of gradients for each layer over time.
+    ! For now this works only for dense layers.
+    ! We will need to define a similar type for conv2d layers.
+    type :: rms_gradient_dense
+      real, allocatable :: dw(:,:)
+      real, allocatable :: db(:)
+    end type rms_gradient_dense
+
+    type(rms_gradient_dense), allocatable :: rms(:)
+
+    print *, " Running RMSprop optimizer..."
+
+    ! Here we allocate the array of RMS gradient derived types.
+    ! We need one for each dense layer; however, we allocate it to the length
+    ! of all layers, as that makes housekeeping easier.
+    allocate(rms(size(net % layers)))
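+    ! Entries for layers other than dense (e.g. the input layer) simply remain unallocated.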
+
+    do n = 1, num_epochs
+
+      do i = 1, size(x)
+        call net % forward([x(i)])
+        call net % backward([y(i)])
+      end do
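+      ! The dw and db gradients accumulate across these backward calls (they are
+      ! only zeroed after the update below), so each epoch applies one full-batch step.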
+
+      ! RMSprop update rule
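+      ! For each parameter with accumulated gradient g, this applies:
+      !   rms <- decay_rate * rms + (1 - decay_rate) * g**2
+      !   w   <- w - learning_rate * g / sqrt(rms + epsilon)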
+      do j = 1, size(net % layers)
+        select type (this_layer => net % layers(j) % p)
+          type is (dense_layer)
+
+            ! If this is our first time here for this layer, allocate the
+            ! internal RMS gradient arrays and initialize them to zero.
+            if (.not. allocated(rms(j) % dw)) then
+              allocate(rms(j) % dw, mold=this_layer % dw)
+              allocate(rms(j) % db, mold=this_layer % db)
+              rms(j) % dw = 0
+              rms(j) % db = 0
+            end if
+
+            ! Update the RMS gradients using the RMSprop moving average rule
+            rms(j) % dw = decay_rate * rms(j) % dw + (1 - decay_rate) * this_layer % dw**2
+            rms(j) % db = decay_rate * rms(j) % db + (1 - decay_rate) * this_layer % db**2
+
+            ! Update weights and biases using the RMSprop update rule
+            this_layer % weights = this_layer % weights - learning_rate &
+              / sqrt(rms(j) % dw + epsilon) * this_layer % dw
+            this_layer % biases = this_layer % biases - learning_rate &
+              / sqrt(rms(j) % db + epsilon) * this_layer % db
+
+            ! We have updated the weights and biases, so we need to reset the
+            ! gradients to zero for the next epoch.
+            this_layer % dw = 0
+            this_layer % db = 0
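+            ! The rms(j) arrays are not reset; they carry the moving average across epochs.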
+
+        end select
+      end do
+
+    end do
+
+  end subroutine rmsprop_optimizer
+
   subroutine shuffle(arr)
     ! Shuffle an array using the Fisher-Yates algorithm.
     integer, intent(inout) :: arr(:)