@@ -4,10 +4,10 @@ program quadratic_fit
   ! descent.
   use nf, only: dense, input, network
   use nf_dense_layer, only: dense_layer
-  use nf_optimizers, only: sgd, rmsprop, adam
+  use nf_optimizers, only: sgd, rmsprop, adam, adagrad
 
   implicit none
-  type(network) :: net(9)
+  type(network) :: net(11)
 
   ! Training parameters
   integer, parameter :: num_epochs = 1000
@@ -95,6 +95,17 @@ program quadratic_fit
     beta1, beta2, epsilon, weight_decay_decoupled=1e-5 &
   )
 
+  ! Adagrad optimizer
+  call adagrad_optimizer( &
+    net(10), x, y, xtest, ytest, learning_rate, num_epochs, epsilon &
+  )
+
+  ! Adagrad optimizer with L2 regularization and learning rate decay
+  call adagrad_optimizer( &
+    net(11), x, y, xtest, ytest, learning_rate, num_epochs, epsilon, &
+    weight_decay_l2=1e-4, learning_rate_decay=0.99 &
+  )
+
 contains
 
   real elemental function quadratic(x) result(y)
@@ -358,6 +369,68 @@ subroutine adam_optimizer( &
 
   end subroutine adam_optimizer
 
+  subroutine adagrad_optimizer( &
+    net, x, y, xtest, ytest, learning_rate, num_epochs, epsilon, &
+    weight_decay_l2, learning_rate_decay &
+  )
+    ! Adagrad optimizer for updating weights using the adaptive gradient algorithm
+    type(network), intent(inout) :: net
+    real, intent(in) :: x(:), y(:)
+    real, intent(in) :: xtest(:), ytest(:)
+    real, intent(in) :: learning_rate, epsilon
+    real, intent(in), optional :: weight_decay_l2
+    real, intent(in), optional :: learning_rate_decay
+    integer, intent(in) :: num_epochs
+    integer :: i, n
+    real, allocatable :: ypred(:)
+    real :: weight_decay_l2_val
+    real :: learning_rate_decay_val
+
+    ! Set default value for weight_decay_l2
+    if (.not. present(weight_decay_l2)) then
+      weight_decay_l2_val = 0.0
+    else
+      weight_decay_l2_val = weight_decay_l2
+    end if
+
+    ! Set default value for learning_rate_decay
+    if (.not. present(learning_rate_decay)) then
+      learning_rate_decay_val = 0.0
+    else
+      learning_rate_decay_val = learning_rate_decay
+    end if
+
+    print '(a)', 'Adagrad optimizer'
+    print '(34("-"))'
+
+    do n = 1, num_epochs
+
+      do i = 1, size(x)
+        call net % forward([x(i)])
+        call net % backward([y(i)])
+      end do
+
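+      ! Adagrad scales each parameter's step by the inverse square root of its
+      ! accumulated squared gradients; weight_decay_l2 adds an L2 penalty to the
+      ! gradient, and learning_rate_decay shrinks the learning rate over time.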
+      call net % update( &
+        adagrad( &
+          learning_rate=learning_rate, &
+          epsilon=epsilon, &
+          weight_decay_l2=weight_decay_l2_val, &
+          learning_rate_decay=learning_rate_decay_val &
+        ) &
+      )
+
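+      ! Evaluate the test-set error every num_epochs/10 epochs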
+      if (mod(n, num_epochs / 10) == 0) then
+        ypred = [(net % predict([xtest(i)]), i = 1, size(xtest))]
+        print '("Epoch: ", i4,"/",i4,", RMSE = ", f9.6)', &
+          n, num_epochs, sqrt(sum((ypred - ytest)**2) / size(ytest))
+      end if
+
+    end do
+
+    print *, ''
+
+  end subroutine adagrad_optimizer
+
   subroutine shuffle(arr)
     ! Shuffle an array using the Fisher-Yates algorithm.
     integer, intent(inout) :: arr(:)
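
For reference, a minimal standalone sketch of the update rule the adagrad constructor above configures: squared gradients are accumulated per parameter and each step is divided by their square root (plus epsilon), with an optional L2 penalty folded into the gradient. This toy single-weight loop is illustrative only, not the nf_optimizers implementation, and it omits the learning-rate decay schedule:

program adagrad_sketch
  implicit none
  real :: w, g, accum
  real, parameter :: lr = 0.01, eps = 1e-8, l2 = 1e-4
  integer :: t

  w = 2.0      ! single scalar parameter
  accum = 0.0  ! running sum of squared gradients
  do t = 1, 100
    g = 2.0 * w + l2 * w                   ! gradient of f(w) = w**2, plus L2 penalty
    accum = accum + g**2                   ! accumulate squared gradient
    w = w - lr * g / (sqrt(accum) + eps)   ! Adagrad step
  end do
  print '(a, f8.5)', 'w after 100 Adagrad steps: ', w

end program adagrad_sketch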