alok
diff --git a/‎reptile/main.py‎
100644100755
Lines changed: 73 additions & 57 deletions b/‎reptile/main.py‎
100644100755
Lines changed: 73 additions & 57 deletions
diff --git a/‎reptile/parallel_reduce.py‎
Lines changed: 63 additions & 0 deletions b/‎reptile/parallel_reduce.py‎
Lines changed: 63 additions & 0 deletions
@@ -3,8 +3,8 @@
 
 from copy import deepcopy
 
-import matplotlib.pyplot as plt
 import numpy as np
+import ray
 import torch
 import torch.nn.functional as F
 from torch import Tensor, linspace, nn, randperm, sin
@@ -15,6 +15,14 @@
 
 from utils import ParamDict as P
 
+# To avoid tkinter not installed error on headless server
+try:
+ import matplotlib
+ matplotlib.use('AGG')
+ import matplotlib.pyplot as plt
+except:
+ pass
+
 Weights = P
 criterion = F.mse_loss
 
@@ -25,7 +33,7 @@
 N = 50 # Use 50 evenly spaced points on sine wave.
 
 LR, META_LR = 0.02, 0.1 # Copy OpenAI's hyperparameters.
-BATCH_SIZE, META_BATCH_SIZE = 10, 1
+BATCH_SIZE, META_BATCH_SIZE = 10, 3
 EPOCHS, META_EPOCHS = 1, 30_000
 TEST_GRAD_STEPS = 2**3
 PLOT_EVERY = 3_000
@@ -54,16 +62,16 @@ def gen_task(num_pts=N) -> DataLoader:
  # Need to make x N,1 instead of N, to avoid
  # https://discuss.pytorch.org/t/dataloader-gives-double-instead-of-float
  x = linspace(-5, 5, num_pts)[:, None].float()
- y = a * sin(x + b)
+ y = a * sin(x + b).float()
 
  dataset = TensorDataset(x, y)
 
  loader = DataLoader(
- dataset,
- batch_size=BATCH_SIZE,
- shuffle=True,
- pin_memory=CUDA_AVAILABLE,
- )
+  dataset,
+  batch_size=BATCH_SIZE,
+  shuffle=True,
+  pin_memory=CUDA_AVAILABLE,
+  )
 
  return loader
 
@@ -102,11 +110,14 @@ def evaluate(model: Model, task: DataLoader, criterion=criterion) -> float:
  """Evaluate model on all the task data at once."""
  model.eval()
 
- x, y = cuda(Variable(task.dataset.data_tensor)), cuda(Variable(task.dataset.target_tensor))
+ x, y = cuda(Variable(task.dataset.data_tensor)), cuda(
+ Variable(task.dataset.target_tensor)
+ )
  loss = criterion(model(x), y)
  return float(loss)
 
 
+@ray.remote
 def sgd(meta_weights: Weights, epochs: int) -> Weights:
  """Run SGD on a randomly generated task."""
 
@@ -120,60 +131,68 @@ def sgd(meta_weights: Weights, epochs: int) -> Weights:
  for x, y in task:
  train_batch(x, y, model, opt)
 
- return P(model.state_dict())
+ return model.state_dict()
 
 
 def REPTILE(
- meta_weights: Weights,
- meta_batch_size: int = META_BATCH_SIZE,
- epochs: int = EPOCHS,
-) -> Weights:
+  meta_weights: Weights,
+  meta_batch_size: int = META_BATCH_SIZE,
+  epochs: int = EPOCHS,
+ ) -> Weights:
  """Run one iteration of REPTILE."""
-
- weights = [sgd(meta_weights, epochs) for _ in range(meta_batch_size)]
+ weights = ray.get([
+ sgd.remote(meta_weights, epochs) for _ in range(meta_batch_size)
+ ])
+ weights = [P(w) for w in weights]
 
  # TODO Implement custom optimizer that makes this work with builtin
  # optimizers easily. The multiplication by 0 is to get a ParamDict of the
  # right size as the identity element for summation.
- meta_weights += (META_LR / epochs) * sum((w - meta_weights for w in weights), 0 * meta_weights)
+ meta_weights += (META_LR / epochs) * sum((w - meta_weights
+ for w in weights), 0 * meta_weights)
  return meta_weights
 
 
 if __name__ == '__main__':
+ try:
+ ray.init()
+ except Exception as e:
+ print(e)
 
  # Need to put model on GPU first for tensors to have the right type.
- meta_weights = P(cuda(Model()).state_dict())
-
- # Generate fixed task to evaluate on.
- plot_task = gen_task()
-
- x_all, y_all = plot_task.dataset.data_tensor, plot_task.dataset.target_tensor
- x_plot, y_plot = shuffle(x_all, y_all, length=10)
-
- # Set up plot
- fig, ax = plt.subplots()
- true_curve = ax.plot(
- x_all.numpy(),
- y_all.numpy(),
- label='True',
- color='g',
- )
-
- ax.plot(
- x_plot.numpy(),
- y_plot.numpy(),
- 'x',
- label='Training points',
- color='k',
- )
-
- ax.legend(loc="lower right")
- ax.set_xlim(-5, 5)
- ax.set_ylim(-5, 5)
+ meta_weights = cuda(Model()).state_dict()
+
+ if PLOT:
+ # Generate fixed task to evaluate on.
+ plot_task = gen_task()
+
+ x_all, y_all = plot_task.dataset.data_tensor, plot_task.dataset.target_tensor
+ x_plot, y_plot = shuffle(x_all, y_all, length=10)
+
+ # Set up plot
+ fig, ax = plt.subplots()
+ true_curve = ax.plot(
+ x_all.numpy(),
+ y_all.numpy(),
+ label='True',
+ color='g',
+ )
+
+ ax.plot(
+ x_plot.numpy(),
+ y_plot.numpy(),
+ 'x',
+ label='Training points',
+ color='k',
+ )
+
+ ax.legend(loc="lower right")
+ ax.set_xlim(-5, 5)
+ ax.set_ylim(-5, 5)
 
  for iteration in range(1, META_EPOCHS + 1):
 
- meta_weights = REPTILE(meta_weights)
+ meta_weights = REPTILE(P(meta_weights))
 
  if iteration == 1 or iteration % PLOT_EVERY == 0:
 
@@ -182,24 +201,21 @@ def REPTILE(
  opt = SGD(model.parameters(), lr=LR)
 
  for _ in range(TEST_GRAD_STEPS):
- # Training on all the points rather than just a sample works better.
  train_batch(x_plot, y_plot, model, opt)
 
  if PLOT:
 
- ax.set_title(f'REPTILE after {iteration:n} iterations')
- (curve, ) = ax.plot(
- x_all.numpy(),
- model(Variable(x_all)).data.numpy(),
- label=f'Pred after {TEST_GRAD_STEPS} gradient steps.',
- color='r',
- )
+ ax.set_title(f'REPTILE after {iteration:n} iterations.')
+ curve, = ax.plot(
+  x_all.numpy(),
+  model(Variable(x_all)).data.numpy(),
+  label=f'Pred after {TEST_GRAD_STEPS:n} gradient steps.',
+  color='r',
+  )
 
  plt.savefig(f'figs/{iteration}.png')
 
- # Pause before clearing and moving on to next plot.
- plt.pause(0.01)
-
  ax.lines.remove(curve)
 
  print(f'Iteration: {iteration}\tLoss: {evaluate(model, plot_task):.3f}')
+
@@ -0,0 +1,63 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+from typing import List
+import ray
+
+# TODO parallelize
+
+
+def partition(xs) -> List[slice]:
+ def split(xs) -> List[int]:
+ # `bin` gives str '0b----', so we drop the first 2 chars, '0b'. We can
+ # ignore the negative case since length is always nonnegative.
+ base2_decomp = reversed([int(x) for x in f'{len(xs):b}'])
+
+ pows = sorted([i for i, a in enumerate(base2_decomp) if a != 0], )
+ return pows
+
+ parts = split(xs)
+
+ slices, start = [], 0
+
+ for n in parts:
+ stop = start + 2**n
+ slices.append(slice(start, stop))
+ start = stop
+
+ return slices
+
+
+@ray.remote()
+def foldr(f, xs):
+ L = len(xs)
+ if L == 1:
+ return xs[0]
+ elif L == 0:
+ raise ValueError('Sequence must have length greater than 0.')
+ else:
+ slices = partition(xs)
+
+ @ray.remote
+ def _foldr(chunk):
+ while len(chunk) > 1:
+ chunk = [f(chunk[2 * i], chunk[2 * i + 1]) for i, _ in enumerate(chunk[::2])]
+ return chunk[0]
+
+ return foldr(f, [_foldr.remote(xs[slice]) for slice in slices])
+
+
+if __name__ == '__main__':
+ from operator import add
+ import hypothesis
+ import hypothesis.strategies as st
+ from hypothesis import assume, example, given, infer
+ from functools import reduce
+
+ @given(xs=infer)
+ def test_foldr(f, xs: List[int]):
+ assume(xs != [])
+ assert foldr(f, xs) == reduce(f, xs)
+
+ f = add
+ test_foldr(f)