digits122
diff --git a/‎const.py‎
Lines changed: 4 additions & 1 deletion b/‎const.py‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎lib/dataset.py‎
Lines changed: 3 additions & 7 deletions b/‎lib/dataset.py‎
Lines changed: 3 additions & 7 deletions
diff --git a/‎lib/evaluate.py‎
Lines changed: 11 additions & 0 deletions b/‎lib/evaluate.py‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎lib/game.py‎
Lines changed: 166 additions & 0 deletions b/‎lib/game.py‎
Lines changed: 166 additions & 0 deletions
@@ -8,7 +8,7 @@
 ## Dtype of the tensors depending on CUDA
 DEVICE = torch.device("cuda") if CUDA else torch.device("cpu")
 ## Number of self-play parallel games
-PARALLEL_SELF_PLAY = 3
+PARALLEL_SELF_PLAY = 2
 ## Number of evaluation parallel games 
 PARALLEL_EVAL = 2
 ## MCTS parallel
@@ -43,6 +43,9 @@
 BATCH_SIZE_EVAL = 4
 ## Number of self-play before training
 SELF_PLAY_MATCH = 2 * PARALLEL_SELF_PLAY
+## Number of moves before changing temperature to stop
+## exploration
+TEMPERATURE_MOVE = 5 
 
 
 ##### TRAINING
 
@@ -1,9 +1,10 @@
-from torch.utils.data import Dataset, DataLoader
-from const import *
 import numpy as np
 import timeit
+from torch.utils.data import Dataset, DataLoader
+from const import *
 from . import utils
 
+
 class SelfPlayDataset(Dataset):
  """
  Self-play dataset containing state, probabilities
@@ -47,8 +48,3 @@ def update(self, game):
  winners[np.where(winners != -1)] = 1
  self.winners[:number_moves] = winners
  return number_moves
- 
- 
- def update_batch(self, raw_dataset):
- for game in raw_dataset:
- self.update(game)
@@ -1,3 +1,4 @@
+import timeit
 from .play import play
 from const import *
 
@@ -7,7 +8,14 @@ def evaluate(player, new_player):
  the newly trained model """ 
 
  print("[EVALUATION] Starting to evaluate trained model !")
+ start_time = timeit.default_timer()
+ ## Play the matches and get the results
  results = play(player, opponent=new_player)
+ final_time = timeit.default_timer() - start_time
+ print("[EVALUATION] Total duration: %.3f seconds, average duration:"
+ " %0.3f seconds" % ((final_time, final_time / EVAL_MATCHS)))
+
+ ## Count the number of wins for each player
  black_wins = 0
  white_wins = 0
  for result in results:
@@ -18,6 +26,9 @@ def evaluate(player, new_player):
 
  print("[EVALUATION] black wins: %d vs %d for white"\
  % (black_wins, white_wins))
+ 
+ ## Check if the trained player (black) is better than
+ ## the current best player depending on the threshold
  if black_wins >= EVAL_THRESH * len(results):
  return True
  return False
@@ -0,0 +1,166 @@
+import numpy as np
+import pickle
+from const import *
+from models.mcts import MCTS
+from .go import GoEnv as Board
+from .utils import _prepare_state
+
+
+class Game:
+    """
+    A single process that is used to play a game between 2 agents.
+
+    The same object is used for self-play (no opponent), for evaluation
+    (an opponent is given) and for move-by-move play through solo_play.
+    """
+
+    def __init__(self, player, id, color="black", mcts_flag=MCTS_FLAG, goban_size=GOBAN_SIZE, opponent=False):
+        """
+        player -- agent playing the game; _play reads its `passed` flag and,
+                  without MCTS, calls its `extractor` and `policy_net`
+        id -- identifier of the game, stored incremented by one and printed
+              in the evaluation log
+        color -- color given to the board for the starting player
+        mcts_flag -- when truthy, the moves are chosen by a MCTS search
+        goban_size -- size of the goban given to the board
+        opponent -- second agent used for evaluation, falsy for self-play
+        """
+
+        self.goban_size = goban_size
+        self.id = id + 1
+        self.human_pass = False
+        self.board = self._create_board(color)
+        self.player_color = 2 if color == "black" else 1
+        ## Keep the (falsy) flag itself when MCTS is disabled
+        self.mcts = MCTS() if mcts_flag else mcts_flag
+        self.player = player
+        self.opponent = opponent
+
+
+    def _create_board(self, color):
+        """
+        Create a board with a goban_size and the color is
+        for the starting player
+        """
+
+        board = Board(color, self.goban_size)
+        board.reset()
+        return board
+
+
+    def _swap_color(self):
+        """ Toggle player_color between 1 and 2 """
+
+        self.player_color = 2 if self.player_color == 1 else 1
+
+
+    def _get_move(self, board, probas):
+        """
+        Select a move without MCTS by sampling the policy output
+        restricted to the legal moves.
+
+        board -- board providing get_legal_moves()
+        probas -- 1-D array of probabilities indexed by move
+
+        Return the index of the sampled move, or the pass move
+        (goban_size ** 2) when the board reports no usable legal move.
+        """
+
+        n_moves = probas.shape[0]
+        ## Ignore anything that the probability vector cannot index
+        legal_moves = [move for move in board.get_legal_moves()
+                       if 0 <= move < n_moves]
+        if len(legal_moves) == 0:
+            return self.goban_size ** 2
+
+        ## Mask the illegal moves and renormalize instead of redrawing:
+        ## adding a constant to every entry after a rejection would give
+        ## probability back to the moves that were already rejected
+        weights = np.zeros(n_moves, dtype=np.float64)
+        weights[legal_moves] = probas[legal_moves]
+        total = weights.sum()
+        if total <= 0:
+            ## No probability on any legal move, fall back to uniform
+            weights[legal_moves] = 1.0
+            total = weights.sum()
+
+        return np.random.choice(n_moves, p=weights / total)
+
+
+    def _play(self, state, player, other_pass, competitive=False):
+        """
+        Choose a move depending on MCTS or not, and play it on the board.
+
+        state -- prepared state given to the extractor of the player
+        player -- agent choosing the move
+        other_pass -- with MCTS, a truthy value forces the player to pass
+        competitive -- forwarded to the MCTS search
+
+        Return (state, reward, done, action_scores, action) where the first
+        three values come from board.step(action).
+        """
+
+        pass_move = self.goban_size ** 2
+
+        if self.mcts:
+            if player.passed is True or other_pass:
+                action_scores = np.zeros((pass_move + 1,))
+                action_scores[-1] = 1
+                action = pass_move
+            else:
+                action_scores, action = self.mcts.search(self.board, player,
+                                                competitive=competitive)
+
+            if action == pass_move:
+                player.passed = True
+
+        else:
+            if player.passed is True:
+                action = pass_move
+            else:
+                ## The network is only evaluated when its output is used
+                feature_maps = player.extractor(state)
+                probas = player.policy_net(feature_maps)[0] \
+                                .cpu().data.numpy()
+                action = self._get_move(self.board, probas)
+
+            if action == pass_move:
+                player.passed = True
+
+            ## One-hot scores on the move that was chosen
+            action_scores = np.zeros((pass_move + 1,))
+            action_scores[action] = 1
+
+        state, reward, done = self.board.step(action)
+        return state, reward, done, action_scores, action
+
+
+    def __call__(self):
+        """
+        Make a game between the player and the opponent and return all the states
+        and the associated move. Also returns the winner in order to create the
+        training dataset.
+
+        The result is pickled: [reward] for an evaluation game, and
+        (dataset, reward) for a self-play game, where every dataset entry is
+        (state, probas, player_color, action). When MOVE_LIMIT is exceeded,
+        reward is replaced by board.get_winner().
+        """
+
+        done = False
+        state = self.board.reset()
+        dataset = []
+        moves = 0
+        comp = False
+
+        while not done:
+            ## Prevent cycling in 2 atari situations. Leave the loop instead
+            ## of returning, so that evaluation and self-play keep their own
+            ## result format and the pass flags are reset
+            if moves > MOVE_LIMIT:
+                reward = self.board.get_winner()
+                break
+
+            ## Magic ratio for adaptive temperature
+            if moves > TEMPERATURE_MOVE:
+                comp = True
+
+            ## For evaluation
+            if self.opponent:
+                state, reward, done, _, action = self._play(_prepare_state(state),
+                                    self.player, self.opponent.passed, competitive=True)
+                moves += 1
+                ## Do not let the opponent play on a finished game
+                if not done:
+                    state, reward, done, _, action = self._play(_prepare_state(state),
+                                        self.opponent, self.player.passed, competitive=True)
+                    moves += 1
+
+            ## For self-play
+            else:
+                state = _prepare_state(state)
+                new_state, reward, done, probas, action = self._play(state, self.player,
+                                                            False, competitive=comp)
+                self._swap_color()
+                ## NOTE(review): the color stored is the one obtained after
+                ## the swap -- confirm this is what the training code expects
+                dataset.append((state.cpu().data.numpy(), probas,
+                                self.player_color, action))
+                state = new_state
+                moves += 1
+
+        ## The pass flags are stored on the agents, clear them so that
+        ## they do not leak into another game
+        self.player.passed = False
+
+        ## Pickle the result because multiprocessing
+        if self.opponent:
+            print("[EVALUATION] Match %d done in eval, winner %s" % (self.id, "black" if reward == 0 else "white"))
+            self.opponent.passed = False
+            return pickle.dumps([reward])
+
+        return pickle.dumps((dataset, reward))
+
+
+    def solo_play(self, move=None):
+        """
+        Used to play against a human or for GTP, cant be called
+        in a multiprocess scenario.
+
+        move -- move played by the other side, or None to let the agent play
+
+        Return the move chosen by the agent when move is None, otherwise
+        True once the given move has been played on the board.
+        """
+
+        ## The agent chooses and plays its own move
+        if move is None:
+            state = _prepare_state(self.board.state)
+            _, _, _, _, move = self._play(state, self.player, self.human_pass, competitive=True)
+            self._swap_color()
+            return move
+
+        ## Otherwise just play the given move
+        self.board.step(move)
+        if move != self.goban_size ** 2:
+            ## Without MCTS self.mcts is only the falsy flag
+            if self.mcts:
+                self.mcts.advance(move)
+        else:
+            self.human_pass = True
+        self._swap_color()
+        return True
+
+
+    def reset(self):
+        """ Reset the board and the human pass flag for a new game """
+
+        self.board.reset()
+        ## A pass from the previous game must not force the agent to pass
+        self.human_pass = False
+        ## NOTE(review): the MCTS object and player.passed are left as they
+        ## are -- confirm whether they should be reset here as well