@@ -184,24 +184,6 @@ def _swap_color(self):
184184 else :
185185 self .player_color = 1
186186
187-
188- def _draw_move (self , action_scores , competitive = False ):
189- """
190- Find the best move, either deterministically for competitive play
191- or stochastically according to some temperature constant
192- """
193-
194- if competitive :
195- move = np .argmax (action_scores )
196-
197- else :
198- action_scores = np .power (action_scores , (1. / TEMP ))
199- total = np .sum (action_scores )
200- probas = action_scores / total
201- move = np .random .choice (action_scores .shape [0 ], p = probas )
202-
203- return move
204-
205187
206188 def _get_move (self , board , probas ):
207189 """ Select a move without MCTS """
@@ -235,22 +217,22 @@ def _get_move(self, board, probas):
235217 def _play (self , state , player ):
236218 """ Choose a move depending on MCTS or not """
237219
238- if not self .mcts_flag :
239- action_scores = player .mcts .search ()
220+ # if self.mcts_flag:
221+ # action_scores = player.mcts.search()
222+ # else:
223+ feature_maps = player .extractor (state )
224+ probas = player .policy_net (feature_maps )[0 ] \
225+ .cpu ().data .numpy ()
226+ if player .passed is True :
227+ player_move = self .goban_size ** 2
240228 else :
241- feature_maps = player .extractor (state )
242- probas = player .policy_net (feature_maps )[0 ] \
243- .cpu ().data .numpy ()
244- if player .passed is True :
245- player_move = self .goban_size ** 2
246- else :
247- player_move = self ._get_move (self .board , probas )
229+ player_move = self ._get_move (self .board , probas )
248230
249- if player_move == self .goban_size ** 2 :
250- player .passed = True
231+ if player_move == self .goban_size ** 2 :
232+ player .passed = True
251233
252- state , reward , done = self .board .step (player_move )
253- return state , reward , done , player_move
234+ state , reward , done = self .board .step (player_move )
235+ return state , reward , done , player_move
254236
255237
256238 def __call__ (self ):
0 commit comments