Skip to content

Commit 39078ea

Browse files
committed
fixed issues with MCTS and board
1 parent eed94ba commit 39078ea

File tree

1 file changed

+93
-55
lines changed

1 file changed

+93
-55
lines changed

src/main/java/main/java/codingame/TicTacToe/TicTacToe.java

Lines changed: 93 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -10,46 +10,59 @@ public class TicTacToe {
1010
public static void main(String args[]) throws IOException {
1111
final BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
1212
final LargeBoard largeBoard = new LargeBoard();
13-
final MCTS algorithm = new MCTS(largeBoard);
13+
// largeBoard.play(1, 0);
14+
// largeBoard.play(1, 1);
15+
// largeBoard.play(1, 3);
16+
// largeBoard.play(1, 2);
17+
// largeBoard.play(1, 7);
18+
// largeBoard.play(1, 9);
19+
// System.out.println(largeBoard);
20+
final MCTS algorithm = new MCTS();
21+
algorithm.construct(largeBoard, MCTS.TIME_OUT);
1422
while (true) {
1523
String line[] = in.readLine().split(" ");
1624
final int opponentRow = Integer.parseInt(line[0]), opponentCol = Integer.parseInt(line[1]);
1725
if (opponentCol >= 0) {
18-
largeBoard.play(2, opponentRow * 9 + opponentCol);
26+
final int opponentMove = opponentRow * 9 + opponentCol;
27+
if (algorithm.root.getChild(opponentMove) == null) {
28+
algorithm.root.expand(largeBoard, opponentMove);
29+
}
30+
largeBoard.play(2, opponentMove);
31+
algorithm.root = algorithm.root.getChild(opponentMove);
32+
algorithm.construct(largeBoard, MCTS.TIME_OUT);
1933
System.err.println(largeBoard);
2034
}
2135
final int validActionCount = Integer.parseInt(in.readLine());
22-
int bRow = 0, bCol = 0;
2336
for (int i = 0; i < validActionCount; i++) {
24-
line = in.readLine().split(" ");
25-
bRow = Integer.parseInt(line[0]);
26-
bCol = Integer.parseInt(line[1]);
37+
in.readLine();
2738
}
2839
final int bestMove = algorithm.suggestMove();
29-
final int row = bestMove / 3, col = bestMove % 3;
40+
final int row = bestMove / 9, col = bestMove % 9;
41+
System.out.println(row + " " + col);
3042
largeBoard.play(1, bestMove);
31-
System.out.println(((bRow / 3) * 3 + row) + " " + ((bCol / 3) * 3 + col));
43+
algorithm.root = algorithm.root.getChild(bestMove);
44+
algorithm.construct(largeBoard, MCTS.TIME_OUT);
3245
System.err.println(largeBoard);
3346
}
3447
}
3548
}
3649

3750
class MCTS {
38-
public static final int TIME_OUT = 200;
51+
public static final int TIME_OUT = 50;
3952
public static final double CONSTANT = 10000d;
40-
private final TreeNode root = new TreeNode(-1, null, 1);
53+
TreeNode root = new TreeNode(-1, null, 1);
4154

4255
public int suggestMove() {
4356
return root.getChildren()
4457
.stream()
4558
.max(Comparator.comparingDouble(node -> node.wins / (double) node.plays + node.plays / CONSTANT))
4659
.map(c -> c.col)
47-
.orElse(0);
60+
.orElseThrow(() -> new RuntimeException("No moves to play!"));
4861
}
4962

50-
public MCTS(final LargeBoard board) {
63+
public void construct(final LargeBoard board, final int timeOut) {
5164
final long startTime = System.currentTimeMillis();
52-
while (System.currentTimeMillis() - startTime <= TIME_OUT) {
65+
while (System.currentTimeMillis() - startTime <= timeOut) {
5366
TreeNode current = root;
5467
int position = current.selectChild(board);
5568
int player = 1;
@@ -111,6 +124,7 @@ private double getUtility() {
111124

112125
private double simulate(final LargeBoard board, int player) {
113126
int numberOfMovesPlayed = board.movesPlayed;
127+
final int originalPlayer = player;
114128
while (board.result() == -1) {
115129
final int possibilities[] = new int[81];
116130
int movesToPlay = 0;
@@ -120,10 +134,13 @@ private double simulate(final LargeBoard board, int player) {
120134
movesToPlay++;
121135
final int result = board.result();
122136
if (result != -1) {
123-
return result == player ? 1 : (result == 0 ? 0.5 : 0);
137+
return result == originalPlayer ? 1 : (result == 0 ? 0.5 : 0);
124138
}
125139
}
126140
}
141+
if (movesToPlay == 0) {
142+
break;
143+
}
127144
board.play(player, possibilities[random.nextInt(movesToPlay)]);
128145
player = player == 1 ? 2 : 1;
129146
}
@@ -165,21 +182,21 @@ public TreeNode getChild(final int col) {
165182
@Override
166183
public String toString() {
167184
return "TreeNode{" +
168-
"col=" + col +
169-
", plays=" + plays +
170-
", wins=" + wins +
171-
", parent=" + (parent == null ? -1 : parent.col) +
172-
", player=" + player +
173-
", children=" + children.values()
185+
"\ncol=" + col +
186+
", \nplays=" + plays +
187+
", \nwins=" + wins +
188+
", \nparent=" + (parent == null ? -1 : parent.col) +
189+
", \nplayer=" + player +
190+
", \nchildren=" + children.values()
174191
.stream()
175-
.map(c -> "COL: " + c.col + " WINS: " + c.wins + " PLAYS: " + c.plays + "\n")
176-
.collect(Collectors.joining(",")) +
192+
.map(c -> "MOVE: " + c.col + " WINS: " + c.wins + " PLAYS: " + c.plays + "\n")
193+
.collect(Collectors.joining("\n")) +
177194
'}';
178195
}
179196
}
180197

181198
class LargeBoard {
182-
public static final int FULL = (1 << 10) - 1;
199+
public static final int FULL = (1 << 9) - 1;
183200
int movesPlayed;
184201
int largeBoard, largeCaptures, largeOccupied;
185202
final int moves[] = new int[81];
@@ -197,14 +214,25 @@ public void play(final int player, final int p) {
197214
final int row = (p / 9) % 3, col = p % 3;
198215
if (movesPlayed > 0) {
199216
final int previousMove = moves[movesPlayed - 1];
200-
final int pRow = previousMove / 27, pCol = (previousMove % 9) / 3;
201-
assert (largeOccupied & (1 << (pRow * 3 + pCol))) != 0 || (bRow == pRow && bCol == pCol);
217+
final int pMoveRow = (previousMove / 9) % 3, pMoveCol = previousMove % 3;
218+
if (!((largeOccupied & (1 << (pMoveRow * 3 + pMoveCol))) != 0 || (bRow == pMoveRow && bCol == pMoveCol))) {
219+
throw new RuntimeException();
220+
}
202221
}
203222
final int position = bRow * 3 + bCol;
204-
assert (largeOccupied & (1 << position)) == 0;
223+
int bit = 1 << position;
224+
if ((largeOccupied & bit) != 0) {
225+
throw new RuntimeException();
226+
}
205227
boards[position].play(player, row * 3 + col);
206-
if (boards[position].occupied == FULL) {
207-
largeOccupied = largeOccupied | (1 << position);
228+
if (boards[position].result(player) == player) {
229+
if (player == 1) {
230+
largeBoard = largeBoard | bit;
231+
}
232+
largeCaptures = largeCaptures | bit;
233+
largeOccupied = largeOccupied | bit;
234+
} else if (boards[position].occupied == FULL) {
235+
largeOccupied = largeOccupied | bit;
208236
}
209237
movesPlayed++;
210238
}
@@ -223,22 +251,19 @@ public void undo() {
223251

224252
public int result() {
225253
int firstScore = 0, secondScore = 0;
226-
for (int i = 0; i < 3; i++) {
227-
for (int j = 0; j < 3; j++) {
228-
final int position = i * 3 + j;
229-
final int bit = 1 << position;
230-
if (boards[position].result(1) == 1) {
231-
largeBoard = largeBoard | bit;
232-
firstScore++;
233-
largeCaptures = largeCaptures | bit;
234-
largeOccupied = largeOccupied | bit;
235-
} else if (boards[position].result(2) == 2) {
236-
secondScore++;
237-
largeCaptures = largeCaptures | bit;
238-
largeOccupied = largeOccupied | bit;
239-
} else if (boards[position].occupied == FULL) {
240-
largeOccupied = largeOccupied | bit;
241-
}
254+
for (int position = 0; position < 9; position++) {
255+
final int bit = 1 << position;
256+
if (boards[position].result(1) == 1) {
257+
largeBoard = largeBoard | bit;
258+
firstScore++;
259+
largeCaptures = largeCaptures | bit;
260+
largeOccupied = largeOccupied | bit;
261+
} else if (boards[position].result(2) == 2) {
262+
secondScore++;
263+
largeCaptures = largeCaptures | bit;
264+
largeOccupied = largeOccupied | bit;
265+
} else if (boards[position].occupied == FULL) {
266+
largeOccupied = largeOccupied | bit;
242267
}
243268
}
244269
if (firstScore > 4) {
@@ -261,8 +286,8 @@ public boolean canPlay(final int p) {
261286
final int row = (p / 9) % 3, col = p % 3;
262287
if (movesPlayed > 0) {
263288
final int previousMove = moves[movesPlayed - 1];
264-
final int pRow = previousMove / 27, pCol = (previousMove % 9) / 3;
265-
if (!((largeOccupied & (1 << (pRow * 3 + pCol))) != 0 || (bRow == pRow && bCol == pCol))) {
289+
final int pMoveRow = (previousMove / 9) % 3, pMoveCol = previousMove % 3;
290+
if (!((largeOccupied & (1 << (pMoveRow * 3 + pMoveCol))) != 0 || (bRow == pMoveRow && bCol == pMoveCol))) {
266291
return false;
267292
}
268293
}
@@ -273,9 +298,12 @@ public boolean canPlay(final int p) {
273298
@Override
274299
public String toString() {
275300
return "LargeBoard{" +
276-
"largeBoard=" + largeBoard +
277-
", largeCaptures=" + largeCaptures +
278-
", boards=" + Arrays.deepToString(boards) +
301+
"\nlargeBoard=" + largeBoard +
302+
", \nlargeCaptures=" + largeCaptures +
303+
", \nlargeOccupied=" + largeOccupied +
304+
", \nmovesPlayed=" + movesPlayed +
305+
", \nmoves=" + Arrays.toString(moves) +
306+
", \nboards=" + Arrays.deepToString(boards) +
279307
'}';
280308
}
281309
}
@@ -297,7 +325,9 @@ class Board {
297325

298326
public void play(final int player, final int p) {
299327
final int bit = 1 << p;
300-
assert (occupied & bit) == 0;
328+
if ((occupied & bit) != 0) {
329+
throw new RuntimeException();
330+
}
301331
if (player == 1) {
302332
board = board | bit;
303333
}
@@ -306,7 +336,9 @@ public void play(final int player, final int p) {
306336

307337
public void undo(final int p) {
308338
final int bit = 1 << p;
309-
assert (occupied & bit) != 0;
339+
if ((occupied & bit) == 0) {
340+
throw new RuntimeException();
341+
}
310342
board = board & (~bit);
311343
occupied = occupied & (~bit);
312344
decided = 0;
@@ -319,9 +351,15 @@ public int result(final int player) {
319351
public static int evaluateBoard(final int player, int board, int occupied) {
320352
final int boardForPlayer = player == 1 ? board : ~board;
321353
final int effectiveBoard = boardForPlayer & occupied;
322-
for (final int winningState : winningStates) {
323-
if (effectiveBoard >= winningState) {
324-
if (winningState == (effectiveBoard & winningState)) {
354+
int i = 0;
355+
for (; i < winningStates.length; i++) {
356+
if (effectiveBoard >= winningStates[i]) {
357+
break;
358+
}
359+
}
360+
for (; i < winningStates.length; i++) {
361+
if (effectiveBoard >= winningStates[i]) {
362+
if (winningStates[i] == (effectiveBoard & winningStates[i])) {
325363
return player;
326364
}
327365
} else {
@@ -333,6 +371,6 @@ public static int evaluateBoard(final int player, int board, int occupied) {
333371

334372
@Override
335373
public String toString() {
336-
return "Occupied:" + Integer.toBinaryString(occupied) + "\nBoard:" + Integer.toBinaryString(board);
374+
return "Occupied:" + Integer.toBinaryString(occupied) + " Board:" + Integer.toBinaryString(board) + "\n";
337375
}
338376
}

0 commit comments

Comments
 (0)