Hey guys,
Here's my source code for the MCTS AI. It's almost entirely domain-independent, with a number of general performance optimizations. Since performance is not really an issue (I can simulate about 4000 games per second), and increasing the sample size does NOT make the AI play better (given the issues I currently have), I am not implementing domain-dependent knowledge for now. Also, my goal is not to make the AI as strong as possible: it's made for casual to semi-casual players.
// One node of the Monte-Carlo search tree for Tablut. The tree shares a single
// static Board: moves are applied/undone on it lazily (see Select/Expand), so
// all methods here assume single-threaded use.
class TreeNodeTablut
{
// Runs one MCTS iteration from this (root) node:
// selection down the tree, expansion of the reached leaf, one random playout,
// and back-propagation of the playout score along the visited path.
public void SelectAction()
{
LinkedList<TreeNodeTablut> visited = new LinkedList<TreeNodeTablut>();
TreeNodeTablut cur = this;
visited.AddLast(this);
// Selection phase: descend until we reach an unexpanded or terminal node.
while (!cur.IsLeaf() && !cur._isTerminalNode)
{
cur = cur.Select();
visited.AddLast(cur);
}
if (cur.Expand())
{
if (cur.Children != null)
{
// Node has been expanded: descend into one freshly created child.
cur = cur.Select();
visited.AddLast(cur);
}
// else: maximum expansion depth reached — simulate from the node we are
// already on. It is already the last entry in `visited`; adding it again
// here (as the original code did) would double-count its visit and value
// in UpdateStats below.
float value = Simulate(ref cur);
foreach (TreeNodeTablut node in visited)
{
node.UpdateStats(value);
}
}
// else: terminal node with no legal moves — nothing to simulate.
// NOTE(review): the terminal outcome is not back-propagated here; confirm
// that is intentional (terminal nodes currently never update ancestors).
}
// Returns the child with the highest UCT value, or null if there are none.
// Optimization: the selected child's move is applied to the board here
// (lazily, on first visit), NOT for every child during Expand.
private TreeNodeTablut Select()
{
TreeNodeTablut selected = null;
if (Children == null)
{
return null;
}
double bestValue = double.MinValue;
foreach (TreeNodeTablut c in Children)
{
// UCT: exploitation (mean value) + exploration term. `epsilon` avoids
// division by zero for unvisited children; the tiny random term breaks ties.
double uctValue = c.TotValue / (c.Visits + epsilon) + Math.Sqrt(Math.Log(Visits + 1) / (c.Visits + epsilon)) + MBRandom.RandomFloat * epsilon;
if (uctValue > bestValue)
{
selected = c;
bestValue = uctValue;
}
}
// Lazy expansion: on the child's first visit, replay its stored move on the
// shared board and snapshot the resulting state. PawnInformation == null
// marks a child whose board state has not been materialized yet.
if (selected != null && selected.BoardState.PawnInformation == null)
{
Board.UndoMove(ref selected.Parent.BoardState);
PawnTablut pawn = (selected.CurrentPlayerTurn == PlayerTurn.PlayerOne ? Board.PlayerOneUnits : Board.PlayerTwoUnits)[selected.PawnIndex] as PawnTablut;
Move move;
move.GoalTile = Board.Tiles[selected.goalTile.X][selected.goalTile.Y];
move.Unit = pawn;
bool madeCapture;
// Return value (whether the move was actually made) is intentionally
// ignored: the move was generated as valid during Expand.
Board.AIMakeMove(move, out madeCapture);
selected.BoardState = Board.TakeBoardSnapshot();
}
return selected;
}
// Creates one child per legal move of the side to move.
// Returns false only when the position is terminal (no legal moves);
// returns true both after expanding and when the depth cap is reached.
public bool Expand()
{
// Maximum expanding depth reached. Don't create child nodes.
if (Depth == SearchDepth)
{
return true;
}
// Restore the board to this node's position before generating moves.
Board.UndoMove(ref BoardState);
// All moves for every pawn of the side to move (-1 = player, 1 = AI).
List<List<Move>> moves = Board.CalculateAllValidMoves(nextPlayerTurn == PlayerTurn.PlayerOne ? -1 : 1);
// Some pawns might have no moves, but they are still in the list (with 0 moves);
// filter those out. (Sorting descending first means TakeWhile drops exactly
// the trailing zero-move pawns.)
List<List<Move>> orderedMoves = moves.OrderByDescending(o => o.Count).TakeWhile(m => m.Count != 0).ToList();
// Create a child for every available move. Less efficient than pruning, but
// guarantees every move (and its continuations) can be evaluated.
int moveCount = Board.GetTotalMovesAvailable(ref orderedMoves);
if (moveCount > 0)
{
Children = new List<TreeNodeTablut>(moveCount);
int index = 0;
foreach (List<Move> pawnMoves in orderedMoves)
{
foreach (Move move in pawnMoves)
{
PawnTablut pawn = move.Unit as PawnTablut;
TileTablut tile = move.GoalTile as TileTablut;
Children.Add(new TreeNodeTablut());
Children[index].OpeningMove = move;
Children[index].CurrentPlayerTurn = NextPlayerTurn;
Children[index].nextPlayerTurn = (nextPlayerTurn == PlayerTurn.PlayerOne ? PlayerTurn.PlayerTwo : PlayerTurn.PlayerOne);
Children[index].Depth = Depth + 1;
// Performance: don't apply the move for every child — only store what is
// needed (pawn index + goal tile) to apply it later, in Select.
// Advantages:
// - No need to undo the move, since simulation continues from the board
// - Far fewer moves applied, since not every node is visited
// - Far less memory, since no board state is stored per unvisited child
Children[index].Parent = this;
Children[index].PawnIndex = pawn.Index;
Children[index].goalTile = new IntVec2(tile.X, tile.Y);
++index;
}
}
}
else
{
_isTerminalNode = true;
return false;
}
return true;
}
// A node is a leaf while it has never been expanded.
private bool IsLeaf()
{
return Children == null;
}
// Plays random moves from tn's position until the game ends, and returns a
// score from the AI's perspective (positive = AI win, negative = player win).
// Mutates the shared Board; callers rely on the simulation continuing from
// the current board state (no undo here).
private float Simulate(ref TreeNodeTablut tn)
{
int moveID, pawnID = -1;
State state = Board.AICheckGameEnded();
int simDepth = 0;
List<List<Move>> validMovesAI = Board.CalculateAllValidMoves(1);
List<List<Move>> validMovesPlayer = Board.CalculateAllValidMoves(-1);
// tn.CurrentPlayerTurn is borrowed as the playout turn tracker and restored
// after the loop.
PlayerTurn saveTurn = tn.CurrentPlayerTurn;
while (state == State.Empty)
{
List<List<Move>> moves = (tn.CurrentPlayerTurn == PlayerTurn.PlayerOne ? validMovesPlayer : validMovesAI);
// NOTE(review): if BOTH sides ever have zero moves while the game is not
// detected as ended, this loop never terminates — confirm AICheckGameEnded
// covers that case.
if (moves.Count > 0)
{
// NOTE(review): assumes MBRandom.RandomInt(min, max) is inclusive of max;
// if the upper bound is exclusive, the last pawn/move is never picked.
pawnID = MBRandom.RandomInt(0, moves.Count - 1);
moveID = MBRandom.RandomInt(0, moves[pawnID].Count - 1);
PawnTablut pawn = moves[pawnID][moveID].Unit as PawnTablut;
TileTablut tile = moves[pawnID][moveID].GoalTile as TileTablut;
int prevX = pawn.X;
int prevY = pawn.Y;
bool madeCapture;
Board.AIMakeMove(moves[pawnID][moveID], out madeCapture);
int curX = pawn.X;
int curY = pawn.Y;
state = Board.AICheckGameEnded();
// Only recalculate the moves if the game isn't won yet.
if (state == State.Empty)
{
// Performance optimization: start from the previous move lists and only
// update the pawns affected by this move.
Board.UpdateAllValidMoves(ref validMovesAI, ref validMovesPlayer, ref pawn, prevX, prevY, curX, curY);
}
}
tn.CurrentPlayerTurn = (tn.CurrentPlayerTurn == PlayerTurn.PlayerOne ? PlayerTurn.PlayerTwo : PlayerTurn.PlayerOne);
simDepth++;
}
tn.CurrentPlayerTurn = saveTurn;
int score = 1;
// Avoid fatalistic play: the AI sees its own winning moves easily but
// under-weights the player's. Player wins found close to the root are
// therefore penalized much more heavily than deep ones.
// NOTE(review): this weighting is deliberately asymmetric — AI wins always
// score 1 regardless of depth; confirm that is intended.
if (state == State.PlayerWon)
{
switch (tn.Depth + simDepth)
{
case 1:
score = 20;
break;
case 2:
score = 15;
break;
case 3:
score = 10;
break;
case 4:
score = 5;
break;
case 5:
score = 3;
break;
default:
score = 1;
break;
}
}
// NOTE(review): any non-AIWon outcome (including a draw state, if one
// exists) is negated here — confirm draws should count as player wins.
score *= (state == State.AIWon ? 1 : -1);
// Maximize score player, minimize for AI during player turn
//score *= (saveTurn == PlayerTurn.PlayerOne ? -1 : 1);
return score;
}
// Back-propagation: record one visit and accumulate the playout score.
private void UpdateStats(float value)
{
Visits++;
TotValue += value;
}
}
And here's what starts the tree search:
// Runs the MCTS search from the current board position and returns (via out
// parameters) the pawn and destination tile of the best root move found, or
// null/null when no legal move exists.
public void CalculateNextMove(out PawnTablut pawn, out TileTablut tile)
{
pawn = null;
tile = null;
TreeNodeTablut.Board = _board;
BoardTablutInformation boardInfo = _board.TakeBoardSnapshot();
TreeNodeTablut tn = new TreeNodeTablut();
tn.BoardState = boardInfo;
tn.CurrentPlayerTurn = boardInfo.Turn;
tn.NextPlayerTurn = boardInfo.Turn;
tn.Depth = 0;
for (int i = 0; i < _sampleCount; i++)
{
tn.SelectAction();
}
// Put pawns back in their previous positions (simulations mutate the board).
_board.UndoMove(ref boardInfo);
// Guard: if the root was never expanded (terminal position or zero samples)
// there are no children to choose from. The original code dereferenced
// tn.Children unconditionally and would throw a NullReferenceException here.
if (tn.Children == null)
{
return;
}
float bestScore = float.MinValue;
Move bestMove;
bestMove.GoalTile = null;
bestMove.Unit = null;
// Select best child based on average score (win-rate).
// Unvisited children have Visits == 0, so nodeScore is NaN (0f/0f); NaN
// compares false against bestScore, so such children are never selected.
foreach (TreeNodeTablut node in tn.Children)
{
float nodeScore = node.TotValue / node.Visits;
if (nodeScore > bestScore)
{
bestScore = nodeScore;
bestMove.GoalTile = node.OpeningMove.GoalTile;
bestMove.Unit = node.OpeningMove.Unit;
}
}
if (bestMove.Unit != null && bestMove.GoalTile != null)
{
pawn = bestMove.Unit as PawnTablut;
tile = bestMove.GoalTile as TileTablut;
}
// (Removed: unused bestIndex/index tracking and the pointless `tn = null;` —
// the local reference goes out of scope here anyway.)
}
I hope that helps, thanks again!