816 words
4 minutes
五子棋AI - 2
关于评分函数
使用minimax算法的一个局限性是需要获取局面的精确评分,需要设计者拥有一定的五子棋知识。同时评分函数的权重大都靠经验来设置,这显然与设计AI的初衷不一致,所以说还是应该走AlphaZero的路线🤔
AI的事还是得AI来
原始
def evaluate(board, player):
    """Score ``board`` from ``player``'s point of view.

    Every maximal straight run of same-coloured stones is located along four
    step vectors and weighted purely by its length.  The result is the
    player's total minus the opponent's total.  Whether the ends of a run
    are open or blocked is not taken into account.
    """
    opponent = BLACK_STONE if player == WHITE_STONE else WHITE_STONE

    # Each line is walked in one direction only; the "start of run" check
    # below makes the opposite direction unnecessary.
    steps = ((1, 0), (0, 1), (1, 1), (1, -1))
    # Value of runs shorter than five; any other length below five is worth 0.
    short_run_value = {4: 1000, 3: 100, 2: 10}

    def on_board(r, c):
        return 0 <= r < BOARD_SIZE and 0 <= c < BOARD_SIZE

    def run_total(current_player):
        total = 0
        for row in range(BOARD_SIZE):
            for col in range(BOARD_SIZE):
                if board[row][col] != current_player:
                    continue
                for step_r, step_c in steps:
                    # Count a run only from its first stone so that it is
                    # not scored once per stone.
                    back_r, back_c = row - step_r, col - step_c
                    if on_board(back_r, back_c) and board[back_r][back_c] == current_player:
                        continue

                    length = 1
                    r, c = row + step_r, col + step_c
                    while on_board(r, c) and board[r][c] == current_player:
                        length += 1
                        r, c = r + step_r, c + step_c

                    if length >= 5:
                        total += 1919810
                    else:
                        total += short_run_value.get(length, 0)
        return total

    return run_total(player) - run_total(opponent)


# Article section heading: "优化" (optimised version)
# Weights
W_FIVE = 100000
W_FOUR_OPEN = 10000
W_FOUR_HALF = 2500
W_THREE_OPEN = 1500
W_THREE_HALF = 200
W_TWO_OPEN = 100
W_GAPPED_BONUS = 400 # extension value of a run that connects across a one-cell gap
OPPONENT_WEIGHT = 1.2 # the opponent's score is amplified (encourages defence)
IMMEDIATE_THREAT_PENALTY = 1000000 # if the opponent has an immediate winning point, force a minimal return value

# NOTE(review): the statements below are a fragment; i, j, dx, dy, cur, board
# and in_bounds come from an enclosing scope that is not shown here.
#
# Starting at (i, j), walk along (dx, dy) and count the consecutive stones of
# colour cur into cnt, stopping at the board edge or at the first cell that
# is not cur.
cnt = 0
x = i
y = j
while in_bounds(x,y) and board[x][y] == cur:
    cnt += 1
    x += dx
    y += dy
# After the loop (right_x, right_y) is the first cell that does not belong to
# cur: it may be off the board, an opponent stone, or an empty cell.
right_x, right_y = x, y
# NOTE(review): fragment; i, j, dx, dy, right_x, right_y, board, in_bounds,
# EMPTY and opp come from an enclosing scope that is not shown here.  Other
# snippets compare against `opponent` rather than `opp`; confirm that both
# names exist there.

# Cell immediately before the first stone of the run (its "left" neighbour).
# Check whether that side is empty or blocked.
left_x, left_y = i - dx, j - dy

# Left end: "empty" means an on-board EMPTY cell; "blocked" means the cell is
# off the board or holds an opponent stone.  The board is indexed as
# board[x][y]; a tuple index (board[x, y]) raises TypeError on nested lists.
# The `or` short-circuits, so the board is only read for in-bounds cells.
left_empty = in_bounds(left_x, left_y) and board[left_x][left_y] == EMPTY
left_blocked = (not in_bounds(left_x, left_y)) or board[left_x][left_y] == opp

# Right end, by the same logic.
right_empty = in_bounds(right_x, right_y) and board[right_x][right_y] == EMPTY
right_blocked = (not in_bounds(right_x, right_y)) or board[right_x][right_y] == opp

# Number of empty cells at the two ends of the run (0, 1 or 2).
open_ends = (1 if left_empty else 0) + (1 if right_empty else 0)
# NOTE(review): fragment; cnt, open_ends, left_empty/right_empty, the end
# coordinates, cur, opponent and the W_* weights come from code that is not
# part of this snippet.
score = 0
threats = set()  # empty end cells of runs of three or four
immediate_win = False

# Run length and threats
if cnt >= 5:
    # Five or more in a row; flagged regardless of which colour cur is.
    score += W_FIVE
    immediate_win = True
elif cnt == 4:
    if open_ends == 2:
        score += W_FOUR_OPEN
        if cur == opponent:
            immediate_win = True
        if left_empty:
            threats.add((left_x,left_y))
        if right_empty:
            threats.add((right_x,right_y))
    elif open_ends == 1:
        score += W_FOUR_HALF
        if left_empty:
            threats.add((left_x,left_y))
        if right_empty:
            threats.add((right_x,right_y))
        # An opponent four with one empty end is also flagged as immediate.
        if cur == opponent:
            immediate_win = True
    else:
        # Neither end is empty.
        score += W_FOUR_HALF // 4
elif cnt == 3:
    if open_ends == 2:
        score += W_THREE_OPEN
        if left_empty:
            threats.add((left_x,left_y))
        if right_empty:
            threats.add((right_x,right_y))
    elif open_ends == 1:
        score += W_THREE_HALF
        if left_empty:
            threats.add((left_x,left_y))
        if right_empty:
            threats.add((right_x,right_y))
    else:
        score += 30
elif cnt == 2:
    if open_ends == 2:
        score += W_TWO_OPEN
    elif open_ends == 1:
        score += 8
    else:
        score += 2
elif cnt == 1:
    # A single stone only scores when both neighbours on this line are empty.
    if open_ends == 2:
        score += 3

# Article section heading: "间隔一格" (runs separated by a one-cell gap)
# NOTE(review): fragment; the end coordinates, dx, dy, cnt, cur, opponent,
# score, threats, immediate_win, board, in_bounds and EMPTY come from code
# that is not part of this snippet.
#
# Runs broken by a single empty cell: when the cell just past one end of the
# run is empty and the cell after that holds another stone of colour cur,
# filling the gap would join the two groups.  Both ends are handled by one
# loop; the right end is processed first, then the left end with the step
# reversed.  The board is indexed as board[x][y]; a tuple index
# (board[x, y]) raises TypeError on nested lists.
for gap_x, gap_y, step_x, step_y in (
    (right_x, right_y, dx, dy),
    (left_x, left_y, -dx, -dy),
):
    if not (in_bounds(gap_x, gap_y) and board[gap_x][gap_y] == EMPTY):
        continue
    beyond_x, beyond_y = gap_x + step_x, gap_y + step_y
    if in_bounds(beyond_x, beyond_y) and board[beyond_x][beyond_y] == cur:
        score += W_GAPPED_BONUS * cnt
        threats.add((gap_x, gap_y))
        # cnt + 1 counts this run plus the one stone seen beyond the gap.
        if cur == opponent and cnt + 1 >= 4:
            immediate_win = True

# Article section heading: "遍历整盘" (scanning the whole board)
def get_aggregate(cur): total = 0 threat_positions = set() has_immediate = False for i in range(BOARD_SIZE): for j in range(BOARD_SIZE): if board[i][j] != cur: continue for dx,dy in directions: prev_x, prev_y = i - dx, j - dy if in_bounds(prev_x, prev_y) and board[prev_x][prev_y] == cur: continue add, threats, immediate = analyze_from(i,j,dx,dy,cur) total += add threat_positions |= threats if immediate: has_immediate = True return total, threat_positions, has_immediate if opp_immediate: return -IMMEDIATE_THREAT_PENALTY threat_penalty = len(opp_threats) * 2000 score = (my_score - OPPONENT_WEIGHT * opp_score) - threat_penalty