816 words
4 minutes
五子棋AI - 2
2025-11-04

关于评分函数#

使用minimax算法的一个局限性是需要获取局面的精确评分,需要设计者拥有一定的五子棋知识。同时评分函数的权重大都靠经验来设置,这显然与设计AI的初衷不一致,所以说还是应该走AlphaZero的路线🤔

AI的事还是得AI来

原始#

def evaluate(board, player):
    """Return a heuristic score of ``board`` from ``player``'s point of view.

    Each maximal straight run of same-coloured stones is rewarded once per
    direction according to its length. The result is ``player``'s total
    minus the opposing colour's total, so positive values favour ``player``.
    """
    opponent = BLACK_STONE if player == WHITE_STONE else WHITE_STONE
    # Four directions are enough: the opposite four would revisit the same
    # lines from the other end.
    directions = [(1, 0), (0, 1), (1, 1), (1, -1)]
    # Run length -> reward. Lengths above five are clamped to the five entry;
    # a lone stone earns nothing.
    run_value = {1: 0, 2: 10, 3: 100, 4: 1000, 5: 1919810}

    def on_board(row, col):
        return 0 <= row < BOARD_SIZE and 0 <= col < BOARD_SIZE

    def get_score(current_player):
        total = 0
        for row in range(BOARD_SIZE):
            for col in range(BOARD_SIZE):
                if board[row][col] != current_player:
                    continue
                for step_r, step_c in directions:
                    back_r, back_c = row - step_r, col - step_c
                    # A same-coloured stone behind this one means the run
                    # started earlier and has already been counted there.
                    if on_board(back_r, back_c) and board[back_r][back_c] == current_player:
                        continue
                    length = 1
                    r, c = row + step_r, col + step_c
                    while on_board(r, c) and board[r][c] == current_player:
                        length += 1
                        r, c = r + step_r, c + step_c
                    total += run_value[min(length, 5)]
        return total

    return get_score(player) - get_score(opponent)

优化#

# Weights
W_FIVE = 100000
W_FOUR_OPEN = 10000
W_FOUR_HALF = 2500
W_THREE_OPEN = 1500
W_THREE_HALF = 200
W_TWO_OPEN = 100
W_GAPPED_BONUS = 400 # bonus for a run that continues across a one-cell gap
OPPONENT_WEIGHT = 1.2 # multiplier on the opponent's score (encourages defence)
IMMEDIATE_THREAT_PENALTY = 1000000 # if the opponent has an immediate winning point, force a minimal return value
# Walk from (i, j) along (dx, dy) for as long as the cells hold `cur` stones.
cnt = 0
x, y = i, j
while in_bounds(x, y) and board[x][y] == cur:
    cnt += 1
    x, y = x + dx, y + dy
# (x, y) has stepped one cell past the run; remember it as the far end.
right_x, right_y = x, y
  • 从 (i,j) 开始,沿 (dx,dy) 方向连续统计颜色为 cur 的棋子数 cnt,直到遇到边界或非 cur 的格子。
  • 循环结束后, (right_x, right_y) 标记的是第一个不属于 cur 的格子(可能越界、对方棋子或空格)。
# Cell one step against (dx, dy) from the run's starting stone (i, j).
left_x, left_y = i - dx, j - dy

检查左侧是否为空或被堵。

# The near end is "empty" when its cell is on the board and unoccupied.
left_empty = in_bounds(left_x, left_y) and board[left_x][left_y] == EMPTY
# It is "blocked" when it falls off the board or holds an `opp` stone.
# The lookup uses board[x][y], like every other board access here; a tuple
# index (board[x, y]) raises TypeError on a list of lists.
left_blocked = (not in_bounds(left_x, left_y)) or board[left_x][left_y] == opp
  • 判断左侧状态:
# The far end is "empty" when its cell is on the board and unoccupied.
right_empty = in_bounds(right_x, right_y) and board[right_x][right_y] == EMPTY
# It is "blocked" when it falls off the board or holds an `opp` stone.
# The lookup uses board[x][y], like every other board access here; a tuple
# index (board[x, y]) raises TypeError on a list of lists.
right_blocked = (not in_bounds(right_x, right_y)) or board[right_x][right_y] == opp

同理,右侧

# Number of empty ends of the run: 0, 1 or 2.
open_ends = bool(left_empty) + bool(right_empty)

统计连子两端空格数量(0、1 或 2)

# Value accumulated for the run being analysed.
score = 0
# Empty cells, stored as (x, y) tuples, collected while scoring the run.
threats = set()
# Set to True by the scoring branches that treat the run as decisive.
immediate_win = False

连子长度与威胁#

# Empty cells touching either end of the run; a stone there extends it.
open_cells = set()
if left_empty:
    open_cells.add((left_x, left_y))
if right_empty:
    open_cells.add((right_x, right_y))

if cnt >= 5:
    # Five or more in a row: always flagged as decisive.
    score += W_FIVE
    immediate_win = True
elif cnt == 4:
    # Open four, half-open four, or a fully blocked four (quarter value).
    score += {2: W_FOUR_OPEN, 1: W_FOUR_HALF}.get(open_ends, W_FOUR_HALF // 4)
    if open_ends in (1, 2):
        threats |= open_cells
        # Only a four belonging to `opponent` raises the immediate flag.
        if cur == opponent:
            immediate_win = True
elif cnt == 3:
    score += {2: W_THREE_OPEN, 1: W_THREE_HALF}.get(open_ends, 30)
    if open_ends in (1, 2):
        threats |= open_cells
elif cnt == 2:
    score += {2: W_TWO_OPEN, 1: 8}.get(open_ends, 2)
elif cnt == 1 and open_ends == 2:
    # A lone stone only counts when both neighbours on this line are empty.
    score += 3

间隔一格#

# Gapped shape past the far end: run, one empty cell, then another `cur` stone.
if in_bounds(right_x, right_y) and board[right_x][right_y] == EMPTY:
    after_gap_x = right_x + dx
    after_gap_y = right_y + dy
    if in_bounds(after_gap_x, after_gap_y) and board[after_gap_x][after_gap_y] == cur:
        score += W_GAPPED_BONUS * cnt
        # The gap cell is the square that would join the two pieces.
        threats.add((right_x, right_y))
        if cur == opponent and (cnt + 1) >= 4:
            immediate_win = True
# Mirror case before the near end. The lookups use board[x][y], like the
# far-end branch above; a tuple index (board[x, y]) raises TypeError on a
# list of lists.
if in_bounds(left_x, left_y) and board[left_x][left_y] == EMPTY:
    before_gap_x = left_x - dx
    before_gap_y = left_y - dy
    if in_bounds(before_gap_x, before_gap_y) and board[before_gap_x][before_gap_y] == cur:
        score += W_GAPPED_BONUS * cnt
        threats.add((left_x, left_y))
        if cur == opponent and (cnt + 1) >= 4:
            immediate_win = True

遍历整盘#

def get_aggregate(cur):
    """Combine the per-run analysis of every run of ``cur`` stones.

    Scans the whole board, calls ``analyze_from`` once per run and direction
    (from the run's first stone only), and returns a 3-tuple: the summed
    score, the union of all reported threat cells, and whether any run
    reported an immediate flag.
    """
    score_sum = 0
    all_threats = set()
    any_immediate = False
    for row in range(BOARD_SIZE):
        for col in range(BOARD_SIZE):
            if board[row][col] != cur:
                continue
            for step_x, step_y in directions:
                back_x, back_y = row - step_x, col - step_y
                continues_earlier_run = (
                    in_bounds(back_x, back_y) and board[back_x][back_y] == cur
                )
                if continues_earlier_run:
                    # Not the first stone of its run; that stone handles it.
                    continue
                gained, cells, flagged = analyze_from(row, col, step_x, step_y, cur)
                score_sum += gained
                all_threats |= cells
                if flagged:
                    any_immediate = True
    return score_sum, all_threats, any_immediate
# When the opponent was flagged with an immediate win, return the negated penalty at once.
if opp_immediate:
return -IMMEDIATE_THREAT_PENALTY
# Otherwise each opponent threat cell costs a flat 2000, on top of the
# opponent's score scaled by OPPONENT_WEIGHT.
threat_penalty = len(opp_threats) * 2000
score = (my_score - OPPONENT_WEIGHT * opp_score) - threat_penalty
五子棋AI - 2
https://blog.282994.xyz/posts/五子棋ai/gobang-2/
Author
Rock
Published at
2025-11-04
License
CC BY-NC-SA 4.0