五子棋AI - 2 - Rock's log

## 关于评分函数

使用 minimax 算法的一个局限性是：它依赖评分函数对局面给出尽量准确的评分，这要求设计者具备一定的五子棋知识。同时，评分函数的权重大多靠经验来设置，这显然与设计 AI 的初衷不一致，所以还是应该走 AlphaZero 的路线🤔

AI的事还是得AI来

## 原始

1
def evaluate(board, player):
    """Score ``board`` from ``player``'s point of view.

    Every maximal straight run of same-coloured stones is scored by its
    length alone; the result is ``player``'s total minus the opponent's
    total. Whether a run's ends are open or blocked is not considered.

    Args:
        board: Grid indexed as ``board[row][col]``; cells in the range
            ``0 .. BOARD_SIZE - 1`` on both axes are read.
        player: Stone colour to evaluate for. Any value other than
            ``WHITE_STONE`` gets ``WHITE_STONE`` as its opponent.

    Returns:
        The score difference; positive values favour ``player``.
    """
    opponent = BLACK_STONE if player == WHITE_STONE else WHITE_STONE

    # One step vector per line orientation. The opposite direction is covered
    # because counting only ever starts at the first stone of a run.
    directions = [
        (1, 0),
        (0, 1),
        (1, 1),
        (1, -1),
    ]

    def get_score(current_player):
        """Return the summed run scores for ``current_player``."""
        s = 0
        for i in range(BOARD_SIZE):
            for j in range(BOARD_SIZE):
                if board[i][j] != current_player:
                    continue
                for d_x, d_y in directions:
                    # Skip stones that continue a run begun one step back, so
                    # each run is counted once, from its first stone.
                    prev_r = i - d_x
                    prev_c = j - d_y
                    if (
                        0 <= prev_r < BOARD_SIZE
                        and 0 <= prev_c < BOARD_SIZE
                        and board[prev_r][prev_c] == current_player
                    ):
                        continue

                    # Count the run's length, starting stone included.
                    count = 1
                    x = i + d_x
                    y = j + d_y
                    while (
                        0 <= x < BOARD_SIZE
                        and 0 <= y < BOARD_SIZE
                        and board[x][y] == current_player
                    ):
                        count += 1
                        x += d_x
                        y += d_y

                    if count >= 5:
                        s += 1919810
                    elif count == 4:
                        s += 1000
                    elif count == 3:
                        s += 100
                    elif count == 2:
                        s += 10
        return s

    return get_score(player) - get_score(opponent)

## 优化

1
    # Pattern weights.
    W_FIVE = 100000
    W_FOUR_OPEN = 10000
    W_FOUR_HALF = 2500
    W_THREE_OPEN = 1500
    W_THREE_HALF = 200
    W_TWO_OPEN = 100
    W_GAPPED_BONUS = 400  # extension value of a run that connects across a one-cell gap
    OPPONENT_WEIGHT = 1.2  # scale up the opponent's score (encourages defence)
    IMMEDIATE_THREAT_PENALTY = 1000000  # if the opponent has an immediate winning point, force-return a minimal value

1
        # Walk from (i, j) along (dx, dy), counting consecutive `cur` stones.
        cnt = 0
        x, y = i, j
        while in_bounds(x, y) and board[x][y] == cur:
            cnt += 1
            x += dx
            y += dy
        # First cell past the run: off the board, or a cell not holding `cur`.
        right_x, right_y = x, y

从 (i,j) 开始，沿 (dx,dy) 连续计数相同颜色 cur 的棋子数 cnt，直到遇到边界或非 cur 的格子。
循环结束后， (right_x, right_y) 标记的是第一个不属于 cur 的格子（可能越界、对方棋子或空格）。

1
        # Cell one step before (i, j), i.e. against the (dx, dy) direction.
        left_x, left_y = i - dx, j - dy

计算连子起点反方向（“左侧”）紧邻格子的坐标，接下来用它检查左侧是否为空或被堵。

1
        # The cell before the run is either free, or it blocks the run
        # (off the board, or occupied by `opp`).
        left_empty = in_bounds(left_x, left_y) and board[left_x][left_y] == EMPTY
        # Index with board[x][y]: board[x, y] passes a tuple, which raises
        # TypeError when the board is a list of lists.
        left_blocked = (not in_bounds(left_x, left_y)) or board[left_x][left_y] == opp

判断左侧状态：left_empty 表示左侧格子在棋盘内且为空；left_blocked 表示左侧越界或被对方棋子占据。

1
        # The cell after the run is either free, or it blocks the run
        # (off the board, or occupied by `opp`).
        right_empty = in_bounds(right_x, right_y) and board[right_x][right_y] == EMPTY
        # Index with board[x][y]: board[x, y] passes a tuple, which raises
        # TypeError when the board is a list of lists.
        right_blocked = (not in_bounds(right_x, right_y)) or board[right_x][right_y] == opp

同理，判断右侧状态。

1
        # Number of empty cells directly adjacent to the run's two ends: 0, 1 or 2.
        open_ends = (1 if left_empty else 0) + (1 if right_empty else 0)

统计连子两端空格数量（0、1 或 2）

1
        # Per-run results: score contribution, cells flagged as threats, and
        # whether this run counts as an immediate win.
        score = 0
        threats = set()
        immediate_win = False

## 连子长度与威胁

1
        # Score the run by its length and by how many of its ends are empty.
        # NOTE(review): `opponent` is presumably the side opposing the player
        # being evaluated (set in the enclosing scope) - confirm.
        if cnt >= 5:
            score += W_FIVE
            immediate_win = True
        elif cnt == 4:
            if open_ends == 2:
                score += W_FOUR_OPEN
                if cur == opponent:
                    immediate_win = True
                if left_empty:
                    threats.add((left_x, left_y))
                if right_empty:
                    threats.add((right_x, right_y))
            elif open_ends == 1:
                score += W_FOUR_HALF
                if left_empty:
                    threats.add((left_x, left_y))
                if right_empty:
                    threats.add((right_x, right_y))
                if cur == opponent:
                    immediate_win = True
            else:
                # Both ends closed: a quarter of the half-open weight.
                score += W_FOUR_HALF // 4
        elif cnt == 3:
            if open_ends == 2:
                score += W_THREE_OPEN
                if left_empty:
                    threats.add((left_x, left_y))
                if right_empty:
                    threats.add((right_x, right_y))
            elif open_ends == 1:
                score += W_THREE_HALF
                if left_empty:
                    threats.add((left_x, left_y))
                if right_empty:
                    threats.add((right_x, right_y))
            else:
                score += 30
        elif cnt == 2:
            if open_ends == 2:
                score += W_TWO_OPEN
            elif open_ends == 1:
                score += 8
            else:
                score += 2
        elif cnt == 1:
            # A lone stone only scores when both neighbouring cells are empty.
            if open_ends == 2:
                score += 3

## 间隔一格

1
        # One-cell gap after the run: an empty cell followed by another `cur`
        # stone. The bonus scales with the run length, and the gap cell is
        # recorded as a threat.
        if in_bounds(right_x, right_y) and board[right_x][right_y] == EMPTY:
            after_gap_x = right_x + dx
            after_gap_y = right_y + dy
            if in_bounds(after_gap_x, after_gap_y) and board[after_gap_x][after_gap_y] == cur:
                score += W_GAPPED_BONUS * (cnt)
                threats.add((right_x, right_y))
                # cnt stones plus the one beyond the gap make at least four.
                if cur == opponent and (cnt + 1) >= 4:
                    immediate_win = True

1
        # One-cell gap before the run: an empty cell preceded by another `cur`
        # stone. Cells are indexed as board[x][y]; board[x, y] passes a tuple,
        # which raises TypeError when the board is a list of lists.
        if in_bounds(left_x, left_y) and board[left_x][left_y] == EMPTY:
            before_gap_x = left_x - dx
            before_gap_y = left_y - dy
            if in_bounds(before_gap_x, before_gap_y) and board[before_gap_x][before_gap_y] == cur:
                score += W_GAPPED_BONUS * (cnt)
                threats.add((left_x, left_y))
                # cnt stones plus the one beyond the gap make at least four.
                if cur == opponent and (cnt + 1) >= 4:
                    immediate_win = True

## 遍历整盘

1
    def get_aggregate(cur):
        """Scan the whole board and combine the per-run results for ``cur``.

        Args:
            cur: Stone value whose runs are analysed.

        Returns:
            A ``(total, threat_positions, has_immediate)`` tuple: the sum of
            the scores, the union of the threat sets, and whether any call
            reported an immediate flag, as returned by ``analyze_from``.
        """
        total = 0
        threat_positions = set()
        has_immediate = False
        for i in range(BOARD_SIZE):
            for j in range(BOARD_SIZE):
                if board[i][j] != cur:
                    continue
                for dx, dy in directions:
                    # Only analyse from the first stone of a run: skip when
                    # the previous cell in this direction already holds `cur`.
                    prev_x, prev_y = i - dx, j - dy
                    if in_bounds(prev_x, prev_y) and board[prev_x][prev_y] == cur:
                        continue
                    add, threats, immediate = analyze_from(i, j, dx, dy, cur)
                    total += add
                    threat_positions |= threats
                    if immediate:
                        has_immediate = True
        return total, threat_positions, has_immediate

1
    if opp_immediate:
2
        return -IMMEDIATE_THREAT_PENALTY

1
    # Every entry in `opp_threats` costs a flat 2000, on top of the weighted
    # difference between the two sides' scores.
    threat_penalty = len(opp_threats) * 2000
    score = (my_score - OPPONENT_WEIGHT * opp_score) - threat_penalty