Skip to main content

figrid_board/
pattern_table.rs

1//! Pattern4 mini — 11-cell line window를 lossless ID로 압축하는 테이블.
2//!
3//! Rapfi식 Pattern4 인프라의 mini 버전. 한 cell의 한 방향에서 ±5 = 11칸의
4//! 시퀀스를 정규화해 unique pattern ID를 부여하고, 보드 안에 (cell, dir)
5//! 별 pattern_id 상태를 유지하면 매 수마다 영향받는 라인의 ID만 lookup으로
6//! 갱신할 수 있어 region recompute가 사라진다.
7//!
8//! 이 모듈은 그 인프라의 **첫 단계**: 패턴 표현, canonicalize, packed
9//! encoding, 그리고 모든 가능 패턴의 enumeration. NNUE 통합과 보드 상태
10//! 유지는 후속 단계에서 추가된다.
11
12use std::collections::HashMap;
13use std::sync::OnceLock;
14
15/// 11-cell line window. cell 값:
16/// - 0 = empty
17/// - 1 = mine (현재 stm 관점)
18/// - 2 = opp
19/// - 3 = boundary (board 밖)
20///
21/// 인덱스 0이 라인의 한 쪽 끝, 인덱스 5가 anchor cell, 10이 반대 끝.
22pub type LineWindow = [u8; 11];
23
24/// Cell 값 enum (가독성용). u8와 1:1.
25#[derive(Debug, Clone, Copy, PartialEq, Eq)]
26#[repr(u8)]
27pub enum Cell {
28    Empty = 0,
29    Mine = 1,
30    Opp = 2,
31    Boundary = 3,
32}
33
34impl From<u8> for Cell {
35    #[inline]
36    fn from(v: u8) -> Self {
37        match v {
38            0 => Cell::Empty,
39            1 => Cell::Mine,
40            2 => Cell::Opp,
41            3 => Cell::Boundary,
42            _ => panic!("invalid cell value"),
43        }
44    }
45}
46
47/// 11-cell window를 22-bit u32로 packed. 각 cell 2 bit.
48/// 인덱스 0이 high bits, 인덱스 10이 low bits.
49#[inline]
50pub fn pack_window(w: &LineWindow) -> u32 {
51    let mut packed = 0u32;
52    for &c in w {
53        debug_assert!(c < 4);
54        packed = (packed << 2) | (c as u32);
55    }
56    packed
57}
58
59/// packed u32에서 LineWindow 복원.
60/// 인덱스 0이 high bits, 인덱스 10이 low bits — pack_window의 역.
61#[inline]
62pub fn unpack_window(packed: u32) -> LineWindow {
63    let mut w = [0u8; 11];
64    for i in 0..11 {
65        let shift = (10 - i) * 2;
66        w[i] = ((packed >> shift) & 0b11) as u8;
67    }
68    w
69}
70
71/// 좌우 reflection 정규화. canonical = min(w, reverse(w)) packed.
72///
73/// 색 swap (mine ↔ opp) 은 별도 perspective 차원으로 처리하므로 여기선
74/// 적용하지 않는다. 즉 같은 패턴의 mine/opp 버전은 다른 ID를 갖는다.
75#[inline]
76pub fn canonicalize(w: &LineWindow) -> LineWindow {
77    let reversed: LineWindow = std::array::from_fn(|i| w[10 - i]);
78    let p1 = pack_window(w);
79    let p2 = pack_window(&reversed);
80    if p1 <= p2 {
81        *w
82    } else {
83        reversed
84    }
85}
86
87/// 패턴이 보드에서 실제 등장 가능한가?
88/// 규칙: boundary는 양 끝에서만 연속으로 나타날 수 있다. 보드 안쪽에서는
89/// boundary가 등장하지 않는다.
90///
91/// 즉 시퀀스가 [B*, valid*, B*] 형태여야 함 (B는 boundary, valid는 0/1/2).
92/// 양 끝의 B 길이 합이 11을 넘지 않으면서 가운데가 모두 non-boundary.
93pub fn is_realizable(w: &LineWindow) -> bool {
94    // 왼쪽 boundary 길이
95    let mut left_b = 0;
96    while left_b < 11 && w[left_b] == 3 {
97        left_b += 1;
98    }
99    // 오른쪽 boundary 길이
100    let mut right_b = 0;
101    while right_b < 11 && w[10 - right_b] == 3 {
102        right_b += 1;
103    }
104    if left_b + right_b > 11 {
105        return false;
106    }
107    // 가운데(left_b..11-right_b)에 boundary 없어야 함
108    for i in left_b..(11 - right_b) {
109        if w[i] == 3 {
110            return false;
111        }
112    }
113    true
114}
115
116/// Pattern ID type. canonical pattern 수가 65k 초과 (실측 ~수십만)이라
117/// u16으로는 부족. u32 사용.
118pub type PatternId = u32;
119
120/// 모든 가능한 11-cell pattern을 enumerate해 canonical packed → ID로 매핑.
121/// `is_realizable` 통과 + canonical form만 unique ID.
122///
123/// 결과 테이블 크기 실측: 수십만 pattern (4^11 = 4M raw 중 실현 가능 +
124/// canonical 정규화 후 unique). 모델 weights table 크기 영향 큼 — NNUE
125/// 통합 시 frequent pattern만 keep하는 추가 필터 필요할 수 있다.
126pub fn enumerate_patterns() -> HashMap<u32, PatternId> {
127    let mut table: HashMap<u32, PatternId> = HashMap::new();
128    let mut next_id: PatternId = 0;
129
130    // 4^11 = 4194304 가능. release 빌드에서 ~수 초.
131    for raw in 0..(1u32 << 22) {
132        let w = unpack_window(raw);
133        if !is_realizable(&w) {
134            continue;
135        }
136        let canonical = canonicalize(&w);
137        let canonical_packed = pack_window(&canonical);
138        if !table.contains_key(&canonical_packed) {
139            table.insert(canonical_packed, next_id);
140            next_id += 1;
141        }
142    }
143    table
144}
145
146/// 우리 NNUE에서 실제로 추적하는 mapped pattern ID 수.
147/// Top 16384 = 학습 데이터에서 99.24% 커버 + rare bucket 1개 = 16385.
148// Top-K를 4096으로 축소 — 16384에서 17:1 (params:samples) ratio 였던 것을
149// 4:1 (v13 수준)로 정상화. coverage는 99.24% → 약 97.5% (top 5K가 97.81%).
150// 거의 lossless이면서 Pattern4 weights 대부분이 학습 가능 영역에 진입.
151pub const PATTERN_TOP_K: usize = 4096;
152pub const PATTERN_RARE_ID: u16 = PATTERN_TOP_K as u16; // = 4096, "그 외" bucket
153pub const PATTERN_NUM_IDS: usize = PATTERN_TOP_K + 1;  // = 4097
154
155/// `pattern_freq_stats --dump-top-k 16384` 가 만든 binary embed.
156/// 16384 × u32 little-endian = 65 536 bytes. canonical packed value를
157/// 빈도 내림차순으로 나열.
158const TOPK_BYTES: &[u8] = include_bytes!("../data/topk.bin");
159
160/// 4M 엔트리 dense lookup table. raw packed window → mapped pattern ID
161/// (u16). Top 16K canonical packed에 들어가는 packed/reflection은 0..16383
162/// 값, 그 외 realizable raw는 PATTERN_RARE_ID (16384).
163///
164/// 메모리 8 MB (u16 × 4M). lookup은 단순 array index.
165/// OnceLock 으로 첫 호출 시 build (~수 초).
166fn dense_mapped_table() -> &'static Vec<u16> {
167    static TABLE: OnceLock<Vec<u16>> = OnceLock::new();
168    TABLE.get_or_init(build_dense_mapped_table)
169}
170
171fn build_dense_mapped_table() -> Vec<u16> {
172    let n = 1usize << 22;
173    let mut t = vec![PATTERN_RARE_ID; n];
174
175    // Top 16K canonical packed → mapped ID 0..16383
176    let mut canonical_to_mapped: HashMap<u32, u16> = HashMap::with_capacity(PATTERN_TOP_K);
177    debug_assert!(TOPK_BYTES.len() == PATTERN_TOP_K * 4);
178    for (i, chunk) in TOPK_BYTES.chunks(4).enumerate() {
179        let p = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
180        canonical_to_mapped.insert(p, i as u16);
181    }
182
183    // 모든 raw → canonicalize → topk 안이면 그 mapped id, 아니면 rare.
184    for raw in 0..(n as u32) {
185        let w = unpack_window(raw);
186        if !is_realizable(&w) {
187            continue; // unreachable raw → 기본값 RARE 유지 (사실 도달 안 함)
188        }
189        let canonical_packed = pack_window(&canonicalize(&w));
190        let mapped = canonical_to_mapped
191            .get(&canonical_packed)
192            .copied()
193            .unwrap_or(PATTERN_RARE_ID);
194        t[raw as usize] = mapped;
195    }
196
197    t
198}
199
200/// raw packed 11-cell window → mapped pattern ID (0..16384, 16384=rare).
201/// Caller가 read_window로 만든 packed를 그대로 넘기면 O(1) lookup.
202#[inline]
203pub fn lookup_mapped_id(packed: u32) -> u16 {
204    debug_assert!((packed as usize) < (1usize << 22));
205    dense_mapped_table()[packed as usize]
206}
207
208/// 모든 빈 셀의 LineWindow `[0; 11]` 의 mapped ID. 보드 안쪽 빈 cell의
209/// 초기값 — 우리 freq 측정에서 11.94% 빈도 1위라 mapped id 0으로 매핑됨.
210/// Board::new() 의 default fill 에 사용.
211#[inline]
212pub fn empty_pattern_mapped_id() -> u16 {
213    let all_empty: LineWindow = [0u8; 11];
214    lookup_mapped_id(pack_window(&all_empty))
215}
216
217/// mine ↔ opp swap 후의 mapped pattern ID 반환.
218///
219/// `line_pattern_ids` 는 black-relative storage이므로 stm == White일 때
220/// stm-perspective feature를 emit하려면 ID를 swap해야 한다. 이 lookup이
221/// O(1)로 가능하도록 16385-entry swap table을 미리 빌드.
222///
223/// rare bucket(16384)은 swap도 rare로 매핑.
224fn swap_table() -> &'static [u16; PATTERN_NUM_IDS] {
225    static TABLE: OnceLock<Box<[u16; PATTERN_NUM_IDS]>> = OnceLock::new();
226    TABLE.get_or_init(|| Box::new(build_swap_table()))
227}
228
229fn build_swap_table() -> [u16; PATTERN_NUM_IDS] {
230    let mut t = [PATTERN_RARE_ID; PATTERN_NUM_IDS];
231
232    // Top 16K canonical packed의 swap → 그 canonical의 mapped ID.
233    // 1) canonical_packed → mapped 사전 재구성.
234    let mut canonical_to_mapped: HashMap<u32, u16> =
235        HashMap::with_capacity(PATTERN_TOP_K);
236    for (i, chunk) in TOPK_BYTES.chunks(4).enumerate() {
237        let p = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
238        canonical_to_mapped.insert(p, i as u16);
239    }
240
241    // 2) 각 mapped id의 canonical → mine/opp swap → re-canonicalize → mapped.
242    for (i, chunk) in TOPK_BYTES.chunks(4).enumerate() {
243        let canonical_packed =
244            u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
245        let w = unpack_window(canonical_packed);
246        let swapped: LineWindow = std::array::from_fn(|j| match w[j] {
247            1 => 2,
248            2 => 1,
249            x => x,
250        });
251        let swapped_canonical = pack_window(&canonicalize(&swapped));
252        let mapped = canonical_to_mapped
253            .get(&swapped_canonical)
254            .copied()
255            .unwrap_or(PATTERN_RARE_ID);
256        t[i] = mapped;
257    }
258    // rare ↔ rare
259    t[PATTERN_RARE_ID as usize] = PATTERN_RARE_ID;
260    t
261}
262
263/// mine/opp swap된 perspective의 mapped pattern ID.
264#[inline]
265pub fn swap_mapped_id(id: u16) -> u16 {
266    debug_assert!((id as usize) < PATTERN_NUM_IDS);
267    swap_table()[id as usize]
268}
269
270/// Threat tier produced when `mine` (hypothetically) plays at the anchor cell
271/// of a single direction's window. Mirrors `vct.rs::LineThreat` but is kept
272/// separate here to avoid a cross-module type dependency; callers translate
273/// between the two enums.
274#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
275#[repr(u8)]
276pub enum WindowThreat {
277    None = 0,
278    OpenTwo = 1,
279    ClosedThree = 2,
280    OpenThree = 3,
281    ClosedFour = 4,
282    OpenFour = 5,
283    Five = 6,
284}
285
286/// Classify the line threat for an 11-cell window where the anchor (index 5)
287/// is `mine` (=1). Walks consecutive mines outward from the anchor on both
288/// sides and inspects both terminals for openness. The 11-cell width covers
289/// up to a full five-in-a-row anchored at the center.
290fn classify_window_anchor_mine(w: &LineWindow) -> WindowThreat {
291    debug_assert_eq!(w[5], 1);
292    let mut count = 1u32;
293    // forward
294    let mut open_front = false;
295    for off in 1usize..=5 {
296        match w[5 + off] {
297            1 => count += 1,
298            0 => {
299                open_front = true;
300                break;
301            }
302            _ => break, // 2 (opp) or 3 (boundary)
303        }
304    }
305    // backward
306    let mut open_back = false;
307    for off in 1usize..=5 {
308        match w[5 - off] {
309            1 => count += 1,
310            0 => {
311                open_back = true;
312                break;
313            }
314            _ => break,
315        }
316    }
317    let open_ends = open_front as u32 + open_back as u32;
318    match (count, open_ends) {
319        (5..=u32::MAX, _) => WindowThreat::Five,
320        (4, 2) => WindowThreat::OpenFour,
321        (4, 1) => WindowThreat::ClosedFour,
322        (3, 2) => WindowThreat::OpenThree,
323        (3, 1) => WindowThreat::ClosedThree,
324        (2, 2) => WindowThreat::OpenTwo,
325        _ => WindowThreat::None,
326    }
327}
328
329/// O(1) lookup of the `LineThreat` produced when `mine` plays at an empty
330/// anchor cell, given the current `mapped pattern ID` for the surrounding
331/// 11-cell window. Top-K canonical IDs are precomputed; the `RARE` bucket
332/// returns `WindowThreat::None` and callers must fall back to
333/// `read_window` + `classify_window_anchor_mine` for those.
334///
335/// Intended use: in search hot paths where `classify_move(my_bb, opp_bb, mv)`
336/// would otherwise scan all four directions, feed
337/// `board.line_pattern_ids[mv][dir]` (optionally `swap_mapped_id`-ed for
338/// the white-to-move perspective) to this function and the four-direction
339/// scan cost collapses to four array accesses.
340pub fn pattern_threat_after_my_play(pid: u16) -> WindowThreat {
341    debug_assert!((pid as usize) < PATTERN_NUM_IDS);
342    threat_after_my_play_table()[pid as usize]
343}
344
345fn threat_after_my_play_table() -> &'static [WindowThreat; PATTERN_NUM_IDS] {
346    static TABLE: OnceLock<Box<[WindowThreat; PATTERN_NUM_IDS]>> = OnceLock::new();
347    TABLE.get_or_init(|| Box::new(build_threat_after_my_play_table()))
348}
349
350fn build_threat_after_my_play_table() -> [WindowThreat; PATTERN_NUM_IDS] {
351    let mut t = [WindowThreat::None; PATTERN_NUM_IDS];
352
353    // Canonical patterns may have anchor=0/1/2. Our use case only queries
354    // empty anchor cells (candidate move squares), so the entries that
355    // matter are those with anchor==0 — for them we simulate "mine plays
356    // at anchor" and classify. Patterns with anchor already mine are
357    // classified directly (debug/extra use cases); patterns with
358    // anchor==opp are left as `None` because they cannot occur for our
359    // hot path.
360    for (i, chunk) in TOPK_BYTES.chunks(4).enumerate() {
361        let canonical_packed = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
362        let mut w = unpack_window(canonical_packed);
363        if w[5] != 0 {
364            if w[5] == 1 {
365                t[i] = classify_window_anchor_mine(&w);
366            }
367            continue;
368        }
369        w[5] = 1; // mine plays at anchor
370        t[i] = classify_window_anchor_mine(&w);
371    }
372    // PATTERN_RARE_ID slot stays `None`; the caller is expected to fall
373    // back to a direct `read_window` + classify when it sees that ID.
374    t
375}
376
377/// 보드 상태(stones bitboard 두 개)에서 (row, col, dir) 의 11-cell window를
378/// 읽어내는 helper. mine/opp 관점에 따라 cell 값 결정.
379#[inline]
380pub fn read_window(
381    mine: &crate::board::BitBoard,
382    opp: &crate::board::BitBoard,
383    row: i32,
384    col: i32,
385    dr: i32,
386    dc: i32,
387) -> LineWindow {
388    use crate::board::BOARD_SIZE;
389    let mut w = [3u8; 11]; // 기본 boundary
390    for off in -5i32..=5 {
391        let r = row + dr * off;
392        let c = col + dc * off;
393        if r < 0 || r >= BOARD_SIZE as i32 || c < 0 || c >= BOARD_SIZE as i32 {
394            continue;
395        }
396        let idx = (r as usize) * BOARD_SIZE + c as usize;
397        let slot = (off + 5) as usize;
398        if mine.get(idx) {
399            w[slot] = 1;
400        } else if opp.get(idx) {
401            w[slot] = 2;
402        } else {
403            w[slot] = 0;
404        }
405    }
406    w
407}
408
409#[cfg(test)]
410mod tests {
411    use super::*;
412
413    #[test]
414    fn pack_unpack_roundtrip() {
415        let w: LineWindow = [3, 3, 0, 1, 2, 1, 0, 2, 0, 3, 3];
416        let packed = pack_window(&w);
417        let unpacked = unpack_window(packed);
418        assert_eq!(w, unpacked);
419    }
420
421    #[test]
422    fn canonicalize_idempotent() {
423        let w: LineWindow = [3, 3, 0, 1, 2, 1, 0, 2, 0, 3, 3];
424        let c1 = canonicalize(&w);
425        let c2 = canonicalize(&c1);
426        assert_eq!(c1, c2);
427    }
428
429    #[test]
430    fn canonicalize_pairs_with_reverse() {
431        let w: LineWindow = [3, 3, 0, 0, 0, 1, 0, 0, 0, 0, 0];
432        let reversed: LineWindow = std::array::from_fn(|i| w[10 - i]);
433        assert_eq!(canonicalize(&w), canonicalize(&reversed));
434    }
435
436    #[test]
437    fn realizability_rejects_internal_boundary() {
438        // boundary가 가운데 끼어있으면 안 됨
439        let bad: LineWindow = [0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0];
440        assert!(!is_realizable(&bad));
441
442        // 양 끝의 boundary는 OK
443        let good: LineWindow = [3, 3, 0, 1, 2, 1, 0, 0, 0, 3, 3];
444        assert!(is_realizable(&good));
445
446        // 모두 빈 칸도 OK
447        let empty: LineWindow = [0; 11];
448        assert!(is_realizable(&empty));
449
450        // anchor (slot 5) 가 항상 보드 안이므로 모두 boundary는 불가능.
451        // anchor 좌우 5칸씩 합 11 → boundary 합이 11이면 가운데도 boundary
452        // 되어야 하는데 그건 가능 (left_b + right_b > 11 인 경우만 reject).
453        let all_b: LineWindow = [3; 11];
454        // left_b=11, right_b=11, 합 22 > 11 → reject
455        assert!(!is_realizable(&all_b));
456
457        // 한쪽 끝에서 anchor에 다다르기 직전까지 boundary
458        // [3,3,3,3,3,0,0,0,0,0,0] — left_b=5, right_b=0, 합 5, OK
459        let edge: LineWindow = [3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0];
460        assert!(is_realizable(&edge));
461    }
462
463    #[test]
464    fn enumerate_patterns_smoke() {
465        // 너무 무거운 4M 순회는 release 빌드에서만 빠름.
466        // 여기선 enumerate가 결정적임을 적은 sanity로 확인.
467        let table = enumerate_patterns();
468        // canonical은 reverse 적용해도 같은 ID여야 함
469        let w1: LineWindow = [3, 3, 0, 1, 2, 0, 0, 0, 0, 3, 3];
470        let w2: LineWindow = std::array::from_fn(|i| w1[10 - i]);
471        let id1 = table[&pack_window(&canonicalize(&w1))];
472        let id2 = table[&pack_window(&canonicalize(&w2))];
473        assert_eq!(id1, id2, "left-right reflection should share canonical ID");
474
475        // 패턴 수가 합리적 범위인지
476        let n = table.len();
477        eprintln!("[pattern_table] enumerated {n} canonical patterns");
478        assert!(n > 1000, "too few patterns: {n}");
479        assert!(n < 200_000, "too many patterns (u16 overflow risk): {n}");
480    }
481
482    #[test]
483    fn read_window_centers_on_anchor() {
484        use crate::board::BitBoard;
485        let mut mine = BitBoard::EMPTY;
486        let mut opp = BitBoard::EMPTY;
487        mine.set(7 * 15 + 7); // (7,7) mine
488        opp.set(7 * 15 + 8); // (7,8) opp
489
490        let w = read_window(&mine, &opp, 7, 7, 0, 1);
491        // anchor (7,7) at slot 5, (7,8) at slot 6
492        assert_eq!(w[5], 1, "anchor should be mine");
493        assert_eq!(w[6], 2, "right neighbor should be opp");
494        assert_eq!(w[4], 0, "left neighbor empty");
495
496        // 보드 가장자리: (0, 0) 에서 가로 방향 → 왼쪽 5칸 모두 boundary
497        let mine2 = BitBoard::EMPTY;
498        let opp2 = BitBoard::EMPTY;
499        let w2 = read_window(&mine2, &opp2, 0, 0, 0, 1);
500        for i in 0..5 {
501            assert_eq!(w2[i], 3, "out-of-board should be boundary at slot {i}");
502        }
503        assert_eq!(w2[5], 0, "anchor (0,0) is empty");
504    }
505}