// chipzen_bot/conformance.rs

//! Protocol-conformance harness — drives a `Bot` through a set of canned
//! exchanges (handshake + hand + match_end, plus multi-turn and
//! `action_rejected` variants) against an in-process mock socket and
//! reports per-scenario verdicts.
//!
//! Mirrors the Python (`chipzen.conformance`) and JavaScript
//! (`chipzen-bot` / `runConformanceChecks`) harnesses — same scenario
//! shape, same severity model, same canned exchange — so a clean run
//! in any of the three SDKs means the upload pipeline will accept the
//! bot on protocol grounds. It does NOT mean the bot is good.
10
11use crate::bot::Bot;
12use crate::client::{_run_session, MessageReader, MessageWriter, SessionContext};
13use crate::error::Error;
14use async_trait::async_trait;
15use serde_json::{json, Value};
16use std::sync::{Arc, Mutex};
17
/// Outcome level attached to one conformance verdict. Same shape as
/// `chipzen_sdk::Severity`; the CLI renders them uniformly.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Severity {
    /// The scenario's checks succeeded.
    Pass,
    /// The scenario finished but something looked off.
    Warn,
    /// The scenario's checks did not succeed.
    Fail,
}
26
27/// One conformance scenario's verdict.
28#[derive(Debug, Clone)]
29pub struct ConformanceCheck {
30    pub severity: Severity,
31    pub name: String,
32    pub message: String,
33}
34
/// Optional knobs for [`run_conformance_checks`].
#[derive(Clone, Debug)]
pub struct RunConformanceOptions {
    /// Upper bound on how long one scenario may run. Default 10s — well
    /// above the platform's per-action 5-second budget but still bounded
    /// for CI use.
    pub timeout: std::time::Duration,
}

impl Default for RunConformanceOptions {
    /// Default options: a 10-second per-scenario timeout.
    fn default() -> Self {
        let timeout = std::time::Duration::from_secs(10);
        Self { timeout }
    }
}
50
/// Match identifier stamped on every canned server message and on the
/// session context used by the harness.
const MATCH_ID: &str = "m_conformance_test";

/// Action kinds accepted as legal when validating a bot's `turn_action`.
const VALID_ACTION_KINDS: &[&str] = &[
    "fold", "check", "call", "raise", "all_in",
];
53
54// ---------------------------------------------------------------------------
55// Mock reader / writer
56// ---------------------------------------------------------------------------
57
58struct ScriptedReader {
59    messages: Vec<String>,
60    index: usize,
61}
62
63#[async_trait]
64impl MessageReader for ScriptedReader {
65    async fn next(&mut self) -> Result<Option<String>, Error> {
66        if self.index >= self.messages.len() {
67            return Ok(None);
68        }
69        let msg = self.messages[self.index].clone();
70        self.index += 1;
71        Ok(Some(msg))
72    }
73}
74
75#[derive(Clone, Default)]
76struct CapturingWriter {
77    sent: Arc<Mutex<Vec<String>>>,
78}
79
80#[async_trait]
81impl MessageWriter for CapturingWriter {
82    async fn send(&mut self, payload: String) -> Result<(), Error> {
83        self.sent
84            .lock()
85            .expect("CapturingWriter mutex poisoned")
86            .push(payload);
87        Ok(())
88    }
89}
90
91// ---------------------------------------------------------------------------
92// Canned scripts
93// ---------------------------------------------------------------------------
94
95fn server_hello() -> Value {
96    json!({
97        "type": "hello",
98        "match_id": MATCH_ID,
99        "seq": 1,
100        "server_ts": "2026-04-13T14:30:05.123Z",
101        "supported_versions": ["1.0"],
102        "selected_version": "1.0",
103        "game_type": "nlhe_6max",
104    })
105}
106
107fn match_start() -> Value {
108    json!({
109        "type": "match_start",
110        "match_id": MATCH_ID,
111        "seq": 2,
112        "game_config": {
113            "small_blind": 5,
114            "big_blind": 10,
115            "starting_stack": 1000,
116        },
117    })
118}
119
120fn round_start() -> Value {
121    json!({
122        "type": "round_start",
123        "match_id": MATCH_ID,
124        "seq": 3,
125        "round_id": "r_1",
126        "round_number": 1,
127        "state": { "hand_number": 1, "your_hole_cards": ["Ah", "Kd"] },
128    })
129}
130
131fn turn_request_n(seq: u64, request_id: &str) -> Value {
132    json!({
133        "type": "turn_request",
134        "match_id": MATCH_ID,
135        "seq": seq,
136        "request_id": request_id,
137        "valid_actions": ["fold", "call", "raise"],
138        "state": {
139            "hand_number": 1,
140            "phase": "preflop",
141            "your_hole_cards": ["Ah", "Kd"],
142            "to_call": 5,
143            "min_raise": 20,
144            "max_raise": 995,
145        },
146    })
147}
148
149fn turn_result_n(seq: u64) -> Value {
150    json!({
151        "type": "turn_result",
152        "match_id": MATCH_ID,
153        "seq": seq,
154        "details": { "seat": 0, "action": "call", "amount": 5 },
155    })
156}
157
158fn phase_change(seq: u64, phase: &str, board: &[&str]) -> Value {
159    json!({
160        "type": "phase_change",
161        "match_id": MATCH_ID,
162        "seq": seq,
163        "state": { "phase": phase, "board": board },
164    })
165}
166
167fn round_result_n(seq: u64) -> Value {
168    json!({
169        "type": "round_result",
170        "match_id": MATCH_ID,
171        "seq": seq,
172        "result": { "hand_number": 1, "winner_seats": [0], "pot": 40 },
173    })
174}
175
176fn match_end_n(seq: u64) -> Value {
177    json!({
178        "type": "match_end",
179        "match_id": MATCH_ID,
180        "seq": seq,
181        "reason": "complete",
182    })
183}
184
185/// Server-side rejection of a previously-sent `turn_action`. Drives
186/// the SDK's safe-fallback retry path. The SDK should respond with a
187/// `turn_action` echoing this same `request_id` and a safe action
188/// (`check` or `fold`) within `remaining_ms`.
189fn action_rejected(seq: u64, request_id: &str) -> Value {
190    json!({
191        "type": "action_rejected",
192        "match_id": MATCH_ID,
193        "seq": seq,
194        "request_id": request_id,
195        "reason": "invalid_action",
196        "message": "action not in valid_actions",
197        "remaining_ms": 4000,
198        "valid_actions": ["check", "fold"],
199    })
200}
201
202fn full_match_script() -> Vec<String> {
203    [
204        server_hello(),
205        match_start(),
206        round_start(),
207        turn_request_n(4, "req_1"),
208        turn_result_n(5),
209        round_result_n(6),
210        match_end_n(7),
211    ]
212    .into_iter()
213    .map(|v| v.to_string())
214    .collect()
215}
216
217/// Three turn_requests across preflop/flop/turn — exercises request_id
218/// echo on every turn. The original full-match script only checks the
219/// first action; a bug where the second-or-later action drops or
220/// rewrites the `request_id` would slip through.
221fn multi_turn_script() -> Vec<String> {
222    [
223        server_hello(),
224        match_start(),
225        round_start(),
226        turn_request_n(4, "req_1"),
227        turn_result_n(5),
228        phase_change(6, "flop", &["2s", "7d", "Tc"]),
229        turn_request_n(7, "req_2"),
230        turn_result_n(8),
231        phase_change(9, "turn", &["2s", "7d", "Tc", "Kh"]),
232        turn_request_n(10, "req_3"),
233        turn_result_n(11),
234        round_result_n(12),
235        match_end_n(13),
236    ]
237    .into_iter()
238    .map(|v| v.to_string())
239    .collect()
240}
241
242/// One turn_request followed by an action_rejected — exercises the
243/// SDK's safe-fallback retry path. The full-match script never
244/// delivers an action_rejected, so the SDK's retry path goes untested
245/// in conformance even though it's a routine production code path.
246fn action_rejected_script() -> Vec<String> {
247    [
248        server_hello(),
249        match_start(),
250        round_start(),
251        turn_request_n(4, "req_1"),
252        action_rejected(5, "req_1"),
253        turn_result_n(6),
254        round_result_n(7),
255        match_end_n(8),
256    ]
257    .into_iter()
258    .map(|v| v.to_string())
259    .collect()
260}
261
262/// One turn_request followed by THREE consecutive action_rejected
263/// messages. Catches a class of failure where a buggy SDK might enter
264/// an infinite response loop or hang waiting for a non-rejection
265/// message that never arrives. The SDK should be purely reactive: one
266/// safe-fallback `turn_action` per rejection, then exit cleanly on
267/// `match_end`.
268fn retry_storm_script() -> Vec<String> {
269    [
270        server_hello(),
271        match_start(),
272        round_start(),
273        turn_request_n(4, "req_1"),
274        action_rejected(5, "req_1"),
275        action_rejected(6, "req_1"),
276        action_rejected(7, "req_1"),
277        turn_result_n(8),
278        round_result_n(9),
279        match_end_n(10),
280    ]
281    .into_iter()
282    .map(|v| v.to_string())
283    .collect()
284}
285
286fn ctx() -> SessionContext {
287    SessionContext::new(
288        MATCH_ID.to_string(),
289        Some("conformance".to_string()),
290        None,
291        "chipzen-sdk-conformance".to_string(),
292        "0.0.0".to_string(),
293    )
294}
295
296// ---------------------------------------------------------------------------
297// Scenario evaluation
298// ---------------------------------------------------------------------------
299
/// Result of validating one payload sent by the bot.
#[derive(Debug)]
struct ClassifyResult {
    /// `true` when the payload is acceptable (or is not a `turn_action`
    /// at all); `false` when it is malformed.
    ok: bool,
    /// Note describing what was seen, or the diagnostic on failure.
    message: String,
}
305
306/// Validate a single payload the bot sent. ok=true with a non-fatal
307/// note for messages that aren't `turn_action`; ok=false with a
308/// diagnostic for anything malformed.
309///
310/// `expected_request_id` is the request_id the server sent for the
311/// turn this action is responding to. The SDK MUST echo it back so
312/// the server can correlate, deduplicate, and route action_rejected
313/// retries.
314fn classify_turn_action(payload: &str, expected_request_id: &str) -> ClassifyResult {
315    let msg: Value = match serde_json::from_str(payload) {
316        Ok(v) => v,
317        Err(e) => {
318            return ClassifyResult {
319                ok: false,
320                message: format!("sent payload was not valid JSON: {e}"),
321            }
322        }
323    };
324    if msg.get("type").and_then(|v| v.as_str()) != Some("turn_action") {
325        return ClassifyResult {
326            ok: true,
327            message: format!(
328                "non-action message ({:?}) — ignored",
329                msg.get("type").and_then(|v| v.as_str())
330            ),
331        };
332    }
333    if msg.get("request_id").and_then(|v| v.as_str()) != Some(expected_request_id) {
334        return ClassifyResult {
335            ok: false,
336            message: format!(
337                "turn_action request_id {:?} did not echo the server's {expected_request_id:?} — \
338                 the server uses request_id for correlation, idempotency, and \
339                 action_rejected retries",
340                msg.get("request_id")
341            ),
342        };
343    }
344    let action = msg.get("action").and_then(|v| v.as_str()).or_else(|| {
345        msg.get("params")
346            .and_then(|p| p.get("action"))
347            .and_then(|v| v.as_str())
348    });
349    let Some(action) = action else {
350        return ClassifyResult {
351            ok: false,
352            message: "turn_action missing `action` field".to_string(),
353        };
354    };
355    if !VALID_ACTION_KINDS.contains(&action) {
356        return ClassifyResult {
357            ok: false,
358            message: format!("turn_action action {action:?} is not in the legal set"),
359        };
360    }
361    ClassifyResult {
362        ok: true,
363        message: format!("sent turn_action: action={action:?}"),
364    }
365}
366
367/// Filter the captured-send buffer down to parsed `turn_action`
368/// payloads. The string form is preserved alongside the parsed value
369/// so callers can re-pass the original string to
370/// [`classify_turn_action`] without re-serializing.
371fn extract_turn_actions(sent: &[String]) -> Vec<(String, Value)> {
372    sent.iter()
373        .filter_map(|payload| {
374            let parsed: Value = serde_json::from_str(payload).ok()?;
375            if parsed.get("type").and_then(|t| t.as_str()) == Some("turn_action") {
376                Some((payload.clone(), parsed))
377            } else {
378                None
379            }
380        })
381        .collect()
382}
383
/// Outcome of `drive_session`.
enum DriveOutcome {
    /// The session ran to completion; holds every payload the writer
    /// captured, in send order.
    Completed(Vec<String>),
    /// The timeout fired or the session returned an error;
    /// `fail_message` is the diagnostic to report as `Severity::Fail`.
    Failed { fail_message: String },
}
391
392async fn drive_session<B: Bot>(
393    bot: &mut B,
394    script: Vec<String>,
395    timeout: std::time::Duration,
396) -> DriveOutcome {
397    let mut reader = ScriptedReader {
398        messages: script,
399        index: 0,
400    };
401    let mut writer = CapturingWriter::default();
402    let context = ctx();
403
404    let session_future = _run_session(&mut reader, &mut writer, bot, &context);
405    let result = tokio::time::timeout(timeout, session_future).await;
406
407    match result {
408        Err(_) => DriveOutcome::Failed {
409            fail_message: format!(
410                "did not complete within {timeout:?} — either decide() is too slow or \
411                 the bot is hung waiting on something"
412            ),
413        },
414        Ok(Err(e)) => DriveOutcome::Failed {
415            fail_message: format!("session returned {e:?}"),
416        },
417        Ok(Ok(_match_end)) => {
418            let sent = writer
419                .sent
420                .lock()
421                .expect("CapturingWriter mutex poisoned")
422                .clone();
423            DriveOutcome::Completed(sent)
424        }
425    }
426}
427
428async fn run_full_match_scenario<B: Bot>(
429    bot: &mut B,
430    timeout: std::time::Duration,
431) -> ConformanceCheck {
432    let name = "connectivity_full_match".to_string();
433    let sent = match drive_session(bot, full_match_script(), timeout).await {
434        DriveOutcome::Failed { fail_message } => {
435            return ConformanceCheck {
436                severity: Severity::Fail,
437                name,
438                message: format!("full-match scenario {fail_message}"),
439            }
440        }
441        DriveOutcome::Completed(sent) => sent,
442    };
443
444    if sent.is_empty() {
445        return ConformanceCheck {
446            severity: Severity::Fail,
447            name,
448            message: "bot did not send any messages during the canned exchange — at minimum \
449                     the client should have sent authenticate / hello / turn_action"
450                .to_string(),
451        };
452    }
453
454    let turn_actions = extract_turn_actions(&sent);
455    if turn_actions.is_empty() {
456        return ConformanceCheck {
457            severity: Severity::Fail,
458            name,
459            message: "bot completed the exchange but never sent a turn_action — decide() may \
460                     have returned an unexpected value or the SDK's runner hit a fallback path"
461                .to_string(),
462        };
463    }
464
465    let (raw, _) = &turn_actions[0];
466    let verdict = classify_turn_action(raw, "req_1");
467    if !verdict.ok {
468        return ConformanceCheck {
469            severity: Severity::Fail,
470            name,
471            message: verdict.message,
472        };
473    }
474    ConformanceCheck {
475        severity: Severity::Pass,
476        name,
477        message: format!(
478            "completed handshake + 1 hand + match_end; {}",
479            verdict.message
480        ),
481    }
482}
483
484/// Drive three turn_requests and verify request_id is echoed correctly
485/// on each. The full-match scenario only checks the first action; a
486/// bug where the second-or-later action drops or rewrites the
487/// `request_id` would slip through.
488async fn run_multi_turn_scenario<B: Bot>(
489    bot: &mut B,
490    timeout: std::time::Duration,
491) -> ConformanceCheck {
492    let name = "multi_turn_request_id_echo".to_string();
493    let sent = match drive_session(bot, multi_turn_script(), timeout).await {
494        DriveOutcome::Failed { fail_message } => {
495            return ConformanceCheck {
496                severity: Severity::Fail,
497                name,
498                message: format!("multi-turn scenario {fail_message}"),
499            }
500        }
501        DriveOutcome::Completed(sent) => sent,
502    };
503
504    let turn_actions = extract_turn_actions(&sent);
505    let expected_ids = ["req_1", "req_2", "req_3"];
506
507    if turn_actions.len() < expected_ids.len() {
508        return ConformanceCheck {
509            severity: Severity::Fail,
510            name,
511            message: format!(
512                "expected {} turn_actions across preflop/flop/turn, saw only {} — \
513                 bot stopped responding partway through the hand",
514                expected_ids.len(),
515                turn_actions.len(),
516            ),
517        };
518    }
519
520    for (i, expected_id) in expected_ids.iter().enumerate() {
521        let (raw, _) = &turn_actions[i];
522        let verdict = classify_turn_action(raw, expected_id);
523        if !verdict.ok {
524            return ConformanceCheck {
525                severity: Severity::Fail,
526                name,
527                message: format!("turn {} of 3 failed: {}", i + 1, verdict.message),
528            };
529        }
530    }
531
532    ConformanceCheck {
533        severity: Severity::Pass,
534        name,
535        message: format!(
536            "all {} turn_actions echoed request_id correctly across preflop/flop/turn",
537            expected_ids.len()
538        ),
539    }
540}
541
542/// Drive a turn_request followed by an action_rejected and verify the
543/// SDK retries safely. On rejection the SDK should send a second
544/// `turn_action` echoing the same `request_id` and using a safe
545/// action (`check` or `fold`).
546async fn run_action_rejected_scenario<B: Bot>(
547    bot: &mut B,
548    timeout: std::time::Duration,
549) -> ConformanceCheck {
550    let name = "action_rejected_recovery".to_string();
551    let sent = match drive_session(bot, action_rejected_script(), timeout).await {
552        DriveOutcome::Failed { fail_message } => {
553            return ConformanceCheck {
554                severity: Severity::Fail,
555                name,
556                message: format!("action_rejected scenario {fail_message}"),
557            }
558        }
559        DriveOutcome::Completed(sent) => sent,
560    };
561
562    let turn_actions = extract_turn_actions(&sent);
563    if turn_actions.len() < 2 {
564        return ConformanceCheck {
565            severity: Severity::Fail,
566            name,
567            message: format!(
568                "expected 2 turn_actions (initial + safe-fallback retry), saw {}; \
569                 the SDK did not respond to the action_rejected message",
570                turn_actions.len()
571            ),
572        };
573    }
574
575    let (_, retry) = &turn_actions[1];
576    let retry_request_id = retry
577        .get("request_id")
578        .and_then(|v| v.as_str())
579        .unwrap_or("");
580    if retry_request_id != "req_1" {
581        return ConformanceCheck {
582            severity: Severity::Fail,
583            name,
584            message: format!(
585                "safe-fallback retry used request_id {retry_request_id:?} instead of \
586                 the original \"req_1\" — server-side correlation will fail"
587            ),
588        };
589    }
590
591    let retry_action = retry.get("action").and_then(|v| v.as_str()).or_else(|| {
592        retry
593            .get("params")
594            .and_then(|p| p.get("action"))
595            .and_then(|v| v.as_str())
596    });
597    let Some(action) = retry_action else {
598        return ConformanceCheck {
599            severity: Severity::Fail,
600            name,
601            message: "safe-fallback retry was missing the `action` field".to_string(),
602        };
603    };
604    if action != "check" && action != "fold" {
605        return ConformanceCheck {
606            severity: Severity::Fail,
607            name,
608            message: format!(
609                "safe-fallback retry sent action {action:?}; expected \"check\" or \"fold\" \
610                 (the only universally-safe actions when valid_actions is unknown)"
611            ),
612        };
613    }
614
615    ConformanceCheck {
616        severity: Severity::Pass,
617        name,
618        message: format!(
619            "action_rejected handled cleanly: original action sent, retry sent {action:?} \
620             with original request_id"
621        ),
622    }
623}
624
625/// Drive a turn_request followed by THREE action_rejected messages
626/// back-to-back. Catches a class of failure where a buggy SDK might
627/// hang after the first rejection or enter an infinite send loop. The
628/// SDK should respond reactively (one safe-fallback per rejection)
629/// and exit cleanly when match_end arrives.
630async fn run_retry_storm_scenario<B: Bot>(
631    bot: &mut B,
632    timeout: std::time::Duration,
633) -> ConformanceCheck {
634    let name = "retry_storm_bounded".to_string();
635    let sent = match drive_session(bot, retry_storm_script(), timeout).await {
636        DriveOutcome::Failed { fail_message } => {
637            return ConformanceCheck {
638                severity: Severity::Fail,
639                name,
640                message: format!("retry-storm scenario {fail_message}"),
641            }
642        }
643        DriveOutcome::Completed(sent) => sent,
644    };
645
646    let turn_actions = extract_turn_actions(&sent);
647    // Expected: 1 initial + 3 retries = 4 turn_actions total. The SDK
648    // is reactive: each action_rejected provokes exactly one retry.
649    let expected_count = 4;
650    if turn_actions.len() != expected_count {
651        let severity = if turn_actions.len() < expected_count {
652            Severity::Fail
653        } else {
654            Severity::Warn
655        };
656        return ConformanceCheck {
657            severity,
658            name,
659            message: format!(
660                "expected {expected_count} turn_actions (1 initial + 3 retries) under \
661                 retry storm, saw {} — the SDK's retry behavior may be unbounded or \
662                 may have stopped responding",
663                turn_actions.len()
664            ),
665        };
666    }
667
668    ConformanceCheck {
669        severity: Severity::Pass,
670        name,
671        message: format!(
672            "SDK responded to all 3 action_rejected messages with safe-fallback retries \
673             ({expected_count} turn_actions total) and exited cleanly on match_end"
674        ),
675    }
676}
677
678// ---------------------------------------------------------------------------
679// Public entry
680// ---------------------------------------------------------------------------
681
/// Names of the conformance scenarios run by
/// [`run_conformance_checks`], in execution order. Lets downstream
/// tooling enumerate scenarios without parsing CLI output.
pub const SCENARIO_NAMES: &[&str] = &[
    "connectivity_full_match",
    "multi_turn_request_id_echo",
    "action_rejected_recovery",
    "retry_storm_bounded",
];
692
693/// Drive `bot` through every conformance scenario and return per-check
694/// verdicts. The bot instance is consumed (passed by value) — matches
695/// the production usage shape where `run_bot` also takes ownership.
696///
697/// Note on hung bots: the timeout uses `tokio::time::timeout` which
698/// cancels at await points inside the session loop. A bot whose
699/// `decide()` synchronously busy-loops (or calls a long-blocking
700/// non-async function) starves the tokio runtime task and prevents
701/// the timeout from firing on time. The Python SDK has a daemon-thread
702/// hard watchdog for this; the Rust equivalent (running decide in
703/// `tokio::task::spawn_blocking`) is more invasive and deferred to a
704/// follow-up. Bots that block their task will hang the harness.
705pub async fn run_conformance_checks<B: Bot>(
706    mut bot: B,
707    options: RunConformanceOptions,
708) -> Vec<ConformanceCheck> {
709    vec![
710        run_full_match_scenario(&mut bot, options.timeout).await,
711        run_multi_turn_scenario(&mut bot, options.timeout).await,
712        run_action_rejected_scenario(&mut bot, options.timeout).await,
713        run_retry_storm_scenario(&mut bot, options.timeout).await,
714    ]
715}