rbp_core/
lib.rs

1//! Core type aliases, traits, and constants for robopoker.
2//!
3//! This crate provides the foundational types and configuration parameters
4//! used throughout the robopoker workspace.
5#![allow(dead_code)]
6
7pub mod dto;
8pub use dto::*;
9
// ============================================================================
// TYPE ALIASES
// ============================================================================
/// Stack sizes and bet amounts, measured in chips.
/// With `B_BLIND = 2` and `S_BLIND = 1` as declared in this file, one chip is
/// one small blind, i.e. half a big blind.
pub type Chips = i16;
/// Seat index around the table (0 = button in heads-up).
pub type Position = usize;
/// Training iteration counter for CFR epochs.
// NOTE(review): `i16` tops out at 32_767, well below the `usize` budgets
// declared in this file (e.g. `PRUNING_WARMUP = 524288`) — confirm how this
// alias is used before relying on it for long runs.
pub type Epoch = i16;
/// Distance metrics, convergence thresholds, and smoothing terms.
pub type Energy = f32;
/// Temperature parameters and information-theoretic measures.
pub type Entropy = f32;
/// Expected values, regrets, and payoffs.
pub type Utility = f32;
/// Strategy weights, sampling distributions, and reach probabilities.
pub type Probability = f32;
27
28// ============================================================================
29// TRAITS
30// ============================================================================
31/// Random instance generation for testing and Monte Carlo sampling.
/// Random instance generation for testing and Monte Carlo sampling.
///
/// Implementors provide a single associated function that takes no input and
/// produces a fresh value of the implementing type.
pub trait Arbitrary {
    /// Generate a uniformly random instance.
    fn random() -> Self;
}
36
37/// Unique identifier trait for domain entities.
/// Unique identifier trait for domain entities.
///
/// The type parameter `T` is the marker carried by the returned [`ID`]; it
/// defaults to the implementing type itself.
pub trait Unique<T = Self> {
    /// Returns the typed identifier of this entity.
    fn id(&self) -> ID<T>;
}
41
42// ============================================================================
43// IDENTITY TYPES
44// ============================================================================
45use std::cmp::Ordering;
46use std::fmt::Debug;
47use std::fmt::Display;
48use std::fmt::Formatter;
49use std::hash::Hash;
50use std::hash::Hasher;
51use std::marker::PhantomData;
52
/// Generic ID wrapper providing compile-time type safety over uuid::Uuid.
///
/// Two `ID`s with different `T` are distinct types even though both hold a
/// plain UUID, so they cannot be mixed up by accident.
pub struct ID<T> {
    /// The underlying UUID value.
    inner: uuid::Uuid,
    /// Zero-sized tag that ties this ID to `T` without storing a `T`.
    marker: PhantomData<T>,
}
58
59impl<T> ID<T> {
60    pub fn inner(&self) -> uuid::Uuid {
61        self.inner
62    }
63    /// Cast ID<T> to ID<U> while preserving the underlying UUID.
64    /// Useful for converting between marker types.
65    pub fn cast<U>(self) -> ID<U> {
66        ID {
67            inner: self.inner,
68            marker: PhantomData,
69        }
70    }
71}
72
73impl<T> From<ID<T>> for uuid::Uuid {
74    fn from(id: ID<T>) -> Self {
75        id.inner()
76    }
77}
78impl<T> From<uuid::Uuid> for ID<T> {
79    fn from(inner: uuid::Uuid) -> Self {
80        Self {
81            inner,
82            marker: PhantomData,
83        }
84    }
85}
86
87impl<T> Default for ID<T> {
88    fn default() -> Self {
89        Self {
90            inner: uuid::Uuid::now_v7(),
91            marker: PhantomData,
92        }
93    }
94}
95
96impl<T> Copy for ID<T> {}
97impl<T> Clone for ID<T> {
98    fn clone(&self) -> Self {
99        *self
100    }
101}
102
103impl<T> Eq for ID<T> {}
104impl<T> PartialEq for ID<T> {
105    fn eq(&self, other: &Self) -> bool {
106        self.inner == other.inner
107    }
108}
109
110impl<T> Ord for ID<T> {
111    fn cmp(&self, other: &Self) -> Ordering {
112        self.inner.cmp(&other.inner)
113    }
114}
115impl<T> PartialOrd for ID<T> {
116    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
117        Some(self.cmp(other))
118    }
119}
120
121impl<T> Hash for ID<T> {
122    fn hash<H>(&self, state: &mut H)
123    where
124        H: Hasher,
125    {
126        self.inner.hash(state);
127    }
128}
129
130impl<T> Debug for ID<T> {
131    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
132        f.debug_tuple("ID").field(&self.inner).finish()
133    }
134}
135impl<T> Display for ID<T> {
136    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
137        Display::fmt(&self.inner, f)
138    }
139}
140
// ============================================================================
// GAME TREE PARAMETERS
// ============================================================================
/// Number of players at the table.
pub const N: usize = 2;
/// Starting stack size in chips (50 big blinds at `B_BLIND = 2`).
pub const STACK: Chips = 100;
/// Big blind amount in chips.
pub const B_BLIND: Chips = 2;
/// Small blind amount in chips (half of `B_BLIND`).
pub const S_BLIND: Chips = 1;
/// Maximum re-raises per betting round (limits tree width).
pub const MAX_RAISE_REPEATS: usize = 3;
/// Maximum tree depth for real-time subgame solving.
pub const MAX_DEPTH_SUBGAME: usize = 16;
/// Maximum tree depth for full game abstraction.
pub const MAX_DEPTH_ALLGAME: usize = 32;

/// Timeout for voluntary card reveal at showdown (seconds).
pub const SHOWDOWN_TIMEOUT: u64 = 5;
161
// ============================================================================
// SINKHORN OPTIMAL TRANSPORT
// Entropy-regularized EMD for comparing hand distributions across abstractions.
// ============================================================================
/// Entropy regularization strength. Lower = closer to true EMD, higher = faster convergence.
pub const SINKHORN_TEMPERATURE: Entropy = 0.025;
/// Maximum Sinkhorn-Knopp iterations before stopping.
/// Acts as a hard cap alongside the `SINKHORN_TOLERANCE` early-stopping threshold.
pub const SINKHORN_ITERATIONS: usize = 128;
/// Early stopping threshold on marginal constraint violation.
pub const SINKHORN_TOLERANCE: Energy = 0.001;
172
// ============================================================================
// K-MEANS CLUSTERING
// Hierarchical abstraction: river equity → turn clusters → flop clusters.
// ============================================================================
/// Lloyd's algorithm iterations for flop clustering.
pub const KMEANS_FLOP_TRAINING_ITERATIONS: usize = 20;
/// Lloyd's algorithm iterations for turn clustering.
pub const KMEANS_TURN_TRAINING_ITERATIONS: usize = 24;
/// Number of flop buckets (distributions over turn clusters).
pub const KMEANS_FLOP_CLUSTER_COUNT: usize = 128;
/// Number of turn buckets (distributions over river equity).
pub const KMEANS_TURN_CLUSTER_COUNT: usize = 144;
/// Equity histogram resolution (0%, 1%, ..., 100%).
/// 101 bins = one per whole percent, with both endpoints included.
pub const KMEANS_EQTY_CLUSTER_COUNT: usize = 101;
187
// ============================================================================
// MCCFR SOLVER CONFIGURATIONS
// Batch size = trees per iteration, tree count = total training budget.
// ============================================================================
/// Asymmetric payoff for RPS test game (rock beats scissors by 2x).
/// Declared as a bare `f32`, which is the same underlying type as `Utility`.
pub const ASYMMETRIC_UTILITY: f32 = 2.0;
/// Trees sampled per RPS iteration.
pub const CFR_BATCH_SIZE_RPS: usize = 1;
/// Total RPS training budget (small game converges fast).
pub const CFR_TREE_COUNT_RPS: usize = 8192;
/// Trees sampled per NLHE iteration (parallelized across threads).
pub const CFR_BATCH_SIZE_NLHE: usize = 128;
/// Total NLHE training budget (~268M trees for production).
/// `0x10000000` = 2^28 = 268_435_456.
pub const CFR_TREE_COUNT_NLHE: usize = 0x10000000;
/// Trees sampled per river-only iteration (testing/debugging).
pub const CFR_BATCH_SIZE_RIVER: usize = 16;
/// River-only training budget (~65K trees).
/// `0x10000` = 2^16 = 65_536.
pub const CFR_TREE_COUNT_RIVER: usize = 0x10000;
206
// ============================================================================
// AVERAGE STRATEGY SAMPLING
// Biased sampling from cumulative policy: σ'(a) = max(ε, (τ·σ(a) + β) / (Σσ + β))
// NOTE(review): the line above multiplies by τ while the doc on
// SAMPLING_TEMPERATURE divides by T. The two agree only if τ = 1/T — confirm
// against the sampler implementation and make the formulas match.
// ============================================================================
/// Temperature (T) - controls sampling entropy via policy scaling.
/// Higher T → more uniform (exploratory); lower T → more peaked (greedy).
/// Formula: σ'(a) = max(ε, (σ(a)/T + β) / (Σσ + β)).
pub const SAMPLING_TEMPERATURE: Entropy = 2.0;
/// Smoothing (β) - pseudocount added to numerator and denominator.
/// Higher values pull sampling toward uniform (maximum entropy prior).
pub const SAMPLING_SMOOTHING: Energy = 0.5;
/// Epsilon (ε) - minimum sampling probability floor.
/// Ensures every action retains at least ε probability for exploration.
pub const SAMPLING_CURIOSITY: Probability = 0.01;
221
// ============================================================================
// REGRET MATCHING
// Convert cumulative regrets to current iteration strategy via normalization.
// ============================================================================
/// Minimum policy weight to prevent division by zero in normalization.
/// Equal to `f32::MIN_POSITIVE`, the smallest positive normal `f32`.
pub const POLICY_MIN: Probability = Probability::MIN_POSITIVE;
/// Floor for cumulative regret storage (prevents unbounded negative growth).
pub const REGRET_MIN: Utility = -4e6;
230
// ============================================================================
// PROBABILISTIC PRUNING (see `mccfr::PluribusSampling`)
// Skip sampling low-regret actions to accelerate convergence.
// PRUNING_THRESHOLD > REGRET_MIN so floored actions can recover via exploration.
// (As declared in this file: -3e5 > -4e6.)
// ============================================================================
/// Actions with regret below this are candidates for pruning (-300k ≈ 3× max pot).
pub const PRUNING_THRESHOLD: Utility = -3e5;
/// Probability of sampling pruned actions anyway (prevents permanent lock-out).
pub const PRUNING_EXPLORE: Probability = 0.05;
/// Warm-up epochs before pruning activates (let regrets stabilize first).
/// 524288 = 2^19.
pub const PRUNING_WARMUP: usize = 524288;
242
// ============================================================================
// SUBGAME SOLVING (see `mccfr::subgame`)
// Real-time refinement of blueprint strategy at decision points.
// ============================================================================
/// Alternative hands in the gadget game (Pluribus uses 4).
pub const SUBGAME_ALTS: usize = 4;
/// CFR iterations for real-time subgame refinement.
/// 1024 = 2^10.
pub const SUBGAME_ITERATIONS: usize = 1024;
251
// ============================================================================
// TRAINING INFRASTRUCTURE
// ============================================================================
/// Interval between progress log messages during training (60 seconds).
pub const TRAINING_LOG_INTERVAL: std::time::Duration = std::time::Duration::from_secs(60);
257
// ============================================================================
// REGRET INITIALIZATION BIAS
// Weights (not probabilities) for initial regret seeding. Only ratios matter.
// With k=4 raises: p(fold)≈50%, p(raises)≈33%, p(other)≈17%.
// (Total weight = 3.0 + 4×0.5 + 1.0 = 6.0, so 3/6, 2/6 and 1/6.)
// ============================================================================
/// Initial regret weight for fold actions (high = fold more often early).
pub const BIAS_FOLDS: Utility = 3.0;
/// Initial regret weight for raise actions (low = raise less often early).
/// Applied per raise action, so several raise sizes add up.
pub const BIAS_RAISE: Utility = 0.5;
/// Initial regret weight for call/check actions (baseline).
pub const BIAS_OTHER: Utility = 1.0;
269
270// ============================================================================
271// RUNTIME UTILITIES
272// ============================================================================
/// Set up combined logging to the terminal and to a timestamped file.
///
/// Ensures a `logs/` directory exists and creates `logs/<unix-seconds>.log`.
/// The file logger is built at `Debug` level and the terminal logger at
/// `Info`; location, target and thread annotations are switched off for both.
///
/// # Panics
/// Panics if the directory or log file cannot be created, if the system clock
/// reads earlier than the Unix epoch, or if the combined logger fails to
/// initialize.
#[cfg(feature = "server")]
pub fn log() {
    std::fs::create_dir_all("logs").expect("create logs directory");
    let off = log::LevelFilter::Off;
    let settings = simplelog::ConfigBuilder::new()
        .set_location_level(off)
        .set_target_level(off)
        .set_thread_level(off)
        .build();
    // Seconds since the Unix epoch double as the log file's name.
    let since_epoch = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .expect("time moves slow");
    let path = format!("logs/{}.log", since_epoch.as_secs());
    let sink = std::fs::File::create(path).expect("create log file");
    let file = simplelog::WriteLogger::new(log::LevelFilter::Debug, settings.clone(), sink);
    let term = simplelog::TermLogger::new(
        log::LevelFilter::Info,
        settings,
        simplelog::TerminalMode::Mixed,
        simplelog::ColorChoice::Auto,
    );
    simplelog::CombinedLogger::init(vec![term, file]).expect("initialize logger");
}
300
/// Install a Ctrl+C handler that terminates the process on the spot.
///
/// Spawns a Tokio task that waits for the Ctrl+C signal, prints a blank line,
/// logs a warning, and calls `std::process::exit(0)` without waiting for the
/// current batch. Because it uses `tokio::spawn`, it must be called from
/// inside a Tokio runtime.
#[cfg(feature = "server")]
pub fn kys() {
    let watcher = async {
        tokio::signal::ctrl_c().await.unwrap();
        println!();
        log::warn!("violent interrupt received, exiting immediately");
        std::process::exit(0);
    };
    // The join handle is dropped on purpose: the task runs detached.
    tokio::spawn(watcher);
}
312
/// Global interrupt flag for graceful shutdown coordination.
/// Starts out `false`; `brb` sets it once "Q" is read from stdin and
/// `interrupted` reads it.
#[cfg(feature = "server")]
static INTERRUPTED: std::sync::atomic::AtomicBool = std::sync::atomic::AtomicBool::new(false);
/// Optional training deadline from TRAIN_DURATION env var.
/// Stays unset unless `brb` finds a parseable `TRAIN_DURATION`; being a
/// `OnceLock`, it can be set at most once.
#[cfg(feature = "server")]
static DEADLINE: std::sync::OnceLock<std::time::Instant> = std::sync::OnceLock::new();
/// Report whether training should wind down.
///
/// Returns `true` once the graceful-interrupt flag has been raised (stdin "Q")
/// or, when a deadline was configured, once the current instant has reached it.
#[cfg(feature = "server")]
pub fn interrupted() -> bool {
    if INTERRUPTED.load(std::sync::atomic::Ordering::Relaxed) {
        return true;
    }
    match DEADLINE.get() {
        Some(deadline) => std::time::Instant::now() >= *deadline,
        // No deadline configured: only the flag can stop training.
        None => false,
    }
}
/// No-op interrupt check when server feature disabled.
///
/// The stdin listener and deadline are only compiled with the `server`
/// feature, so in this configuration the answer is always `false`.
#[cfg(not(feature = "server"))]
pub fn interrupted() -> bool {
    false
}
/// Register graceful interrupt handler. Type "Q" + Enter to stop after current batch.
/// Optionally set TRAIN_DURATION env var (e.g., "2h", "30m") for timed runs.
///
/// A `TRAIN_DURATION` that cannot be parsed, or that would overflow the clock,
/// is reported with a warning and otherwise ignored.
///
/// The stdin listener runs on a detached thread. It stops listening, without
/// raising the interrupt flag, once stdin reaches end-of-file or fails with a
/// non-recoverable error; previously it would spin in a busy loop in those
/// cases (e.g. when stdin is closed or redirected from `/dev/null`).
#[cfg(feature = "server")]
pub fn brb() {
    if let Ok(raw) = std::env::var("TRAIN_DURATION") {
        let deadline =
            parse_duration(&raw).and_then(|span| std::time::Instant::now().checked_add(span));
        match deadline {
            Some(deadline) => {
                // `set` only fails if a deadline was already registered; keep the first one.
                let _ = DEADLINE.set(deadline);
                log::info!("training will stop after {}", raw);
            }
            None => log::warn!(
                "ignoring invalid TRAIN_DURATION {:?}, expected e.g. \"30m\" or \"2h\"",
                raw
            ),
        }
    }
    std::thread::spawn(|| {
        let stdin = std::io::stdin();
        // One buffer reused across lines; cleared before every read.
        let mut buffer = String::new();
        loop {
            buffer.clear();
            match stdin.read_line(&mut buffer) {
                // Zero bytes read means end-of-file: nothing more will ever arrive.
                Ok(0) => {
                    log::debug!("stdin closed, graceful interrupt via \"Q\" is unavailable");
                    break;
                }
                Ok(_) if buffer.trim().eq_ignore_ascii_case("q") => {
                    log::warn!("graceful interrupt requested, finishing current batch...");
                    INTERRUPTED.store(true, std::sync::atomic::Ordering::Relaxed);
                    break;
                }
                Ok(_) => {}
                // A line that is not valid UTF-8 is a one-off; keep listening.
                Err(error) if error.kind() == std::io::ErrorKind::InvalidData => {}
                Err(error) => {
                    log::warn!("stopped listening for \"Q\" on stdin: {}", error);
                    break;
                }
            }
        }
    });
}
355/// Parse duration string like "30s", "5m", "2h", "1d" into Duration.
356#[cfg(feature = "server")]
///
/// The input is trimmed, then read as an unsigned integer followed by exactly
/// one unit character: `s` (seconds), `m` (minutes), `h` (hours) or `d` (days).
///
/// Returns `None` for empty input, a missing or unknown unit, a non-numeric
/// value, or a value whose length in seconds does not fit in a `u64`.
fn parse_duration(s: &str) -> Option<std::time::Duration> {
    let s = s.trim();
    // Peel off the final *character* rather than the final byte, so a
    // multi-byte suffix yields `None` instead of panicking on a bad split.
    let unit = s.chars().next_back()?;
    let digits = &s[..s.len() - unit.len_utf8()];
    let value: u64 = digits.parse().ok()?;
    let seconds_per_unit: u64 = match unit {
        's' => 1,
        'm' => 60,
        'h' => 3_600,
        'd' => 86_400,
        _ => return None,
    };
    // Checked so that absurdly large inputs are rejected, not wrapped.
    value
        .checked_mul(seconds_per_unit)
        .map(std::time::Duration::from_secs)
}
rbp_core/lib.rs

rbp_core/
lib.rs