rbp_core/lib.rs
1//! Core type aliases, traits, and constants for robopoker.
2//!
3//! This crate provides the foundational types and configuration parameters
4//! used throughout the robopoker workspace.
5#![allow(dead_code)]
6
7pub mod dto;
8pub use dto::*;
9
10// ============================================================================
11// TYPE ALIASES
12// ============================================================================
/// Chip-denominated amounts: stack sizes, bets, and blinds.
///
/// NOTE(review): the previous comment described this unit as "big blinds", but
/// `B_BLIND` is 2 and `S_BLIND` is 1 in this same unit — confirm the intended
/// denomination.
pub type Chips = i16;
/// Index of a seat around the table (0 = button in heads-up).
pub type Position = usize;
/// Counter of CFR training iterations (epochs).
///
/// NOTE(review): `i16` tops out at 32_767, while `PRUNING_WARMUP` (524_288,
/// documented as a number of epochs) is far larger — confirm this width is
/// what callers expect.
pub type Epoch = i16;
/// Scalar used for distance metrics, convergence thresholds, and smoothing terms.
pub type Energy = f32;
/// Scalar used for temperature parameters and information-theoretic measures.
pub type Entropy = f32;
/// Scalar used for expected values, regrets, and payoffs.
pub type Utility = f32;
/// Scalar used for strategy weights, sampling distributions, and reach probabilities.
pub type Probability = f32;
27
28// ============================================================================
29// TRAITS
30// ============================================================================
/// Types that can produce a random instance of themselves.
///
/// Intended for tests and Monte Carlo sampling.
pub trait Arbitrary {
    /// Returns a freshly generated, uniformly random instance.
    fn random() -> Self;
}
36
37/// Unique identifier trait for domain entities.
38pub trait Unique<T = Self> {
39 fn id(&self) -> ID<T>;
40}
41
42// ============================================================================
43// IDENTITY TYPES
44// ============================================================================
45use std::cmp::Ordering;
46use std::fmt::Debug;
47use std::fmt::Display;
48use std::fmt::Formatter;
49use std::hash::Hash;
50use std::hash::Hasher;
51use std::marker::PhantomData;
52
53/// Generic ID wrapper providing compile-time type safety over uuid::Uuid.
54pub struct ID<T> {
55 inner: uuid::Uuid,
56 marker: PhantomData<T>,
57}
58
59impl<T> ID<T> {
60 pub fn inner(&self) -> uuid::Uuid {
61 self.inner
62 }
63 /// Cast ID<T> to ID<U> while preserving the underlying UUID.
64 /// Useful for converting between marker types.
65 pub fn cast<U>(self) -> ID<U> {
66 ID {
67 inner: self.inner,
68 marker: PhantomData,
69 }
70 }
71}
72
73impl<T> From<ID<T>> for uuid::Uuid {
74 fn from(id: ID<T>) -> Self {
75 id.inner()
76 }
77}
78impl<T> From<uuid::Uuid> for ID<T> {
79 fn from(inner: uuid::Uuid) -> Self {
80 Self {
81 inner,
82 marker: PhantomData,
83 }
84 }
85}
86
87impl<T> Default for ID<T> {
88 fn default() -> Self {
89 Self {
90 inner: uuid::Uuid::now_v7(),
91 marker: PhantomData,
92 }
93 }
94}
95
96impl<T> Copy for ID<T> {}
97impl<T> Clone for ID<T> {
98 fn clone(&self) -> Self {
99 *self
100 }
101}
102
103impl<T> Eq for ID<T> {}
104impl<T> PartialEq for ID<T> {
105 fn eq(&self, other: &Self) -> bool {
106 self.inner == other.inner
107 }
108}
109
110impl<T> Ord for ID<T> {
111 fn cmp(&self, other: &Self) -> Ordering {
112 self.inner.cmp(&other.inner)
113 }
114}
115impl<T> PartialOrd for ID<T> {
116 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
117 Some(self.cmp(other))
118 }
119}
120
121impl<T> Hash for ID<T> {
122 fn hash<H>(&self, state: &mut H)
123 where
124 H: Hasher,
125 {
126 self.inner.hash(state);
127 }
128}
129
130impl<T> Debug for ID<T> {
131 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
132 f.debug_tuple("ID").field(&self.inner).finish()
133 }
134}
135impl<T> Display for ID<T> {
136 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
137 Display::fmt(&self.inner, f)
138 }
139}
140
141// ============================================================================
142// GAME TREE PARAMETERS
143// ============================================================================
144/// Number of players at the table.
145pub const N: usize = 2;
146/// Starting stack size in big blinds.
147pub const STACK: Chips = 100;
148/// Big blind amount.
149pub const B_BLIND: Chips = 2;
150/// Small blind amount.
151pub const S_BLIND: Chips = 1;
152/// Maximum re-raises per betting round (limits tree width).
153pub const MAX_RAISE_REPEATS: usize = 3;
154/// Maximum tree depth for real-time subgame solving.
155pub const MAX_DEPTH_SUBGAME: usize = 16;
156/// Maximum tree depth for full game abstraction.
157pub const MAX_DEPTH_ALLGAME: usize = 32;
158
159/// Timeout for voluntary card reveal at showdown (seconds).
160pub const SHOWDOWN_TIMEOUT: u64 = 5;
161
162// ============================================================================
163// SINKHORN OPTIMAL TRANSPORT
164// Entropy-regularized EMD for comparing hand distributions across abstractions.
165// ============================================================================
166/// Entropy regularization strength. Lower = closer to true EMD, higher = faster convergence.
167pub const SINKHORN_TEMPERATURE: Entropy = 0.025;
168/// Maximum Sinkhorn-Knopp iterations before stopping.
169pub const SINKHORN_ITERATIONS: usize = 128;
170/// Early stopping threshold on marginal constraint violation.
171pub const SINKHORN_TOLERANCE: Energy = 0.001;
172
173// ============================================================================
174// K-MEANS CLUSTERING
175// Hierarchical abstraction: river equity → turn clusters → flop clusters.
176// ============================================================================
/// Rounds of Lloyd's algorithm when clustering the flop.
pub const KMEANS_FLOP_TRAINING_ITERATIONS: usize = 20;
/// Rounds of Lloyd's algorithm when clustering the turn.
pub const KMEANS_TURN_TRAINING_ITERATIONS: usize = 24;
/// How many flop buckets exist (each a distribution over turn clusters).
pub const KMEANS_FLOP_CLUSTER_COUNT: usize = 1 << 7;
/// How many turn buckets exist (each a distribution over river equity).
pub const KMEANS_TURN_CLUSTER_COUNT: usize = 144;
/// Number of equity histogram bins: one per percent from 0% to 100% inclusive.
pub const KMEANS_EQTY_CLUSTER_COUNT: usize = 101;
187
188// ============================================================================
189// MCCFR SOLVER CONFIGURATIONS
190// Batch size = trees per iteration, tree count = total training budget.
191// ============================================================================
/// Payoff multiplier that makes the RPS test game asymmetric (rock beats
/// scissors by 2x).
pub const ASYMMETRIC_UTILITY: f32 = 2.0;
/// RPS: trees sampled in each iteration.
pub const CFR_BATCH_SIZE_RPS: usize = 1;
/// RPS: total number of trees to train on (8_192; the small game converges fast).
pub const CFR_TREE_COUNT_RPS: usize = 1 << 13;
/// NLHE: trees sampled in each iteration (parallelized across threads).
pub const CFR_BATCH_SIZE_NLHE: usize = 128;
/// NLHE: total number of trees to train on (268_435_456, the production budget).
pub const CFR_TREE_COUNT_NLHE: usize = 1 << 28;
/// River-only: trees sampled in each iteration (testing/debugging).
pub const CFR_BATCH_SIZE_RIVER: usize = 16;
/// River-only: total number of trees to train on (65_536).
pub const CFR_TREE_COUNT_RIVER: usize = 1 << 16;
206
207// ============================================================================
208// AVERAGE STRATEGY SAMPLING
// Biased sampling from cumulative policy: σ'(a) = max(ε, (σ(a)/T + β) / (Σσ + β))
210// ============================================================================
211/// Temperature (T) - controls sampling entropy via policy scaling.
212/// Higher T → more uniform (exploratory); lower T → more peaked (greedy).
213/// Formula: σ'(a) = max(ε, (σ(a)/T + β) / (Σσ + β)).
214pub const SAMPLING_TEMPERATURE: Entropy = 2.0;
215/// Smoothing (β) - pseudocount added to numerator and denominator.
216/// Higher values pull sampling toward uniform (maximum entropy prior).
217pub const SAMPLING_SMOOTHING: Energy = 0.5;
218/// Epsilon (ε) - minimum sampling probability floor.
219/// Ensures every action retains at least ε probability for exploration.
220pub const SAMPLING_CURIOSITY: Probability = 0.01;
221
222// ============================================================================
223// REGRET MATCHING
224// Convert cumulative regrets to current iteration strategy via normalization.
225// ============================================================================
226/// Minimum policy weight to prevent division by zero in normalization.
227pub const POLICY_MIN: Probability = Probability::MIN_POSITIVE;
228/// Floor for cumulative regret storage (prevents unbounded negative growth).
229pub const REGRET_MIN: Utility = -4e6;
230
231// ============================================================================
232// PROBABILISTIC PRUNING (see `mccfr::PluribusSampling`)
233// Skip sampling low-regret actions to accelerate convergence.
234// PRUNING_THRESHOLD > REGRET_MIN so floored actions can recover via exploration.
235// ============================================================================
236/// Actions with regret below this are candidates for pruning (-300k ≈ 3× max pot).
237pub const PRUNING_THRESHOLD: Utility = -3e5;
238/// Probability of sampling pruned actions anyway (prevents permanent lock-out).
239pub const PRUNING_EXPLORE: Probability = 0.05;
240/// Warm-up epochs before pruning activates (let regrets stabilize first).
241pub const PRUNING_WARMUP: usize = 524288;
242
243// ============================================================================
244// SUBGAME SOLVING (see `mccfr::subgame`)
245// Real-time refinement of blueprint strategy at decision points.
246// ============================================================================
/// Number of alternative hands in the gadget game (Pluribus uses 4).
pub const SUBGAME_ALTS: usize = 4;
/// Number of CFR iterations (1_024) spent refining a subgame in real time.
pub const SUBGAME_ITERATIONS: usize = 1 << 10;
251
252// ============================================================================
253// TRAINING INFRASTRUCTURE
254// ============================================================================
/// How long training waits between progress log messages (one minute).
pub const TRAINING_LOG_INTERVAL: std::time::Duration = std::time::Duration::from_secs(60);
257
258// ============================================================================
259// REGRET INITIALIZATION BIAS
260// Weights (not probabilities) for initial regret seeding. Only ratios matter.
261// With k=4 raises: p(fold)≈50%, p(raises)≈33%, p(other)≈17%.
262// ============================================================================
263/// Initial regret weight for fold actions (high = fold more often early).
264pub const BIAS_FOLDS: Utility = 3.0;
265/// Initial regret weight for raise actions (low = raise less often early).
266pub const BIAS_RAISE: Utility = 0.5;
267/// Initial regret weight for call/check actions (baseline).
268pub const BIAS_OTHER: Utility = 1.0;
269
270// ============================================================================
271// RUNTIME UTILITIES
272// ============================================================================
/// Set up combined logging to the terminal and to a timestamped file.
///
/// Ensures the `logs/` directory exists, then installs a logger that writes
/// DEBUG and above to `logs/<unix-seconds>.log` and INFO and above to the
/// terminal.
///
/// # Panics
/// Panics if the directory or the log file cannot be created, if the system
/// clock is before the Unix epoch, or if logger initialization fails.
#[cfg(feature = "server")]
pub fn log() {
    std::fs::create_dir_all("logs").expect("create logs directory");
    // Location, target, and thread are switched off for every level.
    let mut builder = simplelog::ConfigBuilder::new();
    builder
        .set_location_level(log::LevelFilter::Off)
        .set_target_level(log::LevelFilter::Off)
        .set_thread_level(log::LevelFilter::Off);
    let config = builder.build();
    let stamp = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .expect("time moves slow")
        .as_secs();
    let sink = std::fs::File::create(format!("logs/{}.log", stamp)).expect("create log file");
    let file = simplelog::WriteLogger::new(log::LevelFilter::Debug, config.clone(), sink);
    let term = simplelog::TermLogger::new(
        log::LevelFilter::Info,
        config,
        simplelog::TerminalMode::Mixed,
        simplelog::ColorChoice::Auto,
    );
    simplelog::CombinedLogger::init(vec![term, file]).expect("initialize logger");
}
300
/// Install a Ctrl+C handler that terminates the process immediately.
///
/// Spawns a tokio task that waits for Ctrl+C, logs a warning, and exits with
/// status 0 without waiting for the current batch. Use it when a hard
/// shutdown is wanted.
#[cfg(feature = "server")]
pub fn kys() {
    let watcher = async {
        tokio::signal::ctrl_c().await.unwrap();
        // Blank line so the warning starts on a fresh terminal row.
        println!();
        log::warn!("violent interrupt received, exiting immediately");
        std::process::exit(0);
    };
    // The task is detached on purpose; it lives until the signal arrives.
    let _detached = tokio::spawn(watcher);
}
312
/// Process-wide flag raised when a graceful shutdown has been requested.
#[cfg(feature = "server")]
static INTERRUPTED: std::sync::atomic::AtomicBool = std::sync::atomic::AtomicBool::new(false);
/// Instant at which training should stop; set at most once, from the
/// TRAIN_DURATION env var.
#[cfg(feature = "server")]
static DEADLINE: std::sync::OnceLock<std::time::Instant> = std::sync::OnceLock::new();
/// Returns `true` once a graceful shutdown was requested (stdin "Q") or the
/// optional deadline has been reached.
#[cfg(feature = "server")]
pub fn interrupted() -> bool {
    if INTERRUPTED.load(std::sync::atomic::Ordering::Relaxed) {
        return true;
    }
    match DEADLINE.get() {
        Some(deadline) => std::time::Instant::now() >= *deadline,
        None => false,
    }
}
/// Interrupt check used when the `server` feature is disabled: there is no
/// interrupt source, so this always reports `false`.
#[cfg(not(feature = "server"))]
pub fn interrupted() -> bool {
    false
}
/// Register graceful interrupt handler. Type "Q" + Enter to stop after current batch.
/// Optionally set TRAIN_DURATION env var (e.g., "2h", "30m") for timed runs.
///
/// A TRAIN_DURATION value that cannot be parsed, or that would overflow the
/// monotonic clock, is reported with a warning and otherwise ignored.
///
/// The stdin listener thread stops on its own when stdin reaches end-of-file
/// or fails with an I/O error, instead of spinning on a stream that can never
/// deliver a "Q".
#[cfg(feature = "server")]
pub fn brb() {
    if let Ok(duration) = std::env::var("TRAIN_DURATION") {
        // `checked_add` avoids the panic `Instant + Duration` raises on overflow.
        let deadline = parse_duration(&duration)
            .and_then(|limit| std::time::Instant::now().checked_add(limit));
        match deadline {
            Some(deadline) => {
                let _ = DEADLINE.set(deadline);
                log::info!("training will stop after {}", duration);
            }
            None => log::warn!("ignoring invalid TRAIN_DURATION {:?}", duration),
        }
    }
    std::thread::spawn(|| {
        let mut buffer = String::new();
        loop {
            buffer.clear();
            match std::io::stdin().read_line(&mut buffer) {
                // Zero bytes read means end-of-file (closed or redirected
                // stdin): no further input can arrive, so stop listening.
                Ok(0) => break,
                Ok(_) => {
                    if buffer.trim().eq_ignore_ascii_case("q") {
                        log::warn!("graceful interrupt requested, finishing current batch...");
                        INTERRUPTED.store(true, std::sync::atomic::Ordering::Relaxed);
                        break;
                    }
                }
                // A line that is not valid UTF-8 is skipped; keep listening.
                Err(e) if e.kind() == std::io::ErrorKind::InvalidData => {}
                // Any other I/O failure is treated as permanent.
                Err(e) => {
                    log::warn!("stdin unavailable ({}), graceful interrupt disabled", e);
                    break;
                }
            }
        }
    });
}
355/// Parse duration string like "30s", "5m", "2h", "1d" into Duration.
356#[cfg(feature = "server")]
///
/// Accepts an unsigned integer immediately followed by exactly one unit
/// suffix: `s` (seconds), `m` (minutes), `h` (hours) or `d` (days).
/// Surrounding whitespace is trimmed first.
///
/// Returns `None` when the input is empty, the suffix is not one of the four
/// units, the numeric part is not a valid `u64`, or the resulting number of
/// seconds does not fit in a `u64`. Never panics, including on input whose
/// last character is multi-byte UTF-8.
fn parse_duration(s: &str) -> Option<std::time::Duration> {
    let s = s.trim();
    // Take the final *character* (not byte) so the split below always lands
    // on a UTF-8 boundary.
    let unit = s.chars().next_back()?;
    let seconds_per_unit: u64 = match unit {
        's' => 1,
        'm' => 60,
        'h' => 3_600,
        'd' => 86_400,
        _ => return None,
    };
    let digits = &s[..s.len() - unit.len_utf8()];
    let value: u64 = digits.parse().ok()?;
    // Reject values whose conversion to seconds would overflow.
    value
        .checked_mul(seconds_per_unit)
        .map(std::time::Duration::from_secs)
}