1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
use std::{collections::HashMap, fmt::Debug, sync::Arc};
use tokio::sync::Semaphore;
use tokio::task::JoinSet;
use crate::arena::{
HoldemSimulation,
errors::HoldemSimulationError,
game_state::{MAX_PLAYERS, Round},
};
/// A struct to help seeing which agent is likely to do well
///
/// Each competition is a series of `HoldemSimulations`
/// from the `HoldemSimulationGenerator` passed in.
pub struct HoldemCompetition<T: Iterator<Item = HoldemSimulation>> {
simulation_iterator: T,
/// The number of rounds that have been run.
pub num_rounds: usize,
/// stack size change normalized in big blinds
pub total_change: Vec<f32>,
pub max_change: Vec<f32>,
pub min_change: Vec<f32>,
/// How many hands each agent has made some profit
pub win_count: Vec<usize>,
/// How many hands the agents have lost money
pub loss_count: Vec<usize>,
// How many times the agent has lost no money
pub zero_count: Vec<usize>,
// Count of the round before the simulation stopped
pub before_count: HashMap<Round, usize>,
/// In-flight cap on the number of *simulations* this competition runs
/// concurrently. This is the competition's own limiter; it is independent
/// of the limiter each CFR agent uses for its exploration unless a caller
/// deliberately threads the same `Arc<Semaphore>` into both (via
/// `CFRAgentBuilder::limiter`). It is not shared automatically.
limiter: Arc<Semaphore>,
}
impl<T: Iterator<Item = HoldemSimulation>> HoldemCompetition<T> {
/// Creates a new HoldemHandCompetition instance with the provided
/// HoldemSimulation.
///
/// Initializes the number of rounds to 0 and the stack change vectors to 0
/// for each agent. Each simulation owns and deals from its own RNG (seeded
/// by the iterator), so the competition holds no RNG of its own.
pub fn new(simulation_iterator: T) -> HoldemCompetition<T> {
HoldemCompetition {
simulation_iterator,
limiter: crate::arena::cfr::build_default_limiter(),
// Set everything to zero
num_rounds: 0,
total_change: vec![0.0; MAX_PLAYERS],
min_change: vec![0.0; MAX_PLAYERS],
max_change: vec![0.0; MAX_PLAYERS],
win_count: vec![0; MAX_PLAYERS],
loss_count: vec![0; MAX_PLAYERS],
zero_count: vec![0; MAX_PLAYERS],
// Round before stopping
before_count: HashMap::new(),
}
}
/// Run `num_rounds` simulations and return every completed one.
///
/// Simulations are spawned onto the runtime and run concurrently, with
/// `limiter` capping how many are *in flight* at once — this bounds peak
/// CPU/inference pressure (and lets a caller share that pressure budget
/// with the agents' own exploration) without serializing the rounds.
/// Finished simulations are drained each iteration so the `JoinSet` does
/// not accumulate completed results, and their per-player metrics are
/// folded into the competition as they land.
///
/// The returned `Vec` contains all `num_rounds` simulations, each retaining
/// its full final state and history so callers can inspect every hand.
/// Memory therefore scales linearly with `num_rounds`; size the call
/// accordingly when running long competitions.
///
/// # Errors
///
/// Returns a [`HoldemSimulationError`] if a spawned simulation fails to
/// join; a panic inside a simulation is re-raised on the calling thread.
pub async fn run(
&mut self,
num_rounds: usize,
) -> Result<Vec<HoldemSimulation>, HoldemSimulationError> {
let mut set: JoinSet<HoldemSimulation> = JoinSet::new();
let mut completed = Vec::with_capacity(num_rounds);
for _ in 0..num_rounds {
let mut sim = self.simulation_iterator.next().unwrap();
// Permit acquisition bounds the number of *running* sims; draining
// finished ones each iteration keeps the JoinSet from holding
// completed results until the very end.
let permit = self.limiter.clone().acquire_owned().await.unwrap();
set.spawn(async move {
let _permit = permit; // held for the sim's lifetime
sim.run().await;
sim
});
while let Some(joined) = set.try_join_next() {
self.record(joined, &mut completed)?;
}
}
while let Some(joined) = set.join_next().await {
self.record(joined, &mut completed)?;
}
Ok(completed)
}
/// Fold one completed simulation into the running metrics and the result
/// vector, surfacing a join failure as an error.
fn record(
&mut self,
joined: Result<HoldemSimulation, tokio::task::JoinError>,
completed: &mut Vec<HoldemSimulation>,
) -> Result<(), HoldemSimulationError> {
let sim = match joined {
Ok(sim) => sim,
// A `JoinError` from a panicking spawned simulation is a bug to
// surface, not swallow: re-raise the original panic on this thread
// (mirrors the CFR exploration engine). Non-panic join errors
// (e.g. an aborted task) still convert to a recoverable error.
Err(join_err) => {
if join_err.is_panic() {
std::panic::resume_unwind(join_err.into_panic());
}
return Err(HoldemSimulationError::from(join_err));
}
};
self.update_metrics(&sim);
self.num_rounds += 1;
completed.push(sim);
Ok(())
}
fn update_metrics(&mut self, running_sim: &HoldemSimulation) {
// Calculates the change in each player's winnings for the round,
// normalized by the big blind amount.
//
// TODO: we need to filter out the players that never started the hand.
let changes = running_sim
.game_state
.starting_stacks
.iter()
.zip(running_sim.game_state.stacks.iter())
.enumerate()
.map(|(idx, (starting, ending))| {
(
idx,
(*ending - *starting) / running_sim.game_state.big_blind,
)
});
for (idx, norm_change) in changes {
// Running total
self.total_change[idx] += norm_change;
// What's the most we lose
self.min_change[idx] = self.min_change[idx].min(norm_change);
// What's the most we win
self.max_change[idx] = self.max_change[idx].max(norm_change);
// Count how many times the agent wins or loses
if norm_change > 0.0 {
self.win_count[idx] += 1;
} else if norm_change < 0.0 {
self.loss_count[idx] += 1;
} else {
self.zero_count[idx] += 1;
}
}
// Update the count
let count = self
.before_count
.entry(running_sim.game_state.round_before)
.or_default();
*count += 1;
}
}
impl<T: Iterator<Item = HoldemSimulation>> Debug for HoldemCompetition<T> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("HoldemCompetition")
.field("num_rounds", &self.num_rounds)
.field("total_change", &self.total_change)
.field("max_change", &self.max_change)
.field("min_change", &self.min_change)
.field("win_count", &self.win_count)
.field("zero_count", &self.zero_count)
.field("loss_count", &self.loss_count)
.field("round_before", &self.before_count)
.finish()
}
}
#[cfg(test)]
mod tests {
use crate::arena::{
Agent, AgentGenerator, CloneGameStateGenerator,
agent::{CallingAgentGenerator, RandomAgentGenerator},
competition::StandardSimulationIterator,
};
use super::*;
use crate::arena::GameStateBuilder;
#[tokio::test]
async fn test_standard_simulation() {
let agent_gens: Vec<Box<dyn AgentGenerator>> = vec![
Box::<RandomAgentGenerator>::default(),
Box::<CallingAgentGenerator>::default(),
];
let game_state = GameStateBuilder::new()
.num_players_with_stack(2, 100.0)
.blinds(10.0, 5.0)
.build()
.unwrap();
let sim_gen = StandardSimulationIterator::new(
agent_gens,
vec![], // no historians
CloneGameStateGenerator::new(game_state),
);
let mut competition = HoldemCompetition::new(sim_gen);
let _first_results = competition.run(100).await.unwrap();
}
#[tokio::test]
async fn test_thirteen_player_competition() {
// Regression: HoldemCompetition used to hardcode MAX_PLAYERS = 12,
// so a 13+ player simulation would index out of bounds when
// updating per-player metrics. Use 13 (one past the old cap) so
// the bug triggers without requiring a 16-player game.
const NUM_PLAYERS: usize = 13;
let agent_gens: Vec<Box<dyn AgentGenerator>> = (0..NUM_PLAYERS)
.map(|_| Box::<CallingAgentGenerator>::default() as Box<dyn AgentGenerator>)
.collect();
let game_state = GameStateBuilder::new()
.num_players_with_stack(NUM_PLAYERS, 100.0)
.blinds(10.0, 5.0)
.build()
.unwrap();
let sim_gen = StandardSimulationIterator::new(
agent_gens,
vec![],
CloneGameStateGenerator::new(game_state),
);
let mut competition = HoldemCompetition::new(sim_gen);
let _results = competition.run(5).await.unwrap();
assert_eq!(MAX_PLAYERS, competition.total_change.len());
}
#[tokio::test]
#[should_panic(expected = "boom: agent panicked")]
async fn test_spawned_sim_panic_is_resurfaced() {
// Regression: a panic inside a spawned simulation must be re-raised by
// the competition (resume_unwind), not silently converted to a
// recoverable `HoldemSimulationError` and swallowed.
use crate::arena::GameState;
use crate::arena::action::AgentAction;
use async_trait::async_trait;
struct PanicAgent;
#[async_trait]
impl Agent for PanicAgent {
async fn act(&mut self, _id: u128, _game_state: &GameState) -> AgentAction {
panic!("boom: agent panicked");
}
fn name(&self) -> &str {
"PanicAgent"
}
}
struct PanicAgentGenerator;
impl AgentGenerator for PanicAgentGenerator {
fn generate(&self, _player_idx: usize, _game_state: &GameState) -> Box<dyn Agent> {
Box::new(PanicAgent)
}
}
let agent_gens: Vec<Box<dyn AgentGenerator>> =
vec![Box::new(PanicAgentGenerator), Box::new(PanicAgentGenerator)];
let game_state = GameStateBuilder::new()
.num_players_with_stack(2, 100.0)
.blinds(10.0, 5.0)
.build()
.unwrap();
let sim_gen = StandardSimulationIterator::new(
agent_gens,
vec![],
CloneGameStateGenerator::new(game_state),
);
let mut competition = HoldemCompetition::new(sim_gen);
let _ = competition.run(1).await;
}
}