rs_poker 5.0.0

A library to help with any Rust code dealing with poker. This includes card values, suits, hands, hand ranks, 5 card hand strength calculation, 7 card hand strength calculation, and Monte Carlo game simulation helpers.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
//! The CFR exploration engine: [`CFRAgent`] and its async tree walk.
//!
//! `explore_all_actions` is the core loop. For the agent's decision node it
//! runs a budget-driven series of **waves** (mini-batch PCFR+). Each wave fans
//! out `wave_width × (non-pruned) actions` reward samples against the same
//! pre-wave strategy snapshot, averages the per-slot samples
//! ([`wave_mean_into`]), and applies exactly one atomic regret update. Rewards are
//! produced either by recursing into a sub-simulation (when the budget
//! returns `NextStep::Wave`) or by the fast-forward Monte-Carlo path (when
//! it returns `NextStep::FastForward`). `wave_width == 1` reproduces the prior
//! single-sample-per-action behavior (one sample per slot → mean == sample).
//!
//! Concurrency follows a "try-acquire-or-inline" model bounded by a shared
//! [`InFlightLimiter`](super::super::InFlightLimiter): at the shallow spawn
//! frontier a sample is `tokio::spawn`ed when a permit is free and otherwise
//! runs inline, so recursion is deadlock-free at any depth. Regret-based
//! pruning (Brown & Sandholm, 2015) skips actions driven to zero strategy
//! weight, re-probing periodically; the prune decision is computed once per
//! wave so every sample in the wave agrees.
//!
//! Stopping is cooperative and checked only at the wave boundary: the
//! [`Budget`](super::super::Budget) decides what each wave does (recursive
//! `Wave`, one-shot `FastForward`, `Stop`, or `StartTimer` to arm a deadline),
//! and a lock-free `Arc<AtomicBool>` stop flag is the single cross-task stop
//! signal. A stop never leaves a partial regret update — it simply means
//! fewer completed waves, and `act` picks from whatever regret has
//! accumulated.

use std::borrow::Cow;
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};

use little_sorry::{PcfrPlusRegretMatcher, RegretMinimizer};
use rand::SeedableRng;
use rand::rngs::StdRng;
use smallvec::SmallVec;
use tracing::event;

use crate::arena::hand_estimator::sample_world;
use crate::arena::{
    Agent, GameState, HoldemSimulationBuilder, action::AgentAction, game_state::Round,
};

use super::super::{
    ActionIndexMapper, Budget, CFRState, ExplorationStats, InFlightLimiter, NUM_ACTION_INDICES,
    NextStep, NodeData, PlayerData, TraversalSet, TraversalState, action_bit_set::ActionBitSet,
    action_generator::ActionGenerator, action_validator::validate_actions,
};

/// The reason `explore_all_actions` returned.
///
/// Five variants terminate the wave loop (`Deadline`, `BudgetStop`,
/// `BudgetStartTimer`, `FastForward`, `StableStrategy`). The sixth,
/// `SingleAction`, never enters the loop at all because there is nothing
/// to explore. The budget tree's `MostRestrictive` composer collapses
/// internal Stop causes, so callers tell them apart by looking at the
/// emitted field values (final_iterations vs configured cap,
/// last(regret_series) vs epsilon, elapsed vs deadline).
#[derive(Copy, Clone, Debug)]
enum StopCause {
    /// The lock-free stop atomic was set: either the deadline timer fired
    /// or an external cancellation was requested.
    Deadline,
    /// The budget tree answered `NextStep::Stop` or `NextStep::Pass`.
    BudgetStop,
    /// `NextStep::StartTimer` was returned although the timer was already
    /// armed; the engine treats that as a Stop. Degenerate but observable.
    BudgetStartTimer,
    /// The wave loop finished a one-shot `FastForward` step.
    FastForward,
    /// Validation left exactly one legal action, so the wave loop was
    /// skipped — there is no decision to learn. The regret matcher's
    /// trivial strategy `[1.0]` over that action is what the picker will
    /// return.
    SingleAction,
    /// The strategy stopped moving: the L1 distance between consecutive
    /// waves' strategies stayed below `EARLY_EXIT_EPSILON` for
    /// `EARLY_EXIT_STABLE_ITERS` consecutive waves. Modeled on Stockfish's
    /// stability-based time management — if the answer hasn't moved in a
    /// while, more iterations won't help.
    StableStrategy,
}

/// Renders each cause as its stable snake_case label (the form used for the
/// `stop_cause` diagnostic field).
impl std::fmt::Display for StopCause {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `StopCause` is `Copy`, so match on the value directly.
        f.write_str(match *self {
            Self::Deadline => "deadline",
            Self::BudgetStop => "budget_stop",
            Self::BudgetStartTimer => "budget_start_timer",
            Self::FastForward => "fast_forward",
            Self::SingleAction => "single_action",
            Self::StableStrategy => "stable_strategy",
        })
    }
}

/// Strategy-stability early exit thresholds. See Tier-1 Experiment 3 in
/// `docs/superpowers/specs/2026-05-28-cfr-tier1-tuning-experiments.md`.
///
/// `MIN_ITERS` protects against pre-warmup noise: the first few waves
/// can hit a temporary plateau before real exploration kicks in.
/// `STABLE_ITERS` requires the L1 strategy delta to stay below
/// `EPSILON` for that many consecutive waves before we declare
/// convergence and stop the loop.
///
/// This constant is the `MIN_ITERS` threshold described above (a wave
/// count). NOTE(review): the comparison that consumes it is not visible
/// in this part of the file — confirm whether it is inclusive.
const EARLY_EXIT_MIN_ITERS: usize = 4;
/// The `STABLE_ITERS` threshold: how many consecutive waves the L1
/// strategy delta must stay below `EARLY_EXIT_EPSILON` before the loop is
/// declared converged (reported as `StopCause::StableStrategy`).
const EARLY_EXIT_STABLE_ITERS: u32 = 3;
/// The `EPSILON` threshold: the L1 distance between consecutive waves'
/// strategies below which a wave counts as "stable".
const EARLY_EXIT_EPSILON: f32 = 0.001;

use super::builder::CFRAgentBuilder;
use super::fast_forward::{
    fast_forward_advance_betting, fast_forward_apply_action, fast_forward_distribute_pot,
    fast_forward_enumerate_showdowns, fast_forward_run_to_showdown,
    fast_forward_sample_flop_enumerate_runout,
};
use super::hand_log::{HandLog, HandLogHistorian};
use super::reward_context::ComputeRewardContext;

/// Average a wave's per-slot reward samples into `out`.
///
/// For each slot, `out[i]` becomes `sums[i] / counts[i]` when at least one
/// sample landed in that slot, and `penalty` otherwise (pruned or never
/// explored slots), so every processed slot ends up with a value.
///
/// The three slices are walked in lockstep; if their lengths differ, only
/// the common prefix is processed and any trailing entries of `out` are
/// left untouched. The result is written into the caller's buffer so the
/// wave loop can reuse one buffer across waves instead of allocating.
pub(super) fn wave_mean_into(out: &mut [f32], sums: &[f32], counts: &[u32], penalty: f32) {
    let per_slot = sums.iter().copied().zip(counts.iter().copied());
    for (slot, (total, samples)) in out.iter_mut().zip(per_slot) {
        *slot = match samples {
            0 => penalty,
            _ => total / samples as f32,
        };
    }
}

/// A CFR (Counterfactual Regret Minimization) agent for poker.
///
/// This agent uses CFR to compute optimal strategies by exploring the game tree
/// and learning from regret. It maintains state across simulations via shared
/// CFR states.
///
/// # Type Parameters
/// * `T` - The action generator type (implements `ActionGenerator`)
pub struct CFRAgent<T>
where
    T: ActionGenerator,
{
    /// Display name of the agent; recursive sub-agents are built with the
    /// name `"CFRAgent-sub"`.
    pub(super) name: Cow<'static, str>,
    /// Traversal set for the simulation this agent plays in. It is forked
    /// before each recursive sub-simulation so the sub-simulation cannot
    /// move the parent's position.
    pub(super) traversal_set: TraversalSet,
    /// This agent's own traversal cursor: current node index, chosen child
    /// index, and the seat (`player_idx`) it plays.
    pub(super) traversal_state: TraversalState,
    /// The CFR tree (nodes plus their regret matchers). Recursive sub-agents
    /// are handed a clone of this state so all of them work on one tree.
    pub(super) cfr_state: CFRState,
    /// Produces the candidate actions explored at a decision node.
    pub(super) action_generator: T,
    /// Configuration for `T`, held in an `Arc` so it can be handed to
    /// sub-agents without copying the config itself.
    pub(super) action_gen_config: Arc<T::Config>,
    /// Maps an action (in the context of a game state) to its regret-matcher
    /// slot index.
    pub(super) action_index_mapper: ActionIndexMapper,
    /// When `Some`, the action this agent is forced to play. Set on the
    /// acting seat's sub-agent during recursive reward computation.
    pub(super) forced_action: Option<AgentAction>,
    /// Recursion depth; sub-agents are built with the parent's depth + 1.
    /// The exploration log is frozen when this is 0.
    pub(super) depth: usize,
    /// Forwarded to `CFRState::ensure_child`: whether an existing node of a
    /// different type may be converted to a Player node.
    pub(super) allow_node_mutation: bool,
    /// In-flight limiter shared with every recursive sub-agent.
    pub(super) limiter: InFlightLimiter,
    /// Budget shared with every recursive sub-agent; per the module docs it
    /// decides what each wave does.
    pub(super) budget: Arc<dyn Budget>,
    /// Cooperative stop flag shared with every recursive sub-agent and with
    /// the deadline timer task.
    pub(super) stop: Arc<AtomicBool>,
    /// Opponent-hand distribution estimator, queried before the wave loop
    /// and shared with sub-agents.
    pub(super) estimator: std::sync::Arc<dyn crate::arena::HandDistributionEstimator>,
    /// Per-line copy-on-write action log; `Some` only when the estimator needs
    /// history. Read at `act()` to build the `GameLog`; frozen at depth 0 so the
    /// real hand is copied once and shared by reference through the recursion.
    pub(super) hand_log: Option<HandLog>,
}

/// Arm a deadline: spawn a tokio task that sleeps for `duration` and then
/// sets `stop` to `true` (relaxed ordering).
///
/// The task's join handle is returned wrapped in `AbortOnDrop`, which aborts
/// the task when dropped, so an `act` that finishes early leaves no
/// lingering timer behind.
pub(super) fn spawn_stop_timer(
    duration: std::time::Duration,
    stop: Arc<AtomicBool>,
) -> super::AbortOnDrop {
    let timer_task = tokio::spawn(async move {
        tokio::time::sleep(duration).await;
        stop.store(true, Ordering::Relaxed);
    });
    super::AbortOnDrop(timer_task)
}

impl<T> CFRAgent<T>
where
    T: ActionGenerator + Send + 'static,
    T::Config: Send + Sync,
{
    /// Returns a reference to this agent's CFR state.
    ///
    /// The CFR state contains the game tree with regret information learned
    /// during simulations. This can be used for visualization or analysis.
    ///
    /// The state is borrowed, not copied; the reference lives as long as the
    /// borrow of `self`.
    pub fn cfr_state(&self) -> &CFRState {
        &self.cfr_state
    }

    /// Returns a reference to this agent's traversal set.
    ///
    /// The set is borrowed, not cloned; callers that need an owned copy must
    /// clone it themselves.
    pub fn traversal_set(&self) -> &TraversalSet {
        &self.traversal_set
    }

    /// Returns whether this agent allows node mutation.
    ///
    /// This is the flag `ensure_target_node` forwards to
    /// `CFRState::ensure_child`: when `true`, an existing node of a different
    /// type may be updated to a Player node.
    pub fn allow_node_mutation(&self) -> bool {
        self.allow_node_mutation
    }

    /// Estimate the reward the traversing player gets from playing `action`
    /// in `game_state`.
    ///
    /// This is an associated function (no `&self`) so parallel reward
    /// samples can call it without borrowing the agent; everything shared is
    /// carried by `ComputeRewardContext`.
    ///
    /// `ctx.fast_forward` selects one of two strategies, both defined in
    /// this file:
    ///
    /// 1. [`Self::compute_reward_recursive`] (flag unset) — the
    ///    full-strength path. It runs a `HoldemSimulation` whose agents are
    ///    fresh CFR sub-agents, so the game tree keeps branching and mutual
    ///    best-response play can develop. Accurate, but the cost grows
    ///    exponentially with depth.
    ///
    /// 2. [`Self::compute_reward_fast_forward`] (flag set, i.e. the budget
    ///    returned `NextStep::FastForward`, typically deep in the tree) — the
    ///    cheap path. It applies the candidate action to a cloned state and
    ///    assumes every *subsequent* action (by any player, in any round) is
    ///    a check or call, then resolves the remaining board and a single
    ///    pot at showdown. The deep mutual-best-response signal is lost, but
    ///    the reward is a realistic showdown value at bounded cost.
    async fn compute_reward(
        game_state: &GameState,
        action: &AgentAction,
        ctx: &ComputeRewardContext<T>,
    ) -> f32 {
        if !ctx.fast_forward {
            return Self::compute_reward_recursive(game_state, action, ctx).await;
        }

        // The fast-forward path is synchronous CPU work on a cloned
        // GameState, so nothing is awaited here.
        let seat = ctx.traversal_state.player_idx() as usize;
        Self::compute_reward_fast_forward(game_state, action, seat)
    }

    /// Full recursive reward: play the hand out in a sub-simulation driven
    /// by freshly built CFR agents and return the traversing player's reward.
    ///
    /// This is the expensive path. Each call clones the game state, builds
    /// one CFR sub-agent per seat (the acting seat is forced to play
    /// `action`) and runs a complete `HoldemSimulation`. The sub-agents may
    /// call `compute_reward` themselves, which is where the exponential
    /// branching comes from; `compute_reward` switches to the fast-forward
    /// sibling once the depth schedule runs out to cap that blowup.
    ///
    /// # Panics
    /// Panics if the simulation builder rejects the configuration. In debug
    /// builds it also panics if the parent traversal position changed while
    /// the sub-simulation ran.
    async fn compute_reward_recursive(
        game_state: &GameState,
        action: &AgentAction,
        ctx: &ComputeRewardContext<T>,
    ) -> f32 {
        let num_agents = game_state.num_players;

        // One `get_all` call yields the traversal position and the acting
        // seat together. The position is only re-read by the debug-build
        // assertions below, hence the underscore-prefixed bindings.
        let (_node_idx_before, _child_idx_before, player_idx) = ctx.traversal_state.get_all();

        event!(
            tracing::Level::TRACE,
            num_agents,
            ?action,
            player_idx = player_idx,
            "Computing reward via sub-simulation"
        );

        // Isolate the sub-simulation: the fork starts at the same positions
        // as the parent but is independent, so whatever the sub-simulation
        // does to it cannot move the parent.
        let forked_traversal_set = ctx.traversal_set.fork();

        let child_depth = ctx.depth + 1;
        let generator_config = ctx.action_gen_config.clone();
        let mapper_config = *ctx.action_index_mapper.config();

        // Single shared tree: every sub-agent gets a clone of the same CFR
        // state.
        let shared_cfr_state = ctx.cfr_state.clone();

        // Per-sub-sim action log: same shared real-hand prefix, fresh tail
        // seeded with this line's accumulated actions (copy-on-descend →
        // full path). `None` when the estimator doesn't need history.
        let child_log: Option<HandLog> = ctx.hand_log.as_ref().map(|log| log.spawn_child());

        // Sub-agents share the SAME in-flight limiter, budget, and stop flag
        // as the root, so adaptive `try_acquire`-or-inline spawning at deeper
        // levels draws from one global bound instead of multiplying into
        // oversubscription, and budget/stop signals reach every recursive
        // level.
        let agents: Vec<Box<dyn Agent>> = (0..num_agents)
            .map(|seat| {
                let mut builder = CFRAgentBuilder::<T>::new()
                    .name("CFRAgent-sub")
                    .player_idx(seat)
                    .cfr_state(shared_cfr_state.clone())
                    .mapper_config(mapper_config)
                    .action_gen_config_arc(generator_config.clone())
                    .traversal_set(forked_traversal_set.clone())
                    .depth(child_depth)
                    .limiter(ctx.limiter.clone())
                    .budget(ctx.budget.clone())
                    .stop_flag(ctx.stop.clone())
                    .estimator(ctx.estimator.clone());

                if let Some(log) = child_log.as_ref() {
                    builder = builder.hand_log(log.clone());
                }

                // Only the acting seat is pinned to the candidate action.
                if seat == player_idx as usize {
                    builder = builder.forced_action(action.clone());
                }

                Box::new(builder.build()) as Box<dyn Agent>
            })
            .collect();

        // Seed the sub-simulation's RNG from the thread-local generator.
        let sub_sim_rng = StdRng::from_rng(&mut rand::rng());
        let base_builder = HoldemSimulationBuilder::default()
            .game_state(game_state.clone())
            .agents(agents)
            .cfr_context(
                shared_cfr_state,
                forked_traversal_set,
                ctx.allow_node_mutation,
            );

        // Install exactly one writer for this sub-sim's tail. Sub-agents are
        // at depth >= 1, so their `Agent::historian` returns `None` (no
        // duplicate writers).
        let sim_builder = match child_log {
            Some(log) => base_builder.historians(vec![
                Box::new(HandLogHistorian::new(log)) as Box<dyn crate::arena::Historian>
            ]),
            None => base_builder,
        };
        let mut sim = sim_builder.build_with_rng(sub_sim_rng).unwrap();

        sim.run().await;

        // Debug-only sanity check that the fork really isolated the parent.
        #[cfg(debug_assertions)]
        {
            let (node_idx_after, child_idx_after) = ctx.traversal_state.get_position();
            assert_eq!(
                _node_idx_before, node_idx_after,
                "Node index should be the same after exploration"
            );
            assert_eq!(
                _child_idx_before, child_idx_after,
                "Child index should be the same after exploration"
            );
        }

        sim.game_state.player_reward(player_idx as usize)
    }

    /// Fast-forward reward: apply `action` to a clone of `game_state`, then
    /// resolve the rest of the hand assuming every further action is a check
    /// or call, and return `player_idx`'s reward.
    ///
    /// This is the bounded-cost sibling of [`Self::compute_reward_recursive`].
    /// No CFR sub-agents and no `HoldemSimulation` are involved; the cloned
    /// `GameState` is driven directly through the `fast_forward_*` helpers.
    ///
    /// Simplifications (per design):
    /// - One pot only; side pots are collapsed into the main pot.
    /// - A player who cannot cover the current bet is treated as all-in for
    ///   what they have and remains eligible for the single pot.
    /// - Ties split the pot evenly.
    ///
    /// How the remaining board is resolved once betting has been advanced:
    /// - 0 cards missing: deterministic showdown (1 eval).
    /// - 1 card missing: ~46 evaluations (river only).
    /// - 2 cards missing: ~C(46,2) ≈ 1035 evaluations (turn + river).
    /// - 3 cards missing: sample FLOP_SAMPLES random flops and enumerate all
    ///   turn+river runouts for each (~1035 evals per flop), which keeps
    ///   variance low without the cost of a full C(47,3) ≈ 16K enumeration.
    ///
    /// Enumerating instead of sampling removes board-completion noise from
    /// the reward signal, which improves CFR convergence.
    fn compute_reward_fast_forward(
        game_state: &GameState,
        action: &AgentAction,
        player_idx: usize,
    ) -> f32 {
        // Thread-local RNG: this fn is sync, so it never crosses an await.
        let mut rng = rand::rng();
        let mut gs = game_state.clone();
        fast_forward_apply_action(&mut gs, action);

        // `None` means "no enumeration possible — use the sampled showdown
        // fallback". That covers both a pot with at most one contender left
        // and an unexpected round after advancing the betting.
        let contenders = gs.player_active.count() + gs.player_all_in.count();
        let cards_needed = if contenders <= 1 {
            None
        } else {
            // Everyone calls through the remaining betting, landing on a
            // deal round or on showdown.
            fast_forward_advance_betting(&mut gs);
            match gs.round {
                Round::Showdown | Round::Complete => Some(0),
                Round::DealFlop => Some(3),
                Round::DealTurn => Some(2), // turn + river
                Round::DealRiver => Some(1),
                _ => None,
            }
        };

        match cards_needed {
            // Up to two missing cards: enumerate every board completion.
            Some(missing) if missing <= 2 => {
                fast_forward_enumerate_showdowns(&gs, player_idx, missing)
            }
            // Whole flop missing: sample flops, enumerate each runout.
            Some(_) => fast_forward_sample_flop_enumerate_runout(&gs, player_idx, &mut rng),
            None => {
                fast_forward_run_to_showdown(&mut gs, &mut rng);
                fast_forward_distribute_pot(&mut gs);
                gs.player_reward(player_idx)
            }
        }
    }

    /// Look up the node this agent is about to act on: the child of the
    /// traversal's current node at its currently chosen child index.
    ///
    /// Returns whatever `CFRState::get_child` reports for that position
    /// (presumably `None` when no such child has been created yet — confirm
    /// against `CFRState`).
    pub(super) fn target_node_idx(&self) -> Option<usize> {
        let (parent_idx, child_slot) = self.traversal_state.get_position();
        self.cfr_state.get_child(parent_idx, child_slot)
    }

    /// Make sure the target node exists as a Player node and return its
    /// index.
    ///
    /// Delegates to `CFRState::ensure_child`, which copes with different bet
    /// amounts mapping to the same index while leading to different
    /// outcomes. If a node of another type already sits there and
    /// `allow_node_mutation` is true, it is updated to a Player node.
    pub(super) fn ensure_target_node(&self) -> usize {
        // Read position and seat together with a single `get_all` call.
        let (parent_idx, child_slot, seat) = self.traversal_state.get_all();

        self.cfr_state.ensure_child(
            parent_idx,
            child_slot,
            // Expected shape of the node: a Player node for this seat with
            // no regret matcher attached yet.
            NodeData::Player(PlayerData {
                regret_matcher: None,
                player_idx: seat,
            }),
            self.allow_node_mutation,
        )
    }

    /// Make sure the target node exists and, if it is a Player node without
    /// a regret matcher, attach a fresh `PcfrPlusRegretMatcher` sized to
    /// `NUM_ACTION_INDICES`.
    ///
    /// An existing matcher is left untouched, and a node that is not a
    /// Player node is not modified.
    ///
    /// # Panics
    /// Panics if `CFRState::update_node` fails for the target node.
    pub(super) fn ensure_regret_matcher(&mut self) {
        let node_idx = self.ensure_target_node();

        self.cfr_state
            .update_node(node_idx, |data| match data {
                NodeData::Player(player) if player.regret_matcher.is_none() => {
                    // One slot per action index (fixed compile-time constant).
                    let fresh = Box::new(PcfrPlusRegretMatcher::new(NUM_ACTION_INDICES));
                    player.regret_matcher = Some(fresh);
                }
                _ => {}
            })
            .unwrap();
    }

    /// Feed `rewards` into the regret matcher at `target_node_idx`.
    ///
    /// Three cases:
    /// - Player node with a matcher: the matcher is updated with `rewards`.
    /// - Player node without a matcher: the update is skipped; the matcher
    ///   is created by the next `ensure_regret_matcher` call.
    /// - Any other node type: a concurrent sub-simulation changed the node's
    ///   type (e.g. to Chance or Terminal) via `allow_node_mutation`. The
    ///   node is restored to a Player node for this agent's seat with a
    ///   fresh matcher that has already absorbed `rewards`.
    ///
    /// # Panics
    /// Panics if `CFRState::update_node` fails for `target_node_idx`.
    fn update_regret_at_node(&self, target_node_idx: usize, rewards: &[f32]) {
        self.cfr_state
            .update_node(target_node_idx, |data| match data {
                NodeData::Player(player) => {
                    // A missing matcher means "skip this update".
                    if let Some(matcher) = player.regret_matcher.as_mut() {
                        matcher.update_regret(rewards);
                    }
                }
                _ => {
                    // The node's type was overwritten concurrently. Rebuild
                    // it as a Player node so exploration can continue.
                    event!(
                        tracing::Level::DEBUG,
                        target_node_idx,
                        found_type = %data,
                        "Concurrent node type change detected — restoring Player"
                    );
                    let mut fresh = Box::new(PcfrPlusRegretMatcher::new(NUM_ACTION_INDICES));
                    fresh.update_regret(rewards);
                    *data = NodeData::Player(PlayerData {
                        regret_matcher: Some(fresh),
                        player_idx: self.traversal_state.player_idx(),
                    });
                }
            })
            .unwrap();
    }

    /// Run the budget-driven wave loop for this node.
    ///
    /// Stopping is cooperative via `self.stop` (a lock-free `Arc<AtomicBool>`
    /// shared with every recursive sub-agent). The `Budget` decides what each
    /// wave does (recursive wave, fast-forward, stop, or arm a deadline timer);
    /// when the budget asks for `StartTimer`, the engine spawns a tokio task
    /// that flips `self.stop` after the requested duration, so every recursive
    /// level sees the stop at its next wave boundary.
    pub async fn explore_all_actions(&mut self, game_state: &GameState) {
        let raw_actions = self.action_generator.gen_possible_actions(game_state);
        let validated_actions = validate_actions(raw_actions, game_state);

        // Filter actions to ensure each maps to a unique index.
        // Different bet amounts can map to the same index due to the logarithmic
        // mapping (only 49 slots for raises). We keep the first action for each index.
        // Using ActionBitSet for O(1) operations with no heap allocation.
        // Pre-compute action indices once to avoid repeated action_to_idx calls.
        let mut seen_indices = ActionBitSet::new();
        let indexed_actions: SmallVec<[(AgentAction, usize); 8]> = validated_actions
            .into_iter()
            .filter_map(|a| {
                let idx = self.action_index_mapper.action_to_idx(&a, game_state);
                if seen_indices.insert(idx) {
                    Some((a, idx))
                } else {
                    None
                }
            })
            .collect();

        // If no valid actions remain after filtering, skip exploration entirely.
        // This can happen at deep recursion depths in Limited mode when all
        // generated actions get filtered out by the validator chain.
        if indexed_actions.is_empty() {
            return;
        }

        // Single-action shortcut. When only one action survives validation
        // the strategy is forced ([1.0] over that action) and a wave loop
        // would only waste budget — there's no alternative to regret
        // against. Empirically this fires for a large fraction of root
        // acts in heads-up / short-stack scenarios where the betting line
        // collapses to a single legal move. Emit a diag event so the
        // analyzer can still count these in the cross-tab and
        // actions-considered histogram.
        if indexed_actions.len() == 1 {
            if tracing::event_enabled!(target: "cfr_diag", tracing::Level::TRACE) {
                let nodes = self.cfr_state.node_count() as u64;
                let empty: &[f32] = &[];
                tracing::event!(
                    target: "cfr_diag",
                    tracing::Level::TRACE,
                    depth = self.depth as u64,
                    stop_cause = %StopCause::SingleAction,
                    final_iterations = 0u64,
                    final_elapsed_us = 0u64,
                    nodes_touched_start = nodes,
                    nodes_touched_end = nodes,
                    timer_armed = false,
                    actions_considered = 1u64,
                    regret_series = ?empty,
                );
            }
            return;
        }

        // Penalty for invalid actions - using player's starting stack since
        // losing your whole stack is the worst outcome.
        let invalid_action_penalty =
            -(game_state.starting_stacks[self.traversal_state.player_idx() as usize]);

        let target_node_idx = self.target_node_idx().unwrap();

        // ── Opponent-hand range estimate (decision 2): once per act, from
        // this agent's own seat (`game_state.to_act_idx()`). Sampled per wave
        // below.
        let estimator = self.estimator.clone();

        // Build (once) the exploration log this act uses. At depth 0 we freeze
        // the real-hand log into a shared immutable prefix; deeper agents
        // already hold a frozen-prefix log and reuse it directly. `None` on the
        // default fast path (estimator doesn't need history) — unchanged.
        let exploration_log: Option<HandLog> = if estimator.needs_history() {
            let log = self
                .hand_log
                .as_ref()
                .expect("needs_history agent must have a hand_log");
            Some(if self.depth == 0 {
                log.freeze()
            } else {
                log.clone()
            })
        } else {
            None
        };

        // Owned snapshot backing the GameLog across the async estimate boundary.
        let history_actions: Option<Vec<crate::arena::action::Action>> =
            exploration_log.as_ref().map(|l| l.to_actions());
        let game_log = history_actions
            .as_ref()
            .map(|a| crate::arena::GameLog { actions: a });

        let ranges = estimator.estimate(game_state, game_log.as_ref()).await;

        // Per-slot wave accumulators, reused across waves to avoid repeated Vec
        // allocations. Each wave sums every sample landing in a slot and counts
        // them; `wave_mean` then averages (slots with zero samples — pruned or
        // never explored — fall back to `invalid_action_penalty`).
        // Stack-allocated, fixed-size accumulators (NUM_ACTION_INDICES is a
        // compile-time const) reused across waves — no per-node heap allocation.
        let mut sums = [0.0f32; NUM_ACTION_INDICES];
        let mut counts = [0u32; NUM_ACTION_INDICES];
        // Reused per-wave reward buffer (the averaged vector handed to the regret
        // matcher), also stack-allocated and reused — no per-wave allocation.
        let mut rewards = [0.0f32; NUM_ACTION_INDICES];

        // ── Regret-Based Pruning (Brown & Sandholm, NeurIPS 2015) ────
        //
        // After a warmup period, read the regret matcher's current strategy
        // to identify actions with zero strategy weight. These are actions
        // whose cumulative regret has been driven to 0 by PCFR+ clamping
        // and whose predicted future regret is also non-positive. Skipping
        // their reward computation (which involves expensive sub-simulations)
        // saves significant computation. Pruned actions keep the penalty
        // reward, which naturally maintains their zero cumulative regret.
        //
        // Every REPROBE_INTERVAL-th iteration, all actions are explored to
        // detect actions that may have become relevant again.
        //
        // Pruning is only applied when:
        // - The regret matcher has enough history (>= PRUNE_WARMUP updates)
        // - There are more than 2 actions (with only 2, pruning saves little)
        // - It's not a reprobe iteration
        const PRUNE_WARMUP: usize = 3;
        const REPROBE_INTERVAL: usize = 4;

        // Dynamic strategy-probability thresholding (Brown, Kroer, Sandholm
        // AAAI 2017). Complements regret-based pruning: each wave, any
        // action whose *current* strategy probability falls below
        // `c / sqrt(max(iter, PRUNE_WARMUP))` is skipped for that wave.
        // Fires under PCFR+'s positive-part dynamics where standard
        // regret-based pruning can't (regrets rarely go negative).
        // Reprobe waves bypass thresholding so dropped actions can recover.
        // Set to 0.0 to disable for A/B baselines.
        const DYNAMIC_THRESHOLD_C: f32 = 0.01;

        // Coarse spawn frontier. We only fan reward computations out to tokio
        // tasks while `depth < SPAWN_FRONTIER_DEPTH`; deeper nodes recurse
        // inline. The per-action spawn cost (a `try_acquire` atomic on the
        // shared semaphore + a `tokio::spawn` + a `GameState` clone) is paid
        // once per spawned node, so spawning at *every* depth makes it scale
        // with total node count and dominates on large trees. Confining
        // spawning to the top few levels turns each spawned task into a whole
        // subtree (work amortized like a work-stealing unit) while keeping that
        // overhead O(shallow nodes). Iteration × action fan-out at depths
        // 0..SPAWN_FRONTIER_DEPTH still provides ample parallelism. (Measured:
        // K=2 matches spawning at every depth while allocating far fewer tasks;
        // K=1 under-parallelizes.)
        // Tuning note: the poker action tree is highly unbalanced (a raise
        // branch explodes while fold is trivial), so spawning only at the top
        // 1-2 levels left the deep fast-forward enumeration — the bulk of the
        // work — running inline and unstealable, idling most workers (measured
        // ~4 of 16 cores busy at K=2). Pushing the frontier one level deeper
        // turns each depth-2 node's fan-out of fast-forward leaves into
        // stealable tasks the multi-thread scheduler load-balances.
        const SPAWN_FRONTIER_DEPTH: usize = 3;

        let (initial_active, initial_updates) = self.cfr_state.get_pruning_info(target_node_idx);
        let can_prune = indexed_actions.len() > 2 && initial_updates >= PRUNE_WARMUP;

        // ── Budget-driven ordered waves: try-acquire-or-inline spawning ──
        //
        // The loop runs WAVES until the budget says stop (or the lock-free
        // `stop` flag flips). A wave fans out `wave_width` reward samples for
        // every non-pruned action against the same pre-wave strategy snapshot,
        // averages the per-slot samples (`wave_mean`), and applies exactly one
        // atomic PCFR+ regret update (invariant #1 — mini-batch PCFR+). At
        // `wave_width == 1` each slot gets one sample → mean == sample → the
        // prior single-sample behavior exactly. Each sample's `compute_reward`
        // is fanned out: a permit is immediately available → `spawn` the
        // subtree concurrently; none → run it inline. Acquisition is always
        // `try_acquire` (never blocking), so recursion at any depth is
        // deadlock-free (invariant #3).
        //
        // Regret-based pruning is preserved: the active set starts from the
        // initial read and is refreshed after each reprobe wave, gated
        // identically on `can_prune` / `updates_since_warmup` / the `len() > 2`
        // guard. The prune decision is computed ONCE per wave (below) so every
        // sample in the wave agrees.
        let mut active_actions = initial_active;
        let mut updates_since_warmup = initial_updates;

        // Wall-clock start for `Budget` accounting. `std::time::Instant` (not
        // tokio's) so the elapsed value is meaningful regardless of runtime.
        let started = std::time::Instant::now();

        // Node-local convergence signal from the *previous* completed update,
        // surfaced to the budget at the next wave boundary. `None` until the
        // first update lands.
        let mut latest_avg_regret: Option<f32> = None;

        // Completed waves at this node (= matcher updates). This is what the
        // budget's `iterations` signal reports.
        let mut iter_idx: u64 = 0;

        // ── Strategy-stability early exit ──
        //
        // Track the L1 distance between consecutive waves' strategies. If
        // the strategy stops moving for STABLE_ITERS consecutive waves,
        // further iterations are unlikely to help — bail and return
        // budget to the rest of the simulation. Stack-allocated buffers,
        // no heap alloc.
        let mut early_exit_prev_strategy = [0.0f32; NUM_ACTION_INDICES];
        let mut early_exit_curr_strategy = [0.0f32; NUM_ACTION_INDICES];
        let mut early_exit_stable_count: u32 = 0;
        let mut early_exit_has_prev = false;

        // Sub-agents inherit "timer already armed" — only the root agent at
        // depth 0 ever sees StartTimer from the Deadline leaf.
        let mut timer_armed = self.depth > 0;
        // Holds the spawned timer's abort guard so it's cancelled when this fn
        // returns. Only ever Some at the root after a successful arm.
        let mut _timer_guard: Option<super::AbortOnDrop> = None;

        // ── Diagnostics: per-act ExplorationSummary ──
        //
        // Entire diagnostic path is gated by `event_enabled!`. When no
        // subscriber is interested, `diag_on == false`, the Vec stays empty
        // (capacity 0, no allocation), and the per-wave push is skipped.
        // When enabled, the Vec is pre-sized to a conservative estimate (32)
        // to avoid reallocs in common configurations.
        let diag_on = tracing::event_enabled!(target: "cfr_diag", tracing::Level::TRACE);
        let diag_nodes_touched_start: u64 = if diag_on {
            self.cfr_state.node_count() as u64
        } else {
            0
        };
        let mut diag_regret_series: Vec<f32> = if diag_on {
            Vec::with_capacity(32)
        } else {
            Vec::new()
        };
        // Default; overwritten by every break path below. If you add a new
        // break without tagging it, the emitted stop_cause will incorrectly
        // say "budget_stop".
        let mut diag_stop_cause: StopCause = StopCause::BudgetStop;

        loop {
            // ── Budget / stop check at the WAVE BOUNDARY ──
            //
            // INVARIANT #1: the pre-wave stop check sits before any reward
            // computation for this wave, so breaking here can never leave a
            // partial reward vector — it simply means fewer completed waves.
            // `act` then picks from whatever regret has accumulated, returning
            // the best-known action. Budget exhaustion and stop are NOT errors.
            let stats = ExplorationStats {
                elapsed: started.elapsed(),
                iterations: iter_idx,
                nodes_touched: self.cfr_state.node_count() as u64,
                depth: self.depth,
                avg_regret: latest_avg_regret,
                timer_armed,
            };
            if self.stop.load(Ordering::Relaxed) {
                if diag_on {
                    diag_stop_cause = StopCause::Deadline;
                }
                break;
            }

            let (wave_width, fast_forward) = match self.budget.next_step(&stats) {
                NextStep::Stop | NextStep::Pass => {
                    if diag_on {
                        diag_stop_cause = StopCause::BudgetStop;
                    }
                    break;
                }
                NextStep::StartTimer { duration } if !timer_armed => {
                    debug_assert!(
                        self.depth == 0,
                        "NextStep::StartTimer should only arrive at the root (depth 0). \
                         Sub-agents inherit `timer_armed = true`, so a StartTimer here means \
                         a budget returned StartTimer at depth > 0, which is unsupported by \
                         the current engine. If you want per-depth timers, the engine needs \
                         to grow per-depth timer slots."
                    );
                    _timer_guard = Some(spawn_stop_timer(duration, self.stop.clone()));
                    timer_armed = true;
                    continue;
                }
                NextStep::StartTimer { .. } => {
                    if diag_on {
                        diag_stop_cause = StopCause::BudgetStartTimer;
                    }
                    break;
                }
                NextStep::Wave { width } => (width, false),
                NextStep::FastForward => (1, true),
            };

            // Build the per-wave ComputeRewardContext. `fast_forward` varies
            // per iteration so we rebuild ctx each wave; every field is an
            // `Arc`-style handle, so this is cheap.
            let ctx = ComputeRewardContext::<T> {
                traversal_set: self.traversal_set.clone(),
                traversal_state: self.traversal_state.clone(),
                cfr_state: self.cfr_state.clone(),
                action_gen_config: self.action_gen_config.clone(),
                action_index_mapper: self.action_index_mapper.clone(),
                limiter: self.limiter.clone(),
                budget: self.budget.clone(),
                stop: self.stop.clone(),
                depth: self.depth,
                fast_forward,
                allow_node_mutation: self.allow_node_mutation,
                estimator: self.estimator.clone(),
                hand_log: exploration_log.clone(),
            };

            // ── Per-wave world (decision 2 + 9) ──
            // Recursive waves play against a freshly sampled world (acting seat
            // + board fixed). Fast-forward waves are leaves: never re-sample.
            // With KnownHandsEstimator the sample reproduces the real hands and
            // never touches the RNG, so behavior is byte-for-byte unchanged.
            // The ThreadRng lives only inside this sync block, never crossing an
            // await (keeps the explore future Send).
            let wave_state: Option<GameState> = if fast_forward {
                None
            } else {
                let mut rng = rand::rng();
                Some(sample_world(&ranges, game_state, &mut rng))
            };
            // Wrap the sampled world in an Arc once so the inline borrow and the
            // spawn snapshot share a single allocation (no second clone). For
            // fast-forward waves (wave_state == None) there is no sampled world;
            // effective_gs borrows the base game_state.
            let sampled_arc: Option<std::sync::Arc<GameState>> =
                wave_state.map(std::sync::Arc::new);
            let effective_gs: &GameState = sampled_arc.as_deref().unwrap_or(game_state);

            // Decide whether to prune this wave. On reprobe waves (every
            // REPROBE_INTERVAL-th), explore all actions. Computed ONCE per wave
            // so all `wave_width` samples make the same prune decision and the
            // averaged vector is a mean over a consistent active set.
            //
            // The len() > 2 guard is required here even though `can_prune`
            // already checks it. The second disjunct (`updates_since_warmup
            // >= PRUNE_WARMUP`) handles nodes that cross the warmup
            // threshold mid-call, but it does not carry the action-count
            // check. Without the outer guard, 2-action nodes could have one
            // action pruned on 75% of waves, collapsing to a fixed
            // policy with no exploration.
            let prune_this_iter =
                indexed_actions.len() > 2 && (can_prune || updates_since_warmup >= PRUNE_WARMUP);
            let is_reprobe = iter_idx.is_multiple_of(REPROBE_INTERVAL as u64);
            let skip_pruned = prune_this_iter && !is_reprobe;

            // Dynamic-threshold active set: one strategy snapshot per wave,
            // gated identically to RBP. The bitset contains action indices
            // whose current strategy probability is at or above
            // `c / sqrt(max(iter, PRUNE_WARMUP))`. Re-built every wave
            // because both the threshold (1/sqrt(iter)) and the strategy
            // shift as iterations accumulate. Bypassed on reprobe waves so
            // dropped actions get periodic re-evaluation. None when the
            // gate is off (e.g. pre-warmup, 2-action nodes) — in that case
            // we never reference it below.
            let dyn_thresh_set: Option<ActionBitSet> = if skip_pruned && DYNAMIC_THRESHOLD_C > 0.0 {
                let mut dyn_strategy = [0.0f32; NUM_ACTION_INDICES];
                if self
                    .cfr_state
                    .node_current_strategy_into(target_node_idx, &mut dyn_strategy)
                {
                    let denom = (iter_idx as f32).max(PRUNE_WARMUP as f32).sqrt();
                    let threshold = DYNAMIC_THRESHOLD_C / denom;
                    let mut set = ActionBitSet::new();
                    for (i, &p) in dyn_strategy.iter().enumerate() {
                        if p >= threshold {
                            set.insert(i);
                        }
                    }
                    Some(set)
                } else {
                    None
                }
            } else {
                None
            };

            // INVARIANT #1: accumulate the COMPLETE per-slot sample sums/counts
            // for this wave before updating regret. Pruned (and never-sampled)
            // slots stay at count 0 and fall back to `invalid_action_penalty`
            // via `wave_mean`, never omitted.
            sums.fill(0.0);
            counts.fill(0);

            let mut set: tokio::task::JoinSet<(usize, f32)> = tokio::task::JoinSet::new();
            let mut inline: Vec<(usize, f32)> = Vec::new();

            // Only spawn at the shallow frontier. Below it, recurse inline with
            // no semaphore traffic, no spawn, and no per-sample clone. Build the
            // shared `Arc<GameState>` snapshot once (cloned once, then cheap Arc
            // clones per spawned task) and only when we may actually spawn.
            let spawn_here = self.depth < SPAWN_FRONTIER_DEPTH;
            // Reuse the sampled-world Arc when present (cheap refcount bump);
            // only clone the base game_state when there was no sampled world
            // (fast-forward waves) and we still need a spawn snapshot.
            let gs_arc = spawn_here.then(|| match &sampled_arc {
                Some(arc) => arc.clone(),
                None => std::sync::Arc::new(game_state.clone()),
            });

            // `wave_width` samples × each active action.
            for _sample in 0..wave_width {
                for (action, reward_idx) in &indexed_actions {
                    let reward_idx = *reward_idx;

                    // Regret-based pruning: skip actions with zero strategy
                    // weight. Pruned actions take no sample and keep the penalty
                    // via `wave_mean` (count stays 0).
                    if skip_pruned && !active_actions.contains(reward_idx) {
                        event!(
                            tracing::Level::TRACE,
                            action_idx = reward_idx,
                            wave = iter_idx,
                            "RBP: skipping pruned action"
                        );
                        continue;
                    }

                    // Dynamic strategy-probability thresholding: skip actions
                    // whose current strategy weight is below threshold(iter).
                    // Same per-iteration pruning contract as RBP — pruned slot
                    // stays at count 0 and falls back to penalty.
                    if let Some(dyn_set) = &dyn_thresh_set
                        && !dyn_set.contains(reward_idx)
                    {
                        event!(
                            tracing::Level::TRACE,
                            action_idx = reward_idx,
                            wave = iter_idx,
                            "DynThresh: skipping low-probability action"
                        );
                        continue;
                    }

                    debug_assert!(
                        reward_idx < sums.len(),
                        "Action index {} should be less than number of potential actions {}",
                        reward_idx,
                        sums.len()
                    );

                    let action = action.clone();

                    // At the frontier, spawn the subtree if a permit is free
                    // (`try_acquire` never blocks → recursion stays
                    // deadlock-free, invariant #3). Below the frontier, or when
                    // saturated, compute inline.
                    if let Some(gs_arc) = &gs_arc
                        && let Ok(permit) = ctx.limiter.clone().try_acquire_owned()
                    {
                        let ctx = ctx.clone();
                        let gs = gs_arc.clone();
                        set.spawn(async move {
                            // Permit is held for the whole subtree's lifetime.
                            let _permit = permit;
                            let r = CFRAgent::<T>::compute_reward(&gs, &action, &ctx).await;
                            (reward_idx, r)
                        });
                        continue;
                    }
                    let r = Self::compute_reward(effective_gs, &action, &ctx).await;
                    inline.push((reward_idx, r));
                }
            }

            // Accumulate inline samples, then join all spawned handles, so the
            // complete per-slot sums/counts are ready before the single update.
            for (idx, r) in inline.drain(..) {
                sums[idx] += r;
                counts[idx] += 1;
            }
            while let Some(joined) = set.join_next().await {
                match joined {
                    Ok((idx, r)) => {
                        sums[idx] += r;
                        counts[idx] += 1;
                    }
                    Err(join_err) => {
                        // A `JoinError` here means a spawned exploration task
                        // panicked — a bug to surface, not swallow. Re-raise
                        // the original panic on this thread.
                        if join_err.is_panic() {
                            std::panic::resume_unwind(join_err.into_panic());
                        } else {
                            panic!("CFR exploration task failed to join: {join_err}");
                        }
                    }
                }
            }

            // INVARIANT #1 — discard a stopped wave. The lock-free `stop`
            // flag can flip mid-wave (after the boundary check, while samples
            // are in flight). Without aborting in-flight `compute_reward`
            // calls we wait for them to finish on their own, but if `stop`
            // fired during the wave we drop the whole wave — no
            // `update_regret_at_node`, no `iter_idx` increment, no active-set
            // refresh — and break. The next iteration's boundary check would
            // be too late: this wave's samples would already have updated.
            if self.stop.load(Ordering::Relaxed) {
                if diag_on {
                    diag_stop_cause = StopCause::Deadline;
                }
                break;
            }

            // INVARIANT #1: one atomic, complete averaged vector → one update.
            wave_mean_into(&mut rewards, &sums, &counts, invalid_action_penalty);
            self.update_regret_at_node(target_node_idx, &rewards);
            updates_since_warmup += 1;
            iter_idx += 1;
            // Refresh the convergence signal for the next wave's budget check
            // from the regret matrix this update just produced.
            latest_avg_regret = self.cfr_state.node_avg_regret(target_node_idx);
            if diag_on && let Some(r) = latest_avg_regret {
                diag_regret_series.push(r);
            }

            // Strategy-stability early exit. Read the post-update strategy,
            // compare it against the snapshot taken after the previous wave,
            // and count consecutive stable waves. The gate is only evaluated
            // once at least `EARLY_EXIT_MIN_ITERS` waves have completed — the
            // first few waves often look "stable" before real exploration
            // begins.
            if self
                .cfr_state
                .node_current_strategy_into(target_node_idx, &mut early_exit_curr_strategy)
            {
                if early_exit_has_prev && (iter_idx as usize) >= EARLY_EXIT_MIN_ITERS {
                    let mut l1 = 0.0f32;
                    for (a, b) in early_exit_curr_strategy
                        .iter()
                        .zip(early_exit_prev_strategy.iter())
                    {
                        l1 += (a - b).abs();
                    }
                    if l1 < EARLY_EXIT_EPSILON {
                        early_exit_stable_count += 1;
                        if early_exit_stable_count >= EARLY_EXIT_STABLE_ITERS {
                            if diag_on {
                                diag_stop_cause = StopCause::StableStrategy;
                            }
                            break;
                        }
                    } else {
                        early_exit_stable_count = 0;
                    }
                }
                early_exit_prev_strategy.copy_from_slice(&early_exit_curr_strategy);
                early_exit_has_prev = true;
            }

            // After a reprobe wave, refresh the active action set from the
            // updated regret matcher. The len() > 2 guard keeps this consistent
            // with the pruning decision above — there is no point refreshing an
            // active set we will never use.
            if is_reprobe
                && indexed_actions.len() > 2
                && (can_prune || updates_since_warmup >= PRUNE_WARMUP)
            {
                let (new_active, _) = self.cfr_state.get_pruning_info(target_node_idx);
                active_actions = new_active;
            }

            // `FastForward` is a one-shot — fast-forward is deterministic for
            // 0–2 remaining community cards (full enumeration) and samples
            // flops internally for 3 cards; doing it more than once per node
            // yields no new information.
            if fast_forward {
                if diag_on {
                    diag_stop_cause = StopCause::FastForward;
                }
                break;
            }
        }

        if diag_on {
            let elapsed_us = started.elapsed().as_micros() as u64;
            let nodes_touched_end = self.cfr_state.node_count() as u64;
            tracing::event!(
                target: "cfr_diag",
                tracing::Level::TRACE,
                depth = self.depth as u64,
                stop_cause = %diag_stop_cause,
                final_iterations = iter_idx,
                final_elapsed_us = elapsed_us,
                nodes_touched_start = diag_nodes_touched_start,
                nodes_touched_end = nodes_touched_end,
                timer_armed = timer_armed,
                actions_considered = indexed_actions.len() as u64,
                regret_series = ?diag_regret_series.as_slice(),
            );
        }
    }
}

#[cfg(test)]
mod wave_tests {
    use super::wave_mean_into;

    /// Feeds three slots' worth of sums/counts through `wave_mean_into` and
    /// hands back the averaged vector, so each test reads as input → output.
    fn averaged(totals: [f32; 3], hits: [u32; 3], fallback: f32) -> [f32; 3] {
        let mut out = [0.0_f32; 3];
        wave_mean_into(&mut out, &totals, &hits, fallback);
        out
    }

    #[test]
    fn wave_mean_averages_only_sampled_slots() {
        // Slots 0 and 2 were each sampled twice; slot 1 was never sampled and
        // is expected to come back as the penalty (-100).
        let got = averaged([3.0, 0.0, 8.0], [2, 0, 2], -100.0);
        assert_eq!(got, [1.5, -100.0, 4.0]);
    }

    #[test]
    fn wave_mean_single_sample_equals_sample() {
        // The wave_width == 1 case: every sampled slot has count 1, so its mean
        // is the sample itself; the unsampled last slot takes the penalty (-7).
        let got = averaged([5.0, -2.0, 0.0], [1, 1, 0], -7.0);
        assert_eq!(got, [5.0, -2.0, -7.0]);
    }
}