// mctrust 0.4.0 - Docs.rs

//! Environment-based MCTS — the classic game-tree search engine.
//!
//! Implements the four MCTS phases: Selection → Expansion → Simulation → Backpropagation.

use rand::Rng;
use rand::SeedableRng;

use crate::config::SearchConfig;
use crate::environment::{Environment, Outcome};
use crate::node::{Node, NodeStats};

/// Game-tree MCTS engine.
///
/// Given an [`Environment`], searches for the best action by running
/// repeated simulations through a growing tree of explored states.
///
/// # Type Parameters
///
/// - `E`: the environment type implementing [`Environment`].
///
/// # Examples
///
/// ```
/// use mctrust::{Environment, Outcome, TreeSearch, SearchConfig, Reward};
///
/// #[derive(Clone)]
/// struct Counter { value: i32, target: i32 }
///
/// #[derive(Clone, Debug, PartialEq)]
/// enum Step { Inc, Dec }
///
/// impl Environment for Counter {
///     type Action = Step;
///
///     fn legal_actions(&self) -> Vec<Step> {
///         vec![Step::Inc, Step::Dec]
///     }
///
///     fn apply(&mut self, action: &Step) {
///         match action {
///             Step::Inc => self.value += 1,
///             Step::Dec => self.value -= 1,
///         }
///     }
///
///     fn evaluate(&self) -> Outcome {
///         if self.value == self.target {
///             Outcome::Success(Reward::WIN)
///         } else if (self.value - self.target).abs() > 10 {
///             Outcome::Failure
///         } else {
///             Outcome::Ongoing
///         }
///     }
/// }
///
/// let game = Counter { value: 0, target: 3 };
/// let config = SearchConfig::builder().iterations(1_000).max_depth(15).build();
/// let mut search = TreeSearch::new(game, config);
///
/// if let Some(best) = search.run() {
///     // best is the action with most visits from root
///     println!("Best action: {best:?}");
/// }
/// let _all_moves = vec![Step::Inc, Step::Dec];
/// ```
#[derive(Clone)]
pub struct TreeSearch<E: Environment> {
    /// The root environment state. Each iteration clones it and replays
    /// actions on the clone; the stored value itself only changes when the
    /// tree is re-rooted.
    pub(crate) root_env: E,

    /// Search hyperparameters.
    pub(crate) config: SearchConfig,

    /// Flat arena of tree nodes. Index 0 is always root. Nodes reference each
    /// other by `u32` index into this vector.
    pub(crate) nodes: Vec<Node<E::Action>>,

    /// Fast, deterministic, no_std-compatible RNG. Seeded from the system RNG
    /// by `new` and from a caller-supplied seed by `with_seed`.
    pub(crate) rng: rand_chacha::ChaCha8Rng,

    /// Optional Transposition Table (DAG) storing `HashMap<EnvironmentHash, NodeIndex>`.
    /// `None` means DAG mode is off, which is the state every constructor in
    /// this file starts in.
    #[cfg(feature = "dag")]
    pub transposition_table: Option<hashbrown::HashMap<u64, u32>>,

    /// Optional pluggable leaf evaluator. When `Some`, replaces random rollout
    /// with the evaluator's `evaluate()` call. When `None`, uses standard rollout.
    pub(crate) evaluator: Option<std::sync::Arc<dyn crate::environment::Evaluator<E>>>,

    /// Optional node budget. When set, expansion stops after this many nodes.
    /// NOTE(review): the enforcement lives in the search phases, which are not
    /// defined in this file — confirm there.
    pub(crate) max_nodes: Option<usize>,
}

/// Serialisable game-search checkpoint used for mid-search persistence.
///
/// Only the fields below are captured. An attached evaluator, a node budget
/// and (with the `dag` feature) the transposition table are not part of the
/// checkpoint; `TreeSearch::restore` resets all three to `None`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct TreeSearchCheckpoint<E>
where
    E: Environment + Clone,
    E::Action: serde::Serialize + for<'action> serde::Deserialize<'action>,
{
    /// Root environment at checkpoint time.
    pub root_env: E,
    /// Search configuration at checkpoint time.
    pub config: SearchConfig,
    /// Full search tree snapshot (the node arena; index 0 is the root).
    pub nodes: Vec<Node<E::Action>>,
    /// Optional RNG seed captured at checkpoint time to enable deterministic restore.
    /// `TreeSearch::checkpoint` fills this with the next value a copy of the
    /// live RNG would produce; `None` makes `restore` seed from system entropy.
    pub rng_seed: Option<u64>,
}

impl<E: Environment> TreeSearch<E> {
    /// Builds a game-tree search rooted at `environment`, with an RNG seeded
    /// from system entropy.
    ///
    /// # Parameters
    ///
    /// - `environment`: Root state to search from.
    /// - `config`: Search hyperparameters.
    ///
    /// # Returns
    ///
    /// Returns a [`TreeSearch`] whose arena holds a single root node created
    /// from the environment's legal actions. DAG mode, the evaluator and the
    /// node budget all start unset.
    ///
    /// # Panics
    ///
    /// Panics if the system RNG cannot provide a seed (see `entropy_rng`).
    ///
    /// # Examples
    ///
    /// ```rust
    /// use mctrust::{Environment, TreeSearch, Outcome, SearchConfig};
    ///
    /// #[derive(Clone)]
    /// struct Env;
    ///
    /// impl Environment for Env {
    ///     type Action = ();
    ///     fn legal_actions(&self) -> Vec<Self::Action> { vec![()] }
    ///     fn apply(&mut self, _action: &Self::Action) {}
    ///     fn evaluate(&self) -> Outcome { Outcome::Neutral }
    /// }
    ///
    /// let search = TreeSearch::new(Env, SearchConfig::default());
    /// assert_eq!(search.tree_size(), 1);
    /// ```
    pub fn new(environment: E, config: SearchConfig) -> Self {
        // The root node is the only node until the first expansion.
        let root = Node::root(environment.legal_actions());
        let rng = entropy_rng();
        let nodes = vec![root];

        Self {
            root_env: environment,
            config,
            nodes,
            rng,
            #[cfg(feature = "dag")]
            transposition_table: None,
            evaluator: None,
            max_nodes: None,
        }
    }

    /// Builds a game-tree search rooted at `environment`, with the RNG seeded
    /// from `seed` so that repeated runs draw the same random sequence.
    ///
    /// # Parameters
    ///
    /// - `environment`: Root state to search from.
    /// - `config`: Search hyperparameters.
    /// - `seed`: Seed used for rollout randomness and tie-breaking.
    ///
    /// # Returns
    ///
    /// Returns a [`TreeSearch`] holding a single root node; DAG mode, the
    /// evaluator and the node budget all start unset.
    ///
    /// # Panics
    ///
    /// This function does not panic on its own; `legal_actions` is
    /// environment-defined.
    pub fn with_seed(environment: E, config: SearchConfig, seed: u64) -> Self {
        let root = Node::root(environment.legal_actions());
        let nodes = vec![root];
        let rng = rand_chacha::ChaCha8Rng::seed_from_u64(seed);

        Self {
            root_env: environment,
            config,
            nodes,
            rng,
            #[cfg(feature = "dag")]
            transposition_table: None,
            evaluator: None,
            max_nodes: None,
        }
    }

    /// Creates a serialisable checkpoint of the current search state.
    ///
    /// This can be used to pause and resume search work in a separate
    /// process.
    /// # Parameters
    ///
    /// This function takes no additional parameters.
    ///
    /// # Returns
    ///
    /// Returns a [`TreeSearchCheckpoint`] containing the root environment, config, and tree.
    ///
    /// # Panics
    ///
    /// This function does not panic.
    pub fn checkpoint(&self) -> TreeSearchCheckpoint<E>
    where
        E: serde::Serialize + for<'de> serde::Deserialize<'de>,
        E::Action: serde::Serialize + for<'de> serde::Deserialize<'de> + Clone,
    {
        // Capture a deterministic seed from a cloned RNG so we don't advance the
        // real searcher's RNG state while producing the checkpoint.
        let mut rng_clone = self.rng.clone();
        let seed = rng_clone.next_u64();

        TreeSearchCheckpoint {
            root_env: self.root_env.clone(),
            config: self.config.clone(),
            nodes: self.nodes.clone(),
            rng_seed: Some(seed),
        }
    }

    /// Restores a search state from a checkpoint.
    ///
    /// # Parameters
    ///
    /// - `checkpoint`: Previously captured search state.
    ///
    /// # Returns
    ///
    /// Returns a [`TreeSearch`] resumed from the checkpoint.
    ///
    /// # Panics
    ///
    /// This function does not panic.
    pub fn restore(checkpoint: TreeSearchCheckpoint<E>) -> Self
    where
        E: serde::Serialize + for<'de> serde::Deserialize<'de>,
        E::Action: serde::Serialize + for<'de> serde::Deserialize<'de> + Clone,
    {
        let rng = if let Some(seed) = checkpoint.rng_seed {
            rand_chacha::ChaCha8Rng::seed_from_u64(seed)
        } else {
            entropy_rng()
        };

        Self {
            root_env: checkpoint.root_env,
            config: checkpoint.config,
            nodes: checkpoint.nodes,
            rng,
            #[cfg(feature = "dag")]
            transposition_table: None,
            evaluator: None,
            max_nodes: None,
        }
    }

    /// Runs the search for the configured number of iterations, or until the
    /// optional time budget is exhausted, whichever comes first.
    ///
    /// The deadline is computed once, when the call starts, and checked before
    /// every iteration. A time budget too large to be represented as a point
    /// in time is treated as "no deadline" rather than causing a panic.
    ///
    /// # Returns
    ///
    /// Returns the most-visited root action, or `None` if the environment has
    /// no legal actions.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use mctrust::{Environment, TreeSearch, Outcome, Reward, SearchConfig};
    ///
    /// #[derive(Clone)]
    /// struct Env(bool);
    ///
    /// #[derive(Clone, Debug, PartialEq)]
    /// enum Action { Win }
    ///
    /// impl Environment for Env {
    ///     type Action = Action;
    ///     fn legal_actions(&self) -> Vec<Self::Action> { if self.0 { vec![] } else { vec![Action::Win] } }
    ///     fn apply(&mut self, _action: &Self::Action) { self.0 = true; }
    ///     fn evaluate(&self) -> Outcome { if self.0 { Outcome::Success(Reward::WIN) } else { Outcome::Ongoing } }
    /// }
    ///
    /// let mut search = TreeSearch::with_seed(Env(false), SearchConfig::builder().iterations(8).build(), 1);
    /// assert_eq!(search.run(), Some(Action::Win));
    /// ```
    pub fn run(&mut self) -> Option<E::Action> {
        // `Instant + Duration` panics on overflow; `checked_add` yields `None`
        // instead, which here simply means the budget can never expire.
        let deadline = self
            .config
            .time_budget
            .and_then(|budget| std::time::Instant::now().checked_add(budget));

        for _ in 0..self.config.iterations {
            if deadline.is_some_and(|limit| std::time::Instant::now() >= limit) {
                break;
            }
            self.run_step();
        }

        self.best_root_action()
    }

    /// Executes exactly **one** MCTS iteration (select → expand → simulate → backpropagate).
    ///
    /// This is the building block for fine-grained control. Typical uses:
    /// - interleaving search with network I/O (e.g., batched NN evaluation)
    /// - streaming intermediate results to a progress bar or TUI
    /// - checking an external cancellation token between iterations
    /// - building a custom time-control or pondering loop
    ///
    /// After calling this N times, use [`best_root_action()`](Self::best_root_action),
    /// [`principal_variation()`](Self::principal_variation), or
    /// [`root_stats()`](Self::root_stats) to inspect the result.
    pub fn run_step(&mut self) {
        // Work on a scratch copy so the stored root state is never mutated.
        let mut env = self.root_env.clone();

        // 1. Selection — walk down the tree, replaying actions on `env`.
        let (leaf, mut path) = self.select(&mut env);

        // 2. Expansion — a finished state is flagged terminal; an ongoing one
        //    may grow by one child, which then joins the backpropagation path.
        if env.evaluate() != Outcome::Ongoing {
            self.nodes[leaf as usize].terminal = true;
        } else if self.should_expand(leaf) {
            let child = self.expand(leaf, &mut env);
            if child != leaf {
                path.push(child);
            }
        }

        // 3. Simulation — estimate the value of the reached state.
        let reward = self.simulate(&mut env);

        // 4. Backpropagation — fold the reward into every node on the path.
        self.backpropagate(&path, reward);
    }

    /// Runs the search with root-level parallelism: `threads` independent
    /// search trees are built concurrently and their root statistics merged.
    ///
    /// Each worker is a fresh, separately seeded [`TreeSearch`] over a clone of
    /// the root environment. The configured iteration count is split across
    /// the workers; any remainder is handed out one iteration at a time to the
    /// first workers, so no iterations are lost and a budget smaller than
    /// `threads` still performs work.
    ///
    /// Things to be aware of:
    /// - The tree stored in `self` is neither consulted nor updated; only the
    ///   RNG of `self` advances (to derive the worker seeds).
    /// - A node budget set through `with_max_nodes` is applied to **each**
    ///   worker tree individually.
    /// - With the `dag` feature, workers get their own empty transposition
    ///   table when the parent has one enabled.
    /// - NOTE(review): an evaluator attached through `with_evaluator` is not
    ///   forwarded, because workers are built via `with_seed`. Forwarding it
    ///   requires the evaluator trait object to be shareable across threads —
    ///   confirm against the `Evaluator` trait before changing this.
    ///
    /// # Parameters
    /// - `threads`: Number of simultaneous MCTS instances to run.
    ///
    /// # Returns
    ///
    /// Returns the root action with the most visits summed over all workers,
    /// or `None` when `threads` is zero or no worker expanded a root child.
    #[cfg(feature = "parallel")]
    pub fn run_parallel(&mut self, threads: usize) -> Option<E::Action>
    where
        E: Send + Sync,
        E::Action: Eq + std::hash::Hash + Send + Sync,
    {
        use rayon::prelude::*;

        if threads == 0 {
            return None;
        }

        // Generate seeds safely from the single-threaded context.
        let seeds: Vec<u64> = (0..threads).map(|_| self.rng.next_u64()).collect();

        // Split the budget: every worker gets `base`, the first `remainder`
        // workers get one extra iteration.
        let base = self.config.iterations / threads;
        let remainder = self.config.iterations % threads;
        let node_budget = self.max_nodes;
        #[cfg(feature = "dag")]
        let dag_enabled = self.transposition_table.is_some();

        let merged_stats = seeds
            .into_par_iter()
            .enumerate()
            .map(|(worker, seed)| {
                let mut search =
                    TreeSearch::with_seed(self.root_env.clone(), self.config.clone(), seed);
                let extra = if worker < remainder { 1 } else { 0 };
                search.config.iterations = base + extra;
                search.max_nodes = node_budget;

                // Each sub-searcher gets its own fresh DAG table (if the parent had one enabled).
                // Cloning the parent table would waste O(threads × entries) memory with zero
                // cross-thread benefit since root parallelism uses independent trees.
                #[cfg(feature = "dag")]
                if dag_enabled {
                    search.enable_dag();
                }

                search.run();
                search.root_stats()
            })
            .reduce(Vec::new, |mut acc, thread_stats| {
                if acc.is_empty() {
                    return thread_stats;
                }
                // Merge statistics by action.
                for (action, stat) in thread_stats {
                    if let Some((_, acc_stat)) = acc.iter_mut().find(|(a, _)| a == &action) {
                        // Visit-weighted mean of the two averages.
                        let combined = acc_stat.visits + stat.visits;
                        if combined > 0 {
                            acc_stat.average_reward = (acc_stat.average_reward
                                * f64::from(acc_stat.visits)
                                + stat.average_reward * f64::from(stat.visits))
                                / f64::from(combined);
                        }
                        acc_stat.visits = combined;
                    } else {
                        acc.push((action, stat));
                    }
                }
                acc
            });

        // Find the action with the most visits across all parallel workers.
        merged_stats
            .into_iter()
            .max_by_key(|(_, stat)| stat.visits)
            .map(|(action, _)| action)
    }

    /// Summarises every expanded child of the root.
    ///
    /// # Parameters
    ///
    /// This function takes no additional parameters.
    ///
    /// # Returns
    ///
    /// Returns one `(action, stats)` pair per root child, in child order.
    /// Children without a stored action are skipped. The average reward is
    /// `cumulative_reward / visits`, or `0.0` for a child with no visits.
    ///
    /// # Panics
    ///
    /// Panics if the node arena is empty or a child index is out of range;
    /// the constructors in this file always create a root node.
    pub fn root_stats(&self) -> Vec<(E::Action, NodeStats)>
    where
        E::Action: Clone,
    {
        let root = &self.nodes[0];
        let mut stats = Vec::new();

        for &child_id in &root.children {
            let child = &self.nodes[child_id as usize];
            let Some(action) = child.action.clone() else {
                continue;
            };

            // Guard the division: an unvisited child reports a neutral 0.0.
            let average_reward = match child.visits {
                0 => 0.0,
                visits => child.cumulative_reward / f64::from(visits),
            };

            stats.push((
                action,
                NodeStats {
                    visits: child.visits,
                    average_reward,
                    children_count: child.children.len(),
                    unexpanded_count: child.unexpanded.len(),
                },
            ));
        }

        stats
    }

    /// Reports how many nodes the search tree currently holds.
    ///
    /// # Parameters
    ///
    /// This function takes no additional parameters.
    ///
    /// # Returns
    ///
    /// Returns the length of the node arena (the root counts as one node).
    ///
    /// # Panics
    ///
    /// This function does not panic.
    pub fn tree_size(&self) -> usize {
        let arena = &self.nodes;
        arena.len()
    }

    /// Reports how many simulations have passed through the current root.
    ///
    /// # Parameters
    ///
    /// This function takes no additional parameters.
    ///
    /// # Returns
    ///
    /// Returns the visit count stored on the root node (arena index 0).
    ///
    /// # Panics
    ///
    /// Panics if the node arena is empty; the constructors in this file
    /// always create a root node.
    pub fn total_simulations(&self) -> u32 {
        let root = &self.nodes[0];
        root.visits
    }

    /// Attaches a pluggable leaf evaluator, replacing any evaluator that was
    /// attached before.
    ///
    /// When set, the engine calls `evaluator.evaluate(env)` instead of running
    /// a random rollout to a terminal state. This is the hook for
    /// domain-specific heuristics or neural-network value estimates.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use mctrust::{Environment, Evaluator, TreeSearch, Outcome, Reward, SearchConfig};
    ///
    /// #[derive(Clone)]
    /// struct Env;
    ///
    /// impl Environment for Env {
    ///     type Action = ();
    ///     fn legal_actions(&self) -> Vec<()> { vec![()] }
    ///     fn apply(&mut self, _: &()) {}
    ///     fn evaluate(&self) -> Outcome { Outcome::Neutral }
    /// }
    ///
    /// struct ConstEval;
    /// impl Evaluator<Env> for ConstEval {
    ///     fn evaluate(&self, _: &Env) -> Reward { Reward::new(0.8) }
    /// }
    ///
    /// let mut search = TreeSearch::new(Env, SearchConfig::builder().iterations(10).build());
    /// search.with_evaluator(std::sync::Arc::new(ConstEval));
    /// ```
    pub fn with_evaluator(
        &mut self,
        evaluator: std::sync::Arc<dyn crate::environment::Evaluator<E>>,
    ) {
        let attached = Some(evaluator);
        self.evaluator = attached;
    }

    /// Sets the maximum number of tree nodes the search may allocate,
    /// replacing any previously configured limit.
    ///
    /// Once the limit is reached, expansion is meant to stop while selection
    /// and simulation continue on the existing nodes, which bounds memory use
    /// in resource-constrained environments. This method only stores the
    /// limit; the check itself is performed by the expansion phase.
    pub fn with_max_nodes(&mut self, limit: usize) {
        let budget = Some(limit);
        self.max_nodes = budget;
    }

    /// Runs the search until `predicate` returns `true`.
    ///
    /// The predicate is evaluated **before** every iteration (including the
    /// first) with a shared reference to the current search state, so a
    /// predicate that is already satisfied performs no iterations. It can
    /// express arbitrary stopping conditions: target reward, time limit,
    /// external cancellation, convergence detection, etc. A predicate that
    /// never returns `true` makes this call loop forever.
    ///
    /// # Returns
    ///
    /// Returns the result of `best_root_action()` once the predicate holds.
    ///
    /// # Examples
    ///
    /// ```rust,no_run
    /// # use mctrust::{Environment, TreeSearch, Outcome, SearchConfig, Reward};
    /// # #[derive(Clone)] struct E;
    /// # impl Environment for E {
    /// #     type Action = ();
    /// #     fn legal_actions(&self) -> Vec<()> { vec![] }
    /// #     fn apply(&mut self, _: &()) {}
    /// #     fn evaluate(&self) -> Outcome { Outcome::Neutral }
    /// # }
    /// # let mut search = TreeSearch::new(E, SearchConfig::default());
    /// // Stop when we've found a line with > 0.9 reward, or 50k iterations
    /// search.run_until(|s| {
    ///     s.best_root_reward().unwrap_or(0.0) > 0.9
    ///     || s.total_simulations() >= 50_000
    /// });
    /// ```
    pub fn run_until<F>(&mut self, mut predicate: F) -> Option<E::Action>
    where
        F: FnMut(&Self) -> bool,
    {
        while !predicate(self) {
            self.run_step();
        }
        self.best_root_action()
    }

    /// Replays the principal variation through a copy of the root
    /// environment and returns every state along the way.
    ///
    /// Useful for debugging, visualization, and post-search analysis. The
    /// first element is always the root state; each following element is the
    /// state after applying the next PV move, so the result has one more
    /// entry than [`principal_variation()`](Self::principal_variation).
    ///
    /// # Panics
    ///
    /// This function does not panic on its own; `apply` is
    /// environment-defined.
    pub fn principal_variation_states(&self) -> Vec<E>
    where
        E::Action: Clone,
    {
        let moves = self.principal_variation();

        let mut cursor = self.root_env.clone();
        let mut trail = Vec::with_capacity(moves.len() + 1);
        trail.push(cursor.clone());

        for step in &moves {
            cursor.apply(step);
            trail.push(cursor.clone());
        }

        trail
    }

    /// Exports a Graphviz DOT representation of the top `depth` levels of the
    /// search tree, with node visit counts and average rewards as labels.
    ///
    /// The returned text is a complete `digraph`; the per-node output is
    /// produced by `export_dot_recursive`, starting at the root (arena index
    /// 0, depth 0). A `depth` of zero yields a graph with no nodes.
    ///
    /// Render it with, for example:
    ///
    /// ```text
    /// dot -Tsvg tree.dot -o tree.svg
    /// ```
    pub fn export_dot(&self, depth: usize) -> String
    where
        E::Action: std::fmt::Debug,
    {
        let mut out = String::new();
        out.push_str("digraph mctrust {\n  rankdir=TB;\n  node [shape=record, style=filled, fillcolor=\"#f0f0f0\"];\n");
        self.export_dot_recursive(0, 0, depth, &mut out);
        out.push_str("}\n");
        out
    }

    /// Recursive helper for DOT export.
    ///
    /// Appends the declaration of `node_id` to `dot` and, as long as the next
    /// level is still within `max_depth`, an edge to each child followed by
    /// the child's own subtree. Nodes at `current_depth >= max_depth` are not
    /// emitted at all, and no edges are drawn towards them, so every edge in
    /// the output points at a declared, labelled node.
    ///
    /// The action's `Debug` text is escaped before being placed in the label,
    /// because `"`, `\`, `{`, `}`, `|`, `<` and `>` are structural characters
    /// inside a quoted DOT record label.
    fn export_dot_recursive(
        &self,
        node_id: u32,
        current_depth: usize,
        max_depth: usize,
        dot: &mut String,
    ) where
        E::Action: std::fmt::Debug,
    {
        use std::fmt::Write;

        /// Backslash-escapes characters that are special in a record label.
        fn escape_record_text(raw: &str) -> String {
            let mut escaped = String::with_capacity(raw.len());
            for ch in raw.chars() {
                if matches!(ch, '\\' | '"' | '{' | '}' | '|' | '<' | '>') {
                    escaped.push('\\');
                }
                escaped.push(ch);
            }
            escaped
        }

        if current_depth >= max_depth {
            return;
        }

        let node = &self.nodes[node_id as usize];
        let avg = if node.visits > 0 {
            node.cumulative_reward / f64::from(node.visits)
        } else {
            0.0
        };

        let label = if let Some(ref action) = node.action {
            let action_text = escape_record_text(&format!("{action:?}"));
            format!("{action_text}|V:{} R:{avg:.3}", node.visits)
        } else {
            format!("root|V:{} R:{avg:.3}", node.visits)
        };

        // Writing into a `String` cannot fail, so the result is ignored.
        let _ = writeln!(dot, "  n{node_id} [label=\"{{{label}}}\"];");

        // Children one level down would be cut off by the depth limit; skip
        // their edges too, otherwise DOT would invent unlabeled stub nodes.
        if current_depth + 1 >= max_depth {
            return;
        }

        for &child_id in &node.children {
            let _ = writeln!(dot, "  n{node_id} -> n{child_id};");
            self.export_dot_recursive(child_id, current_depth + 1, max_depth, dot);
        }
    }
    /// Tells whether RAVE blending is switched on in the search configuration.
    ///
    /// # Parameters
    ///
    /// This function takes no additional parameters.
    ///
    /// # Returns
    ///
    /// Returns the value of `config.rave.enabled`.
    ///
    /// # Panics
    ///
    /// This function does not panic.
    pub fn uses_rave(&self) -> bool {
        let rave = &self.config.rave;
        rave.enabled
    }

    /// Enables the DAG transposition table, allowing identical states reached through
    /// different action sequences to share a single node in the search tree.
    ///
    /// This is aimed at environments with many transpositions (board games,
    /// planning problems, etc.). The table always starts out empty: calling
    /// this while a table is already active discards the existing entries.
    #[cfg(feature = "dag")]
    pub fn enable_dag(&mut self) {
        let fresh_table = hashbrown::HashMap::new();
        self.transposition_table = Some(fresh_table);
    }

    /// Switches DAG mode off by dropping the transposition table, which also
    /// releases the memory it held.
    #[cfg(feature = "dag")]
    pub fn disable_dag(&mut self) {
        self.transposition_table = Option::None;
    }

    /// Returns the number of state hashes stored in the transposition table,
    /// a proxy for how many unique states the engine has explored.
    ///
    /// Despite the name this is the table's entry count, not a count of
    /// lookups that hit. Returns `0` when DAG mode is disabled.
    #[cfg(feature = "dag")]
    pub fn dag_hit_count(&self) -> usize {
        match &self.transposition_table {
            Some(table) => table.len(),
            None => 0,
        }
    }

    /// Extracts the **principal variation** — the sequence of actions the engine
    /// considers optimal from the root, chosen by following the most-visited child
    /// at every level of the tree.
    ///
    /// This is the single most important debugging and visualization primitive for
    /// any tree search engine. Safe in DAG mode (cycle-aware).
    pub fn principal_variation(&self) -> Vec<E::Action>
    where
        E::Action: Clone,
    {
        let mut pv = Vec::new();
        let mut current = 0u32;
        let mut visited = std::collections::HashSet::new();
        visited.insert(current);

        loop {
            let node = &self.nodes[current as usize];
            if node.children.is_empty() {
                break;
            }

            let best_child = node
                .children
                .iter()
                .copied()
                .max_by_key(|&id| self.nodes[id as usize].visits);

            match best_child {
                Some(child_id) => {
                    // DAG cycle guard: stop if we've already visited this node.
                    if !visited.insert(child_id) {
                        break;
                    }
                    if let Some(ref action) = self.nodes[child_id as usize].action {
                        pv.push(action.clone());
                    }
                    current = child_id;
                }
                None => break,
            }
        }

        pv
    }

    /// Returns the average reward of the most-visited root child.
    ///
    /// Yields `None` while the root has no children (for instance before any
    /// iteration has run), and `Some(0.0)` for a child that exists but has
    /// not been visited yet.
    pub fn best_root_reward(&self) -> Option<f64> {
        let best = &self.nodes[self.best_root_child_id()? as usize];
        let mean = match best.visits {
            0 => 0.0,
            visits => best.cumulative_reward / f64::from(visits),
        };
        Some(mean)
    }

    /// Returns the arena index of the root child with the most visits, or
    /// `None` when the root has no children. On a tie the child listed last
    /// wins.
    fn best_root_child_id(&self) -> Option<u32> {
        let visits_of = |id: u32| self.nodes[id as usize].visits;
        self.nodes[0]
            .children
            .iter()
            .copied()
            .max_by_key(|&id| visits_of(id))
    }

    /// **Tree reuse** — re-roots the search tree at the child reached by `action`,
    /// preserving that child's subtree and its accumulated statistics.
    ///
    /// Instead of discarding the tree after each move and starting from
    /// scratch, this keeps the work already done below the chosen child, so
    /// the next search starts warm. The stored root environment is advanced
    /// by `action` as part of the re-rooting.
    ///
    /// Returns `true` if the action was found among the root's children and the
    /// tree was successfully re-rooted. Returns `false` if the action didn't
    /// match any child (the tree remains unchanged — caller should create a fresh
    /// search instead).
    ///
    /// # Examples
    ///
    /// ```rust,no_run
    /// # use mctrust::{Environment, TreeSearch, Outcome, SearchConfig, Reward};
    /// # #[derive(Clone)] struct G;
    /// # #[derive(Clone, Debug, PartialEq)] enum M { A }
    /// # impl Environment for G {
    /// #     type Action = M;
    /// #     fn legal_actions(&self) -> Vec<M> { vec![] }
    /// #     fn apply(&mut self, _: &M) {}
    /// #     fn evaluate(&self) -> Outcome { Outcome::Neutral }
    /// # }
    /// # let mut search = TreeSearch::new(G, SearchConfig::default());
    /// // After choosing an action, re-root to reuse the subtree:
    /// if let Some(best) = search.run() {
    ///     search.advance_to_action(&best);
    ///     // Next search.run() starts with the preserved subtree.
    /// }
    /// ```
    pub fn advance_to_action(&mut self, action: &E::Action) -> bool {
        // First root child whose stored action equals the requested one.
        let matching_child = self.nodes[0]
            .children
            .iter()
            .copied()
            .find(|&id| self.nodes[id as usize].action.as_ref() == Some(action));

        let Some(id) = matching_child else {
            return false;
        };

        self.advance_to_child(id);
        true
    }

    /// Internal: compact the arena by extracting the subtree rooted at `child_id`
    /// into a fresh arena with contiguous indices starting from 0.
    fn advance_to_child(&mut self, child_id: u32) {
        // Apply the action to root_env so future searches start from the new state.
        if let Some(ref action) = self.nodes[child_id as usize].action {
            self.root_env.apply(action);
        }

        // BFS to collect all reachable nodes from child_id, building old→new index map.
        let mut old_to_new = std::collections::HashMap::new();
        let mut queue = std::collections::VecDeque::new();
        let mut new_nodes = Vec::new();

        queue.push_back(child_id);
        old_to_new.insert(child_id, 0u32);

        while let Some(old_id) = queue.pop_front() {
            let node = &self.nodes[old_id as usize];
            for &child in &node.children {
                if !old_to_new.contains_key(&child) {
                    let new_id = u32::try_from(old_to_new.len()).unwrap_or(u32::MAX);
                    old_to_new.insert(child, new_id);
                    queue.push_back(child);
                }
            }
        }

        // Build new arena with remapped indices.
        new_nodes.resize_with(old_to_new.len(), || Node::root(Vec::new()));

        for (&old_id, &new_id) in &old_to_new {
            let old_node = &self.nodes[old_id as usize];
            let mut new_node = old_node.clone();

            // Remap parent
            new_node.parent = old_node.parent.and_then(|p| old_to_new.get(&p).copied());

            // Remap children — only keep children that exist in the subtree
            new_node.children = old_node
                .children
                .iter()
                .filter_map(|c| old_to_new.get(c).copied())
                .collect();

            new_nodes[new_id as usize] = new_node;
        }

        // The new root (index 0) should have no parent and no action.
        if let Some(root) = new_nodes.first_mut() {
            root.parent = None;
            root.action = None;
        }

        self.nodes = new_nodes;

        // Clear DAG table — old hashes point to old indices. If it was
        // enabled, we keep it enabled but fresh.
        #[cfg(feature = "dag")]
        {
            if self.transposition_table.is_some() {
                self.transposition_table = Some(hashbrown::HashMap::new());
            }
        }
    }
}

/// Creates a `ChaCha8Rng` seeded from the operating system's RNG.
///
/// # Panics
///
/// Panics if the system RNG reports an error while producing the seed.
fn entropy_rng() -> rand_chacha::ChaCha8Rng {
    rand_chacha::ChaCha8Rng::try_from_rng(&mut rand::rngs::SysRng)
        .unwrap_or_else(|error| panic!("failed to seed ChaCha8Rng from system RNG: {error}"))
}

mod phases;

#[cfg(test)]
mod tests;