// mctrust 0.4.0 - Docs.rs

//! Search configuration and policy types.

use std::time::Duration;

/// Tree policy used during child selection.
///
/// Serialized as an internally tagged enum: the variant is selected by a
/// `kind` key whose value is the variant name in `snake_case` (for example
/// `kind = "thompson_sampling"`), and the variant's fields appear as sibling
/// keys. The default policy is [`TreePolicy::Uct`].
#[derive(Clone, Debug, PartialEq, Default, serde::Serialize, serde::Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
#[non_exhaustive]
pub enum TreePolicy {
    /// Classic Upper Confidence Bounds for Trees.
    #[default]
    Uct,
    /// AlphaZero-style PUCT using action priors.
    Puct {
        /// Prior contribution multiplier.
        ///
        /// [`SearchConfig::sanitize`] replaces a non-finite or negative value
        /// with `1.0`.
        prior_weight: f64,
    },
    /// Thompson-style optimistic sampling from a node's reward estimate.
    ThompsonSampling {
        /// Standard deviation multiplier used to perturb the sampled value.
        ///
        /// [`SearchConfig::sanitize`] replaces a non-finite or negative value
        /// with `1.0`.
        temperature: f64,
    },
    /// Gumbel `MuZero` — hyperparameter-free exploration via Sequential Halving.
    ///
    /// Based on "Policy Improvement by Planning with Gumbel" (Danihelka et al., 2022).
    /// Uses Gumbel noise to perturb action logits at the root, then applies
    /// Sequential Halving to prune actions, achieving equivalent search quality
    /// in ~16x fewer simulations compared to standard PUCT.
    Gumbel {
        /// Number of actions to sample at the root before halving.
        /// Defaults to 16 if set to 0 (the substitution is performed by
        /// [`SearchConfig::sanitize`]).
        sampled_actions: usize,
        /// Maximum completions mixing coefficient for completed Q-values.
        /// Typical value: 50.0
        ///
        /// [`SearchConfig::sanitize`] replaces a non-finite or negative value
        /// with `50.0`.
        max_completions_coeff: f64,
    },
}

/// Configuration for AMAF/RAVE value blending.
///
/// Because of `#[serde(default)]`, any field missing from the serialized form
/// is filled in from [`RaveConfig::default`].
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
#[serde(default)]
pub struct RaveConfig {
    /// Enables or disables RAVE. Defaults to `true`.
    pub enabled: bool,
    /// Weight used for visit-count-based RAVE decay. Defaults to `300.0`.
    ///
    /// [`SearchConfig::sanitize`] resets a non-finite or negative value to the
    /// default.
    pub bias: f64,
}

impl Default for RaveConfig {
    /// Returns the stock RAVE settings: blending switched on with a bias of `300.0`.
    fn default() -> Self {
        let (enabled, bias) = (true, 300.0);
        Self { enabled, bias }
    }
}

/// Progressive widening for large or continuous action spaces.
///
/// Because of `#[serde(default)]`, any field missing from the serialized form
/// is filled in from [`ProgressiveWideningConfig::default`].
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
#[serde(default)]
pub struct ProgressiveWideningConfig {
    /// Base number of children always allowed. Defaults to `1`.
    ///
    /// [`SearchConfig::sanitize`] raises a value of `0` to `1`.
    pub minimum_children: usize,
    /// The widening coefficient `k`. Defaults to `1.5`.
    ///
    /// [`SearchConfig::sanitize`] resets a non-finite or negative value to `1.5`.
    pub coefficient: f64,
    /// The exponent `alpha` in `k * visits^alpha`. Defaults to `0.5`.
    ///
    /// [`SearchConfig::sanitize`] resets a non-finite or negative value to `0.5`.
    pub exponent: f64,
}

impl Default for ProgressiveWideningConfig {
    /// Returns the stock widening settings: one guaranteed child, `k = 1.5`, `alpha = 0.5`.
    fn default() -> Self {
        let minimum_children = 1;
        let (coefficient, exponent) = (1.5, 0.5);
        Self {
            minimum_children,
            coefficient,
            exponent,
        }
    }
}

/// Configuration for [`crate::TreeSearch`].
///
/// Because of `#[serde(default)]`, any field missing from the serialized form
/// is filled in from [`SearchConfig::default`]. Out-of-range values can be
/// corrected in place with [`SearchConfig::sanitize`].
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
#[serde(default)]
pub struct SearchConfig {
    /// Number of simulations to execute (iteration budget). Defaults to `10_000`;
    /// `sanitize` replaces `0` with the default.
    pub iterations: usize,
    /// Exploration constant used by UCT and as a fallback with other policies.
    /// Defaults to `sqrt(2)`; `sanitize` resets non-finite or negative values.
    pub exploration_constant: f64,
    /// Default rollout depth cap. Defaults to `50`.
    pub max_depth: usize,
    /// Tree policy used for child selection. Defaults to [`TreePolicy::Uct`].
    pub tree_policy: TreePolicy,
    /// Heuristic blend weight when a heuristic estimate is available.
    /// Defaults to `0.35`; `sanitize` clamps finite values into `[0, 1]` and
    /// resets non-finite values to the default.
    pub heuristic_weight: f64,
    /// Enables and configures RAVE.
    pub rave: RaveConfig,
    /// Optional progressive widening. Defaults to `None` (disabled).
    pub progressive_widening: Option<ProgressiveWideningConfig>,
    /// Optional wall-clock time budget. When set, the search will stop
    /// after this duration even if `iterations` has not been reached.
    /// This takes priority over `iterations`.
    ///
    /// Marked `#[serde(skip)]`: the budget is never written when serializing
    /// and is always `None` after deserializing, so it must be set in code.
    #[serde(skip)]
    pub time_budget: Option<Duration>,
}

impl Default for SearchConfig {
    /// Returns the stock search settings: 10 000 UCT iterations with
    /// `sqrt(2)` exploration, depth cap 50, heuristic weight 0.35, default
    /// RAVE, no progressive widening and no time budget.
    fn default() -> Self {
        let tree_policy = TreePolicy::default();
        let rave = RaveConfig::default();

        Self {
            // Budgets.
            iterations: 10_000,
            max_depth: 50,
            time_budget: None,
            // Selection tuning.
            exploration_constant: std::f64::consts::SQRT_2,
            heuristic_weight: 0.35,
            tree_policy,
            // Optional extensions.
            rave,
            progressive_widening: None,
        }
    }
}

impl SearchConfig {
    /// Creates a builder initialized with the default search configuration.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use mctrust::SearchConfig;
    ///
    /// let config = SearchConfig::builder().iterations(256).build();
    /// assert_eq!(config.iterations, 256);
    /// ```
    pub fn builder() -> SearchConfigBuilder {
        SearchConfigBuilder(Self::default())
    }

    /// Parses a config from TOML.
    ///
    /// # Errors
    ///
    /// Returns [`toml::de::Error`] when the configuration cannot be parsed.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use mctrust::SearchConfig;
    ///
    /// let config = SearchConfig::from_toml_str("iterations = 32").unwrap();
    /// assert_eq!(config.iterations, 32);
    /// ```
    #[cfg(feature = "toml")]
    pub fn from_toml_str(input: &str) -> Result<Self, toml::de::Error> {
        let mut cfg: SearchConfig = toml::from_str(input)?;
        // Sanitize parsed configuration so TOML can't inject NaN/Inf/negative values.
        let _warnings = cfg.sanitize();
        Ok(cfg)
    }

    /// Reads a TOML config file from disk.
    ///
    /// # Errors
    ///
    /// Returns [`SearchConfigLoadError::Io`] if the file cannot be read,
    /// or [`SearchConfigLoadError::Toml`] if parsing fails.
    #[cfg(feature = "toml")]
    pub fn from_toml_file(
        path: impl AsRef<std::path::Path>,
    ) -> Result<Self, SearchConfigLoadError> {
        let path = path.as_ref();
        let contents = std::fs::read_to_string(path).map_err(SearchConfigLoadError::Io)?;
        let mut cfg: SearchConfig =
            toml::from_str(&contents).map_err(SearchConfigLoadError::Toml)?;
        let _warnings = cfg.sanitize();
        Ok(cfg)
    }

    /// Validates and corrects invalid configuration fields.
    ///
    /// Returns a list of human-readable warnings describing what was fixed.
    /// An empty list means the configuration was already valid.
    #[must_use]
    pub fn sanitize(&mut self) -> Vec<String> {
        let default = SearchConfig::default();
        let mut warnings = Vec::new();

        if self.iterations == 0 {
            warnings.push(format!(
                "iterations must be >= 1, resetting to default {}",
                default.iterations
            ));
            self.iterations = default.iterations;
        }

        if !self.exploration_constant.is_finite() || self.exploration_constant < 0.0 {
            warnings.push(format!(
                "exploration_constant invalid ({}), resetting to default {}",
                self.exploration_constant, default.exploration_constant
            ));
            self.exploration_constant = default.exploration_constant;
        }

        if !self.heuristic_weight.is_finite() {
            warnings.push(format!(
                "heuristic_weight invalid ({}), resetting to default {}",
                self.heuristic_weight, default.heuristic_weight
            ));
            self.heuristic_weight = default.heuristic_weight;
        } else if !(0.0..=1.0).contains(&self.heuristic_weight) {
            warnings.push(format!(
                "heuristic_weight ({}) out of [0,1], clamping",
                self.heuristic_weight
            ));
            self.heuristic_weight = self.heuristic_weight.clamp(0.0, 1.0);
        }

        if !self.rave.bias.is_finite() || self.rave.bias < 0.0 {
            warnings.push(format!(
                "rave.bias invalid ({}), resetting to default {}",
                self.rave.bias, default.rave.bias
            ));
            self.rave.bias = default.rave.bias;
        }

        if let Some(pw) = &mut self.progressive_widening {
            if pw.minimum_children == 0 {
                warnings.push(
                    "progressive_widening.minimum_children must be >= 1, setting to 1".to_string(),
                );
                pw.minimum_children = 1;
            }

            if !pw.coefficient.is_finite() || pw.coefficient < 0.0 {
                let default_coeff = default
                    .progressive_widening
                    .as_ref()
                    .map_or(1.5, |c| c.coefficient);
                warnings.push(format!(
                    "progressive_widening.coefficient invalid ({}), resetting to {}",
                    pw.coefficient, default_coeff
                ));
                pw.coefficient = default_coeff;
            }

            if !pw.exponent.is_finite() || pw.exponent < 0.0 {
                let default_exp = default
                    .progressive_widening
                    .as_ref()
                    .map_or(0.5, |c| c.exponent);
                warnings.push(format!(
                    "progressive_widening.exponent invalid ({}), resetting to {}",
                    pw.exponent, default_exp
                ));
                pw.exponent = default_exp;
            }
        }

        self.sanitize_tree_policy(&mut warnings);

        warnings
    }

    /// Validates tree-policy-specific fields, split out to keep `sanitize()` within
    /// the clippy line-count threshold.
    fn sanitize_tree_policy(&mut self, warnings: &mut Vec<String>) {
        match &mut self.tree_policy {
            TreePolicy::Puct { prior_weight } => {
                if !prior_weight.is_finite() || *prior_weight < 0.0 {
                    warnings.push(format!(
                        "tree_policy.puct.prior_weight invalid ({prior_weight}), resetting to default 1.0",
                    ));
                    *prior_weight = 1.0;
                }
            }
            TreePolicy::ThompsonSampling { temperature } => {
                if !temperature.is_finite() || *temperature < 0.0 {
                    warnings.push(format!(
                        "tree_policy.thompson_sampling.temperature invalid ({temperature}), resetting to default 1.0",
                    ));
                    *temperature = 1.0;
                }
            }
            TreePolicy::Gumbel {
                sampled_actions,
                max_completions_coeff,
            } => {
                if *sampled_actions == 0 {
                    warnings.push(
                        "tree_policy.gumbel.sampled_actions set to 0, defaulting to 16".to_string(),
                    );
                    *sampled_actions = 16;
                }

                if !max_completions_coeff.is_finite() || *max_completions_coeff < 0.0 {
                    warnings.push(format!(
                        "tree_policy.gumbel.max_completions_coeff invalid ({max_completions_coeff}), resetting to default 50.0",
                    ));
                    *max_completions_coeff = 50.0;
                }
            }
            TreePolicy::Uct => {}
        }
    }
}

/// Builder for [`SearchConfig`].
///
/// Wraps the configuration being assembled. Obtain one from
/// [`SearchConfig::builder`], chain the setter methods, and finish with
/// `build()`. The builder is `Clone`, so a partially configured builder can
/// serve as a template, and `Debug`, so it can appear in diagnostics.
#[derive(Clone, Debug)]
#[must_use = "a builder has no effect until `build()` is called"]
pub struct SearchConfigBuilder(SearchConfig);

impl SearchConfigBuilder {
    /// Sets the maximum number of simulations to run.
    pub fn iterations(mut self, iterations: usize) -> Self {
        self.0.iterations = iterations;
        self
    }

    /// Sets the exploration constant used by the search policy.
    pub fn exploration_constant(mut self, exploration_constant: f64) -> Self {
        self.0.exploration_constant = exploration_constant;
        self
    }

    /// Sets the default rollout depth cap.
    pub fn max_depth(mut self, max_depth: usize) -> Self {
        self.0.max_depth = max_depth;
        self
    }

    /// Selects the tree policy used during child selection.
    pub fn tree_policy(mut self, tree_policy: TreePolicy) -> Self {
        self.0.tree_policy = tree_policy;
        self
    }

    /// Sets how strongly heuristic estimates influence simulation rewards.
    pub fn heuristic_weight(mut self, heuristic_weight: f64) -> Self {
        self.0.heuristic_weight = heuristic_weight;
        self
    }

    /// Sets the RAVE/AMAF configuration.
    pub fn rave(mut self, rave: RaveConfig) -> Self {
        self.0.rave = rave;
        self
    }

    /// Enables progressive widening with the provided settings.
    pub fn progressive_widening(mut self, widening: ProgressiveWideningConfig) -> Self {
        self.0.progressive_widening = Some(widening);
        self
    }

    /// Sets an optional wall-clock time budget for the search.
    ///
    /// When set, the engine stops after this duration even if the iteration
    /// budget has not been exhausted. This is the **correct** way to use MCTS
    /// in real-time systems (game servers, web handlers, security probes).
    pub fn time_budget(mut self, budget: Duration) -> Self {
        self.0.time_budget = Some(budget);
        self
    }

    /// Finalizes the builder and returns the accumulated [`SearchConfig`].
    pub fn build(self) -> SearchConfig {
        let mut cfg = self.0;
        let _warnings = cfg.sanitize();
        cfg
    }
}

/// Error returned when loading config from disk.
///
/// Only available with the `toml` feature. Produced by
/// [`SearchConfig::from_toml_file`]; each variant wraps the underlying error
/// from the step that failed.
#[cfg(feature = "toml")]
#[derive(Debug)]
#[non_exhaustive]
pub enum SearchConfigLoadError {
    /// File read failed.
    Io(std::io::Error),
    /// TOML parsing failed.
    Toml(toml::de::Error),
}

#[cfg(feature = "toml")]
impl std::fmt::Display for SearchConfigLoadError {
    /// Writes what failed, then the wrapped error, then a short remediation hint.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Toml(error) => f.write_fmt(format_args!(
                "failed to parse TOML config: {error}: validate section layout and key/value types"
            )),
            Self::Io(error) => f.write_fmt(format_args!(
                "failed to read config: {error}: check file path permissions and ownership"
            )),
        }
    }
}

#[cfg(feature = "toml")]
impl std::error::Error for SearchConfigLoadError {
    /// Exposes the wrapped I/O or TOML error so callers holding a
    /// `dyn Error` can walk the cause chain or downcast to the concrete cause.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Self::Io(error) => Some(error),
            Self::Toml(error) => Some(error),
        }
    }
}

// Unit tests for the configuration types. Tests that touch the TOML entry
// points or the `toml` crate are gated on the `toml` feature so the suite
// still compiles when that feature is disabled.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn default_config_values() {
        let c = SearchConfig::default();
        assert_eq!(c.iterations, 10_000);
        assert_eq!(c.tree_policy, TreePolicy::Uct);
        assert!(c.rave.enabled);
        assert!(c.time_budget.is_none());
    }

    #[test]
    fn builder_overrides() {
        let c = SearchConfig::builder()
            .iterations(500)
            .exploration_constant(3.0)
            .max_depth(10)
            .tree_policy(TreePolicy::Puct { prior_weight: 2.0 })
            .heuristic_weight(0.5)
            .build();

        assert_eq!(c.iterations, 500);
        assert!((c.exploration_constant - 3.0).abs() < f64::EPSILON);
        assert_eq!(c.max_depth, 10);
        assert_eq!(c.tree_policy, TreePolicy::Puct { prior_weight: 2.0 });
        assert!((c.heuristic_weight - 0.5).abs() < f64::EPSILON);
    }

    #[test]
    fn builder_time_budget() {
        let c = SearchConfig::builder()
            .iterations(100)
            .time_budget(Duration::from_millis(50))
            .build();
        assert_eq!(c.time_budget, Some(Duration::from_millis(50)));
    }

    #[test]
    fn sanitize_returns_warnings() {
        let mut c = SearchConfig {
            iterations: 0,
            heuristic_weight: 5.0,
            ..SearchConfig::default()
        };
        let warnings = c.sanitize();
        assert!(warnings.len() >= 2);
        assert!(warnings[0].contains("iterations"));
    }

    #[test]
    fn sanitize_valid_config_returns_empty() {
        let mut c = SearchConfig::default();
        let warnings = c.sanitize();
        assert!(warnings.is_empty());
    }

    #[cfg(feature = "toml")]
    #[test]
    fn parse_from_toml() {
        let config = SearchConfig::from_toml_str(
            r#"
iterations = 64
max_depth = 12

[tree_policy]
kind = "thompson_sampling"
temperature = 0.25
"#,
        )
        .unwrap();

        assert_eq!(config.iterations, 64);
        assert_eq!(
            config.tree_policy,
            TreePolicy::ThompsonSampling { temperature: 0.25 }
        );
    }

    #[cfg(feature = "toml")]
    #[test]
    fn progressive_widening_roundtrip() {
        let config = SearchConfig::builder()
            .progressive_widening(ProgressiveWideningConfig {
                minimum_children: 2,
                coefficient: 1.75,
                exponent: 0.4,
            })
            .build();

        let serialized = toml::to_string(&config).unwrap();
        let parsed: SearchConfig = toml::from_str(&serialized).unwrap();

        let widening = parsed.progressive_widening.unwrap();
        assert_eq!(widening.minimum_children, 2);
        assert!((widening.exponent - 0.4).abs() < f64::EPSILON);
    }

    #[cfg(feature = "toml")]
    #[test]
    fn parse_toml_with_all_sections() {
        let config = SearchConfig::from_toml_str(
            r"
iterations = 64
max_depth = 7
heuristic_weight = 0.42

[rave]
enabled = false
bias = 111.0

[progressive_widening]
minimum_children = 2
coefficient = 2.5
exponent = 0.4
",
        )
        .unwrap();

        assert!(!config.rave.enabled);
        assert!((config.rave.bias - 111.0).abs() < f64::EPSILON);
        assert_eq!(
            config
                .progressive_widening
                .as_ref()
                .unwrap()
                .minimum_children,
            2
        );
    }

    #[cfg(feature = "toml")]
    #[test]
    fn parse_from_toml_file_error() {
        let err = SearchConfig::from_toml_file("/does/not/exist.toml").unwrap_err();
        assert!(matches!(err, SearchConfigLoadError::Io(_)));
    }

    #[test]
    fn tree_policy_default_is_uct() {
        let policy: TreePolicy = TreePolicy::default();
        assert!(matches!(policy, TreePolicy::Uct));
    }

    #[cfg(feature = "toml")]
    #[test]
    fn tree_policy_puct_is_round_trip_toml() {
        let config = SearchConfig::builder()
            .tree_policy(TreePolicy::Puct { prior_weight: 0.8 })
            .build();

        let text = toml::to_string(&config).unwrap();
        let loaded: SearchConfig = toml::from_str(&text).unwrap();
        assert_eq!(loaded.tree_policy, TreePolicy::Puct { prior_weight: 0.8 });
    }

    #[cfg(feature = "toml")]
    #[test]
    fn parse_bad_toml_reports_error() {
        let bad = "max_depth = 'oops'";
        assert!(SearchConfig::from_toml_str(bad).is_err());
    }
}