// atelier_quant 0.0.12

//! Configuration schemas for inter-arrival model binaries.
//!
//! These structs are shared between `inter_fit` and `inter_serve`,
//! and can be reused by any downstream tool that needs to parse the
//! same TOML configuration format.

use serde::Deserialize;
use std::path::PathBuf;

// ── Top-level config ─────────────────────────────────────────────

/// Root configuration for the inter-arrival fitting pipeline.
///
/// Deserialized with `serde`. The `input`, `model` and `output` keys
/// must be present; `forecast` may be left out entirely.
#[derive(Debug, Deserialize)]
pub struct FitConfig {
    /// Where the input data lives and which file to pick.
    pub input: InputConfig,
    /// Estimation hyperparameters and train/test split settings.
    pub model: ModelConfig,
    /// Where and how the model artifact is written.
    pub output: OutputConfig,
    /// Forecast evaluation settings (optional — when the key is absent
    /// the value comes from `ForecastConfig::default()`).
    #[serde(default)]
    pub forecast: ForecastConfig,
}

// ── Input ────────────────────────────────────────────────────────

/// Specifies where to find the input parquet data.
///
/// `path` and `data_type` are required keys; `selection` is optional.
#[derive(Debug, Deserialize)]
pub struct InputConfig {
    /// Directory containing parquet files.
    pub path: PathBuf,
    /// `"trades"` or `"orderbook"`.
    ///
    /// Stored as a plain `String`, so this struct itself does not
    /// reject other values at deserialization time.
    pub data_type: String,
    /// `"latest"` or a specific filename.
    ///
    /// Falls back to the value returned by `default_selection` when
    /// the key is omitted.
    #[serde(default = "default_selection")]
    pub selection: String,
}

/// Serde fallback for `InputConfig::selection`: the literal `"latest"`.
fn default_selection() -> String {
    String::from("latest")
}

// ── Model ────────────────────────────────────────────────────────

/// MLE estimation hyperparameters and train/test split ratio.
///
/// Every field is optional in the configuration file; each one falls
/// back to the `default_*` function named in its `serde(default)`
/// attribute.
#[derive(Debug, Deserialize)]
pub struct ModelConfig {
    /// Iteration budget for the estimator — presumably an upper bound
    /// on optimizer steps (confirm against the fitting code).
    /// Defaults to `default_max_iter()`.
    #[serde(default = "default_max_iter")]
    pub max_iter: usize,
    /// Numerical tolerance — presumably the convergence/stopping
    /// threshold of the optimizer (confirm against the fitting code).
    /// Defaults to `default_tolerance()`.
    #[serde(default = "default_tolerance")]
    pub tolerance: f64,
    /// Step size for the estimator — presumably a gradient-style
    /// learning rate (confirm against the fitting code).
    /// Defaults to `default_learning_rate()`.
    #[serde(default = "default_learning_rate")]
    pub learning_rate: f64,
    /// Fraction of data used for training (remainder is test).
    /// Defaults to `default_train_ratio()`.
    #[serde(default = "default_train_ratio")]
    pub train_ratio: f64,
    /// Large-gap detection threshold in seconds.
    /// Defaults to `default_gap_threshold()`.
    #[serde(default = "default_gap_threshold")]
    pub gap_threshold_secs: f64,
}

/// Serde fallback for `ModelConfig::max_iter`.
fn default_max_iter() -> usize {
    const MAX_ITER: usize = 50_000;
    MAX_ITER
}
/// Serde fallback for `ModelConfig::tolerance`.
///
/// A tolerance has to be a small positive number to be meaningful as
/// a stopping threshold: a value of `1e3` would be satisfied by
/// practically any step and would end the fit before it has
/// converged. The exponent is therefore negative.
fn default_tolerance() -> f64 {
    1e-8
}
/// Serde fallback for `ModelConfig::learning_rate`.
fn default_learning_rate() -> f64 {
    const LEARNING_RATE: f64 = 0.01;
    LEARNING_RATE
}
/// Serde fallback for `ModelConfig::train_ratio`: 80 % of the data is
/// used for training.
fn default_train_ratio() -> f64 {
    const TRAIN_RATIO: f64 = 0.8;
    TRAIN_RATIO
}
/// Serde fallback for `ModelConfig::gap_threshold_secs` (in seconds).
fn default_gap_threshold() -> f64 {
    const GAP_THRESHOLD_SECS: f64 = 5.0;
    GAP_THRESHOLD_SECS
}

// ── Output ───────────────────────────────────────────────────────

/// Where and how to write the model artifact.
///
/// Only `artifact_dir` is a required key; the other fields have
/// serde defaults.
#[derive(Debug, Deserialize)]
pub struct OutputConfig {
    /// Directory to write the model artifact into.
    pub artifact_dir: PathBuf,
    /// `"json"` (only supported format for now).
    ///
    /// Read from the configuration key `format`; the Rust field is
    /// named `_format`. Falls back to `default_format()` when the key
    /// is omitted.
    #[serde(default = "default_format", rename = "format")]
    pub _format: String,
    /// Include diagnostics in the artifact.
    ///
    /// Falls back to `default_include_diag()` when the key is omitted.
    #[serde(default = "default_include_diag")]
    pub include_diagnostics: bool,
}

/// Serde fallback for the `format` key of `OutputConfig`: `"json"`.
fn default_format() -> String {
    "json".to_owned()
}
/// Serde fallback for `OutputConfig::include_diagnostics`: diagnostics
/// are included unless the configuration says otherwise.
fn default_include_diag() -> bool {
    const INCLUDE_DIAGNOSTICS: bool = true;
    INCLUDE_DIAGNOSTICS
}

// ── Forecast ─────────────────────────────────────────────────────

/// Monte-Carlo ensemble forecast settings.
///
/// When `mc_paths > 1`, the forecast evaluation runs multiple
/// stochastic simulations and reduces each event-index column
/// with the chosen [`EnsembleStatistic`].
///
/// Both fields are optional in the configuration file and fall back
/// to their individual serde defaults when omitted.
#[derive(Debug, Deserialize)]
pub struct ForecastConfig {
    /// Number of Monte Carlo paths.
    /// 1 = single stochastic run (backward-compatible default).
    ///
    /// Falls back to `default_mc_paths()` when the key is omitted.
    #[serde(default = "default_mc_paths")]
    pub mc_paths: usize,
    /// Reduction statistic applied across the ensemble.
    ///
    /// Falls back to `EnsembleStatistic::default()` when the key is
    /// omitted.
    #[serde(default)]
    pub mc_statistic: EnsembleStatistic,
}

// Builds the value used when the whole forecast section is missing.
// Each field takes the same fallback that serde applies to it
// individually, so an absent section and an empty section agree.
impl Default for ForecastConfig {
    fn default() -> Self {
        let mc_paths = default_mc_paths();
        let mc_statistic = EnsembleStatistic::default();
        Self {
            mc_paths,
            mc_statistic,
        }
    }
}

/// Serde fallback for `ForecastConfig::mc_paths`: a single path.
fn default_mc_paths() -> usize {
    const MC_PATHS: usize = 1;
    MC_PATHS
}

/// Statistic used to reduce an ensemble of Monte-Carlo forecast
/// trajectories into a single consensus trajectory.
///
/// Variant names are lower-cased for (de)serialization, so the
/// accepted configuration values are `"median"`, `"mean"`, `"p25"`
/// and `"p75"`. `Median` is the `Default`.
#[derive(Default, Debug, Clone, Copy, PartialEq, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum EnsembleStatistic {
    /// Ensemble median (the default variant).
    #[default]
    Median,
    /// Ensemble mean.
    Mean,
    /// Presumably the 25th percentile of the ensemble — confirm
    /// against the reduction code.
    P25,
    /// Presumably the 75th percentile of the ensemble — confirm
    /// against the reduction code.
    P75,
}