swarm-engine-core 0.1.6

Core types and orchestration for SwarmEngine
Documentation
//! ValidationStrategy - 検証戦略
//!
//! 検証基準を定義する trait と実装。
//!
//! # 概要
//!
//! 3つの検証戦略を提供:
//!
//! - [`NoRegression`]: 回帰なし(current >= baseline)
//! - [`Improvement`]: 改善必須(current >= baseline × threshold)
//! - [`Absolute`]: 絶対値判定(current >= threshold)
//!
//! # Example
//!
//! ```
//! use swarm_engine_core::validation::{NoRegression, Improvement, Absolute, ValidationStrategy};
//!
//! // NoRegression: 100% → 100% は PASS
//! let strategy = NoRegression::new();
//! let result = strategy.evaluate(1.0, 1.0, 10);
//! assert!(result.passed);
//!
//! // Improvement: 10% 改善必須
//! let strategy = Improvement::ten_percent();
//! let result = strategy.evaluate(0.7, 0.8, 10);  // 14% 改善
//! assert!(result.passed);
//!
//! // Absolute: 80% 以上必須
//! let strategy = Absolute::eighty_percent();
//! let result = strategy.evaluate(0.5, 0.85, 10);  // baseline 無視
//! assert!(result.passed);
//! ```

use super::result::ValidationResult;

/// Validation strategy trait.
///
/// Defines a validation criterion and decides the outcome by comparing
/// a baseline score with the current score.
pub trait ValidationStrategy: Send + Sync {
    /// Strategy name.
    fn name(&self) -> &str;

    /// Runs the evaluation.
    ///
    /// # Arguments
    /// * `baseline` - baseline score (0.0-1.0)
    /// * `current` - score measured at validation time (0.0-1.0)
    /// * `sample_count` - number of validation samples
    fn evaluate(&self, baseline: f64, current: f64, sample_count: usize) -> ValidationResult;
}

// ============================================================================
// NoRegression - 回帰なし
// ============================================================================

/// NoRegression: passes when `current >= baseline`.
///
/// Adopts a new model as long as it is not worse than the existing one.
/// 100% → 100% is a PASS.
///
/// `Default` is implemented by hand (not derived) so that
/// `NoRegression::default()` and `NoRegression::new()` use the same tolerance.
#[derive(Debug, Clone)]
pub struct NoRegression {
    /// Tolerance used for the floating-point comparison.
    epsilon: f64,
}

impl NoRegression {
    /// Creates the strategy with a comparison tolerance of `1e-9`.
    pub fn new() -> Self {
        Self { epsilon: 1e-9 }
    }

    /// Returns the strategy with its comparison tolerance replaced by `epsilon`.
    pub fn with_epsilon(mut self, epsilon: f64) -> Self {
        self.epsilon = epsilon;
        self
    }
}

impl Default for NoRegression {
    /// Equivalent to `NoRegression::new()`.
    ///
    /// A derived impl would set the tolerance to `0.0`, which disagrees with `new()`.
    fn default() -> Self {
        Self::new()
    }
}

impl ValidationStrategy for NoRegression {
    /// Returns the strategy name, `"no_regression"`.
    fn name(&self) -> &str {
        "no_regression"
    }

    /// Passes when `current` is at least `baseline` minus the tolerance;
    /// otherwise fails with a message showing both scores as percentages.
    fn evaluate(&self, baseline: f64, current: f64, sample_count: usize) -> ValidationResult {
        let strategy = self.name();
        let floor = baseline - self.epsilon;

        // Keep the `>=` form: a NaN on either side must land in the failure path.
        if current >= floor {
            return ValidationResult::pass(baseline, current, strategy, sample_count);
        }

        let reason = format!(
            "regression detected: {:.1}% < baseline {:.1}%",
            current * 100.0,
            baseline * 100.0
        );
        ValidationResult::fail(baseline, current, strategy, reason, sample_count)
    }
}

// ============================================================================
// Improvement - 改善必須
// ============================================================================

/// Improvement: passes when `current >= baseline * threshold`.
///
/// Use when a clear improvement is required.
/// With `threshold = 1.1`, a 10% improvement is needed.
#[derive(Debug, Clone)]
pub struct Improvement {
    /// Improvement ratio threshold (1.1 = 10% improvement required).
    threshold: f64,
    /// Tolerance used for the floating-point comparison.
    epsilon: f64,
}

impl Improvement {
    /// Creates the strategy with the given ratio `threshold` and a
    /// comparison tolerance of `1e-9`.
    pub fn new(threshold: f64) -> Self {
        Self {
            threshold,
            epsilon: 1e-9,
        }
    }

    /// Requires a 10% improvement.
    pub fn ten_percent() -> Self {
        Self::new(1.1)
    }

    /// Requires a 5% improvement.
    pub fn five_percent() -> Self {
        Self::new(1.05)
    }

    /// Returns the strategy with its comparison tolerance replaced by `epsilon`.
    ///
    /// Parallels `NoRegression::with_epsilon`, so the tolerance is configurable
    /// here as well instead of being fixed at construction.
    pub fn with_epsilon(mut self, epsilon: f64) -> Self {
        self.epsilon = epsilon;
        self
    }
}

impl ValidationStrategy for Improvement {
    /// Returns the strategy name, `"improvement"`.
    fn name(&self) -> &str {
        "improvement"
    }

    /// Passes when `current` reaches `baseline * threshold` (minus the tolerance).
    ///
    /// On failure the message reports the current score, the required
    /// improvement percentage and the achieved improvement percentage.
    fn evaluate(&self, baseline: f64, current: f64, sample_count: usize) -> ValidationResult {
        let strategy = self.name();
        let required = baseline * self.threshold;

        // Keep the `>=` form: a NaN on either side must land in the failure path.
        if current >= required - self.epsilon {
            return ValidationResult::pass(baseline, current, strategy, sample_count);
        }

        let required_improvement = (self.threshold - 1.0) * 100.0;
        // A non-positive baseline has no meaningful ratio; report 0% instead of dividing.
        let actual_improvement = if baseline > 0.0 {
            (current / baseline - 1.0) * 100.0
        } else {
            0.0
        };

        let reason = format!(
            "insufficient improvement: {:.1}% (need {:.0}%, got {:.1}%)",
            current * 100.0,
            required_improvement,
            actual_improvement
        );
        ValidationResult::fail(baseline, current, strategy, reason, sample_count)
    }
}

// ============================================================================
// Absolute - 絶対値判定
// ============================================================================

/// Absolute: passes when `current >= threshold`.
///
/// Judges against an absolute bar, regardless of the baseline.
/// With `threshold = 0.8`, a score of 80% or more passes.
#[derive(Debug, Clone)]
pub struct Absolute {
    /// Success-rate threshold (0.8 = at least 80% required).
    threshold: f64,
    /// Tolerance used for the floating-point comparison.
    epsilon: f64,
}

impl Absolute {
    /// Creates the strategy with the given `threshold` and a comparison
    /// tolerance of `1e-9`.
    pub fn new(threshold: f64) -> Self {
        Self {
            threshold,
            epsilon: 1e-9,
        }
    }

    /// Requires at least 80%.
    pub fn eighty_percent() -> Self {
        Self::new(0.8)
    }

    /// Requires at least 90%.
    pub fn ninety_percent() -> Self {
        Self::new(0.9)
    }

    /// Returns the strategy with its comparison tolerance replaced by `epsilon`.
    ///
    /// Parallels `NoRegression::with_epsilon`, so the tolerance is configurable
    /// here as well instead of being fixed at construction.
    pub fn with_epsilon(mut self, epsilon: f64) -> Self {
        self.epsilon = epsilon;
        self
    }
}

impl ValidationStrategy for Absolute {
    /// Returns the strategy name, `"absolute"`.
    fn name(&self) -> &str {
        "absolute"
    }

    /// Passes when `current` is at least `threshold` minus the tolerance;
    /// `baseline` is only forwarded into the result, never compared.
    fn evaluate(&self, baseline: f64, current: f64, sample_count: usize) -> ValidationResult {
        let strategy = self.name();
        let floor = self.threshold - self.epsilon;

        // Keep the `>=` form: a NaN score must land in the failure path.
        if current >= floor {
            return ValidationResult::pass(baseline, current, strategy, sample_count);
        }

        let reason = format!(
            "below threshold: {:.1}% < required {:.1}%",
            current * 100.0,
            self.threshold * 100.0
        );
        ValidationResult::fail(baseline, current, strategy, reason, sample_count)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Sample count passed to every `evaluate` call in these tests.
    const SAMPLES: usize = 100;

    #[test]
    fn test_no_regression_pass() {
        let no_regression = NoRegression::new();

        // (baseline, current): equal scores, an improvement, and 100% → 100%.
        for (baseline, current) in [(0.7, 0.7), (0.7, 0.8), (1.0, 1.0)] {
            let outcome = no_regression.evaluate(baseline, current, SAMPLES);
            assert!(outcome.passed);
        }
    }

    #[test]
    fn test_no_regression_fail() {
        let outcome = NoRegression::new().evaluate(0.7, 0.65, SAMPLES);
        assert!(!outcome.passed);

        let reason = outcome.failure_reason.unwrap();
        assert!(reason.contains("regression"));
    }

    #[test]
    fn test_improvement_pass() {
        let ten_percent = Improvement::ten_percent();

        // 0.8 is a 14% improvement (> 10%); 0.77 is exactly 10%.
        for current in [0.8, 0.77] {
            let outcome = ten_percent.evaluate(0.7, current, SAMPLES);
            assert!(outcome.passed);
        }
    }

    #[test]
    fn test_improvement_fail() {
        // 0.70 → 0.72 is only about a 3% improvement.
        let outcome = Improvement::ten_percent().evaluate(0.7, 0.72, SAMPLES);
        assert!(!outcome.passed);

        let reason = outcome.failure_reason.unwrap();
        assert!(reason.contains("insufficient"));
    }

    #[test]
    fn test_absolute_pass() {
        let eighty = Absolute::eighty_percent();

        // (baseline, current): the baseline plays no part in the verdict.
        for (baseline, current) in [(0.5, 0.85), (0.9, 0.8)] {
            let outcome = eighty.evaluate(baseline, current, SAMPLES);
            assert!(outcome.passed);
        }
    }

    #[test]
    fn test_absolute_fail() {
        let outcome = Absolute::eighty_percent().evaluate(0.9, 0.75, SAMPLES);
        assert!(!outcome.passed);

        let reason = outcome.failure_reason.unwrap();
        assert!(reason.contains("below threshold"));
    }
}