Skip to main content

swarm_engine_core/validation/
strategy.rs

//! ValidationStrategy - 検証戦略
//!
//! 検証基準を定義する trait と実装。
//!
//! # 概要
//!
//! 3つの検証戦略を提供:
//!
//! - [`NoRegression`]: 回帰なし(current >= baseline)
//! - [`Improvement`]: 改善必須(current >= baseline × threshold)
//! - [`Absolute`]: 絶対値判定(current >= threshold)
//!
//! # Example
//!
//! ```
//! use swarm_engine_core::validation::{NoRegression, Improvement, Absolute, ValidationStrategy};
//!
//! // NoRegression: 100% → 100% は PASS
//! let strategy = NoRegression::new();
//! let result = strategy.evaluate(1.0, 1.0, 10);
//! assert!(result.passed);
//!
//! // Improvement: 10% 改善必須
//! let strategy = Improvement::ten_percent();
//! let result = strategy.evaluate(0.7, 0.8, 10);  // 14% 改善
//! assert!(result.passed);
//!
//! // Absolute: 80% 以上必須
//! let strategy = Absolute::eighty_percent();
//! let result = strategy.evaluate(0.5, 0.85, 10);  // baseline 無視
//! assert!(result.passed);
//! ```
33
34use super::result::ValidationResult;
35
36/// 検証戦略 trait
37///
38/// 検証基準を定義し、baseline と current を比較して判定を行う。
39pub trait ValidationStrategy: Send + Sync {
40    /// 戦略名
41    fn name(&self) -> &str;
42
43    /// 判定を実行
44    ///
45    /// # Arguments
46    /// * `baseline` - ベースライン成績 (0.0-1.0)
47    /// * `current` - 検証時成績 (0.0-1.0)
48    /// * `sample_count` - 検証サンプル数
49    fn evaluate(&self, baseline: f64, current: f64, sample_count: usize) -> ValidationResult;
50}
51
52// ============================================================================
53// NoRegression - 回帰なし
54// ============================================================================
55
/// NoRegression: passes when `current >= baseline`.
///
/// Accepts a new model as long as it is not worse than the existing one.
/// 100% → 100% is a PASS.
#[derive(Debug, Clone)]
pub struct NoRegression {
    /// Tolerance used for the floating-point comparison.
    epsilon: f64,
}

impl NoRegression {
    /// Tolerance applied when none is supplied explicitly.
    const DEFAULT_EPSILON: f64 = 1e-9;

    /// Creates the strategy with the default tolerance (`1e-9`).
    pub fn new() -> Self {
        Self {
            epsilon: Self::DEFAULT_EPSILON,
        }
    }

    /// Returns the strategy with its comparison tolerance replaced by `epsilon`.
    pub fn with_epsilon(mut self, epsilon: f64) -> Self {
        self.epsilon = epsilon;
        self
    }
}

impl Default for NoRegression {
    /// Same as [`NoRegression::new`].
    ///
    /// Implemented by hand because a derived `Default` would set `epsilon` to
    /// `0.0`, making `default()` stricter than `new()`.
    fn default() -> Self {
        Self::new()
    }
}
76
77impl ValidationStrategy for NoRegression {
78    fn name(&self) -> &str {
79        "no_regression"
80    }
81
82    fn evaluate(&self, baseline: f64, current: f64, sample_count: usize) -> ValidationResult {
83        if current >= baseline - self.epsilon {
84            ValidationResult::pass(baseline, current, self.name(), sample_count)
85        } else {
86            ValidationResult::fail(
87                baseline,
88                current,
89                self.name(),
90                format!(
91                    "regression detected: {:.1}% < baseline {:.1}%",
92                    current * 100.0,
93                    baseline * 100.0
94                ),
95                sample_count,
96            )
97        }
98    }
99}
100
101// ============================================================================
102// Improvement - 改善必須
103// ============================================================================
104
/// Improvement: passes when `current >= baseline * threshold`.
///
/// Use this when a clear improvement is required.
/// With `threshold = 1.1`, a 10% improvement is needed.
#[derive(Debug, Clone)]
pub struct Improvement {
    /// Improvement-ratio threshold (1.1 = 10% improvement required).
    threshold: f64,
    /// Tolerance used for the floating-point comparison.
    epsilon: f64,
}

impl Improvement {
    /// Tolerance applied when none is supplied explicitly.
    const DEFAULT_EPSILON: f64 = 1e-9;

    /// Creates the strategy with the given ratio `threshold` and the default
    /// tolerance (`1e-9`).
    pub fn new(threshold: f64) -> Self {
        Self {
            threshold,
            epsilon: Self::DEFAULT_EPSILON,
        }
    }

    /// Requires a 10% improvement.
    pub fn ten_percent() -> Self {
        Self::new(1.1)
    }

    /// Requires a 5% improvement.
    pub fn five_percent() -> Self {
        Self::new(1.05)
    }

    /// Returns the strategy with its comparison tolerance replaced by `epsilon`.
    ///
    /// Same builder shape as `NoRegression::with_epsilon`, so the tolerance is
    /// configurable here as well.
    pub fn with_epsilon(mut self, epsilon: f64) -> Self {
        self.epsilon = epsilon;
        self
    }
}
135
136impl ValidationStrategy for Improvement {
137    fn name(&self) -> &str {
138        "improvement"
139    }
140
141    fn evaluate(&self, baseline: f64, current: f64, sample_count: usize) -> ValidationResult {
142        let required = baseline * self.threshold;
143        if current >= required - self.epsilon {
144            ValidationResult::pass(baseline, current, self.name(), sample_count)
145        } else {
146            let actual_improvement = if baseline > 0.0 {
147                (current / baseline - 1.0) * 100.0
148            } else {
149                0.0
150            };
151            let required_improvement = (self.threshold - 1.0) * 100.0;
152            ValidationResult::fail(
153                baseline,
154                current,
155                self.name(),
156                format!(
157                    "insufficient improvement: {:.1}% (need {:.0}%, got {:.1}%)",
158                    current * 100.0,
159                    required_improvement,
160                    actual_improvement
161                ),
162                sample_count,
163            )
164        }
165    }
166}
167
168// ============================================================================
169// Absolute - 絶対値判定
170// ============================================================================
171
/// Absolute: passes when `current >= threshold`.
///
/// Judges against an absolute bar, regardless of the baseline.
/// With `threshold = 0.8`, a score of 80% or more is a PASS.
#[derive(Debug, Clone)]
pub struct Absolute {
    /// Success-rate threshold (0.8 = at least 80% required).
    threshold: f64,
    /// Tolerance used for the floating-point comparison.
    epsilon: f64,
}

impl Absolute {
    /// Tolerance applied when none is supplied explicitly.
    const DEFAULT_EPSILON: f64 = 1e-9;

    /// Creates the strategy with the given absolute `threshold` and the
    /// default tolerance (`1e-9`).
    pub fn new(threshold: f64) -> Self {
        Self {
            threshold,
            epsilon: Self::DEFAULT_EPSILON,
        }
    }

    /// Requires at least 80%.
    pub fn eighty_percent() -> Self {
        Self::new(0.8)
    }

    /// Requires at least 90%.
    pub fn ninety_percent() -> Self {
        Self::new(0.9)
    }

    /// Returns the strategy with its comparison tolerance replaced by `epsilon`.
    ///
    /// Same builder shape as `NoRegression::with_epsilon`, so the tolerance is
    /// configurable here as well.
    pub fn with_epsilon(mut self, epsilon: f64) -> Self {
        self.epsilon = epsilon;
        self
    }
}
202
203impl ValidationStrategy for Absolute {
204    fn name(&self) -> &str {
205        "absolute"
206    }
207
208    fn evaluate(&self, baseline: f64, current: f64, sample_count: usize) -> ValidationResult {
209        if current >= self.threshold - self.epsilon {
210            ValidationResult::pass(baseline, current, self.name(), sample_count)
211        } else {
212            ValidationResult::fail(
213                baseline,
214                current,
215                self.name(),
216                format!(
217                    "below threshold: {:.1}% < required {:.1}%",
218                    current * 100.0,
219                    self.threshold * 100.0
220                ),
221                sample_count,
222            )
223        }
224    }
225}
226
#[cfg(test)]
mod tests {
    use super::*;

    /// Sample count passed to every `evaluate` call in these tests.
    const SAMPLES: usize = 100;

    #[test]
    fn test_no_regression_pass() {
        let strategy = NoRegression::new();

        // (baseline, current): equal scores, an improvement, and 100% -> 100%.
        let cases = [(0.7, 0.7), (0.7, 0.8), (1.0, 1.0)];
        for &(baseline, current) in &cases {
            let outcome = strategy.evaluate(baseline, current, SAMPLES);
            assert!(outcome.passed);
        }
    }

    #[test]
    fn test_no_regression_fail() {
        let outcome = NoRegression::new().evaluate(0.7, 0.65, SAMPLES);
        assert!(!outcome.passed);

        let reason = outcome.failure_reason.unwrap();
        assert!(reason.contains("regression"));
    }

    #[test]
    fn test_improvement_pass() {
        let strategy = Improvement::ten_percent();

        // 0.7 -> 0.8 is a 14% improvement (> 10%); 0.7 -> 0.77 is exactly 10%.
        let cases = [(0.7, 0.8), (0.7, 0.77)];
        for &(baseline, current) in &cases {
            let outcome = strategy.evaluate(baseline, current, SAMPLES);
            assert!(outcome.passed);
        }
    }

    #[test]
    fn test_improvement_fail() {
        // 0.7 -> 0.72 is only about a 3% improvement.
        let outcome = Improvement::ten_percent().evaluate(0.7, 0.72, SAMPLES);
        assert!(!outcome.passed);

        let reason = outcome.failure_reason.unwrap();
        assert!(reason.contains("insufficient"));
    }

    #[test]
    fn test_absolute_pass() {
        let strategy = Absolute::eighty_percent();

        // Above the bar with a low baseline, then exactly on the bar with a high one.
        let cases = [(0.5, 0.85), (0.9, 0.8)];
        for &(baseline, current) in &cases {
            let outcome = strategy.evaluate(baseline, current, SAMPLES);
            assert!(outcome.passed);
        }
    }

    #[test]
    fn test_absolute_fail() {
        let outcome = Absolute::eighty_percent().evaluate(0.9, 0.75, SAMPLES);
        assert!(!outcome.passed);

        let reason = outcome.failure_reason.unwrap();
        assert!(reason.contains("below threshold"));
    }
}