mockforge_chaos/ab_testing.rs

//! A/B testing framework for chaos engineering strategies
//!
//! Compare different chaos configurations and strategies to determine
//! which approach is most effective for testing system resilience.
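//!
//! # Example
//!
//! A minimal usage sketch (the `mockforge_chaos::ab_testing` import path is an
//! assumption, and collection of the per-variant metrics is elided):
//!
//! ```ignore
//! use std::collections::HashMap;
//! use std::sync::Arc;
//!
//! use mockforge_chaos::ab_testing::*;
//! use mockforge_chaos::analytics::ChaosAnalytics;
//!
//! let engine = ABTestingEngine::new(Arc::new(ChaosAnalytics::new()));
//!
//! let config = ABTestConfig {
//!     name: "latency-injection".to_string(),
//!     description: "Mild vs. aggressive latency injection".to_string(),
//!     variant_a: TestVariant {
//!         name: "Control".to_string(),
//!         config: HashMap::new(),
//!         scenario: None,
//!         description: "Baseline chaos settings".to_string(),
//!     },
//!     variant_b: TestVariant {
//!         name: "Treatment".to_string(),
//!         config: HashMap::new(),
//!         scenario: None,
//!         description: "Aggressive chaos settings".to_string(),
//!     },
//!     duration_minutes: 60,
//!     traffic_split: 0.5,
//!     success_criteria: SuccessCriteria {
//!         primary_metric: MetricType::SuccessRate,
//!         secondary_metrics: vec![MetricType::LatencyP95],
//!         min_improvement: 5.0,
//!         significance_level: 0.95,
//!         max_secondary_degradation: 10.0,
//!     },
//!     min_sample_size: 100,
//! };
//!
//! let test_id = engine.create_test(config).unwrap();
//! engine.start_test(&test_id).unwrap();
//! // ... record VariantResults for "A" and "B" via `record_variant_result` ...
//! let conclusion = engine.stop_test(&test_id).unwrap();
//! println!("winner: {}: {}", conclusion.winner, conclusion.recommendation);
//! ```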

use crate::analytics::ChaosAnalytics;
use chrono::{DateTime, Utc};
use parking_lot::RwLock;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::Arc;
use uuid::Uuid;

/// A/B test configuration
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ABTestConfig {
    /// Test name
    pub name: String,
    /// Description
    pub description: String,
    /// Variant A (control)
    pub variant_a: TestVariant,
    /// Variant B (treatment)
    pub variant_b: TestVariant,
    /// Test duration in minutes
    pub duration_minutes: i64,
    /// Traffic split (0.0 - 1.0): fraction of traffic routed to variant B
    pub traffic_split: f64,
    /// Success criteria
    pub success_criteria: SuccessCriteria,
    /// Minimum sample size per variant
    pub min_sample_size: usize,
}

/// Test variant (A or B)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TestVariant {
    /// Variant name
    pub name: String,
    /// Chaos configuration
    pub config: HashMap<String, serde_json::Value>,
    /// Scenario to run (optional)
    pub scenario: Option<String>,
    /// Description
    pub description: String,
}

/// Success criteria for A/B test
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SuccessCriteria {
    /// Primary metric to optimize
    pub primary_metric: MetricType,
    /// Secondary metrics to track
    pub secondary_metrics: Vec<MetricType>,
    /// Minimum improvement threshold for the primary metric, in percent
    pub min_improvement: f64,
    /// Statistical significance level (e.g., 0.95 for 95%)
    pub significance_level: f64,
    /// Maximum acceptable degradation in secondary metrics, in percent
    pub max_secondary_degradation: f64,
}

/// Metric type for comparison
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum MetricType {
    ErrorRate,
    LatencyP50,
    LatencyP95,
    LatencyP99,
    SuccessRate,
    RecoveryTime,
    ResilienceScore,
    ChaosEffectiveness,
    FaultDetectionRate,
}

/// A/B test status
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum ABTestStatus {
    Draft,
    Running,
    Paused,
    Completed,
    Cancelled,
}

/// A/B test execution
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ABTest {
    /// Test ID
    pub id: String,
    /// Test configuration
    pub config: ABTestConfig,
    /// Test status
    pub status: ABTestStatus,
    /// Start time
    pub started_at: Option<DateTime<Utc>>,
    /// End time
    pub ended_at: Option<DateTime<Utc>>,
    /// Variant A results
    pub variant_a_results: Option<VariantResults>,
    /// Variant B results
    pub variant_b_results: Option<VariantResults>,
    /// Test conclusion
    pub conclusion: Option<TestConclusion>,
    /// Created at
    pub created_at: DateTime<Utc>,
}

/// Results for a test variant
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VariantResults {
    /// Variant name
    pub variant_name: String,
    /// Number of requests/tests
    pub sample_size: usize,
    /// Metrics
    pub metrics: VariantMetrics,
    /// Chaos events recorded
    pub chaos_events: usize,
    /// Duration
    pub duration_ms: u64,
    /// Success rate
    pub success_rate: f64,
}

/// Metrics for a variant
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VariantMetrics {
    pub error_rate: f64,
    pub latency_p50: f64,
    pub latency_p95: f64,
    pub latency_p99: f64,
    pub avg_latency: f64,
    pub success_rate: f64,
    pub recovery_time_ms: f64,
    pub resilience_score: f64,
    pub chaos_effectiveness: f64,
    pub fault_detection_rate: f64,
}

/// Test conclusion
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TestConclusion {
    /// Winner variant (A or B)
    pub winner: String,
    /// Statistical significance achieved
    pub statistically_significant: bool,
    /// P-value
    pub p_value: f64,
    /// Improvement percentage for primary metric
    pub improvement_pct: f64,
    /// Detailed comparison
    pub comparison: MetricComparison,
    /// Recommendation
    pub recommendation: String,
    /// Confidence level (0.0 - 1.0)
    pub confidence: f64,
}

/// Detailed metric comparison
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MetricComparison {
    /// Primary metric comparison
    pub primary: SingleMetricComparison,
    /// Secondary metrics comparison
    pub secondary: Vec<SingleMetricComparison>,
}

/// Single metric comparison
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SingleMetricComparison {
    pub metric: MetricType,
    pub variant_a_value: f64,
    pub variant_b_value: f64,
    pub difference: f64,
    pub difference_pct: f64,
    pub winner: String,
    pub significant: bool,
}

/// A/B testing engine
pub struct ABTestingEngine {
    tests: Arc<RwLock<HashMap<String, ABTest>>>,
    #[allow(dead_code)]
    analytics: Arc<ChaosAnalytics>,
    max_concurrent_tests: usize,
}

impl ABTestingEngine {
    /// Create a new A/B testing engine
    pub fn new(analytics: Arc<ChaosAnalytics>) -> Self {
        Self {
            tests: Arc::new(RwLock::new(HashMap::new())),
            analytics,
            max_concurrent_tests: 5,
        }
    }

    /// Create a new A/B test
    pub fn create_test(&self, config: ABTestConfig) -> Result<String, String> {
        // Validate config
        if config.traffic_split < 0.0 || config.traffic_split > 1.0 {
            return Err("Traffic split must be between 0.0 and 1.0".to_string());
        }

        if config.success_criteria.min_improvement < 0.0 {
            return Err("Minimum improvement must be non-negative".to_string());
        }

        // Check concurrent limit
        let tests = self.tests.read();
        let running_tests = tests.values().filter(|t| t.status == ABTestStatus::Running).count();

        if running_tests >= self.max_concurrent_tests {
            return Err(format!(
                "Maximum concurrent tests ({}) reached",
                self.max_concurrent_tests
            ));
        }
        drop(tests);

        let test = ABTest {
            id: format!("abtest-{}", Uuid::new_v4()),
            config,
            status: ABTestStatus::Draft,
            started_at: None,
            ended_at: None,
            variant_a_results: None,
            variant_b_results: None,
            conclusion: None,
            created_at: Utc::now(),
        };

        let test_id = test.id.clone();

        let mut tests = self.tests.write();
        tests.insert(test_id.clone(), test);

        Ok(test_id)
    }

    /// Start an A/B test
    pub fn start_test(&self, test_id: &str) -> Result<(), String> {
        let mut tests = self.tests.write();
        let test = tests.get_mut(test_id).ok_or_else(|| "Test not found".to_string())?;

        if test.status != ABTestStatus::Draft {
            return Err("Test must be in Draft status to start".to_string());
        }

        test.status = ABTestStatus::Running;
        test.started_at = Some(Utc::now());

        Ok(())
    }

    /// Stop an A/B test and analyze results
    pub fn stop_test(&self, test_id: &str) -> Result<TestConclusion, String> {
        let mut tests = self.tests.write();
        let test = tests.get_mut(test_id).ok_or_else(|| "Test not found".to_string())?;

        if test.status != ABTestStatus::Running {
            return Err("Test must be running to stop".to_string());
        }

        test.status = ABTestStatus::Completed;
        test.ended_at = Some(Utc::now());

        // Analyze results
        let conclusion = self.analyze_results(test)?;
        test.conclusion = Some(conclusion.clone());

        Ok(conclusion)
    }

    /// Record variant execution results
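    ///
    /// # Example
    ///
    /// A minimal sketch; the metric values below are illustrative, and the
    /// test is assumed to have already been created and started:
    ///
    /// ```ignore
    /// let results = VariantResults {
    ///     variant_name: "Treatment".to_string(),
    ///     sample_size: 500,
    ///     metrics: VariantMetrics {
    ///         error_rate: 0.02,
    ///         latency_p50: 45.0,
    ///         latency_p95: 120.0,
    ///         latency_p99: 250.0,
    ///         avg_latency: 60.0,
    ///         success_rate: 0.98,
    ///         recovery_time_ms: 800.0,
    ///         resilience_score: 0.9,
    ///         chaos_effectiveness: 0.7,
    ///         fault_detection_rate: 0.85,
    ///     },
    ///     chaos_events: 42,
    ///     duration_ms: 60_000,
    ///     success_rate: 0.98,
    /// };
    /// engine.record_variant_result(&test_id, "B", results)?;
    /// ```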
    pub fn record_variant_result(
        &self,
        test_id: &str,
        variant: &str,
        results: VariantResults,
    ) -> Result<(), String> {
        let mut tests = self.tests.write();
        let test = tests.get_mut(test_id).ok_or_else(|| "Test not found".to_string())?;

        if test.status != ABTestStatus::Running {
            return Err("Test must be running to record results".to_string());
        }

        match variant {
            "A" => test.variant_a_results = Some(results),
            "B" => test.variant_b_results = Some(results),
            _ => return Err(format!("Invalid variant '{}'; expected \"A\" or \"B\"", variant)),
        }

        Ok(())
    }

    /// Analyze test results and determine winner
    fn analyze_results(&self, test: &ABTest) -> Result<TestConclusion, String> {
        let variant_a = test
            .variant_a_results
            .as_ref()
            .ok_or_else(|| "Variant A results not available".to_string())?;
        let variant_b = test
            .variant_b_results
            .as_ref()
            .ok_or_else(|| "Variant B results not available".to_string())?;

        // Check minimum sample size
        if variant_a.sample_size < test.config.min_sample_size
            || variant_b.sample_size < test.config.min_sample_size
        {
            return Err("Insufficient sample size for analysis".to_string());
        }

        // Compare primary metric
        let primary =
            self.compare_metric(&test.config.success_criteria.primary_metric, variant_a, variant_b);

        // Compare secondary metrics
        let secondary: Vec<SingleMetricComparison> = test
            .config
            .success_criteria
            .secondary_metrics
            .iter()
            .map(|metric| self.compare_metric(metric, variant_a, variant_b))
            .collect();

        // Determine the winner for the primary metric. `compare_metric` already
        // accounts for metrics where lower is better (error rate, latency,
        // recovery time), so reuse its verdict instead of re-comparing raw values.
        let winner = primary.winner.clone();

        // Improvement of variant B over variant A in the primary metric's
        // "better" direction: positive when B is better, negative when B is worse.
        let improvement_pct = if winner == "B" {
            primary.difference_pct.abs()
        } else {
            -primary.difference_pct.abs()
        };

        // Check if statistically significant
        let p_value =
            self.calculate_p_value(variant_a.sample_size, variant_b.sample_size, &primary);
        let statistically_significant =
            p_value < (1.0 - test.config.success_criteria.significance_level);

        // Check secondary metrics for degradation
        let secondary_degraded = secondary.iter().any(|comp| {
            comp.winner == "A"
                && comp.difference_pct.abs()
                    > test.config.success_criteria.max_secondary_degradation
        });

        // Generate recommendation
        let recommendation = if !statistically_significant {
            format!("Results are not statistically significant (p-value: {:.4}). Consider running the test longer or with more traffic.", p_value)
        } else if secondary_degraded {
            format!("Variant {} shows improvement in primary metric but degrades secondary metrics beyond acceptable threshold.", winner)
        } else if improvement_pct >= test.config.success_criteria.min_improvement {
            format!(
                "Variant {} is the clear winner with {:.2}% improvement in {:?}.",
                winner, improvement_pct, test.config.success_criteria.primary_metric
            )
        } else {
            format!("Variants show similar performance. Improvement ({:.2}%) below minimum threshold ({:.2}%).", improvement_pct, test.config.success_criteria.min_improvement)
        };

        // Calculate confidence
        let confidence = if statistically_significant && !secondary_degraded {
            test.config.success_criteria.significance_level
        } else if statistically_significant {
            test.config.success_criteria.significance_level * 0.7
        } else {
            1.0 - p_value
        };

        Ok(TestConclusion {
            winner,
            statistically_significant,
            p_value,
            improvement_pct,
            comparison: MetricComparison { primary, secondary },
            recommendation,
            confidence,
        })
    }

    /// Compare a single metric between variants
    fn compare_metric(
        &self,
        metric: &MetricType,
        variant_a: &VariantResults,
        variant_b: &VariantResults,
    ) -> SingleMetricComparison {
        let (a_value, b_value) = match metric {
            MetricType::ErrorRate => (variant_a.metrics.error_rate, variant_b.metrics.error_rate),
            MetricType::LatencyP50 => {
                (variant_a.metrics.latency_p50, variant_b.metrics.latency_p50)
            }
            MetricType::LatencyP95 => {
                (variant_a.metrics.latency_p95, variant_b.metrics.latency_p95)
            }
            MetricType::LatencyP99 => {
                (variant_a.metrics.latency_p99, variant_b.metrics.latency_p99)
            }
            MetricType::SuccessRate => {
                (variant_a.metrics.success_rate, variant_b.metrics.success_rate)
            }
            MetricType::RecoveryTime => {
                (variant_a.metrics.recovery_time_ms, variant_b.metrics.recovery_time_ms)
            }
            MetricType::ResilienceScore => {
                (variant_a.metrics.resilience_score, variant_b.metrics.resilience_score)
            }
            MetricType::ChaosEffectiveness => {
                (variant_a.metrics.chaos_effectiveness, variant_b.metrics.chaos_effectiveness)
            }
            MetricType::FaultDetectionRate => {
                (variant_a.metrics.fault_detection_rate, variant_b.metrics.fault_detection_rate)
            }
        };

        let difference = b_value - a_value;
        let difference_pct = if a_value > 0.0 {
            (difference / a_value) * 100.0
        } else {
            0.0
        };

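        // Illustrative example (made-up numbers): if variant A's p95 latency is
        // 200.0 ms and variant B's is 150.0 ms, then difference = -50.0 and
        // difference_pct = -25.0, and variant B wins below since lower latency is better.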
        // For error rate, latency, and recovery time, lower is better
        let winner = match metric {
            MetricType::ErrorRate
            | MetricType::LatencyP50
            | MetricType::LatencyP95
            | MetricType::LatencyP99
            | MetricType::RecoveryTime => {
                if b_value < a_value {
                    "B"
                } else {
                    "A"
                }
            }
            _ => {
                if b_value > a_value {
                    "B"
                } else {
                    "A"
                }
            }
        };

        SingleMetricComparison {
            metric: metric.clone(),
            variant_a_value: a_value,
            variant_b_value: b_value,
            difference,
            difference_pct,
            winner: winner.to_string(),
            significant: difference_pct.abs() > 5.0, // Simple threshold
        }
    }

    /// Calculate p-value (rough heuristic; not a formal statistical test)
    fn calculate_p_value(
        &self,
        n_a: usize,
        n_b: usize,
        comparison: &SingleMetricComparison,
    ) -> f64 {
        // Simplified statistical significance calculation
        // In a real implementation, this would use proper statistical tests

        let pooled_n = (n_a + n_b) as f64;
        let effect_size = comparison.difference_pct.abs() / 100.0;

        // Rough approximation: larger sample sizes and larger effect sizes = lower p-value
        let p_value = 1.0 / (1.0 + pooled_n * effect_size);

        p_value.clamp(0.001, 0.999)
    }
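
    /// Illustrative only: what the "proper statistical test" mentioned above
    /// could look like. This sketch assumes the compared metric is a proportion
    /// in [0, 1] (e.g. `success_rate`) and nonzero sample sizes, and computes a
    /// two-sided p-value for a two-proportion z-test. It is not called by the
    /// engine, and its name is hypothetical.
    #[allow(dead_code)]
    fn two_proportion_p_value(p_a: f64, n_a: usize, p_b: f64, n_b: usize) -> f64 {
        let (n_a, n_b) = (n_a as f64, n_b as f64);
        // Pooled proportion under the null hypothesis that both variants share one rate.
        let pooled = (p_a * n_a + p_b * n_b) / (n_a + n_b);
        let se = (pooled * (1.0 - pooled) * (1.0 / n_a + 1.0 / n_b)).sqrt();
        if se == 0.0 {
            return 1.0;
        }
        let z = ((p_b - p_a) / se).abs();
        // Two-sided p-value = 1 - erf(|z| / sqrt(2)), using the Abramowitz-Stegun
        // polynomial approximation of erf (max error ~1.5e-7).
        let x = z / std::f64::consts::SQRT_2;
        let t = 1.0 / (1.0 + 0.3275911 * x);
        let poly = ((((1.061405429 * t - 1.453152027) * t + 1.421413741) * t - 0.284496736) * t
            + 0.254829592)
            * t;
        let erf = 1.0 - poly * (-x * x).exp();
        (1.0 - erf).clamp(0.0, 1.0)
    }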

    /// Get test by ID
    pub fn get_test(&self, test_id: &str) -> Option<ABTest> {
        let tests = self.tests.read();
        tests.get(test_id).cloned()
    }

    /// Get all tests
    pub fn get_all_tests(&self) -> Vec<ABTest> {
        let tests = self.tests.read();
        tests.values().cloned().collect()
    }

    /// Get running tests
    pub fn get_running_tests(&self) -> Vec<ABTest> {
        let tests = self.tests.read();
        tests.values().filter(|t| t.status == ABTestStatus::Running).cloned().collect()
    }

    /// Delete a test
    pub fn delete_test(&self, test_id: &str) -> Result<(), String> {
        let mut tests = self.tests.write();
        let test = tests.get(test_id).ok_or_else(|| "Test not found".to_string())?;

        if test.status == ABTestStatus::Running {
            return Err("Cannot delete running test".to_string());
        }

        tests.remove(test_id);
        Ok(())
    }

    /// Pause a running test
    pub fn pause_test(&self, test_id: &str) -> Result<(), String> {
        let mut tests = self.tests.write();
        let test = tests.get_mut(test_id).ok_or_else(|| "Test not found".to_string())?;

        if test.status != ABTestStatus::Running {
            return Err("Only running tests can be paused".to_string());
        }

        test.status = ABTestStatus::Paused;
        Ok(())
    }

    /// Resume a paused test
    pub fn resume_test(&self, test_id: &str) -> Result<(), String> {
        let mut tests = self.tests.write();
        let test = tests.get_mut(test_id).ok_or_else(|| "Test not found".to_string())?;

        if test.status != ABTestStatus::Paused {
            return Err("Only paused tests can be resumed".to_string());
        }

        test.status = ABTestStatus::Running;
        Ok(())
    }

    /// Get test statistics
    pub fn get_stats(&self) -> ABTestStats {
        let tests = self.tests.read();

        let total = tests.len();
        let running = tests.values().filter(|t| t.status == ABTestStatus::Running).count();
        let completed = tests.values().filter(|t| t.status == ABTestStatus::Completed).count();
        let cancelled = tests.values().filter(|t| t.status == ABTestStatus::Cancelled).count();

        let successful_tests = tests
            .values()
            .filter(|t| {
                t.status == ABTestStatus::Completed
                    && t.conclusion.as_ref().is_some_and(|c| c.statistically_significant)
            })
            .count();

        ABTestStats {
            total_tests: total,
            running_tests: running,
            completed_tests: completed,
            cancelled_tests: cancelled,
            successful_tests,
            avg_improvement: self.calculate_avg_improvement(&tests),
        }
    }

    fn calculate_avg_improvement(&self, tests: &HashMap<String, ABTest>) -> f64 {
        let improvements: Vec<f64> = tests
            .values()
            .filter_map(|t| {
                if t.status == ABTestStatus::Completed {
                    t.conclusion.as_ref().map(|c| c.improvement_pct)
                } else {
                    None
                }
            })
            .collect();

        if improvements.is_empty() {
            0.0
        } else {
            improvements.iter().sum::<f64>() / improvements.len() as f64
        }
    }
}

impl Default for ABTestingEngine {
    fn default() -> Self {
        Self::new(Arc::new(ChaosAnalytics::new()))
    }
}

/// A/B test statistics
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ABTestStats {
    pub total_tests: usize,
    pub running_tests: usize,
    pub completed_tests: usize,
    pub cancelled_tests: usize,
    pub successful_tests: usize,
    pub avg_improvement: f64,
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_engine_creation() {
        let analytics = Arc::new(ChaosAnalytics::new());
        let engine = ABTestingEngine::new(analytics);
        let stats = engine.get_stats();
        assert_eq!(stats.total_tests, 0);
    }

    #[test]
    fn test_create_test() {
        let analytics = Arc::new(ChaosAnalytics::new());
        let engine = ABTestingEngine::new(analytics);

        let config = ABTestConfig {
            name: "Test 1".to_string(),
            description: "Test description".to_string(),
            variant_a: TestVariant {
                name: "Control".to_string(),
                config: HashMap::new(),
                scenario: None,
                description: "Control variant".to_string(),
            },
            variant_b: TestVariant {
                name: "Treatment".to_string(),
                config: HashMap::new(),
                scenario: None,
                description: "Treatment variant".to_string(),
            },
            duration_minutes: 60,
            traffic_split: 0.5,
            success_criteria: SuccessCriteria {
                primary_metric: MetricType::ErrorRate,
                secondary_metrics: vec![],
                min_improvement: 0.1,
                significance_level: 0.95,
                max_secondary_degradation: 10.0,
            },
            min_sample_size: 100,
        };

        let result = engine.create_test(config);
        assert!(result.is_ok());
    }

    #[test]
    fn test_invalid_traffic_split() {
        let analytics = Arc::new(ChaosAnalytics::new());
        let engine = ABTestingEngine::new(analytics);

        let config = ABTestConfig {
            name: "Test".to_string(),
            description: "Test".to_string(),
            variant_a: TestVariant {
                name: "A".to_string(),
                config: HashMap::new(),
                scenario: None,
                description: "".to_string(),
            },
            variant_b: TestVariant {
                name: "B".to_string(),
                config: HashMap::new(),
                scenario: None,
                description: "".to_string(),
            },
            duration_minutes: 60,
            traffic_split: 1.5,
            success_criteria: SuccessCriteria {
                primary_metric: MetricType::ErrorRate,
                secondary_metrics: vec![],
                min_improvement: 0.1,
                significance_level: 0.95,
                max_secondary_degradation: 10.0,
            },
            min_sample_size: 100,
        };

        let result = engine.create_test(config);
        assert!(result.is_err());
    }
}