1#![allow(clippy::excessive_precision)]
3
4use std::collections::HashMap;
48use std::hash::{Hash, Hasher};
49use std::sync::Arc;
50
51use chrono::{DateTime, Duration, Utc};
52use parking_lot::RwLock;
53use serde::{Deserialize, Serialize};
54use uuid::Uuid;
55
56use crate::relevance::LearnedWeights;
57
/// Default α (Type I error rate) used when a test doesn't specify one.
pub const DEFAULT_SIGNIFICANCE_LEVEL: f64 = 0.05;

/// Minimum impressions per variant before results count as reliable.
pub const MIN_SAMPLE_SIZE: u64 = 100;

/// Default fraction of traffic routed to the treatment variant.
pub const DEFAULT_TRAFFIC_SPLIT: f32 = 0.5;

// Chi-squared critical values for 1 degree of freedom.
const CHI_SQUARED_CRITICAL_005: f64 = 3.841; // p = 0.05
const CHI_SQUARED_CRITICAL_001: f64 = 6.635; // p = 0.01
const CHI_SQUARED_CRITICAL_0001: f64 = 10.828; // p = 0.001

/// Max |observed - expected| traffic ratio before an SRM is Critical.
const SRM_THRESHOLD: f64 = 0.05;

/// Cohen's h below this is treated as not practically significant.
const MIN_PRACTICAL_EFFECT_SIZE: f64 = 0.1;
/// Result of a Monte-Carlo Bayesian comparison of the two variants
/// (Beta-Bernoulli model over click-through rates).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BayesianAnalysis {
    /// P(treatment CTR > control CTR) under the sampled posteriors.
    pub prob_treatment_better: f64,
    /// Complement of `prob_treatment_better`.
    pub prob_control_better: f64,
    /// Mean posterior relative lift of treatment over control.
    pub expected_lift: f64,
    /// 95% credible interval for the relative lift (2.5th–97.5th pct).
    pub credible_interval: (f64, f64),
    /// Expected loss if treatment ships but control is actually better.
    pub risk_treatment: f64,
    /// Expected loss if control is kept but treatment is actually better.
    pub risk_control: f64,
}
103
/// Practical-significance measures comparing the variants' CTRs.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EffectSize {
    /// Cohen's h: |difference of arcsine-transformed proportions|.
    pub cohens_h: f64,
    /// Qualitative bucket for `cohens_h`.
    pub interpretation: EffectSizeInterpretation,
    /// Treatment CTR / control CTR (0.0 when control CTR is 0).
    pub relative_risk: f64,
    /// Odds ratio of treatment vs control (0.0 when undefined).
    pub odds_ratio: f64,
    /// Number needed to treat: 1 / |absolute CTR difference|
    /// (infinity when the difference is zero).
    pub nnt: f64,
}
118
/// Cohen's-h buckets as used by `calculate_effect_size`:
/// < 0.2 negligible, < 0.5 small, < 0.8 medium, otherwise large.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum EffectSizeInterpretation {
    Negligible,
    Small,
    Medium,
    Large,
}
127
128impl std::fmt::Display for EffectSizeInterpretation {
129 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
130 match self {
131 Self::Negligible => write!(f, "negligible"),
132 Self::Small => write!(f, "small"),
133 Self::Medium => write!(f, "medium"),
134 Self::Large => write!(f, "large"),
135 }
136 }
137}
138
/// Outcome of a Sample Ratio Mismatch (SRM) check — whether the observed
/// traffic split matches the configured one.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SRMCheck {
    /// True when the goodness-of-fit p-value falls below 0.01.
    pub srm_detected: bool,
    /// Configured treatment fraction (`config.traffic_split`).
    pub expected_ratio: f64,
    /// Actual treatment impressions / total impressions.
    pub observed_ratio: f64,
    /// Chi-squared goodness-of-fit statistic (1 df).
    pub chi_squared: f64,
    /// Approximate p-value for `chi_squared`.
    pub p_value: f64,
    /// How badly the split deviates; see [`SRMSeverity`].
    pub severity: SRMSeverity,
}
155
/// Severity of a detected sample-ratio mismatch: `None` (p > 0.01),
/// `Warning` (significant but ratio deviation under `SRM_THRESHOLD`),
/// `Critical` (significant and deviation at/above the threshold).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum SRMSeverity {
    None,
    Warning,
    Critical,
}
163
/// State of one interim (sequential) analysis with alpha spending.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SequentialTest {
    /// 1-based index of this interim look.
    pub analysis_number: u32,
    /// Total number of planned looks.
    pub planned_analyses: u32,
    /// Cumulative alpha spent so far (O'Brien-Fleming-style spending).
    pub alpha_spent: f64,
    /// Alpha budget applied to this individual look.
    pub current_alpha: f64,
    /// Whether the test may be stopped at this look.
    pub can_stop_early: bool,
    /// Human-readable efficacy/futility reason when stopping is allowed.
    pub stop_reason: Option<String>,
}
180
/// A guardrail metric tracked alongside the primary metric.
/// NOTE(review): not referenced by the visible analysis code — presumably
/// populated/evaluated elsewhere; confirm against callers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GuardrailMetric {
    /// Metric identifier.
    pub name: String,
    /// Baseline (pre-test or control) value.
    pub baseline: f64,
    /// Current observed value.
    pub current: f64,
    /// Maximum tolerated degradation before the guardrail is breached.
    pub max_degradation: f64,
    /// Whether the guardrail is currently breached.
    pub is_breached: bool,
    /// p-value of the degradation test.
    pub degradation_p_value: f64,
}
197
/// State for a multi-armed-bandit allocation strategy.
/// NOTE(review): not used by the visible code — presumably `alphas`/`betas`
/// are per-arm Beta posterior parameters for Thompson sampling; confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BanditState {
    /// Which bandit algorithm drives allocation.
    pub algorithm: BanditAlgorithm,
    /// Per-arm success parameters.
    pub alphas: Vec<f64>,
    /// Per-arm failure parameters.
    pub betas: Vec<f64>,
    /// Current per-arm allocation probabilities.
    pub allocation_probs: Vec<f64>,
    /// Cumulative reward observed across arms.
    pub total_reward: f64,
    /// Estimated cumulative regret versus the best arm.
    pub estimated_regret: f64,
}
214
/// Supported bandit allocation algorithms.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum BanditAlgorithm {
    ThompsonSampling,
    UCB1,
    EpsilonGreedy,
}
225
/// The two arms of an A/B test.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum ABTestVariant {
    /// Baseline arm served the existing weights.
    Control,
    /// Experimental arm served the candidate weights.
    Treatment,
}
238
239impl ABTestVariant {
240 pub fn as_str(&self) -> &'static str {
241 match self {
242 ABTestVariant::Control => "control",
243 ABTestVariant::Treatment => "treatment",
244 }
245 }
246}
247
/// Lifecycle of a test: Draft → Running ⇄ Paused → Completed → Archived
/// (archiving is allowed from any state; see `ABTest::archive`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ABTestStatus {
    Draft,
    Running,
    Paused,
    Completed,
    Archived,
}
262
/// Raw counters accumulated for one variant of a test.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct VariantMetrics {
    /// Times results were shown to a user in this variant.
    pub impressions: u64,
    /// Clicks on shown results.
    pub clicks: u64,
    /// Explicit positive feedback events.
    pub positive_feedback: u64,
    /// Explicit negative feedback events.
    pub negative_feedback: u64,
    /// Sum of relevance scores across impressions (for averaging).
    pub total_relevance_score: f64,
    /// Sum of recorded latencies, in microseconds.
    pub total_latency_us: u64,
    /// Number of latency measurements in `total_latency_us`.
    pub latency_samples: u64,
    /// Distinct users assigned to this variant.
    pub unique_users: u64,
    /// Memory ids that were clicked. NOTE(review): grows without bound —
    /// one entry per click; confirm long-running tests can afford this.
    pub clicked_memory_ids: Vec<Uuid>,
}
285
286impl VariantMetrics {
287 pub fn ctr(&self) -> f64 {
289 if self.impressions == 0 {
290 0.0
291 } else {
292 self.clicks as f64 / self.impressions as f64
293 }
294 }
295
296 pub fn success_rate(&self) -> f64 {
298 let total = self.positive_feedback + self.negative_feedback;
299 if total == 0 {
300 0.0
301 } else {
302 self.positive_feedback as f64 / total as f64
303 }
304 }
305
306 pub fn avg_relevance_score(&self) -> f64 {
308 if self.impressions == 0 {
309 0.0
310 } else {
311 self.total_relevance_score / self.impressions as f64
312 }
313 }
314
315 pub fn avg_latency_ms(&self) -> f64 {
317 if self.latency_samples == 0 {
318 0.0
319 } else {
320 (self.total_latency_us as f64 / self.latency_samples as f64) / 1000.0
321 }
322 }
323
324 pub fn conversion_rate(&self) -> f64 {
326 if self.unique_users == 0 {
327 0.0
328 } else {
329 self.clicks as f64 / self.unique_users as f64
330 }
331 }
332}
333
/// Immutable configuration of an A/B test.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ABTestConfig {
    /// Unique test id (a UUID string by default).
    pub id: String,
    /// Human-readable test name.
    pub name: String,
    /// Description of the hypothesis under test.
    pub description: String,
    /// Weights served to the control arm.
    pub control_weights: LearnedWeights,
    /// Candidate weights served to the treatment arm.
    pub treatment_weights: LearnedWeights,
    /// Fraction of traffic routed to treatment (0.0–1.0).
    pub traffic_split: f32,
    /// Significance level α used by the frequentist analysis.
    pub significance_level: f64,
    /// Minimum impressions per variant before results count.
    pub min_impressions: u64,
    /// Optional hard time limit; `None` means the test never expires.
    pub max_duration_hours: Option<u64>,
    /// Free-form labels for filtering/reporting.
    pub tags: Vec<String>,
}
358
359impl Default for ABTestConfig {
360 fn default() -> Self {
361 Self {
362 id: Uuid::new_v4().to_string(),
363 name: String::new(),
364 description: String::new(),
365 control_weights: LearnedWeights::default(),
366 treatment_weights: LearnedWeights::default(),
367 traffic_split: DEFAULT_TRAFFIC_SPLIT,
368 significance_level: DEFAULT_SIGNIFICANCE_LEVEL,
369 min_impressions: MIN_SAMPLE_SIZE,
370 max_duration_hours: Some(168), tags: Vec::new(),
372 }
373 }
374}
375
/// A single A/B test: configuration, lifecycle state, and per-variant
/// metrics, plus sticky user→variant assignments.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ABTest {
    /// Mirror of `config.id`.
    /// NOTE(review): `#[serde(skip)]` leaves this empty after
    /// deserialization — consumers appear to use `config.id`; confirm.
    #[serde(skip)]
    pub id: String,
    pub config: ABTestConfig,
    /// Current lifecycle state.
    pub status: ABTestStatus,
    pub created_at: DateTime<Utc>,
    /// Set when `start()` transitions Draft → Running.
    pub started_at: Option<DateTime<Utc>>,
    /// Set when `complete()` is called.
    pub completed_at: Option<DateTime<Utc>>,
    pub control_metrics: VariantMetrics,
    pub treatment_metrics: VariantMetrics,
    /// Sticky user→variant map. Not serialized; after a reload
    /// assignments are re-derived deterministically by hashing.
    #[serde(skip)]
    user_assignments: HashMap<String, ABTestVariant>,
}
400
impl ABTest {
    /// Convenience entry point for [`ABTestBuilder`].
    pub fn builder(name: &str) -> ABTestBuilder {
        ABTestBuilder::new(name)
    }

    /// Creates a Draft test from a configuration; metrics start at zero
    /// and no users are assigned yet.
    pub fn from_config(config: ABTestConfig) -> Self {
        let id = config.id.clone();
        Self {
            id,
            config,
            status: ABTestStatus::Draft,
            created_at: Utc::now(),
            started_at: None,
            completed_at: None,
            control_metrics: VariantMetrics::default(),
            treatment_metrics: VariantMetrics::default(),
            user_assignments: HashMap::new(),
        }
    }

    /// Returns the variant for `user_id`, assigning one on first sight and
    /// counting the user in that variant's `unique_users`.
    pub fn get_variant(&mut self, user_id: &str) -> ABTestVariant {
        // Reuse a prior assignment so a user never switches variants.
        if let Some(&variant) = self.user_assignments.get(user_id) {
            return variant;
        }

        let variant = self.assign_variant(user_id);

        match variant {
            ABTestVariant::Control => self.control_metrics.unique_users += 1,
            ABTestVariant::Treatment => self.treatment_metrics.unique_users += 1,
        }

        self.user_assignments.insert(user_id.to_string(), variant);
        variant
    }

    /// Deterministic bucketing: hash (user_id, test id) into 10,000
    /// buckets; the first `traffic_split` fraction goes to Treatment.
    ///
    /// NOTE(review): `DefaultHasher` output is not guaranteed stable
    /// across Rust releases, so assignments could shift after a toolchain
    /// upgrade; within a run the `user_assignments` map keeps them sticky.
    fn assign_variant(&self, user_id: &str) -> ABTestVariant {
        let mut hasher = std::collections::hash_map::DefaultHasher::new();
        user_id.hash(&mut hasher);
        self.config.id.hash(&mut hasher);
        let hash = hasher.finish();

        let bucket = (hash % 10000) as f32 / 10000.0;

        if bucket < self.config.traffic_split {
            ABTestVariant::Treatment
        } else {
            ABTestVariant::Control
        }
    }

    /// Weights to serve for the given variant.
    pub fn get_weights(&self, variant: ABTestVariant) -> &LearnedWeights {
        match variant {
            ABTestVariant::Control => &self.config.control_weights,
            ABTestVariant::Treatment => &self.config.treatment_weights,
        }
    }

    /// Read-only metrics for the given variant.
    pub fn get_metrics(&self, variant: ABTestVariant) -> &VariantMetrics {
        match variant {
            ABTestVariant::Control => &self.control_metrics,
            ABTestVariant::Treatment => &self.treatment_metrics,
        }
    }

    /// Mutable metrics for the given variant (internal).
    fn get_metrics_mut(&mut self, variant: ABTestVariant) -> &mut VariantMetrics {
        match variant {
            ABTestVariant::Control => &mut self.control_metrics,
            ABTestVariant::Treatment => &mut self.treatment_metrics,
        }
    }

    /// Records one impression (also assigns the user if unseen).
    pub fn record_impression(&mut self, user_id: &str, relevance_score: f64, latency_us: u64) {
        let variant = self.get_variant(user_id);
        let metrics = self.get_metrics_mut(variant);
        metrics.impressions += 1;
        metrics.total_relevance_score += relevance_score;
        metrics.total_latency_us += latency_us;
        metrics.latency_samples += 1;
    }

    /// Records a click; `memory_id` is appended to the clicked-id log.
    pub fn record_click(&mut self, user_id: &str, memory_id: Uuid) {
        let variant = self.get_variant(user_id);
        let metrics = self.get_metrics_mut(variant);
        metrics.clicks += 1;
        metrics.clicked_memory_ids.push(memory_id);
    }

    /// Records explicit positive/negative feedback from a user.
    pub fn record_feedback(&mut self, user_id: &str, positive: bool) {
        let variant = self.get_variant(user_id);
        let metrics = self.get_metrics_mut(variant);
        if positive {
            metrics.positive_feedback += 1;
        } else {
            metrics.negative_feedback += 1;
        }
    }

    /// True when both variants have reached `min_impressions`.
    pub fn has_sufficient_data(&self) -> bool {
        self.control_metrics.impressions >= self.config.min_impressions
            && self.treatment_metrics.impressions >= self.config.min_impressions
    }

    /// True when a started test has exceeded its configured duration.
    /// Unstarted tests, or tests without a limit, never expire.
    pub fn is_expired(&self) -> bool {
        if let (Some(started), Some(max_hours)) = (self.started_at, self.config.max_duration_hours)
        {
            let elapsed = Utc::now().signed_duration_since(started);
            elapsed > Duration::hours(max_hours as i64)
        } else {
            false
        }
    }

    /// Draft → Running (no-op from any other state).
    pub fn start(&mut self) {
        if self.status == ABTestStatus::Draft {
            self.status = ABTestStatus::Running;
            self.started_at = Some(Utc::now());
        }
    }

    /// Running → Paused (no-op otherwise).
    pub fn pause(&mut self) {
        if self.status == ABTestStatus::Running {
            self.status = ABTestStatus::Paused;
        }
    }

    /// Paused → Running (no-op otherwise).
    pub fn resume(&mut self) {
        if self.status == ABTestStatus::Paused {
            self.status = ABTestStatus::Running;
        }
    }

    /// Running/Paused → Completed, stamping `completed_at`.
    pub fn complete(&mut self) {
        if self.status == ABTestStatus::Running || self.status == ABTestStatus::Paused {
            self.status = ABTestStatus::Completed;
            self.completed_at = Some(Utc::now());
        }
    }

    /// Marks the test Archived from any state.
    pub fn archive(&mut self) {
        self.status = ABTestStatus::Archived;
    }
}
566
/// Fluent builder for [`ABTest`]; wraps an [`ABTestConfig`] in progress.
pub struct ABTestBuilder {
    config: ABTestConfig,
}
571
572impl ABTestBuilder {
573 pub fn new(name: &str) -> Self {
574 Self {
575 config: ABTestConfig {
576 name: name.to_string(),
577 ..Default::default()
578 },
579 }
580 }
581
582 pub fn with_id(mut self, id: &str) -> Self {
583 self.config.id = id.to_string();
584 self
585 }
586
587 pub fn with_description(mut self, description: &str) -> Self {
588 self.config.description = description.to_string();
589 self
590 }
591
592 pub fn with_control(mut self, weights: LearnedWeights) -> Self {
593 self.config.control_weights = weights;
594 self
595 }
596
597 pub fn with_treatment(mut self, weights: LearnedWeights) -> Self {
598 self.config.treatment_weights = weights;
599 self
600 }
601
602 pub fn with_traffic_split(mut self, split: f32) -> Self {
603 self.config.traffic_split = split.clamp(0.0, 1.0);
604 self
605 }
606
607 pub fn with_significance_level(mut self, level: f64) -> Self {
608 self.config.significance_level = level.clamp(0.001, 0.1);
609 self
610 }
611
612 pub fn with_min_impressions(mut self, min: u64) -> Self {
613 self.config.min_impressions = min.max(MIN_SAMPLE_SIZE);
614 self
615 }
616
617 pub fn with_max_duration_hours(mut self, hours: u64) -> Self {
618 self.config.max_duration_hours = Some(hours);
619 self
620 }
621
622 pub fn with_tags(mut self, tags: Vec<String>) -> Self {
623 self.config.tags = tags;
624 self
625 }
626
627 pub fn build(self) -> ABTest {
628 ABTest::from_config(self.config)
629 }
630}
631
/// Output of the frequentist analysis of one test.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ABTestResults {
    pub test_id: String,
    /// p < α and both variants met the minimum sample size.
    pub is_significant: bool,
    /// Reported as `1.0 - p_value` (informal "confidence").
    pub confidence_level: f64,
    /// Chi-squared statistic from the 2x2 clicks/impressions table.
    pub chi_squared: f64,
    /// Approximate p-value for `chi_squared` (1 df).
    pub p_value: f64,
    /// Winning variant when significant, else `None`.
    pub winner: Option<ABTestVariant>,
    /// Relative CTR improvement of treatment over control, in percent.
    pub relative_improvement: f64,
    pub control_ctr: f64,
    pub treatment_ctr: f64,
    pub control_success_rate: f64,
    pub treatment_success_rate: f64,
    /// 95% CI for the CTR difference (treatment - control).
    pub confidence_interval: (f64, f64),
    /// Human-readable guidance generated from the numbers above.
    pub recommendations: Vec<String>,
    pub analyzed_at: DateTime<Utc>,
}
668
/// Stateless namespace for the statistical routines over [`ABTest`]s.
pub struct ABTestAnalyzer;
671
672impl ABTestAnalyzer {
    /// Frequentist analysis: chi-squared test on clicks vs impressions,
    /// a 95% CI for the CTR difference, and textual recommendations.
    pub fn analyze(test: &ABTest) -> ABTestResults {
        let control = &test.control_metrics;
        let treatment = &test.treatment_metrics;

        let control_ctr = control.ctr();
        let treatment_ctr = treatment.ctr();

        let control_success = control.success_rate();
        let treatment_success = treatment.success_rate();

        let (chi_squared, p_value) = Self::chi_squared_test(
            control.impressions,
            control.clicks,
            treatment.impressions,
            treatment.clicks,
        );

        // Significance requires both a small p-value and enough data in
        // each arm; underpowered tests never report significance.
        let is_significant = p_value < test.config.significance_level
            && control.impressions >= test.config.min_impressions
            && treatment.impressions >= test.config.min_impressions;

        // Winner is decided by raw CTR direction, only when significant.
        let winner = if is_significant {
            if treatment_ctr > control_ctr {
                Some(ABTestVariant::Treatment)
            } else {
                Some(ABTestVariant::Control)
            }
        } else {
            None
        };

        // Percent lift relative to control (0 when control CTR is 0).
        let relative_improvement = if control_ctr > 0.0 {
            (treatment_ctr - control_ctr) / control_ctr * 100.0
        } else {
            0.0
        };

        let confidence_interval = Self::calculate_confidence_interval(
            control.impressions,
            control.clicks,
            treatment.impressions,
            treatment.clicks,
        );

        let recommendations = Self::generate_recommendations(
            test,
            is_significant,
            winner,
            relative_improvement,
            &confidence_interval,
        );

        ABTestResults {
            test_id: test.config.id.clone(),
            is_significant,
            confidence_level: 1.0 - p_value,
            chi_squared,
            p_value,
            winner,
            relative_improvement,
            control_ctr,
            treatment_ctr,
            control_success_rate: control_success,
            treatment_success_rate: treatment_success,
            confidence_interval,
            recommendations,
            analyzed_at: Utc::now(),
        }
    }
751
    /// 2x2 chi-squared test of independence on (clicks, non-clicks) for
    /// the two arms. `n*` are impressions, `x*` are clicks.
    ///
    /// Returns `(statistic, approximate p-value)`; degenerate inputs
    /// (an empty arm, or any expected cell count below 5 — the standard
    /// validity rule for the chi-squared approximation) yield `(0.0, 1.0)`.
    fn chi_squared_test(n1: u64, x1: u64, n2: u64, x2: u64) -> (f64, f64) {
        if n1 == 0 || n2 == 0 {
            return (0.0, 1.0);
        }

        let n1 = n1 as f64;
        let x1 = x1 as f64;
        let n2 = n2 as f64;
        let x2 = x2 as f64;

        // Pooled success probability under the null hypothesis.
        let p_pooled = (x1 + x2) / (n1 + n2);

        // Expected cell counts under the null.
        let e1_success = n1 * p_pooled;
        let e1_failure = n1 * (1.0 - p_pooled);
        let e2_success = n2 * p_pooled;
        let e2_failure = n2 * (1.0 - p_pooled);

        if e1_success < 5.0 || e1_failure < 5.0 || e2_success < 5.0 || e2_failure < 5.0 {
            return (0.0, 1.0);
        }

        // Sum of (observed - expected)^2 / expected over the four cells.
        let chi_squared = (x1 - e1_success).powi(2) / e1_success
            + ((n1 - x1) - e1_failure).powi(2) / e1_failure
            + (x2 - e2_success).powi(2) / e2_success
            + ((n2 - x2) - e2_failure).powi(2) / e2_failure;

        let p_value = Self::chi_squared_p_value(chi_squared);

        (chi_squared, p_value)
    }
792
793 fn chi_squared_p_value(chi_squared: f64) -> f64 {
795 if chi_squared <= 0.0 {
796 return 1.0;
797 }
798
799 if chi_squared >= CHI_SQUARED_CRITICAL_0001 {
801 0.0001
802 } else if chi_squared >= CHI_SQUARED_CRITICAL_001 {
803 let ratio = (chi_squared - CHI_SQUARED_CRITICAL_001)
805 / (CHI_SQUARED_CRITICAL_0001 - CHI_SQUARED_CRITICAL_001);
806 0.001 - ratio * 0.0009
807 } else if chi_squared >= CHI_SQUARED_CRITICAL_005 {
808 let ratio = (chi_squared - CHI_SQUARED_CRITICAL_005)
810 / (CHI_SQUARED_CRITICAL_001 - CHI_SQUARED_CRITICAL_005);
811 0.05 - ratio * 0.049
812 } else {
813 0.05 + (1.0 - chi_squared / CHI_SQUARED_CRITICAL_005) * 0.95
816 }
817 }
818
819 fn calculate_confidence_interval(n1: u64, x1: u64, n2: u64, x2: u64) -> (f64, f64) {
821 if n1 == 0 || n2 == 0 {
822 return (0.0, 0.0);
823 }
824
825 let p1 = x1 as f64 / n1 as f64;
826 let p2 = x2 as f64 / n2 as f64;
827 let diff = p2 - p1;
828
829 let se = ((p1 * (1.0 - p1) / n1 as f64) + (p2 * (1.0 - p2) / n2 as f64)).sqrt();
831
832 let margin = 1.96 * se;
834 (diff - margin, diff + margin)
835 }
836
    /// Builds the human-readable recommendation strings for `analyze`.
    /// Covers: insufficient data, winner guidance, CI interpretation,
    /// a power estimate when inconclusive, and a latency-skew warning.
    fn generate_recommendations(
        test: &ABTest,
        is_significant: bool,
        winner: Option<ABTestVariant>,
        relative_improvement: f64,
        confidence_interval: &(f64, f64),
    ) -> Vec<String> {
        let mut recommendations = Vec::new();

        let total_impressions =
            test.control_metrics.impressions + test.treatment_metrics.impressions;

        // With too little combined data, nothing else is worth reporting.
        if total_impressions < MIN_SAMPLE_SIZE * 2 {
            recommendations.push(format!(
                "Insufficient data: {} impressions collected, need at least {} for reliable analysis",
                total_impressions,
                MIN_SAMPLE_SIZE * 2
            ));
            return recommendations;
        }

        if is_significant {
            match winner {
                Some(ABTestVariant::Treatment) => {
                    recommendations.push(format!(
                        "Treatment variant wins with {relative_improvement:.1}% relative improvement"
                    ));
                    recommendations
                        .push("Recommendation: Deploy treatment weights to production".to_string());

                    // Very large lifts deserve a sanity check.
                    if relative_improvement > 20.0 {
                        recommendations.push(
                            "Strong effect detected - consider investigating what drove the improvement".to_string()
                        );
                    }
                }
                Some(ABTestVariant::Control) => {
                    recommendations.push(format!(
                        "Control variant wins - treatment performed {:.1}% worse",
                        -relative_improvement
                    ));
                    recommendations.push(
                        "Recommendation: Keep current weights, do not deploy treatment".to_string(),
                    );
                }
                None => {}
            }
        } else {
            recommendations.push("No statistically significant difference detected".to_string());

            let (ci_low, ci_high) = *confidence_interval;
            if ci_low < 0.0 && ci_high > 0.0 {
                recommendations.push(
                    "Confidence interval includes zero - effect may be negligible".to_string(),
                );
            }

            // Suggest more data when the test is underpowered (< 80%).
            let current_power = Self::estimate_power(test);
            if current_power < 0.8 {
                let needed = Self::estimate_needed_sample_size(test, 0.8);
                recommendations.push(format!(
                    "Current statistical power: {:.1}%. Need ~{} more impressions per variant for 80% power",
                    current_power * 100.0,
                    needed
                ));
            }
        }

        // Large latency differences can confound behavioral metrics.
        if test.control_metrics.latency_samples > 0 && test.treatment_metrics.latency_samples > 0 {
            let control_latency = test.control_metrics.avg_latency_ms();
            let treatment_latency = test.treatment_metrics.avg_latency_ms();
            let latency_diff = (treatment_latency - control_latency) / control_latency * 100.0;

            if latency_diff.abs() > 20.0 {
                recommendations.push(format!(
                    "Warning: Latency differs by {latency_diff:.1}% between variants - may affect user behavior"
                ));
            }
        }

        recommendations
    }
924
    /// Rough statistical power estimate from the observed effect size
    /// (Cohen's h via arcsine transform) and the harmonic-mean sample
    /// size of the two arms.
    ///
    /// NOTE(review): computes Φ(h·√(n_eff/2)) without subtracting a
    /// z_α term, so this will overstate power relative to the textbook
    /// two-sample formula — confirm the looser estimate is intended.
    fn estimate_power(test: &ABTest) -> f64 {
        let n1 = test.control_metrics.impressions as f64;
        let n2 = test.treatment_metrics.impressions as f64;
        let p1 = test.control_metrics.ctr();
        let p2 = test.treatment_metrics.ctr();

        if n1 == 0.0 || n2 == 0.0 || p1 == 0.0 {
            return 0.0;
        }

        // Cohen's h (signed): difference of arcsine-transformed CTRs.
        let h = 2.0 * ((p2.sqrt()).asin() - (p1.sqrt()).asin());

        // Harmonic-mean effective sample size of the two arms.
        let n_eff = 2.0 / (1.0 / n1 + 1.0 / n2);

        let z = h * (n_eff / 2.0).sqrt();
        // Standard normal CDF via erf.
        let power = 0.5 * (1.0 + Self::erf(z / 2.0_f64.sqrt()));

        power.clamp(0.0, 1.0)
    }
948
949 fn erf(x: f64) -> f64 {
951 let a1 = 0.254829592;
952 let a2 = -0.284496736;
953 let a3 = 1.421413741;
954 let a4 = -1.453152027;
955 let a5 = 1.061405429;
956 let p = 0.3275911;
957
958 let sign = if x < 0.0 { -1.0 } else { 1.0 };
959 let x = x.abs();
960
961 let t = 1.0 / (1.0 + p * x);
962 let y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * (-x * x).exp();
963
964 sign * y
965 }
966
    /// Classic per-group sample-size formula for comparing two
    /// proportions: n = 2·p̄(1-p̄)·(z_α + z_β)² / Δ², using the observed
    /// CTRs as the planning estimates.
    ///
    /// Returns a 10,000 fallback when either CTR is zero or the observed
    /// difference is below 0.001 (formula would blow up or be useless).
    fn estimate_needed_sample_size(test: &ABTest, target_power: f64) -> u64 {
        let p1 = test.control_metrics.ctr();
        let p2 = test.treatment_metrics.ctr();

        if p1 == 0.0 || p2 == 0.0 || (p2 - p1).abs() < 0.001 {
            return 10000;
        }

        let effect = (p2 - p1).abs();
        let pooled_p = (p1 + p2) / 2.0;
        let pooled_var = pooled_p * (1.0 - pooled_p);

        // z_alpha fixed at 1.96 (two-sided α = 0.05).
        let z_alpha = 1.96;
        let z_beta = Self::inverse_normal_cdf(target_power);

        let n = 2.0 * pooled_var * (z_alpha + z_beta).powi(2) / effect.powi(2);

        n.ceil() as u64
    }
990
991 fn inverse_normal_cdf(p: f64) -> f64 {
993 let a = [
995 -3.969683028665376e+01,
996 2.209460984245205e+02,
997 -2.759285104469687e+02,
998 1.383577518672690e+02,
999 -3.066479806614716e+01,
1000 2.506628277459239e+00,
1001 ];
1002 let b = [
1003 -5.447609879822406e+01,
1004 1.615858368580409e+02,
1005 -1.556989798598866e+02,
1006 6.680131188771972e+01,
1007 -1.328068155288572e+01,
1008 ];
1009 let c = [
1010 -7.784894002430293e-03,
1011 -3.223964580411365e-01,
1012 -2.400758277161838e+00,
1013 -2.549732539343734e+00,
1014 4.374664141464968e+00,
1015 2.938163982698783e+00,
1016 ];
1017 let d = [
1018 7.784695709041462e-03,
1019 3.224671290700398e-01,
1020 2.445134137142996e+00,
1021 3.754408661907416e+00,
1022 ];
1023
1024 let p_low = 0.02425;
1025 let p_high = 1.0 - p_low;
1026
1027 if p < p_low {
1028 let q = (-2.0 * p.ln()).sqrt();
1029 (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5])
1030 / ((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1.0)
1031 } else if p <= p_high {
1032 let q = p - 0.5;
1033 let r = q * q;
1034 (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * q
1035 / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1.0)
1036 } else {
1037 let q = (-2.0 * (1.0 - p).ln()).sqrt();
1038 -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5])
1039 / ((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1.0)
1040 }
1041 }
1042
    /// Bayesian comparison of the variants by Monte-Carlo sampling from
    /// Beta posteriors over each arm's CTR.
    pub fn bayesian_analysis(test: &ABTest) -> BayesianAnalysis {
        let control = &test.control_metrics;
        let treatment = &test.treatment_metrics;

        // Jeffreys prior Beta(0.5, 0.5) plus observed clicks/non-clicks.
        let alpha_c = control.clicks as f64 + 0.5;
        let beta_c = (control.impressions as f64 - control.clicks as f64) + 0.5;
        let alpha_t = treatment.clicks as f64 + 0.5;
        let beta_t = (treatment.impressions as f64 - treatment.clicks as f64) + 0.5;

        let n_samples = 10000;
        let mut treatment_wins = 0;
        let mut lift_sum = 0.0;
        let mut lifts = Vec::with_capacity(n_samples);

        // Fixed-seed LCG keeps the analysis deterministic across runs.
        // NOTE(review): this LCG can emit 0.0, which feeds ln(0) in the
        // Box-Muller step — presumably tolerated in practice; confirm.
        let mut seed = 12345u64;
        let lcg = |s: &mut u64| -> f64 {
            *s = s.wrapping_mul(6364136223846793005).wrapping_add(1);
            (*s as f64) / (u64::MAX as f64)
        };

        for _ in 0..n_samples {
            // One posterior draw of each arm's CTR.
            let p_c = Self::beta_sample(alpha_c, beta_c, &mut seed, &lcg);
            let p_t = Self::beta_sample(alpha_t, beta_t, &mut seed, &lcg);

            if p_t > p_c {
                treatment_wins += 1;
            }

            // Relative lift of treatment over control for this draw.
            let lift = if p_c > 0.0 { (p_t - p_c) / p_c } else { 0.0 };
            lift_sum += lift;
            lifts.push(lift);
        }

        // Sort to read percentiles for the credible interval.
        lifts.sort_by(|a, b| a.total_cmp(b));

        let prob_treatment_better = treatment_wins as f64 / n_samples as f64;
        let expected_lift = lift_sum / n_samples as f64;

        // Central 95% credible interval (2.5th and 97.5th percentiles).
        let ci_low = lifts[(n_samples as f64 * 0.025) as usize];
        let ci_high = lifts[(n_samples as f64 * 0.975) as usize];

        // Expected loss of each decision: average magnitude of the lift
        // over the draws where that decision is wrong.
        let risk_treatment =
            lifts.iter().filter(|&&l| l < 0.0).map(|l| -l).sum::<f64>() / n_samples as f64;
        let risk_control = lifts.iter().filter(|&&l| l > 0.0).sum::<f64>() / n_samples as f64;

        BayesianAnalysis {
            prob_treatment_better,
            prob_control_better: 1.0 - prob_treatment_better,
            expected_lift,
            credible_interval: (ci_low, ci_high),
            risk_treatment,
            risk_control,
        }
    }
1110
1111 fn beta_sample(alpha: f64, beta: f64, seed: &mut u64, lcg: &impl Fn(&mut u64) -> f64) -> f64 {
1113 let gamma_a = Self::gamma_sample(alpha, seed, lcg);
1115 let gamma_b = Self::gamma_sample(beta, seed, lcg);
1116 gamma_a / (gamma_a + gamma_b)
1117 }
1118
    /// Gamma(alpha, 1) sampler using the Marsaglia-Tsang squeeze/rejection
    /// method, with the standard boost `Gamma(a) = Gamma(a+1) * U^(1/a)`
    /// for shape parameters below 1.
    fn gamma_sample(alpha: f64, seed: &mut u64, lcg: &impl Fn(&mut u64) -> f64) -> f64 {
        if alpha < 1.0 {
            // Boost: sample at shape alpha+1 and scale back down.
            return Self::gamma_sample(alpha + 1.0, seed, lcg) * lcg(seed).powf(1.0 / alpha);
        }

        let d = alpha - 1.0 / 3.0;
        let c = 1.0 / (9.0 * d).sqrt();

        // Rejection loop: propose d*(1 + c*x)^3 from a normal draw x,
        // accept via the fast squeeze or the exact log condition.
        loop {
            let x = Self::normal_sample(seed, lcg);
            let v = (1.0 + c * x).powi(3);
            if v > 0.0 {
                let u = lcg(seed);
                if u < 1.0 - 0.0331 * x.powi(4) || u.ln() < 0.5 * x.powi(2) + d * (1.0 - v + v.ln())
                {
                    return d * v;
                }
            }
        }
    }
1141
1142 fn normal_sample(seed: &mut u64, lcg: &impl Fn(&mut u64) -> f64) -> f64 {
1144 let u1 = lcg(seed);
1145 let u2 = lcg(seed);
1146 (-2.0 * u1.ln()).sqrt() * (2.0 * std::f64::consts::PI * u2).cos()
1147 }
1148
    /// Practical-significance measures for the CTR difference: Cohen's h,
    /// its qualitative bucket, relative risk, odds ratio, and NNT.
    pub fn calculate_effect_size(test: &ABTest) -> EffectSize {
        let p1 = test.control_metrics.ctr();
        let p2 = test.treatment_metrics.ctr();

        // Cohen's h: difference of arcsine-transformed proportions.
        let phi1 = 2.0 * p1.sqrt().asin();
        let phi2 = 2.0 * p2.sqrt().asin();
        let cohens_h = (phi2 - phi1).abs();

        // Standard Cohen buckets: 0.2 / 0.5 / 0.8.
        let interpretation = if cohens_h < 0.2 {
            EffectSizeInterpretation::Negligible
        } else if cohens_h < 0.5 {
            EffectSizeInterpretation::Small
        } else if cohens_h < 0.8 {
            EffectSizeInterpretation::Medium
        } else {
            EffectSizeInterpretation::Large
        };

        // Relative risk; 0.0 sentinel when control CTR is zero.
        let relative_risk = if p1 > 0.0 { p2 / p1 } else { 0.0 };

        // Odds of each arm, guarding the p = 1 degenerate case.
        let odds_c = if p1 < 1.0 {
            p1 / (1.0 - p1)
        } else {
            f64::INFINITY
        };
        let odds_t = if p2 < 1.0 {
            p2 / (1.0 - p2)
        } else {
            f64::INFINITY
        };
        let odds_ratio = if odds_c > 0.0 && odds_c.is_finite() {
            odds_t / odds_c
        } else {
            0.0
        };

        // NNT = 1 / absolute risk difference (infinite when no difference).
        let ard = (p2 - p1).abs();
        let nnt = if ard > 0.0 { 1.0 / ard } else { f64::INFINITY };

        EffectSize {
            cohens_h,
            interpretation,
            relative_risk,
            odds_ratio,
            nnt,
        }
    }
1202
1203 pub fn check_srm(test: &ABTest) -> SRMCheck {
1208 let expected_ratio = test.config.traffic_split as f64;
1209 let total = test.control_metrics.impressions + test.treatment_metrics.impressions;
1210
1211 if total == 0 {
1212 return SRMCheck {
1213 srm_detected: false,
1214 expected_ratio,
1215 observed_ratio: 0.5,
1216 chi_squared: 0.0,
1217 p_value: 1.0,
1218 severity: SRMSeverity::None,
1219 };
1220 }
1221
1222 let observed_ratio = test.treatment_metrics.impressions as f64 / total as f64;
1223
1224 let expected_control = total as f64 * (1.0 - expected_ratio);
1226 let expected_treatment = total as f64 * expected_ratio;
1227
1228 let chi_sq = (test.control_metrics.impressions as f64 - expected_control).powi(2)
1230 / expected_control
1231 + (test.treatment_metrics.impressions as f64 - expected_treatment).powi(2)
1232 / expected_treatment;
1233
1234 let p_value = Self::chi_squared_p_value(chi_sq);
1235
1236 let deviation = (observed_ratio - expected_ratio).abs();
1238 let severity = if p_value > 0.01 {
1239 SRMSeverity::None
1240 } else if deviation < SRM_THRESHOLD {
1241 SRMSeverity::Warning
1242 } else {
1243 SRMSeverity::Critical
1244 };
1245
1246 SRMCheck {
1247 srm_detected: p_value < 0.01,
1248 expected_ratio,
1249 observed_ratio,
1250 chi_squared: chi_sq,
1251 p_value,
1252 severity,
1253 }
1254 }
1255
    /// Interim (sequential) analysis with an O'Brien-Fleming-style alpha
    /// spending function: alpha_spent(t) = 2·(1 - Φ(z_{α/2} / √t)) at
    /// information fraction t = analysis_number / planned_analyses.
    ///
    /// NOTE(review): `analysis_number == 0` or `planned_analyses == 0`
    /// produces a zero/NaN information fraction and a division by zero in
    /// `current_alpha`; the in-file caller passes (1, 5) — confirm
    /// external callers always pass positive counts.
    pub fn sequential_analysis(
        test: &ABTest,
        analysis_number: u32,
        planned_analyses: u32,
    ) -> SequentialTest {
        let fraction = analysis_number as f64 / planned_analyses as f64;

        let alpha = test.config.significance_level;
        // Cumulative alpha budget spent up to this look.
        let alpha_spent = 2.0
            * (1.0
                - Self::normal_cdf(Self::inverse_normal_cdf(1.0 - alpha / 2.0) / fraction.sqrt()));

        // Crude per-look threshold: spread the spent alpha evenly.
        let current_alpha = alpha_spent / analysis_number as f64;

        let (_, p_value) = Self::chi_squared_test(
            test.control_metrics.impressions,
            test.control_metrics.clicks,
            test.treatment_metrics.impressions,
            test.treatment_metrics.clicks,
        );

        // Early stopping needs the adjusted threshold met plus at least
        // half the configured minimum sample in each arm.
        let can_stop_early = p_value < current_alpha
            && test.control_metrics.impressions >= test.config.min_impressions / 2
            && test.treatment_metrics.impressions >= test.config.min_impressions / 2;

        // Label the stop as futility (negligible effect) or efficacy.
        let stop_reason = if can_stop_early {
            let effect = Self::calculate_effect_size(test);
            if effect.interpretation == EffectSizeInterpretation::Negligible {
                Some("Futility: Effect size too small to be practically significant".to_string())
            } else {
                Some(format!(
                    "Efficacy: Significant result with {} effect",
                    effect.interpretation
                ))
            }
        } else {
            None
        };

        SequentialTest {
            analysis_number,
            planned_analyses,
            alpha_spent,
            current_alpha,
            can_stop_early,
            stop_reason,
        }
    }
1311
1312 fn normal_cdf(x: f64) -> f64 {
1314 0.5 * (1.0 + Self::erf(x / 2.0_f64.sqrt()))
1315 }
1316
    /// Runs every analysis (frequentist, Bayesian, effect size, SRM,
    /// sequential) and combines them into a ship/no-ship verdict plus
    /// human-readable insight strings.
    pub fn comprehensive_analysis(test: &ABTest) -> ComprehensiveAnalysis {
        let frequentist = Self::analyze(test);
        let bayesian = Self::bayesian_analysis(test);
        let effect_size = Self::calculate_effect_size(test);
        let srm = Self::check_srm(test);
        // Treated as the first of five planned interim looks.
        let sequential = Self::sequential_analysis(test, 1, 5);

        let is_practically_significant = effect_size.cohens_h >= MIN_PRACTICAL_EFFECT_SIZE;
        let has_data_quality_issues = srm.srm_detected;
        let high_confidence =
            bayesian.prob_treatment_better > 0.95 || bayesian.prob_control_better > 0.95;
        let low_risk = bayesian.risk_treatment < 0.01 || bayesian.risk_control < 0.01;

        // Ship only when every gate agrees AND treatment is the winner.
        let should_ship = frequentist.is_significant
            && is_practically_significant
            && !has_data_quality_issues
            && high_confidence
            && low_risk
            && frequentist.winner == Some(ABTestVariant::Treatment);

        let mut insights = Vec::new();

        // Headline verdict.
        if should_ship {
            insights.push(format!(
                "✅ SHIP IT: Treatment is {:.1}% better with {:.1}% confidence and {} effect size",
                bayesian.expected_lift * 100.0,
                bayesian.prob_treatment_better * 100.0,
                effect_size.interpretation
            ));
        } else if frequentist.winner == Some(ABTestVariant::Control) && frequentist.is_significant {
            insights.push(format!(
                "❌ DO NOT SHIP: Control is {:.1}% better. Treatment would hurt users.",
                -bayesian.expected_lift * 100.0
            ));
        } else {
            insights.push("⏳ KEEP TESTING: Not enough evidence to make a decision".to_string());
        }

        // Supporting detail for each failed gate.
        if !frequentist.is_significant {
            insights.push(format!(
                "📊 p-value = {:.4} (need < {:.2})",
                frequentist.p_value, test.config.significance_level
            ));
        }

        if !is_practically_significant {
            insights.push(format!(
                "📏 Effect is {} (Cohen's h = {:.3}) - may not matter to users",
                effect_size.interpretation, effect_size.cohens_h
            ));
        }

        if has_data_quality_issues {
            insights.push(format!(
                "⚠️ DATA QUALITY: Sample ratio mismatch detected ({:.1}% vs expected {:.1}%)",
                srm.observed_ratio * 100.0,
                srm.expected_ratio * 100.0
            ));
        }

        if bayesian.risk_treatment > 0.01 {
            insights.push(format!(
                "🎲 Risk if shipping treatment: {:.2}% expected loss",
                bayesian.risk_treatment * 100.0
            ));
        }

        if effect_size.nnt.is_finite() && effect_size.nnt < 1000.0 {
            insights.push(format!(
                "👥 Impact: 1 in {:.0} users will benefit from this change",
                effect_size.nnt
            ));
        }

        ComprehensiveAnalysis {
            frequentist,
            bayesian,
            effect_size,
            srm,
            sequential,
            should_ship,
            is_practically_significant,
            insights,
        }
    }
1416}
1417
/// Bundle of every analysis plus the combined ship/no-ship verdict.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComprehensiveAnalysis {
    pub frequentist: ABTestResults,
    pub bayesian: BayesianAnalysis,
    pub effect_size: EffectSize,
    pub srm: SRMCheck,
    pub sequential: SequentialTest,
    /// True only when all gates pass and Treatment is the winner.
    pub should_ship: bool,
    /// Cohen's h at or above `MIN_PRACTICAL_EFFECT_SIZE`.
    pub is_practically_significant: bool,
    /// Human-readable summary lines.
    pub insights: Vec<String>,
}
1438
/// Thread-safe registry of A/B tests, shareable across threads via the
/// internal `Arc<RwLock<...>>` handles.
pub struct ABTestManager {
    /// Active (non-archived) tests keyed by config id.
    tests: Arc<RwLock<HashMap<String, ABTest>>>,
    /// Storage for archived tests.
    /// NOTE(review): not written by any visible method — presumably
    /// populated by code outside this view; confirm.
    archived: Arc<RwLock<Vec<ABTest>>>,
}
1450
impl Default for ABTestManager {
    /// Equivalent to [`ABTestManager::new`]: a manager with no tests.
    fn default() -> Self {
        Self::new()
    }
}
1456
1457impl ABTestManager {
1458 pub fn new() -> Self {
1460 Self {
1461 tests: Arc::new(RwLock::new(HashMap::new())),
1462 archived: Arc::new(RwLock::new(Vec::new())),
1463 }
1464 }
1465
1466 pub fn create_test(&self, test: ABTest) -> Result<String, ABTestError> {
1468 let id = test.config.id.clone();
1469
1470 let mut tests = self.tests.write();
1471 if tests.contains_key(&id) {
1472 return Err(ABTestError::TestAlreadyExists(id));
1473 }
1474
1475 tests.insert(id.clone(), test);
1476 Ok(id)
1477 }
1478
1479 pub fn get_test(&self, test_id: &str) -> Option<ABTest> {
1481 self.tests.read().get(test_id).cloned()
1482 }
1483
1484 pub fn list_tests(&self) -> Vec<ABTest> {
1486 self.tests.read().values().cloned().collect()
1487 }
1488
1489 pub fn list_tests_by_status(&self, status: ABTestStatus) -> Vec<ABTest> {
1491 self.tests
1492 .read()
1493 .values()
1494 .filter(|t| t.status == status)
1495 .cloned()
1496 .collect()
1497 }
1498
1499 pub fn start_test(&self, test_id: &str) -> Result<(), ABTestError> {
1501 let mut tests = self.tests.write();
1502 let test = tests
1503 .get_mut(test_id)
1504 .ok_or_else(|| ABTestError::TestNotFound(test_id.to_string()))?;
1505
1506 if test.status != ABTestStatus::Draft {
1507 return Err(ABTestError::InvalidState(format!(
1508 "Cannot start test in {:?} state",
1509 test.status
1510 )));
1511 }
1512
1513 test.start();
1514 Ok(())
1515 }
1516
1517 pub fn pause_test(&self, test_id: &str) -> Result<(), ABTestError> {
1519 let mut tests = self.tests.write();
1520 let test = tests
1521 .get_mut(test_id)
1522 .ok_or_else(|| ABTestError::TestNotFound(test_id.to_string()))?;
1523
1524 test.pause();
1525 Ok(())
1526 }
1527
1528 pub fn resume_test(&self, test_id: &str) -> Result<(), ABTestError> {
1530 let mut tests = self.tests.write();
1531 let test = tests
1532 .get_mut(test_id)
1533 .ok_or_else(|| ABTestError::TestNotFound(test_id.to_string()))?;
1534
1535 test.resume();
1536 Ok(())
1537 }
1538
1539 pub fn complete_test(&self, test_id: &str) -> Result<ABTestResults, ABTestError> {
1541 let results = {
1542 let tests = self.tests.read();
1543 let test = tests
1544 .get(test_id)
1545 .ok_or_else(|| ABTestError::TestNotFound(test_id.to_string()))?;
1546
1547 ABTestAnalyzer::analyze(test)
1548 };
1549
1550 let mut tests = self.tests.write();
1551 if let Some(test) = tests.get_mut(test_id) {
1552 test.complete();
1553 }
1554
1555 Ok(results)
1556 }
1557
1558 pub fn archive_test(&self, test_id: &str) -> Result<(), ABTestError> {
1560 let mut tests = self.tests.write();
1561 let mut test = tests
1562 .remove(test_id)
1563 .ok_or_else(|| ABTestError::TestNotFound(test_id.to_string()))?;
1564
1565 test.archive();
1566 self.archived.write().push(test);
1567
1568 Ok(())
1569 }
1570
1571 pub fn delete_test(&self, test_id: &str) -> Result<(), ABTestError> {
1573 let mut tests = self.tests.write();
1574 tests
1575 .remove(test_id)
1576 .ok_or_else(|| ABTestError::TestNotFound(test_id.to_string()))?;
1577 Ok(())
1578 }
1579
1580 pub fn get_variant(&self, test_id: &str, user_id: &str) -> Result<ABTestVariant, ABTestError> {
1582 let mut tests = self.tests.write();
1583 let test = tests
1584 .get_mut(test_id)
1585 .ok_or_else(|| ABTestError::TestNotFound(test_id.to_string()))?;
1586
1587 if test.status != ABTestStatus::Running {
1588 return Err(ABTestError::TestNotRunning(test_id.to_string()));
1589 }
1590
1591 Ok(test.get_variant(user_id))
1592 }
1593
1594 pub fn get_weights_for_user(
1596 &self,
1597 test_id: &str,
1598 user_id: &str,
1599 ) -> Result<LearnedWeights, ABTestError> {
1600 let mut tests = self.tests.write();
1601 let test = tests
1602 .get_mut(test_id)
1603 .ok_or_else(|| ABTestError::TestNotFound(test_id.to_string()))?;
1604
1605 if test.status != ABTestStatus::Running {
1606 return Err(ABTestError::TestNotRunning(test_id.to_string()));
1607 }
1608
1609 let variant = test.get_variant(user_id);
1610 Ok(test.get_weights(variant).clone())
1611 }
1612
1613 pub fn record_impression(
1615 &self,
1616 test_id: &str,
1617 user_id: &str,
1618 relevance_score: f64,
1619 latency_us: u64,
1620 ) -> Result<(), ABTestError> {
1621 let mut tests = self.tests.write();
1622 let test = tests
1623 .get_mut(test_id)
1624 .ok_or_else(|| ABTestError::TestNotFound(test_id.to_string()))?;
1625
1626 if test.status != ABTestStatus::Running {
1627 return Err(ABTestError::TestNotRunning(test_id.to_string()));
1628 }
1629
1630 test.record_impression(user_id, relevance_score, latency_us);
1631 Ok(())
1632 }
1633
1634 pub fn record_click(
1636 &self,
1637 test_id: &str,
1638 user_id: &str,
1639 memory_id: Uuid,
1640 ) -> Result<(), ABTestError> {
1641 let mut tests = self.tests.write();
1642 let test = tests
1643 .get_mut(test_id)
1644 .ok_or_else(|| ABTestError::TestNotFound(test_id.to_string()))?;
1645
1646 if test.status != ABTestStatus::Running {
1647 return Err(ABTestError::TestNotRunning(test_id.to_string()));
1648 }
1649
1650 test.record_click(user_id, memory_id);
1651 Ok(())
1652 }
1653
1654 pub fn record_feedback(
1656 &self,
1657 test_id: &str,
1658 user_id: &str,
1659 positive: bool,
1660 ) -> Result<(), ABTestError> {
1661 let mut tests = self.tests.write();
1662 let test = tests
1663 .get_mut(test_id)
1664 .ok_or_else(|| ABTestError::TestNotFound(test_id.to_string()))?;
1665
1666 if test.status != ABTestStatus::Running {
1667 return Err(ABTestError::TestNotRunning(test_id.to_string()));
1668 }
1669
1670 test.record_feedback(user_id, positive);
1671 Ok(())
1672 }
1673
1674 pub fn analyze_test(&self, test_id: &str) -> Result<ABTestResults, ABTestError> {
1676 let tests = self.tests.read();
1677 let test = tests
1678 .get(test_id)
1679 .ok_or_else(|| ABTestError::TestNotFound(test_id.to_string()))?;
1680
1681 Ok(ABTestAnalyzer::analyze(test))
1682 }
1683
1684 pub fn list_archived(&self) -> Vec<ABTest> {
1686 self.archived.read().clone()
1687 }
1688
1689 pub fn check_expired_tests(&self) -> Vec<String> {
1691 let mut expired = Vec::new();
1692
1693 let mut tests = self.tests.write();
1694 for (id, test) in tests.iter_mut() {
1695 if test.status == ABTestStatus::Running && test.is_expired() {
1696 test.complete();
1697 expired.push(id.clone());
1698 }
1699 }
1700
1701 expired
1702 }
1703
1704 pub fn summary(&self) -> ABTestManagerSummary {
1706 let tests = self.tests.read();
1707 let archived = self.archived.read();
1708
1709 let mut draft = 0;
1710 let mut running = 0;
1711 let mut paused = 0;
1712 let mut completed = 0;
1713
1714 for test in tests.values() {
1715 match test.status {
1716 ABTestStatus::Draft => draft += 1,
1717 ABTestStatus::Running => running += 1,
1718 ABTestStatus::Paused => paused += 1,
1719 ABTestStatus::Completed => completed += 1,
1720 ABTestStatus::Archived => {}
1721 }
1722 }
1723
1724 ABTestManagerSummary {
1725 total_active: tests.len(),
1726 draft,
1727 running,
1728 paused,
1729 completed,
1730 archived: archived.len(),
1731 }
1732 }
1733}
1734
/// Point-in-time counts of tests by lifecycle state, as returned by
/// [`ABTestManager::summary`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ABTestManagerSummary {
    /// Total tests in the active map (sum over all non-archived states).
    pub total_active: usize,
    /// Tests created but not yet started.
    pub draft: usize,
    /// Tests currently collecting traffic.
    pub running: usize,
    /// Tests temporarily paused.
    pub paused: usize,
    /// Tests completed but not yet archived (still in the active map).
    pub completed: usize,
    /// Tests moved to the archive; not included in `total_active`.
    pub archived: usize,
}
1745
/// Errors returned by [`ABTestManager`] operations.
///
/// Display strings come from the `thiserror` `#[error]` attributes.
#[derive(Debug, Clone, thiserror::Error)]
pub enum ABTestError {
    /// No active test exists with the given id.
    #[error("Test not found: {0}")]
    TestNotFound(String),

    /// A test with the given id is already registered.
    #[error("Test already exists: {0}")]
    TestAlreadyExists(String),

    /// The operation requires the test to be in the `Running` state.
    #[error("Test is not running: {0}")]
    TestNotRunning(String),

    /// The requested lifecycle transition is not valid from the current state.
    #[error("Invalid state: {0}")]
    InvalidState(String),

    /// Not enough samples were collected to perform the analysis.
    #[error("Insufficient data for analysis")]
    InsufficientData,
}
1768
1769#[cfg(test)]
1774mod tests {
1775 use super::*;
1776
1777 #[test]
1778 fn test_variant_assignment_consistency() {
1779 let mut test = ABTest::builder("test").with_traffic_split(0.5).build();
1780
1781 let user = "user_123";
1783 let variant1 = test.get_variant(user);
1784 let variant2 = test.get_variant(user);
1785 let variant3 = test.get_variant(user);
1786
1787 assert_eq!(variant1, variant2);
1788 assert_eq!(variant2, variant3);
1789 }
1790
1791 #[test]
1792 fn test_traffic_split() {
1793 let mut test = ABTest::builder("test").with_traffic_split(0.5).build();
1794
1795 let mut _control_count = 0;
1796 let mut treatment_count = 0;
1797
1798 for i in 0..1000 {
1800 let user = format!("user_{i}");
1801 match test.get_variant(&user) {
1802 ABTestVariant::Control => _control_count += 1,
1803 ABTestVariant::Treatment => treatment_count += 1,
1804 }
1805 }
1806
1807 let ratio = treatment_count as f64 / 1000.0;
1809 assert!(ratio > 0.4 && ratio < 0.6, "Ratio was {ratio}");
1810 }
1811
1812 #[test]
1813 fn test_metrics_tracking() {
1814 let mut test = ABTest::builder("test").build();
1815 test.start();
1816
1817 test.record_impression("user_1", 0.8, 5000);
1819 test.record_impression("user_1", 0.7, 4000);
1820 test.record_click("user_1", Uuid::new_v4());
1821 test.record_feedback("user_1", true);
1822
1823 let variant = test.get_variant("user_1");
1824 let metrics = test.get_metrics(variant);
1825
1826 assert_eq!(metrics.impressions, 2);
1827 assert_eq!(metrics.clicks, 1);
1828 assert_eq!(metrics.positive_feedback, 1);
1829 assert_eq!(metrics.unique_users, 1);
1830 assert!((metrics.ctr() - 0.5).abs() < 0.001);
1831 }
1832
1833 #[test]
1834 fn test_chi_squared_significant() {
1835 let (chi_sq, p_value) = ABTestAnalyzer::chi_squared_test(
1837 1000, 100, 1000, 200, );
1840
1841 assert!(chi_sq > CHI_SQUARED_CRITICAL_005);
1842 assert!(p_value < 0.05);
1843 }
1844
1845 #[test]
1846 fn test_chi_squared_not_significant() {
1847 let (chi_sq, p_value) = ABTestAnalyzer::chi_squared_test(
1849 50, 5, 50, 6, );
1852
1853 assert!(p_value > 0.05 || chi_sq < CHI_SQUARED_CRITICAL_005);
1855 }
1856
1857 #[test]
1858 fn test_confidence_interval() {
1859 let (low, high) = ABTestAnalyzer::calculate_confidence_interval(
1860 1000, 100, 1000, 150, );
1863
1864 assert!(low < 0.05);
1866 assert!(high > 0.05);
1867 assert!(low > 0.0 || high < 0.0 || (low < 0.0 && high > 0.0));
1869 }
1870
1871 #[test]
1872 fn test_manager_lifecycle() {
1873 let manager = ABTestManager::new();
1874
1875 let test = ABTest::builder("test_lifecycle")
1877 .with_description("Test lifecycle management")
1878 .build();
1879
1880 let id = manager.create_test(test).unwrap();
1881
1882 manager.start_test(&id).unwrap();
1884 let test = manager.get_test(&id).unwrap();
1885 assert_eq!(test.status, ABTestStatus::Running);
1886
1887 manager.record_impression(&id, "user_1", 0.8, 5000).unwrap();
1889 manager.record_click(&id, "user_1", Uuid::new_v4()).unwrap();
1890
1891 let results = manager.analyze_test(&id).unwrap();
1893 assert!(!results.is_significant); manager.complete_test(&id).unwrap();
1897 let test = manager.get_test(&id).unwrap();
1898 assert_eq!(test.status, ABTestStatus::Completed);
1899
1900 manager.archive_test(&id).unwrap();
1902 assert!(manager.get_test(&id).is_none());
1903 assert_eq!(manager.list_archived().len(), 1);
1904 }
1905
1906 #[test]
1907 fn test_learned_weights_integration() {
1908 let control = LearnedWeights::default();
1909 let mut treatment = LearnedWeights {
1910 semantic: 0.6,
1911 entity: 0.2,
1912 ..Default::default()
1913 };
1914 treatment.normalize();
1915
1916 let test = ABTest::builder("weights_test")
1917 .with_control(control.clone())
1918 .with_treatment(treatment.clone())
1919 .build();
1920
1921 assert_eq!(
1922 test.get_weights(ABTestVariant::Control).semantic,
1923 control.semantic
1924 );
1925 assert_eq!(
1926 test.get_weights(ABTestVariant::Treatment).semantic,
1927 treatment.semantic
1928 );
1929 }
1930
1931 #[test]
1932 fn test_ctr_calculation() {
1933 let mut metrics = VariantMetrics::default();
1934
1935 assert_eq!(metrics.ctr(), 0.0); metrics.impressions = 100;
1938 metrics.clicks = 10;
1939
1940 assert!((metrics.ctr() - 0.1).abs() < 0.001);
1941 }
1942
1943 #[test]
1944 fn test_success_rate_calculation() {
1945 let mut metrics = VariantMetrics::default();
1946
1947 assert_eq!(metrics.success_rate(), 0.0); metrics.positive_feedback = 8;
1950 metrics.negative_feedback = 2;
1951
1952 assert!((metrics.success_rate() - 0.8).abs() < 0.001);
1953 }
1954
1955 #[test]
1956 fn test_power_estimation() {
1957 let mut test = ABTest::builder("power_test").build();
1958
1959 for i in 0..500 {
1961 let user = format!("control_{i}");
1962 test.user_assignments
1963 .insert(user.clone(), ABTestVariant::Control);
1964 test.control_metrics.impressions += 1;
1965 test.control_metrics.unique_users += 1;
1966 if i % 10 == 0 {
1967 test.control_metrics.clicks += 1;
1969 }
1970 }
1971
1972 for i in 0..500 {
1973 let user = format!("treatment_{i}");
1974 test.user_assignments
1975 .insert(user.clone(), ABTestVariant::Treatment);
1976 test.treatment_metrics.impressions += 1;
1977 test.treatment_metrics.unique_users += 1;
1978 if i % 5 == 0 {
1979 test.treatment_metrics.clicks += 1;
1981 }
1982 }
1983
1984 let power = ABTestAnalyzer::estimate_power(&test);
1985 assert!(power > 0.5, "Power was {power}"); }
1987
1988 #[test]
1989 fn test_manager_summary() {
1990 let manager = ABTestManager::new();
1991
1992 let test1 = ABTest::builder("draft_test").build();
1994 manager.create_test(test1).unwrap();
1995
1996 let test2 = ABTest::builder("running_test").build();
1997 let id2 = manager.create_test(test2).unwrap();
1998 manager.start_test(&id2).unwrap();
1999
2000 let summary = manager.summary();
2001 assert_eq!(summary.total_active, 2);
2002 assert_eq!(summary.draft, 1);
2003 assert_eq!(summary.running, 1);
2004 }
2005
2006 #[test]
2007 fn test_recommendations_generation() {
2008 let mut test = ABTest::builder("recommendations_test")
2009 .with_min_impressions(100)
2010 .build();
2011
2012 test.control_metrics.impressions = 1000;
2014 test.control_metrics.clicks = 100; test.treatment_metrics.impressions = 1000;
2016 test.treatment_metrics.clicks = 200; let results = ABTestAnalyzer::analyze(&test);
2019
2020 assert!(results.is_significant);
2021 assert_eq!(results.winner, Some(ABTestVariant::Treatment));
2022 assert!(!results.recommendations.is_empty());
2023 assert!(results
2024 .recommendations
2025 .iter()
2026 .any(|r| r.contains("Treatment")));
2027 }
2028
    /// Demo-style test: prints a walkthrough of three analysis scenarios and
    /// asserts only the headline outcome of the last one (run with
    /// `cargo test -- --nocapture` to see the narrative).
    #[test]
    fn test_ab_demo_with_numbers() {
        println!("\n========================================");
        println!("  A/B TESTING DEMO WITH NUMBERS");
        println!("========================================\n");

        // Scenario 1: large effect (10% -> 20% CTR) with ample data — the
        // chi-squared test should flag this as decisively significant.
        println!("📊 SCENARIO 1: Clear Winner (Treatment significantly better)");
        println!("   Control: 1000 impressions, 100 clicks (10.0% CTR)");
        println!("   Treatment: 1000 impressions, 200 clicks (20.0% CTR)");

        let (chi_sq, p_value) = ABTestAnalyzer::chi_squared_test(1000, 100, 1000, 200);
        let (ci_low, ci_high) = ABTestAnalyzer::calculate_confidence_interval(1000, 100, 1000, 200);

        println!("\n   RESULTS:");
        println!("   ├─ Chi-squared statistic: {chi_sq:.4}");
        println!("   ├─ P-value: {p_value:.6}");
        let significant = if p_value < 0.05 { "YES ✓" } else { "NO ✗" };
        println!("   ├─ Significant (p < 0.05): {significant}");
        println!("   ├─ 95% Confidence Interval: ({ci_low:.4}, {ci_high:.4})");
        let improvement = ((0.20 - 0.10) / 0.10) * 100.0;
        println!("   └─ Relative improvement: {improvement:.1}%");

        // Scenario 2: similar CTRs on a tiny sample — no significance, and
        // the CI should straddle zero.
        println!("\n📊 SCENARIO 2: No Significant Difference (Sample too small)");
        println!("   Control: 50 impressions, 5 clicks (10.0% CTR)");
        println!("   Treatment: 50 impressions, 6 clicks (12.0% CTR)");

        let (chi_sq2, p_value2) = ABTestAnalyzer::chi_squared_test(50, 5, 50, 6);
        let (ci_low2, ci_high2) = ABTestAnalyzer::calculate_confidence_interval(50, 5, 50, 6);

        println!("\n   RESULTS:");
        println!("   ├─ Chi-squared statistic: {chi_sq2:.4}");
        println!("   ├─ P-value: {p_value2:.6}");
        let significant2 = if p_value2 < 0.05 { "YES ✓" } else { "NO ✗" };
        println!("   ├─ Significant (p < 0.05): {significant2}");
        println!("   ├─ 95% Confidence Interval: ({ci_low2:.4}, {ci_high2:.4})");
        let ci_includes_zero = if ci_low2 < 0.0 && ci_high2 > 0.0 {
            "YES"
        } else {
            "NO"
        };
        println!("   └─ CI includes 0: {ci_includes_zero} (effect may be due to chance)");

        // Scenario 3: full analyze() pass over a seeded test, including the
        // generated recommendations.
        println!("\n📊 SCENARIO 3: Full Analysis with Recommendations");
        let mut test = ABTest::builder("semantic_weight_test")
            .with_min_impressions(100)
            .build();

        test.control_metrics.impressions = 5000;
        test.control_metrics.clicks = 500; test.control_metrics.positive_feedback = 400;
        test.control_metrics.negative_feedback = 50;

        test.treatment_metrics.impressions = 5000;
        test.treatment_metrics.clicks = 750; test.treatment_metrics.positive_feedback = 600;
        test.treatment_metrics.negative_feedback = 30;

        let results = ABTestAnalyzer::analyze(&test);

        println!("   Test: Comparing semantic weight emphasis");
        println!("   Control: 5000 impressions, 500 clicks (10.0% CTR)");
        println!("   Treatment: 5000 impressions, 750 clicks (15.0% CTR)");
        println!("\n   STATISTICAL RESULTS:");
        println!("   ├─ Chi-squared: {:.4}", results.chi_squared);
        println!("   ├─ P-value: {:.8}", results.p_value);
        println!(
            "   ├─ Confidence Level: {:.2}%",
            results.confidence_level * 100.0
        );
        println!(
            "   ├─ Significant: {}",
            if results.is_significant {
                "YES ✓"
            } else {
                "NO ✗"
            }
        );
        println!("   ├─ Winner: {:?}", results.winner);
        println!(
            "   ├─ Relative Improvement: {:.2}%",
            results.relative_improvement
        );
        println!("   ├─ Control CTR: {:.2}%", results.control_ctr * 100.0);
        println!("   ├─ Treatment CTR: {:.2}%", results.treatment_ctr * 100.0);
        println!(
            "   └─ 95% CI: ({:.4}, {:.4})",
            results.confidence_interval.0, results.confidence_interval.1
        );

        println!("\n   RECOMMENDATIONS:");
        for (i, rec) in results.recommendations.iter().enumerate() {
            println!("   {}. {}", i + 1, rec);
        }

        println!("\n========================================");
        println!("  END OF A/B TESTING DEMO");
        println!("========================================\n");

        // Only scenario 3's outcome is asserted; scenarios 1 and 2 are
        // narrative output.
        assert!(results.is_significant);
        assert_eq!(results.winner, Some(ABTestVariant::Treatment));
    }
2134
    /// End-to-end demo: ranks a synthetic memory corpus under two weight
    /// profiles, derives per-arm feedback rates from the resulting context
    /// quality, simulates sessions with a deterministic RNG, and runs the
    /// full comprehensive-analysis pipeline over the simulated traffic.
    #[test]
    fn test_comprehensive_analysis_demo() {
        println!("\n========================================");
        println!("  COMPREHENSIVE A/B ANALYSIS DEMO");
        println!("  (Dynamic Weight-Based Simulation)");
        println!("========================================\n");

        // Control arm: legacy profile that gives no credit to access_count
        // or graph_strength.
        let control_weights = LearnedWeights {
            semantic: 0.35,
            entity: 0.30,
            tag: 0.10,
            importance: 0.10,
            momentum: 0.15, access_count: 0.0, graph_strength: 0.0, update_count: 0,
            last_updated: None,
        };

        // Treatment arm: the current default profile.
        let treatment_weights = LearnedWeights::default(); println!("📊 WEIGHT COMPARISON:");
        println!("   Control (old): semantic={:.2}, entity={:.2}, momentum={:.2}, access={:.2}, graph={:.2}",
            control_weights.semantic, control_weights.entity, control_weights.momentum,
            control_weights.access_count, control_weights.graph_strength);
        println!("   Treatment (new): semantic={:.2}, entity={:.2}, momentum={:.2}, access={:.2}, graph={:.2}\n",
            treatment_weights.semantic, treatment_weights.entity, treatment_weights.momentum,
            treatment_weights.access_count, treatment_weights.graph_strength);

        // Synthetic corpus of (semantic, entity, tag, importance, momentum,
        // access_count, graph_strength, actually_relevant) rows. The
        // `false` rows are "traps": high surface scores but not relevant.
        #[allow(clippy::type_complexity)]
        let memory_corpus: Vec<(f32, f32, f32, f32, f32, u32, f32, bool)> = vec![
            (0.8, 0.7, 0.5, 0.8, 0.9, 15, 0.9, true), (0.7, 0.8, 0.6, 0.7, 0.8, 12, 0.85, true), (0.9, 0.6, 0.4, 0.9, 0.7, 10, 0.8, true), (0.6, 0.9, 0.7, 0.6, 0.85, 8, 0.75, true), (0.95, 0.9, 0.8, 0.9, -0.6, 1, 0.15, false), (0.9, 0.85, 0.7, 0.85, -0.4, 0, 0.1, false), (0.88, 0.82, 0.6, 0.8, -0.5, 1, 0.2, false), (0.85, 0.88, 0.75, 0.82, -0.3, 2, 0.25, false), (0.7, 0.5, 0.3, 0.6, 0.4, 4, 0.5, true), (0.5, 0.6, 0.4, 0.5, 0.3, 3, 0.45, true), (0.6, 0.55, 0.35, 0.55, 0.35, 3, 0.4, true), (0.4, 0.3, 0.2, 0.4, 0.1, 1, 0.2, false), (0.35, 0.4, 0.25, 0.35, -0.1, 1, 0.15, false), ];

        // Score every memory under each arm's weights: (index, score, relevant).
        let mut control_ranked: Vec<(usize, f32, bool)> = memory_corpus
            .iter()
            .enumerate()
            .map(|(idx, &(sem, ent, tag, imp, mom, acc, graph, relevant))| {
                let score = control_weights.fuse_scores_full(sem, ent, tag, imp, mom, acc, graph);
                (idx, score, relevant)
            })
            .collect();

        let mut treatment_ranked: Vec<(usize, f32, bool)> = memory_corpus
            .iter()
            .enumerate()
            .map(|(idx, &(sem, ent, tag, imp, mom, acc, graph, relevant))| {
                let score = treatment_weights.fuse_scores_full(sem, ent, tag, imp, mom, acc, graph);
                (idx, score, relevant)
            })
            .collect();

        // Rank descending by fused score (total_cmp: NaN-safe float ordering).
        control_ranked.sort_by(|a, b| b.1.total_cmp(&a.1));
        treatment_ranked.sort_by(|a, b| b.1.total_cmp(&a.1));

        println!("🔍 RANKING COMPARISON (top 8):");
        println!("   Control ranking:");
        for (rank, (idx, score, relevant)) in control_ranked.iter().take(8).enumerate() {
            let status = if *relevant {
                "✓ relevant"
            } else {
                "✗ TRAP"
            };
            println!(
                "     #{}: memory[{}] score={:.3} {}",
                rank + 1,
                idx,
                score,
                status
            );
        }
        println!("   Treatment ranking:");
        for (rank, (idx, score, relevant)) in treatment_ranked.iter().take(8).enumerate() {
            let status = if *relevant {
                "✓ relevant"
            } else {
                "✗ TRAP"
            };
            println!(
                "     #{}: memory[{}] score={:.3} {}",
                rank + 1,
                idx,
                score,
                status
            );
        }
        println!();

        // Each arm surfaces its top-5 memories; the fraction of traps in
        // that window drives the simulated negative-feedback rate below.
        let num_sessions = 1000;
        let memories_surfaced = 5;

        let control_top_k: Vec<bool> = control_ranked
            .iter()
            .take(memories_surfaced)
            .map(|x| x.2)
            .collect();
        let treatment_top_k: Vec<bool> = treatment_ranked
            .iter()
            .take(memories_surfaced)
            .map(|x| x.2)
            .collect();

        let control_relevant_count = control_top_k.iter().filter(|&&r| r).count();
        let treatment_relevant_count = treatment_top_k.iter().filter(|&&r| r).count();
        let control_trap_count = memories_surfaced - control_relevant_count;
        let treatment_trap_count = memories_surfaced - treatment_relevant_count;

        let control_trap_ratio = control_trap_count as f32 / memories_surfaced as f32;
        let treatment_trap_ratio = treatment_trap_count as f32 / memories_surfaced as f32;

        println!("📈 CONTEXT QUALITY (top {memories_surfaced}):");
        let control_trap_pct = control_trap_ratio * 100.0;
        let treatment_trap_pct = treatment_trap_ratio * 100.0;
        println!(
            "   Control: {control_relevant_count} relevant, {control_trap_count} traps ({control_trap_pct:.0}% trap ratio)"
        );
        println!(
            "   Treatment: {treatment_relevant_count} relevant, {treatment_trap_count} traps ({treatment_trap_pct:.0}% trap ratio)\n"
        );

        // Tiny fixed-seed LCG so the simulation is fully deterministic
        // (no external RNG crate, no flaky test runs).
        let mut rng_state: u64 = 42;
        let next_rand = |state: &mut u64| -> f32 {
            *state = state.wrapping_mul(6364136223846793005).wrapping_add(1);
            ((*state >> 32) as f32) / (0x1_0000_0000_u64 as f32)
        };

        let mut control_positive = 0u64;
        let mut control_negative = 0u64;
        let mut treatment_positive = 0u64;
        let mut treatment_negative = 0u64;

        // Each session gives negative feedback with probability equal to the
        // arm's trap ratio, positive otherwise.
        for _session in 0..num_sessions {
            if next_rand(&mut rng_state) < control_trap_ratio {
                control_negative += 1; } else {
                control_positive += 1; }

            if next_rand(&mut rng_state) < treatment_trap_ratio {
                treatment_negative += 1;
            } else {
                treatment_positive += 1;
            }
        }

        // Map the simulation onto test metrics: a "click" is a positive session.
        let num_impressions = num_sessions as u64;
        let control_clicks = control_positive; let treatment_clicks = treatment_positive;

        let mut test = ABTest::builder("relevance_weights_experiment")
            .with_description(
                "CTX-3: Quality over quantity - momentum, access_count, graph_strength",
            )
            .with_control(control_weights)
            .with_treatment(treatment_weights)
            .with_min_impressions(100)
            .with_traffic_split(0.5)
            .build();

        test.control_metrics.impressions = num_impressions;
        test.control_metrics.clicks = control_clicks;
        test.control_metrics.unique_users = (num_impressions as f64 * 0.85) as u64;
        test.control_metrics.positive_feedback = control_positive;
        test.control_metrics.negative_feedback = control_negative;

        test.treatment_metrics.impressions = num_impressions;
        test.treatment_metrics.clicks = treatment_clicks;
        test.treatment_metrics.unique_users = (num_impressions as f64 * 0.85) as u64;
        test.treatment_metrics.positive_feedback = treatment_positive;
        test.treatment_metrics.negative_feedback = treatment_negative;

        let control_ctr = (control_clicks as f64 / num_impressions as f64) * 100.0;
        let treatment_ctr = (treatment_clicks as f64 / num_impressions as f64) * 100.0;

        // Run every analysis at once: frequentist, Bayesian, effect size,
        // SRM, sequential, plus the combined decision and insights.
        let analysis = ABTestAnalyzer::comprehensive_analysis(&test);

        println!("📊 DYNAMIC SIMULATION RESULTS:");
        println!(
            "   ├─ Control: {num_impressions} impressions, {control_clicks} clicks ({control_ctr:.1}% CTR)"
        );
        println!("   │    positive={control_positive}, negative={control_negative}");
        println!(
            "   └─ Treatment: {num_impressions} impressions, {treatment_clicks} clicks ({treatment_ctr:.1}% CTR)"
        );
        println!("        positive={treatment_positive}, negative={treatment_negative}\n");

        println!("🔬 FREQUENTIST ANALYSIS:");
        let chi_sq = analysis.frequentist.chi_squared;
        let p_val = analysis.frequentist.p_value;
        println!("   ├─ Chi-squared: {chi_sq:.4}");
        println!("   ├─ P-value: {p_val:.6}");
        let significant = if analysis.frequentist.is_significant {
            "YES ✓"
        } else {
            "NO ✗"
        };
        println!("   ├─ Significant: {significant}");
        let winner = &analysis.frequentist.winner;
        println!("   └─ Winner: {winner:?}\n");

        println!("🎲 BAYESIAN ANALYSIS:");
        let prob_treat = analysis.bayesian.prob_treatment_better * 100.0;
        let exp_lift = analysis.bayesian.expected_lift * 100.0;
        let ci_lo = analysis.bayesian.credible_interval.0 * 100.0;
        let ci_hi = analysis.bayesian.credible_interval.1 * 100.0;
        let risk_treat = analysis.bayesian.risk_treatment * 100.0;
        let risk_ctrl = analysis.bayesian.risk_control * 100.0;
        println!("   ├─ P(Treatment better): {prob_treat:.2}%");
        println!("   ├─ Expected lift: {exp_lift:.2}%");
        println!("   ├─ 95% Credible Interval: ({ci_lo:.2}%, {ci_hi:.2}%)");
        println!("   ├─ Risk if shipping treatment: {risk_treat:.3}%");
        println!("   └─ Risk if keeping control: {risk_ctrl:.3}%\n");

        println!("📏 EFFECT SIZE:");
        let cohens_h = analysis.effect_size.cohens_h;
        let interpretation = &analysis.effect_size.interpretation;
        let rel_risk = analysis.effect_size.relative_risk;
        let odds_ratio = analysis.effect_size.odds_ratio;
        let nnt = analysis.effect_size.nnt;
        println!("   ├─ Cohen's h: {cohens_h:.4}");
        println!("   ├─ Interpretation: {interpretation}");
        println!("   ├─ Relative Risk: {rel_risk:.2}x");
        println!("   ├─ Odds Ratio: {odds_ratio:.2}");
        if nnt.is_finite() {
            println!("   └─ NNT (Number Needed to Treat): {nnt:.0}\n");
        } else {
            println!("   └─ NNT: N/A (no effect)\n");
        }

        println!("⚖️ DATA QUALITY (SRM Check):");
        let expected_ratio = analysis.srm.expected_ratio * 100.0;
        let observed_ratio = analysis.srm.observed_ratio * 100.0;
        println!("   ├─ Expected ratio: {expected_ratio:.1}%");
        println!("   ├─ Observed ratio: {observed_ratio:.1}%");
        let srm_detected = if analysis.srm.srm_detected {
            "YES ⚠️"
        } else {
            "NO ✓"
        };
        println!("   ├─ SRM Detected: {srm_detected}");
        let severity = &analysis.srm.severity;
        println!("   └─ Severity: {severity:?}\n");

        println!("📈 SEQUENTIAL TESTING:");
        let analysis_num = analysis.sequential.analysis_number;
        let planned = analysis.sequential.planned_analyses;
        let alpha_spent = analysis.sequential.alpha_spent;
        println!("   ├─ Analysis #{analysis_num} of {planned}");
        println!("   ├─ Alpha spent: {alpha_spent:.4}");
        let current_alpha = analysis.sequential.current_alpha;
        println!("   ├─ Current threshold: {current_alpha:.4}");
        let can_stop = if analysis.sequential.can_stop_early {
            "YES ✓"
        } else {
            "NO - Continue testing"
        };
        println!("   └─ Can stop early: {can_stop}\n");

        println!("═══════════════════════════════════════");
        println!("🎯 FINAL DECISION:");
        println!("═══════════════════════════════════════");
        let should_ship = if analysis.should_ship {
            "YES ✅"
        } else {
            "NO ❌"
        };
        println!("   Should ship: {should_ship}");
        let practically_sig = if analysis.is_practically_significant {
            "YES"
        } else {
            "NO"
        };
        println!("   Practically significant: {practically_sig}");
        println!("\n📋 USER-FOCUSED INSIGHTS:");
        for insight in &analysis.insights {
            println!("   • {insight}");
        }

        // Locally recomputed NNT from the absolute risk difference (ARD),
        // shadowing the effect-size `nnt` above for the key-metric printout.
        let ard = (treatment_ctr - control_ctr) / 100.0; let nnt = if ard > 0.0 { 1.0 / ard } else { f64::INFINITY };
        let ctr_diff = treatment_ctr - control_ctr;
        println!("\n🎯 KEY METRIC:");
        println!(
            "   ├─ CTR Improvement: {control_ctr:.1}% → {treatment_ctr:.1}% (+{ctr_diff:.1}%)"
        );
        let ard_pct = ard * 100.0;
        println!("   ├─ ARD (Absolute Risk Difference): {ard_pct:.2}%");
        if nnt.is_finite() && nnt < 100.0 {
            println!("   └─ NNT (Number Needed to Treat): {nnt:.0}");
            println!("       (1 in {nnt:.0} users benefit from treatment)");
        } else {
            println!("   └─ NNT: N/A (no significant improvement)");
        }

        println!("\n========================================");
        println!("  END OF COMPREHENSIVE ANALYSIS");
        println!("========================================\n");

        // Treatment ranks fewer traps into its top-5, so with the fixed seed
        // its simulated engagement must not be worse than control's.
        assert!(
            treatment_clicks >= control_clicks,
            "Treatment ({treatment_clicks} clicks) should outperform Control ({control_clicks} clicks)"
        );
        assert!(
            treatment_ctr >= control_ctr,
            "Treatment CTR ({treatment_ctr:.1}%) should be >= Control CTR ({control_ctr:.1}%)"
        );
        // Quality ratio = positive/negative feedback (falls back to the raw
        // positive count when an arm has zero negatives).
        let control_quality = if control_negative > 0 {
            control_positive as f64 / control_negative as f64
        } else {
            control_positive as f64
        };
        let treatment_quality = if treatment_negative > 0 {
            treatment_positive as f64 / treatment_negative as f64
        } else {
            treatment_positive as f64
        };
        assert!(
            treatment_quality >= control_quality * 0.9, "Treatment quality ratio ({treatment_quality:.2}) should be >= Control ({control_quality:.2})"
        );
        assert!(!analysis.insights.is_empty());
    }
2501
2502 #[test]
2503 fn test_bayesian_analysis() {
2504 let mut test = ABTest::builder("bayesian_test").build();
2505
2506 test.control_metrics.impressions = 1000;
2507 test.control_metrics.clicks = 100; test.treatment_metrics.impressions = 1000;
2509 test.treatment_metrics.clicks = 150; let bayesian = ABTestAnalyzer::bayesian_analysis(&test);
2512
2513 assert!(bayesian.prob_treatment_better > 0.9);
2515 assert!(bayesian.expected_lift > 0.0);
2516 assert!(bayesian.credible_interval.0 > -0.5);
2518 }
2519
2520 #[test]
2521 fn test_effect_size_calculation() {
2522 let mut test = ABTest::builder("effect_test").build();
2523
2524 test.control_metrics.impressions = 1000;
2525 test.control_metrics.clicks = 100; test.treatment_metrics.impressions = 1000;
2527 test.treatment_metrics.clicks = 200; let effect = ABTestAnalyzer::calculate_effect_size(&test);
2530
2531 assert!(effect.cohens_h > 0.2);
2533 assert!(effect.relative_risk > 1.5);
2534 assert!((effect.nnt - 10.0).abs() < 0.5);
2536 }
2537
2538 #[test]
2539 fn test_srm_detection() {
2540 let mut test = ABTest::builder("srm_test").with_traffic_split(0.5).build();
2541
2542 test.control_metrics.impressions = 700;
2544 test.treatment_metrics.impressions = 300;
2545
2546 let srm = ABTestAnalyzer::check_srm(&test);
2547
2548 assert!(srm.srm_detected);
2550 assert_eq!(srm.severity, SRMSeverity::Critical);
2551 }
2552
2553 #[test]
2554 fn test_sequential_analysis() {
2555 let mut test = ABTest::builder("sequential_test")
2556 .with_min_impressions(100)
2557 .build();
2558
2559 test.control_metrics.impressions = 500;
2560 test.control_metrics.clicks = 25; test.treatment_metrics.impressions = 500;
2562 test.treatment_metrics.clicks = 75; let seq = ABTestAnalyzer::sequential_analysis(&test, 1, 5);
2566
2567 assert_eq!(seq.analysis_number, 1);
2568 assert_eq!(seq.planned_analyses, 5);
2569 assert!(seq.alpha_spent < 0.01);
2571 }
2572}