use super::config::Config;
use super::dataset::Dataset;
use super::features::{compute_class_difference, compute_pooled_covariance, extract_features};
use super::report::{DimensionInfo, PowerDiagnostics, PowerOutcome, Report};
/// Analyze a dataset for leakage and produce a [`Report`].
///
/// Pipeline: validate the dataset, extract features, compute the
/// per-dimension difference of class means, derive a Welch-style standard
/// error per dimension, and convert the largest standardized effect into a
/// leak probability that is compared against the configured pass/fail
/// thresholds.
pub fn analyze(dataset: &Dataset, config: &Config) -> Report {
    // An invalid dataset (e.g. empty) cannot support a decision: report
    // Inconclusive with a neutral 0.5 probability and bail out early.
    if let Err(e) = dataset.validate() {
        let dim = DimensionInfo::new(config.feature_dimension(), 0.0);
        return Report::new(
            PowerOutcome::Inconclusive {
                reason: format!("Dataset validation failed: {}", e),
                leak_probability: 0.5,
            },
            dim,
        );
    }
    let features = extract_features(dataset, config);
    // Per-dimension difference of class means (presumably fixed minus random,
    // per the helper's name — the sign does not matter below since only
    // absolute effects are used).
    let difference = compute_class_difference(&features);
    // Computed for parity with the fuller analysis path; the result itself is
    // currently unused here.
    let _covariance = compute_pooled_covariance(&features);
    let n_fixed = features.fixed_count();
    let n_random = features.random_count();
    let n_total = n_fixed + n_random;
    // Effective sample size of a two-sample comparison (harmonic-mean form).
    // NOTE(review): assumes validate() guarantees both classes are non-empty;
    // otherwise this and the divisions below produce NaN.
    let n_eff = (n_fixed as f64 * n_random as f64) / n_total as f64;
    let d = features.dimension;
    let dim_info = DimensionInfo::new(d, n_eff);
    // Largest absolute mean difference across dimensions. A fold with
    // `f64::max` is NaN-tolerant, unlike `max_by(partial_cmp).unwrap()`,
    // which panics when a comparison is undefined.
    let max_effect = difference.iter().fold(0.0_f64, |acc, v| acc.max(v.abs()));
    // Welch standard error per dimension. Each class variance must be taken
    // around that class's *own* mean — not around the class difference, and
    // not around zero — otherwise any nonzero class mean inflates the SE.
    let se_per_dim: Vec<f64> = (0..difference.len())
        .map(|i| {
            let mean_fixed = features
                .fixed
                .iter()
                .map(|tf| tf.features[i])
                .sum::<f64>()
                / n_fixed as f64;
            let mean_random = features
                .random
                .iter()
                .map(|tf| tf.features[i])
                .sum::<f64>()
                / n_random as f64;
            let mut var_fixed = 0.0;
            for tf in &features.fixed {
                let diff = tf.features[i] - mean_fixed;
                var_fixed += diff * diff;
            }
            let mut var_random = 0.0;
            for tf in &features.random {
                let diff = tf.features[i] - mean_random;
                var_random += diff * diff;
            }
            // Bessel's correction (sample variance). A count of 1 keeps the
            // raw sum of squares (which is zero), avoiding division by zero.
            if n_fixed > 1 {
                var_fixed /= (n_fixed - 1) as f64;
            }
            if n_random > 1 {
                var_random /= (n_random - 1) as f64;
            }
            // Standard error of the difference of means (Welch form).
            let pooled_var = var_fixed / n_fixed as f64 + var_random / n_random as f64;
            pooled_var.sqrt()
        })
        .collect();
    // The noise floor is the worst (largest) per-dimension standard error;
    // NaN-tolerant fold as above.
    let theta_floor = se_per_dim.iter().fold(0.0_f64, |acc, &v| acc.max(v));
    let theta_eff = theta_floor * config.floor_multiplier;
    let max_t_stat = if theta_floor > 1e-10 {
        max_effect / theta_floor
    } else if max_effect > 1e-10 {
        // Zero noise floor with a nonzero effect: the standardized effect is
        // unbounded (a deterministic separation of the classes). Cap it at a
        // value that saturates the normal CDF at 1.0 instead of treating it
        // as "no evidence".
        1e6
    } else {
        // No effect and no noise: genuinely neutral evidence.
        0.0
    };
    let leak_probability = if max_t_stat > 0.0 {
        use statrs::distribution::{ContinuousCDF, Normal};
        // One-sided probability under the standard normal; a unit normal is
        // always constructible, so the unwrap cannot fail.
        let normal = Normal::new(0.0, 1.0).unwrap();
        normal.cdf(max_t_stat)
    } else {
        0.5
    };
    // Map the posterior-style probability onto the three-way outcome.
    let outcome = if leak_probability < config.pass_threshold {
        PowerOutcome::Pass {
            leak_probability,
            max_effect,
        }
    } else if leak_probability > config.fail_threshold {
        // 95% CI half-width from the normal approximation.
        let ci_width = 1.96 * theta_floor;
        PowerOutcome::Fail {
            leak_probability,
            max_effect,
            max_effect_ci95: (max_effect - ci_width, max_effect + ci_width),
        }
    } else {
        PowerOutcome::Inconclusive {
            reason: "Posterior probability between thresholds".to_string(),
            leak_probability,
        }
    };
    // This fast path does no MCMC, so the IACT/Gibbs diagnostics are the
    // trivial defaults.
    let diagnostics = PowerDiagnostics {
        n_fixed,
        n_random,
        n_total,
        n_eff,
        iact_fixed: 1.0,
        iact_random: 1.0,
        iact_combined: 1.0,
        theta_floor,
        block_length: 1,
        gibbs_samples: 0,
        gibbs_burnin: 0,
        convergence: None,
        warnings: Vec::new(),
    };
    let mut report = Report::new(outcome, dim_info);
    report.max_effect = max_effect;
    report.theta_floor = theta_floor;
    report.floor_multiplier = config.floor_multiplier;
    report.theta_eff = theta_eff;
    report.units = dataset.units.clone();
    report.feature_family = config.feature_family;
    report.diagnostics = diagnostics;
    report
}
#[cfg(test)]
mod tests {
    use super::super::config::PreprocessingConfig;
    use super::super::dataset::{Class, Trace};
    use super::*;

    /// An empty dataset must fail validation and yield an inconclusive outcome.
    #[test]
    fn test_analyze_empty_dataset() {
        let report = analyze(&Dataset::new(Vec::new()), &Config::default());
        assert!(!report.outcome.is_conclusive());
    }

    /// Identical data in both classes should produce a near-zero max effect.
    #[test]
    fn test_analyze_identical_classes() {
        let mut traces = Vec::with_capacity(50);
        for i in 0..50 {
            let class = match i % 2 {
                0 => Class::Fixed,
                _ => Class::Random,
            };
            traces.push(Trace::new(class, vec![1.0, 2.0, 3.0, 4.0]));
        }
        let mut config = Config::new().with_partitions(2);
        config.preprocessing = PreprocessingConfig::none();
        let report = analyze(&Dataset::new(traces), &config);
        assert!(
            report.max_effect.abs() < 1e-6,
            "max_effect should be ~0 for identical data, got {}",
            report.max_effect
        );
    }

    /// Clearly separated classes should be flagged with high leak probability.
    #[test]
    fn test_analyze_different_classes() {
        let fixed = (0..25)
            .map(|i| Trace::with_id(Class::Fixed, vec![10.0, 20.0, 30.0, 40.0], i as u64));
        let random = (25..50)
            .map(|i| Trace::with_id(Class::Random, vec![0.0, 1.0, 2.0, 3.0], i as u64));
        let traces: Vec<Trace> = fixed.chain(random).collect();
        let mut config = Config::new().with_partitions(2);
        config.preprocessing = PreprocessingConfig::none();
        let report = analyze(&Dataset::new(traces), &config);
        assert!(report.leak_probability > 0.9);
        assert!(report.max_effect > 5.0);
    }

    /// Counts, dimension, and configuration values must be copied into the report.
    #[test]
    fn test_analyze_report_fields() {
        let mut traces = Vec::new();
        traces.push(Trace::new(Class::Fixed, vec![1.0, 2.0, 3.0, 4.0]));
        traces.push(Trace::new(Class::Fixed, vec![1.1, 2.1, 3.1, 4.1]));
        traces.push(Trace::new(Class::Random, vec![1.0, 2.0, 3.0, 4.0]));
        traces.push(Trace::new(Class::Random, vec![0.9, 1.9, 2.9, 3.9]));
        let config = Config::new().with_partitions(2).with_floor_multiplier(3.0);
        let report = analyze(&Dataset::new(traces), &config);
        assert_eq!(report.diagnostics.n_fixed, 2);
        assert_eq!(report.diagnostics.n_random, 2);
        assert_eq!(report.dimension.d, 2);
        assert!(report.diagnostics.n_eff > 0.0);
        assert_eq!(report.floor_multiplier, 3.0);
    }
}