use super::*;
use crate::core::recombination::factory::RecombinationDetectorFactory;
pub struct PhiTestDetector {
significance_level: f64, min_informative_sites: usize, bidirectional: bool,
}
impl PhiTestDetector {
pub fn new(config: &str) -> Result<Self, String> {
let params = RecombinationDetectorFactory::parse_config(config);
let significance_level = params.get("significance")
.and_then(|s| s.parse().ok())
.unwrap_or(0.05);
let min_informative_sites = params.get("min_sites")
.and_then(|s| s.parse().ok())
.unwrap_or(4);
let bidirectional = params.get("bidirectional")
.and_then(|s| s.parse().ok())
.unwrap_or(true);
if significance_level <= 0.0 || significance_level >= 1.0 {
return Err("Significance level must be between 0 and 1".to_string());
}
Ok(Self { significance_level, min_informative_sites, bidirectional })
}
fn calculate_phi_statistic(&self, seq1: &[u8], seq2: &[u8]) -> (f64, usize) {
let mut n00 = 0; let mut n01 = 0; let mut n10 = 0; let mut n11 = 0; let mut informative_sites = 0;
let min_len = seq1.len().min(seq2.len());
let mut polymorphic_sites = Vec::new();
for i in 0..min_len {
if seq1[i] != seq2[i] && seq1[i] != b'N' && seq2[i] != b'N' {
polymorphic_sites.push((seq1[i], seq2[i]));
}
}
if polymorphic_sites.len() < 2 {
return (0.0, 0); }
for i in 0..polymorphic_sites.len() {
for j in (i+1)..polymorphic_sites.len() {
let (a1, b1) = polymorphic_sites[i];
let (a2, b2) = polymorphic_sites[j];
match ((a1, a2), (b1, b2)) {
((a, c), (b, d)) if a == b && c == d => n00 += 1, ((a, c), (b, d)) if a == b && c != d => n01 += 1, ((a, c), (b, d)) if a != b && c == d => n10 += 1, ((a, c), (b, d)) if a != b && c != d => n11 += 1, _ => {}
}
informative_sites += 1;
}
}
let n = (n00 + n01 + n10 + n11) as f64;
if n == 0.0 {
return (0.0, 0);
}
let numerator = (n11 as f64 * n00 as f64 - n10 as f64 * n01 as f64).powi(2) * n;
let denominator = (n11 + n10) as f64 * (n01 + n00) as f64 * (n11 + n01) as f64 * (n10 + n00) as f64;
if denominator == 0.0 {
return (0.0, informative_sites);
}
let phi = numerator / denominator;
(phi, informative_sites)
}
fn phi_to_pvalue(&self, phi: f64) -> f64 {
if phi <= 0.0 {
return 1.0;
}
let chi_square = phi;
let z = chi_square.sqrt();
let erf_approx = if z > 3.0 { 1.0 } else { z / (1.0 + 0.278393 * z + 0.230389 * z * z + 0.000972 * z * z * z + 0.078108 * z * z * z * z) };
let cdf = 0.5 * (1.0 + erf_approx);
let p_value = 2.0 * (1.0 - cdf);
p_value.max(0.0).min(1.0)
}
}
impl RecombinationDetector for PhiTestDetector {
fn name(&self) -> &'static str {
"phi-test"
}
fn description(&self) -> &'static str {
"EXPERIMENTAL: Phi statistical test for recombination (Bruen et al. 2006) - not validated for production use"
}
fn detect(
&self,
seq1: &[u8],
seq2: &[u8],
_locus: &str,
distance_forward: usize,
distance_reverse_complement: Option<usize>
) -> RecombinationResult {
let (phi_forward, sites_forward) = self.calculate_phi_statistic(seq1, seq2);
let p_value_forward = self.phi_to_pvalue(phi_forward);
let (best_phi, best_p_value, best_distance) = if self.bidirectional && distance_reverse_complement.is_some() {
let seq2_revcomp: Vec<u8> = seq2.iter()
.rev()
.map(|&c| match c.to_ascii_uppercase() {
b'A' => b'T',
b'T' => b'A',
b'G' => b'C',
b'C' => b'G',
b'N' => b'N',
_ => c,
})
.collect();
let (phi_reverse, _sites_reverse) = self.calculate_phi_statistic(seq1, &seq2_revcomp);
let p_value_reverse = self.phi_to_pvalue(phi_reverse);
if p_value_forward <= p_value_reverse {
(phi_forward, p_value_forward, distance_forward)
} else {
(phi_reverse, p_value_reverse, distance_reverse_complement.unwrap())
}
} else {
(phi_forward, p_value_forward, distance_forward)
};
let is_recombination = sites_forward >= self.min_informative_sites &&
best_p_value < self.significance_level;
let confidence_score = if sites_forward >= self.min_informative_sites {
1.0 - best_p_value } else {
0.0 };
let mut method_data = HashMap::new();
method_data.insert("phi_statistic".to_string(), format!("{:.6}", best_phi));
method_data.insert("p_value".to_string(), format!("{:.6}", best_p_value));
method_data.insert("significance_level".to_string(), self.significance_level.to_string());
method_data.insert("informative_sites".to_string(), sites_forward.to_string());
method_data.insert("min_required_sites".to_string(), self.min_informative_sites.to_string());
RecombinationResult {
is_recombination,
confidence_score,
method_specific_data: method_data,
statistical_significance: Some(best_p_value),
distance_forward: Some(distance_forward),
distance_reverse_complement,
best_distance,
}
}
fn get_config(&self) -> RecombinationDetectorConfig {
let mut params = HashMap::new();
params.insert("significance".to_string(), self.significance_level.to_string());
params.insert("min_sites".to_string(), self.min_informative_sites.to_string());
params.insert("bidirectional".to_string(), self.bidirectional.to_string());
RecombinationDetectorConfig {
detector_type: "phi-test".to_string(),
parameters: params,
bidirectional_alignment: self.bidirectional,
}
}
fn requires_bidirectional(&self) -> bool {
self.bidirectional
}
fn validate_config(config_str: &str) -> Result<(), String> {
let params = RecombinationDetectorFactory::parse_config(config_str);
if let Some(sig_str) = params.get("significance") {
let sig: f64 = sig_str.parse()
.map_err(|_| "Significance level must be a number")?;
if sig <= 0.0 || sig >= 1.0 {
return Err("Significance level must be between 0 and 1".to_string());
}
}
if let Some(sites_str) = params.get("min_sites") {
let sites: usize = sites_str.parse()
.map_err(|_| "Minimum sites must be a positive integer")?;
if sites < 2 {
return Err("Minimum sites must be at least 2".to_string());
}
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_phi_test_detector() {
let detector = PhiTestDetector::new("significance=0.05,min_sites=4").unwrap();
assert_eq!(detector.significance_level, 0.05);
assert_eq!(detector.min_informative_sites, 4);
let seq1 = b"ATCGATCGATCG";
let seq2 = b"ATCGTTCGATCG"; let result = detector.detect(seq1, seq2, "test_locus", 2, None);
assert!(result.statistical_significance.is_some());
assert!(result.method_specific_data.contains_key("phi_statistic"));
assert!(result.method_specific_data.contains_key("p_value"));
}
}