use std::io::Write;
use csv::Writer;
use std::fs::File;
pub fn calculate_sample_distance_with_bidirectional_recombination(
sample1: &AllelicProfile,
sample2: &AllelicProfile,
loci_names: &[String],
engine: &DistanceEngine,
mode: DistanceMode,
min_loci: usize,
no_hamming_fallback: bool,
recomb_writer: Option<&mut Writer<File>>, ) -> Option<usize> {
let mut total_distance = 0;
let mut shared_loci = 0;
let mut recomb_count = 0;
for locus in loci_names {
let crc1 = sample1.loci_hashes.get(locus)
.and_then(|h| h.as_crc32())
.unwrap_or(u32::MAX);
let crc2 = sample2.loci_hashes.get(locus)
.and_then(|h| h.as_crc32())
.unwrap_or(u32::MAX);
if crc1 != u32::MAX && crc2 != u32::MAX {
shared_loci += 1;
if let Some(threshold) = engine.recombination_threshold {
if crc1 != crc2 { let distance_forward = engine.get_distance(
locus, crc1, crc2, DistanceMode::SnpsAndIndelBases, no_hamming_fallback
);
let final_distance = if distance_forward > threshold {
let distance_reverse = engine.get_distance(
locus, crc2, crc1, DistanceMode::SnpsAndIndelBases, no_hamming_fallback
);
if distance_reverse <= threshold {
distance_reverse
} else {
if let Some(writer) = recomb_writer {
write_recombination_event(
writer,
locus,
sample1,
sample2,
crc1,
crc2,
distance_forward.min(distance_reverse),
threshold,
engine,
)?;
recomb_count += 1;
if recomb_count % 100 == 0 {
writer.flush()?;
}
}
distance_forward.min(distance_reverse) }
} else {
distance_forward
};
total_distance += final_distance;
} else {
total_distance += 0;
}
} else {
total_distance += engine.get_distance(locus, crc1, crc2, mode, no_hamming_fallback);
}
}
}
if shared_loci >= min_loci {
Some(total_distance)
} else {
None
}
}
fn write_recombination_event(
writer: &mut Writer<File>,
locus: &str,
sample1: &AllelicProfile,
sample2: &AllelicProfile,
crc1: u32,
crc2: u32,
distance: usize,
threshold: usize,
engine: &DistanceEngine,
) -> Result<(), Box<dyn std::error::Error>> {
let (seq_len1, seq_len2) = if let Some(ref db) = engine.sequence_db {
let len1 = db.get_sequence(locus, crc1).map(|s| s.sequence.len()).unwrap_or(0);
let len2 = db.get_sequence(locus, crc2).map(|s| s.sequence.len()).unwrap_or(0);
(len1, len2)
} else {
(0, 0)
};
let avg_length = if seq_len1 > 0 && seq_len2 > 0 {
(seq_len1 + seq_len2) / 2
} else {
450 };
let divergence_percent = if avg_length > 0 {
(distance as f64 / avg_length as f64) * 100.0
} else {
0.0
};
writer.write_record(&[
locus,
&sample1.sample_id,
&sample2.sample_id,
&format!("{}", crc1),
&format!("{}", crc2),
&distance.to_string(),
&threshold.to_string(),
&seq_len1.to_string(),
&seq_len2.to_string(),
&format!("{:.2}", divergence_percent),
])?;
Ok(())
}
pub fn compute_distance_matrix_streaming(
matrix: &AllelicMatrix,
engine: &DistanceEngine,
mode: DistanceMode,
min_loci: usize,
no_hamming_fallback: bool,
recomb_log_path: Option<&str>,
) -> Result<Vec<Vec<Option<usize>>>, Box<dyn std::error::Error>> {
let n = matrix.samples.len();
let mut distance_matrix = vec![vec![None; n]; n];
let mut recomb_writer = if let Some(path) = recomb_log_path {
let mut w = Writer::from_path(path)?;
w.write_record(&[
"locus", "sample1", "sample2", "allele1_hash", "allele2_hash",
"snps_indel_bases", "threshold", "seq_length1", "seq_length2",
"divergence_percent"
])?;
Some(w)
} else {
None
};
let total_comparisons = n * (n - 1) / 2;
let pb = ProgressBar::new(total_comparisons as u64);
pb.set_style(ProgressStyle::default_bar()
.template("[{elapsed_precise}] {bar:40.cyan/blue} {pos}/{len} ({eta})")
.progress_chars("█▓▒░ "));
let mut comparisons_done = 0;
let update_interval = (total_comparisons / 100).max(1);
for i in 0..n {
for j in i..n {
if i == j {
distance_matrix[i][j] = Some(0);
} else {
let distance = calculate_sample_distance_with_bidirectional_recombination(
&matrix.samples[i],
&matrix.samples[j],
&matrix.loci_names,
engine,
mode,
min_loci,
no_hamming_fallback,
recomb_writer.as_mut(),
);
distance_matrix[i][j] = distance;
distance_matrix[j][i] = distance;
comparisons_done += 1;
if comparisons_done % update_interval == 0 {
pb.set_position(comparisons_done as u64);
}
}
}
}
pb.finish();
if let Some(mut w) = recomb_writer {
w.flush()?;
println!("🔬 Recombination events written progressively to log");
}
Ok(distance_matrix)
}