use approx::assert_abs_diff_eq;
use snapbox::cmd::{cargo_bin, Command};
pub mod common;
use crate::common::TestSetup;
use crate::common::*;
#[cfg(test)]
mod tests {
use super::*;
fn assert_distance_with_tolerance(actual: f32, expected: f32, tolerance: f32) {
assert_abs_diff_eq!(actual, expected, epsilon = tolerance);
}
#[test]
fn test_completeness_ordering() {
let sandbox = TestSetup::setup();
sandbox.copy_input_file_to_wd("14412_3#82.contigs_velvet.fa.gz");
sandbox.copy_input_file_to_wd("14412_3#84.contigs_velvet.fa.gz");
sandbox.copy_input_file_to_wd("R6.fa.gz");
Command::new(cargo_bin("sketchlib"))
.current_dir(sandbox.get_wd())
.arg("sketch")
.arg("-o")
.arg("test_genomes")
.args(["-v", "-k", "31", "-s", "1000"])
.arg("14412_3#82.contigs_velvet.fa.gz") .arg("14412_3#84.contigs_velvet.fa.gz") .arg("R6.fa.gz") .assert()
.success();
TestSetup::create_completeness_file(
&sandbox,
"completeness_cutoff_test.txt",
&[
("14412_3#82.contigs_velvet.fa.gz", 0.8), ("14412_3#84.contigs_velvet.fa.gz", 0.85), ("R6.fa.gz", 0.9), ],
);
let sketch_info_output = Command::new(cargo_bin("sketchlib"))
.current_dir(sandbox.get_wd())
.arg("sketch")
.arg("info")
.arg("test_genomes")
.output()
.expect("Failed to get sketch info");
println!("Sketch database info:");
println!("{}", String::from_utf8_lossy(&sketch_info_output.stdout));
let completeness_content = std::fs::read_to_string(
sandbox.file_string("completeness_cutoff_test.txt", TestDir::Output),
)
.expect("Failed to read completeness file");
println!("Completeness file contents:");
println!("{}", completeness_content);
Command::new(cargo_bin("sketchlib"))
.current_dir(sandbox.get_wd())
.arg("dist")
.arg("test_genomes")
.args(["-o", "distances_no_correction"])
.args(["-k", "31"])
.arg("-v")
.assert()
.success();
Command::new(cargo_bin("sketchlib"))
.current_dir(sandbox.get_wd())
.arg("dist")
.arg("test_genomes")
.args(["-o", "distances_with_correction_default_cutoff"])
.arg("--completeness-file")
.arg("completeness_cutoff_test.txt")
.args(["-k", "31"])
.arg("-v")
.assert()
.success();
Command::new(cargo_bin("sketchlib"))
.current_dir(sandbox.get_wd())
.arg("dist")
.arg("test_genomes")
.args(["-o", "distances_with_correction_high_cutoff"])
.arg("--completeness-file")
.arg("completeness_cutoff_test.txt")
.arg("--completeness-cutoff")
.arg("0.8")
.args(["-k", "31"])
.arg("-v")
.assert()
.success();
let distances_no_correction = TestSetup::parse_distances(
&sandbox.file_string("distances_no_correction", TestDir::Output),
);
let distances_default_cutoff = TestSetup::parse_distances(
&sandbox.file_string("distances_with_correction_default_cutoff", TestDir::Output),
);
let distances_high_cutoff = TestSetup::parse_distances(
&sandbox.file_string("distances_with_correction_high_cutoff", TestDir::Output),
);
println!(
"Distances without correction: {:?}",
distances_no_correction
);
println!(
"Distances with default cutoff (0.64): {:?}",
distances_default_cutoff
);
println!(
"Distances with high cutoff (0.8): {:?}",
distances_high_cutoff
);
assert_eq!(
distances_no_correction.len(),
distances_default_cutoff.len()
);
assert_eq!(distances_no_correction.len(), distances_high_cutoff.len());
let mut default_cutoff_differences = 0;
for (uncorrected, corrected) in distances_no_correction
.iter()
.zip(distances_default_cutoff.iter())
{
if (uncorrected - corrected).abs() > 0.001 {
default_cutoff_differences += 1;
println!(
"Default cutoff difference found: {} vs {}",
uncorrected, corrected
);
}
}
let mut high_cutoff_differences = 0;
for (uncorrected, corrected) in distances_no_correction
.iter()
.zip(distances_high_cutoff.iter())
{
if (uncorrected - corrected).abs() > 0.001 {
high_cutoff_differences += 1;
println!(
"High cutoff difference found: {} vs {}",
uncorrected, corrected
);
}
}
println!(
"Default cutoff (0.64) differences: {}",
default_cutoff_differences
);
println!("High cutoff (0.8) differences: {}", high_cutoff_differences);
let meaningful_pairs = distances_no_correction
.iter()
.filter(|&&d| d < 0.99)
.count();
println!("Number of meaningful distance pairs: {}", meaningful_pairs);
assert_eq!(
default_cutoff_differences, meaningful_pairs,
"Default cutoff (0.64) should result in corrections for all meaningful pairs (all c1*c2 >= 0.64)"
);
assert_eq!(
high_cutoff_differences, 0,
"High cutoff (0.8) should result in 0 corrections (all c1*c2 < 0.8)"
);
assert!(
default_cutoff_differences > 0,
"Default cutoff should result in at least some completeness corrections"
);
assert_eq!(true, sandbox.file_exists("distances_no_correction"));
assert_eq!(
true,
sandbox.file_exists("distances_with_correction_default_cutoff")
);
assert_eq!(
true,
sandbox.file_exists("distances_with_correction_high_cutoff")
);
println!("✅ Completeness cutoff test passed!");
println!(
" - Default cutoff (0.64): {} corrections",
default_cutoff_differences
);
println!(
" - High cutoff (0.8): {} corrections",
high_cutoff_differences
);
println!(" - Meaningful distance pairs: {}", meaningful_pairs);
}
#[test]
fn test_completeness_missing_genomes() {
let sandbox = TestSetup::setup();
sandbox.copy_input_file_to_wd("14412_3#82.contigs_velvet.fa.gz");
sandbox.copy_input_file_to_wd("14412_3#84.contigs_velvet.fa.gz");
sandbox.copy_input_file_to_wd("R6.fa.gz");
Command::new(cargo_bin("sketchlib"))
.current_dir(sandbox.get_wd())
.arg("sketch")
.arg("-o")
.arg("test_missing")
.args(["-v", "-k", "21", "-s", "1000"])
.arg("14412_3#82.contigs_velvet.fa.gz")
.arg("14412_3#84.contigs_velvet.fa.gz")
.arg("R6.fa.gz")
.assert()
.success();
TestSetup::create_completeness_file(
&sandbox,
"completeness_missing.txt",
&[
("14412_3#82.contigs_velvet.fa.gz", 0.8),
("14412_3#84.contigs_velvet.fa.gz", 0.9),
],
);
Command::new(cargo_bin("sketchlib"))
.current_dir(sandbox.get_wd())
.arg("dist")
.arg("test_missing")
.args(["-k", "21", "-o", "distances_missing"])
.arg("--completeness-file")
.arg("completeness_missing.txt")
.arg("-v")
.assert()
.success();
assert_eq!(true, sandbox.file_exists("distances_missing"));
let distances =
TestSetup::parse_distances(&sandbox.file_string("distances_missing", TestDir::Output));
assert!(distances.len() > 0, "Should have calculated some distances");
for distance in distances {
assert!(
distance.is_finite(),
"Distance should be finite: {}",
distance
);
assert!(
distance >= 0.0 && distance <= 1.0,
"Distance should be between 0 and 1: {}",
distance
);
}
}
#[test]
fn test_completeness_extra_genomes() {
let sandbox = TestSetup::setup();
sandbox.copy_input_file_to_wd("14412_3#82.contigs_velvet.fa.gz");
sandbox.copy_input_file_to_wd("14412_3#84.contigs_velvet.fa.gz");
Command::new(cargo_bin("sketchlib"))
.current_dir(sandbox.get_wd())
.arg("sketch")
.arg("-o")
.arg("test_extra")
.args(["-v", "-k", "21", "-s", "1000"])
.arg("14412_3#82.contigs_velvet.fa.gz")
.arg("14412_3#84.contigs_velvet.fa.gz")
.assert()
.success();
TestSetup::create_completeness_file(
&sandbox,
"completeness_extra.txt",
&[
("14412_3#82.contigs_velvet.fa.gz", 0.8),
("14412_3#84.contigs_velvet.fa.gz", 0.9),
("NonExistentGenome1", 0.5), ("NonExistentGenome2", 0.6), ("AnotherFakeGenome", 0.7), ],
);
Command::new(cargo_bin("sketchlib"))
.current_dir(sandbox.get_wd())
.arg("dist")
.arg("test_extra")
.args(["-k", "21", "-o", "distances_extra"])
.arg("--completeness-file")
.arg("completeness_extra.txt")
.arg("-v")
.assert()
.success();
assert_eq!(true, sandbox.file_exists("distances_extra"));
let distances =
TestSetup::parse_distances(&sandbox.file_string("distances_extra", TestDir::Output));
assert!(distances.len() > 0, "Should have calculated some distances");
for distance in distances {
assert!(
distance.is_finite(),
"Distance should be finite: {}",
distance
);
assert!(
distance >= 0.0 && distance <= 1.0,
"Distance should be between 0 and 1: {}",
distance
);
}
}
#[test]
fn test_completeness_precluster() {
let sandbox = TestSetup::setup();
sandbox.copy_input_file_to_wd("14412_3#82.contigs_velvet.fa.gz");
sandbox.copy_input_file_to_wd("14412_3#84.contigs_velvet.fa.gz");
sandbox.copy_input_file_to_wd("R6.fa.gz");
Command::new(cargo_bin("sketchlib"))
.current_dir(sandbox.get_wd())
.arg("inverted")
.arg("build")
.arg("-o")
.arg("precluster_index")
.args(["-v", "-k", "21", "-s", "10", "--write-skq"])
.arg("14412_3#82.contigs_velvet.fa.gz")
.arg("14412_3#84.contigs_velvet.fa.gz")
.arg("R6.fa.gz")
.assert()
.success();
Command::new(cargo_bin("sketchlib"))
.current_dir(sandbox.get_wd())
.arg("sketch")
.arg("-o")
.arg("precluster_sketches")
.args(["-v", "-k", "21", "-s", "1000"])
.arg("14412_3#82.contigs_velvet.fa.gz")
.arg("14412_3#84.contigs_velvet.fa.gz")
.arg("R6.fa.gz")
.assert()
.success();
TestSetup::create_completeness_file(
&sandbox,
"completeness_precluster.txt",
&[
("14412_3#82.contigs_velvet.fa.gz", 0.8),
("14412_3#84.contigs_velvet.fa.gz", 0.9),
("R6.fa.gz", 0.7),
],
);
Command::new(cargo_bin("sketchlib"))
.current_dir(sandbox.get_wd())
.arg("inverted")
.arg("precluster")
.arg("precluster_index.ski")
.arg("--skd")
.arg("precluster_sketches")
.args(["-v", "--knn", "2"])
.arg("--completeness-file")
.arg("completeness_precluster.txt")
.args(["-o", "precluster_distances"])
.assert()
.success();
assert_eq!(true, sandbox.file_exists("precluster_distances"));
let distances = TestSetup::parse_distances(
&sandbox.file_string("precluster_distances", TestDir::Output),
);
assert!(distances.len() > 0, "Should have calculated some distances");
for distance in distances {
assert!(
distance.is_finite(),
"Distance should be finite: {}",
distance
);
assert!(
distance >= 0.0 && distance <= 1.0,
"Distance should be between 0 and 1: {}",
distance
);
}
}
#[test]
fn test_completeness_correction_formula_exact() {
let sandbox = TestSetup::setup();
sandbox.copy_input_file_to_wd("14412_3#82.contigs_velvet.fa.gz");
sandbox.copy_input_file_to_wd("14412_3#84.contigs_velvet.fa.gz");
Command::new(cargo_bin("sketchlib"))
.current_dir(sandbox.get_wd())
.arg("sketch")
.arg("-o")
.arg("formula_test")
.args(["-v", "-k", "31", "-s", "1000"])
.arg("14412_3#82.contigs_velvet.fa.gz")
.arg("14412_3#84.contigs_velvet.fa.gz")
.assert()
.success();
let c1 = 0.8;
let c2 = 0.9;
TestSetup::create_completeness_file(
&sandbox,
"completeness_formula.txt",
&[
("14412_3#82.contigs_velvet.fa.gz", c1),
("14412_3#84.contigs_velvet.fa.gz", c2),
],
);
Command::new(cargo_bin("sketchlib"))
.current_dir(sandbox.get_wd())
.arg("dist")
.arg("formula_test")
.args(["-o", "distances_uncorrected"])
.args(["-k", "31"])
.arg("-v")
.assert()
.success();
Command::new(cargo_bin("sketchlib"))
.current_dir(sandbox.get_wd())
.arg("dist")
.arg("formula_test")
.args(["-o", "distances_corrected"])
.arg("--completeness-file")
.arg("completeness_formula.txt")
.args(["-k", "31"])
.arg("-v")
.assert()
.success();
let uncorrected_distances = TestSetup::parse_distances(
&sandbox.file_string("distances_uncorrected", TestDir::Output),
);
let corrected_distances = TestSetup::parse_distances(
&sandbox.file_string("distances_corrected", TestDir::Output),
);
assert_eq!(
uncorrected_distances.len(),
1,
"Should have exactly one distance pair"
);
assert_eq!(
corrected_distances.len(),
1,
"Should have exactly one distance pair"
);
let uncorrected_jaccard = 1.0 - uncorrected_distances[0] as f32;
let corrected_jaccard = 1.0 - corrected_distances[0] as f32;
let correction_factor = (c1 * c2 / (c1 + c2 - c1 * c2)) as f32;
let expected_corrected_jaccard = uncorrected_jaccard / correction_factor;
let expected_corrected_distance = 1.0 - expected_corrected_jaccard;
println!("Uncorrected Jaccard: {}", uncorrected_jaccard);
println!("Corrected Jaccard: {}", corrected_jaccard);
println!("Expected corrected Jaccard: {}", expected_corrected_jaccard);
println!(
"Correction factor (c1*c2/(c1+c2-c1*c2)): {}",
correction_factor
);
println!("Uncorrected distance: {}", uncorrected_distances[0]);
println!("Corrected distance: {}", corrected_distances[0]);
println!(
"Expected corrected distance: {}",
expected_corrected_distance
);
assert_distance_with_tolerance(
corrected_distances[0] as f32,
expected_corrected_distance,
1e-6,
);
assert!(
(uncorrected_distances[0] - corrected_distances[0]).abs() > 1e-6,
"Correction should have been applied - distances should be different"
);
assert!(
corrected_distances[0] < uncorrected_distances[0],
"Completeness correction should reduce distance (increase Jaccard similarity)"
);
assert!(
corrected_jaccard <= 1.0,
"Corrected Jaccard should not exceed 1.0: {}",
corrected_jaccard
);
}
}