use crate::QualityFinder;
use checkm::GenomeQuality;
use std::collections::HashMap;
#[cfg(target_family = "unix")]
use std::os::unix::fs::symlink;
#[cfg(target_family = "windows")]
use std::os::windows::fs::symlink_file as symlink;
use std::path::Path;
use std::process::Command;
pub struct CheckM2Analyser {
pub comp_cont_cache: HashMap<String, (f64, f64)>,
pub database_path: String,
pub quality_report_source_path: Option<std::path::PathBuf>,
}
impl CheckM2Analyser {
pub fn new(database_path: String) -> Self {
Self {
comp_cont_cache: HashMap::new(),
database_path,
quality_report_source_path: None,
}
}
pub fn copy_quality_report(&self, dest_path: &str) -> Result<(), String> {
if let Some(src_path) = &self.quality_report_source_path {
std::fs::copy(src_path, dest_path).map_err(|e| {
format!("Failed to copy quality report from {src_path:?} to {dest_path}: {e}")
})?;
Ok(())
} else {
Err("No quality report available to copy (CheckM2 may not have been run)".to_string())
}
}
}
impl QualityFinder for CheckM2Analyser {
fn prepare_comp_cont(&mut self, genome_paths: &[String], threads: usize, tmp_path: &Path) {
let (cache, quality_report_path) =
get_comp_cont(genome_paths, threads, tmp_path, &self.database_path);
self.comp_cont_cache = cache;
self.quality_report_source_path = Some(quality_report_path);
}
fn find_comp_cont(&self, genome_path: &str) -> (f64, f64) {
self.comp_cont_cache
.get(genome_path)
.copied()
.expect("Genome path not found in CheckM2 results")
}
fn method_name(&self) -> &str {
"CheckM2"
}
}
fn get_comp_cont(
genome_paths: &[String],
threads: usize,
tmp_path: &Path,
database_path: &str,
) -> (HashMap<String, (f64, f64)>, std::path::PathBuf) {
let mut comp_cont_cache = HashMap::new();
let checkm2_path = tmp_path.join("checkm2");
let genomes_dir = tmp_path.join("genomes");
std::fs::create_dir_all(&genomes_dir).expect("Failed to create genomes directory for CheckM2");
for fasta in genome_paths {
let abs_fasta =
std::fs::canonicalize(fasta).expect("Failed to canonicalize genome path for symlink");
let new_filename = format!(
"{}.fna",
Path::new(fasta).file_stem().unwrap().to_string_lossy()
);
symlink(&abs_fasta, genomes_dir.join(new_filename))
.expect("Failed to create symlink for genome to run CheckM2");
}
info!("Running CheckM2 on provided genomes...");
let output = Command::new("checkm2")
.args([
"predict",
"-o",
&checkm2_path.to_string_lossy(),
"--threads",
&threads.to_string(),
"-i",
&genomes_dir.to_string_lossy(),
"--database_path",
database_path,
])
.output()
.expect("Failed to run CheckM2");
if !output.status.success() {
info!(
"CheckM2 failed with {}.\nstdout:\n{}\nstderr:\n{}",
output.status,
String::from_utf8_lossy(&output.stdout),
String::from_utf8_lossy(&output.stderr)
);
panic!("CheckM2 did not run successfully");
}
let quality_report_path = checkm2_path.join("quality_report.tsv");
assert!(
quality_report_path.is_file(),
"CheckM2 did not produce quality_report.tsv at expected location: {:?}",
quality_report_path
);
let checkm2_result = checkm::CheckM2QualityReport::read_file_path(
quality_report_path
.to_str()
.expect("Invalid UTF-8 in quality_report_path"),
)
.expect("Failed to parse CheckM2 quality report");
for genome_path in genome_paths {
let genome_stem = Path::new(genome_path)
.file_stem()
.unwrap()
.to_string_lossy();
if let Ok(q) = checkm2_result.retrieve_via_fasta_path(genome_path) {
comp_cont_cache.insert(
genome_path.clone(),
(
q.completeness() as f64 * 100.0,
q.contamination() as f64 * 100.0,
),
);
} else if let Some((_, q)) = checkm2_result
.genome_to_quality
.iter()
.find(|(k, _)| **k == genome_stem)
{
comp_cont_cache.insert(
genome_path.clone(),
(
q.completeness() as f64 * 100.0,
q.contamination() as f64 * 100.0,
),
);
} else {
panic!(
"No CheckM2 quality found for genome {} (stem {})",
genome_path, genome_stem
);
}
}
(comp_cont_cache, quality_report_path)
}