use crate::analyse::GenomeOutput;
use crate::barrnap::BarrnapAnalyser;
use crate::checkm2::CheckM2Analyser;
use crate::cluster_argument_parsing;
use crate::trnascan::TrnascanAnalyser;
use std::collections::HashMap;
/// Return type of [`process_command`].
///
/// On success the tuple holds, in order:
/// 1. the per-genome analysis map returned by `crate::analyse::analyse`
///    (keys are presumably genome paths -- TODO confirm against `analyse`),
/// 2. the clusters returned by the galah clusterer, each a list of `usize`
///    indices (presumably into the list of genomes given to the clusterer --
///    TODO confirm),
/// 3. the genome FASTA paths held by the clusterer, as owned strings.
///
/// On failure the error is a human-readable message.
type ProcessResult = Result<(HashMap<String, GenomeOutput>, Vec<Vec<usize>>, Vec<String>), String>;
/// Analyses `genomes` (quality, rRNA, tRNA) and then clusters them with galah.
///
/// # Arguments
/// * `genomes` - paths of the genomes to analyse and cluster.
/// * `threads` - thread count forwarded to the analysis step.
/// * `cluster_args` - parsed command line; read for the CheckM/CheckM2, barrnap and
///   tRNAscan inputs as well as the galah clustering options.
/// * `cluster_def` - names of the clustering-related arguments inside `cluster_args`.
/// * `output_quality_report_path` - optional path forwarded to both the analysis step
///   and the clusterer construction.
///
/// # Returns
/// The analysis results, the clusters produced by `galah.cluster()`, and the genome
/// FASTA paths held by the clusterer.
///
/// # Errors
/// Returns `Err` with a descriptive message when
/// * contig clustering is requested (not implemented here),
/// * no CheckM2 database path is available although neither a CheckM2 quality report
///   nor a CheckM tab table was supplied,
/// * the analysis step fails,
/// * the reference genomes list file cannot be read, or
/// * the galah clusterer cannot be constructed from the arguments.
pub fn process_command(
    genomes: &[String],
    threads: usize,
    cluster_args: &clap::ArgMatches,
    cluster_def: &cluster_argument_parsing::GalahClustererCommandDefinition,
    output_quality_report_path: Option<String>,
) -> ProcessResult {
    // Contig clustering is unsupported here (with or without reference genomes), so
    // reject it up front instead of after the expensive analysis has already run.
    let cluster_contigs =
        cluster_args.get_flag(&cluster_def.dereplication_cluster_contigs_argument);
    if cluster_contigs {
        return Err("Clustering contigs is not yet implemented in process command".to_string());
    }

    let checkm2_quality_report = cluster_args
        .get_one::<String>("checkm2-quality-report")
        .cloned();
    let checkm_tab_table = cluster_args.get_one::<String>("checkm-tab-table").cloned();

    // A database path is only required when no precomputed quality table was supplied;
    // otherwise the analyser is constructed with an empty path.
    let checkm2_db_path = if checkm2_quality_report.is_none() && checkm_tab_table.is_none() {
        cluster_args
            .get_one::<String>("checkm2-db-path")
            .cloned()
            .or_else(|| std::env::var("CHECKM2DB").ok())
            .ok_or_else(|| {
                "CheckM2 database path must be provided via --checkm2-db-path or CHECKM2DB env var"
                    .to_string()
            })?
    } else {
        String::new()
    };

    let mut quality_finder = crate::analyse_argument_parsing::QualityAnalyser::CheckM2(
        CheckM2Analyser::new(checkm2_db_path),
    );
    let rrna_finder = crate::analyse_argument_parsing::RrnaAnalyser::Barrnap(BarrnapAnalyser);
    let trna_finder = crate::analyse_argument_parsing::TrnaAnalyser::Trnascan(TrnascanAnalyser);

    let barrnap_gff_list = cluster_args.get_one::<String>("barrnap-gff-list").cloned();
    let trnascan_out_list = cluster_args.get_one::<String>("trnascan-out-list").cloned();

    let analysis = crate::analyse::analyse(
        genomes,
        threads,
        &mut quality_finder,
        &rrna_finder,
        &trna_finder,
        &checkm2_quality_report,
        &output_quality_report_path,
        &checkm_tab_table,
        &barrnap_gff_list,
        &trnascan_out_list,
    )?;

    let reference_genomes_owned = load_reference_genomes(cluster_args, cluster_def)?;
    let reference_genomes: Option<Vec<&str>> = reference_genomes_owned
        .as_ref()
        .map(|refs| refs.iter().map(String::as_str).collect());
    if let Some(refs) = &reference_genomes {
        info!("Clustering against {} reference genomes", refs.len());
    }

    // The clusterer receives the reference genomes (if any) first, followed by the
    // genomes under analysis.
    let combined_genomes: Vec<String> = reference_genomes_owned
        .iter()
        .flatten()
        .chain(genomes)
        .cloned()
        .collect();

    let galah = cluster_argument_parsing::generate_galah_clusterer(
        &combined_genomes,
        &None,
        cluster_contigs,
        cluster_args,
        cluster_def,
        reference_genomes.as_deref(),
        output_quality_report_path,
    )
    .map_err(|e| {
        format!(
            "Failed to parse galah clustering arguments correctly: {:?}",
            e
        )
    })?;

    let passed_genomes_owned: Vec<String> = galah
        .genome_fasta_paths
        .iter()
        .map(|s| s.to_string())
        .collect();

    info!("Clustering {} genomes ..", passed_genomes_owned.len());
    let clusters = galah.cluster();
    info!("Found {} genome clusters", clusters.len());

    Ok((analysis, clusters, passed_genomes_owned))
}

/// Collects the reference genome paths named on the command line.
///
/// Paths given directly through the reference-genomes argument take precedence;
/// otherwise the non-blank lines of the reference-genomes list file are used.
/// Returns `Ok(None)` when neither argument was supplied and `Err` when the list
/// file cannot be read.
fn load_reference_genomes(
    cluster_args: &clap::ArgMatches,
    cluster_def: &cluster_argument_parsing::GalahClustererCommandDefinition,
) -> Result<Option<Vec<String>>, String> {
    if let Some(refs) =
        cluster_args.get_many::<String>(&cluster_def.dereplication_reference_genomes_argument)
    {
        return Ok(Some(refs.cloned().collect()));
    }

    match cluster_args
        .get_one::<String>(&cluster_def.dereplication_reference_genomes_list_argument)
    {
        Some(ref_file) => {
            let content = std::fs::read_to_string(ref_file).map_err(|e| {
                format!(
                    "Failed to read reference genomes list file: {}: {}",
                    ref_file, e
                )
            })?;
            Ok(Some(
                content
                    .lines()
                    .filter(|line| !line.trim().is_empty())
                    .map(str::to_string)
                    .collect(),
            ))
        }
        None => Ok(None),
    }
}