1use std::path::{Path, PathBuf};
2
3use clap::Args;
4
5use crate::catalog::store::ReferenceCatalog;
6use crate::cli::OutputFormat;
7use crate::core::header::QueryHeader;
8use crate::core::reference::KnownReference;
9use crate::core::types::{Assembly, ReferenceSource};
10use crate::matching::scoring::MatchScore;
11use crate::parsing;
12
13#[derive(Args)]
14pub struct CompareArgs {
15 #[arg(required = true)]
17 pub input_a: PathBuf,
18
19 #[arg(required = true)]
21 pub input_b: String,
22
23 #[arg(long)]
25 pub reference: bool,
26
27 #[arg(long)]
29 pub catalog: Option<PathBuf>,
30}
31
32#[allow(clippy::needless_pass_by_value)] pub fn run(args: CompareArgs, format: OutputFormat, verbose: bool) -> anyhow::Result<()> {
39 let query_a = parse_input(&args.input_a)?;
41
42 if verbose {
43 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] let md5_pct = (query_a.md5_coverage() * 100.0) as u32;
45 eprintln!(
46 "Input A: {} contigs ({md5_pct}% have MD5)",
47 query_a.contigs.len(),
48 );
49 }
50
51 let query_b = if args.reference {
53 let catalog = if let Some(path) = &args.catalog {
55 ReferenceCatalog::load_from_file(path)?
56 } else {
57 ReferenceCatalog::load_embedded()?
58 };
59
60 let ref_id = crate::core::types::ReferenceId::new(&args.input_b);
61 let reference = catalog
62 .get(&ref_id)
63 .ok_or_else(|| anyhow::anyhow!("Reference '{}' not found in catalog", args.input_b))?;
64
65 QueryHeader::new(reference.contigs.clone())
67 } else {
68 let path = PathBuf::from(&args.input_b);
70 parse_input(&path)?
71 };
72
73 if verbose {
74 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] let md5_pct = (query_b.md5_coverage() * 100.0) as u32;
76 eprintln!(
77 "Input B: {} contigs ({md5_pct}% have MD5)",
78 query_b.contigs.len(),
79 );
80 }
81
82 let reference_b = KnownReference::new(
84 "input_b",
85 &args.input_b,
86 Assembly::Other("unknown".to_string()),
87 ReferenceSource::Custom("input".to_string()),
88 )
89 .with_contigs(query_b.contigs.clone());
90
91 let score = MatchScore::calculate(&query_a, &reference_b);
92
93 match format {
95 OutputFormat::Text => print_text_comparison(&args, &query_a, &query_b, &score),
96 OutputFormat::Json => print_json_comparison(&args, &query_a, &query_b, &score)?,
97 OutputFormat::Tsv => print_tsv_comparison(&score),
98 }
99
100 Ok(())
101}
102
103fn parse_input(path: &Path) -> anyhow::Result<QueryHeader> {
104 let ext = path
105 .extension()
106 .and_then(|e| e.to_str())
107 .map(str::to_lowercase);
108
109 match ext.as_deref() {
110 Some("dict") => Ok(parsing::dict::parse_dict_file(path)?),
111 Some("tsv") => Ok(parsing::tsv::parse_tsv_file(path, '\t')?),
112 Some("csv") => Ok(parsing::tsv::parse_tsv_file(path, ',')?),
113 _ => Ok(parsing::sam::parse_file(path)?),
115 }
116}
117
118fn print_text_comparison(
119 args: &CompareArgs,
120 query_a: &QueryHeader,
121 query_b: &QueryHeader,
122 score: &MatchScore,
123) {
124 println!("Comparison Results");
125 println!("{}", "=".repeat(60));
126
127 println!("\nInput A: {}", args.input_a.display());
128 println!(" Contigs: {}", query_a.contigs.len());
129 println!(" MD5 coverage: {:.0}%", query_a.md5_coverage() * 100.0);
130 println!(" Naming convention: {:?}", query_a.naming_convention);
131
132 println!("\nInput B: {}", args.input_b);
133 println!(" Contigs: {}", query_b.contigs.len());
134 println!(" MD5 coverage: {:.0}%", query_b.md5_coverage() * 100.0);
135 println!(" Naming convention: {:?}", query_b.naming_convention);
136
137 println!("\nSimilarity Scores:");
138 println!(" MD5 Jaccard: {:.2}%", score.md5_jaccard * 100.0);
139 println!(
140 " Name+Length Jaccard: {:.2}%",
141 score.name_length_jaccard * 100.0
142 );
143 println!(
144 " MD5 Query Coverage: {:.2}%",
145 score.md5_query_coverage * 100.0
146 );
147 println!(
148 " Name+Length Query Coverage: {:.2}%",
149 score.name_length_query_coverage * 100.0
150 );
151 println!(" Order Preserved: {}", score.order_preserved);
152 println!(" Order Score: {:.2}%", score.order_score * 100.0);
153 println!(" Composite Score: {:.2}%", score.composite * 100.0);
154 println!(" Confidence: {:?}", score.confidence);
155}
156
157fn print_json_comparison(
158 args: &CompareArgs,
159 query_a: &QueryHeader,
160 query_b: &QueryHeader,
161 score: &MatchScore,
162) -> anyhow::Result<()> {
163 let output = serde_json::json!({
164 "input_a": {
165 "path": args.input_a.display().to_string(),
166 "contig_count": query_a.contigs.len(),
167 "md5_coverage": query_a.md5_coverage(),
168 "naming_convention": format!("{:?}", query_a.naming_convention),
169 },
170 "input_b": {
171 "path": args.input_b,
172 "contig_count": query_b.contigs.len(),
173 "md5_coverage": query_b.md5_coverage(),
174 "naming_convention": format!("{:?}", query_b.naming_convention),
175 },
176 "score": {
177 "md5_jaccard": score.md5_jaccard,
178 "name_length_jaccard": score.name_length_jaccard,
179 "md5_query_coverage": score.md5_query_coverage,
180 "name_length_query_coverage": score.name_length_query_coverage,
181 "order_preserved": score.order_preserved,
182 "order_score": score.order_score,
183 "composite": score.composite,
184 "confidence": format!("{:?}", score.confidence),
185 }
186 });
187
188 println!("{}", serde_json::to_string_pretty(&output)?);
189 Ok(())
190}
191
192fn print_tsv_comparison(score: &MatchScore) {
193 println!(
194 "md5_jaccard\tname_length_jaccard\tmd5_query_coverage\torder_preserved\tcomposite\tconfidence"
195 );
196 println!(
197 "{:.4}\t{:.4}\t{:.4}\t{}\t{:.4}\t{:?}",
198 score.md5_jaccard,
199 score.name_length_jaccard,
200 score.md5_query_coverage,
201 score.order_preserved,
202 score.composite,
203 score.confidence,
204 );
205}