ref_solver/cli/
identify.rs

1use std::path::{Path, PathBuf};
2
3use clap::Args;
4
5use crate::catalog::hierarchical::HierarchicalCatalog;
6use crate::catalog::store::ReferenceCatalog;
7use crate::cli::OutputFormat;
8use crate::core::header::QueryHeader;
9use crate::core::types::Confidence;
10use crate::matching::engine::{MatchResult, MatchingConfig, MatchingEngine, ScoringWeights};
11use crate::matching::hierarchical_engine::{HierarchicalMatchResult, HierarchicalMatchingEngine};
12use crate::matching::Suggestion;
13use crate::parsing;
14use crate::refget::{EnrichedContig, RefgetConfig, RefgetLookupResult};
15
16/// How to handle references that have contigs missing from their FASTA
17/// (e.g., CHM13 where MT is in assembly report but uses standard rCRS mitochondria)
18#[derive(Clone, Copy, Debug, Default, clap::ValueEnum)]
19pub enum MissingContigHandling {
20    /// Show warnings when query has contigs that match a reference's missing contigs
21    #[default]
22    Warn,
23    /// Treat missing contigs as errors that lower match confidence
24    Strict,
25    /// Don't mention missing contigs at all
26    Silent,
27}
28
29#[derive(Args)]
30pub struct IdentifyArgs {
31    /// Input file (BAM, SAM, CRAM, FASTA, FAI, VCF, .dict, TSV, or CSV)
32    /// Use '-' for stdin (expects header text)
33    #[arg(required = true)]
34    pub input: PathBuf,
35
36    /// Input format (auto-detected by default)
37    #[arg(long)]
38    pub input_format: Option<InputFormat>,
39
40    /// Number of matches to show
41    #[arg(short = 'n', long, default_value = "5")]
42    pub max_matches: usize,
43
44    /// Only show exact or near-exact matches
45    #[arg(long)]
46    pub exact_only: bool,
47
48    /// Path to custom catalog file
49    #[arg(long)]
50    pub catalog: Option<PathBuf>,
51
52    /// Use hierarchical catalog format (required when --catalog points to a hierarchical catalog)
53    #[arg(long)]
54    pub hierarchical: bool,
55
56    /// How to handle references with contigs missing from FASTA
57    /// (e.g., CHM13 MT which uses standard rCRS mitochondria)
58    #[arg(long, value_enum, default_value = "warn")]
59    pub missing_contig_handling: MissingContigHandling,
60
61    // === Scoring weight options ===
62    /// Weight for contig match score (0-100, default 70)
63    /// How well query contigs match reference contigs
64    #[arg(long, default_value = "70", value_parser = clap::value_parser!(u32).range(0..=100))]
65    pub weight_match: u32,
66
67    /// Weight for coverage score (0-100, default 20)
68    /// What fraction of reference contigs are covered by query
69    #[arg(long, default_value = "20", value_parser = clap::value_parser!(u32).range(0..=100))]
70    pub weight_coverage: u32,
71
72    /// Weight for order score (0-100, default 10)
73    /// Whether contigs appear in the same order
74    #[arg(long, default_value = "10", value_parser = clap::value_parser!(u32).range(0..=100))]
75    pub weight_order: u32,
76
77    /// Refget server URL for looking up unknown contigs.
78    /// When set, unmatched contigs with MD5 digests are queried against this
79    /// server to retrieve aliases and other metadata.
80    #[arg(long)]
81    pub refget_server: Option<String>,
82}
83
84#[derive(Clone, Copy, Debug, clap::ValueEnum)]
85pub enum InputFormat {
86    Sam,
87    Bam,
88    Cram,
89    Dict,
90    Fai,
91    Fasta,
92    Vcf,
93    Tsv,
94    Csv,
95}
96
97/// Execute identify subcommand
98///
99/// # Errors
100///
101/// Returns an error if the input cannot be parsed or identification fails.
102#[allow(clippy::needless_pass_by_value)] // CLI entry point, values from clap
103pub fn run(args: IdentifyArgs, format: OutputFormat, verbose: bool) -> anyhow::Result<()> {
104    // Parse input first (needed for both catalog types)
105    let query = parse_input(&args)?;
106
107    if verbose {
108        #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] // Percentage 0-100
109        let md5_pct = (query.md5_coverage() * 100.0) as u32;
110        eprintln!(
111            "Parsed {} contigs from input ({md5_pct}% have MD5)",
112            query.contigs.len(),
113        );
114    }
115
116    // Use hierarchical or flat catalog based on flag
117    if args.hierarchical {
118        run_hierarchical(&args, &query, format, verbose)
119    } else {
120        run_flat(&args, &query, format, verbose)
121    }
122}
123
124fn run_flat(
125    args: &IdentifyArgs,
126    query: &QueryHeader,
127    format: OutputFormat,
128    verbose: bool,
129) -> anyhow::Result<()> {
130    // Load flat catalog
131    let catalog = if let Some(path) = &args.catalog {
132        ReferenceCatalog::load_from_file(path)?
133    } else {
134        ReferenceCatalog::load_embedded()?
135    };
136
137    if verbose {
138        eprintln!("Loaded flat catalog with {} references", catalog.len());
139    }
140
141    if catalog.is_empty() {
142        eprintln!("Warning: Catalog is empty, no references to match against.");
143        return Ok(());
144    }
145
146    // Build scoring weights from command line args
147    let scoring_weights = ScoringWeights {
148        contig_match: f64::from(args.weight_match) / 100.0,
149        coverage: f64::from(args.weight_coverage) / 100.0,
150        order: f64::from(args.weight_order) / 100.0,
151        conflict_penalty: 0.1, // Default: 10% credit for MD5 conflicts
152    };
153
154    if verbose {
155        eprintln!(
156            "Scoring weights: {:.0}% match, {:.0}% coverage, {:.0}% order",
157            scoring_weights.contig_match * 100.0,
158            scoring_weights.coverage * 100.0,
159            scoring_weights.order * 100.0,
160        );
161    }
162
163    // Find matches with custom config
164    let config = MatchingConfig {
165        min_score: 0.1,
166        scoring_weights: scoring_weights.clone(),
167    };
168    let engine = MatchingEngine::new(&catalog, config);
169    let matches = engine.find_matches(query, args.max_matches);
170
171    if matches.is_empty() {
172        eprintln!("No matching references found.");
173        return Ok(());
174    }
175
176    // Optionally enrich unmatched contigs via refget
177    let enriched = if let Some(ref server_url) = args.refget_server {
178        let config = RefgetConfig::new(server_url);
179        // Collect all query_only contigs from the top match
180        let unmatched_contigs: Vec<_> = matches
181            .first()
182            .map(|m| m.diagnosis.query_only.clone())
183            .unwrap_or_default();
184
185        if unmatched_contigs.is_empty() {
186            None
187        } else {
188            if verbose {
189                eprintln!(
190                    "Querying refget server for {} unmatched contigs...",
191                    unmatched_contigs.len()
192                );
193            }
194            let rt = tokio::runtime::Runtime::new()?;
195            let results = rt.block_on(crate::refget::enrichment::enrich_contigs(
196                &unmatched_contigs,
197                &config,
198            ));
199            Some(results)
200        }
201    } else {
202        None
203    };
204
205    // Output results
206    match format {
207        OutputFormat::Text => {
208            print_text_results(
209                &matches,
210                query,
211                verbose,
212                args.missing_contig_handling,
213                &scoring_weights,
214            );
215            if let Some(ref enriched) = enriched {
216                print_refget_text_results(enriched);
217            }
218        }
219        OutputFormat::Json => {
220            print_json_results(
221                &matches,
222                args.missing_contig_handling,
223                &scoring_weights,
224                enriched.as_deref(),
225            )?;
226        }
227        OutputFormat::Tsv => {
228            print_tsv_results(&matches, &scoring_weights);
229            if let Some(ref enriched) = enriched {
230                print_refget_tsv_results(enriched);
231            }
232        }
233    }
234
235    Ok(())
236}
237
238fn run_hierarchical(
239    args: &IdentifyArgs,
240    query: &QueryHeader,
241    format: OutputFormat,
242    verbose: bool,
243) -> anyhow::Result<()> {
244    // Load hierarchical catalog
245    let catalog_path = args
246        .catalog
247        .as_ref()
248        .ok_or_else(|| anyhow::anyhow!("--catalog is required when using --hierarchical"))?;
249
250    let catalog = HierarchicalCatalog::load(catalog_path)?;
251
252    if verbose {
253        eprintln!(
254            "Loaded hierarchical catalog v{} with {} assemblies",
255            catalog.version,
256            catalog.assemblies.len()
257        );
258    }
259
260    // Find matches
261    let engine = HierarchicalMatchingEngine::new(&catalog);
262    let matches = engine.find_matches(query, args.max_matches);
263
264    if matches.is_empty() {
265        eprintln!("No matching references found.");
266        return Ok(());
267    }
268
269    // Output results
270    match format {
271        OutputFormat::Text => print_hierarchical_text_results(&matches, query, verbose),
272        OutputFormat::Json => print_hierarchical_json_results(&matches)?,
273        OutputFormat::Tsv => print_hierarchical_tsv_results(&matches),
274    }
275
276    Ok(())
277}
278
279fn parse_input(args: &IdentifyArgs) -> anyhow::Result<QueryHeader> {
280    use std::io::{self, Read};
281
282    // Handle stdin
283    if args.input.to_string_lossy() == "-" {
284        let mut buffer = String::new();
285        io::stdin().read_to_string(&mut buffer)?;
286        return Ok(parsing::sam::parse_header_text(&buffer)?);
287    }
288
289    // Auto-detect or use specified format
290    let format = args
291        .input_format
292        .unwrap_or_else(|| detect_format(&args.input));
293
294    match format {
295        InputFormat::Sam | InputFormat::Bam | InputFormat::Cram => {
296            Ok(parsing::sam::parse_file(&args.input)?)
297        }
298        InputFormat::Dict => Ok(parsing::dict::parse_dict_file(&args.input)?),
299        InputFormat::Fai => Ok(parsing::fai::parse_fai_file(&args.input)?),
300        InputFormat::Fasta => Ok(parsing::fasta::parse_fasta_file(&args.input)?),
301        InputFormat::Vcf => Ok(parsing::vcf::parse_vcf_file(&args.input)?),
302        InputFormat::Tsv => Ok(parsing::tsv::parse_tsv_file(&args.input, '\t')?),
303        InputFormat::Csv => Ok(parsing::tsv::parse_tsv_file(&args.input, ',')?),
304    }
305}
306
307/// Detect input format from file extension
308fn detect_format(path: &Path) -> InputFormat {
309    let path_str = path.to_string_lossy().to_lowercase();
310
311    // Check for FASTA files (including gzipped)
312    if parsing::fasta::is_fasta_file(path) {
313        return InputFormat::Fasta;
314    }
315
316    // Check for gzipped VCF
317    if path_str.ends_with(".vcf.gz") || path_str.ends_with(".vcf.bgz") {
318        return InputFormat::Vcf;
319    }
320
321    // Get the extension for simple cases
322    let ext = path
323        .extension()
324        .and_then(|e| e.to_str())
325        .map(str::to_lowercase);
326
327    match ext.as_deref() {
328        Some("bam") => InputFormat::Bam,
329        Some("cram") => InputFormat::Cram,
330        Some("dict") => InputFormat::Dict,
331        Some("fai") => InputFormat::Fai,
332        Some("vcf") => InputFormat::Vcf,
333        Some("tsv") => InputFormat::Tsv,
334        Some("csv") => InputFormat::Csv,
335        _ => InputFormat::Sam, // Default to SAM for unknown extensions
336    }
337}
338
339#[allow(clippy::too_many_lines)] // TODO: Refactor into smaller functions
340fn print_text_results(
341    matches: &[MatchResult],
342    query: &QueryHeader,
343    verbose: bool,
344    missing_handling: MissingContigHandling,
345    weights: &ScoringWeights,
346) {
347    for (i, result) in matches.iter().enumerate() {
348        if i > 0 {
349            println!("\n{}", "─".repeat(60));
350        }
351
352        // Header
353        let confidence_str = match result.score.confidence {
354            Confidence::Exact => "EXACT",
355            Confidence::High => "HIGH",
356            Confidence::Medium => "MEDIUM",
357            Confidence::Low => "LOW",
358        };
359
360        println!(
361            "\n#{} {} ({})",
362            i + 1,
363            result.reference.display_name,
364            confidence_str
365        );
366        println!("   ID: {}", result.reference.id);
367        println!("   Assembly: {}", result.reference.assembly);
368        println!("   Source: {}", result.reference.source);
369        println!("   Match Type: {:?}", result.diagnosis.match_type);
370
371        // Score breakdown: show component scores and final composite
372        // Normalize weights for display
373        let norm = weights.normalized();
374        println!(
375            "\n   Score: {:.1}% = {:.0}%×match + {:.0}%×coverage + {:.0}%×order",
376            result.score.composite * 100.0,
377            result.score.match_quality * 100.0,
378            result.score.coverage_score * 100.0,
379            result.score.order_score * 100.0,
380        );
381        println!(
382            "          (weights: {:.0}% match, {:.0}% coverage, {:.0}% order)",
383            norm.contig_match * 100.0,
384            norm.coverage * 100.0,
385            norm.order * 100.0,
386        );
387
388        // Check for contigs missing from FASTA
389        if !result.reference.contigs_missing_from_fasta.is_empty() {
390            // Find query contigs that match the missing contigs (by name, case-insensitive)
391            let missing_set: std::collections::HashSet<String> = result
392                .reference
393                .contigs_missing_from_fasta
394                .iter()
395                .map(|s| s.to_lowercase())
396                .collect();
397
398            let query_has_missing: Vec<&str> = query
399                .contigs
400                .iter()
401                .filter(|c| missing_set.contains(&c.name.to_lowercase()))
402                .map(|c| c.name.as_str())
403                .collect();
404
405            match missing_handling {
406                MissingContigHandling::Silent => {}
407                MissingContigHandling::Warn => {
408                    if !query_has_missing.is_empty() {
409                        println!(
410                            "\n   Warning: Query has contig(s) not in reference FASTA: {}",
411                            query_has_missing.join(", ")
412                        );
413                        println!(
414                            "   Note: {} uses external sequence(s) for: {}",
415                            result.reference.display_name,
416                            result.reference.contigs_missing_from_fasta.join(", ")
417                        );
418                    }
419                }
420                MissingContigHandling::Strict => {
421                    if !query_has_missing.is_empty() {
422                        println!(
423                            "\n   ERROR: Query has contig(s) not in reference FASTA: {}",
424                            query_has_missing.join(", ")
425                        );
426                        println!(
427                            "   The reference {} does not include: {}",
428                            result.reference.display_name,
429                            result.reference.contigs_missing_from_fasta.join(", ")
430                        );
431                    }
432                }
433            }
434        }
435
436        // Match details - query contigs
437        let total_query = query.contigs.len();
438        let exact = result.score.exact_matches;
439        let name_len = result.score.name_length_matches;
440        let conflicts = result.score.md5_conflicts;
441        let unmatched = result.score.unmatched;
442
443        println!(
444            "\n   Query contigs: {total_query} total → {exact} exact, {name_len} name+length, {conflicts} conflicts, {unmatched} unmatched"
445        );
446
447        // Reference coverage info
448        let total_ref = result.reference.contigs.len();
449        let matched_ref = exact + name_len; // Good matches that cover reference
450        let uncovered_ref = total_ref.saturating_sub(matched_ref);
451        println!(
452            "   Reference contigs: {total_ref} total, {matched_ref} matched, {uncovered_ref} not in query"
453        );
454
455        if result.diagnosis.reordered {
456            println!("   Order: DIFFERENT from reference");
457        }
458
459        // Conflicts
460        if !result.diagnosis.conflicts.is_empty() {
461            println!("\n   Conflicts:");
462            for conflict in &result.diagnosis.conflicts {
463                println!("   - {}", conflict.description);
464            }
465        }
466
467        // Suggestions
468        if !result.diagnosis.suggestions.is_empty() {
469            println!("\n   Suggestions:");
470            for suggestion in &result.diagnosis.suggestions {
471                match suggestion {
472                    Suggestion::RenameContigs { command_hint, .. } => {
473                        println!("   - Rename contigs:");
474                        for line in command_hint.lines() {
475                            println!("     {line}");
476                        }
477                    }
478                    Suggestion::ReorderContigs { command_hint } => {
479                        println!("   - Reorder contigs:");
480                        for line in command_hint.lines() {
481                            println!("     {line}");
482                        }
483                    }
484                    Suggestion::ReplaceContig {
485                        contig_name,
486                        reason,
487                        ..
488                    } => {
489                        println!("   - Replace {contig_name}: {reason}");
490                    }
491                    Suggestion::UseAsIs { warnings } => {
492                        if warnings.is_empty() {
493                            println!("   - Safe to use as-is");
494                        } else {
495                            println!("   - Safe to use with warnings:");
496                            for w in warnings {
497                                println!("     - {w}");
498                            }
499                        }
500                    }
501                    Suggestion::Realign {
502                        reason,
503                        suggested_reference,
504                    } => {
505                        println!("   - Realignment needed: {reason}");
506                        println!("     Suggested reference: {suggested_reference}");
507                    }
508                }
509            }
510        }
511
512        // Download URL
513        if let Some(url) = &result.reference.download_url {
514            println!("\n   Download: {url}");
515        }
516
517        // Verbose details
518        if verbose && !result.diagnosis.renamed_matches.is_empty() {
519            println!("\n   Rename mappings:");
520            for r in &result.diagnosis.renamed_matches {
521                println!("     {} -> {}", r.query_name, r.reference_name);
522            }
523        }
524    }
525
526    println!();
527}
528
529fn print_json_results(
530    matches: &[MatchResult],
531    missing_handling: MissingContigHandling,
532    weights: &ScoringWeights,
533    enriched: Option<&[EnrichedContig]>,
534) -> anyhow::Result<()> {
535    let norm = weights.normalized();
536    // Create serializable output
537    let results: Vec<serde_json::Value> = matches
538        .iter()
539        .map(|m| {
540            // Calculate reference coverage
541            let ref_total = m.reference.contigs.len();
542            let ref_matched = m.score.exact_matches + m.score.name_length_matches;
543            let ref_uncovered = ref_total.saturating_sub(ref_matched);
544
545            let mut json = serde_json::json!({
546                "reference": {
547                    "id": m.reference.id.0,
548                    "display_name": m.reference.display_name,
549                    "assembly": format!("{}", m.reference.assembly),
550                    "source": format!("{}", m.reference.source),
551                    "download_url": m.reference.download_url,
552                    "total_contigs": ref_total,
553                },
554                "score": {
555                    "composite": m.score.composite,
556                    "confidence": format!("{:?}", m.score.confidence),
557                    // Component scores (these make up the composite)
558                    "match_quality": m.score.match_quality,
559                    "coverage_score": m.score.coverage_score,
560                    "order_score": m.score.order_score,
561                    // Weights used
562                    "weights": {
563                        "match": norm.contig_match,
564                        "coverage": norm.coverage,
565                        "order": norm.order,
566                    },
567                },
568                "query_contigs": {
569                    "exact_matches": m.score.exact_matches,
570                    "name_length_matches": m.score.name_length_matches,
571                    "md5_conflicts": m.score.md5_conflicts,
572                    "unmatched": m.score.unmatched,
573                },
574                "reference_coverage": {
575                    "total": ref_total,
576                    "matched": ref_matched,
577                    "not_in_query": ref_uncovered,
578                },
579                "match_type": format!("{:?}", m.diagnosis.match_type),
580                "reordered": m.diagnosis.reordered,
581            });
582
583            // Add missing contig info unless silent
584            if !matches!(missing_handling, MissingContigHandling::Silent)
585                && !m.reference.contigs_missing_from_fasta.is_empty()
586            {
587                json["reference"]["contigs_missing_from_fasta"] =
588                    serde_json::json!(&m.reference.contigs_missing_from_fasta);
589            }
590
591            json
592        })
593        .collect();
594
595    let mut output = serde_json::json!({ "matches": results });
596
597    if let Some(enriched) = enriched {
598        output["refget_enrichment"] = serde_json::json!(enriched);
599    }
600
601    println!("{}", serde_json::to_string_pretty(&output)?);
602    Ok(())
603}
604
605fn print_tsv_results(matches: &[MatchResult], weights: &ScoringWeights) {
606    let norm = weights.normalized();
607    // Header with all fields
608    println!(
609        "rank\tid\tdisplay_name\tassembly\tsource\tmatch_type\tscore\tmatch_score\tcoverage_score\torder_score\tweight_match\tweight_coverage\tweight_order\tconfidence\texact\tname_length\tconflicts\tunmatched\tref_total\tref_matched\tref_uncovered"
610    );
611    for (i, m) in matches.iter().enumerate() {
612        let ref_total = m.reference.contigs.len();
613        let ref_matched = m.score.exact_matches + m.score.name_length_matches;
614        let ref_uncovered = ref_total.saturating_sub(ref_matched);
615
616        println!(
617            "{}\t{}\t{}\t{}\t{}\t{:?}\t{:.4}\t{:.4}\t{:.4}\t{:.4}\t{:.2}\t{:.2}\t{:.2}\t{:?}\t{}\t{}\t{}\t{}\t{}\t{}\t{}",
618            i + 1,
619            m.reference.id,
620            m.reference.display_name,
621            m.reference.assembly,
622            m.reference.source,
623            m.diagnosis.match_type,
624            m.score.composite,
625            m.score.match_quality,
626            m.score.coverage_score,
627            m.score.order_score,
628            norm.contig_match,
629            norm.coverage,
630            norm.order,
631            m.score.confidence,
632            m.score.exact_matches,
633            m.score.name_length_matches,
634            m.score.md5_conflicts,
635            m.score.unmatched,
636            ref_total,
637            ref_matched,
638            ref_uncovered,
639        );
640    }
641}
642
643// ============================================================================
644// Refget enrichment output functions
645// ============================================================================
646
647fn print_refget_text_results(enriched: &[EnrichedContig]) {
648    let found: Vec<_> = enriched
649        .iter()
650        .filter(|e| matches!(e.refget_metadata, RefgetLookupResult::Found { .. }))
651        .collect();
652
653    if found.is_empty() {
654        println!("Refget: no unmatched contigs found in refget server.");
655        return;
656    }
657
658    println!("\nRefget Aliases for Unmatched Contigs:");
659    println!("{}", "─".repeat(60));
660    for entry in &found {
661        if let RefgetLookupResult::Found {
662            aliases,
663            sha512t24u,
664            circular,
665        } = &entry.refget_metadata
666        {
667            print!("  {} ", entry.name);
668            if *circular {
669                print!("(circular) ");
670            }
671            println!("[sha512t24u: {sha512t24u}]");
672            if aliases.is_empty() {
673                println!("    (no aliases)");
674            } else {
675                for alias in aliases {
676                    println!("    {}: {}", alias.naming_authority, alias.value);
677                }
678            }
679        }
680    }
681    println!();
682}
683
684fn print_refget_tsv_results(enriched: &[EnrichedContig]) {
685    println!("\n# Refget enrichment for unmatched contigs");
686    println!("contig\tmd5\tstatus\tsha512t24u\tcircular\taliases");
687    for entry in enriched {
688        match &entry.refget_metadata {
689            RefgetLookupResult::Found {
690                aliases,
691                sha512t24u,
692                circular,
693            } => {
694                let alias_str: Vec<String> = aliases
695                    .iter()
696                    .map(|a| format!("{}={}", a.naming_authority, a.value))
697                    .collect();
698                println!(
699                    "{}\t{}\tfound\t{}\t{}\t{}",
700                    entry.name,
701                    entry.md5.as_deref().unwrap_or(""),
702                    sha512t24u,
703                    circular,
704                    alias_str.join(";"),
705                );
706            }
707            RefgetLookupResult::NotFound => {
708                println!(
709                    "{}\t{}\tnot_found\t\t\t",
710                    entry.name,
711                    entry.md5.as_deref().unwrap_or(""),
712                );
713            }
714            RefgetLookupResult::Error { message } => {
715                println!(
716                    "{}\t{}\terror\t\t\t{}",
717                    entry.name,
718                    entry.md5.as_deref().unwrap_or(""),
719                    message,
720                );
721            }
722        }
723    }
724}
725
726// ============================================================================
727// Hierarchical catalog output functions
728// ============================================================================
729
730fn print_hierarchical_text_results(
731    matches: &[HierarchicalMatchResult],
732    query: &QueryHeader,
733    verbose: bool,
734) {
735    for (i, result) in matches.iter().enumerate() {
736        if i > 0 {
737            println!("\n{}", "─".repeat(60));
738        }
739
740        // Header with match type
741        let match_str = format!("{:?}", result.match_type).to_uppercase();
742        println!("\n#{} {} ({})", i + 1, result.display_name, match_str);
743
744        // Distribution info
745        println!("   Distribution ID: {}", result.distribution_id);
746
747        // Assembly info (if available)
748        if !result.assembly_id.is_empty() {
749            println!(
750                "   Assembly: {} ({})",
751                result.assembly_name, result.assembly_id
752            );
753            if !result.version_string.is_empty() {
754                println!(
755                    "   Version: {} ({})",
756                    result.version_string, result.version_id
757                );
758            }
759        }
760
761        // Match score
762        println!("   Score: {:.1}%", result.match_percentage());
763
764        // Contig summary
765        println!("\n   Contig Summary:");
766        println!("   - Your file: {} contigs", result.total_query_contigs);
767        println!(
768            "   - This distribution: {} contigs",
769            result.total_distribution_contigs
770        );
771        println!("   - Matched: {} contigs", result.matched_contigs);
772
773        if result.extra_in_query > 0 {
774            println!("   - Extra in your file: {}", result.extra_in_query);
775        }
776        if result.missing_from_query > 0 {
777            println!("   - Missing from your file: {}", result.missing_from_query);
778        }
779
780        // Presence breakdown (only show if there's assembly linkage)
781        let counts = &result.presence_counts;
782        if counts.in_both > 0 || counts.fasta_only > 0 || counts.report_only > 0 {
783            println!("\n   Presence Breakdown:");
784            if counts.in_both > 0 {
785                println!("   - In both (FASTA + report): {} contigs", counts.in_both);
786            }
787            if counts.fasta_only > 0 {
788                println!("   - FASTA-only (decoy/HLA): {} contigs", counts.fasta_only);
789            }
790            if counts.report_only > 0 {
791                println!(
792                    "   - Report-only (not in FASTA): {} contigs",
793                    counts.report_only
794                );
795            }
796        }
797
798        // Verbose details
799        if verbose {
800            println!("\n   Query contigs: {}", query.contigs.len());
801            let md5_count = query.contigs.iter().filter(|c| c.md5.is_some()).count();
802            println!("   Query contigs with MD5: {md5_count}");
803        }
804    }
805
806    println!();
807}
808
809fn print_hierarchical_json_results(matches: &[HierarchicalMatchResult]) -> anyhow::Result<()> {
810    let output: Vec<serde_json::Value> = matches
811        .iter()
812        .map(|m| {
813            serde_json::json!({
814                "distribution": {
815                    "id": m.distribution_id,
816                    "display_name": m.display_name,
817                },
818                "assembly": {
819                    "id": m.assembly_id,
820                    "name": m.assembly_name,
821                    "version_id": m.version_id,
822                    "version": m.version_string,
823                },
824                "match_type": format!("{:?}", m.match_type),
825                "score": m.score,
826                "matched_contigs": m.matched_contigs,
827                "total_query_contigs": m.total_query_contigs,
828                "total_distribution_contigs": m.total_distribution_contigs,
829                "extra_in_query": m.extra_in_query,
830                "missing_from_query": m.missing_from_query,
831                "presence_counts": {
832                    "in_both": m.presence_counts.in_both,
833                    "fasta_only": m.presence_counts.fasta_only,
834                    "report_only": m.presence_counts.report_only,
835                },
836            })
837        })
838        .collect();
839
840    println!("{}", serde_json::to_string_pretty(&output)?);
841    Ok(())
842}
843
844fn print_hierarchical_tsv_results(matches: &[HierarchicalMatchResult]) {
845    println!("rank\tdistribution_id\tdisplay_name\tassembly_id\tversion_id\tmatch_type\tscore\tmatched\tquery_total\tdist_total\tin_both\tfasta_only");
846    for (i, m) in matches.iter().enumerate() {
847        println!(
848            "{}\t{}\t{}\t{}\t{}\t{:?}\t{:.4}\t{}\t{}\t{}\t{}\t{}",
849            i + 1,
850            m.distribution_id,
851            m.display_name,
852            m.assembly_id,
853            m.version_id,
854            m.match_type,
855            m.score,
856            m.matched_contigs,
857            m.total_query_contigs,
858            m.total_distribution_contigs,
859            m.presence_counts.in_both,
860            m.presence_counts.fasta_only,
861        );
862    }
863}
ref_solver/cli/identify.rs

ref_solver/cli/
identify.rs