ref_solver/lib.rs
1//! # ref-solver
2//!
3//! A library for identifying human reference genomes from BAM/SAM/CRAM headers.
4//!
5//! When working with alignment files from external sources, it's often unclear exactly
6//! which reference genome was used. While a reference might be labeled "`GRCh38`" or "`hg19`",
7//! there are dozens of variations with different naming conventions, contig sets, and
8//! sequence versions.
9//!
10//! `ref-solver` solves this by matching the sequence dictionary from your alignment file
11//! against a catalog of known human reference genomes.
12//!
13//! ## Features
14//!
15//! - **MD5-based matching**: Uses sequence checksums for exact identification
16//! - **Fuzzy matching**: Falls back to name+length matching when MD5s are missing
17//! - **Rename detection**: Identifies when files differ only in contig naming
18//! - **Order detection**: Detects when contigs are reordered vs. reference
19//! - **Conflict detection**: Identifies problematic differences
20//! - **Actionable suggestions**: Provides commands to fix issues
21//!
22//! ## Example
23//!
24//! ```rust,no_run
25//! use ref_solver::{ReferenceCatalog, MatchingEngine, MatchingConfig, QueryHeader};
26//! use ref_solver::parsing::sam::parse_header_text;
27//!
28//! // Load the embedded catalog of known references
29//! let catalog = ReferenceCatalog::load_embedded().unwrap();
30//!
31//! // Parse a SAM header
32//! let header_text = "@SQ\tSN:chr1\tLN:248956422\tM5:6aef897c3d6ff0c78aff06ac189178dd\n";
33//! let query = parse_header_text(header_text).unwrap();
34//!
35//! // Find matching references
36//! let engine = MatchingEngine::new(&catalog, MatchingConfig::default());
37//! let matches = engine.find_matches(&query, 5);
38//!
39//! for m in matches {
40//!     println!("{}: {:.1}%", m.reference.display_name, m.score.composite * 100.0);
41//! }
42//! ```
43//!
44//! ## Modules
45//!
46//! - [`catalog`]: Reference catalog storage and indexing
47//! - [`core`]: Core data types for contigs, references, and headers
48//! - [`matching`]: Matching engine and scoring algorithms
49//! - [`parsing`]: Parsers for SAM/BAM/CRAM, dict, and TSV files
50//! - [`cli`]: Command-line interface implementation
51//! - [`web`]: Web server for browser-based identification
52
53pub mod catalog;
54pub mod cli;
55pub mod core;
56pub mod matching;
57pub mod parsing;
58pub mod utils;
59pub mod web;
60
61// Re-export commonly used types for convenience
62pub use catalog::store::ReferenceCatalog;
63pub use core::contig::Contig;
64pub use core::header::QueryHeader;
65pub use core::reference::KnownReference;
66pub use core::types::*;
67pub use matching::engine::{MatchResult, MatchingConfig, MatchingEngine};