Skip to main content

fgumi_lib/
mod.rs

// Treat any use of `unsafe` in this crate as a compile error. Because the level is `deny`
// (not `forbid`), an individual item can still opt out with an explicit `#[allow(unsafe_code)]`.
1#![deny(unsafe_code)]
2// Clippy can misattribute large_stack_arrays to byte_offset 0 when monomorphizing
3// generic code in test builds. Allow it crate-wide in test config since the actual
4// allocations are heap-based (`vec![]`), not stack arrays.
5#![cfg_attr(test, allow(clippy::large_stack_arrays))]
6
7//! # fgumi - Fulcrum Genomics UMI Tools Library
8//!
9//! This library provides core functionality for working with Unique Molecular Identifiers (UMIs)
10//! in sequencing data, including grouping, consensus calling, and quality filtering.
11//!
12//! ## Overview
13//!
14//! The fgumi library is organized into several key modules:
15//!
16//! ### Core Functionality
17//!
18//! - **[`umi`]** - UMI assignment strategies (identity, edit-distance, adjacency, paired)
19//! - **[`consensus`]** - Consensus calling algorithms (simplex, duplex, vanilla)
20//! - **[`sam`]** - SAM/BAM file utilities and alignment tag manipulation
21//!
22//! ### Utilities
23//!
24//! - **[`bam_io`]** - BAM file I/O helpers for reading and writing
25//! - **[`validation`]** - Input validation utilities for parameters and files
26//! - **[`progress`]** - Progress tracking and logging
27//! - **[`logging`]** - Enhanced logging utilities with formatting
28//! - **[`metrics`]** - Structured metrics types and file writing utilities
29//! - **[`rejection`]** - Rejection reason tracking and statistics
30//!
31//! ### Specialized Modules
32//!
33//! - **[`clipper`]** - Read clipping for overlapping pairs
34//! - **[`template`]** - Template-based read grouping
35//! - **[`reference`][mod@reference]** - Reference genome handling
36//!
37//! ## Quick Start
38//!
39//! ### Reading and Writing BAM Files
40//!
41//! ```no_run
42//! use fgumi_lib::bam_io::{create_bam_reader, create_bam_writer};
43//!
44//! # fn main() -> anyhow::Result<()> {
45//! // Open input BAM and get header (path, threads)
46//! let (mut reader, header) = create_bam_reader("input.bam", 1)?;
47//!
48//! // Create output BAM writer (path, header, threads, compression_level)
49//! let mut writer = create_bam_writer("output.bam", &header, 1, 6)?;
50//! # Ok(())
51//! # }
52//! ```
53//!
54//! ### Validating Input Files
55//!
56//! ```no_run
57//! use fgumi_lib::validation::validate_file_exists;
58//!
59//! # fn main() -> anyhow::Result<()> {
60//! // Validate input files exist with clear error messages
61//! validate_file_exists("input.bam", "Input BAM")?;
62//! validate_file_exists("reference.fa", "Reference FASTA")?;
63//! # Ok(())
64//! # }
65//! ```
66//!
67//! ### Progress Tracking
68//!
69//! ```no_run
70//! use fgumi_lib::progress::ProgressTracker;
71//!
72//! # fn main() -> anyhow::Result<()> {
73//! let tracker = ProgressTracker::new("Processing records")
74//!     .with_interval(100);
75//!
76//! for _i in 0..1000 {
77//!     // Process one record...
78//!     tracker.log_if_needed(1);  // Track incremental progress
79//! }
80//! tracker.log_final();  // Log final count if not exactly on interval
81//! # Ok(())
82//! # }
83//! ```
84//!
85//! ### UMI Assignment
86//!
87//! ```
88//! use fgumi_lib::umi::{IdentityUmiAssigner, UmiAssigner};
89//!
90//! let assigner = IdentityUmiAssigner::default();
91//! let umis = vec!["ACGTACGT".to_string(), "ACGTACGT".to_string(), "TGCATGCA".to_string()];
92//! let assignments = assigner.assign(&umis);
93//! // With identity assignment, identical UMIs share a molecule ID and each distinct UMI gets
94//! // its own, so the three input UMIs yield 2 unique molecule IDs (ACGTACGT and TGCATGCA).
95//! assert_eq!(assignments.iter().collect::<std::collections::HashSet<_>>().len(), 2);
96//! ```
97//!
98//! ## Feature Highlights
99//!
100//! - **Type-safe BAM I/O** - Headers always paired with readers
101//! - **Consistent validation** - Standardized error messages
102//! - **Progress tracking** - Uniform logging across tools
103//! - **Module organization** - Related functionality grouped logically
104//! - **Comprehensive testing** - Extensive test suite ensuring correctness
105//!
106//! ## Architecture
107//!
108//! The library follows these design principles:
109//!
110//! - **Separation of concerns** - Modules have clear, focused responsibilities
111//! - **Backward compatibility** - Re-exports maintain existing APIs
112//! - **Testability** - Comprehensive unit and integration tests
113//! - **Documentation** - All public items documented with examples
114//!
115//! ## Contributing
116//!
117//! When adding new functionality:
118//!
119//! 1. Add to appropriate module group (sam, umi, consensus, etc.)
120//! 2. Include comprehensive documentation and examples
121//! 3. Add unit tests covering edge cases
122//! 4. Maintain backward compatibility via re-exports
123//!
124//! ## See Also
125//!
126//! - [fgbio](https://github.com/fulcrumgenomics/fgbio) - Scala implementation
127//! - [noodles](https://github.com/zaeleus/noodles) - Rust bioinformatics I/O
128
// Public module tree of the crate.
//
// Entries are listed in alphabetical order by their exported name, which is why `pub mod`
// declarations (modules defined in this crate) are interleaved with `pub use` re-exports of
// modules from other crates (`fgumi_bgzf`, `fgumi_dna`, `fgumi_sam`, `fgumi_consensus`,
// `fgumi_metrics`). The re-exports make those external modules reachable at
// `crate::<name>` paths, exactly like the locally declared modules.
129pub mod bam_io;
130pub mod batched_sam_reader;
// `fgumi_bgzf::reader` / `fgumi_bgzf::writer` are renamed on re-export to `bgzf_reader` /
// `bgzf_writer`.
131pub use fgumi_bgzf::reader as bgzf_reader;
132pub use fgumi_bgzf::writer as bgzf_writer;
133pub use fgumi_dna::bitenc;
134pub use fgumi_sam::clipper;
135pub mod consensus;
136pub use fgumi_dna::dna;
137pub mod errors;
138pub mod fastq;
139pub mod grouper;
140pub mod header;
141pub mod logging;
142pub mod metrics;
143pub mod mi_group;
144pub use fgumi_consensus::phred;
145pub mod progress;
146pub mod read_info;
147pub mod reference;
148pub use fgumi_metrics::rejection;
149pub mod reorder_buffer;
150pub mod sam;
151pub mod sort;
152pub mod tag_reversal;
153pub mod template;
154pub mod umi;
155pub mod unified_pipeline;
156pub mod validation;
157pub mod variant_review;
// `vendored` is public but excluded from the rendered documentation via `#[doc(hidden)]`.
158#[doc(hidden)]
159pub mod vendored;
160
// `simulate` is only compiled when the `simulate` Cargo feature is enabled.
161#[cfg(feature = "simulate")]
162pub mod simulate;
163
164// Re-export `RejectionReason` at the crate root for convenient access
// (resolved through the `rejection` re-export from `fgumi_metrics` above).
165pub use rejection::RejectionReason;
166
167// Re-export commonly used SAM items for backward compatibility:
// `sam::alignment_tags` is also reachable as `crate::alignment_tags`.
168pub use sam::alignment_tags;
169
170// Re-export UMI items for backward compatibility:
// `umi::assigner` is also reachable as `crate::assigner`.
171pub use umi::assigner;
172
173// Re-export consensus items for backward compatibility.
// Each item from `consensus` is exposed at the crate root under an alias
// (e.g. `consensus::caller` as `crate::consensus_caller`).
174pub use consensus::caller as consensus_caller;
175pub use consensus::duplex_caller as duplex_consensus_caller;
176pub use consensus::filter as consensus_filter;
177pub use consensus::overlapping as overlapping_consensus;
178pub use consensus::simple_umi as simple_umi_consensus;
179pub use consensus::tags as consensus_tags;
180pub use consensus::vanilla_caller as vanilla_consensus_caller;