rustalign_fmindex/lib.rs
1//! RustAlign FM-index Implementation
2//!
3//! This crate implements the FM-index (Full-text index in Minute space)
4//! based on the Burrows-Wheeler Transform, used for ultrafast sequence
5//! alignment in RustAlign.
6//!
7//! # Key Components
8//!
9//! - [`Ebwt`] - Extended Burrows-Wheeler Transform (main FM-index structure)
10//! - [`SideLocus`] - Position within the FM-index (side, byte, bitpair)
11//! - [`EbwtParams`] - Parameters describing the index structure
12//! - [`Reference`] - Reference sequence storage and extraction
13
14#![warn(missing_docs)]
15#![warn(clippy::all)]
16
17mod ebwt;
18mod locus;
19mod params;
20mod reference;
21
22#[cfg(test)]
23mod proptest;
24
25pub use ebwt::Ebwt;
26pub use locus::SideLocus;
27pub use params::EbwtParams;
28pub use reference::{EbwtWithRef, Reference};
29
30use rustalign_common::{AlignResult, Nuc, NucPair};
31
32/// FM-index trait for generic operations
33pub trait FmIndex {
34 /// Exact match search - return all positions matching the pattern
35 fn exact_match(&self, pattern: &[Nuc]) -> AlignResult<Vec<usize>>;
36
37 /// Get the length of the indexed text
38 fn len(&self) -> usize;
39
40 /// Check if the index is empty
41 fn is_empty(&self) -> bool {
42 self.len() == 0
43 }
44
45 /// Get forward BWT character at position
46 fn bwt(&self, pos: usize) -> AlignResult<NucPair>;
47
48 /// Rank operation - count occurrences of character up to position
49 fn rank(&self, c: Nuc, pos: usize) -> AlignResult<usize>;
50
51 /// Select operation - find position of k-th occurrence of character
52 fn select(&self, c: Nuc, k: usize) -> AlignResult<usize>;
53}