Skip to main content

rustalign_fmindex/
params.rs

1//! FM-index parameters (EbwtParams from C++ bt2_idx.h)
2//!
3//! These parameters describe the structure and layout of an FM-index.
4//! Based on the C++ EbwtParams class with accurate field calculations.
5
6use std::mem::size_of;
7
/// Width in bytes of one stored offset (the C++ code uses `uint32_t`)
pub const OFF_SIZE: u32 = size_of::<u32>() as u32;

/// All-ones 32-bit pattern; shifted left by the offset rate to form `off_mask`
const OFF_MASK: u64 = u32::MAX as u64;
13
/// Layout parameters of an FM-index
///
/// Rust counterpart of the `EbwtParams` class in the original C++ code.
/// Every derived field is filled in by `EbwtParams::with_options` from the
/// reference length and the three rates (`line_rate`, `off_rate`,
/// `ftab_chars`).
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct EbwtParams {
    /// Reference length as passed to the constructor
    pub len: u64,

    /// BWT length: `len + 1`, the extra entry being the terminating symbol
    pub bwt_len: u64,

    /// Packed reference size: `len` divided by 4, rounded up
    pub sz: u64,

    /// Packed BWT size: `len / 4 + 1`; this is the quantity divided by
    /// `side_bwt_sz` (a byte count) to obtain `num_sides`
    pub bwt_sz: u64,

    /// Log2 of the line size in bytes
    pub line_rate: i32,

    /// Offset rate supplied at construction; `set_off_rate` leaves it untouched
    pub orig_off_rate: i32,

    /// Current suffix-array sample rate (log2)
    pub off_rate: i32,

    /// `OFF_MASK` shifted left by `off_rate`
    pub off_mask: u64,

    /// Number of characters covered by the ftab
    pub ftab_chars: i32,

    /// Extended ftab length: `ftab_chars * 2`
    pub eftab_len: u32,

    /// Extended ftab size in bytes: `eftab_len * OFF_SIZE`
    pub eftab_sz: u32,

    /// Ftab length: `(1 << (ftab_chars * 2)) + 1`
    pub ftab_len: u64,

    /// Ftab size in bytes: `ftab_len * OFF_SIZE`
    pub ftab_sz: u64,

    /// Number of sampled offsets: `bwt_len` divided by `2^off_rate`, rounded up
    pub offs_len: u64,

    /// Size of the sampled offsets in bytes: `offs_len * OFF_SIZE`
    pub offs_sz: u64,

    /// Line size in bytes: `1 << line_rate`
    pub line_sz: u32,

    /// Side size in bytes; equal to `line_sz` (one line per side)
    pub side_sz: u32,

    /// Bytes of each side that hold BWT data: `side_sz - OFF_SIZE * 4`
    pub side_bwt_sz: u32,

    /// BWT positions stored in each side: `side_bwt_sz * 4`
    pub side_bwt_len: u32,

    /// Number of sides needed to hold `bwt_sz`
    pub num_sides: u64,

    /// Number of lines; equal to `num_sides` (one line per side)
    pub num_lines: u64,

    /// Total ebwt length: `num_sides * side_sz`
    pub ebwt_tot_len: u64,

    /// Total ebwt size in bytes; same value as `ebwt_tot_len`
    pub ebwt_tot_sz: u64,

    /// Colorspace mode (not used in DNA mode)
    pub color: bool,

    /// Entire reverse mode
    pub entire_reverse: bool,
}
95
96impl EbwtParams {
97    /// Create default parameters for a given reference length
98    pub fn new(ref_len: u64) -> Self {
99        // Default parameters from RustAlign
100        let line_rate = 6; // 64 bytes per line
101        let off_rate = 4; // Sample every 16 positions
102        let ftab_chars = 8; // Valid range: 1-8
103
104        Self::with_options(ref_len, line_rate, off_rate, ftab_chars, false, false)
105    }
106
107    /// Create parameters with specific options (matches C++ EbwtParams::init)
108    pub fn with_options(
109        ref_len: u64,
110        line_rate: i32,
111        off_rate: i32,
112        ftab_chars: i32,
113        color: bool,
114        entire_reverse: bool,
115    ) -> Self {
116        let bwt_len = ref_len + 1; // +1 for terminating symbol
117        let sz = ref_len.div_ceil(4);
118        let bwt_sz = ref_len / 4 + 1;
119
120        let orig_off_rate = off_rate;
121        let off_mask = OFF_MASK << off_rate;
122
123        let eftab_len = (ftab_chars * 2) as u32;
124        let eftab_sz = eftab_len * OFF_SIZE;
125
126        let ftab_len = (1u64 << (ftab_chars * 2)) + 1;
127        let ftab_sz = ftab_len * OFF_SIZE as u64;
128
129        let offs_len = (bwt_len + (1u64 << off_rate) - 1) >> off_rate;
130        let offs_sz = offs_len * OFF_SIZE as u64;
131
132        let line_sz = 1u32 << line_rate;
133        let side_sz = line_sz; // 1 lines per side
134        let side_bwt_sz = side_sz - OFF_SIZE * 4;
135        let side_bwt_len = side_bwt_sz * 4;
136
137        let num_sides = bwt_sz.div_ceil(side_bwt_sz as u64);
138        let num_lines = num_sides; // 1 lines per side
139
140        let ebwt_tot_len = num_sides * side_sz as u64;
141        let ebwt_tot_sz = ebwt_tot_len;
142
143        Self {
144            len: ref_len,
145            bwt_len,
146            sz,
147            bwt_sz,
148            line_rate,
149            orig_off_rate,
150            off_rate,
151            off_mask,
152            ftab_chars,
153            eftab_len,
154            eftab_sz,
155            ftab_len,
156            ftab_sz,
157            offs_len,
158            offs_sz,
159            line_sz,
160            side_sz,
161            side_bwt_sz,
162            side_bwt_len,
163            num_sides,
164            num_lines,
165            ebwt_tot_len,
166            ebwt_tot_sz,
167            color,
168            entire_reverse,
169        }
170    }
171
172    /// Set a new suffix-array sampling rate (matches C++ setOffRate)
173    pub fn set_off_rate(&mut self, off_rate: i32) {
174        self.off_rate = off_rate;
175        self.off_mask = OFF_MASK << off_rate;
176        self.offs_len = (self.bwt_len + (1u64 << off_rate) - 1) >> off_rate;
177        self.offs_sz = self.offs_len * OFF_SIZE as u64;
178    }
179
180    /// Get size of a side in bytes
181    pub fn side_size(&self) -> u32 {
182        self.side_sz
183    }
184
185    /// Calculate which side a BWT position is on
186    pub fn pos_to_side(&self, pos: u64) -> u64 {
187        pos / (self.side_bwt_len as u64)
188    }
189
190    /// Check if parameters are valid (matches C++ repOk)
191    #[allow(clippy::manual_is_multiple_of)]
192    pub fn is_valid(&self) -> bool {
193        self.len > 0
194            && self.line_rate > 3
195            && self.off_rate >= 0
196            && self.ftab_chars <= 16
197            && self.ftab_chars >= 1
198            && self.line_rate < 32
199            && self.ftab_chars < 32
200            && self.ebwt_tot_sz % (self.line_sz as u64) == 0
201    }
202
203    /// Get total size in bytes
204    pub fn total_size(&self) -> u64 {
205        // Total size = ebwt + ftab + eftab + offs + rstarts
206        self.ebwt_tot_sz + self.ftab_sz + self.eftab_sz as u64 + self.offs_sz
207    }
208}
209
210impl Default for EbwtParams {
211    fn default() -> Self {
212        Self::new(0)
213    }
214}
215
216impl EbwtParams {
217    /// Create test parameters with a specific BWT length
218    #[cfg(test)]
219    pub fn new_test_params(bwt_len: usize) -> Self {
220        Self::new(bwt_len as u64 - 1)
221    }
222}
223
#[cfg(test)]
mod tests {
    use super::*;

    /// `new` applies the default rates and derives a valid layout.
    #[test]
    fn test_params_new() {
        let params = EbwtParams::new(1000);
        assert_eq!(params.len, 1000);
        assert_eq!(params.bwt_len, 1001);
        assert_eq!(params.line_rate, 6);
        assert_eq!(params.off_rate, 4);
        assert_eq!(params.orig_off_rate, 4);
        assert_eq!(params.ftab_chars, 8);
        assert!(params.is_valid());
    }

    /// Explicit options are stored and drive the derived sizes.
    #[test]
    fn test_params_with_options() {
        let params = EbwtParams::with_options(5000, 5, 3, 6, false, false);
        assert_eq!(params.len, 5000);
        assert_eq!(params.bwt_len, 5001);
        assert_eq!(params.line_rate, 5);
        assert_eq!(params.off_rate, 3);
        assert_eq!(params.ftab_chars, 6);
        // line_sz = 1 << 5 = 32; side_bwt_sz = 32 - 16 = 16
        assert_eq!(params.line_sz, 32);
        assert_eq!(params.side_bwt_sz, 16);
        // ftab_len = (1 << 12) + 1
        assert_eq!(params.ftab_len, 4097);
        assert!(!params.color);
        assert!(!params.entire_reverse);
        assert!(params.is_valid());
    }

    /// Changing the offset rate recomputes the sampled-offset fields only.
    #[test]
    fn test_set_off_rate() {
        let mut params = EbwtParams::new(10000);
        // ceil(10001 / 16) = 626
        assert_eq!(params.offs_len, 626);

        params.set_off_rate(5);
        assert_eq!(params.off_rate, 5);
        assert_eq!(params.orig_off_rate, 4);
        // ceil(10001 / 32) = 313
        assert_eq!(params.offs_len, 313);
        assert_eq!(params.offs_sz, 313 * OFF_SIZE as u64);
        assert_eq!(params.off_mask, 0xFFFF_FFFFu64 << 5);
    }

    /// Positions map to sides in steps of `side_bwt_len`.
    #[test]
    fn test_pos_to_side() {
        let params = EbwtParams::with_options(10000, 6, 4, 8, false, false);
        // side_sz = 1 << 6 = 64
        // side_bwt_sz = side_sz - OFF_SIZE*4 = 64 - 16 = 48
        // side_bwt_len = side_bwt_sz * 4 = 192
        assert_eq!(params.side_size(), 64);
        assert_eq!(params.side_bwt_sz, 48);
        assert_eq!(params.side_bwt_len, 192);

        assert_eq!(params.pos_to_side(0), 0);
        assert_eq!(params.pos_to_side(100), 0);
        assert_eq!(params.pos_to_side(191), 0);
        assert_eq!(params.pos_to_side(192), 1);
        assert_eq!(params.pos_to_side(383), 1);
        assert_eq!(params.pos_to_side(384), 2);
    }

    /// Derived sizes for the default rates and a 1000-symbol reference.
    #[test]
    fn test_sizes() {
        let params = EbwtParams::new(1000);
        assert_eq!(params.sz, 250);
        assert_eq!(params.bwt_sz, 251);
        assert_eq!(params.side_sz, 64);
        // ceil(251 / 48) = 6 sides of 64 bytes each
        assert_eq!(params.num_sides, 6);
        assert_eq!(params.num_lines, 6);
        assert_eq!(params.ebwt_tot_len, 384);
        assert_eq!(params.ebwt_tot_sz, 384);
        assert_eq!(params.ftab_sz, 65537 * 4);
        assert_eq!(params.eftab_sz, 64);
        assert_eq!(params.offs_sz, 252);
        // 384 + 262148 + 64 + 252
        assert_eq!(params.total_size(), 262848);
    }

    /// The default value describes an empty reference and is not valid.
    #[test]
    fn test_default_is_empty_reference() {
        let params = EbwtParams::default();
        assert_eq!(params, EbwtParams::new(0));
        assert_eq!(params.len, 0);
        assert_eq!(params.bwt_len, 1);
        assert!(!params.is_valid());
    }

    /// The test helper takes a BWT length, i.e. reference length plus one.
    #[test]
    fn test_new_test_params() {
        assert_eq!(EbwtParams::new_test_params(1001), EbwtParams::new(1000));
    }
}