sigalign_impl/pattern_index/
dynamic_lfi.rs

1use crate::utils::get_unique_characters_of_sequence;
2use super::static_lfi::{
3    Lfi32B2V64,
4    Lfi32B3V64,
5    Lfi32B4V64,
6    Lfi32B5V64,
7    LfiOption,
8};
9// Re-export: The build error type is the same as the static version.
10pub use super::static_lfi::LfiBuildError;
11use sigalign_core::reference::PatternIndex;
12
13/// The LtFmIndex that can adjust the type by the number of characters.
14/// - The maximum number of characters that can be indexed is 31 (same as the `Lfi32B5V64`).
15/// - The maximum length of one sequence is u32::MAX (same as the static version).
16#[derive(Clone)]
17pub enum DynamicLfi {
18    B2(Lfi32B2V64),
19    B3(Lfi32B3V64),
20    B4(Lfi32B4V64),
21    B5(Lfi32B5V64),
22}
23
/// Build-time settings that define the structure of the LtFmIndex.
///
/// Every field is handed over unchanged to the `LfiOption` field of the same
/// name when the index is built.
#[derive(Debug, Clone)]
pub struct DynamicLfiOption {
    /// Sampling ratio of the suffix array.
    pub suffix_array_sampling_ratio: u64,
    /// Upper bound, in bytes, for the lookup table.
    pub lookup_table_max_bytes_size: u64,
    /// Whether the safe guard is enabled. When it is disabled, the index
    /// builder counts one character fewer while choosing the index type.
    pub use_safe_guard: bool,
}
31impl DynamicLfiOption {
32    fn to_lfi_option(self) -> LfiOption {
33        LfiOption {
34            suffix_array_sampling_ratio: self.suffix_array_sampling_ratio,
35            lookup_table_max_bytes_size: self.lookup_table_max_bytes_size,
36            use_safe_guard: self.use_safe_guard,
37        }
38    }
39}
40
41impl PatternIndex for DynamicLfi {
42    type Option = DynamicLfiOption;
43    type BuildError = LfiBuildError;
44
45    fn new(
46        concatenated_sequence: Vec<u8>,
47        option: Self::Option,
48    ) -> Result<Self, Self::BuildError> {
49        let lfi_option = option.to_lfi_option();
50        let unique_sequence = get_unique_characters_of_sequence(&concatenated_sequence);
51        let chr_count = {
52            if lfi_option.use_safe_guard {
53                unique_sequence.len()
54            } else {
55                unique_sequence.len() - 1
56            }
57        };
58
59        if chr_count <= 3 {
60            let inner = Lfi32B2V64::new(concatenated_sequence, lfi_option)?;
61            Ok(Self::B2(inner))
62        } else if chr_count <= 7 {
63            let inner = Lfi32B3V64::new(concatenated_sequence, lfi_option)?;
64            Ok(Self::B3(inner))
65        } else if chr_count <= 15 {
66            let inner = Lfi32B4V64::new(concatenated_sequence, lfi_option)?;
67            Ok(Self::B4(inner))
68        } else if chr_count <= 31 {
69            let inner = Lfi32B5V64::new(concatenated_sequence, lfi_option)?;
70            Ok(Self::B5(inner))
71        } else {
72            Err(Self::BuildError::OverMaximumCharacters { max: 31, input: chr_count as u32 })
73        }
74    }
75    fn get_sorted_positions(&self, pattern: &[u8]) -> Vec<u32> {
76        match self {
77            Self::B2(v) => v.get_sorted_positions(pattern),
78            Self::B3(v) => v.get_sorted_positions(pattern),
79            Self::B4(v) => v.get_sorted_positions(pattern),
80            Self::B5(v) => v.get_sorted_positions(pattern),
81        }
82    }
83}
84
85// Impl Extensions
86use sigalign_core::reference::extensions::{
87    Serialize,
88    EstimateSize,
89};
90//  - Serialize
91use crate::core::{EndianType, WriteBytesExt, ReadBytesExt};
92impl Serialize for DynamicLfi {
93    fn save_to<W>(&self, mut writer: W) -> Result<(), std::io::Error> where
94        W: std::io::Write
95    {
96        match self {
97            Self::B2(v) => {
98                writer.write_u64::<EndianType>(Self::B2_MAGIC_NUMBER)?;
99                v.save_to(&mut writer)?;
100                Ok(())
101            },
102            Self::B3(v) => {
103                writer.write_u64::<EndianType>(Self::B3_MAGIC_NUMBER)?;
104                v.save_to(&mut writer)?;
105                Ok(())
106            },
107            Self::B4(v) => {
108                writer.write_u64::<EndianType>(Self::B4_MAGIC_NUMBER)?;
109                v.save_to(&mut writer)?;
110                Ok(())
111            },
112            Self::B5(v) => {
113                writer.write_u64::<EndianType>(Self::B5_MAGIC_NUMBER)?;
114                v.save_to(&mut writer)?;
115                Ok(())
116            },
117        }
118    }
119    fn load_from<R>(mut reader: R) -> Result<Self, std::io::Error> where
120        R: std::io::Read,
121        Self: Sized,
122    {
123        let magic_number = reader.read_u64::<EndianType>()?;
124        match magic_number {
125            Self::B2_MAGIC_NUMBER => {
126                let inner = Lfi32B2V64::load_from(&mut reader)?;
127                Ok(Self::B2(inner))
128            },
129            Self::B3_MAGIC_NUMBER => {
130                let inner = Lfi32B3V64::load_from(&mut reader)?;
131                Ok(Self::B3(inner))
132            },
133            Self::B4_MAGIC_NUMBER => {
134                let inner = Lfi32B4V64::load_from(&mut reader)?;
135                Ok(Self::B4(inner))
136            },
137            Self::B5_MAGIC_NUMBER => {
138                let inner = Lfi32B5V64::load_from(&mut reader)?;
139                Ok(Self::B5(inner))
140            },
141            _ => {
142                Err((std::io::ErrorKind::InvalidData).into())
143            },
144        }
145    }
146}
147impl DynamicLfi {
148    // MAGIC NUMBERS: FNV1A32 hash value of
149    // LtFmIndexPosition32Block2Vector64: 956ed7f2
150    const B2_MAGIC_NUMBER: u64 = 2507069426;
151    // LtFmIndexPosition32Block3Vector64: 626c069d
152    const B3_MAGIC_NUMBER: u64 = 1651246749;
153    // LtFmIndexPosition32Block4Vector64: 6e317038
154    const B4_MAGIC_NUMBER: u64 = 1848733752;
155    // LtFmIndexPosition32Block5Vector64: 6a2427ab
156    const B5_MAGIC_NUMBER: u64 = 1780754347;
157}
158//  - EstimateSize
159impl EstimateSize for DynamicLfi {
160    fn serialized_size(&self) -> usize {
161        std::mem::size_of::<u64>()
162        + match self {
163            Self::B2(v) => v.serialized_size(),
164            Self::B3(v) => v.serialized_size(),
165            Self::B4(v) => v.serialized_size(),
166            Self::B5(v) => v.serialized_size(),
167        }
168    }
169}