1use std::marker::PhantomData;
2
3use crate::{
4 Position, Block,
6 components::{
7 Header, View,
8 MagicNumber, TextEncoder, CountArrayHeader, SuffixArrayHeader, BwmHeader,
10 CountArrayView, SuffixArrayView, BwmView,
12 },
13};
14
15pub mod build_config;
16
17pub struct FmIndexBuilder<P: Position, B: Block, E: TextEncoder> {
19 text_len: usize,
21 symbol_count: u32,
22 magic_number: MagicNumber,
23 text_encoder: E,
24 suffix_array_config: build_config::SuffixArrayConfig,
26 lookup_table_config: build_config::LookupTableConfig,
27 count_array_header: CountArrayHeader,
29 suffix_array_header: SuffixArrayHeader,
30 bwm_header: BwmHeader,
31 _phantom: PhantomData<(P, B)>,
33}
34
35#[derive(Debug, thiserror::Error)]
37pub enum BuildError {
38 #[error("The symbol count ({1}) exceeds the maximum for the chosen block type ({0}). Try using a larger block type or reducing the number of symbols.")]
40 SymbolCountOver(u32, u32),
41
42 #[error("Mismatched text length: expected {0} bytes, but got {1} bytes.")]
44 UnmatchedTextLength(usize, usize),
45
46 #[error("Incorrect blob size: expected {0} bytes, but got {1} bytes.")]
48 InvalidBlobSize(usize, usize),
49
50 #[error("Improper blob alignment: required alignment is {0} bytes, but the blob has an offset of {1} bytes.")]
52 NotAlignedBlob(usize, usize),
53
54 #[error("Invalid build configuration: {0}")]
56 InvalidConfig(String),
57}
58
59impl<P: Position, B: Block, E: TextEncoder> FmIndexBuilder<P, B, E> {
60 pub fn new(
64 text_len: usize,
65 symbol_count: u32,
66 text_encoder: E,
67 ) -> Result<Self, BuildError> {
68 let suffix_array_config = build_config::SuffixArrayConfig::default();
69 let lookup_table_config = build_config::LookupTableConfig::default();
70
71 if symbol_count > B::MAX_SYMBOL {
72 return Err(BuildError::SymbolCountOver(B::MAX_SYMBOL, symbol_count));
73 }
74
75 let (count_array_header, suffix_array_header, bwm_header) = Self::generate_headers(
77 text_len,
78 symbol_count,
79 &suffix_array_config,
80 &lookup_table_config,
81 )?;
82
83 Ok(Self {
84 text_len,
86 symbol_count,
87 magic_number: MagicNumber::new(),
88 text_encoder,
89 lookup_table_config,
91 suffix_array_config,
92 count_array_header,
94 suffix_array_header,
95 bwm_header,
96 _phantom: PhantomData,
98 })
99 }
100 fn generate_headers(
101 text_len: usize,
102 symbol_count: u32,
103 suffix_array_config: &build_config::SuffixArrayConfig,
104 lookup_table_config: &build_config::LookupTableConfig,
105 ) -> Result<(CountArrayHeader, SuffixArrayHeader, BwmHeader), BuildError> {
106 let lookup_table_kmer_size = lookup_table_config.kmer_size::<P>(symbol_count)?;
107 let suffix_array_sampling_ratio = suffix_array_config.sampling_ratio()?;
108
109 let count_array_header = CountArrayHeader::new(
110 symbol_count,
111 lookup_table_kmer_size,
112 );
113 let suffix_array_header = SuffixArrayHeader::new(
114 text_len as u64,
115 suffix_array_sampling_ratio,
116 );
117 let bwm_header = BwmHeader::new::<P, B>(
118 text_len as u64,
119 symbol_count,
120 );
121
122 Ok((
123 count_array_header,
124 suffix_array_header,
125 bwm_header,
126 ))
127 }
128 pub fn set_lookup_table_config(self, config: build_config::LookupTableConfig) -> Result<Self, BuildError> {
129 let (count_array_header, suffix_array_header, bwm_header) = Self::generate_headers(
130 self.text_len,
131 self.symbol_count,
132 &self.suffix_array_config,
133 &config,
134 )?;
135
136 Ok(Self {
137 lookup_table_config: config,
138 count_array_header,
139 suffix_array_header,
140 bwm_header,
141 ..self
142 })
143 }
144 pub fn set_suffix_array_config(self, config: build_config::SuffixArrayConfig) -> Result<Self, BuildError> {
145 let (count_array_header, suffix_array_header, bwm_header) = Self::generate_headers(
146 self.text_len,
147 self.symbol_count,
148 &config,
149 &self.lookup_table_config,
150 )?;
151
152 Ok(Self {
153 suffix_array_config: config,
154 count_array_header,
155 suffix_array_header,
156 bwm_header,
157 ..self
158 })
159 }
160
161 pub fn blob_size(&self) -> usize {
166 self.header_size() + self.body_size()
167 }
168 fn header_size(&self) -> usize {
170 self.magic_number.aligned_size::<B>()
171 + self.text_encoder.aligned_size::<B>()
172 + self.count_array_header.aligned_size::<B>()
173 + self.suffix_array_header.aligned_size::<B>()
174 + self.bwm_header.aligned_size::<B>()
175 }
176 fn body_size(&self) -> usize {
178 CountArrayView::<P>::aligned_body_size::<B>(&self.count_array_header)
179 + SuffixArrayView::<P>::aligned_body_size::<B>(&self.suffix_array_header)
180 + BwmView::<P, B>::aligned_body_size::<B>(&self.bwm_header)
181 }
182
183 pub fn build<'a>(
188 &self,
189 mut text: Vec<u8>,
190 blob: &'a mut [u8],
191 ) -> Result<(), BuildError> {
192 if text.len() != self.text_len {
194 return Err(BuildError::UnmatchedTextLength(self.text_len, text.len()));
195 }
196
197 let required_alignment = B::ALIGN_SIZE;
199 let offset = blob.as_ptr() as usize % required_alignment;
200 if offset != 0 {
201 return Err(BuildError::NotAlignedBlob(required_alignment, offset));
202 }
203
204 let blob_size = self.blob_size();
206 let blob_size_actual = blob.len();
207 if blob_size != blob_size_actual {
208 return Err(BuildError::InvalidBlobSize(blob_size, blob_size_actual));
209 }
210
211 let mut header_start_index = 0;
213 let mut header_end_index = self.magic_number.aligned_size::<B>();
215 self.magic_number.write_to_blob(&mut blob[header_start_index..header_end_index]);
216 header_start_index = header_end_index;
218 header_end_index += self.text_encoder.aligned_size::<B>();
219 self.text_encoder.write_to_blob(&mut blob[header_start_index..header_end_index]);
220 header_start_index = header_end_index;
222 header_end_index += self.count_array_header.aligned_size::<B>();
223 self.count_array_header.write_to_blob(&mut blob[header_start_index..header_end_index]);
224 header_start_index = header_end_index;
226 header_end_index += self.suffix_array_header.aligned_size::<B>();
227 self.suffix_array_header.write_to_blob(&mut blob[header_start_index..header_end_index]);
228 header_start_index = header_end_index;
230 header_end_index += self.bwm_header.aligned_size::<B>();
231 self.bwm_header.write_to_blob(&mut blob[header_start_index..header_end_index]);
232
233 let mut body_start_index = header_end_index;
235 let mut body_end_index = body_start_index + CountArrayView::<P>::aligned_body_size::<B>(&self.count_array_header);
236 self.count_array_header.count_and_encode_text::<P, B, E>(
240 &mut text,
241 &self.text_encoder,
242 &mut blob[body_start_index..body_end_index],
243 );
244 body_start_index = body_end_index;
248 body_end_index = body_start_index + SuffixArrayView::<P>::aligned_body_size::<B>(&self.suffix_array_header);
249
250 let sentinel_index = self.suffix_array_header.write_to_blob_and_get_sentinel_index::<P>(
251 &mut text,
252 &mut blob[body_start_index..body_end_index],
253 );
254 body_start_index = body_end_index;
256 body_end_index = body_start_index + BwmView::<P, B>::aligned_body_size::<B>(&self.bwm_header);
257 self.bwm_header.encode_bwm_body::<P, B>(
258 text,
259 sentinel_index,
260 &mut blob[body_start_index..body_end_index],
261 );
262
263 Ok(())
264 }
265}