1pub mod alphabet;
49
50pub mod text_with_rank_support;
55
56mod batch_computed_cursors;
57mod config;
58mod construction;
59mod cursor;
60mod lookup_table;
61mod sampled_suffix_array;
62mod text_id_search_tree;
63
64use num_traits::NumCast;
65
66#[doc(inline)]
67pub use alphabet::Alphabet;
68#[doc(inline)]
69pub use config::FmIndexConfig;
70#[doc(inline)]
71pub use config::PerformancePriority;
72#[doc(inline)]
73pub use construction::IndexStorage;
74#[doc(inline)]
75pub use cursor::Cursor;
76
77use batch_computed_cursors::BatchComputedCursors;
78use construction::DataStructures;
79use lookup_table::LookupTables;
80use sampled_suffix_array::SampledSuffixArray;
81use text_id_search_tree::TexdIdSearchTree;
82use text_with_rank_support::{
83 Block64, Block512, CondensedTextWithRankSupport, FlatTextWithRankSupport, TextWithRankSupport,
84};
85
86#[cfg_attr(feature = "savefile", derive(savefile::savefile_derive::Savefile))]
90#[savefile_doc_hidden]
91#[derive(Clone)]
92pub struct FmIndex<I, R = CondensedTextWithRankSupport<I, Block64>> {
93 alphabet: Alphabet,
94 count: Vec<usize>,
95 text_with_rank_support: R,
96 suffix_array: SampledSuffixArray<I>,
97 text_ids: TexdIdSearchTree,
98 lookup_tables: LookupTables<I>,
99}
100
101pub type FmIndexCondensed64<I> = FmIndex<I, CondensedTextWithRankSupport<I, Block64>>;
104
105pub type FmIndexCondensed512<I> = FmIndex<I, CondensedTextWithRankSupport<I, Block512>>;
107
108pub type FmIndexFlat64<I> = FmIndex<I, FlatTextWithRankSupport<I, Block64>>;
110
111pub type FmIndexFlat512<I> = FmIndex<I, FlatTextWithRankSupport<I, Block512>>;
113
114const BATCH_SIZE: usize = 64;
115
116impl<I: IndexStorage, R: TextWithRankSupport<I>> FmIndex<I, R> {
117 fn new<T: AsRef<[u8]>>(
118 texts: impl IntoIterator<Item = T>,
119 alphabet: Alphabet,
120 config: FmIndexConfig<I, R>,
121 ) -> Self {
122 let DataStructures {
123 count,
124 sampled_suffix_array,
125 text_ids,
126 text_with_rank_support,
127 } = construction::create_data_structures::<I, R, T>(texts, &config, &alphabet);
128
129 let num_searchable_dense_symbols = alphabet.num_searchable_dense_symbols();
130
131 let mut index = FmIndex {
132 alphabet,
133 count,
134 text_with_rank_support,
135 suffix_array: sampled_suffix_array,
136 text_ids,
137 lookup_tables: LookupTables::new_empty(),
138 };
139
140 lookup_table::fill_lookup_tables(
141 &mut index,
142 config.lookup_table_depth,
143 num_searchable_dense_symbols,
144 );
145
146 index
147 }
148
149 pub fn count(&self, query: &[u8]) -> usize {
153 self.cursor_for_query(query).count()
154 }
155
156 pub fn count_many<'a>(
161 &'a self,
162 queries: impl IntoIterator<Item = &'a [u8]>,
163 ) -> impl Iterator<Item = usize> {
164 self.cursors_for_many_queries(queries)
165 .map(|cursor| cursor.count())
166 }
167
168 pub fn locate(&self, query: &[u8]) -> impl Iterator<Item = Hit> {
175 let cursor = self.cursor_for_query(query);
176
177 self.locate_interval(cursor.interval())
178 }
179
180 pub fn locate_many<'a>(
185 &'a self,
186 queries: impl IntoIterator<Item = &'a [u8]>,
187 ) -> impl Iterator<Item: Iterator<Item = Hit>> {
188 self.cursors_for_many_queries(queries)
189 .map(|cursor| self.locate_interval(cursor.interval()))
190 }
191
192 fn locate_interval(&self, interval: HalfOpenInterval) -> impl Iterator<Item = Hit> {
193 self.suffix_array
194 .recover_range(interval.start..interval.end, self)
195 .map(|idx| {
196 let (text_id, position) = self
197 .text_ids
198 .backtransfrom_concatenated_text_index(<usize as NumCast>::from(idx).unwrap());
199
200 Hit { text_id, position }
201 })
202 }
203
204 pub fn cursor_empty<'a>(&'a self) -> Cursor<'a, I, R> {
208 Cursor {
209 index: self,
210 interval: HalfOpenInterval {
211 start: 0,
212 end: self.total_text_len(),
213 },
214 }
215 }
216
217 pub fn cursor_for_query<'a>(&'a self, query: &[u8]) -> Cursor<'a, I, R> {
223 let (remaining_query, query_suffix) = self.split_query_for_lookup(query);
224 let interval = self.lookup_tables.lookup(query_suffix, &self.alphabet);
225
226 let mut cursor = Cursor {
227 index: self,
228 interval,
229 };
230
231 for &symbol in remaining_query.iter().rev() {
232 cursor.extend_query_front(symbol);
233
234 if cursor.count() == 0 {
235 break;
236 }
237 }
238
239 cursor
240 }
241
242 pub fn cursors_for_many_queries<'a>(
247 &'a self,
248 queries: impl IntoIterator<Item = &'a [u8]>,
249 ) -> impl Iterator<Item = Cursor<'a, I, R>> {
250 BatchComputedCursors::<I, R, _, BATCH_SIZE>::new(self, queries.into_iter())
251 }
252
253 fn cursor_for_query_without_alphabet_translation<'a>(
254 &'a self,
255 query: &[u8],
256 ) -> Cursor<'a, I, R> {
257 let (remaining_query, query_suffix) = self.split_query_for_lookup(query);
258 let interval = self
259 .lookup_tables
260 .lookup_without_alphabet_translation(query_suffix);
261
262 let mut cursor = Cursor {
263 index: self,
264 interval,
265 };
266
267 for &symbol in remaining_query.iter().rev() {
268 cursor.extend_front_without_alphabet_translation(symbol);
269
270 if cursor.count() == 0 {
271 break;
272 }
273 }
274
275 cursor
276 }
277
278 fn lf_mapping_step(&self, symbol: u8, idx: usize) -> usize {
279 self.count[symbol as usize] + self.text_with_rank_support.rank(symbol, idx)
280 }
281
282 fn split_query_for_lookup<'a>(&self, query: &'a [u8]) -> (&'a [u8], &'a [u8]) {
283 let lookup_depth = std::cmp::min(query.len(), self.lookup_tables.max_depth());
284 let suffix_idx = query.len() - lookup_depth;
285 query.split_at(suffix_idx)
286 }
287
288 pub fn alphabet(&self) -> &Alphabet {
289 &self.alphabet
290 }
291
292 pub fn num_texts(&self) -> usize {
293 self.text_ids.sentinel_indices.len()
294 }
295
296 pub fn total_text_len(&self) -> usize {
298 self.text_with_rank_support.text_len()
299 }
300
301 #[cfg(feature = "savefile")]
302 const VERSION_FOR_SAVEFILE: u32 = 0;
303
304 #[cfg(feature = "savefile")]
305 pub fn load_from_reader(
306 reader: &mut impl std::io::Read,
307 ) -> Result<Self, savefile::SavefileError> {
308 savefile::load(reader, Self::VERSION_FOR_SAVEFILE)
309 }
310
311 #[cfg(feature = "savefile")]
312 pub fn load_from_file(
313 filepath: impl AsRef<std::path::Path>,
314 ) -> Result<Self, savefile::SavefileError> {
315 savefile::load_file(filepath, Self::VERSION_FOR_SAVEFILE)
316 }
317
318 #[cfg(feature = "savefile")]
319 pub fn save_to_writer(
320 &self,
321 writer: &mut impl std::io::Write,
322 ) -> Result<(), savefile::SavefileError> {
323 savefile::save(writer, Self::VERSION_FOR_SAVEFILE, self)
324 }
325
326 #[cfg(feature = "savefile")]
327 pub fn save_to_file(
328 &self,
329 filepath: impl AsRef<std::path::Path>,
330 ) -> Result<(), savefile::SavefileError> {
331 savefile::save_file(filepath, Self::VERSION_FOR_SAVEFILE, self)
332 }
333}
334
335#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
337pub struct Hit {
338 pub text_id: usize,
339 pub position: usize,
340}
341
342#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
343pub(crate) struct HalfOpenInterval {
344 pub start: usize,
345 pub end: usize,
346}
347
348mod maybe_savefile {
349 #[cfg(feature = "savefile")]
350 pub trait MaybeSavefile: savefile::Savefile {}
351
352 #[cfg(not(feature = "savefile"))]
353 pub trait MaybeSavefile {}
354
355 impl MaybeSavefile for i32 {}
356 impl MaybeSavefile for u32 {}
357 impl MaybeSavefile for i64 {}
358}
359
360mod sealed {
361 pub trait Sealed {}
362}