sview_fmindex/components/text_encoder/text_encoders/
encoding_table.rs

1use super::{Header, TextEncoder};
2
3/// A table mapping symbols to their indices in the FM-index
4#[repr(C)]
5#[derive(Debug, Clone, PartialEq, Eq)]
6#[derive(zerocopy::FromBytes, zerocopy::IntoBytes, zerocopy::Immutable, zerocopy::KnownLayout)]
7pub struct EncodingTable([u8; 256]);
8impl TextEncoder for EncodingTable {
9    fn idx_of(&self, sym: u8) -> u8 {
10        unsafe { *self.0.get_unchecked(sym as usize) }
11    }
12}
13
14impl EncodingTable {
15    /// Treat the last symbol as wild card.
16    #[inline]
17    pub fn from_symbols<T: AsRef<[u8]>>(symbols: &[T]) -> Self {
18        let symbol_count = symbols.len() as u32;
19        let mut table = [(symbol_count - 1) as u8; 256]; // wild card's index is symbol_count
20        symbols.iter().enumerate().for_each(|(idx, sym)| {
21            sym.as_ref().iter().for_each(|x| table[*x as usize] = idx as u8);
22        });
23        Self(table)
24    }
25    /// Add one additional wildcard
26    #[inline]
27    pub fn from_symbols_with_wildcard<T: AsRef<[u8]>>(symbols: &[T]) -> Self {
28        let symbol_count = symbols.len() as u32 + 1;
29        let mut table = [(symbol_count - 1) as u8; 256]; // wild card's index is symbol_count
30        symbols.iter().enumerate().for_each(|(idx, sym)| {
31            sym.as_ref().iter().for_each(|x| table[*x as usize] = idx as u8);
32        });
33        Self(table)
34    }
35    pub fn symbol_count(&self) -> u32 {
36        *self.0.iter().max().unwrap() as u32 + 1
37    }
38}
39
// `EncodingTable` implements `Header` without overriding any of the trait's items.
// NOTE(review): presumably this lets the table be stored as part of the index
// header — confirm against the definition of `Header` in the parent module.
impl Header for EncodingTable {}