meme_id/
mapper.rs

1use core::{
2    cmp::{Ord, Ordering},
3    ops::ControlFlow,
4};
5
6pub struct WordMapper<'a> {
7    array: &'a [&'a str],
8}
9
10impl<'a> WordMapper<'a> {
11    /// Returns new world mapper instance from particularly ordered array of words.
12    pub(crate) const fn new(array: &'a [&'a str]) -> Self {
13        assert!(array.len().is_power_of_two());
14        WordMapper { array }
15    }
16
17    /// Returns number of bits that can be encoded by word with this mapper.
18    #[inline]
19    pub const fn bits(&self) -> u32 {
20        self.array.len().trailing_zeros()
21    }
22
23    /// Returns bit mask for bits.
24    #[inline]
25    pub const fn bit_mask(&self) -> usize {
26        self.array.len() - 1
27    }
28
29    /// Returns bits for the specified word.
30    #[inline]
31    pub fn decode_word(&self, word: &str, bits: u128) -> Option<u128> {
32        let idx = eytzinger_search(self.array, word)?;
33        Some(bits << self.bits() | idx as u128)
34    }
35
36    /// Returns bits for the specified word.
37    #[inline]
38    pub fn decode_words<const N: usize>(
39        &self,
40        words: [&str; N],
41        mut bits: u128,
42    ) -> Result<u128, usize> {
43        let mask = self.bit_mask();
44        let shift = self.bits();
45
46        for (i, word) in words.iter().enumerate() {
47            let idx = eytzinger_search(self.array, word).ok_or(i)?;
48            bits = (bits << shift) | (idx & mask) as u128;
49        }
50
51        Ok(bits)
52    }
53
54    /// Returns bits for the specified word.
55    #[inline]
56    pub fn decode_words_norepeat<const N: usize>(
57        &self,
58        words: [&str; N],
59        mut bits: u128,
60    ) -> Result<u128, usize> {
61        let less_bits_each = N.next_power_of_two().trailing_zeros();
62        let mask = self.bit_mask() >> less_bits_each;
63        let shift = self.bits() - less_bits_each;
64
65        for (i, word) in words.iter().enumerate() {
66            let idx = eytzinger_search(self.array, word).ok_or(i)?;
67            bits = (bits << shift) | (idx & mask) as u128;
68        }
69
70        Ok(bits)
71    }
72
73    /// Return word for the specified bits.
74    #[inline]
75    pub fn encode_word(&self, mut bits: u128) -> (&'a str, u128) {
76        let word = self.array[(bits as usize) & self.bit_mask()];
77        bits >>= self.bits();
78        (word, bits)
79    }
80
81    /// Return word for the specified bits.
82    #[inline]
83    pub fn encode_words<const N: usize>(&self, mut bits: u128) -> ([&'a str; N], u128) {
84        let mask = self.bit_mask();
85        let shift = self.bits();
86
87        let mut i = 0;
88        let words = [(); N].map(|()| {
89            let word = self.array[(bits as usize) & mask];
90            i += 1;
91            bits >>= shift;
92            word
93        });
94
95        (words, bits)
96    }
97
98    /// Return word for the specified bits.
99    #[inline]
100    pub fn encode_words_norepeat<const N: usize>(&self, mut bits: u128) -> ([&'a str; N], u128) {
101        let less_bits_each = N.next_power_of_two().trailing_zeros();
102        let mask = self.bit_mask() >> less_bits_each;
103        let shift = self.bits() - less_bits_each;
104
105        let mut i = 0;
106        let words = [(); N].map(|()| {
107            let word = self.array[((bits as usize) & mask) + (i << shift)];
108            i += 1;
109            bits >>= shift;
110            word
111        });
112
113        (words, bits)
114    }
115}
116
117#[inline]
118fn eytzinger_search(array: &[&str], s: &str) -> Option<usize> {
119    let mut i = 0;
120    while i < array.len() {
121        let v = array[i]; // this range check is optimized out :D
122        i = match cmp_ignore_case_ascii(v, s) {
123            Ordering::Greater | Ordering::Equal => 2 * i + 1,
124            Ordering::Less => 2 * i + 2,
125        };
126    }
127
128    // magic from the paper to fix up the (incomplete) final tree layer
129    // (only difference is that we recheck f() because this is exact search)
130    let p = i + 1;
131    let j = p >> (1 + (!p).trailing_zeros());
132    if j != 0 && (str::eq_ignore_ascii_case(array[j - 1], s)) {
133        Some(j - 1)
134    } else {
135        None
136    }
137}
138
139#[inline]
140fn cmp_ignore_case_ascii(a: &str, b: &str) -> Ordering {
141    let cf = a.bytes().zip(b.bytes()).try_for_each(|(a, b)| {
142        match Ord::cmp(&a.to_ascii_lowercase(), &b.to_ascii_lowercase()) {
143            Ordering::Equal => ControlFlow::Continue(()),
144            ord => ControlFlow::Break(ord),
145        }
146    });
147
148    match cf {
149        ControlFlow::Break(ord) => ord,
150        _ => Ord::cmp(&a.len(), &b.len()),
151    }
152}