sid_encode/
lib.rs

1mod error;
2
3pub use error::DecodeError;
4
5static ALPHABET: [char; 32] = [
6    '0', '1', '2', '3', '4', '5', '6', '7',
7    '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
8    'g', 'h', 'j', 'k', 'm', 'n', 'p', 'q',
9    'r', 's', 't', 'v', 'w', 'x', 'y', 'z',
10];
11
12const LOOKUP_TABLE_LENGTH: u8 = 38;
13// generated with some trial and error in the perfect_hash.py file.
14static REVERSE_ALPHABET: [u8; LOOKUP_TABLE_LENGTH as usize] = [
15    24, 25, 26, 255, 27, 28, 29, 30, 31, 255, 0, 1, 2,
16    3, 4, 5, 6, 7, 8, 9, 255, 10, 11, 12, 13, 14,
17    15, 16, 17, 255, 18, 19, 255, 20, 21, 255, 22, 23
18];
19
20pub const SHORT_LENGTH: usize = 22;
21
22// really in range 0..32 (or 255 if error)
23#[inline]
24fn lookup(value: u8) -> u8 {
25    let value = value % LOOKUP_TABLE_LENGTH;
26    REVERSE_ALPHABET[value as usize]
27}
28
29pub fn base32_decode(input: &str) -> Result<[u8; 16], DecodeError> {
30    if input.len() != 27 {
31        return Err(DecodeError::InvalidLength);
32    }
33    let input: [u8; 27] = input.as_bytes().try_into().unwrap();
34    if !input[22] == b'_' {
35        return Err(DecodeError::NoSeparator);
36    }
37
38    // build intermediate map. u8 data type, but each only contains 5 bits.
39    let mut intermediate = [0u8; 26];
40    for i in 0..22 {
41        intermediate[i] = lookup(input[i]);
42    }
43    for i in 23..27 {
44        intermediate[i - 1] = lookup(input[i]);
45    }
46
47    // bit hacks to check if any invalid with minimal branching:
48    // valid characters are 0..31, so if we OR them all together, we should get 31.
49    // if it's 255, then we have an invalid character.
50    let mut combined = 0u8;
51    for &c in &intermediate {
52        combined |= c;
53    }
54    let has_invalid = combined == 255;
55    if has_invalid {
56        let mut idx255 = intermediate.iter().position(|&c| c == 255).unwrap();
57        if idx255 >= 22 {
58            idx255 += 1;
59        }
60        let c = input[idx255];
61        return Err(DecodeError::InvalidCharacter(c as char));
62    }
63
64    let mut result = [0u8; 16];
65    // now we can do the actual decoding.
66    for i in 0..3 {
67        let j = i * 8;
68        let k = i * 5;
69        let d0 = intermediate[j];
70        let d1 = intermediate[j + 1];
71        let d2 = intermediate[j + 2];
72        let d3 = intermediate[j + 3];
73        let d4 = intermediate[j + 4];
74        let d5 = intermediate[j + 5];
75        let d6 = intermediate[j + 6];
76        let d7 = intermediate[j + 7];
77        let d8 = intermediate[j + 8];
78
79        result[k] = d0 << 5 | d1;
80        result[k + 1] = d2 << 3 | (d3 >> 2);
81        result[k + 2] = d3 << 6 | (d4 << 1) | (d5 >> 4);
82        result[k + 3] = d5 << 4 | (d6 >> 1);
83        result[k + 4] = d6 << 7 | (d7 << 2) | (d8 >> 3);
84    }
85
86    result[15] = intermediate[24] << 5 | intermediate[25];
87
88    Ok(result)
89}
90
91#[inline]
92fn alphabet(i: u8) -> char {
93    // unsafe {
94    //     *ALPHABET.get_unchecked(i & 0x1F as usize)
95    // }
96    ALPHABET[(i & 0x1F) as usize]
97}
98
99pub fn base32_encode(data: [u8; 16]) -> String {
100    let mut encoded = String::with_capacity(27);
101    // encoded0 skips 2 bits and takes top 3 bits from data0
102    encoded.push(alphabet(data[0] >> 5));
103    // encoded1 takes bottom 5 bits from data0
104    encoded.push(alphabet(data[0]));
105    // encoded2 takes top 5 bits from data1
106    encoded.push(alphabet(data[1] >> 3));
107    // encoded3 takes bottom 3 bits from data1 and top 2 bits from data2
108    encoded.push(alphabet(data[1] << 2 | data[2] >> 6));
109    // encoded4 takes bits 3-7 from data2
110    encoded.push(alphabet(data[2] >> 1));
111    // encoded5 takes bottom 1 bit from data2 and top 4 bits from data3
112    encoded.push(alphabet(data[2] << 4 | data[3] >> 4));
113    // encoded6 takes bottom 4 bits from data3 and top 1 bit from data4
114    encoded.push(alphabet(data[3] << 1 | data[4] >> 7));
115    // encoded7 takes bits 2-6 from data4
116    encoded.push(alphabet(data[4] >> 2));
117    // encoded8 takes bottom 2 bits from data4 and top 3 bits from data5
118    encoded.push(alphabet(data[4] << 3 | data[5] >> 5));
119    // encoded9 takes bottom 5 bits from data5
120    encoded.push(alphabet(data[5]));
121    // encoded10 takes top 5 bits from data6
122    encoded.push(alphabet(data[6] >> 3));
123    // encoded11 takes bottom 3 bits from data6 and top 2 bits from data7
124    encoded.push(alphabet(data[6] << 2 | data[7] >> 6));
125    // encoded12 takes bits 3-7 from data7
126    encoded.push(alphabet(data[7] >> 1));
127    // encoded13 takes bottom 1 bit from data7 and top 4 bits from data8
128    encoded.push(alphabet(data[7] << 4 | data[8] >> 4));
129    // encoded14 takes bottom 4 bits from data8 and top 1 bit from data9
130    encoded.push(alphabet(data[8] << 1 | data[9] >> 7));
131    // encoded15 takes bits 2-6 from data9
132    encoded.push(alphabet(data[9] >> 2));
133    // encoded16 takes bottom 2 bits from data9 and top 3 bits from data10
134    encoded.push(alphabet(data[9] << 3 | data[10] >> 5));
135    // encoded17 takes bottom 5 bits from data10
136    encoded.push(alphabet(data[10]));
137    // encoded18 takes top 5 bits from data11
138    encoded.push(alphabet(data[11] >> 3));
139    // encoded19 takes bottom 3 bits from data11 and top 2 bits from data12
140    encoded.push(alphabet(data[11] << 2 | data[12] >> 6));
141    // encoded20 takes bits 3-7 from data12
142    encoded.push(alphabet(data[12] >> 1));
143    // encoded21 takes bottom 1 bit from data12 and top 4 bits from data13
144    encoded.push(alphabet(data[12] << 4 | data[13] >> 4));
145    encoded.push('_');
146    // encoded22 takes bottom 4 bits from data13 and top 1 bit from data14
147    encoded.push(alphabet(data[13] << 1 | data[14] >> 7));
148    // encoded23 takes bits 2-6 from data14
149    encoded.push(alphabet(data[14] >> 2));
150    // encoded24 takes bottom 2 bits from data14 and top 3 bits from data15
151    encoded.push(alphabet(data[14] << 3 | data[15] >> 5));
152    // encoded25 takes bottom 5 bits from data15
153    encoded.push(alphabet(data[15]));
154    encoded
155}
156
157#[cfg(test)]
158mod test {
159    use super::*;
160
161    // iterating 2**128 times isn't a great idea
162    // #[test]
163    // fn test_ridiculous() {
164    //     let mut s = [0u8; 16];
165    //     let mut last_enc = "".to_string();
166    //     loop {
167    //         // Increment the counter
168    //         for i in (0..16).rev() {
169    //             if s[i] < 255 {
170    //                 s[i] += 1;
171    //                 break;
172    //             } else {
173    //                 if i < 15 {
174    //                     println!("pos {i} reset to 0");
175    //                 }
176    //                 s[i] = 0;
177    //             }
178    //         }
179    //
180    //         let enc = base32_encode(s);
181    //         assert_eq!(enc > last_enc, true);
182    //         let dec = base32_decode(&enc).unwrap();
183    //         let last_enc = enc;
184    //         assert_eq!(dec, s);
185    //
186    //         // Check if the counter reached the maximum value
187    //         if s.iter().all(|&x| x == 255) {
188    //             break;
189    //         }
190    //     }
191    // }
192
193    #[test]
194    fn test_rand() {
195        use rand::RngCore;
196        let mut rng = rand::thread_rng();
197        for _ in 0..3000000 {
198            let mut s = [0u8; 16];
199            rng.fill_bytes(&mut s);
200            let mut t = [0u8; 16];
201            rng.fill_bytes(&mut t);
202
203            // test s round trips
204            let s_enc = base32_encode(s);
205            let dec = base32_decode(&s_enc).unwrap();
206            assert_eq!(dec, s);
207
208            // test t round trips
209            let t_enc = base32_encode(t);
210            let dec = base32_decode(&t_enc).unwrap();
211            assert_eq!(dec, t);
212
213            if s < t {
214                assert_eq!(s_enc < t_enc, true);
215            } else {
216                assert_eq!(s_enc > t_enc, true);
217            }
218        }
219    }
220}