base32_fs/
encode.rs

1use crate::Output;
2use crate::CHARS;
3
/// Maximum number of bytes that can be encoded as BASE32.
///
/// Every 5 input bytes become 8 output symbols and a trailing group of 4 bytes becomes 7, so
/// this is the largest input length whose encoded length still fits in a `usize`
/// (it encodes to exactly `usize::MAX` symbols).
pub const MAX_INPUT_LEN: usize = 5 * (usize::MAX / 8) + 4;
6
7/// Returns the length of the BASE32-encoded string for the given input length.
8///
9/// Panics if `input_len` is greater than [`MAX_INPUT_LEN`](crate::MAX_INPUT_LEN).
10pub const fn encoded_len(input_len: usize) -> usize {
11    if input_len > MAX_INPUT_LEN {
12        panic!("The input is too large");
13    }
14    input_len / 5 * 8
15        + match input_len % 5 {
16            0 => 0,
17            1 => 2,
18            2 => 4,
19            3 => 5,
20            _ => 7,
21        }
22}
23
24/// Encode `input` byte sequence using BASE32 encoding and write the resulting byte sequence to
25/// `output`.
26pub fn encode<O: Output + ?Sized>(input: &[u8], output: &mut O) {
27    macro_rules! byte {
28        (0, $a: ident) => {
29            CHARS[($a >> 3) as usize]
30        };
31        (1, $a: ident, $b: ident) => {
32            CHARS[((($a & 0b111) << 2) | ($b >> 6)) as usize]
33        };
34        (2, $b: ident) => {
35            CHARS[(($b >> 1) & 0b11111) as usize]
36        };
37        (3, $b: ident, $c: ident) => {
38            CHARS[((($b & 0b1) << 4) | ($c >> 4)) as usize]
39        };
40        (4, $c: ident, $d: ident) => {
41            CHARS[((($c & 0b1111) << 1) | ($d >> 7)) as usize]
42        };
43        (5, $d: ident) => {
44            CHARS[(($d >> 2) & 0b11111) as usize]
45        };
46        (6, $d: ident, $e: ident) => {
47            CHARS[((($d & 0b11) << 3) | ($e >> 5)) as usize]
48        };
49        (7, $e: ident) => {
50            CHARS[($e & 0b11111) as usize]
51        };
52    }
53    let mut chunks = input.chunks_exact(5);
54    for chunk in chunks.by_ref() {
55        let a = chunk[0];
56        let b = chunk[1];
57        let c = chunk[2];
58        let d = chunk[3];
59        let e = chunk[4];
60        output.push(byte!(0, a)); // 5 bits
61        output.push(byte!(1, a, b)); // 3 + 2 bits
62        output.push(byte!(2, b)); // 5 bits
63        output.push(byte!(3, b, c)); // 1 + 4 bits
64        output.push(byte!(4, c, d)); // 4 + 1 bits
65        output.push(byte!(5, d)); // 5 bits
66        output.push(byte!(6, d, e)); // 2 + 3 bits
67        output.push(byte!(7, e)); // 5 bits
68    }
69    let remainder = chunks.remainder();
70    let remaining = remainder.len();
71    if remaining == 0 {
72        return;
73    }
74    let a = remainder[0];
75    output.push(byte!(0, a)); // 5 bits
76    let b = remainder.get(1).copied().unwrap_or(0);
77    output.push(byte!(1, a, b)); // 3 + 2 bits
78    if remaining == 1 {
79        return;
80    }
81    let c = remainder.get(2).copied().unwrap_or(0);
82    output.push(byte!(2, b)); // 5 bits
83    output.push(byte!(3, b, c)); // 1 + 4 bits
84    if remaining == 2 {
85        return;
86    }
87    let d = remainder.get(3).copied().unwrap_or(0);
88    output.push(byte!(4, c, d)); // 4 + 1 bits
89    if remaining == 3 {
90        return;
91    }
92    let e = remainder.get(4).copied().unwrap_or(0);
93    output.push(byte!(5, d)); // 5 bits
94    output.push(byte!(6, d, e)); // 2 + 3 bits
95}
96
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::vec;
    use alloc::vec::Vec;
    use arbtest::arbtest;

    use crate::decode;
    use crate::decoded_len;

    /// Encodes `input` into a fresh `Vec`, decodes the result and asserts that the round trip
    /// reproduces `input` exactly.
    ///
    /// Also checks that the number of emitted symbols agrees with [`encoded_len`] and that the
    /// encoded text never contains `b'_'`, the placeholder byte used by the fixed-buffer tests.
    fn assert_round_trip(input: &[u8]) {
        let mut encoded = Vec::with_capacity(encoded_len(input.len()));
        encode(input, &mut encoded);
        assert_eq!(
            encoded_len(input.len()),
            encoded.len(),
            "input = {:?}, encoded = {:?}",
            input,
            core::str::from_utf8(&encoded)
        );
        assert!(
            !encoded.contains(&b'_'),
            "input = {:?}, encoded = {:?}",
            input,
            core::str::from_utf8(&encoded)
        );
        let mut decoded: Vec<u8> = Vec::with_capacity(decoded_len(encoded.len()).unwrap());
        decode(encoded.as_slice(), &mut decoded).unwrap();
        assert_eq!(
            input,
            decoded,
            "input = {input:?}, encoded = {:?}, decoded = {decoded:?}",
            core::str::from_utf8(&encoded)
        );
    }

    /// `decoded_len` must invert `encoded_len` for every length in the sampled range.
    #[test]
    fn test_encoded_len() {
        arbtest(|u| {
            let input_len = u.int_in_range(0..=usize::MAX / 8)?;
            let enc_len = encoded_len(input_len);
            let dec_len = decoded_len(enc_len).unwrap();
            assert_eq!(input_len, dec_len);
            Ok(())
        });
    }

    /// The largest accepted input length must not panic and encodes to `usize::MAX` symbols.
    #[test]
    fn test_encoded_len_no_panic() {
        let enc_len = encoded_len(MAX_INPUT_LEN);
        assert_eq!(usize::MAX, enc_len);
    }

    /// One byte past the limit must be rejected.
    #[test]
    #[should_panic(expected = "The input is too large")]
    fn test_encoded_len_panic() {
        let _enc_len = encoded_len(MAX_INPUT_LEN + 1);
    }

    /// Round trip through fixed-size buffers pre-filled with the `b'_'` placeholder.
    #[test]
    fn test_encode() {
        let input = *b"hello";
        let mut output = [b'_'; encoded_len(5)];
        encode(&input, &mut &mut output[..]);
        // The buffer has exactly `encoded_len(5)` slots, so no placeholder may survive.
        assert!(
            !output.contains(&b'_'),
            "encoded = {:?}",
            core::str::from_utf8(&output)
        );
        let mut decoded = [b'_'; 5];
        decode(output.as_slice(), &mut &mut decoded[..]).unwrap();
        assert_eq!(input, decoded);
    }

    /// Inputs made only of complete 5-byte groups (including the empty input).
    #[test]
    fn test_len_divisible_by_5() {
        arbtest(|u| {
            let input_len: usize = u.arbitrary_len::<u8>()? * 5;
            let mut input = Vec::with_capacity(input_len);
            for _ in 0..input_len {
                input.push(u.arbitrary()?);
            }
            assert_round_trip(&input);
            Ok(())
        });
    }

    /// Inputs consisting solely of a partial group (1 to 4 bytes).
    #[test]
    fn test_len_non_divisible_by_5() {
        arbtest(|u| {
            // Start at 1: a length of 0 is divisible by 5 and is covered by the other tests.
            let input_len: usize = u.int_in_range(1..=4)?;
            let mut input = Vec::with_capacity(input_len);
            for _ in 0..input_len {
                input.push(u.arbitrary()?);
            }
            assert_round_trip(&input);
            Ok(())
        });
    }

    /// Arbitrary inputs of arbitrary length.
    #[test]
    fn test_any_len() {
        arbtest(|u| {
            let input: Vec<u8> = u.arbitrary()?;
            assert_round_trip(&input);
            Ok(())
        });
    }

    /// Any string of alphabet symbols whose length `decoded_len` accepts must decode
    /// without error.
    #[test]
    fn test_decode() {
        arbtest(|u| {
            let input_len: usize = u.arbitrary_len::<u8>()?;
            let Some(output_len) = decoded_len(input_len) else {
                return Ok(());
            };
            let mut input = Vec::with_capacity(input_len);
            for _ in 0..input_len {
                input.push(*u.choose(&CHARS)?);
            }
            let mut decoded: Vec<u8> = Vec::with_capacity(output_len);
            decode(input.as_slice(), &mut decoded).unwrap();
            Ok(())
        });
    }

    /// All-zero inputs of arbitrary length must round-trip.
    #[test]
    fn test_decode_zeroes() {
        arbtest(|u| {
            let input_len: usize = u.arbitrary_len::<u8>()?;
            let input = vec![0_u8; input_len];
            assert_round_trip(&input);
            Ok(())
        });
    }

    /// Distinct 32-byte hashes must encode to distinct strings and decode back to the same set.
    #[test]
    fn test_hashes() {
        arbtest(|u| {
            let mut hashes: Vec<[u8; 32]> = u.arbitrary()?;
            hashes.sort_unstable();
            hashes.dedup();
            let mut strings = Vec::with_capacity(hashes.len());
            for hash in hashes.iter() {
                let mut hash_string = [0_u8; encoded_len(32)];
                encode(&hash[..], &mut &mut hash_string[..]);
                strings.push(hash_string);
            }
            strings.sort_unstable();
            strings.dedup();
            // No two distinct hashes may collapse into the same encoded string.
            assert_eq!(hashes.len(), strings.len());
            let mut actual_hashes = Vec::with_capacity(hashes.len());
            for string in strings.iter() {
                let mut actual_hash = [0_u8; 32];
                decode(&string[..], &mut &mut actual_hash[..]).unwrap();
                actual_hashes.push(actual_hash);
            }
            actual_hashes.sort_unstable();
            assert_eq!(hashes, actual_hashes);
            Ok(())
        });
    }

    /// Byte-wise ordering of two equal-length inputs must match the ordering of their
    /// encoded forms.
    #[test]
    fn test_sorting() {
        arbtest(|u| {
            let hash_len: usize = u.arbitrary_len::<u8>()?;
            let mut hashes = [vec![0_u8; hash_len], vec![0_u8; hash_len]];
            for i in 0..hash_len {
                hashes[0][i] = u.arbitrary()?;
                hashes[1][i] = u.arbitrary()?;
            }
            let expected = hashes[0].cmp(&hashes[1]);
            let mut encoded = [
                vec![0_u8; encoded_len(hash_len)],
                vec![0_u8; encoded_len(hash_len)],
            ];
            encode(&hashes[0][..], &mut &mut encoded[0][..]);
            encode(&hashes[1][..], &mut &mut encoded[1][..]);
            let actual = encoded[0].cmp(&encoded[1]);
            assert_eq!(
                expected,
                actual,
                "expected = {expected:?}, actual = {actual:?}, raw = {:?} {:?}, encoded = {} {}",
                hashes[0],
                hashes[1],
                core::str::from_utf8(&encoded[0]).unwrap(),
                core::str::from_utf8(&encoded[1]).unwrap(),
            );
            Ok(())
        });
    }
}