base131072/
lib.rs

1//! I originally made this crate in order to pack some data into tweets. However halfway through
2//! making the crate, I discovered [with the help of a very helpful
3//! table](https://github.com/qntm/base2048) that Twitter weights its characters, and that
4//! Base131072 is not actually the most efficient way to encode information on Twitter, but rather
5//! Base2048. [Another very good crate](https://docs.rs/base2048/2.0.2/base2048) implements
6//! Base2048.
7//!
8//! However, this crate should still work, should you want to encode something Base131072 for some
9//! reason!
10
11#![cfg_attr(not(feature = "std"), no_std)]
12
13#[cfg(not(feature = "std"))]
14extern crate alloc;
15
16#[cfg(not(feature = "std"))]
17use alloc::{string::String, vec::Vec};
18
19mod lookup_table;
20
21use core::cmp::Ordering;
22use core::fmt;
23use lookup_table::{LOOKUP_TABLE, PAD1, PAD2};
24
25#[derive(Debug, Clone, Copy, Eq, PartialEq)]
26struct B17(u32);
27
28impl B17 {
29    fn encode(self) -> char {
30        match LOOKUP_TABLE.binary_search_by_key(&self.0, |&(idx, _, _)| idx) {
31            Ok(lookup_idx) => unsafe { char::from_u32_unchecked(LOOKUP_TABLE[lookup_idx].1) },
32            Err(lookup_idx) => {
33                let (idx, start, _) = LOOKUP_TABLE[lookup_idx - 1];
34                unsafe { char::from_u32_unchecked(self.0 - idx + start) }
35            }
36        }
37    }
38
39    fn decode(ch: char) -> Option<Self> {
40        let code_point = ch as u32;
41        let lookup_idx = LOOKUP_TABLE
42            .binary_search_by(|&(_, start, stop)| {
43                if start > code_point {
44                    Ordering::Greater
45                } else if code_point > stop {
46                    Ordering::Less
47                } else {
48                    Ordering::Equal
49                }
50            })
51            .ok()?;
52        let (idx, start, _) = LOOKUP_TABLE[lookup_idx];
53        Some(Self(code_point - start + idx))
54    }
55}
56
57struct B8ToB17Iter<'a> {
58    data: &'a [u8],
59    index: usize,
60    bit_offset: usize,
61}
62
63impl<'a> B8ToB17Iter<'a> {
64    fn new(data: &'a [u8]) -> Self {
65        Self {
66            data,
67            index: 0,
68            bit_offset: 0,
69        }
70    }
71}
72
73impl<'a> Iterator for B8ToB17Iter<'a> {
74    type Item = B17;
75    fn next(&mut self) -> Option<Self::Item> {
76        if self.index >= self.data.len() {
77            return None;
78        }
79        let mut res: u32 = 0;
80        res |= ((self.data[self.index] << self.bit_offset) as u32) << 9;
81
82        self.index += 1;
83        if self.index >= self.data.len() {
84            return Some(B17(res));
85        }
86        res |= (self.data[self.index] as u32) << (1 + self.bit_offset);
87
88        self.index += 1;
89        if self.index >= self.data.len() {
90            return Some(B17(res));
91        }
92        res |= (self.data[self.index] >> (7 - self.bit_offset)) as u32;
93
94        self.bit_offset += 1;
95        if self.bit_offset > 7 {
96            self.index += 1;
97            self.bit_offset = 0;
98        }
99        Some(B17(res))
100    }
101}
102
103struct B17ToB8Iter<'a> {
104    data: &'a [B17],
105    index: usize,
106    bit_offset: usize,
107}
108
109impl<'a> B17ToB8Iter<'a> {
110    fn new(data: &'a [B17]) -> Self {
111        Self {
112            data,
113            index: 0,
114            bit_offset: 0,
115        }
116    }
117}
118
119impl<'a> Iterator for B17ToB8Iter<'a> {
120    type Item = u8;
121    fn next(&mut self) -> Option<Self::Item> {
122        if self.index >= self.data.len() {
123            return None;
124        }
125        if self.bit_offset > 9 {
126            let mut next = (self.data[self.index].0 << (self.bit_offset - 9)) as u8;
127            self.index += 1;
128            if self.index >= self.data.len() {
129                if self.bit_offset == 17 {
130                    return None;
131                }
132                return Some(next);
133            }
134            self.bit_offset -= 9;
135            next |= (self.data[self.index].0 >> (17 - self.bit_offset)) as u8;
136            Some(next)
137        } else {
138            let next = (self.data[self.index].0 >> (9 - self.bit_offset)) as u8;
139            self.bit_offset += 8;
140            Some(next)
141        }
142    }
143}
144
/// Trailing marker appended to an encoded string whose bit count is not a multiple of 17.
///
/// It records how many surplus bytes the decoder has to drop from the end of its output.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
enum Padding {
    /// One surplus byte has to be dropped.
    Pad1,
    /// Two surplus bytes have to be dropped.
    Pad2,
}

/// Determines which padding marker, if any, an input of `byte_size` bytes needs.
///
/// Returns `None` when `byte_size * 8` is a multiple of 17, i.e. the input fills its last
/// 17-bit group completely.
fn calc_padding(byte_size: usize) -> Option<Padding> {
    // Bits occupied in the last 17-bit group: `(byte_size * 8) % 17`. Reducing `byte_size`
    // modulo 17 first keeps the multiplication from overflowing for very large lengths.
    let trailing_bits = (byte_size % 17) * 8 % 17;
    match trailing_bits {
        0 => None,
        // At most 8 bits used: the group's 9+ unused bits decode to two surplus bytes.
        1..=8 => Some(Padding::Pad2),
        // 9..=16 bits used: the unused bits decode to a single surplus byte.
        _ => Some(Padding::Pad1),
    }
}
163
164/// Encode some bytes to a base131072 encoded string
165pub fn encode<T: AsRef<[u8]>>(input: T) -> String {
166    let input = input.as_ref();
167    let mut out = String::with_capacity(input.len() * 8 / 17);
168    for b17 in B8ToB17Iter::new(input) {
169        out.push(b17.encode());
170    }
171    if let Some(padding) = calc_padding(input.len()) {
172        match padding {
173            Padding::Pad1 => out.push(unsafe { char::from_u32_unchecked(PAD1) }),
174            Padding::Pad2 => out.push(unsafe { char::from_u32_unchecked(PAD2) }),
175        }
176    }
177    out
178}
179
/// The error encountered when decoding an invalid Base131072 string
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct InvalidChar(
    /// The char index of the invalid character (zero-based, counted in characters, not bytes)
    pub usize,
    /// The invalid character
    pub char,
);

impl fmt::Display for InvalidChar {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let InvalidChar(position, ch) = *self;
        write!(
            f,
            "invalid char '{}' encountered at character number {}",
            ch, position
        )
    }
}

#[cfg(feature = "std")]
impl std::error::Error for InvalidChar {}
200
201/// Decode a base131072 encoded string
202pub fn decode<T: AsRef<str>>(input: T) -> Result<Vec<u8>, InvalidChar> {
203    let mut string = input.as_ref();
204    if string.is_empty() {}
205    let padding = if let Some(ch) = string.chars().last() {
206        match ch as u32 {
207            PAD1 => 1,
208            PAD2 => 2,
209            _ => 0,
210        }
211    } else {
212        return Ok(Vec::new());
213    };
214    if padding > 0 {
215        let last_char_index = string.char_indices().last().unwrap().0;
216        string = &string[..last_char_index];
217    }
218    let mut b17s = Vec::with_capacity(string.len());
219    for (idx, ch) in string.chars().enumerate() {
220        if let Some(b17) = B17::decode(ch) {
221            b17s.push(b17);
222        } else {
223            return Err(InvalidChar(idx, ch));
224        }
225    }
226    let mut bytes = B17ToB8Iter::new(&b17s).collect::<Vec<_>>();
227    bytes.truncate(bytes.len() - padding);
228    Ok(bytes)
229}
230
#[cfg(test)]
mod tests {
    // The 17-bit literals below are grouped as "byte | byte | spill-over bits" to mirror how
    // the input bytes line up, which clippy reports as unusual digit grouping.
    #![allow(clippy::unusual_byte_groupings)]

    use super::*;

    /// Groups produced by the bytes `0x11, 0x22, ..., 0x88`.
    const REPEATED_NIBBLES: [u32; 4] = [
        0b0001_0001_0010_0010_0,
        0b011_0011_0100_0100_01,
        0b01_0101_0110_0110_011,
        0b1_0111_1000_1000_0000,
    ];

    /// Groups produced by the bytes `1..=17` (136 bits, exactly eight groups).
    const COUNT_TO_17: [u32; 8] = [
        0b0000_0001_0000_0010_0,
        0b000_0011_0000_0100_00,
        0b00_0101_0000_0110_000,
        0b0_0111_0000_1000_0000,
        0b1001_0000_1010_0000_1,
        0b011_0000_1100_0000_11,
        0b01_0000_1110_0000_111,
        0b1_0001_0000_0001_0001,
    ];

    /// Groups produced by the bytes `1..=20`; the last group is zero-filled on the right.
    const COUNT_TO_20: [u32; 10] = [
        0b0000_0001_0000_0010_0,
        0b000_0011_0000_0100_00,
        0b00_0101_0000_0110_000,
        0b0_0111_0000_1000_0000,
        0b1001_0000_1010_0000_1,
        0b011_0000_1100_0000_11,
        0b01_0000_1110_0000_111,
        0b1_0001_0000_0001_0001,
        0b0001_0010_0001_0011_0,
        0b001_0100_0000_0000_00,
    ];

    /// Wraps raw values into `B17`s.
    fn groups(values: &[u32]) -> Vec<B17> {
        values.iter().copied().map(B17).collect()
    }

    /// Runs the byte to 17-bit iterator to completion.
    fn pack(bytes: &[u8]) -> Vec<B17> {
        B8ToB17Iter::new(bytes).collect()
    }

    /// Runs the 17-bit to byte iterator to completion.
    fn unpack(values: &[u32]) -> Vec<u8> {
        B17ToB8Iter::new(&groups(values)).collect()
    }

    #[test]
    fn b17_encoding() {
        // Both ends of the 17-bit range plus a spread of values in between.
        let max = 1u32 << 17;
        let samples = [0, 1, 2, max - 1, max - 2]
            .iter()
            .copied()
            .chain((2..=9).map(|divisor| max / divisor));
        for value in samples {
            assert_eq!(B17::decode(B17(value).encode()).unwrap(), B17(value));
        }
    }

    #[test]
    fn b8_to_b17_iter() {
        assert_eq!(pack(&[]), groups(&[]));
        assert_eq!(pack(&[1]), groups(&[0b0000_0001_0000_0000_0]));
        assert_eq!(pack(&[1, 2]), groups(&[0b0000_0001_0000_0010_0]));
        assert_eq!(
            pack(&[1, 2, 3]),
            groups(&[0b0000_0001_0000_0010_0, 0b000_0011_0000_0000_00])
        );
        assert_eq!(
            pack(&[0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88]),
            groups(&REPEATED_NIBBLES)
        );
        assert_eq!(
            pack(&(1..=17).collect::<Vec<u8>>()),
            groups(&COUNT_TO_17)
        );
        assert_eq!(
            pack(&(1..=20).collect::<Vec<u8>>()),
            groups(&COUNT_TO_20)
        );
    }

    #[test]
    fn b17_to_b8_iter() {
        assert_eq!(unpack(&[]), Vec::<u8>::new());
        assert_eq!(unpack(&[0b0000_0001_0000_0000_0]), vec![1, 0, 0]);
        assert_eq!(unpack(&[0b0000_0001_0000_0010_0]), vec![1, 2, 0]);
        assert_eq!(
            unpack(&[0b0000_0001_0000_0010_0, 0b000_0011_0000_0000_00]),
            vec![1, 2, 3, 0, 0]
        );
        assert_eq!(
            unpack(&REPEATED_NIBBLES),
            vec![0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0]
        );
        // Eight groups hold exactly 17 bytes, so no zero-filled byte follows.
        assert_eq!(unpack(&COUNT_TO_17), (1..=17).collect::<Vec<u8>>());
        // Ten groups hold 170 bits: 20 data bytes followed by two zero-filled bytes.
        let mut expected: Vec<u8> = (1..=20).collect();
        expected.extend_from_slice(&[0, 0]);
        assert_eq!(unpack(&COUNT_TO_20), expected);
    }

    #[test]
    fn test_padding() {
        let cases = [
            (0, None),
            (1, Some(Padding::Pad2)),
            (2, Some(Padding::Pad1)),
            (3, Some(Padding::Pad2)),
            (8, Some(Padding::Pad1)),
            (17, None),
            (20, Some(Padding::Pad2)),
        ];
        for &(byte_size, expected) in cases.iter() {
            assert_eq!(calc_padding(byte_size), expected);
        }
    }

    #[test]
    fn encoding() {
        let inputs: Vec<Vec<u8>> = vec![
            vec![],
            vec![1],
            vec![1, 2],
            vec![1, 2, 3],
            (0..17).collect(),
            (0..255).collect(),
            vec![100; 1024],
        ];
        for input in inputs {
            assert_eq!(decode(encode(&input)).unwrap(), input);
        }
    }
}