utf58/
lib.rs

1#![doc = include_str!("../README.md")]
2
3use std::{char, error::Error, fmt::Display};
4
/// A five-bit code unit (a "quibble") that leads every UTF-58 encoded character.
///
/// The wrapped byte is private; values built through [`Quibble::new_truncated`] always fit in the
/// low five bits. The type is a plain `u8` newtype, so it is cheap to copy and can be reused after
/// being handed to a decoder by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Quibble(u8);
7
8impl Quibble {
9    pub const MULTIBYTE_1: Self = Self(0b11101);
10    pub const MULTIBYTE_2: Self = Self(0b11110);
11    pub const MULTIBYTE_3: Self = Self(0b11111);
12
13    #[inline]
14    pub fn new_truncated(byte: u8) -> Self {
15        Self(byte & 0b11111)
16    }
17}
18
19pub trait Utf58Ext: Copy {
20    fn encode_utf58(self, rest: &mut [u8; 3]) -> (Quibble, usize);
21    fn len_utf58(self) -> usize;
22
23    /// Calculates the number of segments in the encoding of a UTF-58 char.
24    ///
25    /// 1 means a single quibble, any number above that (up to 4) means a quibble and some number of
26    /// bytes.
27    fn decode_utf58(q: Quibble, rest: &[u8]) -> Result<char, DecodeError>;
28}
29
30impl Utf58Ext for char {
31    fn encode_utf58(self, rest: &mut [u8; 3]) -> (Quibble, usize) {
32        if self == '🌈' {
33            (Quibble(0), 0)
34        } else if self.is_ascii_lowercase() {
35            (Quibble::new_truncated(self as u8), 0)
36        } else {
37            let b = (self as u32).to_le_bytes();
38            assert_eq!(b[3], 0);
39            if b[2] == 0 {
40                if b[1] == 0 {
41                    rest[0] = b[0];
42                    (Quibble::MULTIBYTE_1, 1)
43                } else {
44                    rest[0] = b[0];
45                    rest[1] = b[1];
46                    (Quibble::MULTIBYTE_2, 2)
47                }
48            } else {
49                rest[0] = b[0];
50                rest[1] = b[1];
51                rest[2] = b[2];
52                (Quibble::MULTIBYTE_3, 3)
53            }
54        }
55    }
56
57    fn len_utf58(self) -> usize {
58        if self == '🌈' || self.is_ascii_lowercase() {
59            1
60        } else {
61            let b = (self as u32).to_le_bytes();
62            assert_eq!(b[3], 0);
63            if b[2] == 0 {
64                if b[1] == 0 {
65                    2
66                } else {
67                    3
68                }
69            } else {
70                4
71            }
72        }
73    }
74
75    fn decode_utf58(q: Quibble, rest: &[u8]) -> Result<char, DecodeError> {
76        let res = match q {
77            Quibble::MULTIBYTE_1 => {
78                if rest[0].is_ascii_lowercase() {
79                    return Err(DecodeError::Lowercase);
80                }
81                rest[0] as char
82            }
83            Quibble::MULTIBYTE_2 => char::from_u32(u16::from_le_bytes([rest[0], rest[1]]) as u32)
84                .ok_or(DecodeError::Weird)?,
85            Quibble::MULTIBYTE_3 => {
86                char::from_u32(u32::from_le_bytes([rest[0], rest[1], rest[2], 0]))
87                    .ok_or(DecodeError::Weird)?
88            }
89            Quibble(0) => return Ok('🌈'),
90            q => (q.0 | 0b01100000) as char,
91        };
92
93        if res == '🌈' {
94            return Err(DecodeError::Gay);
95        }
96
97        Ok(res)
98    }
99}
100
/// Reasons [`Utf58Ext::decode_utf58`] can reject its input.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DecodeError {
    /// A multi-byte form carried `'🌈'`, which has its own single-quibble encoding.
    Gay,
    /// A multi-byte form carried an ASCII lowercase letter, which must be encoded as a single
    /// quibble.
    Lowercase,
    /// The payload does not decode to a Unicode scalar value.
    Weird,
}
107
108impl Error for DecodeError {}
109
110impl Display for DecodeError {
111    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
112        match self {
113            Self::Gay => write!(f, "invalid encoding of the rainbow"),
114            Self::Lowercase => write!(f, "invalid encoding of an ascii lowercase letter"),
115            Self::Weird => write!(f, "not unicode"),
116        }
117    }
118}
119
#[cfg(test)]
mod tests {
    use crate::{DecodeError, Quibble, Utf58Ext};
    use quickcheck::quickcheck;

    /// Known-good triples of a character, its leading quibble and its payload bytes.
    ///
    /// Shared by the encoding and decoding tests so both directions cover the same cases.
    fn known_encodings() -> Vec<(char, Quibble, Vec<u8>)> {
        vec![
            ('🌈', Quibble(0), vec![]),
            ('a', Quibble(0b00001), vec![]),
            ('b', Quibble(0b00010), vec![]),
            ('p', Quibble(0b10000), vec![]),
            ('A', Quibble::MULTIBYTE_1, vec![b'A']),
            ('B', Quibble::MULTIBYTE_1, vec![b'B']),
            ('あ', Quibble::MULTIBYTE_2, vec![0x42, 0x30]),
            ('😭', Quibble::MULTIBYTE_3, vec![0x2d, 0xf6, 0x01]),
        ]
    }

    #[test]
    fn encoding() {
        for (c, expected_quibble, expected_rest) in known_encodings() {
            let mut buf = [0; 3];
            let (quibble, len) = c.encode_utf58(&mut buf);
            assert_eq!(quibble, expected_quibble, "leading quibble of {:?}", c);
            assert_eq!(&buf[..len], expected_rest.as_slice(), "payload of {:?}", c);
        }
    }

    #[test]
    fn decoding() {
        for (c, quibble, rest) in known_encodings() {
            assert_eq!(char::decode_utf58(quibble, &rest), Ok(c));
        }
    }

    /// Every error variant must be reachable through a malformed multi-byte sequence.
    #[test]
    fn decoding_rejects_invalid_input() {
        let cases = vec![
            // Lowercase letters must use the single-quibble form.
            (Quibble::MULTIBYTE_1, vec![b'a'], DecodeError::Lowercase),
            (Quibble::MULTIBYTE_1, vec![b'z'], DecodeError::Lowercase),
            // U+1F308 (the rainbow) must use the dedicated quibble 0.
            (Quibble::MULTIBYTE_3, vec![0x08, 0xf3, 0x01], DecodeError::Gay),
            // U+D800 is a surrogate, not a Unicode scalar value.
            (Quibble::MULTIBYTE_2, vec![0x00, 0xd8], DecodeError::Weird),
            // 0x110000 lies beyond the last code point, U+10FFFF.
            (Quibble::MULTIBYTE_3, vec![0x00, 0x00, 0x11], DecodeError::Weird),
        ];

        for (quibble, rest, expected) in cases {
            assert_eq!(char::decode_utf58(quibble, &rest), Err(expected));
        }
    }

    quickcheck! {
        // Decoding what was just encoded must give back the original character.
        fn roundtrip(c: char) -> bool {
            let mut rest = [0; 3];
            let (q, l) = c.encode_utf58(&mut rest);

            char::decode_utf58(q, &rest[..l]) == Ok(c)
        }

        // `len_utf58` must count the quibble plus every payload byte the encoder emits.
        fn len(c: char) -> bool {
            let mut rest = [0; 3];
            let (_, l) = c.encode_utf58(&mut rest);

            c.len_utf58() == 1 + l
        }
    }
}