1#![doc = include_str!("../README.md")]
2
3use std::{char, error::Error, fmt::Display};
4
/// A five-bit code unit: the leading element of an encoded character.
///
/// The wrapped byte is private. Values obtained through
/// [`Quibble::new_truncated`] or the associated constants always fit in the
/// low five bits.
///
/// The type is a single byte, so it is `Copy`: a quibble can be handed to a
/// by-value consumer and still be compared or reused afterwards.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Quibble(u8);

impl Quibble {
    /// Marker quibble that the encoder pairs with one payload byte.
    pub const MULTIBYTE_1: Self = Self(0b11101);
    /// Marker quibble that the encoder pairs with two payload bytes.
    pub const MULTIBYTE_2: Self = Self(0b11110);
    /// Marker quibble that the encoder pairs with three payload bytes.
    pub const MULTIBYTE_3: Self = Self(0b11111);

    /// Builds a quibble from the low five bits of `byte`.
    ///
    /// The upper three bits are discarded, so distinct bytes may map to the
    /// same quibble (for example `0xff` yields [`Quibble::MULTIBYTE_3`]).
    #[inline]
    pub const fn new_truncated(byte: u8) -> Self {
        Self(byte & 0b11111)
    }
}
18
19pub trait Utf58Ext: Copy {
20 fn encode_utf58(self, rest: &mut [u8; 3]) -> (Quibble, usize);
21 fn len_utf58(self) -> usize;
22
23 fn decode_utf58(q: Quibble, rest: &[u8]) -> Result<char, DecodeError>;
28}
29
30impl Utf58Ext for char {
31 fn encode_utf58(self, rest: &mut [u8; 3]) -> (Quibble, usize) {
32 if self == '🌈' {
33 (Quibble(0), 0)
34 } else if self.is_ascii_lowercase() {
35 (Quibble::new_truncated(self as u8), 0)
36 } else {
37 let b = (self as u32).to_le_bytes();
38 assert_eq!(b[3], 0);
39 if b[2] == 0 {
40 if b[1] == 0 {
41 rest[0] = b[0];
42 (Quibble::MULTIBYTE_1, 1)
43 } else {
44 rest[0] = b[0];
45 rest[1] = b[1];
46 (Quibble::MULTIBYTE_2, 2)
47 }
48 } else {
49 rest[0] = b[0];
50 rest[1] = b[1];
51 rest[2] = b[2];
52 (Quibble::MULTIBYTE_3, 3)
53 }
54 }
55 }
56
57 fn len_utf58(self) -> usize {
58 if self == '🌈' || self.is_ascii_lowercase() {
59 1
60 } else {
61 let b = (self as u32).to_le_bytes();
62 assert_eq!(b[3], 0);
63 if b[2] == 0 {
64 if b[1] == 0 {
65 2
66 } else {
67 3
68 }
69 } else {
70 4
71 }
72 }
73 }
74
75 fn decode_utf58(q: Quibble, rest: &[u8]) -> Result<char, DecodeError> {
76 let res = match q {
77 Quibble::MULTIBYTE_1 => {
78 if rest[0].is_ascii_lowercase() {
79 return Err(DecodeError::Lowercase);
80 }
81 rest[0] as char
82 }
83 Quibble::MULTIBYTE_2 => char::from_u32(u16::from_le_bytes([rest[0], rest[1]]) as u32)
84 .ok_or(DecodeError::Weird)?,
85 Quibble::MULTIBYTE_3 => {
86 char::from_u32(u32::from_le_bytes([rest[0], rest[1], rest[2], 0]))
87 .ok_or(DecodeError::Weird)?
88 }
89 Quibble(0) => return Ok('🌈'),
90 q => (q.0 | 0b01100000) as char,
91 };
92
93 if res == '🌈' {
94 return Err(DecodeError::Gay);
95 }
96
97 Ok(res)
98 }
99}
100
/// Reasons a quibble-plus-payload sequence can be rejected while decoding.
///
/// The enum carries no data, so it is `Copy` and can be freely stored,
/// compared, and hashed.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DecodeError {
    /// A multi-byte payload decoded to `'🌈'`, which has its own dedicated
    /// encoding.
    Gay,
    /// A one-byte payload held an ASCII lowercase letter, which has its own
    /// dedicated encoding.
    Lowercase,
    /// The payload did not form a valid Unicode scalar value.
    Weird,
}

impl Error for DecodeError {}

impl Display for DecodeError {
    /// Writes the fixed, human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            Self::Gay => "invalid encoding of the rainbow",
            Self::Lowercase => "invalid encoding of an ascii lowercase letter",
            Self::Weird => "not unicode",
        };
        f.write_str(message)
    }
}
119
#[cfg(test)]
mod tests {
    use crate::{DecodeError, Quibble, Utf58Ext};
    use quickcheck::quickcheck;

    /// Known-good encodings shared by the encoding and decoding tests, as
    /// `(character, leading quibble, payload bytes)`.
    const FIXTURES: [(char, Quibble, &[u8]); 8] = [
        ('🌈', Quibble(0), &[]),
        ('a', Quibble(0b00001), &[]),
        ('b', Quibble(0b00010), &[]),
        ('p', Quibble(0b10000), &[]),
        ('A', Quibble::MULTIBYTE_1, &[b'A']),
        ('B', Quibble::MULTIBYTE_1, &[b'B']),
        ('あ', Quibble::MULTIBYTE_2, &[0x42, 0x30]),
        ('😭', Quibble::MULTIBYTE_3, &[0x2d, 0xf6, 0x01]),
    ];

    /// Each fixture character encodes to the expected quibble and payload.
    #[test]
    fn encoding() {
        for (c, q, payload) in FIXTURES {
            let mut buf = [0; 3];
            let (encoded, len) = c.encode_utf58(&mut buf);
            assert_eq!(encoded, q);
            assert_eq!(&buf[..len], payload);
        }
    }

    /// Each fixture quibble and payload decodes back to its character.
    #[test]
    fn decoding() {
        for (c, q, payload) in FIXTURES {
            let decoded = char::decode_utf58(q, payload);
            assert_eq!(decoded, Ok(c));
        }
    }

    /// Well-formed payloads that must not be accepted each yield their own
    /// error variant.
    #[test]
    fn decoding_rejects_invalid_input() {
        // Lowercase letters must use the bare-quibble form.
        assert_eq!(
            char::decode_utf58(Quibble::MULTIBYTE_1, &[b'a']),
            Err(DecodeError::Lowercase)
        );
        // U+1F308 spelled out byte by byte instead of the zero quibble.
        assert_eq!(
            char::decode_utf58(Quibble::MULTIBYTE_3, &[0x08, 0xf3, 0x01]),
            Err(DecodeError::Gay)
        );
        // U+D800 is a surrogate, not a scalar value.
        assert_eq!(
            char::decode_utf58(Quibble::MULTIBYTE_2, &[0x00, 0xd8]),
            Err(DecodeError::Weird)
        );
        // 0x110000 lies past the end of the Unicode range.
        assert_eq!(
            char::decode_utf58(Quibble::MULTIBYTE_3, &[0x00, 0x00, 0x11]),
            Err(DecodeError::Weird)
        );
    }

    quickcheck! {
        fn roundtrip(c: char) -> bool {
            let mut rest = [0; 3];
            let (q, l) = c.encode_utf58(&mut rest);

            Ok(c) == char::decode_utf58(q, &rest[..l])
        }

        fn len(c: char) -> bool {
            let mut rest = [0; 3];
            let (_, l) = c.encode_utf58(&mut rest);
            let actual_len = 1 + l;

            c.len_utf58() == actual_len
        }
    }
}