base256u/
lib.rs

1#![cfg_attr(not(test), no_std)]
2
/// Iterator adapter that converts a stream of bytes into Unicode `char`s.
///
/// Every byte pulled from the wrapped iterator is passed through the stored `encode` callable
/// and the resulting `char` is yielded. Nothing is buffered, so the adapter yields exactly one
/// `char` per input byte.
#[derive(Clone, Debug)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct Encoder<I, F>
where
    I: Iterator<Item = u8>,
    F: Fn(u8) -> char,
{
    /// Source of the bytes to encode.
    iterator: I,
    /// Mapping applied to each byte.
    encode: F,
}

impl<I, F> Encoder<I, F>
where
    I: Iterator<Item = u8>,
    F: Fn(u8) -> char,
{
    /// Creates an encoder that maps each byte of `iterator` through `encode`.
    pub fn new(iterator: I, encode: F) -> Self {
        Self { iterator, encode }
    }
}

impl<I, F> Iterator for Encoder<I, F>
where
    I: Iterator<Item = u8>,
    F: Fn(u8) -> char,
{
    type Item = char;

    fn next(&mut self) -> Option<Self::Item> {
        let byte = self.iterator.next()?;
        Some((self.encode)(byte))
    }

    /// The mapping is one-to-one, so the wrapped iterator's bounds apply unchanged. Forwarding
    /// them lets `collect()` reserve the right capacity up front.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iterator.size_hint()
    }
}

impl<I, F> DoubleEndedIterator for Encoder<I, F>
where
    I: DoubleEndedIterator<Item = u8>,
    F: Fn(u8) -> char,
{
    fn next_back(&mut self) -> Option<Self::Item> {
        let byte = self.iterator.next_back()?;
        Some((self.encode)(byte))
    }
}

// `len()` comes from the default method, which relies on the forwarded `size_hint`.
impl<I, F> ExactSizeIterator for Encoder<I, F>
where
    I: ExactSizeIterator<Item = u8>,
    F: Fn(u8) -> char,
{
}
34
/// Iterator adapter that converts a stream of Unicode `char`s back into bytes.
///
/// Every `char` pulled from the wrapped iterator is passed through the stored `decode` callable.
/// Decoding is fallible because not every `char` corresponds to a byte, so the items are
/// `Option<u8>`: `Some(byte)` for a recognised `char`, `None` for one the callable rejects.
/// The end of the input is signalled, as usual, by `next()` itself returning `None`.
#[derive(Clone, Debug)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct Decoder<I, F>
where
    I: Iterator<Item = char>,
    F: Fn(char) -> Option<u8>,
{
    /// Source of the `char`s to decode.
    iterator: I,
    /// Mapping applied to each `char`.
    decode: F,
}

impl<I, F> Decoder<I, F>
where
    I: Iterator<Item = char>,
    F: Fn(char) -> Option<u8>,
{
    /// Creates a decoder that maps each `char` of `iterator` through `decode`.
    pub fn new(iterator: I, decode: F) -> Self {
        Self { iterator, decode }
    }
}

impl<I, F> Iterator for Decoder<I, F>
where
    I: Iterator<Item = char>,
    F: Fn(char) -> Option<u8>,
{
    type Item = Option<u8>;

    fn next(&mut self) -> Option<Self::Item> {
        let c = self.iterator.next()?;
        Some((self.decode)(c))
    }

    /// One item is produced per input `char` (rejected ones become `None` items rather than
    /// being skipped), so the wrapped iterator's bounds apply unchanged.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iterator.size_hint()
    }
}

impl<I, F> DoubleEndedIterator for Decoder<I, F>
where
    I: DoubleEndedIterator<Item = char>,
    F: Fn(char) -> Option<u8>,
{
    fn next_back(&mut self) -> Option<Self::Item> {
        let c = self.iterator.next_back()?;
        Some((self.decode)(c))
    }
}

// `len()` comes from the default method, which relies on the forwarded `size_hint`.
impl<I, F> ExactSizeIterator for Decoder<I, F>
where
    I: ExactSizeIterator<Item = char>,
    F: Fn(char) -> Option<u8>,
{
}
67
/// Encodes one byte as printable-ASCII-preserving Unicode ("PAPU").
///
/// Every byte maps to a distinct `char`:
///
/// * `0x00..=0x1F` (control bytes) map to `U+00B0..=U+00CF` in the Latin-1 Supplement block, a
///   printable run that lies after both `NBSP` (`U+00A0`) and `SHY` (`U+00AD`).
/// * `0x20..=0x7E` (printable ASCII) map to themselves.
/// * `0x7F` (ASCII DEL) is arbitrarily mapped to `§` (`U+00A7`).
/// * `0x80..=0xFF` map to `U+0100..=U+017F` (Latin Extended-A), except `0xC9`, which is
///   arbitrarily mapped to `¤` (`U+00A4`) to avoid the deprecated character `U+0149`.
pub fn encode_papu(byte: u8) -> char {
    match byte {
        // The sum is at most 0x1F + 0xB0 = 0xCF, so it cannot overflow `u8`.
        0x00..=0x1F => char::from(byte + 0xB0),
        0x20..=0x7E => char::from(byte),
        // Escapes rather than literal glyphs keep the mapping immune to source re-encoding.
        0x7F => '\u{A7}', // §
        // Must precede the general high-byte arm below, which would otherwise yield U+0149.
        0xC9 => '\u{A4}', // ¤
        // U+0100..=U+017F are all valid scalar values, so the checked conversion never fails.
        0x80..=0xFF => char::from_u32(u32::from(byte) + 0x80)
            .expect("U+0100..=U+017F are valid Unicode scalar values"),
    }
}
84
/// Decodes one printable-ASCII-preserving Unicode ("PAPU") `char` back into its byte.
///
/// This is the exact inverse of `encode_papu`: each of the 256 chars that function can produce
/// yields `Some(byte)`, and every other `char` yields `None`.
pub const fn decode_papu(c: char) -> Option<u8> {
    let code = c as u32;
    // The arms cover disjoint code point ranges, so their order does not matter.
    match code {
        // Printable ASCII stands for itself.
        0x20..=0x7E => Some(code as u8),
        // `¤` is the stand-in for 0xC9.
        0xA4 => Some(0xC9),
        // `§` is the stand-in for 0x7F.
        0xA7 => Some(0x7F),
        // Latin-1 Supplement run carrying the control bytes 0x00..=0x1F.
        0xB0..=0xCF => Some((code - 0xB0) as u8),
        // Latin Extended-A carries 0x80..=0xFF; U+0149 is never produced by the encoder.
        0x100..=0x148 | 0x14A..=0x17F => Some((code - 0x80) as u8),
        _ => None,
    }
}
97
/// Encodes one byte as an emoji.
///
/// Every byte maps to a distinct `char`, taken from four contiguous runs:
///
/// * `0x00..=0x4F` map to the range starting at `U+1F330`, for some plants and foods.
/// * `0x50..=0x8F` map to the range starting at `U+1F400`, for animals.
/// * `0x90..=0xDF` map to the range starting at `U+1F600`, for expressions and hand signs.
/// * `0xE0..=0xFF` map to the range starting at `U+1F910`, for more expressions and hand signs.
pub fn encode_emoji(byte: u8) -> char {
    // Pick the run the byte falls in: (first code point of the run, first byte of the run).
    let (block_start, first_byte): (u32, u8) = match byte {
        0x00..=0x4F => (0x1F330, 0x00),
        0x50..=0x8F => (0x1F400, 0x50),
        0x90..=0xDF => (0x1F600, 0x90),
        0xE0..=0xFF => (0x1F910, 0xE0),
    };
    // `byte >= first_byte` by construction, so the subtraction cannot underflow. Every result
    // lies in U+1F330..=U+1F92F, which contains no surrogates, so the conversion never fails.
    char::from_u32(block_start + u32::from(byte - first_byte))
        .expect("U+1F330..=U+1F92F are valid Unicode scalar values")
}
111
/// Decodes one emoji `char` back into its byte.
///
/// This is the exact inverse of `encode_emoji`: each of the 256 chars that function can produce
/// yields `Some(byte)`, and every other `char` yields `None`.
pub const fn decode_emoji(c: char) -> Option<u8> {
    let code = c as u32;
    // Find the run the code point belongs to: (first code point, byte the run starts at).
    let (block_start, first_byte) = match code {
        0x1F330..=0x1F37F => (0x1F330, 0x00),
        0x1F400..=0x1F43F => (0x1F400, 0x50),
        0x1F600..=0x1F64F => (0x1F600, 0x90),
        0x1F910..=0x1F92F => (0x1F910, 0xE0),
        _ => return None,
    };
    // The offset within the run plus the run's first byte is at most 0xFF, so the cast is exact.
    Some((code - block_start + first_byte) as u8)
}
123
124pub trait Encode: Iterator<Item = u8>
125where
126    Self: Sized,
127{
128    fn base256u<F>(self, function: F) -> Encoder<Self, F>
129    where
130        F: Fn(u8) -> char,
131    {
132        Encoder::new(self, function)
133    }
134
135    fn base256u_papu(self) -> Encoder<Self, fn(u8) -> char> {
136        self.base256u(encode_papu)
137    }
138    fn base256u_emoji(self) -> Encoder<Self, fn(u8) -> char> {
139        self.base256u(encode_emoji)
140    }
141}
142
143impl<T> Encode for T where T: Iterator<Item = u8> {}
144
145pub trait Decode: Iterator<Item = char>
146where
147    Self: Sized,
148{
149    fn base256u<F>(self, function: F) -> Decoder<Self, F>
150    where
151        F: Fn(char) -> Option<u8>,
152    {
153        Decoder::new(self, function)
154    }
155
156    fn base256u_papu(self) -> Decoder<Self, fn(char) -> Option<u8>> {
157        self.base256u(decode_papu)
158    }
159    fn base256u_emoji(self) -> Decoder<Self, fn(char) -> Option<u8>> {
160        self.base256u(decode_emoji)
161    }
162}
163
164impl<T> Decode for T where T: Iterator<Item = char> {}
165
#[cfg(test)]
mod tests {
    use crate::{Decode, Encode};
    use std::iter::once;

    /// Pangram used for the round-trip checks; it is pure printable ASCII.
    const PANGRAM: &str = "Pack my box with five dozen liquor jugs.";
    /// `PANGRAM` as produced by the emoji encoding.
    const PANGRAM_EMOJI: &str =
        "🐀🐑🐓🐛🍐🐝🐩🍐🐒🐟🐨🍐🐧🐙🐤🐘🍐🐖🐙🐦🐕🍐🐔🐟🐪🐕🐞🍐🐜🐙🐡🐥🐟🐢🍐🐚🐥🐗🐣🍞";

    // The full alphabets are built from `\u{..}` ranges instead of 256-glyph literals so the
    // expectations cannot be silently corrupted when the file is re-encoded or normalised.

    /// The chars the PAPU encoding assigns to bytes `0x00..=0xFF`, in byte order.
    fn papu_alphabet() -> String {
        ('\u{B0}'..='\u{CF}') // 0x00..=0x1F
            .chain(' '..='~') // 0x20..=0x7E
            .chain(once('\u{A7}')) // 0x7F
            .chain('\u{100}'..='\u{148}') // 0x80..=0xC8
            .chain(once('\u{A4}')) // 0xC9
            .chain('\u{14A}'..='\u{17F}') // 0xCA..=0xFF
            .collect()
    }

    /// The chars the emoji encoding assigns to bytes `0x00..=0xFF`, in byte order.
    fn emoji_alphabet() -> String {
        ('\u{1F330}'..='\u{1F37F}') // 0x00..=0x4F
            .chain('\u{1F400}'..='\u{1F43F}') // 0x50..=0x8F
            .chain('\u{1F600}'..='\u{1F64F}') // 0x90..=0xDF
            .chain('\u{1F910}'..='\u{1F92F}') // 0xE0..=0xFF
            .collect()
    }

    /// `alphabet` followed by two chars (`U+019D`, `U+0149`) that neither encoding produces.
    fn with_unmapped_tail(mut alphabet: String) -> String {
        alphabet.push('\u{19D}');
        alphabet.push('\u{149}');
        alphabet
    }

    /// Expected decoding of a full alphabet followed by the two unmapped chars.
    fn all_bytes_then_two_failures() -> Vec<Option<u8>> {
        let mut expected: Vec<Option<u8>> = (u8::MIN..=u8::MAX).map(Some).collect();
        expected.extend_from_slice(&[None, None]);
        expected
    }

    #[test]
    fn encoding_papu() {
        let encoded: String = (u8::MIN..=u8::MAX).base256u_papu().collect();
        assert_eq!(encoded.chars().count(), 256);
        assert_eq!(encoded, papu_alphabet());

        // Printable ASCII must pass through untouched.
        let encoded: String = PANGRAM.as_bytes().iter().copied().base256u_papu().collect();
        assert_eq!(encoded, PANGRAM);
    }

    #[test]
    fn decoding_papu() {
        let input = with_unmapped_tail(papu_alphabet());
        let decoded: Vec<Option<u8>> = input.chars().base256u_papu().collect();
        assert_eq!(decoded, all_bytes_then_two_failures());

        let decoded: Option<Vec<u8>> = PANGRAM.chars().base256u_papu().collect();
        assert_eq!(decoded.as_deref(), Some(PANGRAM.as_bytes()));
    }

    #[test]
    fn encoding_emoji() {
        let encoded: String = (u8::MIN..=u8::MAX).base256u_emoji().collect();
        assert_eq!(encoded.chars().count(), 256);
        assert_eq!(encoded, emoji_alphabet());

        let encoded: String = PANGRAM.as_bytes().iter().copied().base256u_emoji().collect();
        assert_eq!(encoded, PANGRAM_EMOJI);
    }

    #[test]
    fn decoding_emoji() {
        let input = with_unmapped_tail(emoji_alphabet());
        let decoded: Vec<Option<u8>> = input.chars().base256u_emoji().collect();
        assert_eq!(decoded, all_bytes_then_two_failures());

        let decoded: Option<Vec<u8>> = PANGRAM_EMOJI.chars().base256u_emoji().collect();
        assert_eq!(decoded.as_deref(), Some(PANGRAM.as_bytes()));
    }
}