aingle_id/
lib.rs

1//! AIngle AI-ID base32 encoding utility.
2//!
3//! # Example
4//!
5//! ```
6//! fn main() {
7//!     let enc = aingle_id::AiidEncoding::with_kind("ais0").unwrap();
8//!     let key = enc.encode(&[0; 32]).unwrap();
9//!     assert_eq!("aiSciaaaa", &key[..9]);
10//!     let buffer = enc.decode(&key).unwrap();
11//!     assert_eq!([0; 32].to_vec(), buffer);
12//! }
13//! ```
14
15mod error;
16mod b32;
17pub use error::{AiidError, AiidResult};
18
19mod util;
20use util::{b32_correct, cap_decode, cap_encode_bin, char_upper};
21
// AI_CODE_MAP translates the third character of a kind token (e.g. the 'k' in "aik0") into
// the `res` byte of the binary prefix.
//
//   row    = ASCII code of the third character - 51 (51 is '3', the lowest accepted code)
//   column = version digit (0 or 1)
//
// Layout of a valid `res` byte:
//   bits 7-6 : 00 (they complete the "ai" part of the rendered prefix)
//   bits 5-1 : base32 value of the third character
//   bit  0   : version
//
// A row of [0xff, 0xff] is reserved; `AiidEncodingConfig::new` rejects such a kind.
static AI_CODE_MAP: &[[u8; 2]] = &[
    // ---- digits '3'..='9' (ASCII 51..=57) carry the base32 values 25..=31 ----
    [0x32, 0x33], // '3' = 25 = 0b11001 -> ai30 / ai31
    [0x34, 0x35], // '4' = 26 = 0b11010 -> ai40 / ai41
    [0x36, 0x37], // '5' = 27 = 0b11011 -> ai50 / ai51
    [0x38, 0x39], // '6' = 28 = 0b11100 -> ai60 / ai61
    [0x3a, 0x3b], // '7' = 29 = 0b11101 -> ai70 / ai71
    [0x3c, 0x3d], // '8' = 30 = 0b11110 -> ai80 / ai81
    [0x3e, 0x3f], // '9' = 31 = 0b11111 -> ai90 / ai91
    // ---- ASCII 58..=64 (: ; < = > ? @): reserved ----
    [0xff, 0xff],
    [0xff, 0xff],
    [0xff, 0xff],
    [0xff, 0xff],
    [0xff, 0xff],
    [0xff, 0xff],
    [0xff, 0xff],
    // ---- ASCII 65..=90 ('A'..='Z'): reserved, 26 rows ----
    [0xff, 0xff], // A
    [0xff, 0xff], // B
    [0xff, 0xff], // C
    [0xff, 0xff], // D
    [0xff, 0xff], // E
    [0xff, 0xff], // F
    [0xff, 0xff], // G
    [0xff, 0xff], // H
    [0xff, 0xff], // I
    [0xff, 0xff], // J
    [0xff, 0xff], // K
    [0xff, 0xff], // L
    [0xff, 0xff], // M
    [0xff, 0xff], // N
    [0xff, 0xff], // O
    [0xff, 0xff], // P
    [0xff, 0xff], // Q
    [0xff, 0xff], // R
    [0xff, 0xff], // S
    [0xff, 0xff], // T
    [0xff, 0xff], // U
    [0xff, 0xff], // V
    [0xff, 0xff], // W
    [0xff, 0xff], // X
    [0xff, 0xff], // Y
    [0xff, 0xff], // Z
    // ---- ASCII 91..=96 ([ \ ] ^ _ `): reserved ----
    [0xff, 0xff],
    [0xff, 0xff],
    [0xff, 0xff],
    [0xff, 0xff],
    [0xff, 0xff],
    [0xff, 0xff],
    // ---- letters 'a'..='k' (ASCII 97..=107) carry the base32 values 0..=10 ----
    [0x00, 0x01], // 'a' =  0 = 0b00000 -> aia0 / aia1
    [0x02, 0x03], // 'b' =  1 = 0b00001 -> aib0 / aib1
    [0x04, 0x05], // 'c' =  2 = 0b00010 -> aic0 / aic1
    [0x06, 0x07], // 'd' =  3 = 0b00011 -> aid0 / aid1
    [0x08, 0x09], // 'e' =  4 = 0b00100 -> aie0 / aie1
    [0x0a, 0x0b], // 'f' =  5 = 0b00101 -> aif0 / aif1
    [0x0c, 0x0d], // 'g' =  6 = 0b00110 -> aig0 / aig1
    [0x0e, 0x0f], // 'h' =  7 = 0b00111 -> aih0 / aih1
    [0x10, 0x11], // 'i' =  8 = 0b01000 -> aii0 / aii1
    [0x12, 0x13], // 'j' =  9 = 0b01001 -> aij0 / aij1
    [0x14, 0x15], // 'k' = 10 = 0b01010 -> aik0 / aik1
    // ---- 'l' (ASCII 108): reserved, the base32 alphabet has no 'L' ----
    [0xff, 0xff],
    // ---- letters 'm'..='z' (ASCII 109..=122) carry the base32 values 11..=24 ----
    [0x16, 0x17], // 'm' = 11 = 0b01011 -> aim0 / aim1
    [0x18, 0x19], // 'n' = 12 = 0b01100 -> ain0 / ain1
    [0x1a, 0x1b], // 'o' = 13 = 0b01101 -> aio0 / aio1
    [0x1c, 0x1d], // 'p' = 14 = 0b01110 -> aip0 / aip1
    [0x1e, 0x1f], // 'q' = 15 = 0b01111 -> aiq0 / aiq1
    [0x20, 0x21], // 'r' = 16 = 0b10000 -> air0 / air1
    [0x22, 0x23], // 's' = 17 = 0b10001 -> ais0 / ais1
    [0x24, 0x25], // 't' = 18 = 0b10010 -> ait0 / ait1
    [0x26, 0x27], // 'u' = 19 = 0b10011 -> aiu0 / aiu1
    [0x28, 0x29], // 'v' = 20 = 0b10100 -> aiv0 / aiv1
    [0x2a, 0x2b], // 'w' = 21 = 0b10101 -> aiw0 / aiw1
    [0x2c, 0x2d], // 'x' = 22 = 0b10110 -> aix0 / aix1
    [0x2e, 0x2f], // 'y' = 23 = 0b10111 -> aiy0 / aiy1
    [0x30, 0x31], // 'z' = 24 = 0b11000 -> aiz0 / aiz1
];
84
/*
 * New prefix format for "ai" branding:
 * Prefix = [0x02, res, 0x24] produces "aiX?..." where X is the kind character encoded in res
 * and the fourth character carries the version bit: bit 0 of res followed by the top nibble
 * of 0x24 is 0b00010 ('c') for version 0 and 0b10010 ('t') for version 1.
 *
 * aiK v0 hex:     0x021424 -> "aiKc..."
 * aiK v1 hex:     0x021524 -> "aiKt..."
 * aiA v0 hex:     0x020024 -> "aiAc..."
 * aiA v1 hex:     0x020124 -> "aiAt..."
 * aiS v0 hex:     0x022224 -> "aiSc..."
 * aiS v1 hex:     0x022324 -> "aiSt..."
 */
96
/// Represents an encoding configuration for aiid rendering and parsing.
///
/// All fields are plain counts or byte vectors, so the type derives `Debug`, `Clone`,
/// `PartialEq` and `Eq`; this lets a configuration be logged, duplicated and compared.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AiidEncodingConfig {
    /// byte count of actual key data that will be encoded
    pub key_byte_count: usize,
    /// parity bytes that will be encoded directly into the base32 string (appended after key)
    pub base_parity_byte_count: usize,
    /// parity bytes that will be encoded in the alpha capitalization (appended after base parity)
    pub cap_parity_byte_count: usize,
    /// bytes to prefix before rendering to base32
    pub prefix: Vec<u8>,
    /// binary indication of the capitalization for prefix characters, one ASCII `b'0'`
    /// (lower case) or `b'1'` (upper case) per prefix character
    pub prefix_cap: Vec<u8>,
    /// how many characters are in a capitalization parity segment
    pub cap_segment_char_count: usize,
    /// how many characters long the fully rendered base32 string should be
    pub encoded_char_count: usize,
}
114
115impl AiidEncodingConfig {
116    /// create a new config given a kind token string
117    ///
118    /// # Example
119    ///
120    /// ```
121    /// let aia0 = aingle_id::AiidEncodingConfig::new("aia0").unwrap();
122    /// let aik0 = aingle_id::AiidEncodingConfig::new("aik0").unwrap();
123    /// let ais0 = aingle_id::AiidEncodingConfig::new("ais0").unwrap();
124    /// ```
125    pub fn new(kind: &str) -> AiidResult<Self> {
126        let kind_b = kind.as_bytes();
127        // Check for "aiXY" format where X is a-z or 3-9, Y is 0 or 1
128        // 'a' = 97, 'i' = 105
129        if kind_b.len() != 4 || kind_b[0] != 97 || kind_b[1] != 105 ||
130                (kind_b[3] != 48 && kind_b[3] != 49) ||
131                kind_b[2] < 51 || kind_b[2] > 122 {
132            return Err(format!("invalid kind: `{}`", kind).into());
133        }
134
135        let version = if kind_b[3] == 48 { 0 } else { 1 };
136        let res = AI_CODE_MAP[(kind_b[2] - 51) as usize][version as usize];
137
138        if res == 0xff {
139            return Err(format!("invalid kind: `{}`", kind).into());
140        }
141
142        Ok(AiidEncodingConfig {
143            key_byte_count: 32,
144            base_parity_byte_count: 4,
145            cap_parity_byte_count: 4,
146            prefix: vec![0x02, res, 0x24],
147            prefix_cap: b"001".to_vec(),  // "ai" lowercase, third char uppercase
148            cap_segment_char_count: 15,
149            encoded_char_count: 63,
150        })
151    }
152}
153
/// an instance that can encode / decode a particular aiid encoding configuration
pub struct AiidEncoding {
    // byte counts, prefix and rendered length that `encode` / `decode` work against
    config: AiidEncodingConfig,
    // reed-solomon encoder; `new` sizes it to base + cap parity byte count
    rs_enc: reed_solomon::Encoder,
    // reed-solomon decoder; `new` sizes it to the same parity byte count as the encoder
    rs_dec: reed_solomon::Decoder,
}
160
161impl AiidEncoding {
162    /// create a new AiidEncoding instance from given AiidEncodingConfig
163    pub fn new(config: AiidEncodingConfig) -> AiidResult<Self> {
164        // set up a reed-solomon encoder with proper parity count
165        let rs_enc = reed_solomon::Encoder::new(
166            config.base_parity_byte_count + config.cap_parity_byte_count,
167        );
168
169        // set up a reed-solomon decoder with proper parity count
170        let rs_dec = reed_solomon::Decoder::new(
171            config.base_parity_byte_count + config.cap_parity_byte_count,
172        );
173
174        Ok(Self {
175            config,
176            rs_enc,
177            rs_dec,
178        })
179    }
180
181    /// create a new config given a kind token string
182    ///
183    /// # Example
184    ///
185    /// ```
186    /// let aia0 = aingle_id::AiidEncoding::with_kind("aia0").unwrap();
187    /// let aik0 = aingle_id::AiidEncoding::with_kind("aik0").unwrap();
188    /// let ais0 = aingle_id::AiidEncoding::with_kind("ais0").unwrap();
189    /// ```
190    pub fn with_kind(kind: &str) -> AiidResult<Self> {
191        AiidEncoding::new(AiidEncodingConfig::new(kind)?)
192    }
193
194    /// encode a string to base32 with this instance's configuration
195    pub fn encode(&self, data: &[u8]) -> AiidResult<String> {
196        if data.len() != self.config.key_byte_count {
197            return Err(AiidError(String::from(format!(
198                "BadDataLen:{},Expected:{}",
199                data.len(),
200                self.config.key_byte_count
201            ))));
202        }
203
204        // generate reed-solomon parity bytes
205        let full_parity = self.rs_enc.encode(data);
206
207        // extract the bytes that will be encoded as capitalization
208        let cap_bytes = &full_parity[full_parity.len() - self.config.cap_parity_byte_count..];
209
210        // base is the bytes that will be base32 encoded
211        let mut base = self.config.prefix.clone();
212        base.extend_from_slice(
213            &full_parity[0..full_parity.len() - self.config.cap_parity_byte_count],
214        );
215
216        // do the base32 encoding
217        let mut base32 = b32::encode(&base);
218
219        if base32.len() != self.config.encoded_char_count {
220            return Err(AiidError(String::from(format!(
221                "InternalGeneratedBadLen:{},Expected:{}",
222                base32.len(),
223                self.config.encoded_char_count
224            ))));
225        }
226
227        // capitalize the prefix with a fixed scheme
228        cap_encode_bin(
229            &mut base32[0..self.config.prefix_cap.len()],
230            &self.config.prefix_cap,
231            3,
232        )?;
233
234        // iterate over segments, applying parity capitalization
235        for i in 0..cap_bytes.len() {
236            let seg_start = self.config.prefix_cap.len() + (i * self.config.cap_segment_char_count);
237            let seg = &mut base32[seg_start..seg_start + self.config.cap_segment_char_count];
238            let bin = format!("{:08b}", cap_bytes[i]).into_bytes();
239            cap_encode_bin(seg, &bin, 8)?;
240        }
241        
242        // we only use ascii characters
243        // use unchecked for performance / so we don't allocate again
244        unsafe {
245            // return the result as a String for ease of use
246            Ok(String::from_utf8_unchecked(base32))
247        }
248    }
249
250    /// decode the data from a base32 string with this instance's configuration.  Reed-Solomon can
251    /// correct up to 1/2 its parity size worth of erasures (if no other errors are present).
252    pub fn decode(&self, data: &str) -> AiidResult<Vec<u8>> {
253        // get our parsed data with erasures
254        let (data, erasures) = self.pre_decode(data)?;
255
256        if erasures.len() > ( self.config.base_parity_byte_count + self.config.cap_parity_byte_count ) / 2 {
257            // our reed-solomon library makes bad corrections once erasure count exceeds 1/2 the
258            // parity count (it takes 2 parity symbols to find/correct one error, 1 parity symbol to
259            // correct a known erasure)
260            return Err(AiidError(String::from("TooManyErrors")));
261        }
262
263        // optimise for the case where there are no transcription errors
264        // this makes correcting more expensive if there *are*,
265        // but on average makes the system more efficient
266        if self.pre_is_corrupt(&data, &erasures)? {
267            // apply reed-solomon correction
268            // will "throw" on too many errors
269            Ok(
270                self.rs_dec.correct(&data, Some(&erasures[..]))?[0..self.config.key_byte_count]
271                    .to_vec(),
272            )
273        } else {
274            Ok(data[0..self.config.key_byte_count].to_vec())
275        }
276    }
277
278    /// a lighter-weight check to determine if a base32 string is corrupt
279    pub fn is_corrupt(&self, data: &str) -> AiidResult<bool> {
280        // get our parsed data with erasures
281        let (data, erasures) = match self.pre_decode(data) {
282            Ok(v) => v,
283            Err(_) => return Ok(true),
284        };
285
286        match self.pre_is_corrupt(&data, &erasures) {
287            Ok(v) => Ok(v),
288            Err(_) => Ok(true),
289        }
290    }
291
292    /// internal helper for is_corrupt checking
293    fn pre_is_corrupt(&self, data: &[u8], erasures: &[u8]) -> AiidResult<bool> {
294        // if we have any erasures, we can exit early
295        if erasures.len() > 0 {
296            return Ok(true);
297        }
298
299        // slightly more efficient reed-solomon corruption check
300        Ok(self.rs_dec.is_corrupted(&data))
301    }
302
303    /// internal helper for preparing decoding
304    fn pre_decode(&self, data: &str) -> AiidResult<(Vec<u8>, Vec<u8>)> {
305        if data.len() != self.config.encoded_char_count {
306            return Err(AiidError(String::from(format!(
307                "BadIdLen:{},Expected:{}",
308                data.len(),
309                self.config.encoded_char_count
310            ))));
311        }
312
313        let key_base_byte_size = self.config.key_byte_count + self.config.base_parity_byte_count;
314        // All char_erasures are indexed from the 0th char of the full codeword w/ prefix, but
315        // byte_erasures are indexed from the 0th byte of the key+parity (ie. without the prefix).
316        // Any byte of key, or base/cap parity could be erased.
317        let mut byte_erasures = vec![b'0'; key_base_byte_size + self.config.cap_parity_byte_count];
318        let mut char_erasures = vec![b'0'; data.len()];
319
320        // correct any transliteration errors into our base32 alphabet
321        // marking any unrecognizable characters as char-level erasures
322        let mut data = b32_correct(data.as_bytes(), &mut char_erasures);
323
324        // Pull out the parity data that was encoded as capitalization.  If its erasure,
325        // determine the 
326        let mut cap_bytes: Vec<u8> = Vec::new();
327        let mut all_zro = true;
328        let mut all_one = true;
329        for i in 0..self.config.cap_parity_byte_count {
330            // For cap. parity, indexing starts after pre-defined Base-32 prefix
331            let char_idx = self.config.prefix_cap.len() + (i * self.config.cap_segment_char_count);
332            match cap_decode(
333                char_idx,
334                &data[char_idx..char_idx + self.config.cap_segment_char_count],
335                &char_erasures
336            )? {
337                None => {
338                    byte_erasures[key_base_byte_size + i] = b'1';
339                    cap_bytes.push(0)
340                }
341                Some(parity) => {
342                    if all_zro && parity != 0x00_u8 {
343                        all_zro = false
344                    }
345                    if all_one && parity != 0xFF_u8 {
346                        all_one = false
347                    }
348                    cap_bytes.push(parity)
349                }
350            }
351        }
352
353        // If either all caps or all lower case (or erasure), assume the casing was lost (eg. QR
354        // code, or dns segment); mark all cap-derived parity as erasures.  This allows validation
355        // of codeword if all remaining parity is intact and key is correct; since no parity
356        // capacity remains, no correction will be attempted.  There is only a low probability that
357        // any remaining errors will be detected, in this case.  However, we're no *worse* off than
358        // if we had no R-S parity at all.
359        if all_zro || all_one {
360            for i in 0..self.config.cap_parity_byte_count {
361                byte_erasures[key_base_byte_size + i] = b'1';
362            }
363        }
364
365        // we have the cap data, uppercase everything
366        for c in data.iter_mut() {
367            char_upper(c);
368        }
369
370        // do the base32 decode
371        let mut data = b32::decode(&data)?;
372
373        if &data[0..self.config.prefix.len()] != self.config.prefix.as_slice() {
374            return Err(AiidError(String::from("PrefixMismatch")));
375        }
376
377        // remove the prefix bytes
378        data.drain(0..self.config.prefix.len());
379
380        // append our cap parity bytes
381        data.append(&mut cap_bytes);
382
383        // Sort through the char-level erasures (5 bits), and associate them with byte-level data (8
384        // bits) -- in the (now prefix-free) data buffer, so that we mark the proper erasures for
385        // reed-solomon correction.  Some of these chars span multiple bytes... we need to mark both.
386        for i in self.config.prefix_cap.len()..char_erasures.len() {
387            let c = char_erasures[i];
388            if c == b'1' {
389                // 1st and last bit of 5-bit segment may index different bytes
390                byte_erasures[( i * 5 + 0 ) / 8 - self.config.prefix.len()] = b'1';
391                byte_erasures[( i * 5 + 4 ) / 8 - self.config.prefix.len()] = b'1';
392            }
393        }
394
395        // translate erasures into the form expected by our reed-solomon lib
396        let mut erasures: Vec<u8> = Vec::new();
397        for i in 0..byte_erasures.len() {
398            if byte_erasures[i] == b'1' {
399                data[i] = 0;
400                erasures.push(i as u8);
401            }
402        }
403
404        Ok((data, erasures))
405    }
406}
407
#[cfg(test)]
mod tests {
    use super::*;

    /// A 32-byte key, hex encoded.
    static TEST_HEX_1: &str = "0c71db50d35d760b0ea2002ff20147c7c3a8e8030d35ef28ed1adaec9e329aba";
    /// The id the tests expect for `TEST_HEX_1` under the `aik0` kind.
    static TEST_ID_1: &str = "aiKciDds5OiogymxbnHKEabQ8iavqs8dwdVaGdJW76Vp4gx47tQDfGW4OWc9w5i";

    /// Encoding the reference key must render the reference id.
    #[test]
    fn it_encodes_1() {
        let codec = AiidEncoding::with_kind("aik0").unwrap();

        let key_bytes = hex::decode(TEST_HEX_1.as_bytes()).unwrap();
        let rendered = codec.encode(&key_bytes).unwrap();
        assert_eq!(TEST_ID_1, rendered);
    }

    /// Decoding the reference id must give back the reference key.
    #[test]
    fn it_decodes_1() {
        let codec = AiidEncoding::with_kind("aik0").unwrap();

        let key_bytes = codec.decode(TEST_ID_1).unwrap();
        let key_hex = hex::encode(&key_bytes);
        assert_eq!(TEST_HEX_1, key_hex);
    }
}