aingle_id/lib.rs
1//! AIngle AI-ID base32 encoding utility.
2//!
3//! # Example
4//!
5//! ```
6//! fn main() {
7//! let enc = aingle_id::AiidEncoding::with_kind("ais0").unwrap();
8//! let key = enc.encode(&[0; 32]).unwrap();
9//! assert_eq!("aiSciaaaa", &key[..9]);
10//! let buffer = enc.decode(&key).unwrap();
11//! assert_eq!([0; 32].to_vec(), buffer);
12//! }
13//! ```
14
15mod error;
16mod b32;
17pub use error::{AiidError, AiidResult};
18
19mod util;
20use util::{b32_correct, cap_decode, cap_encode_bin, char_upper};
21
/// Entry used for every third character that has no kind assigned to it.
const AI_CODE_RESERVED: [u8; 2] = [0xff, 0xff];

/// Lookup table from the third character of a kind token (e.g. `k` in "aik0") to the
/// `res` byte placed in the middle of the encoding prefix.
///
/// The table is indexed by `ASCII code of the third character - 51` (51 is `'3'`), and
/// each entry holds `[res for version 0, res for version 1]`. For an assigned character
/// with base32 value `v` the pair is `[2 * v, 2 * v + 1]`: bits 7-6 are `00`, bits 5-1
/// carry `v`, and bit 0 carries the version. `0xff` marks an unassigned character.
static AI_CODE_MAP: &[[u8; 2]] = &[
    // ASCII 51..=57: digits '3'..='9' hold base32 values 25..=31
    [0x32, 0x33], // '3' = 25
    [0x34, 0x35], // '4' = 26
    [0x36, 0x37], // '5' = 27
    [0x38, 0x39], // '6' = 28
    [0x3a, 0x3b], // '7' = 29
    [0x3c, 0x3d], // '8' = 30
    [0x3e, 0x3f], // '9' = 31

    // ASCII 58..=64: `: ; < = > ? @` (7 entries)
    AI_CODE_RESERVED, AI_CODE_RESERVED, AI_CODE_RESERVED, AI_CODE_RESERVED,
    AI_CODE_RESERVED, AI_CODE_RESERVED, AI_CODE_RESERVED,
    // ASCII 65..=90: uppercase 'A'..='Z' (26 entries); only lowercase selects a kind
    AI_CODE_RESERVED, AI_CODE_RESERVED, AI_CODE_RESERVED, AI_CODE_RESERVED,
    AI_CODE_RESERVED, AI_CODE_RESERVED, AI_CODE_RESERVED, AI_CODE_RESERVED,
    AI_CODE_RESERVED, AI_CODE_RESERVED, AI_CODE_RESERVED, AI_CODE_RESERVED,
    AI_CODE_RESERVED,
    AI_CODE_RESERVED, AI_CODE_RESERVED, AI_CODE_RESERVED, AI_CODE_RESERVED,
    AI_CODE_RESERVED, AI_CODE_RESERVED, AI_CODE_RESERVED, AI_CODE_RESERVED,
    AI_CODE_RESERVED, AI_CODE_RESERVED, AI_CODE_RESERVED, AI_CODE_RESERVED,
    AI_CODE_RESERVED,
    // ASCII 91..=96: `[ \ ] ^ _` and the backtick (6 entries)
    AI_CODE_RESERVED, AI_CODE_RESERVED, AI_CODE_RESERVED,
    AI_CODE_RESERVED, AI_CODE_RESERVED, AI_CODE_RESERVED,

    // ASCII 97..=107: 'a'..='k' hold base32 values 0..=10
    [0x00, 0x01], // 'a' = 0
    [0x02, 0x03], // 'b' = 1
    [0x04, 0x05], // 'c' = 2
    [0x06, 0x07], // 'd' = 3
    [0x08, 0x09], // 'e' = 4
    [0x0a, 0x0b], // 'f' = 5
    [0x0c, 0x0d], // 'g' = 6
    [0x0e, 0x0f], // 'h' = 7
    [0x10, 0x11], // 'i' = 8
    [0x12, 0x13], // 'j' = 9
    [0x14, 0x15], // 'k' = 10

    // ASCII 108: 'l' is reserved (there is no 'L' in the base32 alphabet)
    AI_CODE_RESERVED,

    // ASCII 109..=122: 'm'..='z' hold base32 values 11..=24
    [0x16, 0x17], // 'm' = 11
    [0x18, 0x19], // 'n' = 12
    [0x1a, 0x1b], // 'o' = 13
    [0x1c, 0x1d], // 'p' = 14
    [0x1e, 0x1f], // 'q' = 15
    [0x20, 0x21], // 'r' = 16
    [0x22, 0x23], // 's' = 17
    [0x24, 0x25], // 't' = 18
    [0x26, 0x27], // 'u' = 19
    [0x28, 0x29], // 'v' = 20
    [0x2a, 0x2b], // 'w' = 21
    [0x2c, 0x2d], // 'x' = 22
    [0x2e, 0x2f], // 'y' = 23
    [0x30, 0x31], // 'z' = 24
];
84
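/*
 * Prefix format for "ai" branding:
 * Prefix bytes = [0x02, res, 0x24]. Base32-encoded, they render as "aiX" followed by
 * "c" for version 0 or "t" for version 1, where X is the third character selected by
 * res. (The fourth character is res bit 0 followed by the top four bits of 0x24:
 * 00010 = 2 = 'C' for v0, 10010 = 18 = 'T' for v1. Its letter case is set later by the
 * capitalization parity, so it may appear in either case.)
 *
 * aiK v0 hex: 0x021424 -> "aiKc..."
 * aiK v1 hex: 0x021524 -> "aiKt..."
 * aiA v0 hex: 0x020024 -> "aiAc..."
 * aiA v1 hex: 0x020124 -> "aiAt..."
 * aiS v0 hex: 0x022224 -> "aiSc..."
 * aiS v1 hex: 0x022324 -> "aiSt..."
 */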
96
/// Encoding parameters that control how an aiid is rendered to and parsed from base32.
///
/// Every field is public, so a configuration can be assembled by hand as well as
/// obtained from `AiidEncodingConfig::new`. The standard derives let a configuration be
/// printed for diagnostics, duplicated, and compared.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AiidEncodingConfig {
    /// byte count of the actual key data that will be encoded
    pub key_byte_count: usize,
    /// parity bytes that will be encoded directly into the base32 string (appended after key)
    pub base_parity_byte_count: usize,
    /// parity bytes that will be encoded in the alpha capitalization (appended after base parity)
    pub cap_parity_byte_count: usize,
    /// bytes to prefix before rendering to base32
    pub prefix: Vec<u8>,
    /// binary indication of the capitalization for prefix characters:
    /// one ASCII `b'0'` / `b'1'` byte per prefix character
    pub prefix_cap: Vec<u8>,
    /// how many characters are in a capitalization parity segment
    pub cap_segment_char_count: usize,
    /// how many characters long the fully rendered base32 string should be
    pub encoded_char_count: usize,
}
114
115impl AiidEncodingConfig {
116 /// create a new config given a kind token string
117 ///
118 /// # Example
119 ///
120 /// ```
121 /// let aia0 = aingle_id::AiidEncodingConfig::new("aia0").unwrap();
122 /// let aik0 = aingle_id::AiidEncodingConfig::new("aik0").unwrap();
123 /// let ais0 = aingle_id::AiidEncodingConfig::new("ais0").unwrap();
124 /// ```
125 pub fn new(kind: &str) -> AiidResult<Self> {
126 let kind_b = kind.as_bytes();
127 // Check for "aiXY" format where X is a-z or 3-9, Y is 0 or 1
128 // 'a' = 97, 'i' = 105
129 if kind_b.len() != 4 || kind_b[0] != 97 || kind_b[1] != 105 ||
130 (kind_b[3] != 48 && kind_b[3] != 49) ||
131 kind_b[2] < 51 || kind_b[2] > 122 {
132 return Err(format!("invalid kind: `{}`", kind).into());
133 }
134
135 let version = if kind_b[3] == 48 { 0 } else { 1 };
136 let res = AI_CODE_MAP[(kind_b[2] - 51) as usize][version as usize];
137
138 if res == 0xff {
139 return Err(format!("invalid kind: `{}`", kind).into());
140 }
141
142 Ok(AiidEncodingConfig {
143 key_byte_count: 32,
144 base_parity_byte_count: 4,
145 cap_parity_byte_count: 4,
146 prefix: vec![0x02, res, 0x24],
147 prefix_cap: b"001".to_vec(), // "ai" lowercase, third char uppercase
148 cap_segment_char_count: 15,
149 encoded_char_count: 63,
150 })
151 }
152}
153
154/// an instance that can encode / decode a particular aiid encoding configuration
155pub struct AiidEncoding {
156 config: AiidEncodingConfig,
157 rs_enc: reed_solomon::Encoder,
158 rs_dec: reed_solomon::Decoder,
159}
160
161impl AiidEncoding {
162 /// create a new AiidEncoding instance from given AiidEncodingConfig
163 pub fn new(config: AiidEncodingConfig) -> AiidResult<Self> {
164 // set up a reed-solomon encoder with proper parity count
165 let rs_enc = reed_solomon::Encoder::new(
166 config.base_parity_byte_count + config.cap_parity_byte_count,
167 );
168
169 // set up a reed-solomon decoder with proper parity count
170 let rs_dec = reed_solomon::Decoder::new(
171 config.base_parity_byte_count + config.cap_parity_byte_count,
172 );
173
174 Ok(Self {
175 config,
176 rs_enc,
177 rs_dec,
178 })
179 }
180
181 /// create a new config given a kind token string
182 ///
183 /// # Example
184 ///
185 /// ```
186 /// let aia0 = aingle_id::AiidEncoding::with_kind("aia0").unwrap();
187 /// let aik0 = aingle_id::AiidEncoding::with_kind("aik0").unwrap();
188 /// let ais0 = aingle_id::AiidEncoding::with_kind("ais0").unwrap();
189 /// ```
190 pub fn with_kind(kind: &str) -> AiidResult<Self> {
191 AiidEncoding::new(AiidEncodingConfig::new(kind)?)
192 }
193
194 /// encode a string to base32 with this instance's configuration
195 pub fn encode(&self, data: &[u8]) -> AiidResult<String> {
196 if data.len() != self.config.key_byte_count {
197 return Err(AiidError(String::from(format!(
198 "BadDataLen:{},Expected:{}",
199 data.len(),
200 self.config.key_byte_count
201 ))));
202 }
203
204 // generate reed-solomon parity bytes
205 let full_parity = self.rs_enc.encode(data);
206
207 // extract the bytes that will be encoded as capitalization
208 let cap_bytes = &full_parity[full_parity.len() - self.config.cap_parity_byte_count..];
209
210 // base is the bytes that will be base32 encoded
211 let mut base = self.config.prefix.clone();
212 base.extend_from_slice(
213 &full_parity[0..full_parity.len() - self.config.cap_parity_byte_count],
214 );
215
216 // do the base32 encoding
217 let mut base32 = b32::encode(&base);
218
219 if base32.len() != self.config.encoded_char_count {
220 return Err(AiidError(String::from(format!(
221 "InternalGeneratedBadLen:{},Expected:{}",
222 base32.len(),
223 self.config.encoded_char_count
224 ))));
225 }
226
227 // capitalize the prefix with a fixed scheme
228 cap_encode_bin(
229 &mut base32[0..self.config.prefix_cap.len()],
230 &self.config.prefix_cap,
231 3,
232 )?;
233
234 // iterate over segments, applying parity capitalization
235 for i in 0..cap_bytes.len() {
236 let seg_start = self.config.prefix_cap.len() + (i * self.config.cap_segment_char_count);
237 let seg = &mut base32[seg_start..seg_start + self.config.cap_segment_char_count];
238 let bin = format!("{:08b}", cap_bytes[i]).into_bytes();
239 cap_encode_bin(seg, &bin, 8)?;
240 }
241
242 // we only use ascii characters
243 // use unchecked for performance / so we don't allocate again
244 unsafe {
245 // return the result as a String for ease of use
246 Ok(String::from_utf8_unchecked(base32))
247 }
248 }
249
250 /// decode the data from a base32 string with this instance's configuration. Reed-Solomon can
251 /// correct up to 1/2 its parity size worth of erasures (if no other errors are present).
252 pub fn decode(&self, data: &str) -> AiidResult<Vec<u8>> {
253 // get our parsed data with erasures
254 let (data, erasures) = self.pre_decode(data)?;
255
256 if erasures.len() > ( self.config.base_parity_byte_count + self.config.cap_parity_byte_count ) / 2 {
257 // our reed-solomon library makes bad corrections once erasure count exceeds 1/2 the
258 // parity count (it takes 2 parity symbols to find/correct one error, 1 parity symbol to
259 // correct a known erasure)
260 return Err(AiidError(String::from("TooManyErrors")));
261 }
262
263 // optimise for the case where there are no transcription errors
264 // this makes correcting more expensive if there *are*,
265 // but on average makes the system more efficient
266 if self.pre_is_corrupt(&data, &erasures)? {
267 // apply reed-solomon correction
268 // will "throw" on too many errors
269 Ok(
270 self.rs_dec.correct(&data, Some(&erasures[..]))?[0..self.config.key_byte_count]
271 .to_vec(),
272 )
273 } else {
274 Ok(data[0..self.config.key_byte_count].to_vec())
275 }
276 }
277
278 /// a lighter-weight check to determine if a base32 string is corrupt
279 pub fn is_corrupt(&self, data: &str) -> AiidResult<bool> {
280 // get our parsed data with erasures
281 let (data, erasures) = match self.pre_decode(data) {
282 Ok(v) => v,
283 Err(_) => return Ok(true),
284 };
285
286 match self.pre_is_corrupt(&data, &erasures) {
287 Ok(v) => Ok(v),
288 Err(_) => Ok(true),
289 }
290 }
291
292 /// internal helper for is_corrupt checking
293 fn pre_is_corrupt(&self, data: &[u8], erasures: &[u8]) -> AiidResult<bool> {
294 // if we have any erasures, we can exit early
295 if erasures.len() > 0 {
296 return Ok(true);
297 }
298
299 // slightly more efficient reed-solomon corruption check
300 Ok(self.rs_dec.is_corrupted(&data))
301 }
302
303 /// internal helper for preparing decoding
304 fn pre_decode(&self, data: &str) -> AiidResult<(Vec<u8>, Vec<u8>)> {
305 if data.len() != self.config.encoded_char_count {
306 return Err(AiidError(String::from(format!(
307 "BadIdLen:{},Expected:{}",
308 data.len(),
309 self.config.encoded_char_count
310 ))));
311 }
312
313 let key_base_byte_size = self.config.key_byte_count + self.config.base_parity_byte_count;
314 // All char_erasures are indexed from the 0th char of the full codeword w/ prefix, but
315 // byte_erasures are indexed from the 0th byte of the key+parity (ie. without the prefix).
316 // Any byte of key, or base/cap parity could be erased.
317 let mut byte_erasures = vec![b'0'; key_base_byte_size + self.config.cap_parity_byte_count];
318 let mut char_erasures = vec![b'0'; data.len()];
319
320 // correct any transliteration errors into our base32 alphabet
321 // marking any unrecognizable characters as char-level erasures
322 let mut data = b32_correct(data.as_bytes(), &mut char_erasures);
323
324 // Pull out the parity data that was encoded as capitalization. If its erasure,
325 // determine the
326 let mut cap_bytes: Vec<u8> = Vec::new();
327 let mut all_zro = true;
328 let mut all_one = true;
329 for i in 0..self.config.cap_parity_byte_count {
330 // For cap. parity, indexing starts after pre-defined Base-32 prefix
331 let char_idx = self.config.prefix_cap.len() + (i * self.config.cap_segment_char_count);
332 match cap_decode(
333 char_idx,
334 &data[char_idx..char_idx + self.config.cap_segment_char_count],
335 &char_erasures
336 )? {
337 None => {
338 byte_erasures[key_base_byte_size + i] = b'1';
339 cap_bytes.push(0)
340 }
341 Some(parity) => {
342 if all_zro && parity != 0x00_u8 {
343 all_zro = false
344 }
345 if all_one && parity != 0xFF_u8 {
346 all_one = false
347 }
348 cap_bytes.push(parity)
349 }
350 }
351 }
352
353 // If either all caps or all lower case (or erasure), assume the casing was lost (eg. QR
354 // code, or dns segment); mark all cap-derived parity as erasures. This allows validation
355 // of codeword if all remaining parity is intact and key is correct; since no parity
356 // capacity remains, no correction will be attempted. There is only a low probability that
357 // any remaining errors will be detected, in this case. However, we're no *worse* off than
358 // if we had no R-S parity at all.
359 if all_zro || all_one {
360 for i in 0..self.config.cap_parity_byte_count {
361 byte_erasures[key_base_byte_size + i] = b'1';
362 }
363 }
364
365 // we have the cap data, uppercase everything
366 for c in data.iter_mut() {
367 char_upper(c);
368 }
369
370 // do the base32 decode
371 let mut data = b32::decode(&data)?;
372
373 if &data[0..self.config.prefix.len()] != self.config.prefix.as_slice() {
374 return Err(AiidError(String::from("PrefixMismatch")));
375 }
376
377 // remove the prefix bytes
378 data.drain(0..self.config.prefix.len());
379
380 // append our cap parity bytes
381 data.append(&mut cap_bytes);
382
383 // Sort through the char-level erasures (5 bits), and associate them with byte-level data (8
384 // bits) -- in the (now prefix-free) data buffer, so that we mark the proper erasures for
385 // reed-solomon correction. Some of these chars span multiple bytes... we need to mark both.
386 for i in self.config.prefix_cap.len()..char_erasures.len() {
387 let c = char_erasures[i];
388 if c == b'1' {
389 // 1st and last bit of 5-bit segment may index different bytes
390 byte_erasures[( i * 5 + 0 ) / 8 - self.config.prefix.len()] = b'1';
391 byte_erasures[( i * 5 + 4 ) / 8 - self.config.prefix.len()] = b'1';
392 }
393 }
394
395 // translate erasures into the form expected by our reed-solomon lib
396 let mut erasures: Vec<u8> = Vec::new();
397 for i in 0..byte_erasures.len() {
398 if byte_erasures[i] == b'1' {
399 data[i] = 0;
400 erasures.push(i as u8);
401 }
402 }
403
404 Ok((data, erasures))
405 }
406}
407
#[cfg(test)]
mod tests {
    use super::*;

    /// Known-answer vector: a 32-byte key, hex encoded.
    const TEST_HEX_1: &str =
        "0c71db50d35d760b0ea2002ff20147c7c3a8e8030d35ef28ed1adaec9e329aba";
    /// The id expected for `TEST_HEX_1` under kind "aik0".
    const TEST_ID_1: &str =
        "aiKciDds5OiogymxbnHKEabQ8iavqs8dwdVaGdJW76Vp4gx47tQDfGW4OWc9w5i";

    /// Encoding the known key must reproduce the known id exactly.
    #[test]
    fn it_encodes_1() {
        let codec = AiidEncoding::with_kind("aik0").unwrap();

        let key = hex::decode(TEST_HEX_1).unwrap();
        let rendered = codec.encode(&key).unwrap();
        assert_eq!(TEST_ID_1, rendered);
    }

    /// Decoding the known id must give back the known key.
    #[test]
    fn it_decodes_1() {
        let codec = AiidEncoding::with_kind("aik0").unwrap();

        let key = codec.decode(TEST_ID_1).unwrap();
        assert_eq!(TEST_HEX_1, hex::encode(key));
    }
}