aiid/
lib.rs

1//! AIngle AI-ID base32 encoding utility.
2//!
3//! # Example
4//!
5//! ```
6//! extern crate aiid;
7//!
8//! fn main() {
9//!     let enc = aiid::aiidEncoding::with_kind("ais0").unwrap();
10//!     let key = enc.encode(&[0; 32]).unwrap();
//!     assert_eq!(63, key.len());
12//!     let buffer = enc.decode(&key).unwrap();
13//!     assert_eq!([0; 32].to_vec(), buffer);
14//! }
15//! ```
16
17extern crate reed_solomon;
18
19mod error;
20mod b32;
21pub use error::{aiidError, aiidResult};
22
23mod util;
24use util::{b32_correct, cap_decode, cap_encode_bin, char_upper};
25
/// Lookup table from a kind character to the middle prefix byte of an encoded id.
///
/// The table is indexed by `(ascii code of the kind character) - 51`, so row 0 is `'3'`
/// (ASCII 51) and the last row is `'z'` (ASCII 122).  Each row holds one byte per version:
/// column 0 is version `0`, column 1 is version `1`.  The value `0xff` marks a slot that is
/// reserved / not a valid kind.
///
/// Assigned version-0 codes are always even and advance by 2 per kind character, leaving the
/// odd value in between for the (currently reserved) version 1 of the same kind.
static AI_CODE_MAP: &[[u8; 2]] = &[
    [ 0xb2, 0xff ], // 51: ai30, reserved
    [ 0xb4, 0xff ], // 52: ai40, reserved
    [ 0xb6, 0xff ], // 53: ai50, reserved
    [ 0xb8, 0xff ], // 54: ai60, reserved
    [ 0xba, 0xff ], // 55: ai70, reserved
    [ 0xbc, 0xff ], // 56: ai80, reserved
    [ 0xbe, 0xff ], // 57: ai90, reserved

    // 58-61: reserved
    [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ],
    // 62-65: reserved
    [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ],
    // 66-69: reserved
    [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ],
    // 70-73: reserved
    [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ],
    // 74-77: reserved
    [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ],
    // 78-81: reserved
    [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ],
    // 82-85: reserved
    [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ],
    // 86-89: reserved
    [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ],
    // 90-93: reserved
    [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ],
    // 94-96: reserved
    [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ],

    [ 0x80, 0xff ], // 97: aia0, reserved
    [ 0x82, 0xff ], // 98: aib0, reserved
    [ 0x84, 0xff ], // 99: aic0, reserved
    [ 0x86, 0xff ], // 100: aid0, reserved
    [ 0x88, 0xff ], // 101: aie0, reserved
    [ 0x8a, 0xff ], // 102: aif0, reserved
    [ 0x8c, 0xff ], // 103: aig0, reserved
    [ 0x8e, 0xff ], // 104: aih0, reserved
    [ 0x90, 0xff ], // 105: aii0, reserved
    [ 0x92, 0xff ], // 106: aij0, reserved
    [ 0x94, 0xff ], // 107: aik0, reserved

    [ 0xff, 0xff ], // 108: reserved, reserved

    [ 0x96, 0xff ], // 109: aim0, reserved
    [ 0x98, 0xff ], // 110: ain0, reserved
    [ 0x9a, 0xff ], // 111: aio0, reserved
    [ 0x9c, 0xff ], // 112: aip0, reserved
    [ 0x9e, 0xff ], // 113: aiq0, reserved
    [ 0xa0, 0xff ], // 114: air0, reserved
    [ 0xa2, 0xff ], // 115: ais0, reserved
    [ 0xa4, 0xff ], // 116: ait0, reserved
    [ 0xa6, 0xff ], // 117: aiu0, reserved
    [ 0xa8, 0xff ], // 118: aiv0, reserved
    [ 0xaa, 0xff ], // 119: aiw0, reserved
    [ 0xac, 0xff ], // 120: aix0, reserved (was 0xad, which is the odd "aix1" slot)
    [ 0xae, 0xff ], // 121: aiy0, reserved
    [ 0xb0, 0xff ], // 122: aiz0, reserved
];
85
86/* XXX
87 *
88 * aiK v0 hex:     0x389424
89 * aiK v1 hex:     0x389524
90 * aiA v0 hex:     0x388024
91 * aiA v1 hex:     0x388124
92 * aiS v0 hex:     0x38a224
93 * aiS v1 hex:     0x38a324
94 *
95 * XXX
96 */
97
/// Represents an encoding configuration for aiid rendering and parsing.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so that configurations can be logged,
/// duplicated and compared, and so that a `Result` carrying one can be used with helpers such
/// as `unwrap_err` that require the success type to be `Debug`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct aiidEncodingConfig {
    /// byte count of actual key data that will be encoded
    pub key_byte_count: usize,
    /// parity bytes that will be encoded directly into the base32 string (appended after key)
    pub base_parity_byte_count: usize,
    /// parity bytes that will be encoded in the alpha capitalization (appended after base parity)
    pub cap_parity_byte_count: usize,
    /// bytes to prefix before rendering to base32
    pub prefix: Vec<u8>,
    /// binary indication of the capitalization for prefix characters
    /// (ASCII `b'0'` / `b'1'` flags, one per prefix character)
    pub prefix_cap: Vec<u8>,
    /// how many characters are in a capitalization parity segment
    pub cap_segment_char_count: usize,
    /// how many characters long the fully rendered base32 string should be
    pub encoded_char_count: usize,
}
115
116impl aiidEncodingConfig {
117    /// create a new config given a kind token string
118    ///
119    /// # Example
120    ///
121    /// ```
122    /// extern crate aiid;
123    /// let aia0 = aiid::aiidEncodingConfig::new("aia0").unwrap();
124    /// let aik0 = aiid::aiidEncodingConfig::new("aik0").unwrap();
125    /// let ais0 = aiid::aiidEncodingConfig::new("ais0").unwrap();
126    /// ```
127    pub fn new(kind: &str) -> aiidResult<Self> {
128        let kind_b = kind.as_bytes();
129        if kind_b.len() != 4 || kind_b[0] != 104 || kind_b[1] != 99 ||
130                (kind_b[3] != 48 && kind_b[3] != 49) ||
131                kind_b[2] < 51 || kind_b[2] > 122 {
132            return Err(format!("invalid kind: `{}`", kind).into());
133        }
134
135        let version = if kind_b[3] == 48 { 0 } else { 1 };
136        let res = AI_CODE_MAP[(kind_b[2] - 51) as usize][version as usize];
137
138        if res == 0xff {
139            return Err(format!("invalid kind: `{}`", kind).into());
140        }
141
142        Ok(aiidEncodingConfig {
143            key_byte_count: 32,
144            base_parity_byte_count: 4,
145            cap_parity_byte_count: 4,
146            prefix: vec![0x38, res, 0x24],
147            prefix_cap: b"101".to_vec(),
148            cap_segment_char_count: 15,
149            encoded_char_count: 63,
150        })
151    }
152}
153
154/// an instance that can encode / decode a particular aiid encoding configuration
155pub struct aiidEncoding {
156    config: aiidEncodingConfig,
157    rs_enc: reed_solomon::Encoder,
158    rs_dec: reed_solomon::Decoder,
159}
160
161impl aiidEncoding {
162    /// create a new aiidEncoding instance from given aiidEncodingConfig
163    pub fn new(config: aiidEncodingConfig) -> aiidResult<Self> {
164        // set up a reed-solomon encoder with proper parity count
165        let rs_enc = reed_solomon::Encoder::new(
166            config.base_parity_byte_count + config.cap_parity_byte_count,
167        );
168
169        // set up a reed-solomon decoder with proper parity count
170        let rs_dec = reed_solomon::Decoder::new(
171            config.base_parity_byte_count + config.cap_parity_byte_count,
172        );
173
174        Ok(Self {
175            config,
176            rs_enc,
177            rs_dec,
178        })
179    }
180
181    /// create a new config given a kind token string
182    ///
183    /// # Example
184    ///
185    /// ```
186    /// extern crate aiid;
187    /// let aia0 = aiid::aiidEncoding::with_kind("aia0").unwrap();
188    /// let aik0 = aiid::aiidEncoding::with_kind("aik0").unwrap();
189    /// let ais0 = aiid::aiidEncoding::with_kind("ais0").unwrap();
190    /// ```
191    pub fn with_kind(kind: &str) -> aiidResult<Self> {
192        aiidEncoding::new(aiidEncodingConfig::new(kind)?)
193    }
194
195    /// encode a string to base32 with this instance's configuration
196    pub fn encode(&self, data: &[u8]) -> aiidResult<String> {
197        if data.len() != self.config.key_byte_count {
198            return Err(aiidError(String::from(format!(
199                "BadDataLen:{},Expected:{}",
200                data.len(),
201                self.config.key_byte_count
202            ))));
203        }
204
205        // generate reed-solomon parity bytes
206        let full_parity = self.rs_enc.encode(data);
207
208        // extract the bytes that will be encoded as capitalization
209        let cap_bytes = &full_parity[full_parity.len() - self.config.cap_parity_byte_count..];
210
211        // base is the bytes that will be base32 encoded
212        let mut base = self.config.prefix.clone();
213        base.extend_from_slice(
214            &full_parity[0..full_parity.len() - self.config.cap_parity_byte_count],
215        );
216
217        // do the base32 encoding
218        let mut base32 = b32::encode(&base);
219
220        if base32.len() != self.config.encoded_char_count {
221            return Err(aiidError(String::from(format!(
222                "InternalGeneratedBadLen:{},Expected:{}",
223                base32.len(),
224                self.config.encoded_char_count
225            ))));
226        }
227
228        // capitalize the prefix with a fixed scheme
229        cap_encode_bin(
230            &mut base32[0..self.config.prefix_cap.len()],
231            &self.config.prefix_cap,
232            3,
233        )?;
234
235        // iterate over segments, applying parity capitalization
236        for i in 0..cap_bytes.len() {
237            let seg_start = self.config.prefix_cap.len() + (i * self.config.cap_segment_char_count);
238            let seg = &mut base32[seg_start..seg_start + self.config.cap_segment_char_count];
239            let bin = format!("{:08b}", cap_bytes[i]).into_bytes();
240            cap_encode_bin(seg, &bin, 8)?;
241        }
242        
243        // we only use ascii characters
244        // use unchecked for performance / so we don't allocate again
245        unsafe {
246            // return the result as a String for ease of use
247            Ok(String::from_utf8_unchecked(base32))
248        }
249    }
250
251    /// decode the data from a base32 string with this instance's configuration.  Reed-Solomon can
252    /// correct up to 1/2 its parity size worth of erasures (if no other errors are present).
253    pub fn decode(&self, data: &str) -> aiidResult<Vec<u8>> {
254        // get our parsed data with erasures
255        let (data, erasures) = self.pre_decode(data)?;
256
257        if erasures.len() > ( self.config.base_parity_byte_count + self.config.cap_parity_byte_count ) / 2 {
258            // our reed-solomon library makes bad corrections once erasure count exceeds 1/2 the
259            // parity count (it takes 2 parity symbols to find/correct one error, 1 parity symbol to
260            // correct a known erasure)
261            return Err(aiidError(String::from("TooManyErrors")));
262        }
263
264        // optimise for the case where there are no transcription errors
265        // this makes correcting more expensive if there *are*,
266        // but on average makes the system more efficient
267        if self.pre_is_corrupt(&data, &erasures)? {
268            // apply reed-solomon correction
269            // will "throw" on too many errors
270            Ok(
271                self.rs_dec.correct(&data, Some(&erasures[..]))?[0..self.config.key_byte_count]
272                    .to_vec(),
273            )
274        } else {
275            Ok(data[0..self.config.key_byte_count].to_vec())
276        }
277    }
278
279    /// a lighter-weight check to determine if a base32 string is corrupt
280    pub fn is_corrupt(&self, data: &str) -> aiidResult<bool> {
281        // get our parsed data with erasures
282        let (data, erasures) = match self.pre_decode(data) {
283            Ok(v) => v,
284            Err(_) => return Ok(true),
285        };
286
287        match self.pre_is_corrupt(&data, &erasures) {
288            Ok(v) => Ok(v),
289            Err(_) => Ok(true),
290        }
291    }
292
293    /// internal helper for is_corrupt checking
294    fn pre_is_corrupt(&self, data: &[u8], erasures: &[u8]) -> aiidResult<bool> {
295        // if we have any erasures, we can exit early
296        if erasures.len() > 0 {
297            return Ok(true);
298        }
299
300        // slightly more efficient reed-solomon corruption check
301        Ok(self.rs_dec.is_corrupted(&data))
302    }
303
304    /// internal helper for preparing decoding
305    fn pre_decode(&self, data: &str) -> aiidResult<(Vec<u8>, Vec<u8>)> {
306        if data.len() != self.config.encoded_char_count {
307            return Err(aiidError(String::from(format!(
308                "BadIdLen:{},Expected:{}",
309                data.len(),
310                self.config.encoded_char_count
311            ))));
312        }
313
314        let key_base_byte_size = self.config.key_byte_count + self.config.base_parity_byte_count;
315        // All char_erasures are indexed from the 0th char of the full codeword w/ prefix, but
316        // byte_erasures are indexed from the 0th byte of the key+parity (ie. without the prefix).
317        // Any byte of key, or base/cap parity could be erased.
318        let mut byte_erasures = vec![b'0'; key_base_byte_size + self.config.cap_parity_byte_count];
319        let mut char_erasures = vec![b'0'; data.len()];
320
321        // correct any transliteration errors into our base32 alphabet
322        // marking any unrecognizable characters as char-level erasures
323        let mut data = b32_correct(data.as_bytes(), &mut char_erasures);
324
325        // Pull out the parity data that was encoded as capitalization.  If its erasure,
326        // determine the 
327        let mut cap_bytes: Vec<u8> = Vec::new();
328        let mut all_zro = true;
329        let mut all_one = true;
330        for i in 0..self.config.cap_parity_byte_count {
331            // For cap. parity, indexing starts after pre-defined Base-32 prefix
332            let char_idx = self.config.prefix_cap.len() + (i * self.config.cap_segment_char_count);
333            match cap_decode(
334                char_idx,
335                &data[char_idx..char_idx + self.config.cap_segment_char_count],
336                &char_erasures
337            )? {
338                None => {
339                    byte_erasures[key_base_byte_size + i] = b'1';
340                    cap_bytes.push(0)
341                }
342                Some(parity) => {
343                    if all_zro && parity != 0x00_u8 {
344                        all_zro = false
345                    }
346                    if all_one && parity != 0xFF_u8 {
347                        all_one = false
348                    }
349                    cap_bytes.push(parity)
350                }
351            }
352        }
353
354        // If either all caps or all lower case (or erasure), assume the casing was lost (eg. QR
355        // code, or dns segment); mark all cap-derived parity as erasures.  This allows validation
356        // of codeword if all remaining parity is intact and key is correct; since no parity
357        // capacity remains, no correction will be attempted.  There is only a low probability that
358        // any remaining errors will be detected, in this case.  However, we're no *worse* off than
359        // if we had no R-S parity at all.
360        if all_zro || all_one {
361            for i in 0..self.config.cap_parity_byte_count {
362                byte_erasures[key_base_byte_size + i] = b'1';
363            }
364        }
365
366        // we have the cap data, uppercase everything
367        for c in data.iter_mut() {
368            char_upper(c);
369        }
370
371        // do the base32 decode
372        let mut data = b32::decode(&data)?;
373
374        if &data[0..self.config.prefix.len()] != self.config.prefix.as_slice() {
375            return Err(aiidError(String::from("PrefixMismatch")));
376        }
377
378        // remove the prefix bytes
379        data.drain(0..self.config.prefix.len());
380
381        // append our cap parity bytes
382        data.append(&mut cap_bytes);
383
384        // Sort through the char-level erasures (5 bits), and associate them with byte-level data (8
385        // bits) -- in the (now prefix-free) data buffer, so that we mark the proper erasures for
386        // reed-solomon correction.  Some of these chars span multiple bytes... we need to mark both.
387        for i in self.config.prefix_cap.len()..char_erasures.len() {
388            let c = char_erasures[i];
389            if c == b'1' {
390                // 1st and last bit of 5-bit segment may index different bytes
391                byte_erasures[( i * 5 + 0 ) / 8 - self.config.prefix.len()] = b'1';
392                byte_erasures[( i * 5 + 4 ) / 8 - self.config.prefix.len()] = b'1';
393            }
394        }
395
396        // translate erasures into the form expected by our reed-solomon lib
397        let mut erasures: Vec<u8> = Vec::new();
398        for i in 0..byte_erasures.len() {
399            if byte_erasures[i] == b'1' {
400                data[i] = 0;
401                erasures.push(i as u8);
402            }
403        }
404
405        Ok((data, erasures))
406    }
407}
408
#[cfg(test)]
mod tests {
    use super::*;

    /// hex rendering of the 32-byte key used by the fixtures below
    static TEST_HEX_1: &str =
        "0c71db50d35d760b0ea2002ff20147c7c3a8e8030d35ef28ed1adaec9e329aba";
    /// expected `aik0` rendering of `TEST_HEX_1`
    static TEST_ID_1: &str =
        "aiKciDds5OiogymxbnHKEabQ8iavqs8dwdVaGdJW76Vp4gx47tQDfGW4OWc9w5i";

    /// build the `aik0` encoding shared by every test in this module
    fn aik0() -> aiidEncoding {
        aiidEncoding::with_kind("aik0").unwrap()
    }

    #[test]
    fn it_encodes_1() {
        let key_bytes = hex::decode(TEST_HEX_1).unwrap();
        let rendered = aik0().encode(&key_bytes).unwrap();
        assert_eq!(TEST_ID_1, rendered);
    }

    #[test]
    fn it_decodes_1() {
        let key_bytes = aik0().decode(TEST_ID_1).unwrap();
        let rendered_hex = hex::encode(&key_bytes);
        assert_eq!(TEST_HEX_1, rendered_hex);
    }
}