aiid/lib.rs
1//! AIngle AI-ID base32 encoding utility.
2//!
3//! # Example
4//!
5//! ```
6//! extern crate aiid;
7//!
8//! fn main() {
9//! let enc = aiid::aiidEncoding::with_kind("ais0").unwrap();
10//! let key = enc.encode(&[0; 32]).unwrap();
//! assert_eq!(63, key.len());
12//! let buffer = enc.decode(&key).unwrap();
13//! assert_eq!([0; 32].to_vec(), buffer);
14//! }
15//! ```
16
17extern crate reed_solomon;
18
19mod error;
20mod b32;
21pub use error::{aiidError, aiidResult};
22
23mod util;
24use util::{b32_correct, cap_decode, cap_encode_bin, char_upper};
25
/// Lookup table from the third character of a kind token to its prefix code byte.
///
/// Row `r` corresponds to the ASCII byte `r + 51` (`'3'` through `'z'`). Column 0
/// holds the code for version `0` of that kind, column 1 the code for version `1`.
/// A value of `0xff` marks a slot that is reserved / unassigned.
///
/// Assigned version-0 codes are even and advance in steps of two, which leaves
/// the odd value in between free for the corresponding version-1 code.
static AI_CODE_MAP: &[[u8; 2]] = &[
    [ 0xb2, 0xff ], // 51: ai30, reserved
    [ 0xb4, 0xff ], // 52: ai40, reserved
    [ 0xb6, 0xff ], // 53: ai50, reserved
    [ 0xb8, 0xff ], // 54: ai60, reserved
    [ 0xba, 0xff ], // 55: ai70, reserved
    [ 0xbc, 0xff ], // 56: ai80, reserved
    [ 0xbe, 0xff ], // 57: ai90, reserved

    // 58-61: reserved
    [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ],
    // 62-65: reserved
    [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ],
    // 66-69: reserved
    [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ],
    // 70-73: reserved
    [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ],
    // 74-77: reserved
    [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ],
    // 78-81: reserved
    [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ],
    // 82-85: reserved
    [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ],
    // 86-89: reserved
    [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ],
    // 90-93: reserved
    [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ],
    // 94-96: reserved
    [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ],

    [ 0x80, 0xff ], // 97: aia0, reserved
    [ 0x82, 0xff ], // 98: aib0, reserved
    [ 0x84, 0xff ], // 99: aic0, reserved
    [ 0x86, 0xff ], // 100: aid0, reserved
    [ 0x88, 0xff ], // 101: aie0, reserved
    [ 0x8a, 0xff ], // 102: aif0, reserved
    [ 0x8c, 0xff ], // 103: aig0, reserved
    [ 0x8e, 0xff ], // 104: aih0, reserved
    [ 0x90, 0xff ], // 105: aii0, reserved
    [ 0x92, 0xff ], // 106: aij0, reserved
    [ 0x94, 0xff ], // 107: aik0, reserved

    [ 0xff, 0xff ], // 108: reserved, reserved

    [ 0x96, 0xff ], // 109: aim0, reserved
    [ 0x98, 0xff ], // 110: ain0, reserved
    [ 0x9a, 0xff ], // 111: aio0, reserved
    [ 0x9c, 0xff ], // 112: aip0, reserved
    [ 0x9e, 0xff ], // 113: aiq0, reserved
    [ 0xa0, 0xff ], // 114: air0, reserved
    [ 0xa2, 0xff ], // 115: ais0, reserved
    [ 0xa4, 0xff ], // 116: ait0, reserved
    [ 0xa6, 0xff ], // 117: aiu0, reserved
    [ 0xa8, 0xff ], // 118: aiv0, reserved
    [ 0xaa, 0xff ], // 119: aiw0, reserved
    [ 0xac, 0xff ], // 120: aix0, reserved (was 0xad, which is the slot for aix1)
    [ 0xae, 0xff ], // 121: aiy0, reserved
    [ 0xb0, 0xff ], // 122: aiz0, reserved
];
85
86/* XXX
87 *
88 * aiK v0 hex: 0x389424
89 * aiK v1 hex: 0x389524
90 * aiA v0 hex: 0x388024
91 * aiA v1 hex: 0x388124
92 * aiS v0 hex: 0x38a224
93 * aiS v1 hex: 0x38a324
94 *
95 * XXX
96 */
97
/// represents an encoding configuration for aiid rendering and parsing
///
/// All fields are public, so a configuration can be built by hand or obtained
/// from a kind token. The type is `Debug`, `Clone` and comparable so that
/// callers can log, copy and compare configurations.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct aiidEncodingConfig {
    /// byte count of actual key data that will be encoded
    pub key_byte_count: usize,
    /// parity bytes that will be encoded directly into the base32 string (appended after key)
    pub base_parity_byte_count: usize,
    /// parity bytes that will be encoded in the alpha capitalization (appended after base parity)
    pub cap_parity_byte_count: usize,
    /// bytes to prefix before rendering to base32
    pub prefix: Vec<u8>,
    /// binary indication of the capitalization for prefix characters
    pub prefix_cap: Vec<u8>,
    /// how many characters are in a capitalization parity segment
    pub cap_segment_char_count: usize,
    /// how many characters long the fully rendered base32 string should be
    pub encoded_char_count: usize,
}
115
116impl aiidEncodingConfig {
117 /// create a new config given a kind token string
118 ///
119 /// # Example
120 ///
121 /// ```
122 /// extern crate aiid;
123 /// let aia0 = aiid::aiidEncodingConfig::new("aia0").unwrap();
124 /// let aik0 = aiid::aiidEncodingConfig::new("aik0").unwrap();
125 /// let ais0 = aiid::aiidEncodingConfig::new("ais0").unwrap();
126 /// ```
127 pub fn new(kind: &str) -> aiidResult<Self> {
128 let kind_b = kind.as_bytes();
129 if kind_b.len() != 4 || kind_b[0] != 104 || kind_b[1] != 99 ||
130 (kind_b[3] != 48 && kind_b[3] != 49) ||
131 kind_b[2] < 51 || kind_b[2] > 122 {
132 return Err(format!("invalid kind: `{}`", kind).into());
133 }
134
135 let version = if kind_b[3] == 48 { 0 } else { 1 };
136 let res = AI_CODE_MAP[(kind_b[2] - 51) as usize][version as usize];
137
138 if res == 0xff {
139 return Err(format!("invalid kind: `{}`", kind).into());
140 }
141
142 Ok(aiidEncodingConfig {
143 key_byte_count: 32,
144 base_parity_byte_count: 4,
145 cap_parity_byte_count: 4,
146 prefix: vec![0x38, res, 0x24],
147 prefix_cap: b"101".to_vec(),
148 cap_segment_char_count: 15,
149 encoded_char_count: 63,
150 })
151 }
152}
153
/// an instance that can encode / decode a particular aiid encoding configuration
pub struct aiidEncoding {
    /// the encoding configuration (byte counts, prefix, segment sizes) this instance applies
    config: aiidEncodingConfig,
    /// reed-solomon encoder, built for the base + cap parity byte count of `config`
    rs_enc: reed_solomon::Encoder,
    /// reed-solomon decoder, built for the same parity byte count as `rs_enc`
    rs_dec: reed_solomon::Decoder,
}
160
161impl aiidEncoding {
162 /// create a new aiidEncoding instance from given aiidEncodingConfig
163 pub fn new(config: aiidEncodingConfig) -> aiidResult<Self> {
164 // set up a reed-solomon encoder with proper parity count
165 let rs_enc = reed_solomon::Encoder::new(
166 config.base_parity_byte_count + config.cap_parity_byte_count,
167 );
168
169 // set up a reed-solomon decoder with proper parity count
170 let rs_dec = reed_solomon::Decoder::new(
171 config.base_parity_byte_count + config.cap_parity_byte_count,
172 );
173
174 Ok(Self {
175 config,
176 rs_enc,
177 rs_dec,
178 })
179 }
180
181 /// create a new config given a kind token string
182 ///
183 /// # Example
184 ///
185 /// ```
186 /// extern crate aiid;
187 /// let aia0 = aiid::aiidEncoding::with_kind("aia0").unwrap();
188 /// let aik0 = aiid::aiidEncoding::with_kind("aik0").unwrap();
189 /// let ais0 = aiid::aiidEncoding::with_kind("ais0").unwrap();
190 /// ```
191 pub fn with_kind(kind: &str) -> aiidResult<Self> {
192 aiidEncoding::new(aiidEncodingConfig::new(kind)?)
193 }
194
195 /// encode a string to base32 with this instance's configuration
196 pub fn encode(&self, data: &[u8]) -> aiidResult<String> {
197 if data.len() != self.config.key_byte_count {
198 return Err(aiidError(String::from(format!(
199 "BadDataLen:{},Expected:{}",
200 data.len(),
201 self.config.key_byte_count
202 ))));
203 }
204
205 // generate reed-solomon parity bytes
206 let full_parity = self.rs_enc.encode(data);
207
208 // extract the bytes that will be encoded as capitalization
209 let cap_bytes = &full_parity[full_parity.len() - self.config.cap_parity_byte_count..];
210
211 // base is the bytes that will be base32 encoded
212 let mut base = self.config.prefix.clone();
213 base.extend_from_slice(
214 &full_parity[0..full_parity.len() - self.config.cap_parity_byte_count],
215 );
216
217 // do the base32 encoding
218 let mut base32 = b32::encode(&base);
219
220 if base32.len() != self.config.encoded_char_count {
221 return Err(aiidError(String::from(format!(
222 "InternalGeneratedBadLen:{},Expected:{}",
223 base32.len(),
224 self.config.encoded_char_count
225 ))));
226 }
227
228 // capitalize the prefix with a fixed scheme
229 cap_encode_bin(
230 &mut base32[0..self.config.prefix_cap.len()],
231 &self.config.prefix_cap,
232 3,
233 )?;
234
235 // iterate over segments, applying parity capitalization
236 for i in 0..cap_bytes.len() {
237 let seg_start = self.config.prefix_cap.len() + (i * self.config.cap_segment_char_count);
238 let seg = &mut base32[seg_start..seg_start + self.config.cap_segment_char_count];
239 let bin = format!("{:08b}", cap_bytes[i]).into_bytes();
240 cap_encode_bin(seg, &bin, 8)?;
241 }
242
243 // we only use ascii characters
244 // use unchecked for performance / so we don't allocate again
245 unsafe {
246 // return the result as a String for ease of use
247 Ok(String::from_utf8_unchecked(base32))
248 }
249 }
250
251 /// decode the data from a base32 string with this instance's configuration. Reed-Solomon can
252 /// correct up to 1/2 its parity size worth of erasures (if no other errors are present).
253 pub fn decode(&self, data: &str) -> aiidResult<Vec<u8>> {
254 // get our parsed data with erasures
255 let (data, erasures) = self.pre_decode(data)?;
256
257 if erasures.len() > ( self.config.base_parity_byte_count + self.config.cap_parity_byte_count ) / 2 {
258 // our reed-solomon library makes bad corrections once erasure count exceeds 1/2 the
259 // parity count (it takes 2 parity symbols to find/correct one error, 1 parity symbol to
260 // correct a known erasure)
261 return Err(aiidError(String::from("TooManyErrors")));
262 }
263
264 // optimise for the case where there are no transcription errors
265 // this makes correcting more expensive if there *are*,
266 // but on average makes the system more efficient
267 if self.pre_is_corrupt(&data, &erasures)? {
268 // apply reed-solomon correction
269 // will "throw" on too many errors
270 Ok(
271 self.rs_dec.correct(&data, Some(&erasures[..]))?[0..self.config.key_byte_count]
272 .to_vec(),
273 )
274 } else {
275 Ok(data[0..self.config.key_byte_count].to_vec())
276 }
277 }
278
279 /// a lighter-weight check to determine if a base32 string is corrupt
280 pub fn is_corrupt(&self, data: &str) -> aiidResult<bool> {
281 // get our parsed data with erasures
282 let (data, erasures) = match self.pre_decode(data) {
283 Ok(v) => v,
284 Err(_) => return Ok(true),
285 };
286
287 match self.pre_is_corrupt(&data, &erasures) {
288 Ok(v) => Ok(v),
289 Err(_) => Ok(true),
290 }
291 }
292
293 /// internal helper for is_corrupt checking
294 fn pre_is_corrupt(&self, data: &[u8], erasures: &[u8]) -> aiidResult<bool> {
295 // if we have any erasures, we can exit early
296 if erasures.len() > 0 {
297 return Ok(true);
298 }
299
300 // slightly more efficient reed-solomon corruption check
301 Ok(self.rs_dec.is_corrupted(&data))
302 }
303
304 /// internal helper for preparing decoding
305 fn pre_decode(&self, data: &str) -> aiidResult<(Vec<u8>, Vec<u8>)> {
306 if data.len() != self.config.encoded_char_count {
307 return Err(aiidError(String::from(format!(
308 "BadIdLen:{},Expected:{}",
309 data.len(),
310 self.config.encoded_char_count
311 ))));
312 }
313
314 let key_base_byte_size = self.config.key_byte_count + self.config.base_parity_byte_count;
315 // All char_erasures are indexed from the 0th char of the full codeword w/ prefix, but
316 // byte_erasures are indexed from the 0th byte of the key+parity (ie. without the prefix).
317 // Any byte of key, or base/cap parity could be erased.
318 let mut byte_erasures = vec![b'0'; key_base_byte_size + self.config.cap_parity_byte_count];
319 let mut char_erasures = vec![b'0'; data.len()];
320
321 // correct any transliteration errors into our base32 alphabet
322 // marking any unrecognizable characters as char-level erasures
323 let mut data = b32_correct(data.as_bytes(), &mut char_erasures);
324
325 // Pull out the parity data that was encoded as capitalization. If its erasure,
326 // determine the
327 let mut cap_bytes: Vec<u8> = Vec::new();
328 let mut all_zro = true;
329 let mut all_one = true;
330 for i in 0..self.config.cap_parity_byte_count {
331 // For cap. parity, indexing starts after pre-defined Base-32 prefix
332 let char_idx = self.config.prefix_cap.len() + (i * self.config.cap_segment_char_count);
333 match cap_decode(
334 char_idx,
335 &data[char_idx..char_idx + self.config.cap_segment_char_count],
336 &char_erasures
337 )? {
338 None => {
339 byte_erasures[key_base_byte_size + i] = b'1';
340 cap_bytes.push(0)
341 }
342 Some(parity) => {
343 if all_zro && parity != 0x00_u8 {
344 all_zro = false
345 }
346 if all_one && parity != 0xFF_u8 {
347 all_one = false
348 }
349 cap_bytes.push(parity)
350 }
351 }
352 }
353
354 // If either all caps or all lower case (or erasure), assume the casing was lost (eg. QR
355 // code, or dns segment); mark all cap-derived parity as erasures. This allows validation
356 // of codeword if all remaining parity is intact and key is correct; since no parity
357 // capacity remains, no correction will be attempted. There is only a low probability that
358 // any remaining errors will be detected, in this case. However, we're no *worse* off than
359 // if we had no R-S parity at all.
360 if all_zro || all_one {
361 for i in 0..self.config.cap_parity_byte_count {
362 byte_erasures[key_base_byte_size + i] = b'1';
363 }
364 }
365
366 // we have the cap data, uppercase everything
367 for c in data.iter_mut() {
368 char_upper(c);
369 }
370
371 // do the base32 decode
372 let mut data = b32::decode(&data)?;
373
374 if &data[0..self.config.prefix.len()] != self.config.prefix.as_slice() {
375 return Err(aiidError(String::from("PrefixMismatch")));
376 }
377
378 // remove the prefix bytes
379 data.drain(0..self.config.prefix.len());
380
381 // append our cap parity bytes
382 data.append(&mut cap_bytes);
383
384 // Sort through the char-level erasures (5 bits), and associate them with byte-level data (8
385 // bits) -- in the (now prefix-free) data buffer, so that we mark the proper erasures for
386 // reed-solomon correction. Some of these chars span multiple bytes... we need to mark both.
387 for i in self.config.prefix_cap.len()..char_erasures.len() {
388 let c = char_erasures[i];
389 if c == b'1' {
390 // 1st and last bit of 5-bit segment may index different bytes
391 byte_erasures[( i * 5 + 0 ) / 8 - self.config.prefix.len()] = b'1';
392 byte_erasures[( i * 5 + 4 ) / 8 - self.config.prefix.len()] = b'1';
393 }
394 }
395
396 // translate erasures into the form expected by our reed-solomon lib
397 let mut erasures: Vec<u8> = Vec::new();
398 for i in 0..byte_erasures.len() {
399 if byte_erasures[i] == b'1' {
400 data[i] = 0;
401 erasures.push(i as u8);
402 }
403 }
404
405 Ok((data, erasures))
406 }
407}
408
#[cfg(test)]
mod tests {
    use super::*;

    /// hex rendering of the 32-byte key used as the encoding fixture
    static TEST_HEX_1: &'static str =
        "0c71db50d35d760b0ea2002ff20147c7c3a8e8030d35ef28ed1adaec9e329aba";
    /// the id the tests expect for `TEST_HEX_1` under the `aik0` kind
    static TEST_ID_1: &'static str =
        "aiKciDds5OiogymxbnHKEabQ8iavqs8dwdVaGdJW76Vp4gx47tQDfGW4OWc9w5i";

    /// build the `aik0` encoder both tests operate on
    fn aik0() -> aiidEncoding {
        aiidEncoding::with_kind("aik0").unwrap()
    }

    /// encoding the fixture key must yield the fixture id
    #[test]
    fn it_encodes_1() {
        let raw_key = hex::decode(TEST_HEX_1.as_bytes()).unwrap();
        let rendered = aik0().encode(&raw_key).unwrap();
        assert_eq!(TEST_ID_1, rendered);
    }

    /// decoding the fixture id must yield the fixture key
    #[test]
    fn it_decodes_1() {
        let raw_key = aik0().decode(TEST_ID_1).unwrap();
        let rendered = hex::encode(&raw_key);
        assert_eq!(TEST_HEX_1, rendered);
    }
}
434}