1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
//! Holochain HCID base32 encoding utility.
//!
//! # Example
//!
//! ```
//! extern crate hcid;
//!
//! fn main() {
//!     let enc = hcid::HcidEncoding::with_kind("hcs0").unwrap();
//!     let key = enc.encode(&[0; 32]).unwrap();
//!     assert_eq!("HcSciaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", key);
//!     let buffer = enc.decode(&key).unwrap();
//!     assert_eq!([0; 32].to_vec(), buffer);
//! }
//! ```

extern crate reed_solomon;

mod error;
mod b32;
pub use error::{HcidError, HcidResult};

mod util;
use util::{b32_correct, cap_decode, cap_encode_bin, char_upper};

static HC_CODE_MAP: &'static [[u8; 2]] = &[
    [ 0xb2, 0xff ], // 51: hc30, reserved
    [ 0xb4, 0xff ], // 52: hc40, reserved
    [ 0xb6, 0xff ], // 53: hc50, reserved
    [ 0xb8, 0xff ], // 54: hc60, reserved
    [ 0xba, 0xff ], // 55: hc70, reserved
    [ 0xbc, 0xff ], // 56: hc80, reserved
    [ 0xbe, 0xff ], // 57: hc90, reserved

    // 58-61: reserved
    [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ],
    // 62-65: reserved
    [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ],
    // 66-69: reserved
    [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ],
    // 70-73: reserved
    [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ],
    // 74-77: reserved
    [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ],
    // 78-81: reserved
    [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ],
    // 82-85: reserved
    [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ],
    // 86-89: reserved
    [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ],
    // 90-93: reserved
    [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ],
    // 94-96: reserved
    [ 0xff, 0xff ], [ 0xff, 0xff ], [ 0xff, 0xff ],

    [ 0x80, 0xff ], // 97: hca0, reserved
    [ 0x82, 0xff ], // 98: hcb0, reserved
    [ 0x84, 0xff ], // 99: hcc0, reserved
    [ 0x86, 0xff ], // 100: hcd0, reserved
    [ 0x88, 0xff ], // 101: hce0, reserved
    [ 0x8a, 0xff ], // 102: hcf0, reserved
    [ 0x8c, 0xff ], // 103: hcg0, reserved
    [ 0x8e, 0xff ], // 104: hch0, reserved
    [ 0x90, 0xff ], // 105: hci0, reserved
    [ 0x92, 0xff ], // 106: hcj0, reserved
    [ 0x94, 0xff ], // 107: hck0, reserved

    [ 0xff, 0xff ], // 108: reserved, reserved

    [ 0x96, 0xff ], // 109: hcm0, reserved
    [ 0x98, 0xff ], // 110: hcn0, reserved
    [ 0x9a, 0xff ], // 111: hco0, reserved
    [ 0x9c, 0xff ], // 112: hcp0, reserved
    [ 0x9e, 0xff ], // 113: hcq0, reserved
    [ 0xa0, 0xff ], // 114: hcr0, reserved
    [ 0xa2, 0xff ], // 115: hcs0, reserved
    [ 0xa4, 0xff ], // 116: hct0, reserved
    [ 0xa6, 0xff ], // 117: hcu0, reserved
    [ 0xa8, 0xff ], // 118: hcv0, reserved
    [ 0xaa, 0xff ], // 119: hcw0, reserved
    [ 0xad, 0xff ], // 120: hcx0, reserved
    [ 0xae, 0xff ], // 121: hcy0, reserved
    [ 0xb0, 0xff ], // 122: hcz0, reserved
];

/* XXX
 *
 * HcK v0 hex:     0x389424
 * HcK v1 hex:     0x389524
 * HcA v0 hex:     0x388024
 * HcA v1 hex:     0x388124
 * HcS v0 hex:     0x38a224
 * HcS v1 hex:     0x38a324
 *
 * XXX
 */

/// represents an encoding configuration for hcid rendering and parsing
pub struct HcidEncodingConfig {
    /// byte count of actuall key data that will be encoded
    pub key_byte_count: usize,
    /// parity bytes that will be encoded directly into the base32 string (appended after key)
    pub base_parity_byte_count: usize,
    /// parity bytes that will be encoded in the alpha capitalization (appended after base parity)
    pub cap_parity_byte_count: usize,
    /// bytes to prefix before rendering to base32
    pub prefix: Vec<u8>,
    /// binary indication of the capitalization for prefix characters
    pub prefix_cap: Vec<u8>,
    /// how many characters are in a capitalization parity segment
    pub cap_segment_char_count: usize,
    /// how many characters long the fully rendered base32 string should be
    pub encoded_char_count: usize,
}

impl HcidEncodingConfig {
    /// create a new config given a kind token string
    ///
    /// # Example
    ///
    /// ```
    /// extern crate hcid;
    /// let hca0 = hcid::HcidEncodingConfig::new("hca0").unwrap();
    /// let hck0 = hcid::HcidEncodingConfig::new("hck0").unwrap();
    /// let hcs0 = hcid::HcidEncodingConfig::new("hcs0").unwrap();
    /// ```
    pub fn new(kind: &str) -> HcidResult<Self> {
        let kind_b = kind.as_bytes();
        if kind_b.len() != 4 || kind_b[0] != 104 || kind_b[1] != 99 ||
                (kind_b[3] != 48 && kind_b[3] != 49) ||
                kind_b[2] < 51 || kind_b[2] > 122 {
            return Err(format!("invalid kind: `{}`", kind).into());
        }

        let version = if kind_b[3] == 48 { 0 } else { 1 };
        let res = HC_CODE_MAP[(kind_b[2] - 51) as usize][version as usize];

        if res == 0xff {
            return Err(format!("invalid kind: `{}`", kind).into());
        }

        Ok(HcidEncodingConfig {
            key_byte_count: 32,
            base_parity_byte_count: 4,
            cap_parity_byte_count: 4,
            prefix: vec![0x38, res, 0x24],
            prefix_cap: b"101".to_vec(),
            cap_segment_char_count: 15,
            encoded_char_count: 63,
        })
    }
}

/// an instance that can encode / decode a particular hcid encoding configuration
pub struct HcidEncoding {
    config: HcidEncodingConfig,
    rs_enc: reed_solomon::Encoder,
    rs_dec: reed_solomon::Decoder,
}

impl HcidEncoding {
    /// create a new HcidEncoding instance from given HcidEncodingConfig
    pub fn new(config: HcidEncodingConfig) -> HcidResult<Self> {
        // set up a reed-solomon encoder with proper parity count
        let rs_enc = reed_solomon::Encoder::new(
            config.base_parity_byte_count + config.cap_parity_byte_count,
        );

        // set up a reed-solomon decoder with proper parity count
        let rs_dec = reed_solomon::Decoder::new(
            config.base_parity_byte_count + config.cap_parity_byte_count,
        );

        Ok(Self {
            config,
            rs_enc,
            rs_dec,
        })
    }

    /// create a new config given a kind token string
    ///
    /// # Example
    ///
    /// ```
    /// extern crate hcid;
    /// let hca0 = hcid::HcidEncoding::with_kind("hca0").unwrap();
    /// let hck0 = hcid::HcidEncoding::with_kind("hck0").unwrap();
    /// let hcs0 = hcid::HcidEncoding::with_kind("hcs0").unwrap();
    /// ```
    pub fn with_kind(kind: &str) -> HcidResult<Self> {
        HcidEncoding::new(HcidEncodingConfig::new(kind)?)
    }

    /// encode a string to base32 with this instance's configuration
    pub fn encode(&self, data: &[u8]) -> HcidResult<String> {
        if data.len() != self.config.key_byte_count {
            return Err(HcidError(String::from(format!(
                "BadDataLen:{},Expected:{}",
                data.len(),
                self.config.key_byte_count
            ))));
        }

        // generate reed-solomon parity bytes
        let full_parity = self.rs_enc.encode(data);

        // extract the bytes that will be encoded as capitalization
        let cap_bytes = &full_parity[full_parity.len() - self.config.cap_parity_byte_count..];

        // base is the bytes that will be base32 encoded
        let mut base = self.config.prefix.clone();
        base.extend_from_slice(
            &full_parity[0..full_parity.len() - self.config.cap_parity_byte_count],
        );

        // do the base32 encoding
        let mut base32 = b32::encode(&base);

        if base32.len() != self.config.encoded_char_count {
            return Err(HcidError(String::from(format!(
                "InternalGeneratedBadLen:{},Expected:{}",
                base32.len(),
                self.config.encoded_char_count
            ))));
        }

        // capitalize the prefix with a fixed scheme
        cap_encode_bin(
            &mut base32[0..self.config.prefix_cap.len()],
            &self.config.prefix_cap,
            3,
        )?;

        // iterate over segments, applying parity capitalization
        for i in 0..cap_bytes.len() {
            let seg_start = self.config.prefix_cap.len() + (i * self.config.cap_segment_char_count);
            let seg = &mut base32[seg_start..seg_start + self.config.cap_segment_char_count];
            let bin = format!("{:08b}", cap_bytes[i]).into_bytes();
            cap_encode_bin(seg, &bin, 8)?;
        }
        
        // we only use ascii characters
        // use unchecked for performance / so we don't allocate again
        unsafe {
            // return the result as a String for ease of use
            Ok(String::from_utf8_unchecked(base32))
        }
    }

    /// decode the data from a base32 string with this instance's configuration.  Reed-Solomon can
    /// correct up to 1/2 its parity size worth of erasures (if no other errors are present).
    pub fn decode(&self, data: &str) -> HcidResult<Vec<u8>> {
        // get our parsed data with erasures
        let (data, erasures) = self.pre_decode(data)?;

        if erasures.len() > ( self.config.base_parity_byte_count + self.config.cap_parity_byte_count ) / 2 {
            // our reed-solomon library makes bad corrections once erasure count exceeds 1/2 the
            // parity count (it takes 2 parity symbols to find/correct one error, 1 parity symbol to
            // correct a known erasure)
            return Err(HcidError(String::from("TooManyErrors")));
        }

        // optimise for the case where there are no transcription errors
        // this makes correcting more expensive if there *are*,
        // but on average makes the system more efficient
        if self.pre_is_corrupt(&data, &erasures)? {
            // apply reed-solomon correction
            // will "throw" on too many errors
            Ok(
                self.rs_dec.correct(&data, Some(&erasures[..]))?[0..self.config.key_byte_count]
                    .to_vec(),
            )
        } else {
            Ok(data[0..self.config.key_byte_count].to_vec())
        }
    }

    /// a lighter-weight check to determine if a base32 string is corrupt
    pub fn is_corrupt(&self, data: &str) -> HcidResult<bool> {
        // get our parsed data with erasures
        let (data, erasures) = match self.pre_decode(data) {
            Ok(v) => v,
            Err(_) => return Ok(true),
        };

        match self.pre_is_corrupt(&data, &erasures) {
            Ok(v) => Ok(v),
            Err(_) => Ok(true),
        }
    }

    /// internal helper for is_corrupt checking
    fn pre_is_corrupt(&self, data: &[u8], erasures: &[u8]) -> HcidResult<bool> {
        // if we have any erasures, we can exit early
        if erasures.len() > 0 {
            return Ok(true);
        }

        // slightly more efficient reed-solomon corruption check
        Ok(self.rs_dec.is_corrupted(&data))
    }

    /// internal helper for preparing decoding
    fn pre_decode(&self, data: &str) -> HcidResult<(Vec<u8>, Vec<u8>)> {
        if data.len() != self.config.encoded_char_count {
            return Err(HcidError(String::from(format!(
                "BadIdLen:{},Expected:{}",
                data.len(),
                self.config.encoded_char_count
            ))));
        }

        let key_base_byte_size = self.config.key_byte_count + self.config.base_parity_byte_count;
        // All char_erasures are indexed from the 0th char of the full codeword w/ prefix, but
        // byte_erasures are indexed from the 0th byte of the key+parity (ie. without the prefix).
        // Any byte of key, or base/cap parity could be erased.
        let mut byte_erasures = vec![b'0'; key_base_byte_size + self.config.cap_parity_byte_count];
        let mut char_erasures = vec![b'0'; data.len()];

        // correct any transliteration errors into our base32 alphabet
        // marking any unrecognizable characters as char-level erasures
        let mut data = b32_correct(data.as_bytes(), &mut char_erasures);

        // Pull out the parity data that was encoded as capitalization.  If its erasure,
        // determine the 
        let mut cap_bytes: Vec<u8> = Vec::new();
        let mut all_zro = true;
        let mut all_one = true;
        for i in 0..self.config.cap_parity_byte_count {
            // For cap. parity, indexing starts after pre-defined Base-32 prefix
            let char_idx = self.config.prefix_cap.len() + (i * self.config.cap_segment_char_count);
            match cap_decode(
                char_idx,
                &data[char_idx..char_idx + self.config.cap_segment_char_count],
                &char_erasures
            )? {
                None => {
                    byte_erasures[key_base_byte_size + i] = b'1';
                    cap_bytes.push(0)
                }
                Some(parity) => {
                    if all_zro && parity != 0x00_u8 {
                        all_zro = false
                    }
                    if all_one && parity != 0xFF_u8 {
                        all_one = false
                    }
                    cap_bytes.push(parity)
                }
            }
        }

        // If either all caps or all lower case (or erasure), assume the casing was lost (eg. QR
        // code, or dns segment); mark all cap-derived parity as erasures.  This allows validation
        // of codeword if all remaining parity is intact and key is correct; since no parity
        // capacity remains, no correction will be attempted.  There is only a low probability that
        // any remaining errors will be detected, in this case.  However, we're no *worse* off than
        // if we had no R-S parity at all.
        if all_zro || all_one {
            for i in 0..self.config.cap_parity_byte_count {
                byte_erasures[key_base_byte_size + i] = b'1';
            }
        }

        // we have the cap data, uppercase everything
        for c in data.iter_mut() {
            char_upper(c);
        }

        // do the base32 decode
        let mut data = b32::decode(&data)?;

        if &data[0..self.config.prefix.len()] != self.config.prefix.as_slice() {
            return Err(HcidError(String::from("PrefixMismatch")));
        }

        // remove the prefix bytes
        data.drain(0..self.config.prefix.len());

        // append our cap parity bytes
        data.append(&mut cap_bytes);

        // Sort through the char-level erasures (5 bits), and associate them with byte-level data (8
        // bits) -- in the (now prefix-free) data buffer, so that we mark the proper erasures for
        // reed-solomon correction.  Some of these chars span multiple bytes... we need to mark both.
        for i in self.config.prefix_cap.len()..char_erasures.len() {
            let c = char_erasures[i];
            if c == b'1' {
                // 1st and last bit of 5-bit segment may index different bytes
                byte_erasures[( i * 5 + 0 ) / 8 - self.config.prefix.len()] = b'1';
                byte_erasures[( i * 5 + 4 ) / 8 - self.config.prefix.len()] = b'1';
            }
        }

        // translate erasures into the form expected by our reed-solomon lib
        let mut erasures: Vec<u8> = Vec::new();
        for i in 0..byte_erasures.len() {
            if byte_erasures[i] == b'1' {
                data[i] = 0;
                erasures.push(i as u8);
            }
        }

        Ok((data, erasures))
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    static TEST_HEX_1: &'static str =
        "0c71db50d35d760b0ea2002ff20147c7c3a8e8030d35ef28ed1adaec9e329aba";
    static TEST_ID_1: &'static str =
        "HcKciDds5OiogymxbnHKEabQ8iavqs8dwdVaGdJW76Vp4gx47tQDfGW4OWc9w5i";

    #[test]
    fn it_encodes_1() {
        let enc = HcidEncoding::with_kind("hck0").unwrap();

        let input = hex::decode(TEST_HEX_1.as_bytes()).unwrap();
        let id = enc.encode(&input).unwrap();
        assert_eq!(TEST_ID_1, id);
    }

    #[test]
    fn it_decodes_1() {
        let enc = HcidEncoding::with_kind("hck0").unwrap();

        let data = hex::encode(&enc.decode(TEST_ID_1).unwrap());
        assert_eq!(TEST_HEX_1, data);
    }
}