1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
//
// Copyright (c) 2020 Nathan Fiedler
//

//!
//! The `base32` module provides encoding and decoding functions for converting
//! binary data to and from UTF-8 alphanumeric text using the base32hex encoding
//! defined in RFC-4648.
//!
//! This encoding is useful for emitting numeric index keys and querying those
//! values with a range query. This derives from the fact that base32hex encoded
//! text preserves the bitwise sort order of the represented data.
//!
//! Similarly, the default composite key separator character (the zero byte)
//! will continue to work adequately even for index keys that could themselves
//! contain a zero, if not encoded.
//!

// The base32hex alphabet.
const ALPHABET: &[u8] = b"0123456789ABCDEFGHIJKLMNOPQRSTUV";

///
/// Encode the binary data using base32hex encoding.
///
pub fn encode(input: &[u8]) -> Vec<u8> {
    // this code was derived from github:naim94a/binascii-rs
    let data_len = input.len() * 8 / 5;
    let pad_len = 8 - (data_len % 8);
    let output_len = data_len + if pad_len == 8 { 0 } else { pad_len };
    let mut output = Vec::with_capacity(output_len);
    for block_idx in 0..=(input.len() / 5) {
        let block_len = if input.len() > block_idx * 5 + 5 {
            block_idx * 5 + 5
        } else {
            input.len()
        };
        let block = &input[block_idx * 5..block_len];
        let mut num = 0u64;
        for (idx, ch) in block.iter().enumerate() {
            num |= (*ch as u64) << (64 - 8 - idx * 8);
        }
        // this will overfill with zeros which is fixed up later
        for i in 0..8 {
            let digit_idx = (num >> (64 - 5 - i * 5)) & 0b11111;
            output.push(ALPHABET[digit_idx as usize]);
        }
    }
    // replace the trailing zeros filled in above with padding
    #[allow(clippy::needless_range_loop)]
    for idx in data_len + 1..output_len {
        output[idx] = b'=';
    }
    // fix up any zero overfill in the edge cases
    output.truncate(output_len);
    output
}

// The UTF-8 character set encoded as values for base32hex.
#[rustfmt::skip]
const INV_ALPHABET: [i8; 256] = [
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
    -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
    25, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
    25, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
];

///
/// Decode the base32hex encoded data into raw binary data.
///
/// Lowercase letters in the input are treated as the equivalent uppercase.
///
/// If the input is invalid for any reason, `None` is returned. This includes
/// incorrect input length, invalid characters, or any character other than `=`
/// appearing after the first padding character.
///
pub fn decode(input: &[u8]) -> Option<Vec<u8>> {
    let remainder = input.len() % 8;
    // this code was derived from github:naim94a/binascii-rs
    let padding = 8 - remainder;
    let input_len = input.len() + if padding != 8 { padding } else { 0 };
    let mut output_len = input_len * 5 / 8;
    let mut output = Vec::with_capacity(output_len);
    let mut eof = false;
    for block_idx in 0..=(input.len() / 8) {
        let block_end = if input.len() > block_idx * 8 + 8 {
            block_idx * 8 + 8
        } else {
            input.len()
        };
        let block = &input[block_idx * 8..block_end];
        let mut num = 0u64;
        for (idx, ch) in block.iter().enumerate() {
            // ensure that once padding has been found, only padding remains
            // until the end of the input
            if *ch == b'=' {
                eof = true;
                continue;
            } else if eof {
                return None;
            }
            let c_val = INV_ALPHABET[*ch as usize];
            if c_val < 0 {
                return None;
            }
            num |= (c_val as u64) << (64 - 5 - idx * 5);
            output_len = block_idx * 5 + (idx * 5 / 8) + 1;
        }
        for i in 0..5 {
            output.push(((num >> (64 - 8 - i * 8)) & 0xff) as u8);
        }
    }
    if !eof && (remainder == 1 || remainder == 3 || remainder == 6) {
        // input had no padding and was an invalid length
        return None;
    }
    output.truncate(output_len);
    Some(output)
}

#[cfg(test)]
mod test {
    use super::{decode, encode};
    use rand::prelude::*;

    #[test]
    fn valid() {
        let input: Vec<u8> = vec![];
        assert_eq!(encode(&input), b"");
        assert_eq!(decode(&input), Some(input));

        let input = vec![b'f'];
        assert_eq!(encode(&input), b"CO======");
        assert_eq!(decode(b"CO======"), Some(input));

        let input = vec![b'f', b'o'];
        assert_eq!(encode(&input), b"CPNG====");
        assert_eq!(decode(b"CPNG===="), Some(input));

        let input = vec![b'f', b'o', b'o'];
        assert_eq!(encode(&input), b"CPNMU===");
        // various padding length and mixed letter case
        assert_eq!(decode(b"CPNMU"), Some(input.clone()));
        assert_eq!(decode(b"CPNMU="), Some(input.clone()));
        assert_eq!(decode(b"CPNMU=="), Some(input.clone()));
        assert_eq!(decode(b"CPNMU==="), Some(input.clone()));
        assert_eq!(decode(b"cpnmu==="), Some(input.clone()));
        assert_eq!(decode(b"CpNmU===="), Some(input.clone()));
        assert_eq!(decode(b"cPnMu====="), Some(input));

        let input = vec![b'f', b'o', b'o', b'b'];
        assert_eq!(encode(&input), b"CPNMUOG=");
        assert_eq!(decode(b"CPNMUOG="), Some(input));

        let input = vec![b'f', b'o', b'o', b'b', b'a'];
        assert_eq!(encode(&input), b"CPNMUOJ1");
        assert_eq!(decode(b"CPNMUOJ1"), Some(input));

        let input = vec![b'f', b'o', b'o', b'b', b'a', b'r'];
        assert_eq!(encode(&input), b"CPNMUOJ1E8======");
        assert_eq!(decode(b"CPNMUOJ1E8======"), Some(input));
    }

    #[test]
    fn ensure_sort_order() {
        // Generate some random data, encode it, sort the list, then visit
        // pairs, decode, and ensure values are in sorted order (i.e. the
        // encoded sort and raw data should sort the same).
        let mut rng = rand::thread_rng();
        let mut collection: Vec<Vec<u8>> = Vec::new();
        for _ in 0..100 {
            let mut data = [0u8; 8];
            rng.fill_bytes(&mut data);
            let encoded = encode(&data);
            collection.push(encoded);
        }
        collection.sort_unstable();
        for pair in collection.windows(2) {
            let a = decode(&pair[0]);
            let b = decode(&pair[1]);
            assert!(a <= b, "elements out of order");
        }
    }

    #[test]
    fn invalid_chars() {
        assert_eq!(decode(b"CO==12=="), None);
        assert_eq!(decode(b","), None);
        assert_eq!(decode(b"!"), None);
        assert_eq!(decode(b"?"), None);
        assert_eq!(decode(b"CPNMW"), None);
        assert_eq!(decode(b"CPNMX"), None);
        assert_eq!(decode(b"CPNMY"), None);
        assert_eq!(decode(b"CPNMZ"), None);
    }

    #[test]
    fn invalid_length() {
        assert_eq!(decode(b"A"), None);
        assert_eq!(decode(b"ABC"), None);
        assert_eq!(decode(b"ABCDEF"), None);
        assert_eq!(decode(b"00000000A"), None);
        assert_eq!(decode(b"00000000ABC"), None);
        assert_eq!(decode(b"00000000ABCDEF"), None);
    }
}