1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
/// Represents writable buffer capable of receiving encoded data.
///
/// Write is implemented on `Vec<u8>` and `String`, but you are free to implement it on your own
/// types. One conceivable purpose would be to allow for lowercase encoding output by inverting
/// the cap bit before writing.
pub trait Write {
    /// Writes a single byte (or, more precisely, a 5-bit group) to the output.
    fn write(&mut self, u: u8);
}

impl Write for String {
    fn write(&mut self, u: u8) {
        // UPPERCASE_ENCODING contains only ASCII bytes.
        unsafe {
            self.as_mut_vec().push(u);
        }
    }
}

impl Write for Vec<u8> {
    fn write(&mut self, u: u8) {
        self.push(u);
    }
}

/// Encodes a `u64` value as a Crockford Base32-encoded string.
pub fn encode(n: u64) -> String {
    // The longest possible representation of u64 in Base32 is 13 digits.
    let mut fits = Vec::with_capacity(13);
    encode_into(n, &mut fits);

    // UPPERCASE_ENCODING contains only ASCII bytes.
    unsafe { String::from_utf8_unchecked(fits) }
}

/// Encodes a `u64` value as Crockford Base32 and writes it to the provided output.
///
/// Either `String` or `Vec<u8>` will be accepted.
pub fn encode_into<T: Write>(mut n: u64, w: &mut T) {
    use UPPERCASE_ENCODING;

    // After we clear the four most significant bits, the four least significant bits will be
    // replaced with 0001. We can then know to stop once the four most significant bits are,
    // likewise, 0001.
    const STOP_BIT: u64 = 1 << QUAD_SHIFT;

    const QUAD_SHIFT: usize = 60;
    const QUAD_RESET: usize = 4;

    const FIVE_SHIFT: usize = 59;
    const FIVE_RESET: usize = 5;

    if n == 0 {
        w.write(b'0');
        return;
    }

    // Start by getting the most significant four bits. We get four here because these would be
    // leftovers when starting from the least significant bits. In either case, tag the four least
    // significant bits with our stop bit.
    match (n >> QUAD_SHIFT) as usize {
        // Eat leading zero-bits. This should not be done if the first four bits were non-zero.
        // Additionally, we *must* do this in increments of five bits.
        0 => {
            n <<= QUAD_RESET;
            n |= 1;
            n <<= n.leading_zeros() / 5 * 5;
        }

        // Write value of first four bytes.
        i => {
            n <<= QUAD_RESET;
            n |= 1;
            w.write(UPPERCASE_ENCODING[i]);
        }
    }

    // From now until we reach the stop bit, take the five most significant bits and then shift
    // left by five bits.
    while n != STOP_BIT {
        w.write(UPPERCASE_ENCODING[(n >> FIVE_SHIFT) as usize]);
        n <<= FIVE_RESET;
    }
}

#[cfg(test)]
mod tests {
    use encoding::*;

    #[test]
    fn zero_returns_zero() {
        let input = 0;
        let expected = "0";
        let actual = encode(input);

        assert_eq!(expected, &*actual);
    }

    #[test]
    fn large_value_returns_correct_large_value() {
        let input = 65535;
        let expected = "1ZZZ";
        let actual = encode(input);

        assert_eq!(expected, &*actual);
    }

    #[test]
    fn x5111_is_4zq() {
        assert_eq!("4ZQ", &*encode(5111));
    }

    #[test]
    fn x18446744073709551615_is_fzzzzzzzzzzzz() {
        assert_eq!("FZZZZZZZZZZZZ", &*encode(18446744073709551615));
    }

    #[test]
    fn large_odd_number() {
        use decoding::decode;

        // No, this is not a joke.
        let x = 0b10000000_00000000_00000000_00000000_00000000_00000000_00000000_00000001;
        let y = decode(encode(x)).unwrap();

        assert_eq!(x, y);
    }

    #[test]
    fn large_even_number() {
        use decoding::decode;

        // No, this is not a joke.
        let x = 0b10000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        let y = decode(encode(x)).unwrap();

        assert_eq!(x, y);
    }

    #[test]
    fn tiny_number() {
        use decoding::decode;

        let x = 1;
        let y = decode(encode(x)).unwrap();

        assert_eq!(x, y);
    }

    // Test is ignored because it takes forever to run.
    #[ignore]
    #[test]
    fn round_trips() {
        use decoding::decode;
        use std::str;

        let mut s = Vec::new();

        for n in 0..20_000_001 {
            encode_into(n, &mut s);
            assert_eq!(n, decode(str::from_utf8(&s).unwrap()).unwrap());
            s.clear();
        }

        for n in (u64::max_value() - 20_000_000)..u64::max_value() {
            encode_into(n, &mut s);
            assert_eq!(n, decode(str::from_utf8(&s).unwrap()).unwrap());
            s.clear();
        }
    }
}