1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174
/// Represents writable buffer capable of receiving encoded data.
///
/// Write is implemented on `Vec<u8>` and `String`, but you are free to implement it on your own
/// types. One conceivable purpose would be to allow for lowercase encoding output by inverting
/// the cap bit before writing.
///
/// For performance reasons, the default implementation of each of these does not
pub trait Write {
/// Writes a single byte (or, more precisely, a 5-bit group) to the output.
///
/// # Safety
///
/// Provided implementations of this function **do not** check whether a given u8 byte is
/// valid ascii before trying to add each byte to the output string.
unsafe fn write(&mut self, u: u8);
}
impl Write for String {
unsafe fn write(&mut self, u: u8) {
self.as_mut_vec().push(u);
}
}
impl Write for Vec<u8> {
unsafe fn write(&mut self, u: u8) {
self.push(u);
}
}
/// Encodes a `u64` value as a Crockford Base32-encoded string.
pub fn encode(n: u64) -> String {
// The longest possible representation of u64 in Base32 is 13 digits.
let mut fits = Vec::with_capacity(13);
encode_into(n, &mut fits);
// UPPERCASE_ENCODING contains only ASCII bytes.
unsafe { String::from_utf8_unchecked(fits) }
}
/// Encodes a `u64` value as Crockford Base32 and writes it to the provided output.
///
/// Either `String` or `Vec<u8>` will be accepted.
pub fn encode_into<T: Write>(mut n: u64, w: &mut T) {
use crate::UPPERCASE_ENCODING;
// Used for the initial shift.
const QUAD_SHIFT: usize = 60;
const QUAD_RESET: usize = 4;
// Used for all subsequent shifts.
const FIVE_SHIFT: usize = 59;
const FIVE_RESET: usize = 5;
// After we clear the four most significant bits, the four least significant bits will be
// replaced with 0001. We can then know to stop once the four most significant bits are,
// likewise, 0001.
const STOP_BIT: u64 = 1 << QUAD_SHIFT;
if n == 0 {
unsafe {
w.write(b'0');
}
return;
}
// Start by getting the most significant four bits. We get four here because these would be
// leftovers when starting from the least significant bits. In either case, tag the four least
// significant bits with our stop bit.
match (n >> QUAD_SHIFT) as usize {
// Eat leading zero-bits. This should not be done if the first four bits were non-zero.
// Additionally, we *must* do this in increments of five bits.
0 => {
n <<= QUAD_RESET;
n |= 1;
n <<= n.leading_zeros() / 5 * 5;
}
// Write value of first four bytes.
i => {
n <<= QUAD_RESET;
n |= 1;
unsafe {
w.write(UPPERCASE_ENCODING[i]);
}
}
}
// From now until we reach the stop bit, take the five most significant bits and then shift
// left by five bits.
while n != STOP_BIT {
unsafe {
w.write(UPPERCASE_ENCODING[(n >> FIVE_SHIFT) as usize]);
}
n <<= FIVE_RESET;
}
}
#[cfg(test)]
mod tests {
use std::str;
use crate::{decode, encode, encode_into};
#[test]
fn zero_returns_zero() {
let input = 0;
let expected = "0";
let actual = encode(input);
assert_eq!(expected, &*actual);
}
#[test]
fn large_value_returns_correct_large_value() {
let input = 65535;
let expected = "1ZZZ";
let actual = encode(input);
assert_eq!(expected, &*actual);
}
#[test]
fn x5111_is_4zq() {
assert_eq!("4ZQ", &*encode(5111));
}
#[test]
fn x18446744073709551615_is_fzzzzzzzzzzzz() {
assert_eq!("FZZZZZZZZZZZZ", &*encode(18446744073709551615));
}
#[test]
fn large_odd_number() {
// No, this is not a joke.
let x = 0b10000000_00000000_00000000_00000000_00000000_00000000_00000000_00000001;
let y = decode(encode(x)).unwrap();
assert_eq!(x, y);
}
#[test]
fn large_even_number() {
// No, this is not a joke.
let x = 0b10000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
let y = decode(encode(x)).unwrap();
assert_eq!(x, y);
}
#[test]
fn tiny_number() {
let x = 1;
let y = decode(encode(x)).unwrap();
assert_eq!(x, y);
}
// Test is ignored because it takes forever to run.
// Testing all of these would probably complete right
// after the bottom falls out of the solar market--
// --you know, because the sun has flared out.
#[ignore]
#[test]
fn round_trips() {
let mut s = Vec::new();
for n in 0..20_000_001 {
encode_into(n, &mut s);
assert_eq!(n, decode(str::from_utf8(&s).unwrap()).unwrap());
s.clear();
}
for n in (u64::max_value() - 20_000_000)..u64::max_value() {
encode_into(n, &mut s);
assert_eq!(n, decode(str::from_utf8(&s).unwrap()).unwrap());
s.clear();
}
}
}