rust_base62/
lib.rs

//! A simple base62 encode/decode library with no dependencies on other libraries.
2
/// Number of raw bytes in one full input block.
///
/// Buffer sizes are computed block-wise: a 32-byte input block corresponds
/// to a 43-digit base62 output block.
const BASE256BLOCK_LEN: usize = 32;

/// Number of base62 digits in one full output block.
const BASE62BLOCK_LEN: usize = 43;

/// Bits carried by a single base62 digit, i.e. the result of `62f64.log2()`.
const BASE62_LOG2: f64 = 5.954_196_310_386_875;
8
/// Number of distinct digits in the base62 alphabet.
const ALPHABET_SIZE: usize = 62;

/// Digit characters indexed by digit value: `0-9`, then `A-Z`, then `a-z`.
const ALPHABET: [char; ALPHABET_SIZE] = {
    // The array type pins the literal's length to ALPHABET_SIZE at compile time.
    let symbols: &[u8; ALPHABET_SIZE] =
        b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
    let mut table = ['0'; ALPHABET_SIZE];
    let mut idx = 0;
    while idx < ALPHABET_SIZE {
        table[idx] = symbols[idx] as char;
        idx += 1;
    }
    table
};
17
18/*
19ALPHABET_VERT: [u8; 256] = [0xff: 256];
20for (i, &v) in ALPHABET.iter().enumerate() {
21    ALPHABET_VERT[v as usize] = i as u8;
22}
23 */
24const ALPHABET_VERT: [u8; 256] = [
25    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
26    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
27    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 255, 255, 255,
28    255, 255, 255, 255, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
29    29, 30, 31, 32, 33, 34, 35, 255, 255, 255, 255, 255, 255, 36, 37, 38, 39, 40, 41, 42, 43, 44,
30    45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 255, 255,
31    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
32    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
33    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
34    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
35    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
36    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
37    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
38];
39
40fn encode_len(n: usize) -> usize {
41    if n == BASE256BLOCK_LEN {
42        return BASE62BLOCK_LEN;
43    }
44    let n_block = n / BASE256BLOCK_LEN;
45    let mut out = n_block * BASE62BLOCK_LEN;
46    let rem = n % BASE256BLOCK_LEN;
47    if rem > 0 {
48        out += ((rem * 8) as f64 / BASE62_LOG2).ceil() as usize;
49    }
50    out
51}
52
53fn decode_len(n: usize) -> usize {
54    let n_block = n / BASE62BLOCK_LEN;
55    let mut out = n_block * BASE256BLOCK_LEN;
56    let rem = n % BASE62BLOCK_LEN;
57    if rem > 0 {
58        out += (rem as f64 * BASE62_LOG2 / 8f64).floor() as usize;
59    }
60    out
61}
62
63fn is_valid_encoding_length(n: usize) -> bool {
64    fn f(x: usize) -> usize {
65        ((x as f64) * BASE62_LOG2 / 8f64).floor() as usize
66    }
67    f(n) != f(n - 1)
68}
69
70/// Encode `bytes` using the base62, return `String`.
71pub fn encode(src: &[u8]) -> String {
72    if src.is_empty() {
73        return "".to_string();
74    }
75    let mut rs: usize = 0;
76    let cap = encode_len(src.len());
77    let mut dst = vec![0u8; cap];
78    for b in src.iter().copied() {
79        let mut c: usize = 0;
80        let mut carry = b as usize;
81        for j in (0..cap).rev() {
82            if carry == 0 && c >= rs {
83                break;
84            }
85            carry += 256 * dst[j] as usize;
86            dst[j] = (carry % ALPHABET_SIZE) as u8;
87            carry /= ALPHABET_SIZE;
88            c += 1;
89        }
90        rs = c;
91    }
92    dst.iter().map(|&i| ALPHABET[i as usize]).collect()
93}
94
/// Errors reported when decoding base62 text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Error {
    /// The input is not a valid base62 encoding; `reason` describes why.
    BadInput { reason: String },
}

impl std::fmt::Display for Error {
    /// Writes the human-readable reason carried by the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Error::BadInput { reason } => f.write_str(reason),
        }
    }
}

// Lets callers propagate decode failures with `?` into `Box<dyn Error>`.
impl std::error::Error for Error {}
99
100/// Decode `bytes` using the base62, return `Result<Vec<u8>, Error>`.
101pub fn decode(src: &[u8]) -> Result<Vec<u8>, Error> {
102    if src.is_empty() {
103        return Ok(vec![]);
104    }
105    if !is_valid_encoding_length(src.len()) {
106        return Err(Error::BadInput {
107            reason: "invalid input length".to_string(),
108        });
109    }
110    let mut rs: usize = 0;
111    let cap = decode_len(src.len());
112    let mut dst = vec![0u8; cap];
113    for b in src.iter().copied() {
114        let mut c: usize = 0;
115        let mut carry: usize = ALPHABET_VERT[b as usize] as usize;
116        if carry == 255 {
117            return Err(Error::BadInput {
118                reason: format!("bad input {}", b),
119            });
120        }
121        for j in (0..cap).rev() {
122            if carry == 0 && c >= rs {
123                break;
124            }
125            carry += ALPHABET_SIZE * (dst[j] as usize);
126            dst[j] = (carry % 256) as u8;
127            carry /= 256;
128            c += 1;
129        }
130        rs = c;
131    }
132    Ok(dst)
133}
134
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `plain` encodes to `cipher` and that `cipher` decodes
    /// back to `plain`.
    fn check_bytes(plain: &[u8], cipher: &[u8]) {
        let encoded = encode(plain);
        assert_eq!(cipher, encoded.as_bytes());

        let decoded = decode(cipher);
        assert!(decoded.is_ok());
        assert_eq!(plain, &decoded.unwrap()[..]);
    }

    /// String flavour of [`check_bytes`].
    fn check_str(plaintext: &str, ciphertext: &str) {
        check_bytes(plaintext.as_bytes(), ciphertext.as_bytes());
    }

    #[test]
    fn test_str() {
        // (plain text, expected base62 text)
        let cases: &[(&str, &str)] = &[
            ("", ""),
            ("f", "1e"),
            ("fo", "6ox"),
            ("foo", "0SAPP"),
            ("foob", "1sIyuo"),
            ("fooba", "7kENWa1"),
            ("foobar", "0VytN8Wjy"),
            ("su", "7gj"),
            ("sur", "0VkRe"),
            ("sure", "275mAn"),
            ("sure.", "8jHquZ4"),
            ("asure.", "0UQPPAab8"),
            ("easure.", "26h8PlupSA"),
            ("leasure.", "9IzLUOIY2fe"),
            ("Hello, World!", "1wJfrzvdbtXUOlUjUf"),
            ("你好,世界!", "1ugmIChyMAcCbDRpROpAtpXdp"),
            ("こんにちは", "1fyB0pNlcVqP3tfXZ1FmB"),
            ("안녕하십니까", "1yl6dfHPaO9hroEXU9qFioFhM"),
            ("=", "0z"),
            (">", "10"),
            ("?", "11"),
            ("11", "3H7"),
            ("111", "0DWfh"),
            ("1111", "0tquAL"),
            ("11111", "3icRuhV"),
            ("111111", "0FMElG7cn"),
            (
                "333333333333333333333333333333333333333",
                "12crJoybWfE2zqqnxPeYnbDOEcx8Lkv7ksPxzAA8kmM5Yb25Eb6bD",
            ),
        ];
        for &(plain, cipher) in cases {
            check_str(plain, cipher);
        }
    }

    #[test]
    fn test_large_text() {
        // Round-trip a text that spans many blocks.
        let text = "3333333333333".repeat(900);
        let encoded = encode(text.as_bytes());
        let decoded = decode(encoded.as_bytes()).unwrap();
        assert_eq!(text, String::from_utf8(decoded).unwrap());
    }

    #[test]
    fn test_integer() {
        // All-zero inputs.
        check_bytes(&[], b"");
        check_bytes(&[0], b"00");
        check_bytes(&[0, 0], b"000");
        check_bytes(&[0, 0, 0], b"00000");
        check_bytes(&[0, 0, 0, 0], b"000000");
        check_bytes(&[0; 1025], &vec![b'0'; 1378]);

        // Small values and leading zero bytes.
        check_bytes(&[1], b"01");
        check_bytes(&[2], b"02");
        check_bytes(&[61], b"0z");
        check_bytes(&[62], b"10");
        check_bytes(&[100], b"1c");
        check_bytes(&[0, 1], b"001");
        check_bytes(&[0, 0, 0, 5], b"000005");
        check_bytes(&[0, 0, 0, 0, 0, 62], b"000000010");

        // 64-bit boundary.
        let max_u64 = u64::MAX.to_be_bytes();
        check_bytes(&max_u64, b"LygHa16AHYF");

        // One past u64::MAX as a u128, so the high bytes are zero.
        let past_u64 = (u64::MAX as u128 + 1).to_be_bytes();
        check_bytes(&past_u64, b"00000000000LygHa16AHYG");

        // Powers of 62 and a value with trailing zero digits.
        let pow21 = (ALPHABET_SIZE as u128).pow(21).to_be_bytes();
        check_bytes(&pow21, b"1000000000000000000000");

        let pow20 = (ALPHABET_SIZE as u128).pow(20).to_be_bytes();
        check_bytes(&pow20, b"0100000000000000000000");

        let trailing = 92202686130861137968548313400401640448_u128.to_be_bytes();
        check_bytes(&trailing, b"26tF05fvSIgh0000000000");
    }

    #[test]
    fn test_invalid() {
        assert!(decode(&[1, 2, 3]).is_err());
        assert!(decode(b"73XpUgzMGA-jX6SV").is_err());
    }
}