base85/
lib.rs

//! A library for Base85 encoding as described in [RFC1924](https://datatracker.ietf.org/doc/html/rfc1924) and released under the Mozilla Public License 2.0.
//!
//! ## Description
//!
//! Several variants of Base85 encoding exist. The most popular variant is often known as ascii85 and is best known for use in Adobe products. This is not that algorithm.
//!
//! The variant described in RFC 1924 was originally intended for encoding IPv6 addresses. It utilizes the same concepts as other versions, but uses a character set which is friendly toward embedding in source code without the need for escaping. During decoding, ASCII whitespace (`\n`, `\r`, `\t`, space) is ignored. A base85-encoded string is 25% larger than the original binary data, which is more efficient than the more common base64 algorithm (33%). This encoding pairs very well with JSON, yielding lower overhead and needing no character escapes.
//!
//! ## Usage
//!
//! This was my first real Rust project but has matured since then and is stable. The API is simple: `encode()` turns a slice of bytes into a `String` and `decode()` turns a string reference into a vector of bytes (`Vec<u8>`). Both calls work completely within RAM, so processing huge files is probably not a good idea.
//!
//! ## Contributions
//!
//! Even though I've been coding for a while and have learned quite a bit about Rust, I'm still a novice. Suggestions and contributions are always welcome and appreciated.
16
17use core::mem::MaybeUninit;
18
19pub type Result<T> = std::result::Result<T, Error>;
20
21#[derive(thiserror::Error, Debug)]
22pub enum Error {
23    #[error("Unexpected end of input")]
24    UnexpectedEof,
25    #[error("Unexpected character '{0}'")]
26    InvalidCharacter(u8),
27}
28
/// Maps a base-85 digit (`0..=84`) to its ASCII character in the RFC 1924
/// alphabet.
///
/// # Panics
///
/// Panics if `x85` is 85 or greater, because no such digit exists.
#[inline]
fn byte_to_char85(x85: u8) -> u8 {
    // A fixed-size array lets the compiler verify that the alphabet holds
    // exactly 85 symbols.
    const ALPHABET: [u8; 85] =
        *b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~";
    ALPHABET[usize::from(x85)]
}
35
36#[inline]
37fn char85_to_byte(c: u8) -> Result<u8> {
38    match c {
39        b'0'..=b'9' => Ok(c - b'0'),
40        b'A'..=b'Z' => Ok(c - b'A' + 10),
41        b'a'..=b'z' => Ok(c - b'a' + 36),
42        b'!' => Ok(62),
43        b'#' => Ok(63),
44        b'$' => Ok(64),
45        b'%' => Ok(65),
46        b'&' => Ok(66),
47        b'(' => Ok(67),
48        b')' => Ok(68),
49        b'*' => Ok(69),
50        b'+' => Ok(70),
51        b'-' => Ok(71),
52        b';' => Ok(72),
53        b'<' => Ok(73),
54        b'=' => Ok(74),
55        b'>' => Ok(75),
56        b'?' => Ok(76),
57        b'@' => Ok(77),
58        b'^' => Ok(78),
59        b'_' => Ok(79),
60        b'`' => Ok(80),
61        b'{' => Ok(81),
62        b'|' => Ok(82),
63        b'}' => Ok(83),
64        b'~' => Ok(84),
65        v => Err(Error::InvalidCharacter(v)),
66    }
67}
68
69/// encode() turns a slice of bytes into a string of encoded data
70pub fn encode(indata: &[u8]) -> String {
71    let chunks = indata.chunks_exact(4);
72    let remainder = chunks.remainder();
73    let capacity = if remainder.is_empty() {
74        (indata.len() / 4) * 5
75    } else {
76        (indata.len() / 4) * 5 + remainder.len() + 1
77    };
78    let mut out = Vec::<MaybeUninit<u8>>::with_capacity(capacity);
79    unsafe {
80        out.set_len(capacity);
81    }
82    let mut out_chunks = out.chunks_exact_mut(5);
83
84    for (chunk, out) in std::iter::zip(chunks, &mut out_chunks) {
85        let decnum = u32::from_be_bytes(<[u8; 4]>::try_from(chunk).unwrap());
86        out[0] = MaybeUninit::new(byte_to_char85((decnum / 85u32.pow(4)) as u8));
87        out[1] = MaybeUninit::new(byte_to_char85(
88            ((decnum % 85u32.pow(4)) / 85u32.pow(3)) as u8,
89        ));
90        out[2] = MaybeUninit::new(byte_to_char85(
91            ((decnum % 85u32.pow(3)) / 85u32.pow(2)) as u8,
92        ));
93        out[3] = MaybeUninit::new(byte_to_char85(((decnum % 85u32.pow(2)) / 85u32) as u8));
94        out[4] = MaybeUninit::new(byte_to_char85((decnum % 85u32) as u8));
95    }
96
97    let out_remainder = out_chunks.into_remainder();
98    if let Some(a) = remainder.first().copied() {
99        let b = remainder.get(1).copied();
100        let c = remainder.get(2).copied();
101        let d = remainder.get(3).copied();
102        let decnum = u32::from_be_bytes([a, b.unwrap_or(0), c.unwrap_or(0), d.unwrap_or(0)]);
103        out_remainder[0] = MaybeUninit::new(byte_to_char85((decnum / 85u32.pow(4)) as u8));
104        out_remainder[1] = MaybeUninit::new(byte_to_char85(
105            ((decnum % 85u32.pow(4)) / 85u32.pow(3)) as u8,
106        ));
107        if b.is_some() {
108            out_remainder[2] = MaybeUninit::new(byte_to_char85(
109                ((decnum % 85u32.pow(3)) / 85u32.pow(2)) as u8,
110            ));
111        }
112        if c.is_some() {
113            out_remainder[3] =
114                MaybeUninit::new(byte_to_char85(((decnum % 85u32.pow(2)) / 85u32) as u8));
115        }
116        if d.is_some() {
117            out_remainder[4] = MaybeUninit::new(byte_to_char85((decnum % 85u32) as u8));
118        }
119    }
120
121    unsafe { String::from_utf8_unchecked(std::mem::transmute::<_, Vec<u8>>(out)) }
122}
123
124/// decode() turns a string of encoded data into a slice of bytes
125pub fn decode(instr: &str) -> Result<Vec<u8>> {
126    let indata = instr.as_bytes();
127    let chunks = indata.chunks_exact(5);
128    let remainder = chunks.remainder();
129    let capacity = if remainder.is_empty() {
130        (indata.len() / 5) * 4
131    } else {
132        (indata.len() / 5) * 4 + remainder.len() - 1
133    };
134    let mut out = Vec::<MaybeUninit<u8>>::with_capacity(capacity);
135    unsafe {
136        out.set_len(capacity);
137    }
138    let mut out_chunks = out.chunks_exact_mut(4);
139
140    for (chunk, out_chunk) in std::iter::zip(chunks, &mut out_chunks) {
141        let accumulator = u32::from(char85_to_byte(chunk[0])?) * 85u32.pow(4)
142            + u32::from(char85_to_byte(chunk[1])?) * 85u32.pow(3)
143            + u32::from(char85_to_byte(chunk[2])?) * 85u32.pow(2)
144            + u32::from(char85_to_byte(chunk[3])?) * 85u32
145            + u32::from(char85_to_byte(chunk[4])?);
146        out_chunk[0] = MaybeUninit::new((accumulator >> 24) as u8);
147        out_chunk[1] = MaybeUninit::new((accumulator >> 16) as u8);
148        out_chunk[2] = MaybeUninit::new((accumulator >> 8) as u8);
149        out_chunk[3] = MaybeUninit::new(accumulator as u8);
150    }
151
152    let out_remainder = out_chunks.into_remainder();
153    if let Some(a) = remainder.first().copied() {
154        let b = remainder.get(1).copied();
155        let c = remainder.get(2).copied();
156        let d = remainder.get(3).copied();
157        let e = remainder.get(4).copied();
158        let accumulator = u32::from(char85_to_byte(a)?) * 85u32.pow(4)
159            + u32::from(b.map_or(Err(Error::UnexpectedEof), char85_to_byte)?) * 85u32.pow(3)
160            + u32::from(c.map_or(Ok(126), char85_to_byte)?) * 85u32.pow(2)
161            + u32::from(d.map_or(Ok(126), char85_to_byte)?) * 85u32.pow(1)
162            + u32::from(e.map_or(Ok(126), char85_to_byte)?) * 85u32.pow(0);
163        out_remainder[0] = MaybeUninit::new((accumulator >> 24) as u8);
164        if remainder.len() > 2 {
165            out_remainder[1] = MaybeUninit::new((accumulator >> 16) as u8);
166            if remainder.len() > 3 {
167                out_remainder[2] = MaybeUninit::new((accumulator >> 8) as u8);
168                if remainder.len() > 4 {
169                    out_remainder[3] = MaybeUninit::new(accumulator as u8);
170                }
171            }
172        }
173    }
174
175    Ok(unsafe { std::mem::transmute::<_, Vec<u8>>(out) })
176}
177
#[cfg(test)]
mod tests {
    use crate::*;

    /// Round-trips a table of known vectors through `encode` and `decode`.
    #[test]
    fn test_encode_decode() {
        // Each pair is (unencoded data, expected encoding). Using strings for
        // the arbitrary binary data keeps the table easy to read and write.
        let testlist = [
            ("a", "VE"),
            ("aa", "VPO"),
            ("aaa", "VPRn"),
            ("aaaa", "VPRom"),
            ("aaaaa", "VPRomVE"),
            ("aaaaaa", "VPRomVPO"),
            ("aaaaaaa", "VPRomVPRn"),
            ("aaaaaaaa", "VPRomVPRom"),
        ];

        for (plain, encoded) in testlist.iter().copied() {
            let s = encode(plain.as_bytes());
            // "wanted" is the expected encoding, not the unencoded input.
            assert_eq!(
                s, encoded,
                "encoder test failed: wanted: {}, got: {}",
                encoded, s
            );

            let b = decode(encoded)
                .unwrap_or_else(|e| panic!("decoder test error on input {}: {}", encoded, e));

            let s = String::from_utf8(b).unwrap_or_else(|e| {
                panic!(
                    "decoder test '{}' failed to convert to string: {:#?}",
                    encoded, e
                )
            });

            assert_eq!(
                plain, s,
                "decoder data mismatch: wanted: {}, got: {}",
                plain, s
            );
        }
    }

    /// Malformed input must be reported through `Error`, not decoded.
    #[test]
    fn test_decode_errors() {
        // A lone trailing character cannot describe a whole byte.
        assert!(matches!(decode("V"), Err(Error::UnexpectedEof)));
        // '"' is deliberately absent from the RFC 1924 alphabet.
        assert!(matches!(
            decode("VP\"Om"),
            Err(Error::InvalidCharacter(b'"'))
        ));
    }
}