//! A library for Base85 encoding as described in [RFC1924](https://datatracker.ietf.org/doc/html/rfc1924) and released under the Mozilla Public License 2.0.
//!
//!## Description
//!
//! Several variants of Base85 encoding exist. The most popular variant is often known as ascii85 and is best known for use in Adobe products. This is not that algorithm.
//!
//! The variant implemented in RFC 1924 was originally intended for encoding IPv6 addresses. It utilizes the same concepts as other versions, but uses a character set which is friendly toward embedding in source code without the need for escaping. During decoding ASCII whitespace (\n, \r, \t, space) is ignored. A base85-encoded string is 25% larger than the original binary data, which is more efficient than the more-common base64 algorithm (33%). This encoding pairs very well with JSON, yielding lower overhead and needing no character escapes.
//!
//! ## Usage
//!
//! This was my first real Rust project but has matured since then and is stable. The API is simple: `encode()` turns a slice of bytes into a String and `decode()` turns a string reference into a Vector of bytes (u8). Both calls work completely within RAM, so processing huge files is probably not a good idea.
//!
//! ## Contributions
//!
//! Even though I've been coding for a while and have learned quite a bit about Rust, I'm still a novice. Suggestions and contributions are always welcome and appreciated.
use core::mem::MaybeUninit;
pub type Result<T> = std::result::Result<T, Error>;
#[derive(thiserror::Error, Debug)]
pub enum Error {
#[error("Unexpected end of input")]
UnexpectedEof,
#[error("Unexpected character '{0}'")]
InvalidCharacter(u8),
}
#[inline]
fn byte_to_char85(x85: u8) -> u8 {
static B85_TO_CHAR: &[u8] =
b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~";
B85_TO_CHAR[x85 as usize]
}
#[inline]
fn char85_to_byte(c: u8) -> Result<u8> {
match c {
b'0'..=b'9' => Ok(c - b'0'),
b'A'..=b'Z' => Ok(c - b'A' + 10),
b'a'..=b'z' => Ok(c - b'a' + 36),
b'!' => Ok(62),
b'#' => Ok(63),
b'$' => Ok(64),
b'%' => Ok(65),
b'&' => Ok(66),
b'(' => Ok(67),
b')' => Ok(68),
b'*' => Ok(69),
b'+' => Ok(70),
b'-' => Ok(71),
b';' => Ok(72),
b'<' => Ok(73),
b'=' => Ok(74),
b'>' => Ok(75),
b'?' => Ok(76),
b'@' => Ok(77),
b'^' => Ok(78),
b'_' => Ok(79),
b'`' => Ok(80),
b'{' => Ok(81),
b'|' => Ok(82),
b'}' => Ok(83),
b'~' => Ok(84),
v => Err(Error::InvalidCharacter(v)),
}
}
/// encode() turns a slice of bytes into a string of encoded data
pub fn encode(indata: &[u8]) -> String {
let chunks = indata.chunks_exact(4);
let remainder = chunks.remainder();
let capacity = if remainder.is_empty() {
(indata.len() / 4) * 5
} else {
(indata.len() / 4) * 5 + remainder.len() + 1
};
let mut out = Vec::<MaybeUninit<u8>>::with_capacity(capacity);
unsafe {
out.set_len(capacity);
}
let mut out_chunks = out.chunks_exact_mut(5);
for (chunk, out) in std::iter::zip(chunks, &mut out_chunks) {
let decnum = u32::from_be_bytes(<[u8; 4]>::try_from(chunk).unwrap());
out[0] = MaybeUninit::new(byte_to_char85((decnum / 85u32.pow(4)) as u8));
out[1] = MaybeUninit::new(byte_to_char85(
((decnum % 85u32.pow(4)) / 85u32.pow(3)) as u8,
));
out[2] = MaybeUninit::new(byte_to_char85(
((decnum % 85u32.pow(3)) / 85u32.pow(2)) as u8,
));
out[3] = MaybeUninit::new(byte_to_char85(((decnum % 85u32.pow(2)) / 85u32) as u8));
out[4] = MaybeUninit::new(byte_to_char85((decnum % 85u32) as u8));
}
let out_remainder = out_chunks.into_remainder();
if let Some(a) = remainder.first().copied() {
let b = remainder.get(1).copied();
let c = remainder.get(2).copied();
let d = remainder.get(3).copied();
let decnum = u32::from_be_bytes([a, b.unwrap_or(0), c.unwrap_or(0), d.unwrap_or(0)]);
out_remainder[0] = MaybeUninit::new(byte_to_char85((decnum / 85u32.pow(4)) as u8));
out_remainder[1] = MaybeUninit::new(byte_to_char85(
((decnum % 85u32.pow(4)) / 85u32.pow(3)) as u8,
));
if b.is_some() {
out_remainder[2] = MaybeUninit::new(byte_to_char85(
((decnum % 85u32.pow(3)) / 85u32.pow(2)) as u8,
));
}
if c.is_some() {
out_remainder[3] =
MaybeUninit::new(byte_to_char85(((decnum % 85u32.pow(2)) / 85u32) as u8));
}
if d.is_some() {
out_remainder[4] = MaybeUninit::new(byte_to_char85((decnum % 85u32) as u8));
}
}
unsafe { String::from_utf8_unchecked(std::mem::transmute::<_, Vec<u8>>(out)) }
}
/// decode() turns a string of encoded data into a slice of bytes
pub fn decode(instr: &str) -> Result<Vec<u8>> {
let indata = instr.as_bytes();
let chunks = indata.chunks_exact(5);
let remainder = chunks.remainder();
let capacity = if remainder.is_empty() {
(indata.len() / 5) * 4
} else {
(indata.len() / 5) * 4 + remainder.len() - 1
};
let mut out = Vec::<MaybeUninit<u8>>::with_capacity(capacity);
unsafe {
out.set_len(capacity);
}
let mut out_chunks = out.chunks_exact_mut(4);
for (chunk, out_chunk) in std::iter::zip(chunks, &mut out_chunks) {
let accumulator = u32::from(char85_to_byte(chunk[0])?) * 85u32.pow(4)
+ u32::from(char85_to_byte(chunk[1])?) * 85u32.pow(3)
+ u32::from(char85_to_byte(chunk[2])?) * 85u32.pow(2)
+ u32::from(char85_to_byte(chunk[3])?) * 85u32
+ u32::from(char85_to_byte(chunk[4])?);
out_chunk[0] = MaybeUninit::new((accumulator >> 24) as u8);
out_chunk[1] = MaybeUninit::new((accumulator >> 16) as u8);
out_chunk[2] = MaybeUninit::new((accumulator >> 8) as u8);
out_chunk[3] = MaybeUninit::new(accumulator as u8);
}
let out_remainder = out_chunks.into_remainder();
if let Some(a) = remainder.first().copied() {
let b = remainder.get(1).copied();
let c = remainder.get(2).copied();
let d = remainder.get(3).copied();
let e = remainder.get(4).copied();
let accumulator = u32::from(char85_to_byte(a)?) * 85u32.pow(4)
+ u32::from(b.map_or(Err(Error::UnexpectedEof), char85_to_byte)?) * 85u32.pow(3)
+ u32::from(c.map_or(Ok(126), char85_to_byte)?) * 85u32.pow(2)
+ u32::from(d.map_or(Ok(126), char85_to_byte)?) * 85u32.pow(1)
+ u32::from(e.map_or(Ok(126), char85_to_byte)?) * 85u32.pow(0);
out_remainder[0] = MaybeUninit::new((accumulator >> 24) as u8);
if remainder.len() > 2 {
out_remainder[1] = MaybeUninit::new((accumulator >> 16) as u8);
if remainder.len() > 3 {
out_remainder[2] = MaybeUninit::new((accumulator >> 8) as u8);
if remainder.len() > 4 {
out_remainder[3] = MaybeUninit::new(accumulator as u8);
}
}
}
}
Ok(unsafe { std::mem::transmute::<_, Vec<u8>>(out) })
}
#[cfg(test)]
mod tests {
use crate::*;
#[test]
fn test_encode_decode() {
// The list of tests consists of the unencoded data on the left and the encoded data on
// the right. By using strings for the arbitrary binary data, we make the test much less
// complicated to write.
let testlist = [
("a", "VE"),
("aa", "VPO"),
("aaa", "VPRn"),
("aaaa", "VPRom"),
("aaaaa", "VPRomVE"),
("aaaaaa", "VPRomVPO"),
("aaaaaaa", "VPRomVPRn"),
("aaaaaaaa", "VPRomVPRom"),
];
for test in testlist.iter() {
let s = encode(test.0.as_bytes());
assert_eq!(
s, test.1,
"encoder test failed: wanted: {}, got: {}",
test.0, s
);
let b = decode(test.1)
.unwrap_or_else(|e| panic!("decoder test error on input {}: {}", test.1, e));
let s = String::from_utf8(b).unwrap_or_else(|e| {
panic!(
"decoder test '{}' failed to convert to string: {:#?}",
test.1, e
)
});
assert_eq!(
test.0, s,
"decoder data mismatch: wanted: {}, got: {}",
test.0, s
);
}
}
}