base_utf8/
lib.rs

1#![feature(array_chunks)]
2#![feature(iter_array_chunks)]
3#![feature(iter_advance_by)]
4#![feature(array_methods)]
//! This crate is an example implementation of the base-utf8 encoding algorithm.
6use std::iter;
7use thiserror::Error;
8
9#[derive(Error, Debug)]
10pub enum DecodeError {
11    #[error("Invalid length: {0} is not a multiple of 8")]
12    InvalidLength(usize),
13    #[error("Invalid padding: {0} is not a valid padding length")]
14    InvalidPadding(usize),
15}
16
17pub fn encode(arr: &[u8]) -> String {
18    // calculate padding
19    let padding = ((7 - ((arr.len() + 1) % 7)) % 7) as u8;
20    // allocate space for the result
21    let mut ans: Vec<u8> = vec![0u8; (arr.len() + 1 + padding as usize) / 7 * 8];
22
23    // chain values together making full source data
24    let arr = 
25        // add padding info
26        iter::repeat(&padding)
27        .take(1)
28        // origin data
29        .chain(arr.iter())
30        // padding
31        .chain(iter::repeat(&0).take(padding as usize).into_iter());
32    // encode
33    for i in arr.array_chunks::<7>().zip(ans.array_chunks_mut::<8>()) {
34        encode78(&i.0, i.1);
35    }
36    String::from_utf8(ans).unwrap()
37}
38
39pub fn decode(arr: &str) -> Result<Vec<u8>, DecodeError> {
40    let arr = arr.as_bytes();
41    // check length
42    if arr.len() == 0 {
43        return Ok(Vec::new());
44    }
45    if arr.len() % 8 != 0 {
46        return Err(DecodeError::InvalidLength(arr.len()));
47    }
48    
49    // decode first chunk
50    let mut firest_chunk = [0u8;7];
51    decode87(arr.array_chunks::<8>().next().unwrap(), firest_chunk.each_mut());
52    
53    // peek padding info
54    let padding = firest_chunk[0];
55    if padding >= 7 {
56        return Err(DecodeError::InvalidPadding(arr.len()));
57    }
58    // allocate space for the result
59    let mut ans: Vec<u8> = vec![0u8; (arr.len() / 8 * 7) - 1 - padding as usize];
60    // push first chunk
61    for (i, v) in firest_chunk[1..].iter().enumerate() {
62        if i >= ans.len() {
63            return Ok(ans);
64        }
65        ans[i] = *v;
66    }
67    // decode
68    // note that we have already decoded the first chunk
69    let mut arr_iter = arr.array_chunks::<8>();
70    let _ = arr_iter.advance_by(1);
71    let mut ans_iter = ans.iter_mut();
72    let _ = ans_iter.advance_by(6);
73    for i in arr_iter.zip(ans_iter.array_chunks::<7>()) {
74        decode87(&i.0, i.1);
75    }
76    // If we have padding, we need to manually decode the last chunk
77    let last_data_len = 7-padding;
78    if last_data_len != 0 {
79        let mut buffer = [0u8; 7];
80        decode87(arr[(arr.len() - 8)..].array_chunks::<8>().next().unwrap(), buffer.each_mut());
81        for i in (ans.len() - last_data_len as usize)..ans.len() {
82            ans[i] = buffer[i - (ans.len() - last_data_len as usize)];
83        }
84    }
85    Ok(ans)
86}
87
/// Packs seven bytes into eight bytes whose top bit is always clear.
///
/// `buffer[1..]` receives the low seven bits of each input byte. `buffer[0]` is a header
/// that collects the seven high bits: the high bit of input `i` is stored at bit `6 - i`.
///
/// The header is computed locally and then assigned, so the result does not depend on the
/// previous contents of `buffer`.
fn encode78(arr: &[&u8; 7], buffer: &mut [u8; 8]) {
    let mut header = 0u8;
    for (i, &&byte) in arr.iter().enumerate() {
        buffer[i + 1] = byte & 0b0111_1111;
        // Move bit 7 of the input down to bit `6 - i` of the header.
        header |= (byte & 0b1000_0000) >> (i + 1);
    }
    buffer[0] = header;
}
/// Expands one 8-byte encoded chunk into seven output bytes.
///
/// `arr[0]` is the header holding the high bits and `arr[1..]` are the payload bytes.
/// Output slot `i` receives payload byte `arr[i + 1]` OR-ed with header bit `6 - i`
/// moved up to bit 7. The payload byte is not masked before the OR.
fn decode87(arr: &[u8; 8], buffer: [&mut u8; 7]) {
    let header = arr[0];
    // `index` runs 1..=7: it is both the payload position and the left shift that
    // brings the matching header bit up to bit 7.
    for (index, slot) in (1usize..).zip(buffer) {
        *slot = arr[index] | ((header << index) & 0b1000_0000);
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use rand::{thread_rng, RngCore};

    /// 13 payload bytes plus the header byte fill exactly two groups, so no padding.
    #[test]
    fn test_normal() {
        let data = b"Hello, world!";
        let encoded = encode(data);
        assert_eq!(
            encoded.as_bytes(),
            [0, 0, 72, 101, 108, 108, 111, 44, 0, 32, 119, 111, 114, 108, 100, 33]
        );
        let decoded = decode(&encoded).unwrap();
        assert_eq!(data, &decoded[..]);
    }

    /// A single payload byte needs five padding bytes to fill the only group.
    #[test]
    fn test_low_length() {
        let data = &[0; 1];
        let encoded = encode(data);
        assert_eq!(encoded.as_bytes(), [0, 5, 0, 0, 0, 0, 0, 0]);
        let decoded = decode(&encoded).unwrap();
        assert_eq!(data, &decoded[..]);
    }

    /// Round-trips between 1 MiB and 10 MiB of random data.
    #[test]
    fn test_long_random() {
        const MIB: usize = 1024 * 1024;
        let mut rng = thread_rng();
        let data_length = MIB + (rng.next_u32() as usize % (9 * MIB));
        let mut data = vec![0u8; data_length];
        rng.fill_bytes(&mut data);
        let encoded = encode(&data);
        let decoded = decode(&encoded).unwrap();
        assert_eq!(data, &decoded[..]);
    }

    /// Empty payloads still produce one chunk; the empty string also decodes to nothing.
    #[test]
    fn test_empty() {
        let encoded = encode(&[]);
        assert_eq!(encoded.as_bytes(), [0, 6, 0, 0, 0, 0, 0, 0]);
        assert!(decode(&encoded).unwrap().is_empty());
        assert!(decode("").unwrap().is_empty());
    }

    /// Input whose byte length is not a multiple of 8 is rejected with its length.
    #[test]
    fn test_invalid_length() {
        assert!(matches!(
            decode("abc"),
            Err(DecodeError::InvalidLength(3))
        ));
    }

    /// A header announcing seven padding bytes is rejected.
    #[test]
    fn test_invalid_padding() {
        assert!(matches!(
            decode("\0\x07\0\0\0\0\0\0"),
            Err(DecodeError::InvalidPadding(_))
        ));
    }
}