Skip to main content

binarytext/
base32.rs

//! Implementation of the Base32 encoder and its variants.
2
3use crate::binarytext::{BinaryText, build_decoding_lut};
4use crate::error::BinTxtError;
5
/// Base32 implementation as described in RFC 4648 / Section 6.
///
/// An encoder is fully described by its name and its 32 symbol alphabet, so two
/// encoders compare equal exactly when all three fields are equal.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Base32 {
    /// Human readable name of the variant, returned by `name()`.
    name: String,
    /// Encoding table: index is a 5 bit value (0..=31), entry is the output symbol.
    lut_enc: [u8; 32],
    /// Decoding table indexed by an input byte below 128; `decode_byte` treats the
    /// entry 255 as "not part of the alphabet".
    lut_dec: [u8; 128],
}
13
14impl Default for Base32 {
15    fn default() -> Self {
16        Self::new()
17    }
18}
19
20impl Base32 {
21    /// Returns the Base32 encoder as described in RFC 4648 / Section 6.
22    pub fn new() -> Self {
23        let name = "Base32".to_string();
24        let lut_enc = [
25            b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N',
26            b'O', b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W', b'X', b'Y', b'Z', b'2', b'3',
27            b'4', b'5', b'6', b'7',
28        ];
29        let lut_dec = build_decoding_lut(&lut_enc);
30        Self {
31            name,
32            lut_enc,
33            lut_dec,
34        }
35    }
36
37    /// Returns the Base32 variant Base32Hex as described in RFC 4648 / Section 7.
38    pub fn base32hex() -> Self {
39        let name = "Base32Hex".to_string();
40        let lut_enc = [
41            b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b'A', b'B', b'C', b'D',
42            b'E', b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', b'P', b'Q', b'R',
43            b'S', b'T', b'U', b'V',
44        ];
45        let lut_dec = build_decoding_lut(&lut_enc);
46        Self {
47            name,
48            lut_enc,
49            lut_dec,
50        }
51    }
52}
53
54impl BinaryText for Base32 {
55    fn base(&self) -> usize {
56        32
57    }
58
59    fn name(&self) -> &str {
60        self.name.as_str()
61    }
62
63    fn n_bytes_encode(&self) -> usize {
64        5
65    }
66
67    fn n_bytes_decode(&self) -> usize {
68        8
69    }
70
71    fn encode_byte(&self, byte: u8) -> Result<u8, BinTxtError> {
72        if byte >= 32 {
73            let msg = format!("Byte {byte} exceeds maximum {}", 32);
74            return Err(BinTxtError::EncodingErr(msg));
75        }
76        Ok(self.lut_enc[byte as usize])
77    }
78
79    fn encode_into_vec(&self, input: &[u8], res: &mut Vec<u8>) -> Result<(), BinTxtError> {
80        let encode_block = |bytes_in: &[u8], bytes_out: &mut [u8]| -> Result<(), BinTxtError> {
81            // Loop unrolled
82            let pos = bytes_in[0] >> 3;
83            bytes_out[0] = self.encode_byte(pos)?;
84            let pos = ((bytes_in[0] & 0b111) << 2) | (bytes_in[1] >> 6);
85            bytes_out[1] = self.encode_byte(pos)?;
86            let pos = (bytes_in[1] & 0b111110) >> 1;
87            bytes_out[2] = self.encode_byte(pos)?;
88            let pos = ((bytes_in[1] & 0b1) << 4) | (bytes_in[2] >> 4);
89            bytes_out[3] = self.encode_byte(pos)?;
90            let pos = ((bytes_in[2] & 0b1111) << 1) | (bytes_in[3] >> 7);
91            bytes_out[4] = self.encode_byte(pos)?;
92            let pos = (bytes_in[3] & 0b1111100) >> 2;
93            bytes_out[5] = self.encode_byte(pos)?;
94            let pos = ((bytes_in[3] & 0b11) << 3) | (bytes_in[4] >> 5);
95            bytes_out[6] = self.encode_byte(pos)?;
96            let pos = bytes_in[4] & 0b11111;
97            bytes_out[7] = self.encode_byte(pos)?;
98            Ok(())
99        };
100        res.clear();
101        // Five bytes -> 40 bits
102        let iter = input.chunks_exact(5);
103        let bytes_rem = iter.remainder();
104        let mut bytes_enc = [0u8; 8];
105        for bytes in iter {
106            encode_block(bytes, &mut bytes_enc)?;
107            res.extend(&bytes_enc);
108        }
109        // Handle the remaining bytes
110        if !bytes_rem.is_empty() {
111            let mut bytes = [0u8; 5];
112            bytes[..bytes_rem.len()].copy_from_slice(bytes_rem);
113            encode_block(&bytes, &mut bytes_enc)?;
114            // Padding
115            let n_bytes_enc = (bytes_rem.len() * 8) / 5 + 1;
116            for byte in bytes_enc.iter_mut().skip(n_bytes_enc) {
117                *byte = b'=';
118            }
119            res.extend(&bytes_enc);
120        }
121        Ok(())
122    }
123
124    fn decode_byte(&self, byte: u8) -> Result<u8, BinTxtError> {
125        let b = if byte < 128 {
126            self.lut_dec[byte as usize]
127        } else {
128            255
129        };
130        if b < 255 {
131            Ok(b)
132        } else {
133            let errmsg = format!("Invalid byte \"{}\" in Base32 string", byte);
134            Err(BinTxtError::DecodingErr(errmsg))
135        }
136    }
137
138    fn decode_into_vec(&self, input: &[u8], res: &mut Vec<u8>) -> Result<(), BinTxtError> {
139        res.clear();
140        if input.is_empty() {
141            return Ok(());
142        }
143        // Function for decoding a block of eight bytes into five bytes
144        let decode_block = |bytes_in: &[u8], bytes_out: &mut [u8]| -> Result<(), BinTxtError> {
145            // Loop unrolled
146            let dec0 = self.decode_byte(bytes_in[0])?;
147            let dec1 = self.decode_byte(bytes_in[1])?;
148            let dec2 = self.decode_byte(bytes_in[2])?;
149            let dec3 = self.decode_byte(bytes_in[3])?;
150            let dec4 = self.decode_byte(bytes_in[4])?;
151            let dec5 = self.decode_byte(bytes_in[5])?;
152            let dec6 = self.decode_byte(bytes_in[6])?;
153            let dec7 = self.decode_byte(bytes_in[7])?;
154            bytes_out[0] = (dec0 << 3) | (dec1 >> 2);
155            bytes_out[1] = (dec1 << 6) | (dec2 << 1) | (dec3 >> 4);
156            bytes_out[2] = (dec3 << 4) | (dec4 >> 1);
157            bytes_out[3] = (dec4 << 7) | (dec5 << 2) | (dec6 >> 3);
158            bytes_out[4] = (dec6 << 5) | dec7;
159            Ok(())
160        };
161        // Get the last position before the remainder
162        let rem = input.len() % 8;
163        let pos_last = if rem == 0 {
164            input.len() - 8
165        } else {
166            input.len() - rem
167        };
168        let mut bytes_dec = [0u8; 5];
169        // Decode everything except the remainder bytes
170        // This way we don't have to worry about padding here
171        for bytes in input[0..pos_last].chunks_exact(8) {
172            decode_block(bytes, &mut bytes_dec)?;
173            res.extend(&bytes_dec);
174        }
175        // Handle the last chunk of bytes with padding
176        let bytes_rem = &input[pos_last..];
177        // Save the position of the first padding character, if it exists
178        let pos_padding = bytes_rem.iter().position(|&x| x == b'=');
179        // 'A' decodes to zero
180        let bytes = {
181            let mut ret = bytes_rem
182                .iter()
183                .cloned()
184                .map(|x| if x == b'=' { b'A' } else { x })
185                .collect::<Vec<u8>>();
186            ret.resize(8, b'A');
187            ret
188        };
189        decode_block(&bytes, &mut bytes_dec)?;
190        match pos_padding {
191            Some(2) => {
192                res.extend(&bytes_dec[0..1]);
193            }
194            Some(3) => {
195                res.extend(&bytes_dec[0..2]);
196            }
197            Some(4) => {
198                res.extend(&bytes_dec[0..2]);
199            }
200            Some(5) => {
201                res.extend(&bytes_dec[0..3]);
202            }
203            Some(6) => {
204                res.extend(&bytes_dec[0..3]);
205            }
206            Some(7) => {
207                res.extend(&bytes_dec[0..4]);
208            }
209            _ => {
210                res.extend(&bytes_dec);
211            }
212        }
213        Ok(())
214    }
215
216    fn is_decodable(&self, input: &str) -> bool {
217        let rem = input.len() % 8;
218        let pos_last = if rem == 0 {
219            input.len() - 8
220        } else {
221            input.len() - rem
222        };
223        for &byte in &input.as_bytes()[0..pos_last] {
224            if self.decode_byte(byte).is_err() {
225                return false;
226            }
227        }
228        // In the last chunk padding character "=" is valid
229        for &byte in &input.as_bytes()[pos_last..] {
230            if byte != b'=' && self.decode_byte(byte).is_err() {
231                return false;
232            }
233        }
234        true
235    }
236}