Skip to main content

binarytext/
base64.rs

//! Implementation of the Base64 encoder and its variants.
2
3use crate::binarytext::{BinaryText, build_decoding_lut};
4use crate::error::BinTxtError;
5
/// Base64 codec following RFC 4648, Section 4.
///
/// An instance bundles the alphabet in both directions so that encoding and
/// decoding are plain table lookups. Use [`Base64::new`] for the standard
/// alphabet or [`Base64::base64url`] for the URL-safe one.
#[derive(Debug, Clone)]
pub struct Base64 {
    /// Human-readable name of the variant, handed out by the `name` accessor.
    name: String,
    /// Encoding table: maps a 6-bit value (`0..=63`) to its alphabet character.
    lut_enc: [u8; 64],
    /// Decoding table indexed by ASCII byte, derived from `lut_enc` via
    /// `build_decoding_lut`. `decode_byte` treats an entry of 255 as
    /// "not part of the alphabet".
    lut_dec: [u8; 128],
}
13
14impl Default for Base64 {
15    fn default() -> Self {
16        Self::new()
17    }
18}
19
20impl Base64 {
21    /// Returns the default Base64 encoder.
22    pub fn new() -> Self {
23        let name = "Base64".to_string();
24        let lut_enc = [
25            b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N',
26            b'O', b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W', b'X', b'Y', b'Z', b'a', b'b',
27            b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p',
28            b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'0', b'1', b'2', b'3',
29            b'4', b'5', b'6', b'7', b'8', b'9', b'+', b'/',
30        ];
31        let lut_dec = build_decoding_lut(&lut_enc);
32        Self {
33            name,
34            lut_enc,
35            lut_dec,
36        }
37    }
38
39    /// Returns the Base64 encoder suitable for URLs using '-' and '_' instead of '+' and '/'.
40    pub fn base64url() -> Self {
41        let name = "Base64URL".to_string();
42        let lut_enc = [
43            b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N',
44            b'O', b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W', b'X', b'Y', b'Z', b'a', b'b',
45            b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p',
46            b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'0', b'1', b'2', b'3',
47            b'4', b'5', b'6', b'7', b'8', b'9', b'-', b'_',
48        ];
49        let lut_dec = build_decoding_lut(&lut_enc);
50        Self {
51            name,
52            lut_enc,
53            lut_dec,
54        }
55    }
56}
57
58impl BinaryText for Base64 {
59    fn base(&self) -> usize {
60        64
61    }
62
63    fn name(&self) -> &str {
64        self.name.as_str()
65    }
66
67    fn n_bytes_encode(&self) -> usize {
68        3
69    }
70
71    fn n_bytes_decode(&self) -> usize {
72        4
73    }
74
75    fn encode_byte(&self, byte: u8) -> Result<u8, BinTxtError> {
76        if byte >= 64 {
77            let msg = format!("Byte {byte} exceeds maximum {}", 64);
78            return Err(BinTxtError::EncodingErr(msg));
79        }
80        Ok(self.lut_enc[byte as usize])
81    }
82
83    fn encode_into_vec(&self, input: &[u8], res: &mut Vec<u8>) -> Result<(), BinTxtError> {
84        res.clear();
85        let encode_block = |bytes_in: &[u8], bytes_out: &mut [u8]| -> Result<(), BinTxtError> {
86            // Loop unrolled
87            let pos = bytes_in[0] >> 2;
88            bytes_out[0] = self.encode_byte(pos)?;
89            let pos = ((bytes_in[0] & 0b00000011u8) << 4) | ((bytes_in[1] & 0b11110000u8) >> 4);
90            bytes_out[1] = self.encode_byte(pos)?;
91            let pos = ((bytes_in[1] & 0b00001111u8) << 2) | ((bytes_in[2] & 0b11000000u8) >> 6);
92            bytes_out[2] = self.encode_byte(pos)?;
93            let pos = bytes_in[2] & 0b00111111u8;
94            bytes_out[3] = self.encode_byte(pos)?;
95            Ok(())
96        };
97        // Three bytes -> 24 bits
98        let iter = input.chunks_exact(3);
99        let bytes_rem = iter.remainder();
100        let mut bytes_enc = [0u8; 4];
101        for bytes in iter {
102            encode_block(bytes, &mut bytes_enc)?;
103            res.extend(&bytes_enc);
104        }
105        // Handle the remaining bytes
106        if !bytes_rem.is_empty() {
107            let mut bytes = [0u8; 3];
108            bytes[..bytes_rem.len()].copy_from_slice(bytes_rem);
109            encode_block(&bytes, &mut bytes_enc)?;
110            // Padding
111            if bytes_rem.len() == 1 {
112                bytes_enc[2] = b'=';
113                bytes_enc[3] = b'=';
114            } else if bytes_rem.len() == 2 {
115                bytes_enc[3] = b'=';
116            }
117            res.extend(&bytes_enc);
118        }
119        Ok(())
120    }
121
122    fn decode_byte(&self, byte: u8) -> Result<u8, BinTxtError> {
123        let b = if byte < 128 {
124            self.lut_dec[byte as usize]
125        } else {
126            255
127        };
128        if b < 255 {
129            Ok(b)
130        } else {
131            let errmsg = format!("Invalid byte \"{}\" in Base64 string", byte);
132            Err(BinTxtError::DecodingErr(errmsg))
133        }
134    }
135
136    fn decode_into_vec(&self, input: &[u8], res: &mut Vec<u8>) -> Result<(), BinTxtError> {
137        res.clear();
138        if input.is_empty() {
139            return Ok(());
140        }
141        // Get the last position before the remainder
142        let rem = input.len() % 4;
143        let pos_last = if rem == 0 {
144            input.len() - 4
145        } else {
146            input.len() - rem
147        };
148        // Function for decoding a block of four bytes into three bytes
149        let decode_block = |bytes_in: &[u8], bytes_out: &mut [u8]| -> Result<(), BinTxtError> {
150            // Loop unrolled
151            let dec0 = self.decode_byte(bytes_in[0])?;
152            let dec1 = self.decode_byte(bytes_in[1])?;
153            let dec2 = self.decode_byte(bytes_in[2])?;
154            let dec3 = self.decode_byte(bytes_in[3])?;
155            bytes_out[0] = (dec0 << 2) | (dec1 >> 4);
156            bytes_out[1] = (dec1 << 4) | (dec2 >> 2);
157            bytes_out[2] = (dec2 << 6) | dec3;
158            Ok(())
159        };
160        let mut bytes_dec = [0u8; 3];
161        // Decode everything except the last 4 bytes
162        // This way we don't have to worry about padding here
163        for bytes in input[0..pos_last].chunks_exact(4) {
164            decode_block(bytes, &mut bytes_dec)?;
165            res.extend(&bytes_dec);
166        }
167        // Handle the last chunk of bytes with padding
168        let bytes_rem = &input[pos_last..];
169        // Save the position of the first padding character, if it exists
170        let pos_padding = bytes_rem.iter().position(|&x| x == b'=');
171        // 'A' decodes to zero
172        let bytes = {
173            let mut ret = bytes_rem
174                .iter()
175                .cloned()
176                .map(|x| if x == b'=' { b'A' } else { x })
177                .collect::<Vec<u8>>();
178            ret.resize(4, b'A');
179            ret
180        };
181        decode_block(&bytes, &mut bytes_dec)?;
182        match pos_padding {
183            Some(2) => {
184                res.extend(&bytes_dec[0..1]);
185            }
186            Some(3) => {
187                res.extend(&bytes_dec[0..2]);
188            }
189            _ => {
190                res.extend(&bytes_dec);
191            }
192        }
193        Ok(())
194    }
195
196    fn is_decodable(&self, input: &str) -> bool {
197        let rem = input.len() % 4;
198        let pos_last = if rem == 0 {
199            input.len() - 4
200        } else {
201            input.len() - rem
202        };
203        for &byte in &input.as_bytes()[0..pos_last] {
204            if self.decode_byte(byte).is_err() {
205                return false;
206            }
207        }
208        // In the last chunk padding character "=" is valid
209        for &byte in &input.as_bytes()[pos_last..] {
210            if byte != b'=' && self.decode_byte(byte).is_err() {
211                return false;
212            }
213        }
214        true
215    }
216}