1use crate::{Error, MASK_FOUR_BITS, MASK_TWO_BITS, ASCII_OFFSET, SHIFT_TWO_BITS, SHIFT_FOUR_BITS, SHIFT_SIX_BITS};
8
9#[inline(always)]
27pub fn encode(str: &str) -> Result<(Vec<u8>, usize), Error> {
28 if !str.is_ascii() {
30 return Err(Error::InvalidCharacter);
31 }
32 let len = str.len();
33 let bytes_needed = (len * 3 + 3) / 4;
35 let mut bytes = vec![0u8; bytes_needed];
36
37 let full_chunks = len / 4;
38 let remaining = len % 4;
39
40 for chunk_idx in 0..full_chunks {
41 let start = chunk_idx * 4;
42 let chunk = &str.as_bytes()[start..start + 4];
43
44 for &code in chunk {
46 if !(ASCII_OFFSET..=95).contains(&code) {
47 return Err(Error::InvalidCharacter);
48 }
49 }
50
51 let a = chunk[0] - ASCII_OFFSET;
53 let b = chunk[1] - ASCII_OFFSET;
54 let c = chunk[2] - ASCII_OFFSET;
55 let d = chunk[3] - ASCII_OFFSET;
56
57 let byte_idx = chunk_idx * 3;
58
59 bytes[byte_idx] = (a << SHIFT_TWO_BITS) | (b >> SHIFT_FOUR_BITS);
61 bytes[byte_idx + 1] = ((b & MASK_FOUR_BITS) << SHIFT_FOUR_BITS) | (c >> SHIFT_TWO_BITS);
62 bytes[byte_idx + 2] = ((c & MASK_TWO_BITS) << SHIFT_SIX_BITS) | d;
63 }
64
65 if remaining > 0 {
67 let start = full_chunks * 4;
68 let chunk = &str.as_bytes()[start..];
69 let byte_idx = full_chunks * 3;
70
71 match chunk.len() {
72 3 => {
73 for &code in chunk {
75 if !(ASCII_OFFSET..=95).contains(&code) {
76 return Err(Error::InvalidCharacter);
77 }
78 }
79
80 let a = chunk[0] - ASCII_OFFSET;
82 let b = chunk[1] - ASCII_OFFSET;
83 let c = chunk[2] - ASCII_OFFSET;
84
85 bytes[byte_idx] = (a << SHIFT_TWO_BITS) | (b >> SHIFT_FOUR_BITS);
87 bytes[byte_idx + 1] = ((b & MASK_FOUR_BITS) << SHIFT_FOUR_BITS) | (c >> SHIFT_TWO_BITS);
88 bytes[byte_idx + 2] = (c & MASK_TWO_BITS) << SHIFT_SIX_BITS;
89 },
90 2 => {
91 for &code in chunk {
93 if !(ASCII_OFFSET..=95).contains(&code) {
94 return Err(Error::InvalidCharacter);
95 }
96 }
97
98 let a = chunk[0] - ASCII_OFFSET;
100 let b = chunk[1] - ASCII_OFFSET;
101
102 bytes[byte_idx] = (a << SHIFT_TWO_BITS) | (b >> SHIFT_FOUR_BITS);
104 bytes[byte_idx + 1] = (b & MASK_FOUR_BITS) << SHIFT_FOUR_BITS;
105 },
106 1 => {
107 let code = chunk[0];
109 if !(ASCII_OFFSET..=95).contains(&code) {
110 return Err(Error::InvalidCharacter);
111 }
112
113 let a = code - ASCII_OFFSET;
115
116 bytes[byte_idx] = a << SHIFT_TWO_BITS;
118 },
119 _ => unreachable!(),
120 }
121 }
122
123 Ok((bytes, len))
124}
125
126#[inline(always)]
142pub fn encode_unchecked(str: &str) -> (Vec<u8>, usize) {
143 let len = str.len();
144 let bytes_needed = (len * 3 + 3) / 4;
146 let mut bytes = vec![0u8; bytes_needed];
147
148 let full_chunks = len / 4;
149 let remaining = len % 4;
150
151 for chunk_idx in 0..full_chunks {
152 let start = chunk_idx * 4;
153 let chunk = &str.as_bytes()[start..start + 4];
154
155 let a = chunk[0] - ASCII_OFFSET;
157 let b = chunk[1] - ASCII_OFFSET;
158 let c = chunk[2] - ASCII_OFFSET;
159 let d = chunk[3] - ASCII_OFFSET;
160
161 let byte_idx = chunk_idx * 3;
162
163 bytes[byte_idx] = (a << SHIFT_TWO_BITS) | (b >> SHIFT_FOUR_BITS);
165 bytes[byte_idx + 1] = ((b & MASK_FOUR_BITS) << SHIFT_FOUR_BITS) | (c >> SHIFT_TWO_BITS);
166 bytes[byte_idx + 2] = ((c & MASK_TWO_BITS) << SHIFT_SIX_BITS) | d;
167 }
168
169 if remaining > 0 {
171 let start = full_chunks * 4;
172 let chunk = &str.as_bytes()[start..];
173 let byte_idx = full_chunks * 3;
174
175 match chunk.len() {
176 3 => {
177 let a = chunk[0] - ASCII_OFFSET;
179 let b = chunk[1] - ASCII_OFFSET;
180 let c = chunk[2] - ASCII_OFFSET;
181
182 bytes[byte_idx] = (a << SHIFT_TWO_BITS) | (b >> SHIFT_FOUR_BITS);
184 bytes[byte_idx + 1] = ((b & MASK_FOUR_BITS) << SHIFT_FOUR_BITS) | (c >> SHIFT_TWO_BITS);
185 bytes[byte_idx + 2] = (c & MASK_TWO_BITS) << SHIFT_SIX_BITS;
186 },
187 2 => {
188 let a = chunk[0] - ASCII_OFFSET;
190 let b = chunk[1] - ASCII_OFFSET;
191
192 bytes[byte_idx] = (a << SHIFT_TWO_BITS) | (b >> SHIFT_FOUR_BITS);
194 bytes[byte_idx + 1] = (b & MASK_FOUR_BITS) << SHIFT_FOUR_BITS;
195 },
196 1 => {
197 let a = chunk[0] - ASCII_OFFSET;
199
200 bytes[byte_idx] = a << SHIFT_TWO_BITS;
202 },
203 _ => unreachable!(),
204 }
205 }
206
207 (bytes, len)
208}
209
#[cfg(test)]
mod tests {
    use super::*;

    /// Reference packing for one group of one to four in-range characters.
    ///
    /// Missing characters count as a six-bit value of zero; a partial group of `n` characters
    /// yields `n` bytes and a full group yields three.
    fn pack_group(group: &[u8]) -> Vec<u8> {
        let sextet = |idx: usize| group.get(idx).map_or(0, |&code| code - ASCII_OFFSET);
        let (a, b, c, d) = (sextet(0), sextet(1), sextet(2), sextet(3));
        let packed = [
            (a << SHIFT_TWO_BITS) | (b >> SHIFT_FOUR_BITS),
            ((b & MASK_FOUR_BITS) << SHIFT_FOUR_BITS) | (c >> SHIFT_TWO_BITS),
            ((c & MASK_TWO_BITS) << SHIFT_SIX_BITS) | d,
        ];
        packed[..group.len().min(3)].to_vec()
    }

    /// Reference encoding of a whole in-range string, built group by group.
    fn expected_encoding(input: &str) -> Vec<u8> {
        input.as_bytes().chunks(4).flat_map(pack_group).collect()
    }

    #[test]
    fn test_encode_empty_string() {
        let (encoded, len) = encode("").expect("Encoding should succeed for empty string");
        assert!(encoded.is_empty(), "Encoded bytes should be empty");
        assert_eq!(len, 0, "Length should be 0");
    }

    #[test]
    fn test_encode_single_character() {
        let (encoded, len) = encode("A").expect("Encoding should succeed for single character");
        let expected = vec![(b'A' - ASCII_OFFSET) << SHIFT_TWO_BITS];
        assert_eq!(encoded, expected, "Encoded bytes do not match expected value");
        assert_eq!(len, 1, "Length should be 1");
    }

    #[test]
    fn test_encode_two_characters() {
        let (encoded, len) = encode("AB").expect("Encoding should succeed for two characters");
        let a = b'A' - ASCII_OFFSET;
        let b = b'B' - ASCII_OFFSET;
        let expected = vec![
            (a << SHIFT_TWO_BITS) | (b >> SHIFT_FOUR_BITS),
            (b & MASK_FOUR_BITS) << SHIFT_FOUR_BITS,
        ];
        assert_eq!(encoded, expected, "Encoded bytes do not match expected value for two characters");
        assert_eq!(len, 2, "Length should be 2");
    }

    #[test]
    fn test_encode_three_characters() {
        let (encoded, len) = encode("ABC").expect("Encoding should succeed for three characters");
        let a = b'A' - ASCII_OFFSET;
        let b = b'B' - ASCII_OFFSET;
        let c = b'C' - ASCII_OFFSET;
        let expected = vec![
            (a << SHIFT_TWO_BITS) | (b >> SHIFT_FOUR_BITS),
            ((b & MASK_FOUR_BITS) << SHIFT_FOUR_BITS) | (c >> SHIFT_TWO_BITS),
            (c & MASK_TWO_BITS) << SHIFT_SIX_BITS,
        ];
        assert_eq!(encoded, expected, "Encoded bytes do not match expected value for three characters");
        assert_eq!(len, 3, "Length should be 3");
    }

    #[test]
    fn test_encode_four_characters() {
        let (encoded, len) = encode("ABCD").expect("Encoding should succeed for four characters");
        let a = b'A' - ASCII_OFFSET;
        let b = b'B' - ASCII_OFFSET;
        let c = b'C' - ASCII_OFFSET;
        let d = b'D' - ASCII_OFFSET;
        let expected = vec![
            (a << SHIFT_TWO_BITS) | (b >> SHIFT_FOUR_BITS),
            ((b & MASK_FOUR_BITS) << SHIFT_FOUR_BITS) | (c >> SHIFT_TWO_BITS),
            ((c & MASK_TWO_BITS) << SHIFT_SIX_BITS) | d,
        ];
        assert_eq!(encoded, expected, "Encoded bytes do not match expected value for four characters");
        assert_eq!(len, 4, "Length should be 4");
    }

    #[test]
    fn test_encode_multiple_chunks() {
        // Twelve characters: exactly three full groups ("HELL", "OWOR", "LD_ ").
        let input = "HELLOWORLD_ ";
        let (encoded, len) = encode(input).expect("Encoding should succeed for multiple chunks");
        assert_eq!(len, input.len(), "Length should match input length");
        assert_eq!(
            encoded,
            expected_encoding(input),
            "Encoded bytes do not match expected value for multiple chunks"
        );
    }

    #[test]
    fn test_encode_with_invalid_character_non_ascii() {
        let result = encode("Hello€");
        assert!(
            matches!(result, Err(Error::InvalidCharacter)),
            "Should return InvalidCharacter error for non-ASCII characters"
        );
    }

    #[test]
    fn test_encode_with_invalid_character_below_range() {
        let result = encode("HELLO\x1F");
        assert!(
            matches!(result, Err(Error::InvalidCharacter)),
            "Should return InvalidCharacter error for characters below range"
        );
    }

    #[test]
    fn test_encode_with_invalid_character_above_range() {
        let result = encode("HELLO~");
        assert!(
            matches!(result, Err(Error::InvalidCharacter)),
            "Should return InvalidCharacter error for characters above range"
        );
    }

    #[test]
    fn test_encode_with_invalid_character_in_full_chunk() {
        // The other invalid-character tests place the bad byte in the trailing partial group
        // (or rely on non-ASCII input); this one puts an ASCII byte below the range inside a
        // complete four-character group.
        let result = encode("AB\x1FD");
        assert!(
            matches!(result, Err(Error::InvalidCharacter)),
            "Should return InvalidCharacter error for an out-of-range character in a full chunk"
        );
    }

    #[test]
    fn test_encode_unchecked_valid_input() {
        let input = "ABCD";
        let (checked_encoded, _) = encode(input).expect("Safe encode should succeed for valid input");
        let (unchecked_encoded, _) = encode_unchecked(input);
        assert_eq!(
            checked_encoded, unchecked_encoded,
            "Unchecked encoding should match safe encoding for valid input"
        );
    }

    #[test]
    fn test_encode_unchecked_empty_string() {
        let (encoded, len) = encode_unchecked("");
        assert!(encoded.is_empty(), "Encoded bytes should be empty for empty string");
        assert_eq!(len, 0, "Length should be 0 for empty string");
    }

    #[test]
    fn test_encode_unchecked_large_input() {
        let input = "THEQUICKBROWNFOXJUMPSOVERTHELAZYDOG_12345";
        let (checked_encoded, len_checked) =
            encode(input).expect("Safe encode should succeed for large input");
        let (unchecked_encoded, len_unchecked) = encode_unchecked(input);
        assert_eq!(
            checked_encoded, unchecked_encoded,
            "Unchecked encoding should match safe encoding for large input"
        );
        assert_eq!(len_checked, len_unchecked, "Lengths should match for large input");
    }

    #[test]
    fn test_encode_partial_chunks() {
        // One, two and three trailing characters, plus a full group followed by one character.
        let cases = [("A", 1), ("AB", 2), ("ABC", 3), ("ABCDE", 5)];

        for &(input, len) in &cases {
            let (encoded, encoded_len) = encode(input).expect("Encoding should succeed");
            assert_eq!(
                encoded,
                expected_encoding(input),
                "Encoded bytes do not match for input '{}'",
                input
            );
            assert_eq!(encoded_len, len, "Length does not match for input '{}'", input);
        }
    }

    #[test]
    fn test_encode_unchecked_two_characters() {
        let input = "AB";
        let (checked_encoded, _) = encode(input).expect("Safe encode should succeed for two characters");
        let (unchecked_encoded, _) = encode_unchecked(input);
        assert_eq!(
            checked_encoded, unchecked_encoded,
            "Unchecked encoding should match safe encoding for two characters"
        );
    }

    #[test]
    fn test_encode_unchecked_three_characters() {
        let input = "ABC";
        let (checked_encoded, _) = encode(input).expect("Safe encode should succeed for three characters");
        let (unchecked_encoded, _) = encode_unchecked(input);
        assert_eq!(
            checked_encoded, unchecked_encoded,
            "Unchecked encoding should match safe encoding for three characters"
        );
    }

    #[test]
    fn test_encode_unchecked_partial_chunks() {
        // One, two and three trailing characters, plus a full group followed by one character.
        let cases = [("A", 1), ("AB", 2), ("ABC", 3), ("ABCDE", 5)];

        for &(input, len) in &cases {
            let expected = expected_encoding(input);
            let (checked_encoded, encoded_len_checked) =
                encode(input).expect("Safe encode should succeed");
            let (unchecked_encoded, encoded_len_unchecked) = encode_unchecked(input);
            assert_eq!(
                checked_encoded, expected,
                "Safe encoding does not match expected for input '{}'",
                input
            );
            assert_eq!(
                unchecked_encoded, expected,
                "Unchecked encoding does not match expected for input '{}'",
                input
            );
            assert_eq!(
                encoded_len_checked, len,
                "Length does not match expected value for input '{}'",
                input
            );
            assert_eq!(
                encoded_len_unchecked, len,
                "Length should be correct for input '{}'",
                input
            );
        }
    }
}