dec_sixbit/
encode.rs

1//! Functions for encoding strings into DEC SIXBIT format.
2//!
3//! This module provides both checked and unchecked encoding functions. The safe functions perform validation
4//! to ensure all characters are within the valid SIXBIT range, while the unchecked functions assume the input
5//! is already valid for increased performance.
6
7use crate::{Error, MASK_FOUR_BITS, MASK_TWO_BITS, ASCII_OFFSET, SHIFT_TWO_BITS, SHIFT_FOUR_BITS, SHIFT_SIX_BITS};
8
9/// This function converts the input string into a compact SIXBIT-encoded byte vector and returns the
10/// encoded bytes along with the original string length.
11///
12/// # Constraints
13/// - Only ASCII characters in the range 32-95 (space through underscore) are allowed.
14///
15/// # Errors
16/// Returns an [`Error::InvalidCharacter`] if the input contains characters outside the valid range.
17///
18/// # Examples
19///
20/// ```rust
21/// use dec_sixbit::encode;
22///
23/// let input = "HELLO";
24/// let (encoded_bytes, length) = encode(input).unwrap();
25/// ```
26#[inline(always)]
27pub fn encode(str: &str) -> Result<(Vec<u8>, usize), Error> {
28    // Check if input string contains only ASCII characters
29    if !str.is_ascii() {
30        return Err(Error::InvalidCharacter);
31    }
32    let len = str.len();
33    // Every 4 characters need 3 bytes, round up
34    let bytes_needed = (len * 3 + 3) / 4;
35    let mut bytes = vec![0u8; bytes_needed];
36
37    let full_chunks = len / 4;
38    let remaining = len % 4;
39
40    for chunk_idx in 0..full_chunks {
41        let start = chunk_idx * 4;
42        let chunk = &str.as_bytes()[start..start + 4];
43
44        // Validate characters
45        for &code in chunk {
46            if !(ASCII_OFFSET..=95).contains(&code) {
47                return Err(Error::InvalidCharacter);
48            }
49        }
50
51        // Convert to SIXBIT values by subtracting ASCII_OFFSET
52        let a = chunk[0] - ASCII_OFFSET;
53        let b = chunk[1] - ASCII_OFFSET;
54        let c = chunk[2] - ASCII_OFFSET;
55        let d = chunk[3] - ASCII_OFFSET;
56
57        let byte_idx = chunk_idx * 3;
58
59        // Pack 4 SIXBIT values into 3 bytes
60        bytes[byte_idx] = (a << SHIFT_TWO_BITS) | (b >> SHIFT_FOUR_BITS);
61        bytes[byte_idx + 1] = ((b & MASK_FOUR_BITS) << SHIFT_FOUR_BITS) | (c >> SHIFT_TWO_BITS);
62        bytes[byte_idx + 2] = ((c & MASK_TWO_BITS) << SHIFT_SIX_BITS) | d;
63    }
64
65    // Handle the remaining 1-3 characters, if any
66    if remaining > 0 {
67        let start = full_chunks * 4;
68        let chunk = &str.as_bytes()[start..];
69        let byte_idx = full_chunks * 3;
70
71        match chunk.len() {
72            3 => {
73                // Validate characters
74                for &code in chunk {
75                    if !(ASCII_OFFSET..=95).contains(&code) {
76                        return Err(Error::InvalidCharacter);
77                    }
78                }
79
80                // Convert to SIXBIT values by subtracting ASCII_OFFSET
81                let a = chunk[0] - ASCII_OFFSET;
82                let b = chunk[1] - ASCII_OFFSET;
83                let c = chunk[2] - ASCII_OFFSET;
84
85                // Pack 3 SIXBIT values into 2.25 bytes (rounded up to 3 bytes)
86                bytes[byte_idx] = (a << SHIFT_TWO_BITS) | (b >> SHIFT_FOUR_BITS);
87                bytes[byte_idx + 1] = ((b & MASK_FOUR_BITS) << SHIFT_FOUR_BITS) | (c >> SHIFT_TWO_BITS);
88                bytes[byte_idx + 2] = (c & MASK_TWO_BITS) << SHIFT_SIX_BITS;
89            },
90            2 => {
91                // Validate characters
92                for &code in chunk {
93                    if !(ASCII_OFFSET..=95).contains(&code) {
94                        return Err(Error::InvalidCharacter);
95                    }
96                }
97
98                // Convert to SIXBIT values by subtracting ASCII_OFFSET
99                let a = chunk[0] - ASCII_OFFSET;
100                let b = chunk[1] - ASCII_OFFSET;
101
102                // Pack 2 SIXBIT values into 1.5 bytes (rounded up to 2 bytes)
103                bytes[byte_idx] = (a << SHIFT_TWO_BITS) | (b >> SHIFT_FOUR_BITS);
104                bytes[byte_idx + 1] = (b & MASK_FOUR_BITS) << SHIFT_FOUR_BITS;
105            },
106            1 => {
107                // Validate character
108                let code = chunk[0];
109                if !(ASCII_OFFSET..=95).contains(&code) {
110                    return Err(Error::InvalidCharacter);
111                }
112
113                // Convert to SIXBIT value by subtracting ASCII_OFFSET
114                let a = code - ASCII_OFFSET;
115
116                // Pack 1 SIXBIT value into 0.75 bytes (rounded up to 1 byte)
117                bytes[byte_idx] = a << SHIFT_TWO_BITS;
118            },
119            _ => unreachable!(),
120        }
121    }
122
123    Ok((bytes, len))
124}
125
126/// This function performs encoding without validating whether the input string contains only
127/// valid SIXBIT characters (ASCII 32-95). Use this function only when you are certain the input
128/// meets the required constraints to avoid undefined behavior.
129///
130/// # Safety
131/// The caller must ensure that all characters in `str` are within the valid SIXBIT range (32-95).
132///
133/// # Examples
134///
135/// ```rust
136/// use dec_sixbit::encode_unchecked;
137///
138/// let input = "HELLO";
139/// let (encoded_bytes, length) = encode_unchecked(input);
140/// ```
141#[inline(always)]
142pub fn encode_unchecked(str: &str) -> (Vec<u8>, usize) {
143    let len = str.len();
144    // Every 4 characters need 3 bytes, round up
145    let bytes_needed = (len * 3 + 3) / 4;
146    let mut bytes = vec![0u8; bytes_needed];
147
148    let full_chunks = len / 4;
149    let remaining = len % 4;
150
151    for chunk_idx in 0..full_chunks {
152        let start = chunk_idx * 4;
153        let chunk = &str.as_bytes()[start..start + 4];
154
155        // Convert to SIXBIT values by subtracting ASCII_OFFSET directly
156        let a = chunk[0] - ASCII_OFFSET;
157        let b = chunk[1] - ASCII_OFFSET;
158        let c = chunk[2] - ASCII_OFFSET;
159        let d = chunk[3] - ASCII_OFFSET;
160
161        let byte_idx = chunk_idx * 3;
162
163        // Pack 4 SIXBIT values into 3 bytes
164        bytes[byte_idx] = (a << SHIFT_TWO_BITS) | (b >> SHIFT_FOUR_BITS);
165        bytes[byte_idx + 1] = ((b & MASK_FOUR_BITS) << SHIFT_FOUR_BITS) | (c >> SHIFT_TWO_BITS);
166        bytes[byte_idx + 2] = ((c & MASK_TWO_BITS) << SHIFT_SIX_BITS) | d;
167    }
168
169    // Handle the remaining 1-3 characters, if any
170    if remaining > 0 {
171        let start = full_chunks * 4;
172        let chunk = &str.as_bytes()[start..];
173        let byte_idx = full_chunks * 3;
174
175        match chunk.len() {
176            3 => {
177                // Convert to SIXBIT values by subtracting ASCII_OFFSET directly
178                let a = chunk[0] - ASCII_OFFSET;
179                let b = chunk[1] - ASCII_OFFSET;
180                let c = chunk[2] - ASCII_OFFSET;
181
182                // Pack 3 SIXBIT values into 2.25 bytes (rounded up to 3 bytes)
183                bytes[byte_idx] = (a << SHIFT_TWO_BITS) | (b >> SHIFT_FOUR_BITS);
184                bytes[byte_idx + 1] = ((b & MASK_FOUR_BITS) << SHIFT_FOUR_BITS) | (c >> SHIFT_TWO_BITS);
185                bytes[byte_idx + 2] = (c & MASK_TWO_BITS) << SHIFT_SIX_BITS;
186            },
187            2 => {
188                // Convert to SIXBIT values by subtracting ASCII_OFFSET directly
189                let a = chunk[0] - ASCII_OFFSET;
190                let b = chunk[1] - ASCII_OFFSET;
191
192                // Pack 2 SIXBIT values into 1.5 bytes (rounded up to 2 bytes)
193                bytes[byte_idx] = (a << SHIFT_TWO_BITS) | (b >> SHIFT_FOUR_BITS);
194                bytes[byte_idx + 1] = (b & MASK_FOUR_BITS) << SHIFT_FOUR_BITS;
195            },
196            1 => {
197                // Convert to SIXBIT value by subtracting ASCII_OFFSET directly
198                let a = chunk[0] - ASCII_OFFSET;
199
200                // Pack 1 SIXBIT value into 0.75 bytes (rounded up to 1 byte)
201                bytes[byte_idx] = a << SHIFT_TWO_BITS;
202            },
203            _ => unreachable!(),
204        }
205    }
206
207    (bytes, len)
208}
209
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the SIXBIT code of an ASCII byte; every byte passed in by these tests lies
    /// within the valid 32-95 range, so the subtraction cannot underflow.
    fn sixbit(ascii: u8) -> u8 {
        ascii - ASCII_OFFSET
    }

    /// Inputs whose length is not a multiple of four, paired with their expected encoding and
    /// expected length. Shared by the checked and unchecked partial-chunk tests so both are
    /// held to the same expectations.
    fn partial_chunk_cases() -> Vec<(&'static str, Vec<u8>, usize)> {
        let a = sixbit(b'A');
        let b = sixbit(b'B');
        let c = sixbit(b'C');
        let d = sixbit(b'D');
        let e = sixbit(b'E');

        vec![
            ("A", vec![a << SHIFT_TWO_BITS], 1),
            (
                "AB",
                vec![
                    (a << SHIFT_TWO_BITS) | (b >> SHIFT_FOUR_BITS),
                    (b & MASK_FOUR_BITS) << SHIFT_FOUR_BITS,
                ],
                2,
            ),
            (
                "ABC",
                vec![
                    (a << SHIFT_TWO_BITS) | (b >> SHIFT_FOUR_BITS),
                    ((b & MASK_FOUR_BITS) << SHIFT_FOUR_BITS) | (c >> SHIFT_TWO_BITS),
                    (c & MASK_TWO_BITS) << SHIFT_SIX_BITS,
                ],
                3,
            ),
            (
                "ABCDE",
                vec![
                    // "ABCD"
                    (a << SHIFT_TWO_BITS) | (b >> SHIFT_FOUR_BITS),
                    ((b & MASK_FOUR_BITS) << SHIFT_FOUR_BITS) | (c >> SHIFT_TWO_BITS),
                    ((c & MASK_TWO_BITS) << SHIFT_SIX_BITS) | d,
                    // "E"
                    e << SHIFT_TWO_BITS,
                ],
                5,
            ),
        ]
    }

    #[test]
    fn test_encode_empty_string() {
        let (encoded, len) = encode("").expect("Encoding should succeed for empty string");
        assert!(encoded.is_empty(), "Encoded bytes should be empty");
        assert_eq!(len, 0, "Length should be 0");
    }

    #[test]
    fn test_encode_single_character() {
        let (encoded, len) = encode("A").expect("Encoding should succeed for single character");
        let expected = vec![sixbit(b'A') << SHIFT_TWO_BITS];
        assert_eq!(encoded, expected, "Encoded bytes do not match expected value");
        assert_eq!(len, 1, "Length should be 1");
    }

    #[test]
    fn test_encode_two_characters() {
        let (encoded, len) = encode("AB").expect("Encoding should succeed for two characters");
        let a = sixbit(b'A');
        let b = sixbit(b'B');
        let expected = vec![
            (a << SHIFT_TWO_BITS) | (b >> SHIFT_FOUR_BITS),
            (b & MASK_FOUR_BITS) << SHIFT_FOUR_BITS,
        ];
        assert_eq!(encoded, expected, "Encoded bytes do not match expected value for two characters");
        assert_eq!(len, 2, "Length should be 2");
    }

    #[test]
    fn test_encode_three_characters() {
        let (encoded, len) = encode("ABC").expect("Encoding should succeed for three characters");
        let a = sixbit(b'A');
        let b = sixbit(b'B');
        let c = sixbit(b'C');
        let expected = vec![
            (a << SHIFT_TWO_BITS) | (b >> SHIFT_FOUR_BITS),
            ((b & MASK_FOUR_BITS) << SHIFT_FOUR_BITS) | (c >> SHIFT_TWO_BITS),
            (c & MASK_TWO_BITS) << SHIFT_SIX_BITS,
        ];
        assert_eq!(encoded, expected, "Encoded bytes do not match expected value for three characters");
        assert_eq!(len, 3, "Length should be 3");
    }

    #[test]
    fn test_encode_four_characters() {
        let (encoded, len) = encode("ABCD").expect("Encoding should succeed for four characters");
        let a = sixbit(b'A');
        let b = sixbit(b'B');
        let c = sixbit(b'C');
        let d = sixbit(b'D');
        let expected = vec![
            (a << SHIFT_TWO_BITS) | (b >> SHIFT_FOUR_BITS),
            ((b & MASK_FOUR_BITS) << SHIFT_FOUR_BITS) | (c >> SHIFT_TWO_BITS),
            ((c & MASK_TWO_BITS) << SHIFT_SIX_BITS) | d,
        ];
        assert_eq!(encoded, expected, "Encoded bytes do not match expected value for four characters");
        assert_eq!(len, 4, "Length should be 4");
    }

    #[test]
    fn test_encode_multiple_chunks() {
        let input = "HELLOWORLD_ "; // 12 characters: "HELL", "OWOR", "LD_ "
        let (encoded, len) = encode(input).expect("Encoding should succeed for multiple chunks");
        assert_eq!(len, input.len(), "Length should match input length");

        // Compute the expected bytes independently, one full four-character group at a time.
        let mut expected = Vec::new();
        for group in input.as_bytes().chunks_exact(4) {
            let a = sixbit(group[0]);
            let b = sixbit(group[1]);
            let c = sixbit(group[2]);
            let d = sixbit(group[3]);

            expected.push((a << SHIFT_TWO_BITS) | (b >> SHIFT_FOUR_BITS));
            expected.push(((b & MASK_FOUR_BITS) << SHIFT_FOUR_BITS) | (c >> SHIFT_TWO_BITS));
            expected.push(((c & MASK_TWO_BITS) << SHIFT_SIX_BITS) | d);
        }

        assert_eq!(encoded, expected, "Encoded bytes do not match expected value for multiple chunks");
    }

    #[test]
    fn test_encode_range_boundaries() {
        // Space (32) and underscore (95) are the lowest and highest valid characters.
        let (encoded, len) = encode(" _").expect("Encoding should succeed for boundary characters");
        let low = sixbit(b' ');
        let high = sixbit(b'_');
        let expected = vec![
            (low << SHIFT_TWO_BITS) | (high >> SHIFT_FOUR_BITS),
            (high & MASK_FOUR_BITS) << SHIFT_FOUR_BITS,
        ];
        assert_eq!(encoded, expected, "Encoded bytes do not match expected value for boundary characters");
        assert_eq!(len, 2, "Length should be 2");

        // Backtick (96) is the first character above the valid range.
        assert!(matches!(encode("`"), Err(Error::InvalidCharacter)), "Should return InvalidCharacter error for ASCII 96");
    }

    #[test]
    fn test_encode_with_invalid_character_non_ascii() {
        let input = "Hello€"; // '€' is not ASCII
        let result = encode(input);
        assert!(matches!(result, Err(Error::InvalidCharacter)), "Should return InvalidCharacter error for non-ASCII characters");
    }

    #[test]
    fn test_encode_with_invalid_character_below_range() {
        let input = "HELLO\x1F"; // ASCII 31, below valid range
        let result = encode(input);
        assert!(matches!(result, Err(Error::InvalidCharacter)), "Should return InvalidCharacter error for characters below range");
    }

    #[test]
    fn test_encode_with_invalid_character_above_range() {
        let input = "HELLO~"; // '~' is ASCII 126, above valid range
        let result = encode(input);
        assert!(matches!(result, Err(Error::InvalidCharacter)), "Should return InvalidCharacter error for characters above range");
    }

    #[test]
    fn test_encode_with_invalid_character_in_full_chunk() {
        // The other invalid-character tests place the bad byte in the trailing partial group;
        // here it sits inside the first complete group of four.
        let input = "HEL~OWOR";
        let result = encode(input);
        assert!(matches!(result, Err(Error::InvalidCharacter)), "Should return InvalidCharacter error for invalid characters inside a full chunk");
    }

    #[test]
    fn test_encode_unchecked_valid_input() {
        let input = "ABCD";
        let (checked_encoded, _) = encode(input).expect("Safe encode should succeed for valid input");
        let (unchecked_encoded, _) = encode_unchecked(input);
        assert_eq!(checked_encoded, unchecked_encoded, "Unchecked encoding should match safe encoding for valid input");
    }

    #[test]
    fn test_encode_unchecked_empty_string() {
        let (encoded, len) = encode_unchecked("");
        assert!(encoded.is_empty(), "Encoded bytes should be empty for empty string");
        assert_eq!(len, 0, "Length should be 0 for empty string");
    }

    #[test]
    fn test_encode_unchecked_large_input() {
        let input = "THEQUICKBROWNFOXJUMPSOVERTHELAZYDOG_12345";
        let (checked_encoded, len_checked) = encode(input).expect("Safe encode should succeed for large input");
        let (unchecked_encoded, len_unchecked) = encode_unchecked(input);
        assert_eq!(checked_encoded, unchecked_encoded, "Unchecked encoding should match safe encoding for large input");
        assert_eq!(len_checked, len_unchecked, "Lengths should match for large input");
    }

    #[test]
    fn test_encode_partial_chunks() {
        // Inputs that end in a partial chunk (1-3 remaining characters).
        for (input, expected, len) in partial_chunk_cases() {
            let (encoded, encoded_len) = encode(input).expect("Encoding should succeed");
            assert_eq!(encoded, expected, "Encoded bytes do not match for input '{}'", input);
            assert_eq!(encoded_len, len, "Length does not match for input '{}'", input);
        }
    }

    #[test]
    fn test_encode_unchecked_two_characters() {
        let input = "AB";
        // The unchecked encoder must agree with the checked one on valid input.
        let (checked_encoded, _) = encode(input).expect("Safe encode should succeed for two characters");
        let (unchecked_encoded, _) = encode_unchecked(input);
        assert_eq!(checked_encoded, unchecked_encoded, "Unchecked encoding should match safe encoding for two characters");
    }

    #[test]
    fn test_encode_unchecked_three_characters() {
        let input = "ABC";
        // The unchecked encoder must agree with the checked one on valid input.
        let (checked_encoded, _) = encode(input).expect("Safe encode should succeed for three characters");
        let (unchecked_encoded, _) = encode_unchecked(input);
        assert_eq!(checked_encoded, unchecked_encoded, "Unchecked encoding should match safe encoding for three characters");
    }

    #[test]
    fn test_encode_unchecked_partial_chunks() {
        // Both encoders are checked against the same independently computed expectations.
        for (input, expected, len) in partial_chunk_cases() {
            let (checked_encoded, encoded_len_checked) = encode(input).expect("Safe encode should succeed");
            let (unchecked_encoded, encoded_len_unchecked) = encode_unchecked(input);
            assert_eq!(checked_encoded, expected, "Safe encoding does not match expected for input '{}'", input);
            assert_eq!(unchecked_encoded, expected, "Unchecked encoding does not match expected for input '{}'", input);
            assert_eq!(encoded_len_checked, len, "Length does not match expected value for input '{}'", input);
            assert_eq!(encoded_len_unchecked, len, "Length should be correct for input '{}'", input);
        }
    }
}