dec_sixbit/
decode.rs

1//! Functions for decoding DEC SIXBIT-encoded bytes back into strings.
2//!
//! This module provides both checked and unchecked decoding functions. The checked function verifies
//! that the number of input bytes matches the requested string length, while the unchecked function
//! assumes the caller has already guaranteed this, for increased performance.
6
7use crate::{Error, ASCII_OFFSET, MASK_SIX_BITS};
8
9/// This function converts a slice of SIXBIT-encoded bytes into the original string based on the provided length.
10///
11/// # Parameters
12/// - `bytes`: A slice of bytes containing SIXBIT-encoded data.
13/// - `len`: The length of the original string.
14///
15/// # Errors
16/// Returns an [`Error::InvalidBytesLength`] if `bytes.len()` and `len` are inconsistent.
17///
18/// # Examples
19///
20/// ```rust
21/// use dec_sixbit::{encode, decode};
22///
23/// let input = "HELLO";
24/// let (encoded_bytes, length) = encode(input).unwrap();
25/// let decoded_string = decode(&encoded_bytes, length).unwrap();
26/// assert_eq!(decoded_string, input);
27/// ```
28#[inline(always)]
29pub fn decode(bytes: &[u8], len: usize) -> Result<String, Error> {
30    if bytes.len() != (len * 6 + 7) / 8 {
31        return Err(Error::InvalidBytesLength);
32    }
33    Ok(decode_core(bytes, len))
34}
35
36/// This function performs decoding without validating whether the SIXBIT values are within the
37/// valid range or whether the resulting bytes form a valid UTF-8 string. Use this function only
38/// when you are certain the input is valid to avoid undefined behavior.
39///
40/// # Safety
41/// The `bytes` slice must contain valid SIXBIT-encoded data:
42/// - The `len` must accurately reflect the number of original characters.
43///
44/// # Parameters
45/// - `bytes`: A slice of bytes containing SIXBIT-encoded data.
46/// - `len`: The length of the original string.
47///
48/// # Returns
49/// The decoded string.
50///
51/// # Examples
52///
53/// ```rust
54/// use dec_sixbit::{encode, decode_unchecked};
55///
56/// let input = "HELLO";
57/// let (encoded_bytes, length) = encode(input).unwrap();
58/// let decoded_string = decode_unchecked(&encoded_bytes, length);
59/// assert_eq!(decoded_string, input);
60/// ```
61#[inline(always)]
62pub fn decode_unchecked(bytes: &[u8], len: usize) -> String {
63    decode_core(bytes, len)
64}
65
66#[inline(always)]
67fn decode_core(bytes: &[u8], len: usize) -> String {
68    if len == 0 {
69        return String::new();
70    }
71
72    let mut result = vec![0u8; len];
73    let full_chunks = len / 4;
74    let remaining_chars = len % 4;
75
76    let bytes_ptr = bytes.as_ptr();
77    let result_ptr: *mut u8 = result.as_mut_ptr();
78
79    unsafe {
80        // Process full chunks
81        for chunk_idx in 0..full_chunks {
82            let byte_idx = chunk_idx * 3;
83            let str_idx = chunk_idx * 4;
84
85            // Load 3 bytes into a 32-bit integer and perform bit operations in a single step
86            let bytes = ((*bytes_ptr.add(byte_idx) as u32) << 16)
87                      | ((*bytes_ptr.add(byte_idx + 1) as u32) << 8)
88                      | (*bytes_ptr.add(byte_idx + 2) as u32);
89
90            // Extract 6-bit values and add ASCII offset in one operation per byte
91            let char1 = ((bytes >> 18) as u8 & MASK_SIX_BITS) + ASCII_OFFSET;
92            let char2 = ((bytes >> 12) as u8 & MASK_SIX_BITS) + ASCII_OFFSET;
93            let char3 = ((bytes >> 6) as u8 & MASK_SIX_BITS) + ASCII_OFFSET;
94            let char4 = (bytes as u8 & MASK_SIX_BITS) + ASCII_OFFSET;
95
96            // Store results with sequential memory access
97            *result_ptr.add(str_idx) = char1;
98            *result_ptr.add(str_idx + 1) = char2;
99            *result_ptr.add(str_idx + 2) = char3;
100            *result_ptr.add(str_idx + 3) = char4;
101        }
102
103        // Process remaining characters
104        match remaining_chars {
105            0 => {},
106            1 => {
107                let byte0 = *bytes_ptr.add(full_chunks * 3);
108                let char1 = (byte0 >> 2) + ASCII_OFFSET;
109                *result_ptr.add(full_chunks * 4) = char1;
110            },
111            2 => {
112                let byte0 = *bytes_ptr.add(full_chunks * 3);
113                let byte1 = *bytes_ptr.add(full_chunks * 3 + 1);
114                let char1 = (byte0 >> 2) + ASCII_OFFSET;
115                let char2 = (((byte0 & 0b00000011) << 4) | (byte1 >> 4)) + ASCII_OFFSET;
116                *result_ptr.add(full_chunks * 4) = char1;
117                *result_ptr.add(full_chunks * 4 + 1) = char2;
118            },
119            3 => {
120                let byte0 = *bytes_ptr.add(full_chunks * 3);
121                let byte1 = *bytes_ptr.add(full_chunks * 3 + 1);
122                let byte2 = *bytes_ptr.add(full_chunks * 3 + 2);
123                let char1 = (byte0 >> 2) + ASCII_OFFSET;
124                let char2 = (((byte0 & 0b00000011) << 4) | (byte1 >> 4)) + ASCII_OFFSET;
125                let char3 = (((byte1 & 0b00001111) << 2) | (byte2 >> 6)) + ASCII_OFFSET;
126                *result_ptr.add(full_chunks * 4) = char1;
127                *result_ptr.add(full_chunks * 4 + 1) = char2;
128                *result_ptr.add(full_chunks * 4 + 2) = char3;
129            },
130            _ => unreachable!(),
131        }
132    }
133
134    // SAFETY: Each byte of result is guaranteed to fit to any ASCII printable character
135    unsafe { String::from_utf8_unchecked(result) }
136}
137
#[cfg(test)]
mod tests {
    use super::*;

    /// Zero characters from zero bytes decode to the empty string.
    #[test]
    fn test_decode_empty() {
        let no_bytes: [u8; 0] = [];
        assert_eq!(decode(&no_bytes, 0).unwrap(), "");
    }

    /// A string whose length is not a multiple of four survives an encode/decode round trip.
    #[test]
    fn test_decode_basic() {
        let original = "HELLO";
        let (packed, char_count) = crate::encode(original).unwrap();
        assert_eq!(decode(&packed, char_count).unwrap(), original);
    }

    /// The unchecked variant yields the same round-trip result for well-formed input.
    #[test]
    fn test_decode_unchecked() {
        let original = "WORLD";
        let (packed, char_count) = crate::encode(original).unwrap();
        assert_eq!(decode_unchecked(&packed, char_count), original);
    }

    /// Three characters need three bytes, so two bytes must be rejected.
    #[test]
    fn test_invalid_length() {
        assert!(decode(&[0u8; 2], 3).is_err());
    }

    /// A non-zero length paired with an empty slice must be rejected.
    #[test]
    fn test_not_zero_len_but_empty() {
        let no_bytes = [0u8; 0];
        assert!(decode(&no_bytes, 1).is_err());
    }
}
177}