dec_sixbit/
encode.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
//! Functions for encoding strings into DEC SIXBIT format.
//!
//! This module provides both safe and unsafe encoding functions. The safe functions perform validation
//! to ensure all characters are within the valid SIXBIT range, while the unsafe functions assume the input
//! is already valid for increased performance.

use crate::Error;

/// Encodes a string into packed DEC SIXBIT bytes, validating every character.
///
/// Each input byte is turned into a 6-bit value by subtracting 32, and the values are packed
/// most-significant-bit first: four characters occupy three output bytes. A trailing group of one
/// to three characters is packed the same way, and the unused low bits of its last byte are zero.
///
/// On success the packed bytes are returned together with the byte length of the input, which is
/// needed to tell a zero-padded final byte apart from a real trailing character.
///
/// # Constraints
/// - Only ASCII characters in the range 32-95 (space through underscore) are allowed.
///
/// # Errors
/// Returns an [`Error::InvalidCharacter`] if the input contains characters outside the valid range.
///
/// # Examples
///
/// ```rust
/// use dec_sixbit::encode;
///
/// let input = "HELLO";
/// let (encoded_bytes, length) = encode(input).unwrap();
/// assert_eq!(length, 5);
/// ```
pub fn encode(str: &str) -> Result<(Vec<u8>, usize), Error> {
    /// Packs four 6-bit values into three bytes, most significant bits first.
    fn pack(v: [u8; 4]) -> [u8; 3] {
        [
            (v[0] << 2) | (v[1] >> 4),
            ((v[1] & 0b1111) << 4) | (v[2] >> 2),
            ((v[2] & 0b11) << 6) | v[3],
        ]
    }

    let input = str.as_bytes();
    let len = input.len();

    // Validate once, before allocating, so rejected input costs no allocation and the
    // subtractions below can never underflow.
    if !input.iter().all(|code| (32..=95).contains(code)) {
        return Err(Error::InvalidCharacter);
    }

    let groups = input.chunks_exact(4);
    let tail = groups.remainder();

    // Equal to ceil(len * 3 / 4), but computed without the `len * 3` product, which could
    // overflow `usize` for very large inputs on targets with a narrow `usize`.
    let bytes_needed = len / 4 * 3 + tail.len();
    let mut bytes = Vec::with_capacity(bytes_needed);

    for group in groups {
        bytes.extend_from_slice(&pack([
            group[0] - 32,
            group[1] - 32,
            group[2] - 32,
            group[3] - 32,
        ]));
    }

    // A tail of k characters (1-3) yields exactly k bytes: pad the group with zero values,
    // pack it as a full group, and keep only the first k bytes.
    if !tail.is_empty() {
        let mut padded = [0u8; 4];
        for (slot, &code) in padded.iter_mut().zip(tail) {
            *slot = code - 32;
        }
        bytes.extend_from_slice(&pack(padded)[..tail.len()]);
    }

    Ok((bytes, len))
}

/// Encodes a string into packed DEC SIXBIT bytes without validating its characters.
///
/// Each input byte has 32 subtracted from it and the results are packed most-significant-bit
/// first, four characters per three output bytes; a trailing group of one to three characters is
/// packed the same way with the unused low bits of its last byte left as zero. The packed bytes
/// are returned together with the byte length of the input.
///
/// No check is made that the input contains only valid SIXBIT characters (ASCII 32-95). For bytes
/// outside that range the output is not a meaningful SIXBIT encoding, and the subtraction of 32
/// panics on bytes below 32 when arithmetic overflow checks are enabled. Use this function only
/// when you are certain the input meets the required constraints.
///
/// # Safety
/// The caller must ensure that all characters in `str` are within the valid SIXBIT range (32-95).
///
/// # Examples
///
/// ```rust
/// use dec_sixbit::encode_unchecked;
///
/// let input = "HELLO";
/// let (encoded_bytes, length) = unsafe { encode_unchecked(input) };
/// ```
pub unsafe fn encode_unchecked(str: &str) -> (Vec<u8>, usize) {
    let input = str.as_bytes();
    let len = input.len();

    let quads = input.chunks_exact(4);
    let tail = quads.remainder();

    // Equal to ceil(len * 3 / 4), but computed without the `len * 3` product, which could
    // overflow `usize` for very large inputs on targets with a narrow `usize`.
    let bytes_needed = len / 4 * 3 + tail.len();
    let mut bytes = Vec::with_capacity(bytes_needed);

    // Full groups: four 6-bit values fill three bytes exactly.
    for quad in quads {
        let (a, b, c, d) = (quad[0] - 32, quad[1] - 32, quad[2] - 32, quad[3] - 32);
        bytes.extend_from_slice(&[
            (a << 2) | (b >> 4),
            ((b & 0b1111) << 4) | (c >> 2),
            ((c & 0b11) << 6) | d,
        ]);
    }

    // Trailing 1-3 characters: k characters produce k bytes, low bits of the last byte zero.
    match *tail {
        [] => {}
        [a] => bytes.push((a - 32) << 2),
        [a, b] => {
            let (a, b) = (a - 32, b - 32);
            bytes.extend_from_slice(&[(a << 2) | (b >> 4), (b & 0b1111) << 4]);
        }
        [a, b, c] => {
            let (a, b, c) = (a - 32, b - 32, c - 32);
            bytes.extend_from_slice(&[
                (a << 2) | (b >> 4),
                ((b & 0b1111) << 4) | (c >> 2),
                (c & 0b11) << 6,
            ]);
        }
        // `chunks_exact(4)` always leaves fewer than four bytes in its remainder.
        _ => unreachable!(),
    }

    (bytes, len)
}