cbm_dos/
lib.rs

// NOTE(review): the doc examples below are fenced `ignore` because the crate
// path needed for a `use` line is not visible from this file; once it is
// confirmed they can be turned into real doctests.

/// Encoder/decoder for GCR (Group Code Recording) data.
///
/// GCR maps every 4-bit nibble to a 5-bit code, so 4 plain bytes (8 nibbles)
/// become 5 encoded bytes (8 codes = 40 bits) and vice versa.
///
/// Both lookup tables are built once by [`GCR::new`] and never change
/// afterwards, so a single instance can be reused for any number of calls.
///
/// # Examples
///
/// ```ignore
/// let gcr = GCR::new();
/// let encoded = gcr.encode(&[0x08, 0x01, 0x00, 0x01]);
/// assert_eq!(encoded, vec![0x52, 0x54, 0xb5, 0x29, 0x4b]);
/// assert_eq!(gcr.decode(&encoded), Some(vec![0x08, 0x01, 0x00, 0x01]));
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GCR {
    /// Indexed by 5-bit code; holds the decoded nibble, or `GCR::INVALID`
    /// for codes that are not part of the table.
    decode_mappings: [u8; 32],
    /// Indexed by nibble (0..=15); holds the 5-bit code.
    encode_mappings: [u8; 16],
}

impl GCR {
    /// Marker stored in `decode_mappings` for 5-bit values that are not valid codes.
    const INVALID: u8 = 0xFF;

    /// The 4-bit <-> 5-bit code table as `(encoded, decoded)` pairs.
    const MAPPING_PAIRS: [(u8, u8); 16] = [
        (0b01010, 0),
        (0b01011, 1),
        (0b10010, 2),
        (0b10011, 3),
        (0b01110, 4),
        (0b01111, 5),
        (0b10110, 6),
        (0b10111, 7),
        (0b01001, 8),
        (0b11001, 9),
        (0b11010, 10),
        (0b11011, 11),
        (0b01101, 12),
        (0b11101, 13),
        (0b11110, 14),
        (0b10101, 15),
    ];

    /// Builds a codec with both lookup tables populated.
    ///
    /// For every `(encoded, decoded)` pair of the code table:
    /// - `decode_mappings[encoded] = decoded`
    /// - `encode_mappings[decoded] = encoded`
    ///
    /// The 16 remaining slots of `decode_mappings` keep the invalid marker
    /// (`0xFF`), which is how [`GCR::decode`] detects malformed input.
    pub fn new() -> Self {
        let mut decode_mappings = [Self::INVALID; 32];
        let mut encode_mappings = [0u8; 16];

        for (encoded, decoded) in Self::MAPPING_PAIRS {
            decode_mappings[usize::from(encoded)] = decoded;
            encode_mappings[usize::from(decoded)] = encoded;
        }

        Self {
            decode_mappings,
            encode_mappings,
        }
    }

    /// Looks up the nibble for the 5-bit code held in the low 5 bits of `bits`.
    ///
    /// Higher bits of `bits` are ignored. Returns `None` when the code is not
    /// part of the table.
    fn decode_nibble(&self, bits: u64) -> Option<u8> {
        // Masked to 5 bits, so the cast is lossless and the index is always < 32.
        let code = (bits & 0x1F) as usize;
        let nibble = self.decode_mappings[code];
        if nibble == Self::INVALID {
            None
        } else {
            Some(nibble)
        }
    }

    /// Decodes one 40-bit group (eight 5-bit codes) into 4 bytes.
    ///
    /// The group occupies the low 40 bits of `encoded_value`, first code in
    /// bits 39..=35. Bits above bit 39 are ignored. Codes are consumed in
    /// pairs: the first of each pair becomes the high nibble of the output
    /// byte, the second the low nibble.
    ///
    /// Returns `None` as soon as any of the eight codes is invalid.
    fn decode_quintuple(&self, encoded_value: u64) -> Option<[u8; 4]> {
        let mut bytes = [0u8; 4];
        for (i, byte) in bytes.iter_mut().enumerate() {
            // Byte `i` is built from codes `2 * i` (high) and `2 * i + 1` (low).
            let high = self.decode_nibble(encoded_value >> (35 - 10 * i))?;
            let low = self.decode_nibble(encoded_value >> (30 - 10 * i))?;
            *byte = (high << 4) | low;
        }
        Some(bytes)
    }

    /// Decodes GCR data back into plain bytes.
    ///
    /// The input is consumed in groups of 5 bytes; each group is read as a
    /// big-endian 40-bit value and yields 4 decoded bytes.
    ///
    /// # Parameters
    /// - `value`: the encoded bytes. Only complete 5-byte groups are decoded;
    ///   if the length is not a multiple of 5 the trailing 1..=4 bytes are
    ///   silently ignored.
    ///
    /// # Returns
    /// - `Some(bytes)` with `4 * (value.len() / 5)` decoded bytes when every
    ///   group is valid (an empty input gives `Some` of an empty vector).
    /// - `None` if any 5-bit code in any complete group is not in the table.
    ///
    /// # Panics
    /// Never panics.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// let gcr = GCR::new();
    /// let input = [0x52, 0x54, 0xb5, 0x29, 0x4b];
    /// match gcr.decode(&input) {
    ///     Some(decoded) => assert_eq!(decoded, vec![0x08, 0x01, 0x00, 0x01]),
    ///     None => panic!("input contained an invalid code"),
    /// }
    /// ```
    pub fn decode(&self, value: &[u8]) -> Option<Vec<u8>> {
        // Output size is known up front: 4 bytes per complete 5-byte group.
        let mut result = Vec::with_capacity(value.len() / 5 * 4);

        for chunk in value.chunks_exact(5) {
            // Big-endian pack of the 5 bytes into the low 40 bits of a u64.
            let packed = chunk
                .iter()
                .fold(0u64, |acc, &byte| (acc << 8) | u64::from(byte));
            result.extend_from_slice(&self.decode_quintuple(packed)?);
        }

        Some(result)
    }

    /// Encodes plain bytes as GCR data.
    ///
    /// The input is consumed in groups of 4 bytes. Each byte is split into
    /// its high and low nibble (in that order), each nibble is replaced by
    /// its 5-bit code, and the resulting eight codes are packed big-endian
    /// into 5 output bytes.
    ///
    /// # Parameters
    /// - `value`: the plain bytes. Only complete 4-byte groups are encoded;
    ///   if the length is not a multiple of 4 the trailing 1..=3 bytes are
    ///   silently ignored, so callers must pad beforehand if they need them.
    ///
    /// # Returns
    /// A vector of `5 * (value.len() / 4)` encoded bytes.
    ///
    /// # Panics
    /// Never panics: every nibble is in `0..=15` and `encode_mappings` has
    /// exactly 16 entries.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// let gcr = GCR::new();
    /// let encoded = gcr.encode(&[0x30, 0x30, 0x00, 0x00]);
    /// assert_eq!(encoded, vec![0x9a, 0xa6, 0xa5, 0x29, 0x4a]);
    /// ```
    pub fn encode(&self, value: &[u8]) -> Vec<u8> {
        // Output size is known up front: 5 bytes per complete 4-byte group.
        let mut result = Vec::with_capacity(value.len() / 4 * 5);

        for chunk in value.chunks_exact(4) {
            // Shift each 5-bit code in from the right; after 8 codes the first
            // one sits in bits 39..=35, i.e. the group is big-endian.
            let mut packed: u64 = 0;
            for &byte in chunk {
                for nibble in [byte >> 4, byte & 0x0F] {
                    let code = self.encode_mappings[usize::from(nibble)];
                    packed = (packed << 5) | u64::from(code);
                }
            }

            // The 40 payload bits are the last 5 of the 8 big-endian bytes.
            result.extend_from_slice(&packed.to_be_bytes()[3..]);
        }

        result
    }
}

impl Default for GCR {
    /// Equivalent to [`GCR::new`].
    fn default() -> Self {
        Self::new()
    }
}
271
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain payload shared by both tests.
    const PLAIN: [u8; 8] = [0x08, 0x01, 0x00, 0x01, 0x30, 0x30, 0x00, 0x00];

    /// GCR-encoded form of `PLAIN` (two 5-byte groups).
    const ENCODED: [u8; 10] = [0x52, 0x54, 0xb5, 0x29, 0x4b, 0x9a, 0xa6, 0xa5, 0x29, 0x4a];

    /// Decoding the reference GCR bytes must give back the plain payload.
    #[test]
    fn decode_works() {
        let codec = GCR::new();
        let decoded = codec.decode(&ENCODED).unwrap();
        assert_eq!(decoded, PLAIN.to_vec());
    }

    /// Encoding the plain payload must give the reference GCR bytes.
    #[test]
    fn encode_works() {
        let codec = GCR::new();
        let encoded = codec.encode(&PLAIN);
        assert_eq!(encoded, ENCODED.to_vec());
    }
}