// Source: cbm_dos/lib.rs

/// Group Code Recording (GCR) codec that maps every 4-bit nibble to a 5-bit
/// code and back.
///
/// Four plain bytes (8 nibbles) become five encoded bytes (8 codes x 5 bits =
/// 40 bits). The code table itself is documented on [`GCR::new`].
#[derive(Debug, Clone)]
pub struct GCR {
    decode_mappings: [u8; 32], // Index by 5-bit value, store decoded nibble (0xFF = invalid)
    encode_mappings: [u8; 16], // Index by nibble 0..15, store 5-bit encoded value
}

/// Number of bits in one GCR code, and also the number of bytes in one
/// encoded group (8 codes x 5 bits = 40 bits = 5 bytes).
const QUINTUPLE_SIZE: usize = 5;

impl Default for GCR {
    /// Equivalent to [`GCR::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl GCR {
    /// Marker stored in `decode_mappings` for 5-bit values that are not valid codes.
    const INVALID_CODE: u8 = 0xFF;

    /// 5-bit code for each nibble, indexed by the nibble value (0..=15).
    const ENCODE_TABLE: [u8; 16] = [
        0b01010, 0b01011, 0b10010, 0b10011, // 0..=3
        0b01110, 0b01111, 0b10110, 0b10111, // 4..=7
        0b01001, 0b11001, 0b11010, 0b11011, // 8..=11
        0b01101, 0b11101, 0b11110, 0b10101, // 12..=15
    ];

    /// Builds a codec with both lookup tables filled in.
    ///
    /// - `encode_mappings[nibble]` holds the 5-bit code of `nibble`.
    /// - `decode_mappings[code]` holds the nibble of a valid 5-bit `code`, or
    ///   `0xFF` for the 16 five-bit values that are not valid codes.
    ///
    /// The mapping is:
    ///
    /// ```plaintext
    /// (Encoded, Decoded)
    /// (01010, 0), (01011, 1), (10010, 2), (10011, 3),
    /// (01110, 4), (01111, 5), (10110, 6), (10111, 7),
    /// (01001, 8), (11001, 9), (11010, 10), (11011, 11),
    /// (01101, 12), (11101, 13), (11110, 14), (10101, 15)
    /// ```
    ///
    /// # Example
    ///
    /// ```ignore
    /// let gcr = GCR::new();
    /// assert_eq!(gcr.encode(&[0x08, 0x01, 0x00, 0x01]), vec![0x52, 0x54, 0xb5, 0x29, 0x4b]);
    /// ```
    pub fn new() -> Self {
        let mut decode_mappings = [Self::INVALID_CODE; 32];

        // The decode table is the inverse of the encode table: walk every
        // (nibble, code) pair once and record the reverse direction.
        for (nibble, code) in (0u8..).zip(Self::ENCODE_TABLE) {
            decode_mappings[usize::from(code)] = nibble;
        }

        GCR {
            decode_mappings,
            encode_mappings: Self::ENCODE_TABLE,
        }
    }

    /// Looks up the nibble for the 5-bit code held in the low 5 bits of `bits`.
    ///
    /// Returns `None` when those 5 bits are not a valid code.
    fn decode_nibble(&self, bits: u64) -> Option<u8> {
        // Masking to 5 bits keeps the index inside the 32-entry table.
        let nibble = self.decode_mappings[(bits & 0x1F) as usize];
        if nibble == Self::INVALID_CODE {
            None
        } else {
            Some(nibble)
        }
    }

    /// Decodes one 40-bit group (eight 5-bit codes) into 4 bytes.
    ///
    /// The group occupies the low 40 bits of `encoded_value`, first code in
    /// bits 39..=35 and last code in bits 4..=0. Bits above bit 39 are never
    /// inspected. Codes are consumed in pairs: the first code of a pair is
    /// the high nibble of the output byte, the second is the low nibble.
    ///
    /// # Returns
    ///
    /// `Some([u8; 4])` with the decoded bytes, or `None` as soon as a pair
    /// contains a 5-bit value that is not a valid code.
    ///
    /// # Example
    ///
    /// ```ignore
    /// let gcr = GCR::new();
    /// // Codes: 01010 01001 01010 01011 01010 01010 01010 01011
    /// assert_eq!(gcr.decode_quintuple(0x52_54B5_294B), Some([0x08, 0x01, 0x00, 0x01]));
    /// // 00000 is not a valid code.
    /// assert_eq!(gcr.decode_quintuple(0), None);
    /// ```
    fn decode_quintuple(&self, encoded_value: u64) -> Option<[u8; 4]> {
        let mut bytes = [0u8; 4];

        for (index, byte) in bytes.iter_mut().enumerate() {
            // Each output byte consumes two codes (10 bits); the first code of
            // the group starts at bit 35 (= 40 - 5).
            let shift_high = 35 - index * 2 * QUINTUPLE_SIZE;
            let shift_low = shift_high - QUINTUPLE_SIZE;

            let high = self.decode_nibble(encoded_value >> shift_high)?;
            let low = self.decode_nibble(encoded_value >> shift_low)?;
            *byte = (high << 4) | low;
        }

        Some(bytes)
    }

    /// Decodes GCR-encoded bytes back into plain bytes.
    ///
    /// The input is consumed in groups of `QUINTUPLE_SIZE` (5) bytes; every
    /// group yields 4 decoded bytes.
    ///
    /// # Parameters
    /// - `value`: the encoded bytes.
    ///
    /// # Returns
    /// - `Some(Vec<u8>)` with the decoded bytes when every complete group is valid.
    ///   An empty input yields `Some` of an empty vector.
    /// - `None` if any complete group contains an invalid 5-bit code.
    ///
    /// # Note
    /// Trailing bytes that do not form a complete 5-byte group are ignored;
    /// they are neither decoded nor treated as an error.
    ///
    /// # Example
    /// ```ignore
    /// let gcr = GCR::new();
    /// assert_eq!(
    ///     gcr.decode(&[0x52, 0x54, 0xb5, 0x29, 0x4b]),
    ///     Some(vec![0x08, 0x01, 0x00, 0x01])
    /// );
    /// ```
    pub fn decode(&self, value: &[u8]) -> Option<Vec<u8>> {
        // Every complete 5-byte group produces exactly 4 bytes.
        let mut result = Vec::with_capacity(value.len() / QUINTUPLE_SIZE * 4);

        for chunk in value.chunks_exact(QUINTUPLE_SIZE) {
            // Place the 5 bytes in the low 40 bits of a big-endian u64; the
            // upper 3 bytes stay zero.
            let mut padded = [0u8; 8];
            padded[3..].copy_from_slice(chunk);

            let decoded = self.decode_quintuple(u64::from_be_bytes(padded))?;
            result.extend_from_slice(&decoded);
        }

        Some(result)
    }

    /// Encodes 4 bytes into one 40-bit group held in the low bits of a `u64`.
    ///
    /// Each byte is split into its high nibble (`byte >> 4`) and low nibble
    /// (`byte & 0x0F`); each nibble is replaced by its 5-bit code. Codes are
    /// packed most-significant first: the first byte's high nibble ends up in
    /// bits 39..=35 and the last byte's low nibble in bits 4..=0.
    ///
    /// # Parameters
    /// - `decoded_value`: the bytes to encode. Only the first 4 bytes are used.
    ///
    /// # Returns
    /// The 40-bit encoded group; bits 40..=63 are zero.
    ///
    /// # Panics
    /// Panics if `decoded_value` has fewer than 4 bytes.
    ///
    /// # Example
    /// ```ignore
    /// let gcr = GCR::new();
    /// assert_eq!(gcr.encode_quintuple(&[0x08, 0x01, 0x00, 0x01]), 0x52_54B5_294B);
    /// ```
    fn encode_quintuple(&self, decoded_value: &[u8]) -> u64 {
        decoded_value[..4].iter().fold(0u64, |acc, &byte| {
            let high = u64::from(self.encode_mappings[usize::from(byte >> 4)]);
            let low = u64::from(self.encode_mappings[usize::from(byte & 0x0F)]);
            // Make room for two more codes, then append them.
            (acc << (2 * QUINTUPLE_SIZE)) | (high << QUINTUPLE_SIZE) | low
        })
    }

    /// Encodes plain bytes into their GCR representation.
    ///
    /// The input is consumed in groups of 4 bytes; every group becomes
    /// `QUINTUPLE_SIZE` (5) encoded bytes, appended in input order.
    ///
    /// # Parameters
    /// - `value`: the bytes to encode.
    ///
    /// # Returns
    /// The encoded bytes, `5 * (value.len() / 4)` bytes long.
    ///
    /// # Note
    /// Trailing bytes that do not form a complete 4-byte group are ignored.
    ///
    /// # Example
    /// ```ignore
    /// let gcr = GCR::new();
    /// assert_eq!(
    ///     gcr.encode(&[0x08, 0x01, 0x00, 0x01]),
    ///     vec![0x52, 0x54, 0xb5, 0x29, 0x4b]
    /// );
    /// ```
    pub fn encode(&self, value: &[u8]) -> Vec<u8> {
        let num_chunks = value.len() / 4;
        let mut result = Vec::with_capacity(num_chunks * QUINTUPLE_SIZE);

        for chunk in value.chunks_exact(4) {
            let group = self.encode_quintuple(chunk);
            // The group lives in the low 40 bits, i.e. the last 5 of the 8
            // big-endian bytes.
            result.extend_from_slice(&group.to_be_bytes()[3..]);
        }

        result
    }
}
326
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain reference bytes: two complete 4-byte groups.
    const PLAIN: [u8; 8] = [0x08, 0x01, 0x00, 0x01, 0x30, 0x30, 0x00, 0x00];

    /// GCR-encoded form of `PLAIN`: two complete 5-byte groups.
    const ENCODED: [u8; 10] = [0x52, 0x54, 0xb5, 0x29, 0x4b, 0x9a, 0xa6, 0xa5, 0x29, 0x4a];

    /// Decoding the encoded fixture must give back the plain bytes.
    #[test]
    fn decode_works() {
        let codec = GCR::new();
        let decoded = codec.decode(&ENCODED).unwrap();
        assert_eq!(decoded, PLAIN.to_vec());
    }

    /// Encoding the plain fixture must produce the encoded bytes.
    #[test]
    fn encode_works() {
        let codec = GCR::new();
        let encoded = codec.encode(&PLAIN);
        assert_eq!(encoded, ENCODED.to_vec());
    }
}