cbm_dos/lib.rs
/// Codec for a 4-bit-to-5-bit GCR (Group Code Recording) scheme.
///
/// Each 4-bit nibble of plain data is represented by one 5-bit code, so
/// 4 plain bytes (8 nibbles) occupy exactly 40 bits, i.e. 5 encoded bytes.
/// Only 16 of the 32 possible 5-bit patterns are valid codes:
///
/// ```text
/// nibble code    nibble code    nibble code    nibble code
///      0 01010        4 01110        8 01001       12 01101
///      1 01011        5 01111        9 11001       13 11101
///      2 10010        6 10110       10 11010       14 11110
///      3 10011        7 10111       11 11011       15 10101
/// ```
///
/// Both directions are served by small lookup tables that are built once in
/// [`GCR::new`], so translating a nibble or a code is a single array index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GCR {
    /// Indexed by a 5-bit code; holds the decoded nibble, or `0xFF` when the
    /// pattern is not a valid code.
    decode_mappings: [u8; 32],
    /// Indexed by a nibble (`0..=15`); holds the 5-bit code for that nibble.
    encode_mappings: [u8; 16],
}

/// Width of one code group in bits.
///
/// The same value is also the number of bytes in one encoded block, because
/// 8 groups x 5 bits = 40 bits = 5 bytes.
const QUINTUPLE_SIZE: usize = 5;

impl Default for GCR {
    /// Equivalent to [`GCR::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl GCR {
    /// Marker stored in `decode_mappings` for 5-bit patterns that are not valid codes.
    const INVALID_NIBBLE: u8 = 0xFF;

    /// Number of plain bytes that are packed into one 40-bit encoded block.
    const PLAIN_BLOCK_SIZE: usize = 4;

    /// Mask that keeps the lowest 5 bits (one code group) of a value.
    const GROUP_MASK: u64 = 0x1F;

    /// Right-shift that brings the first (most significant) of the 8 code
    /// groups of a 40-bit block down to bits `0..5`.
    const TOP_GROUP_SHIFT: usize = 7 * QUINTUPLE_SIZE;

    /// Builds a codec with both lookup tables filled in.
    ///
    /// * `encode_mappings[nibble]` is the 5-bit code of `nibble`.
    /// * `decode_mappings[code]` is the nibble for `code`; every 5-bit pattern
    ///   that is not a valid code keeps the invalid marker `0xFF`.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// let gcr = GCR::new();
    /// assert_eq!(gcr.encode(&[0x08, 0x01, 0x00, 0x01]), vec![0x52, 0x54, 0xb5, 0x29, 0x4b]);
    /// ```
    pub fn new() -> Self {
        // 5-bit code for each nibble; the position in the array is the nibble.
        const CODES: [u8; 16] = [
            0b01010, 0b01011, 0b10010, 0b10011, // 0..=3
            0b01110, 0b01111, 0b10110, 0b10111, // 4..=7
            0b01001, 0b11001, 0b11010, 0b11011, // 8..=11
            0b01101, 0b11101, 0b11110, 0b10101, // 12..=15
        ];

        // Start with every pattern marked invalid, then fill in the 16 valid codes.
        let mut decode_mappings = [Self::INVALID_NIBBLE; 32];
        for (nibble, &code) in (0u8..).zip(CODES.iter()) {
            decode_mappings[usize::from(code)] = nibble;
        }

        GCR {
            decode_mappings,
            encode_mappings: CODES,
        }
    }

    /// Translates the lowest 5 bits of `bits` into a nibble.
    ///
    /// Returns `None` when those 5 bits are not a valid code.
    fn decode_group(&self, bits: u64) -> Option<u8> {
        // The mask limits the index to 0..=31, so the lookup cannot go out of bounds.
        let nibble = self.decode_mappings[(bits & Self::GROUP_MASK) as usize];
        if nibble == Self::INVALID_NIBBLE {
            None
        } else {
            Some(nibble)
        }
    }

    /// Decodes one 40-bit block (8 code groups) into 4 plain bytes.
    ///
    /// The block is read from bits `0..40` of `encoded_value`, most significant
    /// group first; any bits above bit 39 are ignored. Groups are consumed in
    /// pairs: the first group of a pair becomes the high nibble of the output
    /// byte and the second one the low nibble.
    ///
    /// # Returns
    ///
    /// * `Some(bytes)` with the 4 decoded bytes when all 8 groups are valid codes.
    /// * `None` as soon as a group is found that is not a valid code.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// let gcr = GCR::new();
    /// // 11110 is the code for 0xE and 01011 the code for 0x1.
    /// let block = 0b11110_01011_11110_01011_11110_01011_11110_01011;
    /// assert_eq!(gcr.decode_quintuple(block), Some([0xE1, 0xE1, 0xE1, 0xE1]));
    ///
    /// // 11111 is not a valid code.
    /// let broken = 0b11110_11110_11110_11110_11110_11110_11110_11111;
    /// assert_eq!(gcr.decode_quintuple(broken), None);
    /// ```
    fn decode_quintuple(&self, encoded_value: u64) -> Option<[u8; 4]> {
        let mut block = [0u8; Self::PLAIN_BLOCK_SIZE];

        for (index, byte) in block.iter_mut().enumerate() {
            // Each output byte consumes two consecutive groups, i.e. 10 bits.
            let high_shift = Self::TOP_GROUP_SHIFT - index * 2 * QUINTUPLE_SIZE;
            let low_shift = high_shift - QUINTUPLE_SIZE;

            let high = self.decode_group(encoded_value >> high_shift)?;
            let low = self.decode_group(encoded_value >> low_shift)?;
            *byte = (high << 4) | low;
        }

        Some(block)
    }

    /// Decodes GCR data back into plain bytes.
    ///
    /// `value` is processed in blocks of 5 bytes; every block yields 4 plain
    /// bytes, which are appended to the result in order.
    ///
    /// # Returns
    ///
    /// * `Some(bytes)` when every complete 5-byte block decodes successfully.
    /// * `None` when any block contains a 5-bit group that is not a valid code.
    ///
    /// # Incomplete input
    ///
    /// Only complete 5-byte blocks are decoded. If `value.len()` is not a
    /// multiple of 5, the trailing 1 to 4 bytes are ignored; an input shorter
    /// than 5 bytes therefore decodes to `Some` of an empty vector.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// let gcr = GCR::new();
    /// assert_eq!(
    ///     gcr.decode(&[0x52, 0x54, 0xb5, 0x29, 0x4b]),
    ///     Some(vec![0x08, 0x01, 0x00, 0x01])
    /// );
    /// // A block of zero bits contains only invalid codes.
    /// assert_eq!(gcr.decode(&[0, 0, 0, 0, 0]), None);
    /// ```
    pub fn decode(&self, value: &[u8]) -> Option<Vec<u8>> {
        let block_count = value.len() / QUINTUPLE_SIZE;
        let mut result = Vec::with_capacity(block_count * Self::PLAIN_BLOCK_SIZE);

        for chunk in value.chunks_exact(QUINTUPLE_SIZE) {
            // Right-align the 5 encoded bytes in a big-endian u64; the upper
            // 3 bytes stay zero.
            let mut padded = [0u8; 8];
            padded[8 - QUINTUPLE_SIZE..].copy_from_slice(chunk);

            let block = self.decode_quintuple(u64::from_be_bytes(padded))?;
            result.extend_from_slice(&block);
        }

        Some(result)
    }

    /// Encodes 4 plain bytes into one 40-bit block.
    ///
    /// Every byte contributes two 5-bit codes, high nibble first. The first
    /// byte ends up in the most significant 10 bits of the 40-bit result, the
    /// fourth byte in the least significant 10 bits. Bits 40 and above of the
    /// returned value are always zero.
    ///
    /// # Parameters
    ///
    /// * `decoded_value` - the plain bytes; only the first 4 are used.
    ///
    /// # Panics
    ///
    /// Panics if `decoded_value` holds fewer than 4 bytes.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// let gcr = GCR::new();
    /// let block = gcr.encode_quintuple(&[0x12, 0x34, 0x56, 0x78]);
    /// assert_eq!(block, 0b01011_10010_10011_01110_01111_10110_10111_01001);
    /// ```
    fn encode_quintuple(&self, decoded_value: &[u8]) -> u64 {
        decoded_value[..Self::PLAIN_BLOCK_SIZE]
            .iter()
            .fold(0u64, |acc, &byte| {
                let high = u64::from(self.encode_mappings[usize::from(byte >> 4)]);
                let low = u64::from(self.encode_mappings[usize::from(byte & 0x0F)]);
                // Make room for two more groups and append them, high nibble first.
                (acc << (2 * QUINTUPLE_SIZE)) | (high << QUINTUPLE_SIZE) | low
            })
    }

    /// Encodes plain bytes as GCR data.
    ///
    /// `value` is processed in blocks of 4 bytes; every block is turned into
    /// 5 encoded bytes (big-endian order of the 40-bit block), which are
    /// appended to the result in order.
    ///
    /// # Incomplete input
    ///
    /// Only complete 4-byte blocks are encoded. If `value.len()` is not a
    /// multiple of 4, the trailing 1 to 3 bytes are ignored.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// let gcr = GCR::new();
    /// // The fifth byte does not form a complete block and is dropped.
    /// let encoded = gcr.encode(&[0x08, 0x01, 0x00, 0x01, 0xAA]);
    /// assert_eq!(encoded, vec![0x52, 0x54, 0xb5, 0x29, 0x4b]);
    /// ```
    pub fn encode(&self, value: &[u8]) -> Vec<u8> {
        let block_count = value.len() / Self::PLAIN_BLOCK_SIZE;
        let mut result = Vec::with_capacity(block_count * QUINTUPLE_SIZE);

        for chunk in value.chunks_exact(Self::PLAIN_BLOCK_SIZE) {
            let block = self.encode_quintuple(chunk);
            // The block occupies the low 40 bits, i.e. the last 5 big-endian bytes.
            result.extend_from_slice(&block.to_be_bytes()[8 - QUINTUPLE_SIZE..]);
        }

        result
    }
}
326
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain bytes used as the reference vector by both tests.
    const PLAIN: [u8; 8] = [0x08, 0x01, 0x00, 0x01, 0x30, 0x30, 0x00, 0x00];

    /// GCR-encoded form of `PLAIN` (two 5-byte blocks).
    const ENCODED: [u8; 10] = [0x52, 0x54, 0xb5, 0x29, 0x4b, 0x9a, 0xa6, 0xa5, 0x29, 0x4a];

    /// Decoding the reference GCR bytes must yield the reference plain bytes.
    #[test]
    fn decode_works() {
        let codec = GCR::new();
        let decoded = codec.decode(&ENCODED).unwrap();
        assert_eq!(decoded, PLAIN);
    }

    /// Encoding the reference plain bytes must yield the reference GCR bytes.
    #[test]
    fn encode_works() {
        let codec = GCR::new();
        let encoded = codec.encode(&PLAIN);
        assert_eq!(encoded, ENCODED);
    }
}
350}