cbm_dos/lib.rs
/// 4-bit to 5-bit GCR (Group Code Recording) codec backed by two lookup tables.
///
/// Every nibble (4 bits) of raw data is represented by a 5-bit code, so a
/// group of 4 raw bytes becomes 5 encoded bytes and vice versa.
#[derive(Debug, Clone)]
pub struct GCR {
    decode_mappings: [u8; 32], // Indexed by 5-bit code; decoded nibble, or `INVALID`
    encode_mappings: [u8; 16], // Indexed by nibble 0..=15; the 5-bit code
}

impl GCR {
    /// Marker stored in `decode_mappings` for 5-bit values that are not a valid code.
    const INVALID: u8 = 0xFF;

    /// Number of raw bytes in one group.
    const RAW_GROUP_LEN: usize = 4;

    /// Number of encoded bytes in one group (8 nibbles * 5 bits = 40 bits).
    const ENCODED_GROUP_LEN: usize = 5;

    /// 5-bit code for each nibble; the array index is the nibble value.
    const ENCODE_TABLE: [u8; 16] = [
        0b01010, 0b01011, 0b10010, 0b10011, // nibbles 0..=3
        0b01110, 0b01111, 0b10110, 0b10111, // nibbles 4..=7
        0b01001, 0b11001, 0b11010, 0b11011, // nibbles 8..=11
        0b01101, 0b11101, 0b11110, 0b10101, // nibbles 12..=15
    ];

    /// Constructs a new `GCR` instance with precomputed lookup tables.
    ///
    /// - `encode_mappings` maps each nibble (0..=15) to its 5-bit code.
    /// - `decode_mappings` is the inverse table: it maps each 5-bit value to
    ///   the nibble it encodes. The 16 five-bit values that are not valid
    ///   codes hold `0xFF`.
    ///
    /// The code table is:
    ///
    /// ```text
    /// (Encoded, Decoded)
    /// (01010, 0),  (01011, 1),  (10010, 2),  (10011, 3),
    /// (01110, 4),  (01111, 5),  (10110, 6),  (10111, 7),
    /// (01001, 8),  (11001, 9),  (11010, 10), (11011, 11),
    /// (01101, 12), (11101, 13), (11110, 14), (10101, 15)
    /// ```
    ///
    /// # Examples
    ///
    /// ```ignore
    /// // `GCR` must be imported from this crate first.
    /// let gcr = GCR::new();
    /// let encoded = gcr.encode(&[0x08, 0x01, 0x00, 0x01]);
    /// assert_eq!(encoded, [0x52, 0x54, 0xb5, 0x29, 0x4b]);
    /// assert_eq!(gcr.decode(&encoded), Some(vec![0x08, 0x01, 0x00, 0x01]));
    /// ```
    pub fn new() -> Self {
        // Derive the decode table by inverting the encode table so the two
        // can never disagree.
        let mut decode_mappings = [Self::INVALID; 32];
        for (nibble, &code) in (0u8..).zip(Self::ENCODE_TABLE.iter()) {
            decode_mappings[usize::from(code)] = nibble;
        }
        GCR {
            decode_mappings,
            encode_mappings: Self::ENCODE_TABLE,
        }
    }

    /// Looks up the nibble for the 5-bit code held in the low 5 bits of `bits`.
    ///
    /// Returns `None` when those 5 bits are not a valid code.
    fn decode_nibble(&self, bits: u64) -> Option<u8> {
        // Masking to 5 bits keeps the index within the 32-entry table.
        let nibble = self.decode_mappings[(bits & 0x1F) as usize];
        (nibble != Self::INVALID).then_some(nibble)
    }

    /// Decodes one 40-bit group (eight 5-bit codes) into 4 raw bytes.
    ///
    /// # Parameters
    /// - `encoded_value`: the group in the low 40 bits, first code in bits
    ///   39..=35. Bits above bit 39 are ignored.
    ///
    /// # Returns
    /// - `Some([u8; 4])` with the decoded bytes; each byte is built from two
    ///   consecutive codes, high nibble first.
    /// - `None` if any of the eight codes is invalid.
    fn decode_quintuple(&self, encoded_value: u64) -> Option<[u8; 4]> {
        let mut bytes = [0u8; Self::RAW_GROUP_LEN];
        for (i, byte) in bytes.iter_mut().enumerate() {
            // Byte `i` occupies 10 bits: its high-nibble code starts at bit
            // `35 - 10 * i`, its low-nibble code 5 bits below that.
            let high = self.decode_nibble(encoded_value >> (35 - 10 * i))?;
            let low = self.decode_nibble(encoded_value >> (30 - 10 * i))?;
            *byte = (high << 4) | low;
        }
        Some(bytes)
    }

    /// Decodes GCR-encoded bytes back into raw bytes.
    ///
    /// The input is processed in groups of 5 bytes; each group is read as a
    /// big-endian 40-bit value and decoded into 4 raw bytes.
    ///
    /// # Parameters
    /// - `value`: the encoded data. Its length should be a multiple of 5;
    ///   a trailing partial group (1 to 4 bytes) is silently ignored.
    ///
    /// # Returns
    /// - `Some(Vec<u8>)` with 4 bytes per complete input group (empty for
    ///   input shorter than 5 bytes).
    /// - `None` if any complete group contains an invalid 5-bit code.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// // `GCR` must be imported from this crate first.
    /// let gcr = GCR::new();
    /// assert_eq!(
    ///     gcr.decode(&[0x52, 0x54, 0xb5, 0x29, 0x4b]),
    ///     Some(vec![0x08, 0x01, 0x00, 0x01]),
    /// );
    /// assert_eq!(gcr.decode(&[0x00; 5]), None); // 00000 is not a valid code
    /// ```
    pub fn decode(&self, value: &[u8]) -> Option<Vec<u8>> {
        let groups = value.chunks_exact(Self::ENCODED_GROUP_LEN);
        let mut result = Vec::with_capacity(groups.len() * Self::RAW_GROUP_LEN);
        for chunk in groups {
            // Right-align the 5 bytes in a u64; the upper 3 bytes stay zero.
            let mut padded = [0u8; 8];
            padded[3..].copy_from_slice(chunk);
            let decoded = self.decode_quintuple(u64::from_be_bytes(padded))?;
            result.extend_from_slice(&decoded);
        }
        Some(result)
    }

    /// Encodes raw bytes into GCR.
    ///
    /// The input is processed in groups of 4 bytes. Each byte is split into
    /// its high and low nibble, each nibble is replaced by its 5-bit code, and
    /// the resulting 8 codes are packed big-endian into 5 output bytes.
    ///
    /// # Parameters
    /// - `value`: the raw data. Its length should be a multiple of 4;
    ///   a trailing partial group (1 to 3 bytes) is silently ignored, so the
    ///   caller is responsible for padding.
    ///
    /// # Returns
    /// - A `Vec<u8>` with 5 bytes per complete input group.
    ///
    /// # Panics
    /// Never panics: table indices are nibbles (0..=15) into a 16-entry array.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// // `GCR` must be imported from this crate first.
    /// let gcr = GCR::new();
    /// assert_eq!(
    ///     gcr.encode(&[0x08, 0x01, 0x00, 0x01]),
    ///     [0x52, 0x54, 0xb5, 0x29, 0x4b],
    /// );
    /// ```
    pub fn encode(&self, value: &[u8]) -> Vec<u8> {
        let groups = value.chunks_exact(Self::RAW_GROUP_LEN);
        let mut result = Vec::with_capacity(groups.len() * Self::ENCODED_GROUP_LEN);
        for chunk in groups {
            // Each byte contributes 10 bits (two 5-bit codes), high nibble first.
            let packed = chunk.iter().fold(0u64, |acc, &byte| {
                let high = u64::from(self.encode_mappings[usize::from(byte >> 4)]);
                let low = u64::from(self.encode_mappings[usize::from(byte & 0x0F)]);
                (acc << 10) | (high << 5) | low
            });
            // The 40 packed bits live in the low 5 bytes of the u64.
            result.extend_from_slice(&packed.to_be_bytes()[3..]);
        }
        result
    }
}

impl Default for GCR {
    /// Equivalent to [`GCR::new`].
    fn default() -> Self {
        Self::new()
    }
}
271
#[cfg(test)]
mod tests {
    use super::*;

    /// Raw payload used as the reference vector (two groups of 4 bytes).
    const RAW: [u8; 8] = [0x08, 0x01, 0x00, 0x01, 0x30, 0x30, 0x00, 0x00];

    /// GCR encoding of `RAW` (two groups of 5 bytes).
    const ENCODED: [u8; 10] = [0x52, 0x54, 0xb5, 0x29, 0x4b, 0x9a, 0xa6, 0xa5, 0x29, 0x4a];

    /// Decoding the reference vector yields the raw payload.
    #[test]
    fn decode_works() {
        let gcr = GCR::new();
        assert_eq!(gcr.decode(&ENCODED), Some(RAW.to_vec()));
    }

    /// Encoding the raw payload yields the reference vector.
    #[test]
    fn encode_works() {
        let gcr = GCR::new();
        assert_eq!(gcr.encode(&RAW), ENCODED.to_vec());
    }

    /// Every possible byte value survives an encode/decode round trip.
    #[test]
    fn round_trip_preserves_every_byte_value() {
        let gcr = GCR::new();
        let all_bytes: Vec<u8> = (0u8..=255).collect();
        let encoded = gcr.encode(&all_bytes);
        // 4 raw bytes become 5 encoded bytes.
        assert_eq!(encoded.len(), all_bytes.len() / 4 * 5);
        assert_eq!(gcr.decode(&encoded), Some(all_bytes));
    }

    /// A group containing a 5-bit value outside the code table is rejected.
    #[test]
    fn decode_rejects_invalid_code() {
        let gcr = GCR::new();
        // 00000 and 11111 are not present in the code table.
        assert!(gcr.decode(&[0x00; 5]).is_none());
        assert!(gcr.decode(&[0xFF; 5]).is_none());
    }

    /// Trailing bytes that do not fill a whole group are ignored.
    #[test]
    fn partial_trailing_group_is_ignored() {
        let gcr = GCR::new();
        assert_eq!(gcr.encode(&[0x08, 0x01, 0x00, 0x01, 0xAA]), ENCODED[..5].to_vec());
        assert_eq!(gcr.decode(&ENCODED[..7]), Some(RAW[..4].to_vec()));
    }
}
295}