ragc_core/
tuple_packing.rs

1// Tuple Packing for AGC Segment Data
2// Matches C++ AGC implementation in segment.h
3
4/// Pack bytes into tuples based on maximum value
5/// This matches C++ AGC's bytes2tuples implementation
6pub fn bytes_to_tuples(bytes: &[u8]) -> Vec<u8> {
7    if bytes.is_empty() {
8        return vec![0x10]; // Empty with marker
9    }
10
11    let max_elem = *bytes.iter().max().unwrap();
12
13    if max_elem < 4 {
14        pack_tuples::<4, 4>(bytes)
15    } else if max_elem < 6 {
16        pack_tuples::<3, 6>(bytes)
17    } else if max_elem < 16 {
18        pack_tuples::<2, 16>(bytes)
19    } else {
20        // No packing needed
21        let mut result = bytes.to_vec();
22        result.push(0x10); // Marker: no packing
23        result
24    }
25}
26
27/// Unpack tuples back to bytes
28/// This matches C++ AGC's tuples2bytes implementation
29pub fn tuples_to_bytes(tuples: &[u8]) -> Vec<u8> {
30    if tuples.is_empty() {
31        return Vec::new();
32    }
33
34    let marker = tuples[tuples.len() - 1];
35    let no_bytes = marker >> 4;
36    let trailing_bytes = marker & 0xf;
37
38    if no_bytes == 1 {
39        // No packing was used, just return data without marker
40        return tuples[..tuples.len() - 1].to_vec();
41    }
42
43    // Output size calculation matches C++ AGC line 99
44    let output_size = (tuples.len() - 2) * (no_bytes as usize) + (trailing_bytes as usize);
45    let mut result = vec![0u8; output_size];
46
47    match no_bytes {
48        2 => unpack_tuples::<2, 16>(&tuples[..tuples.len() - 1], &mut result, output_size),
49        3 => unpack_tuples::<3, 6>(&tuples[..tuples.len() - 1], &mut result, output_size),
50        4 => unpack_tuples::<4, 4>(&tuples[..tuples.len() - 1], &mut result, output_size),
51        _ => panic!("Invalid no_bytes: {no_bytes}"),
52    }
53
54    result
55}
56
/// Pack `N` values into each output byte, treating every value as one digit
/// of a base-`MAX` number (most significant digit first).
///
/// Layout of the result:
///
/// 1. one byte per complete group of `N` input values;
/// 2. one byte for the leftover values (emitted even when there are none, in
///    which case it is 0);
/// 3. a marker byte: `N` in the high nibble, the leftover count
///    (`bytes.len() % N`) in the low nibble.
///
/// Follows `bytes2tuples_impl` of the C++ AGC implementation.
fn pack_tuples<const N: usize, const MAX: u8>(bytes: &[u8]) -> Vec<u8> {
    let radix = u32::from(MAX);

    // Horner evaluation of a group of digits; the result is narrowed to one byte.
    let encode = |digits: &[u8]| -> u8 {
        digits
            .iter()
            .fold(0u32, |acc, &digit| acc * radix + u32::from(digit)) as u8
    };

    let groups = bytes.chunks_exact(N);
    let leftover = groups.remainder();

    // Complete groups + trailing tuple + marker.
    let mut packed = Vec::with_capacity(bytes.len() / N + 2);
    packed.extend(groups.map(encode));
    packed.push(encode(leftover));
    packed.push(((N as u8) << 4) | (leftover.len() as u8));

    packed
}
88
/// Expand packed tuples into `output`, producing `output_size` values.
///
/// Each tuple byte is read as a number in base `MAX` whose digits are the
/// original values, most significant digit first. The first
/// `output_size / N` tuples each yield a complete group of `N` values. If
/// `output_size` is not a multiple of `N`, the next tuple yields the
/// remaining `output_size % N` values.
///
/// Follows `tuples2bytes_impl` of the C++ AGC implementation.
fn unpack_tuples<const N: usize, const MAX: u8>(
    tuples: &[u8],
    output: &mut [u8],
    output_size: usize,
) {
    let radix = u32::from(MAX);

    // Writes the base-`MAX` digits of `code` into `slots`, filling from the
    // last slot backwards so the least significant digit lands last.
    let spill = |code: u8, slots: &mut [u8]| {
        let mut rest = u32::from(code);
        for slot in slots.iter_mut().rev() {
            *slot = (rest % radix) as u8;
            rest /= radix;
        }
    };

    let full_groups = output_size / N;
    let tail_len = output_size % N;

    // Plain indexing is deliberate: a `tuples` slice that is too short must
    // panic rather than silently leave part of `output` untouched.
    for group in 0..full_groups {
        let start = group * N;
        spill(tuples[group], &mut output[start..start + N]);
    }

    // The partial group, if any, comes from the tuple right after the full ones.
    if tail_len > 0 {
        let start = full_groups * N;
        spill(tuples[full_groups], &mut output[start..start + tail_len]);
    }
}
124
#[cfg(test)]
mod tests {
    use super::*;

    /// Packs `data`, checks that the expected marker byte was emitted (which
    /// identifies the packing scheme that was chosen), and checks that
    /// unpacking restores the input.
    fn assert_round_trip(data: &[u8], expected_marker: u8) {
        let packed = bytes_to_tuples(data);
        assert_eq!(
            packed.last().copied(),
            Some(expected_marker),
            "unexpected marker for input {data:?}"
        );
        assert_eq!(tuples_to_bytes(&packed), data);
    }

    #[test]
    fn test_pack_unpack_dna() {
        // DNA: values 0-3 (A=0, C=1, G=2, T=3) -> four values per byte.
        let dna = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1];
        assert_round_trip(&dna, 0x42);

        // Pin the exact encoding: two full tuples, one trailing tuple, marker.
        assert_eq!(bytes_to_tuples(&dna), vec![0x1Bu8, 0x1B, 0x01, 0x42]);
    }

    #[test]
    fn test_pack_unpack_with_n() {
        // DNA with N (N=4): maximum value 4 -> three values per byte.
        assert_round_trip(&[0, 1, 2, 3, 4, 1, 2, 3], 0x32);
    }

    #[test]
    fn test_pack_unpack_threshold_6() {
        // Values 0-5 must use 3-per-byte packing.
        assert_round_trip(&[0, 1, 2, 3, 4, 5, 0, 1, 2], 0x30);
    }

    #[test]
    fn test_pack_unpack_threshold_7() {
        // Values 0-7 must use 2-per-byte packing, NOT 3-per-byte.
        assert_round_trip(&[0, 1, 2, 3, 4, 5, 6, 7, 0, 1], 0x20);
    }

    #[test]
    fn test_empty() {
        // Empty input encodes to the bare "no packing" marker.
        assert_eq!(bytes_to_tuples(&[]), vec![0x10u8]);
        assert_round_trip(&[], 0x10);
    }

    #[test]
    fn test_no_packing_needed() {
        // Values >= 16 should not be packed.
        let data = [16, 20, 100, 200];
        let packed = bytes_to_tuples(&data);
        // Should be: data + 0x10 marker.
        assert_eq!(packed.len(), data.len() + 1);
        assert_eq!(packed[..data.len()], data);
        assert_round_trip(&data, 0x10);
    }

    #[test]
    fn test_single_value_thresholds() {
        // Every possible single-byte input must pick the scheme dictated by
        // its value and survive the round trip.
        for value in 0..=u8::MAX {
            let expected_marker = match value {
                0..=3 => 0x41,
                4..=5 => 0x31,
                6..=15 => 0x21,
                _ => 0x10,
            };
            assert_round_trip(&[value], expected_marker);
        }
    }

    #[test]
    #[should_panic(expected = "Invalid no_bytes")]
    fn test_invalid_marker_panics() {
        // A high nibble of 5 does not correspond to any packing scheme.
        tuples_to_bytes(&[0x00, 0x50]);
    }
}