libtoa/
header.rs

1use super::{
2    ErrorCorrection, Prefilter, Result, TOA_MAGIC, TOA_VERSION, TOAOptions, Write,
3    error_invalid_data, error_unsupported, lzma,
4    reed_solomon::{code_32_10, code_64_40},
5};
6
/// TOA file header containing format metadata and compression parameters.
///
/// The fields mirror the ten data bytes of the serialized header (after the
/// magic and version bytes); the Reed-Solomon parity that follows them on disk
/// is not stored here and is regenerated when the header is written.
#[derive(Debug, Clone, Copy)]
pub struct TOAHeader {
    /// Capability bits. The low two bits select the error correction level;
    /// `parse` rejects headers in which any of the upper six bits are set.
    capabilities: u8,
    /// Prefilter recorded in the header.
    prefilter: Prefilter,
    /// Base-2 exponent of the block size; `parse` accepts 16 through 62.
    block_size_exponent: u8,
    /// LZMA literal context bits.
    lc: u8,
    /// LZMA literal position bits.
    lp: u8,
    /// LZMA position bits.
    pb: u8,
    /// Base-2 exponent of the dictionary size; `parse` accepts 16 through 31.
    dict_size_log2: u8,
}
18
19impl TOAHeader {
20    /// Create a new TOA header from options.
21    pub fn from_options(options: &TOAOptions) -> Self {
22        Self {
23            capabilities: options.error_correction.capability_bits(),
24            prefilter: options.prefilter,
25            block_size_exponent: options.block_size_exponent.unwrap_or(62),
26            lc: options.lc,
27            lp: options.lp,
28            pb: options.pb,
29            dict_size_log2: options.dictionary_size_exponent,
30        }
31    }
32
33    /// Get the capabilities field.
34    pub fn capabilities(&self) -> u8 {
35        self.capabilities
36    }
37
38    /// Get the error correction level from the capabilities.
39    pub fn error_correction(&self) -> ErrorCorrection {
40        match self.capabilities & 0b11 {
41            0b00 => ErrorCorrection::None,
42            0b01 => ErrorCorrection::Standard,
43            0b10 => ErrorCorrection::Paranoid,
44            0b11 => ErrorCorrection::Extreme,
45            _ => unreachable!(),
46        }
47    }
48
49    /// Get the prefilter used.
50    pub fn prefilter(&self) -> Prefilter {
51        self.prefilter
52    }
53
54    /// Get the LZMA literal context bits.
55    pub fn lc(&self) -> u8 {
56        self.lc
57    }
58
59    /// Get the LZMA literal position bits.
60    pub fn lp(&self) -> u8 {
61        self.lp
62    }
63
64    /// Get the LZMA position bits.
65    pub fn pb(&self) -> u8 {
66        self.pb
67    }
68
69    /// Get the actual dictionary size.
70    pub fn dict_size(&self) -> u32 {
71        2u32.pow(self.dict_size_log2 as u32)
72            .min(lzma::DICT_SIZE_MAX)
73    }
74
75    /// Get the actual block size.
76    pub fn block_size(&self) -> u64 {
77        2u64.pow(self.block_size_exponent as u32)
78    }
79
80    /// Parse an TOA header from a buffer.
81    pub fn parse(buffer: &[u8; 32], apply_rs_correction: bool) -> crate::Result<TOAHeader> {
82        let mut corrected_buffer = *buffer;
83
84        if apply_rs_correction {
85            let mut header_codeword = *buffer;
86
87            let corrected = code_32_10::decode(&mut header_codeword)
88                .map_err(|_| error_invalid_data("header Reed-Solomon correction failed"))?;
89
90            if corrected {
91                eprintln!("Header errors detected and corrected by Reed-Solomon");
92                corrected_buffer = header_codeword;
93            }
94        }
95
96        if corrected_buffer[0..4] != TOA_MAGIC {
97            return Err(error_invalid_data("invalid TOA magic bytes"));
98        }
99
100        let version = corrected_buffer[4];
101        if version != TOA_VERSION {
102            return Err(error_unsupported("unsupported TOA version"));
103        }
104
105        let capabilities = corrected_buffer[5];
106        if (capabilities & 0b11111100) != 0 {
107            return Err(error_unsupported(
108                "unsupported TOA capabilities (reserved bits set)",
109            ));
110        }
111
112        let error_correction_bits = capabilities & 0b11;
113        if error_correction_bits > 0b11 {
114            return Err(error_unsupported("invalid error correction level"));
115        }
116
117        let prefilter_byte = corrected_buffer[6];
118        let prefilter = Prefilter::try_from(prefilter_byte)
119            .map_err(|_| error_invalid_data("unsupported prefilter type"))?;
120
121        let block_size_exponent = corrected_buffer[7];
122        if !(16u8..=62u8).contains(&block_size_exponent) {
123            return Err(error_invalid_data("invalid block size exponent"));
124        }
125
126        let lzma_props_byte = corrected_buffer[8];
127        let dict_size_log2 = corrected_buffer[9];
128
129        let lc = lzma_props_byte % 9;
130        let temp = lzma_props_byte / 9;
131        let lp = temp % 5;
132        let pb = temp / 5;
133
134        if lc > 8 || lp > 4 || pb > 4 {
135            return Err(error_invalid_data("invalid LZMA properties"));
136        }
137        if !(16u8..=31u8).contains(&dict_size_log2) {
138            return Err(error_invalid_data("invalid dictionary size"));
139        }
140
141        Ok(TOAHeader {
142            capabilities,
143            prefilter,
144            block_size_exponent,
145            lc,
146            lp,
147            pb,
148            dict_size_log2,
149        })
150    }
151
152    /// Write the header to a encoder.
153    pub fn write<W: Write>(&self, mut encoder: W) -> crate::Result<()> {
154        let mut data_bytes = [0u8; 10];
155
156        data_bytes[0..4].copy_from_slice(&TOA_MAGIC);
157        data_bytes[4] = TOA_VERSION;
158        data_bytes[5] = self.capabilities;
159        data_bytes[6] = u8::from(self.prefilter);
160        data_bytes[7] = self.block_size_exponent;
161
162        let lzma_props_byte = (self.pb * 5 + self.lp) * 9 + self.lc;
163        data_bytes[8] = lzma_props_byte;
164        data_bytes[9] = self.dict_size_log2;
165
166        let parity_bytes = code_32_10::encode(&data_bytes);
167        let mut header_bytes = [0; 32];
168        header_bytes[..10].copy_from_slice(&data_bytes);
169        header_bytes[10..].copy_from_slice(&parity_bytes);
170
171        encoder.write_all(&header_bytes)
172    }
173}
174
/// TOA block header containing size information, hash, and Reed-Solomon parity.
///
/// Serialized as 64 bytes: the big-endian size/flags word, the 32-byte hash,
/// then the 24 parity bytes.
#[derive(Debug, Clone, Copy)]
pub struct TOABlockHeader {
    /// Physical block size in the low 62 bits. Bit 62 is the partial-block
    /// flag; `new` always leaves bit 63 clear.
    physical_size_with_flags: u64,
    /// Blake3 hash carried by the header.
    // NOTE(review): presumably the hash of the block's contents — confirm against the caller.
    blake3_hash: [u8; 32],
    /// Reed-Solomon parity computed over the 40 preceding bytes
    /// (size/flags word followed by the hash).
    rs_parity: [u8; 24],
}
182
183impl TOABlockHeader {
184    /// Create a block header with appropriate MSB flags.
185    pub fn new(physical_size: u64, is_partial: bool, blake3_hash: [u8; 32]) -> Self {
186        // Clear top 2 bits
187        let mut physical_size_with_flags = physical_size & !(0b11u64 << 62);
188
189        if is_partial {
190            // Set partial block flag
191            physical_size_with_flags |= 1u64 << 62;
192        }
193
194        let mut payload = [0u8; 40];
195        payload[..8].copy_from_slice(&physical_size_with_flags.to_be_bytes());
196        payload[8..].copy_from_slice(&blake3_hash);
197
198        let rs_parity = code_64_40::encode(&payload);
199
200        Self {
201            physical_size_with_flags,
202            blake3_hash,
203            rs_parity,
204        }
205    }
206
207    /// Parse a block header from a buffer.
208    pub fn parse(buffer: &[u8; 64], apply_rs_correction: bool) -> crate::Result<TOABlockHeader> {
209        let mut corrected_buffer = *buffer;
210
211        if apply_rs_correction {
212            let mut codeword = [0u8; 64];
213            codeword.copy_from_slice(buffer);
214
215            match code_64_40::decode(&mut codeword) {
216                Ok(corrected) => {
217                    if corrected {
218                        eprintln!("block header errors detected and corrected by Reed-Solomon");
219                        corrected_buffer.copy_from_slice(&codeword);
220                    }
221                }
222                Err(_) => {
223                    return Err(error_invalid_data(
224                        "block header Reed-Solomon correction failed",
225                    ));
226                }
227            }
228        }
229
230        let physical_size_with_flags = u64::from_be_bytes([
231            corrected_buffer[0],
232            corrected_buffer[1],
233            corrected_buffer[2],
234            corrected_buffer[3],
235            corrected_buffer[4],
236            corrected_buffer[5],
237            corrected_buffer[6],
238            corrected_buffer[7],
239        ]);
240
241        let mut blake3_hash = [0u8; 32];
242        blake3_hash.copy_from_slice(&corrected_buffer[8..40]);
243
244        let mut rs_parity = [0u8; 24];
245        rs_parity.copy_from_slice(&corrected_buffer[40..64]);
246
247        Ok(TOABlockHeader {
248            physical_size_with_flags,
249            blake3_hash,
250            rs_parity,
251        })
252    }
253
254    /// Get the Blake3 hash.
255    pub fn blake3_hash(&self) -> [u8; 32] {
256        self.blake3_hash
257    }
258
259    /// Get the Reed-Solomon parity data.
260    pub fn rs_parity(&self) -> [u8; 24] {
261        self.rs_parity
262    }
263
264    /// Get the physical block size without flag bits.
265    pub fn physical_size(&self) -> u64 {
266        self.physical_size_with_flags & !(0b11u64 << 62)
267    }
268
269    /// Check if this is a partial block (only allowed as the final block).
270    pub fn is_partial_block(&self) -> bool {
271        (self.physical_size_with_flags & (1u64 << 62)) != 0
272    }
273
274    /// Write the block header to a encoder.
275    pub fn write<W: Write>(&self, mut encoder: W) -> crate::Result<()> {
276        let mut header_bytes = [0u8; 64];
277        header_bytes[0..8].copy_from_slice(&self.physical_size_with_flags.to_be_bytes());
278        header_bytes[8..40].copy_from_slice(&self.blake3_hash);
279        header_bytes[40..64].copy_from_slice(&self.rs_parity);
280
281        encoder.write_all(&header_bytes)
282    }
283}
284
285/// Determines whether a 64-byte buffer is a file trailer or block header after applying ECC.
286///
287/// This function safely checks the MSB (bit 63) to distinguish between:
288/// - File trailer: MSB = 1
289/// - Block header: MSB = 0
290///
291/// The MSB check is only performed after optional Reed-Solomon error correction to ensure
292/// corrupted bits don't cause misidentification.
293///
294/// # Returns
295/// - `Ok(true)` if the buffer is a file trailer
296/// - `Ok(false)` if the buffer is a block header
297/// - `Err` if Reed-Solomon correction fails
298pub fn is_trailer_after_ecc(buffer: &[u8; 64], apply_rs_correction: bool) -> Result<bool> {
299    if apply_rs_correction {
300        let mut codeword = *buffer;
301
302        code_64_40::decode(&mut codeword)
303            .map_err(|_| error_invalid_data("Reed-Solomon correction failed for header/trailer"))?;
304
305        Ok((codeword[0] & 0x80) != 0)
306    } else {
307        Ok((buffer[0] & 0x80) != 0)
308    }
309}
310
#[cfg(test)]
mod tests {
    use super::*;

    /// A file header written out and parsed back (with Reed-Solomon decoding
    /// enabled) must report the same values through every accessor.
    #[test]
    fn test_toa_header_roundtrip() {
        let options = TOAOptions::from_preset(5)
            .with_prefilter(Prefilter::BcjX86)
            .with_block_size_exponent(Some(20));
        let original = TOAHeader::from_options(&options);

        let mut serialized = Vec::new();
        original.write(&mut serialized).unwrap();

        let mut raw = [0u8; 32];
        raw.copy_from_slice(&serialized);
        let decoded = TOAHeader::parse(&raw, true).unwrap();

        assert_eq!(decoded.capabilities(), original.capabilities());
        assert_eq!(decoded.prefilter(), original.prefilter());
        assert_eq!(
            (decoded.lc(), decoded.lp(), decoded.pb()),
            (original.lc(), original.lp(), original.pb())
        );
        assert_eq!(decoded.dict_size(), original.dict_size());
        assert_eq!(decoded.block_size(), original.block_size());
    }

    /// A block header written out and parsed back (with Reed-Solomon decoding
    /// enabled) must preserve size, partial flag, hash and parity.
    #[test]
    fn test_toa_block_header_roundtrip() {
        let physical_size = 65536;
        let is_partial = false;
        let blake3_hash = [42u8; 32];
        let original = TOABlockHeader::new(physical_size, is_partial, blake3_hash);

        let mut serialized = Vec::new();
        original.write(&mut serialized).unwrap();

        let mut raw = [0u8; 64];
        raw.copy_from_slice(&serialized);
        let decoded = TOABlockHeader::parse(&raw, true).unwrap();

        assert_eq!(decoded.physical_size(), physical_size);
        assert_eq!(decoded.is_partial_block(), is_partial);
        assert_eq!(decoded.blake3_hash(), blake3_hash);
        assert_eq!(decoded.rs_parity(), original.rs_parity());
    }
}
359}