struct_compression_analyzer/
offset_evaluator.rs

1//! # Bit-Packed Offset Evaluator
2//!
//! This module automatically determines the offsets (and, in the future, lengths) of
//! the data we're processing based on a set of simple rules. This is the magic that allows
//! us to feed in files like `.DDS` without manually stripping the unwanted metadata.
6//!
7//! ## What It Does
8//!
9//! - Finds data structures based on simple byte checks
10//! - Handles both MSB and LSB bit order formats
11//! - Works with file streams or in-memory byte slices
12//! - Can be used to validate file headers and exclude unwanted data
13//!
14//! ## Public API
15//!
16//! ### Main Types
17//!
18//! - [ConditionalOffset]: Defines conditions for offset evaluation
19//! - [Condition]: Individual condition for bit pattern matching
20//!
21//! ### Key Functions
22//!
23//! - [`try_evaluate_file_offset()`]: Find offset in file
24//! - [`try_evaluate_offset()`]: Find offset in byte slice
25//!
26//! ## Example Usage
27//!
28//! Evaluate an offset for sample data.
29//!
30//! ```rust
31//! use struct_compression_analyzer::offset_evaluator::try_evaluate_offset;
32//! use struct_compression_analyzer::schema::{BitOrder, Condition, ConditionalOffset};
33//!
34//! let mut sample_data = vec![0u8; 0x80 + 4];
35//! // Set DDS magic
36//! sample_data[0x00..0x04].copy_from_slice(&[0x44, 0x44, 0x53, 0x20]);
37//! // Set DX10 header
38//! sample_data[0x54..0x58].copy_from_slice(&[0x44, 0x58, 0x31, 0x30]);
39//!
40//! // DDS with DX10 header (BC7, BC6H etc.)
41//! let conditions = vec![ConditionalOffset {
42//!     offset: 0x94, // Offset to jump to (DX10 block data)
43//!     conditions: vec![
44//!         Condition {
45//!             byte_offset: 0, // File Magic
46//!             bit_offset: 0,
47//!             bits: 32,
48//!             value: 0x44445320, // DDS magic
49//!             bit_order: BitOrder::Msb,
50//!         },
51//!         Condition {
52//!             byte_offset: 0x54,
53//!             bit_offset: 0,
54//!             bits: 32,
55//!             value: 0x44583130, // 'DX10' fourCC code
56//!             bit_order: BitOrder::Msb,
57//!         },
58//!     ],
59//! }];
60//!
61//! let result = try_evaluate_offset(&conditions, &sample_data);
62//! assert_eq!(result, Some(0x94));
63//! ```
64
65use crate::{
66    schema::{BitOrder, Condition, ConditionalOffset},
67    utils::analyze_utils::reverse_bits,
68};
69use bitstream_io::{BigEndian, BitRead, BitReader};
70use std::{
71    fs::File,
72    io::{self, Cursor, Read, Seek, SeekFrom},
73};
74
75pub fn try_evaluate_file_offset(
76    conditional_offsets: &[ConditionalOffset],
77    file: &mut File,
78) -> io::Result<Option<u64>> {
79    // Calculate maximum needed read length from all conditions
80    let max_read = conditional_offsets
81        .iter()
82        .flat_map(|o| &o.conditions)
83        .map(|c| c.byte_offset + (c.bits as u64).div_ceil(8)) // Bytes needed
84        .max()
85        .unwrap_or(0);
86
87    // Read required portion without reopening file
88    file.seek(SeekFrom::Start(0))?;
89    let mut data = unsafe { Box::new_uninit_slice(max_read as usize).assume_init() };
90    file.read_exact(&mut data)?;
91
92    Ok(try_evaluate_offset(conditional_offsets, &data))
93}
94
95pub fn try_evaluate_offset(conditional_offsets: &[ConditionalOffset], data: &[u8]) -> Option<u64> {
96    for offset_def in conditional_offsets {
97        if matches_all_conditions(offset_def, data) {
98            return Some(offset_def.offset);
99        }
100    }
101    None
102}
103
104fn matches_all_conditions(offset_def: &ConditionalOffset, data: &[u8]) -> bool {
105    offset_def
106        .conditions
107        .iter()
108        .all(|cond| check_condition(cond, data))
109}
110
111fn check_condition(condition: &Condition, data: &[u8]) -> bool {
112    let mut reader = BitReader::endian(Cursor::new(data), BigEndian);
113    let start_bit = (condition.byte_offset * 8) + condition.bit_offset as u64;
114
115    if reader.seek_bits(SeekFrom::Start(start_bit)).is_err() {
116        return false;
117    }
118
119    let comp_value = match condition.bit_order {
120        BitOrder::Default => condition.value,
121        BitOrder::Msb => condition.value,
122        BitOrder::Lsb => reverse_bits(condition.bits as u32, condition.value),
123    };
124
125    match reader.read::<u64>(condition.bits as u32) {
126        Ok(extracted) => extracted == comp_value,
127        Err(_) => false,
128    }
129}
130
#[cfg(test)]
mod byte_tests {
    use super::*;
    use crate::schema::{BitOrder, Condition, ConditionalOffset};

    /// `"DDS "` file magic, stored at byte 0x00.
    const DDS_MAGIC: [u8; 4] = [0x44, 0x44, 0x53, 0x20];
    /// `"DX10"` fourCC, stored at byte 0x54.
    const DX10_FOURCC: [u8; 4] = [0x44, 0x58, 0x31, 0x30];

    /// Conditions describing a DDS file with a DX10 header (block data at 0x94).
    fn create_bc7_conditions() -> Vec<ConditionalOffset> {
        // Both checks are byte-aligned 32-bit MSB comparisons.
        let dword_at = |byte_offset, value| Condition {
            byte_offset,
            bit_offset: 0,
            bits: 32,
            value,
            bit_order: BitOrder::Msb,
        };

        vec![ConditionalOffset {
            offset: 0x94,
            conditions: vec![dword_at(0x00, 0x44445320), dword_at(0x54, 0x44583130)],
        }]
    }

    /// Builds a zeroed header carrying the DDS magic and the given fourCC bytes.
    fn dds_bytes(fourcc: [u8; 4]) -> Vec<u8> {
        let mut bytes = vec![0u8; 0x80 + 4];
        bytes[0x00..0x04].copy_from_slice(&DDS_MAGIC);
        bytes[0x54..0x58].copy_from_slice(&fourcc);
        bytes
    }

    #[test]
    fn matches_valid_bc7_header() {
        let data = dds_bytes(DX10_FOURCC);

        assert_eq!(
            try_evaluate_offset(&create_bc7_conditions(), &data),
            Some(0x94)
        );
    }

    #[test]
    fn rejects_invalid_dx10_header() {
        // "ABCD" where the DX10 fourCC is expected.
        let data = dds_bytes([0x41, 0x42, 0x43, 0x44]);

        assert_eq!(try_evaluate_offset(&create_bc7_conditions(), &data), None);
    }

    #[test]
    fn handles_short_data() {
        // Ends before the DX10 fourCC at 0x54, so that condition cannot be read.
        let data = vec![0u8; 0x50];

        assert_eq!(try_evaluate_offset(&create_bc7_conditions(), &data), None);
    }

    #[test]
    fn matches_valid_bc7_header_from_yaml() {
        let yaml_data = r#"
            - offset: 0x94
              conditions:
                - byte_offset: 0
                  bit_offset: 0
                  bits: 32
                  value: 0x44445320
                - byte_offset: 0x54
                  bit_offset: 0
                  bits: 32
                  value: 0x44583130
        "#;

        // Guards the YAML path: a hex `value` must be compared in big-endian form,
        // exactly like the hand-built conditions above.
        let conditions: Vec<ConditionalOffset> = serde_yaml::from_str(yaml_data).unwrap();
        let data = dds_bytes(DX10_FOURCC);

        assert_eq!(try_evaluate_offset(&conditions, &data), Some(0x94));
    }
}
215
#[cfg(test)]
mod bit_tests {
    use super::*;
    use crate::schema::{BitOrder, Condition, ConditionalOffset};

    /// A sub-byte field followed by a whole-byte field must both match.
    #[test]
    fn validates_bitstream_header() {
        // Bits 4..8 of byte 0 (its low nibble when read MSB-first).
        let low_nibble = Condition {
            byte_offset: 0,
            bit_offset: 4,
            bits: 4,
            value: 0b1110,
            bit_order: BitOrder::Msb,
        };
        // All of byte 1.
        let second_byte = Condition {
            byte_offset: 1,
            bit_offset: 0,
            bits: 8,
            value: 0xC0,
            bit_order: BitOrder::Msb,
        };
        let header = ConditionalOffset {
            offset: 0,
            conditions: vec![low_nibble, second_byte],
        };

        // Matching header: low nibble of byte 0 is 0xE and byte 1 is 0xC0.
        let valid_data: [u8; 3] = [0x0E, 0xC0, 0x00];
        assert!(matches_all_conditions(&header, &valid_data));

        // Mismatching header: low nibble of byte 0 is 0xB.
        let invalid_data: [u8; 3] = [0x0B, 0xC0, 0x00];
        assert!(!matches_all_conditions(&header, &invalid_data));
    }
}
254
#[cfg(test)]
mod endian_tests {
    use super::*;
    use crate::schema::{BitOrder, Condition};

    /// Single byte whose first four bits (MSB-first) are `0011`.
    const SAMPLE: [u8; 1] = [0b0011_0000];

    /// Checks the first four bits of [`SAMPLE`] against `value` in the given bit order.
    fn first_nibble_matches(value: u64, bit_order: BitOrder) -> bool {
        let condition = Condition {
            byte_offset: 0,
            bit_offset: 0,
            bits: 4,
            value,
            bit_order,
        };
        check_condition(&condition, &SAMPLE)
    }

    #[test]
    fn big_endian() {
        // MSB order: the expected value is compared as written.
        assert!(first_nibble_matches(0b0011, BitOrder::Msb));
    }

    #[test]
    fn little_endian() {
        // LSB order: the expected value is bit-reversed (1100 -> 0011) before comparing.
        assert!(first_nibble_matches(0b1100, BitOrder::Lsb));
    }
}