exif_oxide/exif/
binary_data.rs

1//! Variable ProcessBinaryData implementation for EXIF module
2//!
3//! This module implements ExifTool's variable-length ProcessBinaryData functionality
4//! with DataMember dependencies and format expression evaluation.
5//!
6//! **Trust ExifTool**: This code translates ExifTool's ProcessBinaryData verbatim,
7//! including the two-phase processing and $val{} expression evaluation.
8//!
9//! Primary ExifTool References:
10//! - lib/Image/ExifTool.pm:9750+ ProcessBinaryData function
11//! - lib/Image/ExifTool.pm:9850-9856 format expression evaluation
12
13use crate::tiff_types::ByteOrder;
14use crate::types::{
15    BinaryDataFormat, DataMemberValue, ExifError, ExpressionEvaluator, ResolvedFormat, Result,
16    TagSourceInfo, TagValue,
17};
18use tracing::debug;
19
20use super::ExifReader;
21
22impl ExifReader {
23    /// Process binary data with variable-length formats and DataMember dependencies
24    /// ExifTool: ProcessBinaryData with two-phase processing for DataMember tags
25    /// Reference: third-party/exiftool/lib/Image/ExifTool.pm:9750+ ProcessBinaryData function
26    /// Milestone 12: Variable ProcessBinaryData implementation
27    pub fn process_binary_data_with_dependencies(
28        &mut self,
29        data: &[u8],
30        offset: usize,
31        size: usize,
32        table: &crate::types::BinaryDataTable,
33    ) -> Result<()> {
34        debug!(
35            "Processing binary data with dependencies: offset={:#x}, size={}, format={:?}",
36            offset, size, table.default_format
37        );
38
39        // Build dependency order if not already analyzed
40        let mut table = table.clone();
41        if table.dependency_order.is_empty() {
42            table.analyze_dependencies();
43        }
44
45        // Create expression evaluator with current DataMember context
46        let val_hash = std::collections::HashMap::new();
47        let mut evaluator = ExpressionEvaluator::new(val_hash, &self.data_members);
48
49        // Track cumulative offset for variable-length entries
50        // ExifTool: ProcessBinaryData processes entries sequentially, accounting for variable sizes
51        let mut cumulative_offset = 0;
52        let first_entry = table.first_entry.unwrap_or(0);
53
54        // Process tags in dependency order
55        for &index in &table.dependency_order {
56            if let Some(tag_def) = table.tags.get(&index) {
57                if index < first_entry {
58                    continue;
59                }
60
61                // Calculate current data position
62                let data_offset = offset + cumulative_offset;
63
64                debug!(
65                    "Processing tag {} at index {}: offset={:#x}, cumulative_offset={}",
66                    tag_def.name, index, data_offset, cumulative_offset
67                );
68
69                // Get format specification and resolve expressions if needed
70                let format_spec = table
71                    .get_tag_format_spec(index)
72                    .unwrap_or(crate::types::FormatSpec::Fixed(table.default_format));
73
74                let resolved_format = if format_spec.needs_evaluation() {
75                    match evaluator.resolve_format(&format_spec) {
76                        Ok(resolved) => resolved,
77                        Err(e) => {
78                            debug!("Failed to resolve format for tag {}: {}", tag_def.name, e);
79                            continue;
80                        }
81                    }
82                } else {
83                    match format_spec {
84                        crate::types::FormatSpec::Fixed(format) => {
85                            crate::types::ResolvedFormat::Single(format)
86                        }
87                        _ => {
88                            debug!(
89                                "Unexpected format spec that doesn't need evaluation: {:?}",
90                                format_spec
91                            );
92                            continue;
93                        }
94                    }
95                };
96
97                // Extract value based on resolved format
98                let raw_value =
99                    match self.extract_with_resolved_format(data, data_offset, &resolved_format) {
100                        Ok(value) => value,
101                        Err(e) => {
102                            debug!("Failed to extract value for tag {}: {}", tag_def.name, e);
103                            continue;
104                        }
105                    };
106
107                // Convert to DataMember value for $val hash
108                let data_member_value = match &raw_value {
109                    TagValue::U8(v) => DataMemberValue::U8(*v),
110                    TagValue::U16(v) => DataMemberValue::U16(*v),
111                    TagValue::U32(v) => DataMemberValue::U32(*v),
112                    TagValue::String(s) => DataMemberValue::String(s.clone()),
113                    // Convert other types to appropriate DataMember types
114                    TagValue::I16(v) => DataMemberValue::U16(*v as u16),
115                    TagValue::I32(v) => DataMemberValue::U32(*v as u32),
116                    _ => {
117                        if let Some(data_member_name) = &tag_def.data_member {
118                            debug!(
119                                "Cannot convert tag value {:?} to DataMember for {}",
120                                raw_value, data_member_name
121                            );
122                        }
123                        // Still store in $val hash as U16 for index reference
124                        DataMemberValue::U16(0)
125                    }
126                };
127
128                // Store in DataMember system if this tag is a DataMember
129                if let Some(data_member_name) = &tag_def.data_member {
130                    // Store in global DataMember collection (need to refactor to avoid borrow conflicts)
131                    debug!(
132                        "Would store DataMember '{}' = {:?} from tag {}",
133                        data_member_name, raw_value, tag_def.name
134                    );
135                    // TODO: Fix borrowing issue - need to restructure the evaluation
136                }
137
138                // Store in $val hash for current block references
139                evaluator.set_val(index, data_member_value);
140
141                // Update cumulative offset based on actual data size consumed
142                let consumed_bytes = match &resolved_format {
143                    crate::types::ResolvedFormat::Single(format) => format.byte_size(),
144                    crate::types::ResolvedFormat::Array(format, count) => {
145                        format.byte_size() * count
146                    }
147                    crate::types::ResolvedFormat::StringWithLength(length) => *length,
148                    crate::types::ResolvedFormat::VarString => {
149                        // Find actual string length in data
150                        let mut string_len = 0;
151                        let start_pos = data_offset;
152                        while start_pos + string_len < data.len()
153                            && data[start_pos + string_len] != 0
154                        {
155                            string_len += 1;
156                        }
157                        string_len + 1 // Include null terminator
158                    }
159                };
160
161                debug!(
162                    "Tag {} consumed {} bytes, new cumulative_offset={}",
163                    tag_def.name,
164                    consumed_bytes,
165                    cumulative_offset + consumed_bytes
166                );
167
168                cumulative_offset += consumed_bytes;
169
170                // Bounds check for next iteration
171                if cumulative_offset > size {
172                    debug!(
173                        "Cumulative offset {} exceeds data bounds {}",
174                        cumulative_offset, size
175                    );
176                    break;
177                }
178
179                // Apply PrintConv if available
180                let final_value = if let Some(print_conv) = &tag_def.print_conv {
181                    match &raw_value {
182                        TagValue::I16(val) => {
183                            if let Some(converted) = print_conv.get(&(*val as u32)) {
184                                TagValue::String(converted.clone())
185                            } else {
186                                raw_value
187                            }
188                        }
189                        TagValue::U16(val) => {
190                            if let Some(converted) = print_conv.get(&(*val as u32)) {
191                                TagValue::String(converted.clone())
192                            } else {
193                                raw_value
194                            }
195                        }
196                        _ => raw_value,
197                    }
198                } else {
199                    raw_value
200                };
201
202                // Store the tag with source info
203                let group_0 = table
204                    .groups
205                    .get(&0)
206                    .cloned()
207                    .unwrap_or_else(|| "Unknown".to_string());
208                let source_info =
209                    TagSourceInfo::new(group_0, "BinaryData".to_string(), "BinaryData".to_string());
210
211                self.extracted_tags.insert(index as u16, final_value);
212                self.tag_sources.insert(index as u16, source_info);
213
214                debug!(
215                    "Extracted binary tag {} (index {}) = {:?}",
216                    tag_def.name,
217                    index,
218                    self.extracted_tags.get(&(index as u16))
219                );
220            }
221        }
222
223        Ok(())
224    }
225
226    /// Extract value using resolved format specification
227    /// Handles single values, arrays, and variable-length strings
228    fn extract_with_resolved_format(
229        &self,
230        data: &[u8],
231        offset: usize,
232        resolved_format: &ResolvedFormat,
233    ) -> Result<TagValue> {
234        let byte_order = if let Some(header) = &self.header {
235            header.byte_order
236        } else {
237            ByteOrder::LittleEndian
238        };
239
240        match resolved_format {
241            ResolvedFormat::Single(format) => {
242                self.extract_single_binary_value(data, offset, *format, byte_order)
243            }
244            ResolvedFormat::Array(format, count) => {
245                self.extract_binary_array(data, offset, *format, *count, byte_order)
246            }
247            ResolvedFormat::StringWithLength(length) => {
248                if offset + length > data.len() {
249                    return Err(ExifError::ParseError(
250                        "String with length extends beyond data bounds".to_string(),
251                    ));
252                }
253                let string_bytes = &data[offset..offset + length];
254                let string_value = String::from_utf8_lossy(string_bytes).to_string();
255                Ok(TagValue::String(string_value))
256            }
257            ResolvedFormat::VarString => {
258                // Find null terminator
259                let mut end = offset;
260                while end < data.len() && data[end] != 0 {
261                    end += 1;
262                }
263                let string_bytes = &data[offset..end];
264                let string_value = String::from_utf8_lossy(string_bytes).to_string();
265                Ok(TagValue::String(string_value))
266            }
267        }
268    }
269
270    /// Extract a single binary value
271    fn extract_single_binary_value(
272        &self,
273        _data: &[u8],
274        offset: usize,
275        format: BinaryDataFormat,
276        _byte_order: ByteOrder,
277    ) -> Result<TagValue> {
278        // Use existing extract_binary_value from Canon module
279        crate::implementations::canon::extract_binary_value(self, offset, format, 1)
280    }
281
282    /// Extract an array of binary values
283    fn extract_binary_array(
284        &self,
285        data: &[u8],
286        offset: usize,
287        format: BinaryDataFormat,
288        count: usize,
289        byte_order: ByteOrder,
290    ) -> Result<TagValue> {
291        // BinaryDataFormat already imported at module level
292
293        if count == 0 {
294            return Ok(TagValue::U8Array(vec![]));
295        }
296
297        let format_size = format.byte_size();
298        let total_size = format_size * count;
299
300        if offset + total_size > data.len() {
301            return Err(ExifError::ParseError(format!(
302                "Array of {count} {format_size} elements extends beyond data bounds"
303            )));
304        }
305
306        match format {
307            BinaryDataFormat::Int16s => {
308                let mut values = Vec::new();
309                for i in 0..count {
310                    let value_offset = offset + i * format_size;
311                    let value = byte_order.read_u16(data, value_offset)? as i16;
312                    values.push(value);
313                }
314                // Convert to appropriate array type
315                Ok(TagValue::U16Array(
316                    values.into_iter().map(|v| v as u16).collect(),
317                ))
318            }
319            BinaryDataFormat::Int16u => {
320                let mut values = Vec::new();
321                for i in 0..count {
322                    let value_offset = offset + i * format_size;
323                    let value = byte_order.read_u16(data, value_offset)?;
324                    values.push(value);
325                }
326                Ok(TagValue::U16Array(values))
327            }
328            BinaryDataFormat::Int32u => {
329                let mut values = Vec::new();
330                for i in 0..count {
331                    let value_offset = offset + i * format_size;
332                    let value = byte_order.read_u32(data, value_offset)?;
333                    values.push(value);
334                }
335                Ok(TagValue::U32Array(values))
336            }
337            // Add more format types as needed
338            _ => Err(ExifError::ParseError(format!(
339                "Array extraction not yet implemented for format {format:?}"
340            ))),
341        }
342    }
343}