exif_oxide/
value_extraction.rs

1//! Value extraction functions for EXIF/TIFF data
2//!
3//! This module contains pure functions for extracting typed values from EXIF/TIFF data,
4//! translating ExifTool's value extraction logic from lib/Image/ExifTool/Exif.pm.
5//! These functions handle inline vs offset storage, byte order conversion, and array processing.
6
7use crate::tiff_types::{ByteOrder, IfdEntry};
8use crate::types::{ExifError, Result, TagValue};
9
10/// Extract ASCII string value from IFD entry
11/// ExifTool: lib/Image/ExifTool/Exif.pm:6372-6398 ASCII value handling
12pub fn extract_ascii_value(data: &[u8], entry: &IfdEntry, byte_order: ByteOrder) -> Result<String> {
13    // debug!("extract_ascii_value: tag {:#x}, count {}, is_inline: {}, value_or_offset: {:#x}",
14    //        entry.tag_id, entry.count, entry.is_inline(), entry.value_or_offset);
15
16    let value_data = if entry.is_inline() {
17        // Value stored inline in the 4-byte value field
18        // ExifTool: lib/Image/ExifTool/Exif.pm:6372 inline value handling
19        let bytes = match byte_order {
20            ByteOrder::BigEndian => entry.value_or_offset.to_be_bytes(),
21            ByteOrder::LittleEndian => entry.value_or_offset.to_le_bytes(),
22        };
23        // debug!("Inline bytes for tag {:#x}: {:02x?}", entry.tag_id, &bytes[..entry.count.min(4) as usize]);
24        bytes[..entry.count.min(4) as usize].to_vec()
25    } else {
26        // Value stored at offset
27        // ExifTool: lib/Image/ExifTool/Exif.pm:6398 offset value handling
28        let offset = entry.value_or_offset as usize;
29        let size = entry.count as usize;
30
31        if offset + size > data.len() {
32            return Err(ExifError::ParseError(format!(
33                "ASCII value offset {offset:#x} + size {size} beyond data bounds"
34            )));
35        }
36
37        data[offset..offset + size].to_vec()
38    };
39
40    // Convert bytes to string with null-termination handling
41    // ExifTool handles null-terminated strings gracefully
42    let null_pos = value_data
43        .iter()
44        .position(|&b| b == 0)
45        .unwrap_or(value_data.len());
46    let trimmed = &value_data[..null_pos];
47
48    // Convert to UTF-8, handling invalid sequences gracefully
49    match String::from_utf8(trimmed.to_vec()) {
50        Ok(s) => Ok(s.trim().to_string()), // Trim whitespace
51        Err(_) => {
52            // Fallback for invalid UTF-8 - convert lossy
53            Ok(String::from_utf8_lossy(trimmed).trim().to_string())
54        }
55    }
56}
57
58/// Extract SHORT (u16) value from IFD entry
59/// ExifTool: lib/Image/ExifTool/Exif.pm:6372-6398 value extraction
60pub fn extract_short_value(data: &[u8], entry: &IfdEntry, byte_order: ByteOrder) -> Result<u16> {
61    if entry.count != 1 {
62        return Err(ExifError::ParseError(format!(
63            "SHORT value with count {} not supported yet",
64            entry.count
65        )));
66    }
67
68    if entry.is_inline() {
69        // Value stored inline - use lower 2 bytes of value_or_offset
70        // ExifTool: lib/Image/ExifTool/Exif.pm:6372 inline value handling
71        // The value_or_offset field is always stored in the file's byte order
72        let bytes = match byte_order {
73            ByteOrder::LittleEndian => entry.value_or_offset.to_le_bytes(),
74            ByteOrder::BigEndian => entry.value_or_offset.to_be_bytes(),
75        };
76        // For inline SHORT values, use the first 2 bytes in the correct order
77        Ok(match byte_order {
78            ByteOrder::LittleEndian => u16::from_le_bytes([bytes[0], bytes[1]]),
79            ByteOrder::BigEndian => u16::from_be_bytes([bytes[0], bytes[1]]),
80        })
81    } else {
82        // Value stored at offset
83        let offset = entry.value_or_offset as usize;
84        byte_order.read_u16(data, offset)
85    }
86}
87
88/// Extract BYTE (u8) value from IFD entry
89/// ExifTool: lib/Image/ExifTool/Exif.pm:6372-6398 value extraction
90pub fn extract_byte_value(data: &[u8], entry: &IfdEntry) -> Result<u8> {
91    if entry.count != 1 {
92        return Err(ExifError::ParseError(format!(
93            "BYTE value with count {} not supported yet",
94            entry.count
95        )));
96    }
97
98    if entry.is_inline() {
99        // Value stored inline - use lowest byte of value_or_offset
100        // ExifTool: lib/Image/ExifTool/Exif.pm:6372 inline value handling
101        Ok(entry.value_or_offset as u8)
102    } else {
103        // Value stored at offset
104        let offset = entry.value_or_offset as usize;
105        if offset >= data.len() {
106            return Err(ExifError::ParseError(format!(
107                "BYTE value offset {offset:#x} beyond data bounds"
108            )));
109        }
110        Ok(data[offset])
111    }
112}
113
114/// Extract LONG (u32) value from IFD entry
115/// ExifTool: lib/Image/ExifTool/Exif.pm:6372-6398 value extraction
116pub fn extract_long_value(data: &[u8], entry: &IfdEntry, byte_order: ByteOrder) -> Result<u32> {
117    if entry.count != 1 {
118        return Err(ExifError::ParseError(format!(
119            "LONG value with count {} not supported yet",
120            entry.count
121        )));
122    }
123
124    if entry.is_inline() {
125        // Value stored inline
126        Ok(entry.value_or_offset)
127    } else {
128        // Value stored at offset
129        let offset = entry.value_or_offset as usize;
130        byte_order.read_u32(data, offset)
131    }
132}
133
134/// Extract RATIONAL (2x u32) value - numerator and denominator
135/// ExifTool: lib/Image/ExifTool/Exif.pm format 5 (rational64u)
136pub fn extract_rational_value(
137    data: &[u8],
138    entry: &IfdEntry,
139    byte_order: ByteOrder,
140) -> Result<TagValue> {
141    if entry.count == 1 {
142        // Single rational value
143        if entry.is_inline() {
144            // 8-byte rational cannot fit inline (4-byte field), so this should never happen
145            return Err(ExifError::ParseError(
146                "RATIONAL value cannot be stored inline".to_string(),
147            ));
148        }
149
150        // Value stored at offset - read 2x uint32
151        let offset = entry.value_or_offset as usize;
152        if offset + 8 > data.len() {
153            return Err(ExifError::ParseError(format!(
154                "RATIONAL value offset {offset:#x} + 8 bytes beyond data bounds"
155            )));
156        }
157
158        let numerator = byte_order.read_u32(data, offset)?;
159        let denominator = byte_order.read_u32(data, offset + 4)?;
160        Ok(TagValue::Rational(numerator, denominator))
161    } else {
162        // Multiple rational values - GPS coordinates use 3 rationals
163        if entry.is_inline() {
164            return Err(ExifError::ParseError(
165                "RATIONAL array cannot be stored inline".to_string(),
166            ));
167        }
168
169        let offset = entry.value_or_offset as usize;
170        let total_size = entry.count as usize * 8; // 8 bytes per rational
171        if offset + total_size > data.len() {
172            return Err(ExifError::ParseError(format!(
173                "RATIONAL array offset {offset:#x} + {total_size} bytes beyond data bounds"
174            )));
175        }
176
177        let mut rationals = Vec::new();
178        for i in 0..entry.count {
179            let rat_offset = offset + (i as usize * 8);
180            let numerator = byte_order.read_u32(data, rat_offset)?;
181            let denominator = byte_order.read_u32(data, rat_offset + 4)?;
182            rationals.push((numerator, denominator));
183        }
184        Ok(TagValue::RationalArray(rationals))
185    }
186}
187
188/// Extract SRATIONAL (2x i32) value - signed numerator and denominator
189/// ExifTool: lib/Image/ExifTool/Exif.pm format 10 (rational64s)
190pub fn extract_srational_value(
191    data: &[u8],
192    entry: &IfdEntry,
193    byte_order: ByteOrder,
194) -> Result<TagValue> {
195    if entry.count == 1 {
196        // Single signed rational value
197        if entry.is_inline() {
198            return Err(ExifError::ParseError(
199                "SRATIONAL value cannot be stored inline".to_string(),
200            ));
201        }
202
203        let offset = entry.value_or_offset as usize;
204        if offset + 8 > data.len() {
205            return Err(ExifError::ParseError(format!(
206                "SRATIONAL value offset {offset:#x} + 8 bytes beyond data bounds"
207            )));
208        }
209
210        // Read as u32 first, then convert to i32 to handle signed values correctly
211        let numerator_u32 = byte_order.read_u32(data, offset)?;
212        let denominator_u32 = byte_order.read_u32(data, offset + 4)?;
213        let numerator = numerator_u32 as i32;
214        let denominator = denominator_u32 as i32;
215        Ok(TagValue::SRational(numerator, denominator))
216    } else {
217        // Multiple signed rational values
218        if entry.is_inline() {
219            return Err(ExifError::ParseError(
220                "SRATIONAL array cannot be stored inline".to_string(),
221            ));
222        }
223
224        let offset = entry.value_or_offset as usize;
225        let total_size = entry.count as usize * 8;
226        if offset + total_size > data.len() {
227            return Err(ExifError::ParseError(format!(
228                "SRATIONAL array offset {offset:#x} + {total_size} bytes beyond data bounds"
229            )));
230        }
231
232        let mut rationals = Vec::new();
233        for i in 0..entry.count {
234            let rat_offset = offset + (i as usize * 8);
235            let numerator_u32 = byte_order.read_u32(data, rat_offset)?;
236            let denominator_u32 = byte_order.read_u32(data, rat_offset + 4)?;
237            let numerator = numerator_u32 as i32;
238            let denominator = denominator_u32 as i32;
239            rationals.push((numerator, denominator));
240        }
241        Ok(TagValue::SRationalArray(rationals))
242    }
243}
244
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tiff_types::TiffFormat;

    /// Three inline characters followed by a NUL decode to the bare string.
    #[test]
    fn test_extract_ascii_inline() {
        let make_tag = IfdEntry {
            tag_id: 0x010f,
            format: TiffFormat::Ascii,
            count: 3,
            // "ABC" + null byte, packed as the little-endian field value
            value_or_offset: u32::from_le_bytes(*b"ABC\0"),
        };

        let text = extract_ascii_value(&[], &make_tag, ByteOrder::LittleEndian).unwrap();

        assert_eq!(text, "ABC");
    }

    /// A big-endian inline SHORT sits in the upper half of the field value.
    #[test]
    fn test_extract_short_inline() {
        let width_tag = IfdEntry {
            tag_id: 0x0100,
            format: TiffFormat::Short,
            count: 1,
            // 0x1234 stored in the first 2 bytes of a big-endian field
            value_or_offset: 0x1234_0000,
        };

        let value = extract_short_value(&[], &width_tag, ByteOrder::BigEndian).unwrap();

        assert_eq!(value, 0x1234);
    }

    /// An inline BYTE is returned from the field value directly.
    #[test]
    fn test_extract_byte_inline() {
        let byte_tag = IfdEntry {
            tag_id: 0x0101,
            format: TiffFormat::Byte,
            count: 1,
            value_or_offset: 0x42,
        };

        let value = extract_byte_value(&[], &byte_tag).unwrap();

        assert_eq!(value, 0x42);
    }

    /// An inline LONG is the field value itself.
    #[test]
    fn test_extract_long_inline() {
        let long_tag = IfdEntry {
            tag_id: 0x0102,
            format: TiffFormat::Long,
            count: 1,
            value_or_offset: 0x1234_5678,
        };

        let value = extract_long_value(&[], &long_tag, ByteOrder::BigEndian).unwrap();

        assert_eq!(value, 0x1234_5678);
    }

    /// A single RATIONAL is read as two big-endian u32 words at the given offset.
    #[test]
    fn test_extract_rational_at_offset() {
        // 300/1 in big-endian
        let mut data = [0u8; 8];
        data[..4].copy_from_slice(&300u32.to_be_bytes());
        data[4..].copy_from_slice(&1u32.to_be_bytes());

        let resolution_tag = IfdEntry {
            tag_id: 0x011a,
            format: TiffFormat::Rational,
            count: 1,
            value_or_offset: 0, // Data starts at offset 0
        };

        match extract_rational_value(&data, &resolution_tag, ByteOrder::BigEndian).unwrap() {
            TagValue::Rational(num, den) => {
                assert_eq!(num, 300);
                assert_eq!(den, 1);
            }
            _ => panic!("Expected TagValue::Rational"),
        }
    }

    /// A single SRATIONAL keeps the sign of its numerator.
    #[test]
    fn test_extract_srational_at_offset() {
        // -100/1 in big-endian
        let mut data = [0u8; 8];
        data[..4].copy_from_slice(&(-100i32).to_be_bytes());
        data[4..].copy_from_slice(&1i32.to_be_bytes());

        let signed_tag = IfdEntry {
            tag_id: 0x9201,
            format: TiffFormat::SRational,
            count: 1,
            value_or_offset: 0,
        };

        match extract_srational_value(&data, &signed_tag, ByteOrder::BigEndian).unwrap() {
            TagValue::SRational(num, den) => {
                assert_eq!(num, -100);
                assert_eq!(den, 1);
            }
            _ => panic!("Expected TagValue::SRational"),
        }
    }
}