exif_oxide/formats/
mod.rs

1//! File format detection and processing
2//!
3//! This module handles different image file formats and extracts
4//! metadata from each according to format-specific requirements.
5
6mod detection;
7mod jpeg;
8mod tiff;
9
10pub use detection::{
11    detect_file_format, detect_file_format_from_path, get_format_properties, FileFormat,
12};
13pub use jpeg::{
14    extract_jpeg_exif, extract_jpeg_xmp, scan_jpeg_segments, JpegSegment, JpegSegmentInfo,
15};
16pub use tiff::{extract_tiff_exif, extract_tiff_xmp, get_tiff_endianness, validate_tiff_format};
17
18use crate::exif::ExifReader;
19use crate::file_detection::FileTypeDetector;
20use crate::generated::{EXIF_MAIN_TAGS, REQUIRED_PRINT_CONV, REQUIRED_VALUE_CONV};
21use crate::types::{ExifData, Result, TagEntry, TagValue};
22use crate::xmp::XmpProcessor;
23use std::collections::HashMap;
24use std::fs::File;
25use std::io::{BufReader, Read, Seek, SeekFrom};
26use std::path::Path;
27
28/// Extract metadata from a file (Milestone 1: real file I/O with JPEG detection)
29///
30/// This function now implements real file reading and JPEG segment scanning.
31/// It detects JPEG files by magic bytes and locates EXIF data in APP1 segments.
32pub fn extract_metadata(path: &Path, show_missing: bool) -> Result<ExifData> {
33    // Ensure conversions are registered
34    crate::init();
35
36    // Open file with buffered reading for performance
37    let file = File::open(path)?;
38    let mut reader = BufReader::new(file);
39
40    // Detect file type using the new ExifTool-compatible detector
41    let detector = FileTypeDetector::new();
42    let detection_result = detector.detect_file_type(path, &mut reader)?;
43
44    // Get actual file metadata
45    let file_metadata = std::fs::metadata(path)?;
46    let file_size = file_metadata.len();
47
48    let mut tags = HashMap::new();
49    let mut tag_entries = Vec::new();
50
51    // Basic file information (now real data) - create as TagEntry objects
52    let filename = path
53        .file_name()
54        .unwrap_or_default()
55        .to_string_lossy()
56        .to_string();
57    tag_entries.push(TagEntry {
58        group: "File".to_string(),
59        group1: "File".to_string(),
60        name: "FileName".to_string(),
61        value: TagValue::String(filename.clone()),
62        print: TagValue::String(filename),
63    });
64
65    let directory = path
66        .parent()
67        .unwrap_or_else(|| Path::new("."))
68        .to_string_lossy()
69        .to_string();
70    tag_entries.push(TagEntry {
71        group: "File".to_string(),
72        group1: "File".to_string(),
73        name: "Directory".to_string(),
74        value: TagValue::String(directory.clone()),
75        print: TagValue::String(directory),
76    });
77
78    // Handle file size - use U32 if it fits, otherwise F64 for large files
79    let file_size_value = if file_size <= u32::MAX as u64 {
80        TagValue::U32(file_size as u32)
81    } else {
82        TagValue::F64(file_size as f64)
83    };
84    tag_entries.push(TagEntry {
85        group: "File".to_string(),
86        group1: "File".to_string(),
87        name: "FileSize".to_string(),
88        value: file_size_value,
89        print: TagValue::string(file_size.to_string()),
90    });
91
92    // Format file modification time to match ExifTool format: "YYYY:MM:DD HH:MM:SS±TZ:TZ"
93    // ExifTool.pm formats this as local time with timezone offset
94    if let Ok(modified) = file_metadata.modified() {
95        use chrono::{DateTime, Local};
96        let datetime: DateTime<Local> = modified.into();
97        // Format to match ExifTool exactly: "2025:06:30 10:16:40-07:00"
98        let formatted = datetime.format("%Y:%m:%d %H:%M:%S%:z").to_string();
99        tag_entries.push(TagEntry {
100            group: "File".to_string(),
101            group1: "File".to_string(),
102            name: "FileModifyDate".to_string(),
103            value: TagValue::String(formatted.clone()),
104            print: TagValue::String(formatted),
105        });
106    }
107
108    // Add FileType and FileTypeExtension using ExifTool-compatible values
109    // Note: We'll store the initial file type here, but it may be overridden later
110    // (e.g., NEF -> NRW during TIFF processing)
111    let mut file_type = detection_result.file_type.clone();
112    tag_entries.push(TagEntry {
113        group: "File".to_string(),
114        group1: "File".to_string(),
115        name: "FileType".to_string(),
116        value: TagValue::String(file_type.clone()),
117        print: TagValue::String(file_type.clone()),
118    });
119
120    // File extension is based on the detected file type
121    let file_type_ext = detection_result.file_type.to_lowercase();
122    tag_entries.push(TagEntry {
123        group: "File".to_string(),
124        group1: "File".to_string(),
125        name: "FileTypeExtension".to_string(),
126        value: TagValue::String(file_type_ext.clone()),
127        print: TagValue::String(file_type_ext),
128    });
129
130    let mime_type = detection_result.mime_type.clone();
131    tag_entries.push(TagEntry {
132        group: "File".to_string(),
133        group1: "File".to_string(),
134        name: "MIMEType".to_string(),
135        value: TagValue::String(mime_type.clone()),
136        print: TagValue::String(mime_type),
137    });
138
139    // Format-specific processing based on the detected format
140    match detection_result.format.as_str() {
141        "RAW" => {
142            // RAW format processing (Milestone 17a: Kyocera RAW support)
143            // Reset reader to start of file
144            reader.seek(SeekFrom::Start(0))?;
145
146            // Read entire file for RAW processing
147            let mut raw_data = Vec::new();
148            reader.read_to_end(&mut raw_data)?;
149
150            // Process RAW data using RAW processor
151            let raw_processor = crate::raw::RawProcessor::new();
152            let mut exif_reader = ExifReader::new();
153
154            // Store the original file type for format detection
155            exif_reader.set_file_type(detection_result.file_type.clone());
156
157            match raw_processor.process_raw(&mut exif_reader, &raw_data, &detection_result) {
158                Ok(()) => {
159                    // Successfully processed RAW - extract all found tags using new TagEntry API
160                    let mut raw_tag_entries = exif_reader.get_all_tag_entries();
161
162                    // Append RAW tag entries to our collection
163                    tag_entries.append(&mut raw_tag_entries);
164
165                    // Also populate legacy tags for backward compatibility
166                    let raw_tags = exif_reader.get_all_tags();
167                    for (key, value) in raw_tags {
168                        tags.insert(key, value);
169                    }
170
171                    // Add RAW processing warnings as tags for debugging
172                    for (i, warning) in exif_reader.get_warnings().iter().enumerate() {
173                        tags.insert(
174                            format!("Warning:RawWarning{i}"),
175                            TagValue::String(warning.clone()),
176                        );
177                    }
178                }
179                Err(e) => {
180                    // Failed to parse RAW - add error information
181                    tags.insert(
182                        "Warning:RawParseError".to_string(),
183                        TagValue::string(format!("Failed to parse RAW: {e}")),
184                    );
185                }
186            }
187        }
188        "JPEG" => {
189            // Scan for EXIF data in JPEG segments
190            match scan_jpeg_segments(&mut reader)? {
191                Some(segment_info) => {
192                    let exif_status = format!(
193                        "EXIF data found in APP1 segment at offset {:#x}, length {} bytes",
194                        segment_info.offset, segment_info.length
195                    );
196
197                    // Add EXIF detection status
198                    tags.insert(
199                        "System:ExifDetectionStatus".to_string(),
200                        TagValue::String(exif_status),
201                    );
202
203                    // Extract actual EXIF data using our new ExifReader
204                    reader.seek(SeekFrom::Start(segment_info.offset))?;
205                    let mut exif_data = vec![0u8; segment_info.length as usize];
206                    reader.read_exact(&mut exif_data)?;
207
208                    // Parse EXIF data
209                    let mut exif_reader = ExifReader::new();
210                    match exif_reader.parse_exif_data(&exif_data) {
211                        Ok(()) => {
212                            // Successfully parsed EXIF - extract all found tags using new TagEntry API
213                            let mut exif_tag_entries = exif_reader.get_all_tag_entries();
214
215                            // Append EXIF tag entries to our collection
216                            tag_entries.append(&mut exif_tag_entries);
217
218                            // Also populate legacy tags for backward compatibility
219                            let exif_tags = exif_reader.get_all_tags();
220                            for (key, value) in exif_tags {
221                                tags.insert(key, value);
222                            }
223
224                            // Add EXIF processing warnings as tags for debugging
225                            for (i, warning) in exif_reader.get_warnings().iter().enumerate() {
226                                tags.insert(
227                                    format!("Warning:ExifWarning{i}"),
228                                    TagValue::String(warning.clone()),
229                                );
230                            }
231                        }
232                        Err(e) => {
233                            // Failed to parse EXIF - add error information
234                            tags.insert(
235                                "Warning:ExifParseError".to_string(),
236                                TagValue::string(format!("Failed to parse EXIF: {e}")),
237                            );
238                        }
239                    }
240                }
241                None => {
242                    // No EXIF data found
243                    tags.insert(
244                        "System:ExifDetectionStatus".to_string(),
245                        "No EXIF data found in JPEG".into(),
246                    );
247                }
248            }
249
250            // Extract XMP data (handles both regular and Extended XMP)
251            reader.seek(SeekFrom::Start(0))?;
252            match extract_jpeg_xmp(&mut reader) {
253                Ok(xmp_data) => {
254                    // Process XMP data with XmpProcessor
255                    let mut xmp_processor = XmpProcessor::new();
256                    match xmp_processor.process_xmp_data(&xmp_data) {
257                        Ok(xmp_entry) => {
258                            // Add structured XMP TagEntry
259                            tag_entries.push(xmp_entry);
260
261                            // Add XMP detection status
262                            tags.insert(
263                                "System:XmpDetectionStatus".to_string(),
264                                TagValue::String(format!(
265                                    "XMP data found ({} bytes total)",
266                                    xmp_data.len()
267                                )),
268                            );
269                        }
270                        Err(e) => {
271                            // Failed to parse XMP - add error information
272                            tags.insert(
273                                "Warning:XmpParseError".to_string(),
274                                TagValue::string(format!("Failed to parse XMP: {e}")),
275                            );
276                        }
277                    }
278                }
279                Err(e) if e.to_string().contains("No XMP data found") => {
280                    // No XMP data found (not an error)
281                    tags.insert(
282                        "System:XmpDetectionStatus".to_string(),
283                        "No XMP data found in JPEG".into(),
284                    );
285                }
286                Err(e) => {
287                    // Real error scanning for XMP
288                    tags.insert(
289                        "Warning:XmpScanError".to_string(),
290                        TagValue::string(format!("Error scanning for XMP: {e}")),
291                    );
292                }
293            }
294        }
295        "TIFF" => {
296            // For TIFF-based files (including NEF, NRW, CR2, etc.), process as TIFF
297            // Reset reader to start of file
298            reader.seek(SeekFrom::Start(0))?;
299
300            // Read entire file for TIFF processing
301            let mut tiff_data = Vec::new();
302            reader.read_to_end(&mut tiff_data)?;
303
304            // Parse TIFF/EXIF data
305            let mut exif_reader = ExifReader::new();
306
307            // Store the original file type for NEF/NRW detection
308            exif_reader.set_file_type(detection_result.file_type.clone());
309
310            match exif_reader.parse_exif_data(&tiff_data) {
311                Ok(()) => {
312                    // Check if file type was overridden during processing
313                    if let Some(new_file_type) = exif_reader.get_overridden_file_type() {
314                        // Update file type tags with the overridden value
315                        file_type = new_file_type.clone();
316
317                        // Find and update the FileType tag entry
318                        for entry in &mut tag_entries {
319                            if entry.name == "FileType" {
320                                entry.value = TagValue::String(file_type.clone());
321                                entry.print = TagValue::String(file_type.clone());
322                            } else if entry.name == "FileTypeExtension" {
323                                entry.value = TagValue::String(file_type.to_lowercase());
324                                entry.print = TagValue::String(file_type.to_lowercase());
325                            } else if entry.name == "MIMEType" {
326                                // Update MIME type for NRW
327                                if file_type == "NRW" {
328                                    entry.value = "image/x-nikon-nrw".into();
329                                    entry.print = "image/x-nikon-nrw".into();
330                                }
331                            }
332                        }
333                    }
334
335                    // Extract all found tags using new TagEntry API
336                    let mut exif_tag_entries = exif_reader.get_all_tag_entries();
337
338                    // Append EXIF tag entries to our collection
339                    tag_entries.append(&mut exif_tag_entries);
340
341                    // Also populate legacy tags for backward compatibility
342                    let exif_tags = exif_reader.get_all_tags();
343                    for (key, value) in exif_tags {
344                        tags.insert(key, value);
345                    }
346
347                    // Add EXIF processing warnings as tags for debugging
348                    for (i, warning) in exif_reader.get_warnings().iter().enumerate() {
349                        tags.insert(
350                            format!("Warning:ExifWarning{i}"),
351                            TagValue::String(warning.clone()),
352                        );
353                    }
354                }
355                Err(e) => {
356                    // Failed to parse TIFF - add error information
357                    tags.insert(
358                        "Warning:TiffParseError".to_string(),
359                        TagValue::string(format!("Failed to parse TIFF: {e}")),
360                    );
361                }
362            }
363
364            // Check for XMP data in TIFF IFD0
365            match extract_tiff_xmp(&tiff_data) {
366                Ok(Some(xmp_data)) => {
367                    // Process XMP data with XmpProcessor
368                    let mut xmp_processor = XmpProcessor::new();
369                    match xmp_processor.process_xmp_data(&xmp_data) {
370                        Ok(xmp_entry) => {
371                            // Add structured XMP TagEntry
372                            tag_entries.push(xmp_entry);
373
374                            // Add XMP detection status
375                            tags.insert(
376                                "System:XmpDetectionStatus".to_string(),
377                                TagValue::String(format!(
378                                    "XMP data found in TIFF IFD0 tag 0x02bc, length {} bytes",
379                                    xmp_data.len()
380                                )),
381                            );
382                        }
383                        Err(e) => {
384                            // Failed to parse XMP - add error information
385                            tags.insert(
386                                "Warning:XmpParseError".to_string(),
387                                TagValue::string(format!("Failed to parse XMP: {e}")),
388                            );
389                        }
390                    }
391                }
392                Ok(None) => {
393                    // No XMP data found
394                    tags.insert(
395                        "System:XmpDetectionStatus".to_string(),
396                        "No XMP data found in TIFF".into(),
397                    );
398                }
399                Err(e) => {
400                    // Error extracting XMP
401                    tags.insert(
402                        "Warning:XmpExtractionError".to_string(),
403                        TagValue::string(format!("Error extracting XMP: {e}")),
404                    );
405                }
406            }
407        }
408        "XMP" => {
409            // Standalone XMP file processing
410            reader.seek(SeekFrom::Start(0))?;
411            let mut xmp_data = Vec::new();
412            reader.read_to_end(&mut xmp_data)?;
413
414            // Process XMP data with XmpProcessor
415            let mut xmp_processor = XmpProcessor::new();
416            match xmp_processor.process_xmp_data(&xmp_data) {
417                Ok(xmp_entry) => {
418                    // Add structured XMP TagEntry
419                    tag_entries.push(xmp_entry);
420
421                    // Add XMP detection status
422                    tags.insert(
423                        "System:XmpDetectionStatus".to_string(),
424                        TagValue::String(format!(
425                            "XMP data processed from standalone file, length {} bytes",
426                            xmp_data.len()
427                        )),
428                    );
429                }
430                Err(e) => {
431                    // Failed to parse XMP - add error information
432                    tags.insert(
433                        "Warning:XmpParseError".to_string(),
434                        TagValue::string(format!("Failed to parse XMP: {e}")),
435                    );
436                }
437            }
438        }
439        "MRW" | "RW2" | "RWL" => {
440            // RAW format processing (Milestone 17b: Minolta MRW and Panasonic RW2 support)
441            // Reset reader to start of file
442            reader.seek(SeekFrom::Start(0))?;
443            // Read entire file for RAW processing
444            let mut raw_data = Vec::new();
445            reader.read_to_end(&mut raw_data)?;
446            // Process RAW data using RAW processor
447            let raw_processor = crate::raw::RawProcessor::new();
448            let mut exif_reader = ExifReader::new();
449            // Store the original file type for format detection
450            exif_reader.set_file_type(detection_result.file_type.clone());
451            match raw_processor.process_raw(&mut exif_reader, &raw_data, &detection_result) {
452                Ok(()) => {
453                    // Successfully processed RAW - extract all found tags using new TagEntry API
454                    let mut raw_tag_entries = exif_reader.get_all_tag_entries();
455                    // Append RAW tag entries to our collection
456                    tag_entries.append(&mut raw_tag_entries);
457                    // Also populate legacy tags for backward compatibility
458                    let raw_tags = exif_reader.get_all_tags();
459                    for (key, value) in raw_tags {
460                        tags.insert(key, value);
461                    }
462                    // Add RAW processing warnings as tags for debugging
463                    for (i, warning) in exif_reader.get_warnings().iter().enumerate() {
464                        tags.insert(
465                            format!("Warning:RawWarning{i}"),
466                            TagValue::String(warning.clone()),
467                        );
468                    }
469                }
470                Err(e) => {
471                    // Failed to parse RAW - add error information
472                    tags.insert(
473                        "Warning:RawParseError".to_string(),
474                        TagValue::string(format!(
475                            "Failed to parse {} RAW: {e}",
476                            detection_result.format
477                        )),
478                    );
479                }
480            }
481        }
482        _ => {
483            // Other formats not yet supported
484            tags.insert(
485                "System:ExifDetectionStatus".to_string(),
486                TagValue::string(format!(
487                    "Format {} not yet supported for EXIF extraction",
488                    detection_result.format
489                )),
490            );
491        }
492    }
493
494    // Collect any missing required tags for --show-missing functionality
495    let missing_implementations = if show_missing {
496        let mut missing = Vec::new();
497
498        // Check for missing mainstream tags
499        for tag_def in EXIF_MAIN_TAGS.iter() {
500            let tag_found = tags.keys().any(|key| {
501                key.contains(&format!("Tag_{:04X}", tag_def.id))
502                    || key.contains(&format!("{:#x}", tag_def.id))
503            });
504
505            if !tag_found {
506                missing.push(format!("Tag_{:04X}", tag_def.id));
507            }
508        }
509
510        // Check for missing required ValueConv functions
511        for conv_id in REQUIRED_VALUE_CONV.iter() {
512            // These would be checked during value conversion
513            // For now, just note that we need to implement them
514            missing.push(format!("ValueConv_{conv_id}"));
515        }
516
517        // Check for missing required PrintConv functions
518        for conv_id in REQUIRED_PRINT_CONV.iter() {
519            // These would be checked during print conversion
520            // For now, just note that we need to implement them
521            missing.push(format!("PrintConv_{conv_id}"));
522        }
523
524        if missing.is_empty() {
525            None
526        } else {
527            Some(missing)
528        }
529    } else {
530        None
531    };
532
533    // Create final ExifData structure
534    let source_file = path.to_string_lossy().to_string();
535    let mut exif_data = ExifData::new(source_file, env!("CARGO_PKG_VERSION").to_string());
536
537    // Set tag entries (new API)
538    exif_data.tags = tag_entries;
539
540    // Set legacy tags for backward compatibility
541    exif_data.legacy_tags = tags;
542
543    // Set missing implementations if requested
544    exif_data.missing_implementations = missing_implementations;
545
546    Ok(exif_data)
547}
548
#[cfg(test)]
mod tests {
    use super::*;

    /// A path that does not exist must be reported as an error (the file
    /// cannot be opened) rather than yielding an empty result.
    #[test]
    fn test_extract_metadata_nonexistent_file() {
        let missing_path = Path::new("nonexistent_file.jpg");
        assert!(extract_metadata(missing_path, false).is_err());
    }
}
561}