Skip to main content

exiftool_rs/
exiftool.rs

1//! Core ExifTool struct and public API.
2//!
3//! This is the main entry point for reading metadata from files.
4//! Mirrors ExifTool.pm's ImageInfo/ExtractInfo/GetInfo pipeline.
5
6use std::collections::HashMap;
7use std::fs;
8use std::path::Path;
9
10use crate::error::{Error, Result};
11use crate::file_type::{self, FileType};
12use crate::formats;
13use crate::metadata::exif::ByteOrderMark;
14use crate::tag::Tag;
15use crate::value::Value;
16use crate::writer::{
17    exif_writer, iptc_writer, jpeg_writer, matroska_writer, mp4_writer, pdf_writer, png_writer,
18    psd_writer, tiff_writer, webp_writer, xmp_writer,
19};
20
/// Options controlling how metadata is extracted.
///
/// `Options::default()` gives normal scanning with print conversions enabled
/// and every optional feature switched off.
#[derive(Debug, Clone)]
pub struct Options {
    /// Include duplicate tags (different groups may have same tag name).
    pub duplicates: bool,
    /// Apply print conversions (human-readable values).
    pub print_conv: bool,
    /// Fast scan level: 0=normal, 1=skip composite, 2=skip maker notes, 3=skip thumbnails.
    pub fast_scan: u8,
    /// Only extract these tag names (empty = all).
    pub requested_tags: Vec<String>,
    /// Extract embedded documents/data (video frames, etc.). Level: 0=off, 1=-ee, 2=-ee2, 3=-ee3.
    pub extract_embedded: u8,
    /// Show unknown tags: 0=off, 1=-u (show unknown), 2=-U (show unknown + binary data).
    pub show_unknown: u8,
    /// Process compressed data in files (-z option).
    pub process_compressed: bool,
    /// Use MWG (Metadata Working Group) composite tags for reading/writing.
    pub use_mwg: bool,
    /// Reverse-geocode `Geolocation*` tags from GPS coordinates
    /// (ExifTool's `Geolocation` API option). Off by default, like ExifTool.
    pub geolocation: bool,
}

impl Default for Options {
    /// Defaults: print conversions on, all levels at 0, every other switch off.
    fn default() -> Self {
        Options {
            // The only option that is enabled out of the box.
            print_conv: true,

            // Numeric levels all start at "off"/"normal".
            fast_scan: 0,
            extract_embedded: 0,
            show_unknown: 0,

            // No tag filter: extract everything.
            requested_tags: Vec::new(),

            // Opt-in switches.
            duplicates: false,
            process_compressed: false,
            use_mwg: false,
            geolocation: false,
        }
    }
}
60
/// A queued tag change for writing.
///
/// Instances are created by `ExifTool::set_new_value` and
/// `ExifTool::set_new_values_from_file` and are applied by the format writers.
#[derive(Debug, Clone)]
pub struct NewValue {
    /// Tag name (e.g., "Artist", "Copyright"). `set_new_value` splits a
    /// `Group:Tag` argument and stores only the part after the first colon here.
    pub tag: String,
    /// Group prefix if specified (e.g., "EXIF", "XMP", "IPTC")
    pub group: Option<String>,
    /// New value (None = delete tag)
    pub value: Option<String>,
}
83
/// The main ExifTool engine — read, write, and edit metadata.
///
/// # Reading metadata
/// ```no_run
/// use exiftool_rs::ExifTool;
///
/// let et = ExifTool::new();
///
/// // Full tag structs
/// let tags = et.extract_info("photo.jpg").unwrap();
/// for tag in &tags {
///     println!("[{}] {}: {}", tag.group.family0, tag.name, tag.print_value);
/// }
///
/// // Simple name→value map
/// let info = et.image_info("photo.jpg").unwrap();
/// println!("Camera: {}", info.get("Model").unwrap_or(&String::new()));
/// ```
///
/// # Writing metadata
/// ```no_run
/// use exiftool_rs::ExifTool;
///
/// let mut et = ExifTool::new();
/// et.set_new_value("Artist", Some("John Doe"));
/// et.set_new_value("Copyright", Some("2024"));
/// et.write_info("input.jpg", "output.jpg").unwrap();
/// ```
pub struct ExifTool {
    /// Processing options; exposed through `options()` / `options_mut()`.
    options: Options,
    /// Tag changes queued by `set_new_value` / `set_new_values_from_file`,
    /// in insertion order, and read by `write_info`.
    new_values: Vec<NewValue>,
}
116
/// Result of metadata extraction: maps tag names to display values.
///
/// Both keys and values are plain `String`s.
pub type ImageInfo = HashMap<String, String>;
119
120impl ExifTool {
121    /// Create a new ExifTool instance with default options.
122    pub fn new() -> Self {
123        Self {
124            options: Options::default(),
125            new_values: Vec::new(),
126        }
127    }
128
    /// Create a new ExifTool instance with custom options.
    ///
    /// The queue of pending tag changes starts out empty.
    pub fn with_options(options: Options) -> Self {
        Self {
            options,
            new_values: Vec::new(),
        }
    }
136
    /// Get a mutable reference to the options, for adjusting them after construction.
    pub fn options_mut(&mut self) -> &mut Options {
        &mut self.options
    }
141
    /// Get a shared reference to the current options.
    pub fn options(&self) -> &Options {
        &self.options
    }
146
147    // ================================================================
148    // Writing API
149    // ================================================================
150
151    /// Queue a new tag value for writing.
152    ///
153    /// Call this one or more times, then call `write_info()` to apply changes.
154    ///
155    /// # Arguments
156    /// * `tag` - Tag name, optionally prefixed with group (e.g., "Artist", "XMP:Title", "EXIF:Copyright")
157    /// * `value` - New value, or None to delete the tag
158    ///
159    /// # Example
160    /// ```no_run
161    /// use exiftool_rs::ExifTool;
162    /// let mut et = ExifTool::new();
163    /// et.set_new_value("Artist", Some("John Doe"));
164    /// et.set_new_value("Copyright", Some("2024 John Doe"));
165    /// et.set_new_value("XMP:Title", Some("My Photo"));
166    /// et.write_info("photo.jpg", "photo_out.jpg").unwrap();
167    /// ```
168    pub fn set_new_value(&mut self, tag: &str, value: Option<&str>) {
169        let (group, tag_name) = if let Some(colon_pos) = tag.find(':') {
170            (
171                Some(tag[..colon_pos].to_string()),
172                tag[colon_pos + 1..].to_string(),
173            )
174        } else {
175            (None, tag.to_string())
176        };
177
178        self.new_values.push(NewValue {
179            tag: tag_name,
180            group,
181            value: value.map(|v| v.to_string()),
182        });
183    }
184
    /// Clear all queued new values.
    ///
    /// `write_info()` does not empty the queue itself, so call this before
    /// queuing an unrelated set of changes on the same instance.
    pub fn clear_new_values(&mut self) {
        self.new_values.clear();
    }
189
190    /// Copy tags from a source file, queuing them as new values.
191    ///
192    /// Reads all tags from `src_path` and queues them for writing.
193    /// Optionally filter by tag names.
194    pub fn set_new_values_from_file<P: AsRef<Path>>(
195        &mut self,
196        src_path: P,
197        tags_to_copy: Option<&[&str]>,
198    ) -> Result<u32> {
199        let src_tags = self.extract_info(src_path)?;
200        let mut count = 0u32;
201
202        for tag in &src_tags {
203            // Skip file-level tags that shouldn't be copied
204            if tag.group.family0 == "File" || tag.group.family0 == "Composite" {
205                continue;
206            }
207            // Skip binary/undefined data and empty values
208            if tag.print_value.starts_with("(Binary") || tag.print_value.starts_with("(Undefined") {
209                continue;
210            }
211            if tag.print_value.is_empty() {
212                continue;
213            }
214
215            // Filter by requested tags
216            if let Some(filter) = tags_to_copy {
217                let name_lower = tag.name.to_lowercase();
218                if !filter.iter().any(|f| f.to_lowercase() == name_lower) {
219                    continue;
220                }
221            }
222
223            let _full_tag = format!("{}:{}", tag.group.family0, tag.name);
224            self.new_values.push(NewValue {
225                tag: tag.name.clone(),
226                group: Some(tag.group.family0.clone()),
227                value: Some(tag.print_value.clone()),
228            });
229            count += 1;
230        }
231
232        Ok(count)
233    }
234
235    /// Set a file's name based on a tag value.
236    pub fn set_file_name_from_tag<P: AsRef<Path>>(
237        &self,
238        path: P,
239        tag_name: &str,
240        template: &str,
241    ) -> Result<String> {
242        let path = path.as_ref();
243        let tags = self.extract_info(path)?;
244
245        let tag_value = tags
246            .iter()
247            .find(|t| t.name.to_lowercase() == tag_name.to_lowercase())
248            .map(|t| &t.print_value)
249            .ok_or_else(|| Error::TagNotFound(tag_name.to_string()))?;
250
251        // Build new filename from template
252        // Template: "prefix%value%suffix.ext" or just use the tag value
253        let new_name = if template.contains('%') {
254            template.replace("%v", value_to_filename(tag_value).as_str())
255        } else {
256            // Default: use tag value as filename, keep extension
257            let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
258            let clean = value_to_filename(tag_value);
259            if ext.is_empty() {
260                clean
261            } else {
262                format!("{}.{}", clean, ext)
263            }
264        };
265
266        let parent = path.parent().unwrap_or(Path::new(""));
267        let new_path = parent.join(&new_name);
268
269        fs::rename(path, &new_path).map_err(Error::Io)?;
270        Ok(new_path.to_string_lossy().to_string())
271    }
272
273    /// Write queued changes to a file.
274    ///
275    /// If `dst_path` is the same as `src_path`, the file is modified in-place
276    /// (via a temporary file).
277    pub fn write_info<P: AsRef<Path>, Q: AsRef<Path>>(
278        &self,
279        src_path: P,
280        dst_path: Q,
281    ) -> Result<u32> {
282        let src_path = src_path.as_ref();
283        let dst_path = dst_path.as_ref();
284        let data = fs::read(src_path).map_err(Error::Io)?;
285
286        let file_type = self.detect_file_type(&data, src_path)?;
287        let output = self.apply_changes(&data, file_type)?;
288
289        // Write to temp file first, then rename (atomic)
290        let temp_path = dst_path.with_extension("exiftool_tmp");
291        fs::write(&temp_path, &output).map_err(Error::Io)?;
292        fs::rename(&temp_path, dst_path).map_err(Error::Io)?;
293
294        Ok(self.new_values.len() as u32)
295    }
296
297    /// Apply queued changes to in-memory data.
298    fn apply_changes(&self, data: &[u8], file_type: FileType) -> Result<Vec<u8>> {
299        match file_type {
300            FileType::Jpeg => self.write_jpeg(data),
301            FileType::Png => self.write_png(data),
302            FileType::Tiff
303            | FileType::Dng
304            | FileType::Cr2
305            | FileType::Nef
306            | FileType::Arw
307            | FileType::Orf
308            | FileType::Pef => self.write_tiff(data),
309            FileType::WebP => self.write_webp(data),
310            FileType::Mp4
311            | FileType::QuickTime
312            | FileType::M4a
313            | FileType::ThreeGP
314            | FileType::F4v => self.write_mp4(data),
315            FileType::Psd => self.write_psd(data),
316            FileType::Pdf => self.write_pdf(data),
317            FileType::Heif | FileType::Avif => self.write_mp4(data),
318            FileType::Mkv | FileType::WebM => self.write_matroska(data),
319            FileType::Gif => {
320                let comment = self
321                    .new_values
322                    .iter()
323                    .find(|nv| nv.tag.to_lowercase() == "comment")
324                    .and_then(|nv| nv.value.clone());
325                crate::writer::gif_writer::write_gif(data, comment.as_deref())
326            }
327            FileType::Flac => {
328                let changes: Vec<(&str, &str)> = self
329                    .new_values
330                    .iter()
331                    .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
332                    .collect();
333                crate::writer::flac_writer::write_flac(data, &changes)
334            }
335            FileType::Mp3 | FileType::Aiff => {
336                let changes: Vec<(&str, &str)> = self
337                    .new_values
338                    .iter()
339                    .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
340                    .collect();
341                crate::writer::id3_writer::write_id3(data, &changes)
342            }
343            FileType::Jp2 | FileType::Jxl => {
344                let new_xmp = if self
345                    .new_values
346                    .iter()
347                    .any(|nv| nv.group.as_deref() == Some("XMP"))
348                {
349                    let refs: Vec<&NewValue> = self
350                        .new_values
351                        .iter()
352                        .filter(|nv| nv.group.as_deref() == Some("XMP"))
353                        .collect();
354                    Some(self.build_new_xmp(&refs))
355                } else {
356                    None
357                };
358                crate::writer::jp2_writer::write_jp2(data, new_xmp.as_deref(), None)
359            }
360            FileType::PostScript => {
361                let changes: Vec<(&str, &str)> = self
362                    .new_values
363                    .iter()
364                    .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
365                    .collect();
366                crate::writer::ps_writer::write_postscript(data, &changes)
367            }
368            FileType::Ogg | FileType::Opus => {
369                let changes: Vec<(&str, &str)> = self
370                    .new_values
371                    .iter()
372                    .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
373                    .collect();
374                crate::writer::ogg_writer::write_ogg(data, &changes)
375            }
376            FileType::Xmp => {
377                let props: Vec<xmp_writer::XmpProperty> = self
378                    .new_values
379                    .iter()
380                    .filter_map(|nv| {
381                        let val = nv.value.as_deref()?;
382                        Some(xmp_writer::XmpProperty {
383                            namespace: nv.group.clone().unwrap_or_else(|| "dc".into()),
384                            property: nv.tag.clone(),
385                            values: vec![val.to_string()],
386                            prop_type: xmp_writer::XmpPropertyType::Simple,
387                        })
388                    })
389                    .collect();
390                Ok(crate::writer::xmp_sidecar_writer::write_xmp_sidecar(&props))
391            }
392            _ => Err(Error::UnsupportedFileType(format!(
393                "writing not yet supported for {}",
394                file_type
395            ))),
396        }
397    }
398
399    /// Returns the set of tag names (lowercase) that are writable for a given file type.
400    /// Returns `None` if any tag is writable (open-ended formats like PNG, FLAC, MKV).
401    /// Returns `Some(empty set)` if the format has no writer.
402    pub fn writable_tags(file_type: FileType) -> Option<std::collections::HashSet<&'static str>> {
403        use std::collections::HashSet;
404
405        // EXIF tags supported by exif_writer
406        const EXIF_TAGS: &[&str] = &[
407            "imagedescription",
408            "make",
409            "model",
410            "orientation",
411            "xresolution",
412            "yresolution",
413            "resolutionunit",
414            "software",
415            "modifydate",
416            "datetime",
417            "artist",
418            "copyright",
419            "datetimeoriginal",
420            "createdate",
421            "datetimedigitized",
422            "usercomment",
423            "imageuniqueid",
424            "ownername",
425            "cameraownername",
426            "serialnumber",
427            "bodyserialnumber",
428            "lensmake",
429            "lensmodel",
430            "lensserialnumber",
431        ];
432
433        // IPTC tags supported by iptc_writer
434        const IPTC_TAGS: &[&str] = &[
435            "objectname",
436            "title",
437            "urgency",
438            "category",
439            "supplementalcategories",
440            "keywords",
441            "specialinstructions",
442            "datecreated",
443            "timecreated",
444            "by-line",
445            "author",
446            "byline",
447            "by-linetitle",
448            "authorsposition",
449            "bylinetitle",
450            "city",
451            "sub-location",
452            "sublocation",
453            "province-state",
454            "state",
455            "provincestate",
456            "country-primarylocationcode",
457            "countrycode",
458            "country-primarylocationname",
459            "country",
460            "headline",
461            "credit",
462            "source",
463            "copyrightnotice",
464            "contact",
465            "caption-abstract",
466            "caption",
467            "description",
468            "writer-editor",
469            "captionwriter",
470        ];
471
472        // XMP auto-detected tags (no group prefix needed)
473        const XMP_AUTO_TAGS: &[&str] = &[
474            "title",
475            "description",
476            "subject",
477            "creator",
478            "rights",
479            "keywords",
480            "rating",
481            "label",
482            "hierarchicalsubject",
483        ];
484
485        // ID3 tags
486        const ID3_TAGS: &[&str] = &[
487            "title",
488            "artist",
489            "album",
490            "year",
491            "date",
492            "track",
493            "genre",
494            "comment",
495            "composer",
496            "albumartist",
497            "encoder",
498            "encodedby",
499            "publisher",
500            "copyright",
501            "bpm",
502            "lyrics",
503        ];
504
505        // MP4/MOV ilst tags
506        const MP4_TAGS: &[&str] = &[
507            "title",
508            "artist",
509            "album",
510            "year",
511            "date",
512            "comment",
513            "genre",
514            "composer",
515            "writer",
516            "encoder",
517            "encodedby",
518            "grouping",
519            "lyrics",
520            "description",
521            "albumartist",
522            "copyright",
523        ];
524
525        // PDF Info dict tags
526        const PDF_TAGS: &[&str] = &[
527            "title", "author", "subject", "keywords", "creator", "producer",
528        ];
529
530        // PostScript DSC tags
531        const PS_TAGS: &[&str] = &[
532            "title",
533            "creator",
534            "author",
535            "for",
536            "creationdate",
537            "createdate",
538        ];
539
540        match file_type {
541            // Open-ended: any tag name accepted
542            FileType::Png
543            | FileType::Flac
544            | FileType::Mkv
545            | FileType::WebM
546            | FileType::Ogg
547            | FileType::Opus
548            | FileType::Xmp => None,
549
550            // JPEG: EXIF + IPTC + XMP auto + comment
551            FileType::Jpeg => {
552                let mut set: HashSet<&str> = HashSet::new();
553                set.extend(EXIF_TAGS);
554                set.extend(IPTC_TAGS);
555                set.extend(XMP_AUTO_TAGS);
556                set.insert("comment");
557                Some(set)
558            }
559
560            // TIFF-based: EXIF only
561            FileType::Tiff
562            | FileType::Dng
563            | FileType::Cr2
564            | FileType::Nef
565            | FileType::Arw
566            | FileType::Orf
567            | FileType::Pef => {
568                let mut set: HashSet<&str> = HashSet::new();
569                set.extend(EXIF_TAGS);
570                Some(set)
571            }
572
573            // WebP: EXIF + XMP auto
574            FileType::WebP => {
575                let mut set: HashSet<&str> = HashSet::new();
576                set.extend(EXIF_TAGS);
577                set.extend(XMP_AUTO_TAGS);
578                Some(set)
579            }
580
581            // MP4/MOV/HEIF: ilst + XMP auto
582            FileType::Mp4
583            | FileType::QuickTime
584            | FileType::M4a
585            | FileType::ThreeGP
586            | FileType::F4v
587            | FileType::Heif
588            | FileType::Avif => {
589                let mut set: HashSet<&str> = HashSet::new();
590                set.extend(MP4_TAGS);
591                set.extend(XMP_AUTO_TAGS);
592                Some(set)
593            }
594
595            // PSD: IPTC + XMP auto
596            FileType::Psd => {
597                let mut set: HashSet<&str> = HashSet::new();
598                set.extend(IPTC_TAGS);
599                set.extend(XMP_AUTO_TAGS);
600                Some(set)
601            }
602
603            FileType::Pdf => Some(PDF_TAGS.iter().copied().collect()),
604            FileType::PostScript => Some(PS_TAGS.iter().copied().collect()),
605
606            FileType::Mp3 | FileType::Aiff => Some(ID3_TAGS.iter().copied().collect()),
607
608            FileType::Gif => {
609                let mut set: HashSet<&str> = HashSet::new();
610                set.insert("comment");
611                Some(set)
612            }
613
614            // JP2/JXL: XMP only (with group prefix)
615            FileType::Jp2 | FileType::Jxl => Some(XMP_AUTO_TAGS.iter().copied().collect()),
616
617            // No writer
618            _ => Some(HashSet::new()),
619        }
620    }
621
622    /// Write metadata changes to JPEG data.
623    fn write_jpeg(&self, data: &[u8]) -> Result<Vec<u8>> {
624        // Classify new values by target group
625        let mut exif_values: Vec<&NewValue> = Vec::new();
626        let mut xmp_values: Vec<&NewValue> = Vec::new();
627        let mut iptc_values: Vec<&NewValue> = Vec::new();
628        let mut comment_value: Option<&str> = None;
629        let mut remove_exif = false;
630        let mut remove_xmp = false;
631        let mut remove_iptc = false;
632        let mut remove_comment = false;
633
634        for nv in &self.new_values {
635            let group = nv.group.as_deref().unwrap_or("");
636            let group_upper = group.to_uppercase();
637
638            // Check for group deletion
639            if nv.value.is_none() && nv.tag == "*" {
640                match group_upper.as_str() {
641                    "EXIF" => {
642                        remove_exif = true;
643                        continue;
644                    }
645                    "XMP" => {
646                        remove_xmp = true;
647                        continue;
648                    }
649                    "IPTC" => {
650                        remove_iptc = true;
651                        continue;
652                    }
653                    _ => {}
654                }
655            }
656
657            match group_upper.as_str() {
658                "XMP" => xmp_values.push(nv),
659                "IPTC" => iptc_values.push(nv),
660                "EXIF" | "IFD0" | "EXIFIFD" | "GPS" => exif_values.push(nv),
661                "" => {
662                    // Auto-detect best group based on tag name
663                    if nv.tag.to_lowercase() == "comment" {
664                        if nv.value.is_none() {
665                            remove_comment = true;
666                        } else {
667                            comment_value = nv.value.as_deref();
668                        }
669                    } else if is_xmp_tag(&nv.tag) {
670                        xmp_values.push(nv);
671                    } else {
672                        exif_values.push(nv);
673                    }
674                }
675                _ => exif_values.push(nv), // default to EXIF
676            }
677        }
678
679        // Build new EXIF data
680        let new_exif = if !exif_values.is_empty() {
681            Some(self.build_new_exif(data, &exif_values)?)
682        } else {
683            None
684        };
685
686        // Build new XMP data
687        let new_xmp = if !xmp_values.is_empty() {
688            Some(self.build_new_xmp(&xmp_values))
689        } else {
690            None
691        };
692
693        // Build new IPTC data
694        let new_iptc_data = if !iptc_values.is_empty() {
695            let records: Vec<iptc_writer::IptcRecord> = iptc_values
696                .iter()
697                .filter_map(|nv| {
698                    let value = nv.value.as_deref()?;
699                    let (record, dataset) = iptc_writer::tag_name_to_iptc(&nv.tag)?;
700                    Some(iptc_writer::IptcRecord {
701                        record,
702                        dataset,
703                        data: value.as_bytes().to_vec(),
704                    })
705                })
706                .collect();
707            if records.is_empty() {
708                None
709            } else {
710                Some(iptc_writer::build_iptc(&records))
711            }
712        } else {
713            None
714        };
715
716        // Rewrite JPEG
717        jpeg_writer::write_jpeg(
718            data,
719            new_exif.as_deref(),
720            new_xmp.as_deref(),
721            new_iptc_data.as_deref(),
722            comment_value,
723            remove_exif,
724            remove_xmp,
725            remove_iptc,
726            remove_comment,
727        )
728    }
729
730    /// Build new EXIF data by merging existing EXIF with queued changes.
731    fn build_new_exif(&self, jpeg_data: &[u8], values: &[&NewValue]) -> Result<Vec<u8>> {
732        let bo = ByteOrderMark::BigEndian;
733        let mut ifd0_entries = Vec::new();
734        let mut exif_entries = Vec::new();
735        let mut gps_entries = Vec::new();
736
737        // Step 1: Extract existing EXIF entries from the JPEG
738        let existing = extract_existing_exif_entries(jpeg_data, bo);
739        for entry in &existing {
740            match classify_exif_tag(entry.tag) {
741                ExifIfdGroup::Ifd0 => ifd0_entries.push(entry.clone()),
742                ExifIfdGroup::ExifIfd => exif_entries.push(entry.clone()),
743                ExifIfdGroup::Gps => gps_entries.push(entry.clone()),
744            }
745        }
746
747        // Step 2: Apply queued changes (add/replace/delete)
748        let deleted_tags: Vec<u16> = values
749            .iter()
750            .filter(|nv| nv.value.is_none())
751            .filter_map(|nv| tag_name_to_id(&nv.tag))
752            .collect();
753
754        // Remove deleted tags
755        ifd0_entries.retain(|e| !deleted_tags.contains(&e.tag));
756        exif_entries.retain(|e| !deleted_tags.contains(&e.tag));
757        gps_entries.retain(|e| !deleted_tags.contains(&e.tag));
758
759        // Add/replace new values
760        for nv in values {
761            if nv.value.is_none() {
762                continue;
763            }
764            let value_str = nv.value.as_deref().unwrap_or("");
765            let group = nv.group.as_deref().unwrap_or("");
766
767            if let Some((tag_id, format, encoded)) = encode_exif_tag(&nv.tag, value_str, group, bo)
768            {
769                let entry = exif_writer::IfdEntry {
770                    tag: tag_id,
771                    format,
772                    data: encoded,
773                };
774
775                let target = match group.to_uppercase().as_str() {
776                    "GPS" => &mut gps_entries,
777                    "EXIFIFD" => &mut exif_entries,
778                    _ => match classify_exif_tag(tag_id) {
779                        ExifIfdGroup::ExifIfd => &mut exif_entries,
780                        ExifIfdGroup::Gps => &mut gps_entries,
781                        ExifIfdGroup::Ifd0 => &mut ifd0_entries,
782                    },
783                };
784
785                // Replace existing or add new
786                if let Some(existing) = target.iter_mut().find(|e| e.tag == tag_id) {
787                    *existing = entry;
788                } else {
789                    target.push(entry);
790                }
791            }
792        }
793
794        // Remove sub-IFD pointers from entries (they'll be rebuilt by build_exif)
795        ifd0_entries.retain(|e| e.tag != 0x8769 && e.tag != 0x8825 && e.tag != 0xA005);
796
797        exif_writer::build_exif(&ifd0_entries, &exif_entries, &gps_entries, bo)
798    }
799
800    /// Write metadata changes to PNG data.
801    fn write_png(&self, data: &[u8]) -> Result<Vec<u8>> {
802        let mut new_text: Vec<(&str, &str)> = Vec::new();
803        let mut remove_text: Vec<&str> = Vec::new();
804
805        // Collect text-based changes
806        // We need to hold the strings in vectors that live long enough
807        let owned_pairs: Vec<(String, String)> = self
808            .new_values
809            .iter()
810            .filter(|nv| nv.value.is_some())
811            .map(|nv| (nv.tag.clone(), nv.value.clone().unwrap()))
812            .collect();
813
814        for (tag, value) in &owned_pairs {
815            new_text.push((tag.as_str(), value.as_str()));
816        }
817
818        for nv in &self.new_values {
819            if nv.value.is_none() {
820                remove_text.push(&nv.tag);
821            }
822        }
823
824        png_writer::write_png(data, &new_text, None, &remove_text)
825    }
826
827    /// Write metadata changes to PSD data.
828    fn write_psd(&self, data: &[u8]) -> Result<Vec<u8>> {
829        let mut iptc_values = Vec::new();
830        let mut xmp_values = Vec::new();
831
832        for nv in &self.new_values {
833            let group = nv.group.as_deref().unwrap_or("").to_uppercase();
834            match group.as_str() {
835                "XMP" => xmp_values.push(nv),
836                "IPTC" => iptc_values.push(nv),
837                _ => {
838                    if is_xmp_tag(&nv.tag) {
839                        xmp_values.push(nv);
840                    } else {
841                        iptc_values.push(nv);
842                    }
843                }
844            }
845        }
846
847        let new_iptc = if !iptc_values.is_empty() {
848            let records: Vec<_> = iptc_values
849                .iter()
850                .filter_map(|nv| {
851                    let value = nv.value.as_deref()?;
852                    let (record, dataset) = iptc_writer::tag_name_to_iptc(&nv.tag)?;
853                    Some(iptc_writer::IptcRecord {
854                        record,
855                        dataset,
856                        data: value.as_bytes().to_vec(),
857                    })
858                })
859                .collect();
860            if records.is_empty() {
861                None
862            } else {
863                Some(iptc_writer::build_iptc(&records))
864            }
865        } else {
866            None
867        };
868
869        let new_xmp = if !xmp_values.is_empty() {
870            let refs: Vec<&NewValue> = xmp_values.to_vec();
871            Some(self.build_new_xmp(&refs))
872        } else {
873            None
874        };
875
876        psd_writer::write_psd(data, new_iptc.as_deref(), new_xmp.as_deref())
877    }
878
879    /// Write metadata changes to Matroska (MKV/WebM) data.
880    fn write_matroska(&self, data: &[u8]) -> Result<Vec<u8>> {
881        let changes: Vec<(&str, &str)> = self
882            .new_values
883            .iter()
884            .filter_map(|nv| {
885                let value = nv.value.as_deref()?;
886                Some((nv.tag.as_str(), value))
887            })
888            .collect();
889
890        matroska_writer::write_matroska(data, &changes)
891    }
892
893    /// Write metadata changes to PDF data.
894    fn write_pdf(&self, data: &[u8]) -> Result<Vec<u8>> {
895        let changes: Vec<(&str, &str)> = self
896            .new_values
897            .iter()
898            .filter_map(|nv| {
899                let value = nv.value.as_deref()?;
900                Some((nv.tag.as_str(), value))
901            })
902            .collect();
903
904        pdf_writer::write_pdf(data, &changes)
905    }
906
907    /// Write metadata changes to MP4/MOV data.
908    fn write_mp4(&self, data: &[u8]) -> Result<Vec<u8>> {
909        let mut ilst_tags: Vec<([u8; 4], String)> = Vec::new();
910        let mut xmp_values: Vec<&NewValue> = Vec::new();
911
912        for nv in &self.new_values {
913            if nv.value.is_none() {
914                continue;
915            }
916            let group = nv.group.as_deref().unwrap_or("").to_uppercase();
917            if group == "XMP" {
918                xmp_values.push(nv);
919            } else if let Some(key) = mp4_writer::tag_to_ilst_key(&nv.tag) {
920                ilst_tags.push((key, nv.value.clone().unwrap()));
921            }
922        }
923
924        let tag_refs: Vec<(&[u8; 4], &str)> =
925            ilst_tags.iter().map(|(k, v)| (k, v.as_str())).collect();
926
927        let new_xmp = if !xmp_values.is_empty() {
928            let refs: Vec<&NewValue> = xmp_values.to_vec();
929            Some(self.build_new_xmp(&refs))
930        } else {
931            None
932        };
933
934        mp4_writer::write_mp4(data, &tag_refs, new_xmp.as_deref())
935    }
936
937    /// Write metadata changes to WebP data.
938    fn write_webp(&self, data: &[u8]) -> Result<Vec<u8>> {
939        let mut exif_values: Vec<&NewValue> = Vec::new();
940        let mut xmp_values: Vec<&NewValue> = Vec::new();
941        let mut remove_exif = false;
942        let mut remove_xmp = false;
943
944        for nv in &self.new_values {
945            let group = nv.group.as_deref().unwrap_or("").to_uppercase();
946            if nv.value.is_none() && nv.tag == "*" {
947                if group == "EXIF" {
948                    remove_exif = true;
949                }
950                if group == "XMP" {
951                    remove_xmp = true;
952                }
953                continue;
954            }
955            match group.as_str() {
956                "XMP" => xmp_values.push(nv),
957                _ => exif_values.push(nv),
958            }
959        }
960
961        let new_exif = if !exif_values.is_empty() {
962            let bo = ByteOrderMark::BigEndian;
963            let mut entries = Vec::new();
964            for nv in &exif_values {
965                if let Some(ref v) = nv.value {
966                    let group = nv.group.as_deref().unwrap_or("");
967                    if let Some((tag_id, format, encoded)) = encode_exif_tag(&nv.tag, v, group, bo)
968                    {
969                        entries.push(exif_writer::IfdEntry {
970                            tag: tag_id,
971                            format,
972                            data: encoded,
973                        });
974                    }
975                }
976            }
977            if !entries.is_empty() {
978                Some(exif_writer::build_exif(&entries, &[], &[], bo)?)
979            } else {
980                None
981            }
982        } else {
983            None
984        };
985
986        let new_xmp = if !xmp_values.is_empty() {
987            Some(self.build_new_xmp(&xmp_values.to_vec()))
988        } else {
989            None
990        };
991
992        webp_writer::write_webp(
993            data,
994            new_exif.as_deref(),
995            new_xmp.as_deref(),
996            remove_exif,
997            remove_xmp,
998        )
999    }
1000
1001    /// Write metadata changes to TIFF data.
1002    fn write_tiff(&self, data: &[u8]) -> Result<Vec<u8>> {
1003        let bo = if data.starts_with(b"II") {
1004            ByteOrderMark::LittleEndian
1005        } else {
1006            ByteOrderMark::BigEndian
1007        };
1008
1009        let mut changes: Vec<(u16, Vec<u8>)> = Vec::new();
1010        for nv in &self.new_values {
1011            if let Some(ref value) = nv.value {
1012                let group = nv.group.as_deref().unwrap_or("");
1013                if let Some((tag_id, _format, encoded)) = encode_exif_tag(&nv.tag, value, group, bo)
1014                {
1015                    changes.push((tag_id, encoded));
1016                }
1017            }
1018        }
1019
1020        tiff_writer::write_tiff(data, &changes)
1021    }
1022
1023    /// Build new XMP data from queued values.
1024    fn build_new_xmp(&self, values: &[&NewValue]) -> Vec<u8> {
1025        let mut properties = Vec::new();
1026
1027        for nv in values {
1028            let value_str = match &nv.value {
1029                Some(v) => v.clone(),
1030                None => continue,
1031            };
1032
1033            let ns = nv.group.as_deref().unwrap_or("dc").to_lowercase();
1034            let ns = if ns == "xmp" { "xmp".to_string() } else { ns };
1035
1036            let prop_type = match nv.tag.to_lowercase().as_str() {
1037                "title" | "description" | "rights" => xmp_writer::XmpPropertyType::LangAlt,
1038                "subject" | "keywords" => xmp_writer::XmpPropertyType::Bag,
1039                "creator" => xmp_writer::XmpPropertyType::Seq,
1040                _ => xmp_writer::XmpPropertyType::Simple,
1041            };
1042
1043            let values = if matches!(
1044                prop_type,
1045                xmp_writer::XmpPropertyType::Bag | xmp_writer::XmpPropertyType::Seq
1046            ) {
1047                value_str.split(',').map(|s| s.trim().to_string()).collect()
1048            } else {
1049                vec![value_str]
1050            };
1051
1052            properties.push(xmp_writer::XmpProperty {
1053                namespace: ns,
1054                property: nv.tag.clone(),
1055                values,
1056                prop_type,
1057            });
1058        }
1059
1060        xmp_writer::build_xmp(&properties).into_bytes()
1061    }
1062
1063    // ================================================================
1064    // Reading API
1065    // ================================================================
1066
1067    /// Extract metadata from a file and return a simple name→value map.
1068    ///
1069    /// This is the high-level one-shot API, equivalent to ExifTool's `ImageInfo()`.
1070    pub fn image_info<P: AsRef<Path>>(&self, path: P) -> Result<ImageInfo> {
1071        let tags = self.extract_info(path)?;
1072        Ok(self.get_info(&tags))
1073    }
1074
1075    /// Extract all metadata tags from a file.
1076    ///
1077    /// Returns the full `Tag` structs with groups, raw values, etc.
1078    pub fn extract_info<P: AsRef<Path>>(&self, path: P) -> Result<Vec<Tag>> {
1079        let path = path.as_ref();
1080        let data = fs::read(path).map_err(Error::Io)?;
1081
1082        self.extract_info_from_bytes(&data, path)
1083    }
1084
1085    /// Extract metadata from in-memory data.
1086    pub fn extract_info_from_bytes(&self, data: &[u8], path: &Path) -> Result<Vec<Tag>> {
1087        // Propagate show_unknown to EXIF/MakerNotes parsers via thread-local
1088        crate::metadata::exif::set_show_unknown(self.options.show_unknown);
1089        // Propagate process_compressed to format readers via thread-local
1090        crate::formats::pdf::set_process_compressed(self.options.process_compressed);
1091
1092        let file_type_result = self.detect_file_type(data, path);
1093        let (file_type, mut tags) = match file_type_result {
1094            Ok(ft) => {
1095                let t = self
1096                    .process_file(data, ft)
1097                    .or_else(|_| self.process_by_extension(data, path))?;
1098                (Some(ft), t)
1099            }
1100            Err(_) => {
1101                // File type unknown by magic/extension — try extension-based fallback
1102                let t = self.process_by_extension(data, path)?;
1103                (None, t)
1104            }
1105        };
1106        let file_type = file_type.unwrap_or(FileType::Zip); // placeholder for file-level tags
1107
1108        // Some types refine their FileType/MIMEType/extension from the content
1109        // (ExifTool SetFileType): e.g. EXE -> "Win32 EXE" / "ELF executable" / Mach-O.
1110        let default_tags = || {
1111            (
1112                file_type.code().to_string(),
1113                file_type.mime_type().to_string(),
1114                file_type
1115                    .extensions()
1116                    .first()
1117                    .copied()
1118                    .unwrap_or("")
1119                    .to_string(),
1120            )
1121        };
1122        let (ft_code, mime_str, ext_str): (String, String, String) = if file_type == FileType::Exe {
1123            exe_subtype(data)
1124                .map(|(ft, mime, ext)| (ft.to_string(), mime.to_string(), ext.to_string()))
1125                .unwrap_or_else(default_tags)
1126        } else if let Some((code, mime)) = refine_filetype_by_content(file_type, data) {
1127            let (_, _, ext) = default_tags();
1128            (code, mime, ext)
1129        } else {
1130            default_tags()
1131        };
1132
1133        // Add file-level tags
1134        tags.push(Tag {
1135            id: crate::tag::TagId::Text("FileType".into()),
1136            name: "FileType".into(),
1137            description: "File Type".into(),
1138            group: crate::tag::TagGroup {
1139                family0: "File".into(),
1140                family1: "File".into(),
1141                family2: "Other".into(),
1142            },
1143            raw_value: Value::String(format!("{:?}", file_type)),
1144            // ExifTool's FileType value is the short code ("JPEG"), not the
1145            // human-readable description ("JPEG image").
1146            print_value: ft_code.clone(),
1147            priority: 1,
1148        });
1149
1150        tags.push(Tag {
1151            id: crate::tag::TagId::Text("MIMEType".into()),
1152            name: "MIMEType".into(),
1153            description: "MIME Type".into(),
1154            group: crate::tag::TagGroup {
1155                family0: "File".into(),
1156                family1: "File".into(),
1157                family2: "Other".into(),
1158            },
1159            raw_value: Value::String(mime_str.clone()),
1160            print_value: mime_str.clone(),
1161            priority: 1,
1162        });
1163
1164        if let Ok(metadata) = fs::metadata(path) {
1165            tags.push(Tag {
1166                id: crate::tag::TagId::Text("FileSize".into()),
1167                name: "FileSize".into(),
1168                description: "File Size".into(),
1169                group: crate::tag::TagGroup {
1170                    family0: "File".into(),
1171                    family1: "File".into(),
1172                    family2: "Other".into(),
1173                },
1174                raw_value: Value::U32(metadata.len() as u32),
1175                print_value: format_file_size(metadata.len()),
1176                priority: 0,
1177            });
1178        }
1179
1180        // Add more file-level tags
1181        let file_tag = |name: &str, val: Value| -> Tag {
1182            Tag {
1183                id: crate::tag::TagId::Text(name.to_string()),
1184                name: name.to_string(),
1185                description: name.to_string(),
1186                group: crate::tag::TagGroup {
1187                    family0: "File".into(),
1188                    family1: "File".into(),
1189                    family2: "Other".into(),
1190                },
1191                raw_value: val.clone(),
1192                print_value: val.to_display_string(),
1193                priority: 1,
1194            }
1195        };
1196
1197        if let Some(fname) = path.file_name().and_then(|n| n.to_str()) {
1198            tags.push(file_tag("FileName", Value::String(fname.to_string())));
1199        }
1200        if let Some(dir) = path.parent().and_then(|p| p.to_str()) {
1201            tags.push(file_tag("Directory", Value::String(dir.to_string())));
1202        }
1203        // Use the canonical (first) extension from the FileType, matching Perl ExifTool behavior.
1204        // EXE subtypes emit FileTypeExtension even when empty (ExifTool sets ext='').
1205        if !ext_str.is_empty() || file_type == FileType::Exe {
1206            tags.push(file_tag(
1207                "FileTypeExtension",
1208                Value::String(ext_str.clone()),
1209            ));
1210        }
1211
1212        #[cfg(unix)]
1213        if let Ok(metadata) = fs::metadata(path) {
1214            use std::os::unix::fs::MetadataExt;
1215            let mode = metadata.mode();
1216            tags.push(file_tag(
1217                "FilePermissions",
1218                Value::String(format!("{:o}", mode & 0o7777)),
1219            ));
1220
1221            // FileModifyDate
1222            if let Ok(modified) = metadata.modified() {
1223                if let Ok(dur) = modified.duration_since(std::time::UNIX_EPOCH) {
1224                    let secs = dur.as_secs() as i64;
1225                    tags.push(file_tag(
1226                        "FileModifyDate",
1227                        Value::String(unix_to_datetime(secs)),
1228                    ));
1229                }
1230            }
1231            // FileAccessDate
1232            if let Ok(accessed) = metadata.accessed() {
1233                if let Ok(dur) = accessed.duration_since(std::time::UNIX_EPOCH) {
1234                    let secs = dur.as_secs() as i64;
1235                    tags.push(file_tag(
1236                        "FileAccessDate",
1237                        Value::String(unix_to_datetime(secs)),
1238                    ));
1239                }
1240            }
1241            // FileInodeChangeDate (ctime on Unix)
1242            let ctime = metadata.ctime();
1243            if ctime > 0 {
1244                tags.push(file_tag(
1245                    "FileInodeChangeDate",
1246                    Value::String(unix_to_datetime(ctime)),
1247                ));
1248            }
1249        }
1250
1251        // ExifByteOrder (from TIFF header)
1252        {
1253            let bo_str = if data.len() > 8 {
1254                // Check EXIF in JPEG or TIFF header or WebP/RIFF EXIF chunk
1255                let check: Option<&[u8]> = if data.starts_with(&[0xFF, 0xD8]) {
1256                    // JPEG: find APP1 EXIF header
1257                    data.windows(6)
1258                        .position(|w| w == b"Exif\0\0")
1259                        .map(|p| &data[p + 6..])
1260                } else if data.starts_with(b"FUJIFILMCCD-RAW") && data.len() >= 0x60 {
1261                    // RAF: look in the embedded JPEG for EXIF byte order
1262                    let jpeg_offset =
1263                        u32::from_be_bytes([data[0x54], data[0x55], data[0x56], data[0x57]])
1264                            as usize;
1265                    let jpeg_length =
1266                        u32::from_be_bytes([data[0x58], data[0x59], data[0x5A], data[0x5B]])
1267                            as usize;
1268                    if jpeg_offset > 0 && jpeg_offset + jpeg_length <= data.len() {
1269                        let jpeg = &data[jpeg_offset..jpeg_offset + jpeg_length];
1270                        jpeg.windows(6)
1271                            .position(|w| w == b"Exif\0\0")
1272                            .map(|p| &jpeg[p + 6..])
1273                    } else {
1274                        None
1275                    }
1276                } else if data.starts_with(b"RIFF") && data.len() >= 12 {
1277                    // RIFF/WebP: find EXIF chunk
1278                    let mut riff_bo: Option<&[u8]> = None;
1279                    let mut pos = 12usize;
1280                    while pos + 8 <= data.len() {
1281                        let cid = &data[pos..pos + 4];
1282                        let csz = u32::from_le_bytes([
1283                            data[pos + 4],
1284                            data[pos + 5],
1285                            data[pos + 6],
1286                            data[pos + 7],
1287                        ]) as usize;
1288                        let cstart = pos + 8;
1289                        let cend = (cstart + csz).min(data.len());
1290                        if cid == b"EXIF" && cend > cstart {
1291                            let exif_data = &data[cstart..cend];
1292                            let tiff = if exif_data.starts_with(b"Exif\0\0") {
1293                                &exif_data[6..]
1294                            } else {
1295                                exif_data
1296                            };
1297                            riff_bo = Some(tiff);
1298                            break;
1299                        }
1300                        // Also check LIST chunks
1301                        if cid == b"LIST" && cend >= cstart + 4 {
1302                            // recurse not needed for this simple scan - just advance
1303                        }
1304                        pos = cend + (csz & 1);
1305                    }
1306                    riff_bo
1307                } else if data.starts_with(&[0x00, 0x00, 0x00, 0x0C, b'J', b'X', b'L', b' ']) {
1308                    // JXL container: scan for brob Exif box and decompress to get byte order
1309                    let mut jxl_bo: Option<String> = None;
1310                    let mut jpos = 12usize; // skip JXL signature box
1311                    while jpos + 8 <= data.len() {
1312                        let bsize = u32::from_be_bytes([
1313                            data[jpos],
1314                            data[jpos + 1],
1315                            data[jpos + 2],
1316                            data[jpos + 3],
1317                        ]) as usize;
1318                        let btype = &data[jpos + 4..jpos + 8];
1319                        if bsize < 8 || jpos + bsize > data.len() {
1320                            break;
1321                        }
1322                        if btype == b"brob" && jpos + bsize > 12 {
1323                            let inner_type = &data[jpos + 8..jpos + 12];
1324                            if inner_type == b"Exif" || inner_type == b"exif" {
1325                                let brotli_payload = &data[jpos + 12..jpos + bsize];
1326                                use std::io::Cursor;
1327                                let mut inp = Cursor::new(brotli_payload);
1328                                let mut out: Vec<u8> = Vec::new();
1329                                if brotli::BrotliDecompress(&mut inp, &mut out).is_ok() {
1330                                    let exif_start = if out.len() > 4 { 4 } else { 0 };
1331                                    if exif_start < out.len() {
1332                                        if out[exif_start..].starts_with(b"MM") {
1333                                            jxl_bo = Some("Big-endian (Motorola, MM)".to_string());
1334                                        } else if out[exif_start..].starts_with(b"II") {
1335                                            jxl_bo = Some("Little-endian (Intel, II)".to_string());
1336                                        }
1337                                    }
1338                                }
1339                                break;
1340                            }
1341                        }
1342                        jpos += bsize;
1343                    }
1344                    if let Some(bo) = jxl_bo {
1345                        if !bo.is_empty() && file_type != FileType::Btf {
1346                            tags.push(file_tag("ExifByteOrder", Value::String(bo)));
1347                        }
1348                    }
1349                    // Return None to skip the generic byte order check below
1350                    None
1351                } else if data.starts_with(&[0x00, b'M', b'R', b'M']) {
1352                    // MRW: find TTW segment which contains TIFF/EXIF data
1353                    let mrw_data_offset = if data.len() >= 8 {
1354                        u32::from_be_bytes([data[4], data[5], data[6], data[7]]) as usize + 8
1355                    } else {
1356                        0
1357                    };
1358                    let mut mrw_bo: Option<&[u8]> = None;
1359                    let mut mpos = 8usize;
1360                    while mpos + 8 <= mrw_data_offset.min(data.len()) {
1361                        let seg_tag = &data[mpos..mpos + 4];
1362                        let seg_len = u32::from_be_bytes([
1363                            data[mpos + 4],
1364                            data[mpos + 5],
1365                            data[mpos + 6],
1366                            data[mpos + 7],
1367                        ]) as usize;
1368                        if seg_tag == b"\x00TTW" && mpos + 8 + seg_len <= data.len() {
1369                            mrw_bo = Some(&data[mpos + 8..mpos + 8 + seg_len]);
1370                            break;
1371                        }
1372                        mpos += 8 + seg_len;
1373                    }
1374                    mrw_bo
1375                } else {
1376                    Some(data)
1377                };
1378                if let Some(tiff) = check {
1379                    if tiff.starts_with(b"II") {
1380                        "Little-endian (Intel, II)"
1381                    } else if tiff.starts_with(b"MM") {
1382                        "Big-endian (Motorola, MM)"
1383                    } else {
1384                        ""
1385                    }
1386                } else {
1387                    ""
1388                }
1389            } else {
1390                ""
1391            };
1392            // Suppress ExifByteOrder for BigTIFF, Canon VRD/DR4 (Perl doesn't output it for these)
1393            // Also skip if already emitted by ExifReader (TIFF-based formats)
1394            let already_has_exifbyteorder = tags.iter().any(|t| t.name == "ExifByteOrder");
1395            if !bo_str.is_empty()
1396                && !already_has_exifbyteorder
1397                && file_type != FileType::Btf
1398                && file_type != FileType::Dr4
1399                && file_type != FileType::Vrd
1400                && file_type != FileType::Crw
1401            {
1402                tags.push(file_tag("ExifByteOrder", Value::String(bo_str.to_string())));
1403            }
1404        }
1405
1406        tags.push(file_tag(
1407            "ExifToolVersion",
1408            Value::String(crate::VERSION.to_string()),
1409        ));
1410
1411        // Promote authoritative specialized-source tags before computing composites,
1412        // so derived tags (ShutterSpeed, LightValue, ...) use the primary value.
1413        {
1414            const SPECIAL_WINS: &[(&str, &str)] = &[
1415                ("Kodak", "FNumber"),
1416                ("Kodak", "ExposureTime"),
1417                ("MinoltaRaw", "Contrast"),
1418                ("MinoltaRaw", "Saturation"),
1419                ("MinoltaRaw", "Sharpness"),
1420                ("MinoltaRaw", "ISOSetting"),
1421                // Lytro's own FocalLength (full precision) is primary over embedded EXIF;
1422                // the 35efl/FOV/HyperfocalDistance composites must use it.
1423                ("Lytro", "FocalLength"),
1424            ];
1425            for (grp, name) in SPECIAL_WINS {
1426                if tags
1427                    .iter()
1428                    .any(|t| t.name == *name && t.group.family1 == *grp)
1429                {
1430                    tags.retain(|t| t.name != *name || t.group.family1 == *grp);
1431                }
1432            }
1433        }
1434
1435        // Compute composite tags
1436        let composite = crate::composite::compute_composite_tags(&tags);
1437        tags.extend(composite);
1438
1439        // ExifTool's Composite RedBalance/BlueBalance (computed from WB level tags)
1440        // is preferred over a manufacturer's own same-named tag. Drop the non-Composite
1441        // versions when a Composite one exists so the Composite value wins.
1442        for bal in ["RedBalance", "BlueBalance"] {
1443            let has_composite = tags
1444                .iter()
1445                .any(|t| t.name == bal && t.group.family0 == "Composite");
1446            if has_composite {
1447                tags.retain(|t| t.name != bal || t.group.family0 == "Composite");
1448            }
1449        }
1450
1451        // Geolocation is opt-in, matching ExifTool's `Geolocation` API option.
1452        if self.options.geolocation {
1453            if let Some(geo) = crate::composite::compute_geolocation(&tags) {
1454                tags.extend(geo);
1455            }
1456        }
1457
1458        // MWG (Metadata Working Group) composite tags
1459        if self.options.use_mwg {
1460            let mwg = crate::composite::compute_mwg_composites(&tags);
1461            tags.extend(mwg);
1462        }
1463
1464        // FLIR post-processing: remove LensID composite for FLIR cameras.
1465        // Perl's LensID composite requires LensType EXIF tag (not present in FLIR images),
1466        // and LensID-2 requires LensModel to match /(mm|\d\/F)/ (FLIR names like "FOL7"
1467        // don't match).  Our composite.rs uses a simpler fallback that picks up any non-empty
1468        // LensModel, so we remove LensID when the image is from a FLIR camera with FFF data.
1469        {
1470            let is_flir_fff = tags
1471                .iter()
1472                .any(|t| t.group.family0 == "APP1" && t.group.family1 == "FLIR");
1473            if is_flir_fff {
1474                tags.retain(|t| !(t.name == "LensID" && t.group.family0 == "Composite"));
1475            }
1476        }
1477
1478        // Olympus post-processing: remove the generic "Lens" composite for Olympus cameras.
1479        // In Perl, the "Lens" composite tag requires Canon:MinFocalLength (Canon namespace).
1480        // Our composite.rs generates Lens for any manufacturer that has MinFocalLength +
1481        // MaxFocalLength (e.g., Olympus Equipment sub-IFD).  Remove it for non-Canon cameras.
1482        {
1483            let make = tags
1484                .iter()
1485                .find(|t| t.name == "Make")
1486                .map(|t| t.print_value.clone())
1487                .unwrap_or_default();
1488            if !make.to_uppercase().contains("CANON") {
1489                tags.retain(|t| t.name != "Lens" || t.group.family0 != "Composite");
1490            }
1491        }
1492
1493        // Priority-based deduplication: when the same tag name appears from both RIFF (priority 0)
1494        // and MakerNotes/EXIF (priority 0 but higher-quality source), remove the RIFF copy.
1495        // Mirrors ExifTool's PRIORITY => 0 behavior for RIFF StreamHeader tags.
1496        {
1497            let riff_priority_zero_tags = ["Quality", "SampleSize", "StreamType"];
1498            for tag_name in &riff_priority_zero_tags {
1499                let has_makernotes = tags
1500                    .iter()
1501                    .any(|t| t.name == *tag_name && t.group.family0 != "RIFF");
1502                if has_makernotes {
1503                    tags.retain(|t| !(t.name == *tag_name && t.group.family0 == "RIFF"));
1504                }
1505            }
1506        }
1507
1508        // Priority-based deduplication: when the same tag name appears multiple times,
1509        // keep only the one with the highest priority (e.g., EXIF over JFIF, FFF over MakerNote).
1510        if !self.options.duplicates {
1511            // Specialized-source precedence: a few container/sidecar groups are
1512            // authoritative for specific tags and win over a generic EXIF copy
1513            // (ExifTool reports the GoPro GPMF value). Applied before the priority
1514            // dedup so the (priority-0) specialized tag isn't pruned first.
1515            {
1516                const SPECIAL_WINS: &[(&str, &str)] = &[
1517                    ("GoPro", "WhiteBalance"),
1518                    ("GoPro", "Sharpness"),
1519                    ("GoPro", "ExposureCompensation"),
1520                    // Embedded ID3 Comment overrides the native container's (AIFF/...).
1521                    ("ID3", "Comment"),
1522                    // Minolta RAW (.mrw PRD/native block) is authoritative for these
1523                    // over the embedded EXIF maker note copies.
1524                    ("MinoltaRaw", "Contrast"),
1525                    ("MinoltaRaw", "Saturation"),
1526                    ("MinoltaRaw", "Sharpness"),
1527                    ("MinoltaRaw", "ISOSetting"),
1528                    // Kodak maker note carries more precise Exposure/FNumber than EXIF.
1529                    ("Kodak", "FNumber"),
1530                    ("Kodak", "ExposureTime"),
1531                    // Sigma maker note X3FillLight (int) is primary over the X3F header.
1532                    ("Sigma", "X3FillLight"),
1533                ];
1534                for (grp, name) in SPECIAL_WINS {
1535                    if tags
1536                        .iter()
1537                        .any(|t| t.name == *name && t.group.family1 == *grp)
1538                    {
1539                        tags.retain(|t| t.name != *name || t.group.family1 == *grp);
1540                    }
1541                }
1542
1543                // QuickTime container/handler tags: ExifTool reports the LAST track's
1544                // value (e.g. the metadata-track HandlerType), unlike per-track TrackID
1545                // which keeps the first. Keep only the last instance of these.
1546                const QT_LAST_WINS: &[&str] = &[
1547                    "HandlerType",
1548                    "HandlerClass",
1549                    "HandlerVendorID",
1550                    "MediaTimeScale",
1551                    "SourceImageWidth",
1552                    "SourceImageHeight",
1553                    // Multiple 'mdat' boxes: ExifTool's RawConv overwrites, so the
1554                    // last box's offset/size is reported.
1555                    "MediaDataOffset",
1556                    "MediaDataSize",
1557                ];
1558                for name in QT_LAST_WINS {
1559                    let last = tags
1560                        .iter()
1561                        .rposition(|t| t.name == *name && t.group.family1 == "QuickTime");
1562                    if let Some(li) = last {
1563                        let mut i = 0usize;
1564                        tags.retain(|t| {
1565                            let keep =
1566                                !(t.name == *name && t.group.family1 == "QuickTime" && i != li);
1567                            i += 1;
1568                            keep
1569                        });
1570                    }
1571                }
1572            }
1573
1574            let mut best_priority: HashMap<String, i32> = HashMap::new();
1575            for tag in &tags {
1576                let entry = best_priority
1577                    .entry(tag.name.clone())
1578                    .or_insert(tag.priority);
1579                if tag.priority > *entry {
1580                    *entry = tag.priority;
1581                }
1582            }
1583            tags.retain(|t| t.priority >= *best_priority.get(&t.name).unwrap_or(&0));
1584
1585            // Document formats (PDF/PostScript/DjVu): their native Info metadata is the
1586            // LOWEST priority in ExifTool — XMP and embedded EXIF both win. Drop the
1587            // native copy when any non-native source provides the same tag.
1588            {
1589                let is_native_doc =
1590                    |g1: &str| matches!(g1, "PDF" | "PostScript" | "DjVu" | "DjVu-Meta");
1591                let other_names: std::collections::HashSet<String> = tags
1592                    .iter()
1593                    .filter(|t| !is_native_doc(&t.group.family1) && !t.print_value.is_empty())
1594                    .map(|t| t.name.clone())
1595                    .collect();
1596                tags.retain(|t| {
1597                    // Trapped keeps its native value ('Unknown' vs XMP's raw '/Unknown').
1598                    t.name == "Trapped"
1599                        || !is_native_doc(&t.group.family1)
1600                        || !other_names.contains(&t.name)
1601                });
1602            }
1603
1604            // MWG (Metadata Working Group) reconciliation: for these tags ExifTool
1605            // prefers XMP over the IPTC copy. Drop the IPTC version when XMP provides it.
1606            {
1607                const MWG_XMP_WINS: &[&str] = &["City", "DateCreated"];
1608                let xmp_has: std::collections::HashSet<String> = tags
1609                    .iter()
1610                    .filter(|t| t.group.family0 == "XMP" && !t.print_value.is_empty())
1611                    .map(|t| t.name.clone())
1612                    .collect();
1613                tags.retain(|t| {
1614                    !(MWG_XMP_WINS.contains(&t.name.as_str())
1615                        && t.group.family0 == "IPTC"
1616                        && xmp_has.contains(&t.name))
1617                });
1618            }
1619
1620            // EXIF/IPTC/MakerNotes outrank XMP for the same tag name (ExifTool default
1621            // group priority). Drop an XMP duplicate only when a non-XMP source at the
1622            // same (now-max) priority exists.
1623            let has_non_xmp: std::collections::HashSet<String> = tags
1624                .iter()
1625                .filter(|t| t.group.family0 != "XMP" && !t.print_value.is_empty())
1626                .map(|t| t.name.clone())
1627                .collect();
1628            tags.retain(|t| t.group.family0 != "XMP" || !has_non_xmp.contains(&t.name));
1629
1630            // Full-resolution-IFD precedence (TIFF-based RAW like NEF/DNG): the
1631            // structural image tags come from the IFD whose SubfileType is
1632            // "Full-resolution image" (NewSubfileType bit0 clear), not the
1633            // reduced-resolution IFD0/thumbnail. When such an IFD exists, its
1634            // structural tags are primary — mirrors ExifTool's directory priority.
1635            {
1636                let full_res_ifds: std::collections::HashSet<String> = tags
1637                    .iter()
1638                    .filter(|t| t.name == "SubfileType" && t.print_value == "Full-resolution image")
1639                    .map(|t| t.group.family1.clone())
1640                    .collect();
1641                if !full_res_ifds.is_empty() {
1642                    // Note: SubfileType itself is NOT promoted — ExifTool keeps each
1643                    // IFD's own SubfileType (DNG primary stays IFD0's reduced value).
1644                    const STRUCTURAL: &[&str] = &[
1645                        "ImageWidth",
1646                        "ImageHeight",
1647                        "BitsPerSample",
1648                        "Compression",
1649                        "PhotometricInterpretation",
1650                        "SamplesPerPixel",
1651                        "StripOffsets",
1652                        "StripByteCounts",
1653                        "RowsPerStrip",
1654                        "PlanarConfiguration",
1655                    ];
1656                    // For each structural tag that has a full-res instance, drop the
1657                    // instances from non-full-res IFDs.
1658                    let has_full: std::collections::HashSet<String> = tags
1659                        .iter()
1660                        .filter(|t| {
1661                            STRUCTURAL.contains(&t.name.as_str())
1662                                && full_res_ifds.contains(&t.group.family1)
1663                        })
1664                        .map(|t| t.name.clone())
1665                        .collect();
1666                    tags.retain(|t| {
1667                        !STRUCTURAL.contains(&t.name.as_str())
1668                            || !has_full.contains(&t.name)
1669                            || full_res_ifds.contains(&t.group.family1)
1670                    });
1671                }
1672            }
1673
1674            // SubfileType across the IFD0/SubIFD image pyramid: ExifTool extracts
1675            // each IFD in order and the LAST value wins (NEF → SubIFD1's
1676            // full-res 0; DNG → the trailing reduced SubIFD's 1). SubIFDs are
1677            // otherwise first-wins, so this is a targeted exception.
1678            {
1679                let last = tags.iter().rposition(|t| {
1680                    t.name == "SubfileType"
1681                        && (t.group.family1 == "IFD0" || t.group.family1.starts_with("SubIFD"))
1682                });
1683                if let Some(li) = last {
1684                    let mut i = 0usize;
1685                    tags.retain(|t| {
1686                        let drop = t.name == "SubfileType"
1687                            && (t.group.family1 == "IFD0" || t.group.family1.starts_with("SubIFD"))
1688                            && i != li;
1689                        i += 1;
1690                        !drop
1691                    });
1692                }
1693            }
1694
1695            // QuickTime container dates are primary over an embedded EXIF copy
1696            // (ExifTool reports the QuickTime CreateDate/ModifyDate for MOV/CR3/etc.).
1697            {
1698                for dname in ["CreateDate", "ModifyDate"] {
1699                    let has_qt = tags
1700                        .iter()
1701                        .any(|t| t.name == dname && t.group.family1 == "QuickTime");
1702                    if has_qt {
1703                        tags.retain(|t| t.name != dname || t.group.family1 == "QuickTime");
1704                    }
1705                }
1706            }
1707
1708            // ExifTool FoundTag rule (narrow, safe subset): among duplicates of the
1709            // same tag name that all live in the SAME main-document group (same
1710            // family1, not a sub-document like IFD1/SubIFD/PreviewIFD/Track2+/Doc2+),
1711            // the LAST extracted overrides the earlier ones. Keep only that last
1712            // instance so the primary value matches ExifTool's `-TAG` output.
1713            {
1714                fn is_sub_document(g1: &str) -> bool {
1715                    g1 == "IFD1"
1716                        || g1.starts_with("SubIFD")
1717                        || g1 == "PreviewIFD"
1718                        || g1.starts_with("Doc")
1719                        || (g1.starts_with("Track")
1720                            && g1 != "Track1"
1721                            && g1.len() > 5
1722                            && g1.as_bytes()[5].is_ascii_digit())
1723                        // Container / first-wins groups where ExifTool keeps the FIRST
1724                        // duplicate (QuickTime per-track tags are sub-documents; the
1725                        // others use first-priority within the module).
1726                        || g1 == "QuickTime"
1727                        || g1 == "Track1"
1728                        || g1 == "JP2"
1729                        || g1 == "PhotoMechanic"
1730                        || g1 == "DjVu"
1731                    // VCard removed: within one vCard, duplicate tags are last-wins
1732                    // (TelephoneOtherVoice); the 2nd vCard is demoted to priority -1.
1733                }
1734                use std::collections::HashMap as HM;
1735                // group indices by name
1736                let mut by_name: HM<&str, Vec<usize>> = HM::new();
1737                for (i, t) in tags.iter().enumerate() {
1738                    by_name.entry(t.name.as_str()).or_default().push(i);
1739                }
1740                let mut drop: std::collections::HashSet<usize> = std::collections::HashSet::new();
1741                for idxs in by_name.values() {
1742                    if idxs.len() < 2 {
1743                        continue;
1744                    }
1745                    let g1 = &tags[idxs[0]].group.family1;
1746                    // all same family1, same priority, not a sub-document group
1747                    let uniform = idxs.iter().all(|&i| {
1748                        &tags[i].group.family1 == g1 && tags[i].priority == tags[idxs[0]].priority
1749                    });
1750                    if uniform && !is_sub_document(g1) {
1751                        // drop all but the last
1752                        for &i in &idxs[..idxs.len() - 1] {
1753                            drop.insert(i);
1754                        }
1755                    }
1756                }
1757                if !drop.is_empty() {
1758                    let mut i = 0usize;
1759                    tags.retain(|_| {
1760                        let keep = !drop.contains(&i);
1761                        i += 1;
1762                        keep
1763                    });
1764                }
1765            }
1766        }
1767
1768        // Filter by requested tags if specified
1769        if !self.options.requested_tags.is_empty() {
1770            let requested: Vec<String> = self
1771                .options
1772                .requested_tags
1773                .iter()
1774                .map(|t| t.to_lowercase())
1775                .collect();
1776            tags.retain(|t| requested.contains(&t.name.to_lowercase()));
1777        }
1778
1779        Ok(tags)
1780    }
1781
1782    /// Format extracted tags into a simple name→value map.
1783    ///
1784    /// Handles duplicate tag names by appending group info.
1785    fn get_info(&self, tags: &[Tag]) -> ImageInfo {
1786        let mut info = ImageInfo::new();
1787        let mut seen: HashMap<String, (usize, i32)> = HashMap::new(); // (count, best priority)
1788
1789        for tag in tags {
1790            let value = if self.options.print_conv {
1791                &tag.print_value
1792            } else {
1793                &tag.raw_value.to_display_string()
1794            };
1795
1796            let entry = seen.entry(tag.name.clone()).or_insert((0, i32::MIN));
1797            entry.0 += 1;
1798
1799            if entry.0 == 1 {
1800                entry.1 = tag.priority;
1801                info.insert(tag.name.clone(), value.clone());
1802            } else if tag.priority > entry.1 {
1803                // Higher priority tag replaces the previous one
1804                entry.1 = tag.priority;
1805                info.insert(tag.name.clone(), value.clone());
1806            } else if self.options.duplicates {
1807                let key = format!("{} [{}:{}]", tag.name, tag.group.family0, tag.group.family1);
1808                info.insert(key, value.clone());
1809            }
1810        }
1811
1812        info
1813    }
1814
1815    /// Detect file type from magic bytes and extension.
1816    fn detect_file_type(&self, data: &[u8], path: &Path) -> Result<FileType> {
1817        // Try magic bytes first
1818        let header_len = data.len().min(256);
1819        if let Some(ft) = file_type::detect_from_magic(&data[..header_len]) {
1820            // Override ICO to Font if extension is .dfont (Mac resource fork)
1821            if ft == FileType::Ico {
1822                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1823                    if ext.eq_ignore_ascii_case("dfont") {
1824                        return Ok(FileType::Dfont);
1825                    }
1826                }
1827            }
1828            // Override JPEG to JPS if the file extension is .jps
1829            if ft == FileType::Jpeg {
1830                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1831                    if ext.eq_ignore_ascii_case("jps") {
1832                        return Ok(FileType::Jps);
1833                    }
1834                }
1835            }
1836            // Override PLIST to AAE if extension is .aae
1837            if ft == FileType::Plist {
1838                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1839                    if ext.eq_ignore_ascii_case("aae") {
1840                        return Ok(FileType::Aae);
1841                    }
1842                }
1843            }
1844            // Override XMP/XML to PLIST/AAE if extension is .plist or .aae
1845            if ft == FileType::Xmp || ft == FileType::Xml {
1846                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1847                    if ext.eq_ignore_ascii_case("plist") {
1848                        return Ok(FileType::Plist);
1849                    }
1850                    if ext.eq_ignore_ascii_case("aae") {
1851                        return Ok(FileType::Aae);
1852                    }
1853                }
1854            }
1855            // Override to PhotoCD if extension is .pcd (file starts with 0xFF padding)
1856            if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1857                if ext.eq_ignore_ascii_case("pcd")
1858                    && data.len() >= 2056
1859                    && &data[2048..2055] == b"PCD_IPI"
1860                {
1861                    return Ok(FileType::PhotoCd);
1862                }
1863            }
1864            // Override MP3 to MPC/APE/WavPack if extension says otherwise
1865            if ft == FileType::Mp3 {
1866                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1867                    if ext.eq_ignore_ascii_case("mpc") {
1868                        return Ok(FileType::Mpc);
1869                    }
1870                    if ext.eq_ignore_ascii_case("ape") {
1871                        return Ok(FileType::Ape);
1872                    }
1873                    if ext.eq_ignore_ascii_case("wv") {
1874                        return Ok(FileType::WavPack);
1875                    }
1876                }
1877            }
1878            // ASF is the container for WMV (video) and WMA (audio); refine by extension.
1879            if ft == FileType::Asf {
1880                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1881                    if ext.eq_ignore_ascii_case("wmv") {
1882                        return Ok(FileType::Wmv);
1883                    }
1884                    if ext.eq_ignore_ascii_case("wma") {
1885                        return Ok(FileType::Wma);
1886                    }
1887                }
1888            }
1889            // Opus is an Ogg stream with the Opus codec.
1890            if ft == FileType::Ogg {
1891                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1892                    if ext.eq_ignore_ascii_case("opus") {
1893                        return Ok(FileType::Opus);
1894                    }
1895                }
1896            }
1897            // TIFF magic covers many RAW variants (DNG, NEF, ARW, …); ExifTool refines
1898            // the type by extension since they share the TIFF structure.
1899            if ft == FileType::Tiff {
1900                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1901                    if let Some(ext_ft) = file_type::detect_from_extension(ext) {
1902                        if ext_ft != FileType::Tiff && is_tiff_based(ext_ft) {
1903                            return Ok(ext_ft);
1904                        }
1905                    }
1906                }
1907            }
1908            // For ZIP files, check if it's an EIP (by extension) or OpenDocument format
1909            if ft == FileType::Zip {
1910                // Check extension first for EIP
1911                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1912                    if ext.eq_ignore_ascii_case("eip") {
1913                        return Ok(FileType::Eip);
1914                    }
1915                }
1916                // iWork (KEY/PAGES/NUMBERS): ExifTool keys on the file extension once an
1917                // iWork marker member is present (ZIP.pm Process_iWork).
1918                if let Some(iw) = detect_iwork_type(data, path) {
1919                    return Ok(iw);
1920                }
1921                if let Some(od_type) = detect_opendocument_type(data) {
1922                    return Ok(od_type);
1923                }
1924            }
1925            // OLE2 compound files (DOC/XLS/PPT/FlashPix) all share the D0CF11E0 magic;
1926            // refine by the UTF-16 stream names in the directory.
1927            if ft == FileType::Doc {
1928                if let Some(ole) = detect_ole2_type(data) {
1929                    return Ok(ole);
1930                }
1931            }
1932            return Ok(ft);
1933        }
1934
1935        // Fall back to extension
1936        if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1937            if let Some(ft) = file_type::detect_from_extension(ext) {
1938                return Ok(ft);
1939            }
1940        }
1941
1942        let ext_str = path
1943            .extension()
1944            .and_then(|e| e.to_str())
1945            .unwrap_or("unknown");
1946        Err(Error::UnsupportedFileType(ext_str.to_string()))
1947    }
1948
1949    /// Dispatch to the appropriate format reader.
1950    fn process_file(&self, data: &[u8], file_type: FileType) -> Result<Vec<Tag>> {
1951        match file_type {
1952            FileType::Jpeg | FileType::Jps => formats::jpeg::read_jpeg(data),
1953            FileType::Png | FileType::Mng => formats::png::read_png(data),
1954            // All TIFF-based formats (TIFF + most RAW formats)
1955            FileType::Tiff
1956            | FileType::Btf
1957            | FileType::Dng
1958            | FileType::Cr2
1959            | FileType::Nef
1960            | FileType::Arw
1961            | FileType::Sr2
1962            | FileType::Orf
1963            | FileType::Pef
1964            | FileType::Erf
1965            | FileType::Fff
1966            | FileType::Rwl
1967            | FileType::Mef
1968            | FileType::Srw
1969            | FileType::Gpr
1970            | FileType::Arq
1971            | FileType::ThreeFR
1972            | FileType::Dcr
1973            | FileType::Rw2
1974            | FileType::Srf => formats::tiff::read_tiff(data),
1975            // Phase One IIQ: TIFF + PhaseOne maker note block
1976            FileType::Iiq => formats::iiq::read_iiq(data),
1977            // Image formats
1978            FileType::Gif => formats::gif::read_gif(data),
1979            FileType::Bmp => formats::bmp::read_bmp(data),
1980            FileType::WebP | FileType::Avi | FileType::Wav => formats::riff::read_riff(data),
1981            FileType::Psd => formats::psd::read_psd(data),
1982            // Audio formats
1983            FileType::Mp3 => formats::id3::read_mp3(data),
1984            FileType::Flac => formats::flac::read_flac(data),
1985            FileType::Ogg | FileType::Opus => formats::ogg::read_ogg(data),
1986            FileType::Aiff => formats::aiff::read_aiff(data),
1987            // Video formats
1988            FileType::Mp4
1989            | FileType::QuickTime
1990            | FileType::M4a
1991            | FileType::ThreeGP
1992            | FileType::Heif
1993            | FileType::Avif
1994            | FileType::Cr3
1995            | FileType::Crm
1996            | FileType::F4v
1997            | FileType::Mqv
1998            | FileType::Lrv => {
1999                formats::quicktime::read_quicktime_with_ee(data, self.options.extract_embedded)
2000            }
2001            FileType::Mkv | FileType::WebM => formats::matroska::read_matroska(data),
2002            FileType::Asf | FileType::Wmv | FileType::Wma => formats::asf::read_asf(data),
2003            FileType::Wtv => formats::wtv::read_wtv(data),
2004            // RAW formats with custom containers
2005            FileType::Crw => formats::canon_raw::read_crw(data),
2006            FileType::Raf => formats::raf::read_raf(data),
2007            FileType::Mrw => formats::mrw::read_mrw(data),
2008            FileType::Mrc => formats::mrc::read_mrc(data),
2009            // Image formats
2010            FileType::Jp2 => formats::jp2::read_jp2(data),
2011            FileType::J2c => formats::jp2::read_j2c(data),
2012            FileType::Jxl => formats::jp2::read_jxl(data),
2013            FileType::Ico => formats::ico::read_ico(data),
2014            FileType::Icc => formats::icc::read_icc(data),
2015            // Documents
2016            FileType::Pdf => formats::pdf::read_pdf(data),
2017            FileType::PostScript => {
2018                // PFA fonts start with %!PS-AdobeFont or %!FontType1
2019                if data.starts_with(b"%!PS-AdobeFont") || data.starts_with(b"%!FontType1") {
2020                    formats::font::read_pfa(data)
2021                        .or_else(|_| formats::postscript::read_postscript(data))
2022                } else {
2023                    formats::postscript::read_postscript(data)
2024                }
2025            }
2026            FileType::Eip => formats::capture_one::read_eip(data),
2027            FileType::Zip
2028            | FileType::Docx
2029            | FileType::Xlsx
2030            | FileType::Pptx
2031            | FileType::Doc
2032            | FileType::Xls
2033            | FileType::Ppt
2034            | FileType::Numbers
2035            | FileType::Pages
2036            | FileType::Key => formats::zip::read_zip(data),
2037            FileType::Rtf => formats::rtf::read_rtf(data),
2038            FileType::InDesign => formats::indesign::read_indesign(data),
2039            FileType::Pcap => formats::pcap::read_pcap(data),
2040            FileType::Pcapng => formats::pcap::read_pcapng(data),
2041            // Canon VRD / DR4
2042            FileType::Vrd => formats::canon_vrd::read_vrd(data).or_else(|_| Ok(Vec::new())),
2043            FileType::Dr4 => formats::canon_vrd::read_dr4(data).or_else(|_| Ok(Vec::new())),
2044            // Metadata / Other
2045            FileType::Xmp => formats::xmp_file::read_xmp(data),
2046            FileType::Svg => formats::svg::read_svg(data),
2047            FileType::Html => {
2048                // SVG files that weren't detected by magic (e.g., via extension fallback)
2049                let is_svg = data.windows(4).take(512).any(|w| w == b"<svg");
2050                if is_svg {
2051                    formats::svg::read_svg(data)
2052                } else {
2053                    formats::html::read_html(data)
2054                }
2055            }
2056            FileType::Exe => formats::exe::read_exe(data),
2057            FileType::Font => {
2058                // AFM: Adobe Font Metrics text file
2059                if data.starts_with(b"StartFontMetrics") {
2060                    return formats::font::read_afm(data);
2061                }
2062                // PFA: PostScript Type 1 ASCII font
2063                if data.starts_with(b"%!PS-AdobeFont") || data.starts_with(b"%!FontType1") {
2064                    return formats::font::read_pfa(data).or_else(|_| Ok(Vec::new()));
2065                }
2066                // PFB: PostScript Type 1 Binary font
2067                if data.len() >= 2 && data[0] == 0x80 && (data[1] == 0x01 || data[1] == 0x02) {
2068                    return formats::font::read_pfb(data).or_else(|_| Ok(Vec::new()));
2069                }
2070                formats::font::read_font(data)
2071            }
2072            // Audio with ID3
2073            FileType::WavPack | FileType::Dsf => formats::id3::read_mp3(data),
2074            FileType::Ape => formats::ape::read_ape(data),
2075            FileType::Mpc => formats::ape::read_mpc(data),
2076            FileType::Aac => formats::aac::read_aac(data),
2077            FileType::RealAudio => {
2078                formats::real_audio::read_real_audio(data).or_else(|_| Ok(Vec::new()))
2079            }
2080            FileType::RealMedia => {
2081                formats::real_media::read_real_media(data).or_else(|_| Ok(Vec::new()))
2082            }
2083            // Misc formats
2084            FileType::Czi => formats::czi::read_czi(data).or_else(|_| Ok(Vec::new())),
2085            FileType::PhotoCd => formats::photo_cd::read_photo_cd(data).or_else(|_| Ok(Vec::new())),
2086            FileType::Dicom => formats::dicom::read_dicom(data),
2087            FileType::Fits => formats::fits::read_fits(data),
2088            FileType::Flv => formats::flv::read_flv(data),
2089            FileType::Mxf => formats::mxf::read_mxf(data).or_else(|_| Ok(Vec::new())),
2090            FileType::Swf => formats::swf::read_swf(data),
2091            FileType::Hdr => formats::hdr::read_hdr(data),
2092            FileType::DjVu => formats::djvu::read_djvu(data),
2093            FileType::Xcf => formats::gimp::read_xcf(data),
2094            FileType::Mie => formats::mie::read_mie(data),
2095            FileType::Lfp => formats::lytro::read_lfp(data),
2096            // FileType::Miff dispatched via string extension below
2097            FileType::Fpf => formats::flir_fpf::read_fpf(data),
2098            FileType::Flif => formats::flif::read_flif(data),
2099            FileType::Bpg => formats::bpg::read_bpg(data),
2100            FileType::Pcx => formats::pcx::read_pcx(data),
2101            FileType::Pict => formats::pict::read_pict(data),
2102            FileType::Mpeg => formats::mpeg::read_mpeg(data),
2103            FileType::M2ts => formats::m2ts::read_m2ts(data, self.options.extract_embedded),
2104            FileType::Gzip => formats::gzip::read_gzip(data),
2105            FileType::Rar => formats::rar::read_rar(data),
2106            FileType::SevenZ => formats::sevenz::read_7z(data),
2107            FileType::Dss => formats::dss::read_dss(data),
2108            FileType::Moi => formats::moi::read_moi(data),
2109            FileType::MacOs => formats::macos::read_macos(data),
2110            FileType::Json => formats::json_format::read_json(data),
2111            // New formats
2112            FileType::Pgf => formats::pgf::read_pgf(data),
2113            FileType::Xisf => formats::xisf::read_xisf(data),
2114            FileType::Torrent => formats::torrent::read_torrent(data),
2115            FileType::Mobi => formats::palm::read_palm(data),
2116            FileType::Psp => formats::psp::read_psp(data),
2117            FileType::SonyPmp => formats::sony_pmp::read_sony_pmp(data),
2118            FileType::Audible => formats::audible::read_audible(data),
2119            FileType::Exr => formats::openexr::read_openexr(data),
2120            // New formats
2121            FileType::Plist => {
2122                if data.starts_with(b"bplist") {
2123                    formats::plist::read_binary_plist_tags(data)
2124                } else {
2125                    formats::plist::read_xml_plist(data)
2126                }
2127            }
2128            FileType::Aae => {
2129                if data.starts_with(b"bplist") {
2130                    formats::plist::read_binary_plist_tags(data)
2131                } else {
2132                    formats::plist::read_aae_plist(data)
2133                }
2134            }
2135            FileType::KyoceraRaw => formats::kyocera_raw::read_kyocera_raw(data),
2136            FileType::PortableFloatMap => formats::pfm::read_pfm(data),
2137            FileType::Ods
2138            | FileType::Odt
2139            | FileType::Odp
2140            | FileType::Odg
2141            | FileType::Odf
2142            | FileType::Odb
2143            | FileType::Odi
2144            | FileType::Odc => formats::zip::read_zip(data),
2145            FileType::Lif => formats::lif::read_lif(data),
2146            FileType::Rwz => formats::rawzor::read_rawzor(data),
2147            FileType::Jxr => formats::jxr::read_jxr(data),
2148            FileType::Miff => formats::miff::read_miff(data).or_else(|_| Ok(Vec::new())),
2149            FileType::Tnef => formats::tnef::read_tnef(data).or_else(|_| Ok(Vec::new())),
2150            FileType::Wpg => formats::wpg::read_wpg(data).or_else(|_| Ok(Vec::new())),
2151            FileType::Dv => {
2152                formats::dv::read_dv(data, data.len() as u64).or_else(|_| Ok(Vec::new()))
2153            }
2154            FileType::Itc => formats::itc::read_itc(data).or_else(|_| Ok(Vec::new())),
2155            FileType::Iso => formats::iso::read_iso(data).or_else(|_| Ok(Vec::new())),
2156            FileType::Afm => formats::font::read_afm(data).or_else(|_| Ok(Vec::new())),
2157            FileType::Pfa => formats::font::read_pfa(data).or_else(|_| Ok(Vec::new())),
2158            FileType::Pfb => formats::font::read_pfb(data).or_else(|_| Ok(Vec::new())),
2159            FileType::Dfont => formats::font::read_font(data).or_else(|_| Ok(Vec::new())),
2160            FileType::Xml | FileType::Inx => {
2161                formats::xmp_file::read_xmp(data).or_else(|_| Ok(Vec::new()))
2162            }
2163            FileType::Eps => formats::postscript::read_postscript(data),
2164            _ => Err(Error::UnsupportedFileType(format!("{}", file_type))),
2165        }
2166    }
2167
2168    /// Fallback: try to read file based on extension for formats without magic detection.
2169    fn process_by_extension(&self, data: &[u8], path: &Path) -> Result<Vec<Tag>> {
2170        let ext = path
2171            .extension()
2172            .and_then(|e| e.to_str())
2173            .unwrap_or("")
2174            .to_ascii_lowercase();
2175
2176        match ext.as_str() {
2177            "ppm" | "pgm" | "pbm" => formats::ppm::read_ppm(data),
2178            "pfm" => {
2179                // PFM can be Portable Float Map or Printer Font Metrics
2180                if data.len() >= 3 && data[0] == b'P' && (data[1] == b'f' || data[1] == b'F') {
2181                    formats::ppm::read_ppm(data)
2182                } else {
2183                    Ok(Vec::new()) // Printer Font Metrics
2184                }
2185            }
2186            "json" => formats::json_format::read_json(data),
2187            "svg" => formats::svg::read_svg(data),
2188            "ram" => formats::ram::read_ram(data).or_else(|_| Ok(Vec::new())),
2189            "txt" | "log" | "igc" => Ok(compute_text_tags(data, false)),
2190            "csv" => Ok(compute_text_tags(data, true)),
2191            "url" => formats::lnk::read_url(data).or_else(|_| Ok(Vec::new())),
2192            "lnk" => formats::lnk::read_lnk(data).or_else(|_| Ok(Vec::new())),
2193            "gpx" | "kml" | "xml" | "inx" => formats::xmp_file::read_xmp(data),
2194            "plist" => {
2195                if data.starts_with(b"bplist") {
2196                    formats::plist::read_binary_plist_tags(data).or_else(|_| Ok(Vec::new()))
2197                } else {
2198                    formats::plist::read_xml_plist(data).or_else(|_| Ok(Vec::new()))
2199                }
2200            }
2201            "aae" => {
2202                if data.starts_with(b"bplist") {
2203                    formats::plist::read_binary_plist_tags(data).or_else(|_| Ok(Vec::new()))
2204                } else {
2205                    formats::plist::read_aae_plist(data).or_else(|_| Ok(Vec::new()))
2206                }
2207            }
2208            "vcf" | "ics" | "vcard" => {
2209                let s = crate::encoding::decode_utf8_or_latin1(&data[..data.len().min(100)]);
2210                if s.contains("BEGIN:VCALENDAR") {
2211                    formats::vcard::read_ics(data).or_else(|_| Ok(Vec::new()))
2212                } else {
2213                    formats::vcard::read_vcf(data).or_else(|_| Ok(Vec::new()))
2214                }
2215            }
2216            "xcf" => Ok(Vec::new()), // GIMP
2217            "vrd" => formats::canon_vrd::read_vrd(data).or_else(|_| Ok(Vec::new())),
2218            "dr4" => formats::canon_vrd::read_dr4(data).or_else(|_| Ok(Vec::new())),
2219            "indd" | "indt" => Ok(Vec::new()), // InDesign
2220            "x3f" => formats::sigma_raw::read_x3f(data).or_else(|_| Ok(Vec::new())),
2221            "mie" => Ok(Vec::new()), // MIE
2222            "exr" => Ok(Vec::new()), // OpenEXR
2223            "wpg" => formats::wpg::read_wpg(data).or_else(|_| Ok(Vec::new())),
2224            "moi" => formats::moi::read_moi(data).or_else(|_| Ok(Vec::new())),
2225            "macos" => formats::macos::read_macos(data).or_else(|_| Ok(Vec::new())),
2226            "dpx" => formats::dpx::read_dpx(data).or_else(|_| Ok(Vec::new())),
2227            "r3d" => formats::red::read_r3d(data).or_else(|_| Ok(Vec::new())),
2228            "tnef" => formats::tnef::read_tnef(data).or_else(|_| Ok(Vec::new())),
2229            "ppt" | "fpx" => formats::flashpix::read_fpx(data).or_else(|_| Ok(Vec::new())),
2230            "fpf" => formats::flir_fpf::read_fpf(data).or_else(|_| Ok(Vec::new())),
2231            "itc" => formats::itc::read_itc(data).or_else(|_| Ok(Vec::new())),
2232            "mpg" | "mpeg" | "m1v" | "m2v" | "mpv" => {
2233                formats::mpeg::read_mpeg(data).or_else(|_| Ok(Vec::new()))
2234            }
2235            "dv" => formats::dv::read_dv(data, data.len() as u64).or_else(|_| Ok(Vec::new())),
2236            "czi" => formats::czi::read_czi(data).or_else(|_| Ok(Vec::new())),
2237            "miff" => formats::miff::read_miff(data).or_else(|_| Ok(Vec::new())),
2238            "lfp" | "mrc" | "dss" | "mobi" | "psp" | "pgf" | "raw" | "pmp" | "torrent" | "xisf"
2239            | "mxf" | "dfont" => Ok(Vec::new()),
2240            "iso" => formats::iso::read_iso(data).or_else(|_| Ok(Vec::new())),
2241            "afm" => formats::font::read_afm(data).or_else(|_| Ok(Vec::new())),
2242            "pfa" => formats::font::read_pfa(data).or_else(|_| Ok(Vec::new())),
2243            "pfb" => formats::font::read_pfb(data).or_else(|_| Ok(Vec::new())),
2244            _ => Err(Error::UnsupportedFileType(ext)),
2245        }
2246    }
2247}
2248
2249impl Default for ExifTool {
2250    fn default() -> Self {
2251        Self::new()
2252    }
2253}
2254
/// Refine an EXE file's (FileType, MIMEType, FileTypeExtension) from its magic, mirroring
/// ExifTool's EXE SetFileType. MIME is always application/octet-stream for these.
///
/// Recognised signatures, checked in this order: ELF, thin Mach-O, fat Mach-O,
/// `ar` static library, and PE ("MZ" stub pointing at a "PE\0\0" header).
///
/// Returns `None` when `d` holds fewer than 8 bytes or none of the signatures match.
fn exe_subtype(d: &[u8]) -> Option<(&'static str, &'static str, &'static str)> {
    const MIME: &str = "application/octet-stream";
    if d.len() < 8 {
        return None;
    }
    // ELF: 0x7F 'E' 'L' 'F'; data[5] endianness (1=LE,2=BE); e_type at offset 16 (2 bytes)
    if d.starts_with(b"\x7fELF") && d.len() >= 18 {
        let le = d[5] == 1;
        let e_type = if le {
            u16::from_le_bytes([d[16], d[17]])
        } else {
            u16::from_be_bytes([d[16], d[17]])
        };
        return Some(match e_type {
            1 => ("ELF relocatable", MIME, "o"),
            2 => ("ELF executable", MIME, ""),
            3 => ("ELF shared library", MIME, "so"),
            4 => ("ELF core file", MIME, ""),
            _ => ("ELF", MIME, ""),
        });
    }
    // Mach-O thin binary: magic FEEDFACE/FEEDFACF (BE) or CEFAEDFE/CFFAEDFE (LE).
    let magic_be = u32::from_be_bytes([d[0], d[1], d[2], d[3]]);
    let macho = matches!(magic_be, 0xFEEDFACE | 0xFEEDFACF | 0xCEFAEDFE | 0xCFFAEDFE);
    if macho && d.len() >= 16 {
        let le = matches!(magic_be, 0xCEFAEDFE | 0xCFFAEDFE);
        let filetype = if le {
            u32::from_le_bytes([d[12], d[13], d[14], d[15]])
        } else {
            u32::from_be_bytes([d[12], d[13], d[14], d[15]])
        };
        return Some(match filetype {
            1 => ("Mach-O object file", MIME, "o"),
            6 => ("Mach-O dynamic link library", MIME, "dylib"),
            8 => ("Mach-O dynamic bound bundle", MIME, "dylib"),
            9 => ("Mach-O dynamic link library stub", MIME, "dylib"),
            _ => ("Mach-O executable", MIME, ""),
        });
    }
    // Mach-O fat binary: CAFEBABE / BEBAFECA
    if matches!(magic_be, 0xCAFEBABE | 0xBEBAFECA) {
        return Some(("Mach-O fat binary executable", MIME, ""));
    }
    // ar archive ("!<arch>\n"): static library (Mach-O if it contains Mach-O members).
    if d.starts_with(b"!<arch>\n") {
        // Only the first 4096 window positions are scanned for a Mach-O magic.
        let is_macho = d.windows(4).take(4096).any(|w| {
            let m = u32::from_be_bytes([w[0], w[1], w[2], w[3]]);
            matches!(
                m,
                0xFEEDFACE | 0xFEEDFACF | 0xCEFAEDFE | 0xCFFAEDFE | 0xCAFEBABE
            )
        });
        return Some(if is_macho {
            ("Mach-O static library", MIME, "a")
        } else {
            ("Static library", MIME, "a")
        });
    }
    // PE (Windows): "MZ" then PE header; machine field selects Win32/Win64.
    if d.starts_with(b"MZ") && d.len() >= 0x40 {
        let pe_off = u32::from_le_bytes([d[0x3c], d[0x3d], d[0x3e], d[0x3f]]) as usize;
        // The offset comes straight from the file: use checked arithmetic and a
        // checked slice so a bogus value can neither wrap nor index out of bounds.
        let pe_header = pe_off
            .checked_add(6)
            .and_then(|end| d.get(pe_off..end));
        if let Some(hdr) = pe_header {
            if hdr.starts_with(b"PE\0\0") {
                let machine = u16::from_le_bytes([hdr[4], hdr[5]]);
                return Some(match machine {
                    0x8664 | 0xAA64 => ("Win64 EXE", MIME, "exe"),
                    _ => ("Win32 EXE", MIME, "exe"),
                });
            }
        }
    }
    None
}
2330
2331/// Whether a FileType is a TIFF-based RAW variant (shares TIFF magic, refined by extension).
2332fn is_tiff_based(ft: FileType) -> bool {
2333    matches!(
2334        ft,
2335        FileType::Dng
2336            | FileType::Cr2
2337            | FileType::Nef
2338            | FileType::Arw
2339            | FileType::Sr2
2340            | FileType::Orf
2341            | FileType::Pef
2342            | FileType::Erf
2343            | FileType::Rwl
2344            | FileType::Mef
2345            | FileType::Srw
2346            | FileType::Gpr
2347            | FileType::Arq
2348            | FileType::ThreeFR
2349            | FileType::Dcr
2350            | FileType::Rw2
2351            | FileType::Srf
2352            | FileType::Iiq
2353            | FileType::Btf
2354    )
2355}
2356
2357/// Refine an OLE2 compound document (DOC/XLS/PPT) by scanning the directory for
2358/// well-known UTF-16LE stream names. Returns None (→ keep DOC) when none match.
2359fn detect_ole2_type(data: &[u8]) -> Option<FileType> {
2360    fn has_utf16(data: &[u8], name: &str) -> bool {
2361        let needle: Vec<u8> = name.encode_utf16().flat_map(|u| u.to_le_bytes()).collect();
2362        data.windows(needle.len()).any(|w| w == needle.as_slice())
2363    }
2364    if has_utf16(data, "PowerPoint Document") {
2365        Some(FileType::Ppt)
2366    } else if has_utf16(data, "Workbook") || has_utf16(data, "Book") {
2367        Some(FileType::Xls)
2368    } else {
2369        None
2370    }
2371}
2372
2373/// Detect an iWork (KEY/PAGES/NUMBERS) ZIP. ExifTool recognises these by the
2374/// presence of an iWork marker member, then maps the file type from the
2375/// extension (ZIP.pm `%iWorkType` / Process_iWork).
2376fn detect_iwork_type(data: &[u8], path: &Path) -> Option<FileType> {
2377    const MARKERS: &[&[u8]] = &[
2378        b"index.xml",
2379        b"index.apxl",
2380        b"QuickLook/Thumbnail.jpg",
2381        b"Index/Document.iwa",
2382        b"Index/Slide.iwa",
2383        b"Index/Tables/DataList.iwa",
2384    ];
2385    let has_marker = MARKERS
2386        .iter()
2387        .any(|m| data.windows(m.len()).any(|w| w == *m));
2388    if !has_marker {
2389        return None;
2390    }
2391    let ext = path
2392        .extension()
2393        .and_then(|e| e.to_str())
2394        .unwrap_or("")
2395        .to_ascii_lowercase();
2396    match ext.as_str() {
2397        "numbers" | "nmbtemplate" => Some(FileType::Numbers),
2398        "pages" => Some(FileType::Pages),
2399        "key" | "kth" => Some(FileType::Key),
2400        _ => None,
2401    }
2402}
2403
2404/// Content-dependent FileType code / MIME refinements (ExifTool SetFileType with a
2405/// content test). Returns (code, mime); the extension keeps its default.
2406fn refine_filetype_by_content(file_type: FileType, data: &[u8]) -> Option<(String, String)> {
2407    match file_type {
2408        // Printer Font Metrics (font, starts 0x00 0x01/0x02) vs Portable Float Map (image, "PF").
2409        FileType::PortableFloatMap if data.len() >= 2 && data[0] == 0x00 && data[1] <= 0x02 => {
2410            Some(("PFM".into(), "application/x-font-type1".into()))
2411        }
2412        // XML property list → application/xml (binary plist keeps application/x-plist).
2413        FileType::Plist if !data.starts_with(b"bplist") => {
2414            Some(("PLIST".into(), "application/xml".into()))
2415        }
2416        // Naked JPEG XL codestream (FF 0A) vs the ISOBMFF container.
2417        FileType::Jxl if data.starts_with(&[0xFF, 0x0A]) => {
2418            Some(("JXL Codestream".into(), file_type.mime_type().to_string()))
2419        }
2420        // Extended WebP: VP8X chunk at offset 12.
2421        FileType::WebP if data.len() >= 16 && &data[12..16] == b"VP8X" => {
2422            Some(("Extended WEBP".into(), file_type.mime_type().to_string()))
2423        }
2424        // Multi-page DjVu: "DJVM" form type at offset 12.
2425        FileType::DjVu if data.len() >= 16 && &data[12..16] == b"DJVM" => Some((
2426            "DJVU (multi-page)".into(),
2427            file_type.mime_type().to_string(),
2428        )),
2429        _ => None,
2430    }
2431}
2432
2433fn detect_opendocument_type(data: &[u8]) -> Option<FileType> {
2434    // OpenDocument ZIPs have "mimetype" as the FIRST local file entry (uncompressed)
2435    if data.len() < 30 || data[0..4] != [0x50, 0x4B, 0x03, 0x04] {
2436        return None;
2437    }
2438    let compression = u16::from_le_bytes([data[8], data[9]]);
2439    let compressed_size = u32::from_le_bytes([data[18], data[19], data[20], data[21]]) as usize;
2440    let name_len = u16::from_le_bytes([data[26], data[27]]) as usize;
2441    let extra_len = u16::from_le_bytes([data[28], data[29]]) as usize;
2442    let name_start = 30;
2443    if name_start + name_len > data.len() {
2444        return None;
2445    }
2446    let filename = std::str::from_utf8(&data[name_start..name_start + name_len]).unwrap_or("");
2447    if filename != "mimetype" || compression != 0 {
2448        return None;
2449    }
2450    let content_start = name_start + name_len + extra_len;
2451    let content_end = (content_start + compressed_size).min(data.len());
2452    if content_start >= content_end {
2453        return None;
2454    }
2455    let mime = std::str::from_utf8(&data[content_start..content_end])
2456        .unwrap_or("")
2457        .trim();
2458    match mime {
2459        "application/vnd.oasis.opendocument.spreadsheet" => Some(FileType::Ods),
2460        "application/vnd.oasis.opendocument.text" => Some(FileType::Odt),
2461        "application/vnd.oasis.opendocument.presentation" => Some(FileType::Odp),
2462        "application/vnd.oasis.opendocument.graphics" => Some(FileType::Odg),
2463        "application/vnd.oasis.opendocument.formula" => Some(FileType::Odf),
2464        "application/vnd.oasis.opendocument.database" => Some(FileType::Odb),
2465        "application/vnd.oasis.opendocument.image" => Some(FileType::Odi),
2466        "application/vnd.oasis.opendocument.chart" => Some(FileType::Odc),
2467        _ => None,
2468    }
2469}
2470
2471/// Detect the file type of a file at the given path.
2472pub fn get_file_type<P: AsRef<Path>>(path: P) -> Result<FileType> {
2473    let path = path.as_ref();
2474    let mut file = fs::File::open(path).map_err(Error::Io)?;
2475    let mut header = [0u8; 256];
2476    use std::io::Read;
2477    let n = file.read(&mut header).map_err(Error::Io)?;
2478
2479    if let Some(ft) = file_type::detect_from_magic(&header[..n]) {
2480        return Ok(ft);
2481    }
2482
2483    if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
2484        if let Some(ft) = file_type::detect_from_extension(ext) {
2485            return Ok(ft);
2486        }
2487    }
2488
2489    Err(Error::UnsupportedFileType("unknown".into()))
2490}
2491
/// Classification of EXIF tags into IFD groups.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ExifIfdGroup {
    /// The main image directory; also the fallback for every tag not listed below.
    Ifd0,
    /// The Exif sub-IFD.
    ExifIfd,
    /// The GPS sub-IFD.
    Gps,
}

/// Determine which IFD a tag belongs to based on its ID.
///
/// The decision is purely range-based: a fixed set of ID ranges maps to the Exif
/// sub-IFD, IDs `0x0000..=0x001F` map to the GPS IFD, and anything else is IFD0.
fn classify_exif_tag(tag_id: u16) -> ExifIfdGroup {
    match tag_id {
        // ExifIFD tags
        0x829A..=0x829D | 0x8822..=0x8827 | 0x8830 | 0x9000..=0x9292 | 0xA000..=0xA435 => {
            ExifIfdGroup::ExifIfd
        }
        // GPS tags
        0x0000..=0x001F => ExifIfdGroup::Gps,
        // Everything else → IFD0
        _ => ExifIfdGroup::Ifd0,
    }
}
2512
2513/// Extract existing EXIF entries from a JPEG file's APP1 segment.
2514fn extract_existing_exif_entries(
2515    jpeg_data: &[u8],
2516    target_bo: ByteOrderMark,
2517) -> Vec<exif_writer::IfdEntry> {
2518    let mut entries = Vec::new();
2519
2520    // Find EXIF APP1 segment
2521    let mut pos = 2; // Skip SOI
2522    while pos + 4 <= jpeg_data.len() {
2523        if jpeg_data[pos] != 0xFF {
2524            pos += 1;
2525            continue;
2526        }
2527        let marker = jpeg_data[pos + 1];
2528        pos += 2;
2529
2530        if marker == 0xDA || marker == 0xD9 {
2531            break; // SOS or EOI
2532        }
2533        if marker == 0xFF || marker == 0x00 || marker == 0xD8 || (0xD0..=0xD7).contains(&marker) {
2534            continue;
2535        }
2536
2537        if pos + 2 > jpeg_data.len() {
2538            break;
2539        }
2540        let seg_len = u16::from_be_bytes([jpeg_data[pos], jpeg_data[pos + 1]]) as usize;
2541        if seg_len < 2 || pos + seg_len > jpeg_data.len() {
2542            break;
2543        }
2544
2545        let seg_data = &jpeg_data[pos + 2..pos + seg_len];
2546
2547        // EXIF APP1
2548        if marker == 0xE1 && seg_data.len() > 14 && seg_data.starts_with(b"Exif\0\0") {
2549            let tiff_data = &seg_data[6..];
2550            extract_ifd_entries(tiff_data, target_bo, &mut entries);
2551            break;
2552        }
2553
2554        pos += seg_len;
2555    }
2556
2557    entries
2558}
2559
2560/// Extract IFD entries from TIFF data, re-encoding values in the target byte order.
2561fn extract_ifd_entries(
2562    tiff_data: &[u8],
2563    target_bo: ByteOrderMark,
2564    entries: &mut Vec<exif_writer::IfdEntry>,
2565) {
2566    use crate::metadata::exif::parse_tiff_header;
2567
2568    let header = match parse_tiff_header(tiff_data) {
2569        Ok(h) => h,
2570        Err(_) => return,
2571    };
2572
2573    let src_bo = header.byte_order;
2574
2575    // Read IFD0
2576    read_ifd_for_merge(
2577        tiff_data,
2578        header.ifd0_offset as usize,
2579        src_bo,
2580        target_bo,
2581        entries,
2582    );
2583
2584    // Find ExifIFD and GPS pointers
2585    let ifd0_offset = header.ifd0_offset as usize;
2586    if ifd0_offset + 2 > tiff_data.len() {
2587        return;
2588    }
2589    let count = read_u16_bo(tiff_data, ifd0_offset, src_bo) as usize;
2590    for i in 0..count {
2591        let eoff = ifd0_offset + 2 + i * 12;
2592        if eoff + 12 > tiff_data.len() {
2593            break;
2594        }
2595        let tag = read_u16_bo(tiff_data, eoff, src_bo);
2596        let value_off = read_u32_bo(tiff_data, eoff + 8, src_bo) as usize;
2597
2598        match tag {
2599            0x8769 => read_ifd_for_merge(tiff_data, value_off, src_bo, target_bo, entries),
2600            0x8825 => read_ifd_for_merge(tiff_data, value_off, src_bo, target_bo, entries),
2601            _ => {}
2602        }
2603    }
2604}
2605
2606/// Read a single IFD and extract entries for merge.
2607fn read_ifd_for_merge(
2608    data: &[u8],
2609    offset: usize,
2610    src_bo: ByteOrderMark,
2611    target_bo: ByteOrderMark,
2612    entries: &mut Vec<exif_writer::IfdEntry>,
2613) {
2614    if offset + 2 > data.len() {
2615        return;
2616    }
2617    let count = read_u16_bo(data, offset, src_bo) as usize;
2618
2619    for i in 0..count {
2620        let eoff = offset + 2 + i * 12;
2621        if eoff + 12 > data.len() {
2622            break;
2623        }
2624
2625        let tag = read_u16_bo(data, eoff, src_bo);
2626        let dtype = read_u16_bo(data, eoff + 2, src_bo);
2627        let count_val = read_u32_bo(data, eoff + 4, src_bo);
2628
2629        // Skip sub-IFD pointers and MakerNote
2630        if tag == 0x8769 || tag == 0x8825 || tag == 0xA005 || tag == 0x927C {
2631            continue;
2632        }
2633
2634        let type_size = match dtype {
2635            1 | 2 | 6 | 7 => 1usize,
2636            3 | 8 => 2,
2637            4 | 9 | 11 | 13 => 4,
2638            5 | 10 | 12 => 8,
2639            _ => continue,
2640        };
2641
2642        let total_size = type_size * count_val as usize;
2643        let raw_data = if total_size <= 4 {
2644            data[eoff + 8..eoff + 12].to_vec()
2645        } else {
2646            let voff = read_u32_bo(data, eoff + 8, src_bo) as usize;
2647            if voff + total_size > data.len() {
2648                continue;
2649            }
2650            data[voff..voff + total_size].to_vec()
2651        };
2652
2653        // Re-encode multi-byte values if byte orders differ
2654        let final_data = if src_bo != target_bo && type_size > 1 {
2655            reencode_bytes(&raw_data, dtype, count_val as usize, src_bo, target_bo)
2656        } else {
2657            raw_data[..total_size].to_vec()
2658        };
2659
2660        let format = match dtype {
2661            1 => exif_writer::ExifFormat::Byte,
2662            2 => exif_writer::ExifFormat::Ascii,
2663            3 => exif_writer::ExifFormat::Short,
2664            4 => exif_writer::ExifFormat::Long,
2665            5 => exif_writer::ExifFormat::Rational,
2666            6 => exif_writer::ExifFormat::SByte,
2667            7 => exif_writer::ExifFormat::Undefined,
2668            8 => exif_writer::ExifFormat::SShort,
2669            9 => exif_writer::ExifFormat::SLong,
2670            10 => exif_writer::ExifFormat::SRational,
2671            11 => exif_writer::ExifFormat::Float,
2672            12 => exif_writer::ExifFormat::Double,
2673            _ => continue,
2674        };
2675
2676        entries.push(exif_writer::IfdEntry {
2677            tag,
2678            format,
2679            data: final_data,
2680        });
2681    }
2682}
2683
2684/// Re-encode multi-byte values when converting between byte orders.
2685fn reencode_bytes(
2686    data: &[u8],
2687    dtype: u16,
2688    count: usize,
2689    src_bo: ByteOrderMark,
2690    dst_bo: ByteOrderMark,
2691) -> Vec<u8> {
2692    let mut out = Vec::with_capacity(data.len());
2693    match dtype {
2694        3 | 8 => {
2695            // 16-bit
2696            for i in 0..count {
2697                let v = read_u16_bo(data, i * 2, src_bo);
2698                match dst_bo {
2699                    ByteOrderMark::LittleEndian => out.extend_from_slice(&v.to_le_bytes()),
2700                    ByteOrderMark::BigEndian => out.extend_from_slice(&v.to_be_bytes()),
2701                }
2702            }
2703        }
2704        4 | 9 | 11 | 13 => {
2705            // 32-bit
2706            for i in 0..count {
2707                let v = read_u32_bo(data, i * 4, src_bo);
2708                match dst_bo {
2709                    ByteOrderMark::LittleEndian => out.extend_from_slice(&v.to_le_bytes()),
2710                    ByteOrderMark::BigEndian => out.extend_from_slice(&v.to_be_bytes()),
2711                }
2712            }
2713        }
2714        5 | 10 => {
2715            // Rational (two 32-bit)
2716            for i in 0..count {
2717                let n = read_u32_bo(data, i * 8, src_bo);
2718                let d = read_u32_bo(data, i * 8 + 4, src_bo);
2719                match dst_bo {
2720                    ByteOrderMark::LittleEndian => {
2721                        out.extend_from_slice(&n.to_le_bytes());
2722                        out.extend_from_slice(&d.to_le_bytes());
2723                    }
2724                    ByteOrderMark::BigEndian => {
2725                        out.extend_from_slice(&n.to_be_bytes());
2726                        out.extend_from_slice(&d.to_be_bytes());
2727                    }
2728                }
2729            }
2730        }
2731        12 => {
2732            // 64-bit double
2733            for i in 0..count {
2734                let mut bytes = [0u8; 8];
2735                bytes.copy_from_slice(&data[i * 8..i * 8 + 8]);
2736                if src_bo != dst_bo {
2737                    bytes.reverse();
2738                }
2739                out.extend_from_slice(&bytes);
2740            }
2741        }
2742        _ => out.extend_from_slice(data),
2743    }
2744    out
2745}
2746
2747fn read_u16_bo(data: &[u8], offset: usize, bo: ByteOrderMark) -> u16 {
2748    if offset + 2 > data.len() {
2749        return 0;
2750    }
2751    match bo {
2752        ByteOrderMark::LittleEndian => u16::from_le_bytes([data[offset], data[offset + 1]]),
2753        ByteOrderMark::BigEndian => u16::from_be_bytes([data[offset], data[offset + 1]]),
2754    }
2755}
2756
2757fn read_u32_bo(data: &[u8], offset: usize, bo: ByteOrderMark) -> u32 {
2758    if offset + 4 > data.len() {
2759        return 0;
2760    }
2761    match bo {
2762        ByteOrderMark::LittleEndian => u32::from_le_bytes([
2763            data[offset],
2764            data[offset + 1],
2765            data[offset + 2],
2766            data[offset + 3],
2767        ]),
2768        ByteOrderMark::BigEndian => u32::from_be_bytes([
2769            data[offset],
2770            data[offset + 1],
2771            data[offset + 2],
2772            data[offset + 3],
2773        ]),
2774    }
2775}
2776
2777/// Map tag name to numeric EXIF tag ID.
2778fn tag_name_to_id(name: &str) -> Option<u16> {
2779    encode_exif_tag(name, "", "", ByteOrderMark::BigEndian).map(|(id, _, _)| id)
2780}
2781
/// Turn a tag value into something safe to use as a file name.
///
/// Path separators, the characters `: * ? " < > |` and all control characters are
/// each replaced by `_`; leading and trailing whitespace is then trimmed.
fn value_to_filename(value: &str) -> String {
    const RESERVED: &[char] = &['/', '\\', ':', '*', '?', '"', '<', '>', '|'];

    let mut sanitized = String::with_capacity(value.len());
    for ch in value.chars() {
        if RESERVED.contains(&ch) || ch.is_control() {
            sanitized.push('_');
        } else {
            sanitized.push(ch);
        }
    }
    sanitized.trim().to_string()
}
2795
/// Parse a date shift string like "+1:0:0" (add 1 hour) or "-0:30:0" (subtract 30 min).
///
/// Accepted shapes are `H`, `H:M` and `H:M:S`, optionally prefixed with `+` or `-`
/// (no prefix means `+`). Returns (sign, hours, minutes, seconds) with `sign` being
/// `1` or `-1`, or `None` when a field is not an unsigned integer or there are more
/// than three fields.
pub fn parse_date_shift(shift: &str) -> Option<(i32, u32, u32, u32)> {
    let (sign, rest) = if let Some(stripped) = shift.strip_prefix('-') {
        (-1, stripped)
    } else if let Some(stripped) = shift.strip_prefix('+') {
        (1, stripped)
    } else {
        (1, shift)
    };

    let parts: Vec<&str> = rest.split(':').collect();
    if parts.len() > 3 {
        return None;
    }
    // Missing trailing fields default to zero.
    let mut hms = [0u32; 3];
    for (slot, part) in hms.iter_mut().zip(&parts) {
        *slot = part.parse().ok()?;
    }
    Some((sign, hms[0], hms[1], hms[2]))
}

/// Shift a datetime string by the given amount.
/// Input format: "YYYY:MM:DD HH:MM:SS"
///
/// `shift` uses the syntax accepted by [`parse_date_shift`]. Day, month and year
/// roll over as needed (leap years included). Anything after the first 19 bytes of
/// `datetime` is ignored and not carried into the result.
///
/// Returns `None` when the shift cannot be parsed, when `datetime` is too short,
/// is not plain ASCII at the expected positions, has non-numeric fields, or has a
/// month of `00` that would have to be rolled backwards.
pub fn shift_datetime(datetime: &str, shift: &str) -> Option<String> {
    let (sign, hours, minutes, seconds) = parse_date_shift(shift)?;

    // `str::get` yields None for short input and for ranges that would split a
    // multi-byte character, so malformed input cannot cause a slicing panic.
    let year: i32 = datetime.get(0..4)?.parse().ok()?;
    let month: u32 = datetime.get(5..7)?.parse().ok()?;
    let day: u32 = datetime.get(8..10)?.parse().ok()?;
    let hour: u32 = datetime.get(11..13)?.parse().ok()?;
    let min: u32 = datetime.get(14..16)?.parse().ok()?;
    let sec: u32 = datetime.get(17..19)?.parse().ok()?;

    // Convert to total seconds, shift, convert back. The arithmetic is done in
    // i64 because a large shift does not fit in u32 once expressed in seconds.
    let time_of_day = i64::from(hour) * 3600 + i64::from(min) * 60 + i64::from(sec);
    let delta = i64::from(hours) * 3600 + i64::from(minutes) * 60 + i64::from(seconds);
    let total_secs = time_of_day + i64::from(sign) * delta;

    // Floor division: a negative total borrows whole days from the date.
    let days_shift = total_secs.div_euclid(86400);
    let time_secs = total_secs.rem_euclid(86400);
    let new_hour = time_secs / 3600;
    let new_min = (time_secs % 3600) / 60;
    let new_sec = time_secs % 60;

    let mut new_day = i64::from(day) + days_shift;
    let mut new_month = month;
    let mut new_year = year;

    let days_in_month = |m: u32, y: i32| -> i64 {
        match m {
            1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
            4 | 6 | 9 | 11 => 30,
            2 => {
                if (y % 4 == 0 && y % 100 != 0) || y % 400 == 0 {
                    29
                } else {
                    28
                }
            }
            // Out-of-range month numbers are treated as 30-day months.
            _ => 30,
        }
    };

    // Roll forward one month at a time while the day overflows the month.
    while new_day > days_in_month(new_month, new_year) {
        new_day -= days_in_month(new_month, new_year);
        new_month += 1;
        if new_month > 12 {
            new_month = 1;
            new_year += 1;
        }
    }
    // Roll backward one month at a time while the day is before the 1st.
    while new_day < 1 {
        new_month = match new_month {
            0 => return None, // month "00" has no predecessor
            1 => {
                new_year -= 1;
                12
            }
            m => m - 1,
        };
        new_day += days_in_month(new_month, new_year);
    }

    Some(format!(
        "{:04}:{:02}:{:02} {:02}:{:02}:{:02}",
        new_year, new_month, new_day, new_hour, new_min, new_sec
    ))
}
2900
// Only used by the `#[cfg(unix)]` File:System pseudo-tags above (stat-derived
// FileModifyDate/FileAccessDate/FileInodeChangeDate); dead on Windows otherwise.
//
// Converts seconds since the Unix epoch into "YYYY:MM:DD HH:MM:SS" using the
// proleptic Gregorian calendar with no time-zone adjustment. Negative values
// (timestamps before 1970) are supported.
#[cfg(unix)]
fn unix_to_datetime(secs: i64) -> String {
    let is_leap = |y: i32| (y % 4 == 0 && y % 100 != 0) || y % 400 == 0;
    let year_len = |y: i32| -> i64 {
        if is_leap(y) {
            366
        } else {
            365
        }
    };

    // Euclidean division keeps the time of day in 0..86400 for negative input,
    // pushing the borrow into the (then negative) day count.
    let time = secs.rem_euclid(86400);
    let h = time / 3600;
    let m = (time % 3600) / 60;
    let s = time % 60;

    let mut y = 1970i32;
    let mut rem = secs.div_euclid(86400);
    // Before the epoch: step back whole years until the day index is non-negative.
    while rem < 0 {
        y -= 1;
        rem += year_len(y);
    }
    // After the epoch: consume whole years.
    while rem >= year_len(y) {
        rem -= year_len(y);
        y += 1;
    }

    let months: [i64; 12] = [
        31,
        if is_leap(y) { 29 } else { 28 },
        31,
        30,
        31,
        30,
        31,
        31,
        30,
        31,
        30,
        31,
    ];
    let mut mo = 1;
    for &dm in &months {
        if rem < dm {
            break;
        }
        rem -= dm;
        mo += 1;
    }
    format!(
        "{:04}:{:02}:{:02} {:02}:{:02}:{:02}",
        y,
        mo,
        rem + 1,
        h,
        m,
        s
    )
}
2957
/// Port of ExifTool ConvertFileSize (decimal units): %.1f below 10× a unit, %.0f above.
///
/// Sizes under 2000 are printed as a plain byte count. Each unit (kB, MB, GB)
/// takes over at twice the unit size, and GB is the largest unit used.
fn format_file_size(bytes: u64) -> String {
    // (exclusive upper bound, unit size, decimals, unit label), in ascending order.
    const TIERS: [(u64, f64, usize, &str); 5] = [
        (10_000, 1_000.0, 1, "kB"),
        (2_000_000, 1_000.0, 0, "kB"),
        (10_000_000, 1_000_000.0, 1, "MB"),
        (2_000_000_000, 1_000_000.0, 0, "MB"),
        (10_000_000_000, 1_000_000_000.0, 1, "GB"),
    ];

    if bytes < 2000 {
        return format!("{} bytes", bytes);
    }

    let size = bytes as f64;
    for &(limit, unit_size, decimals, label) in TIERS.iter() {
        if bytes < limit {
            return format!("{:.*} {}", decimals, size / unit_size, label);
        }
    }
    format!("{:.0} GB", size / 1_000_000_000.0)
}
2977
/// Check whether a tag name (compared case-insensitively) is one of the names this
/// module treats as typically belonging to XMP.
fn is_xmp_tag(tag: &str) -> bool {
    const XMP_NAMES: [&str; 9] = [
        "title",
        "description",
        "subject",
        "creator",
        "rights",
        "keywords",
        "rating",
        "label",
        "hierarchicalsubject",
    ];

    let lowered = tag.to_lowercase();
    XMP_NAMES.contains(&lowered.as_str())
}
2993
2994/// Encode an EXIF tag value to binary.
2995/// Returns (tag_id, format, encoded_data) or None if tag is unknown.
2996fn encode_exif_tag(
2997    tag_name: &str,
2998    value: &str,
2999    _group: &str,
3000    bo: ByteOrderMark,
3001) -> Option<(u16, exif_writer::ExifFormat, Vec<u8>)> {
3002    let tag_lower = tag_name.to_lowercase();
3003
3004    // Map common tag names to EXIF tag IDs and formats
3005    let (tag_id, format): (u16, exif_writer::ExifFormat) = match tag_lower.as_str() {
3006        // IFD0 string tags
3007        "imagedescription" => (0x010E, exif_writer::ExifFormat::Ascii),
3008        "make" => (0x010F, exif_writer::ExifFormat::Ascii),
3009        "model" => (0x0110, exif_writer::ExifFormat::Ascii),
3010        "software" => (0x0131, exif_writer::ExifFormat::Ascii),
3011        "modifydate" | "datetime" => (0x0132, exif_writer::ExifFormat::Ascii),
3012        "artist" => (0x013B, exif_writer::ExifFormat::Ascii),
3013        "copyright" => (0x8298, exif_writer::ExifFormat::Ascii),
3014        // IFD0 numeric tags
3015        "orientation" => (0x0112, exif_writer::ExifFormat::Short),
3016        "xresolution" => (0x011A, exif_writer::ExifFormat::Rational),
3017        "yresolution" => (0x011B, exif_writer::ExifFormat::Rational),
3018        "resolutionunit" => (0x0128, exif_writer::ExifFormat::Short),
3019        // ExifIFD tags
3020        "datetimeoriginal" => (0x9003, exif_writer::ExifFormat::Ascii),
3021        "createdate" | "datetimedigitized" => (0x9004, exif_writer::ExifFormat::Ascii),
3022        "usercomment" => (0x9286, exif_writer::ExifFormat::Undefined),
3023        "imageuniqueid" => (0xA420, exif_writer::ExifFormat::Ascii),
3024        "ownername" | "cameraownername" => (0xA430, exif_writer::ExifFormat::Ascii),
3025        "serialnumber" | "bodyserialnumber" => (0xA431, exif_writer::ExifFormat::Ascii),
3026        "lensmake" => (0xA433, exif_writer::ExifFormat::Ascii),
3027        "lensmodel" => (0xA434, exif_writer::ExifFormat::Ascii),
3028        "lensserialnumber" => (0xA435, exif_writer::ExifFormat::Ascii),
3029        _ => return None,
3030    };
3031
3032    let encoded = match format {
3033        exif_writer::ExifFormat::Ascii => exif_writer::encode_ascii(value),
3034        exif_writer::ExifFormat::Short => {
3035            let v: u16 = value.parse().ok()?;
3036            exif_writer::encode_u16(v, bo)
3037        }
3038        exif_writer::ExifFormat::Long => {
3039            let v: u32 = value.parse().ok()?;
3040            exif_writer::encode_u32(v, bo)
3041        }
3042        exif_writer::ExifFormat::Rational => {
3043            // Parse "N/D" or just "N"
3044            if let Some(slash) = value.find('/') {
3045                let num: u32 = value[..slash].trim().parse().ok()?;
3046                let den: u32 = value[slash + 1..].trim().parse().ok()?;
3047                exif_writer::encode_urational(num, den, bo)
3048            } else if let Ok(v) = value.parse::<f64>() {
3049                // Convert float to rational
3050                let den = 10000u32;
3051                let num = (v * den as f64).round() as u32;
3052                exif_writer::encode_urational(num, den, bo)
3053            } else {
3054                return None;
3055            }
3056        }
3057        exif_writer::ExifFormat::Undefined => {
3058            // UserComment: 8 bytes charset + data
3059            let mut data = vec![0x41, 0x53, 0x43, 0x49, 0x49, 0x00, 0x00, 0x00]; // "ASCII\0\0\0"
3060            data.extend_from_slice(value.as_bytes());
3061            data
3062        }
3063        _ => return None,
3064    };
3065
3066    Some((tag_id, format, encoded))
3067}
3068
3069/// Compute text file tags (from Perl Text.pm).
3070fn compute_text_tags(data: &[u8], is_csv: bool) -> Vec<Tag> {
3071    let mut tags = Vec::new();
3072    let mk = |name: &str, val: String| Tag {
3073        id: crate::tag::TagId::Text(name.into()),
3074        name: name.into(),
3075        description: name.into(),
3076        group: crate::tag::TagGroup {
3077            family0: "File".into(),
3078            family1: "File".into(),
3079            family2: "Other".into(),
3080        },
3081        raw_value: Value::String(val.clone()),
3082        print_value: val,
3083        priority: 0,
3084    };
3085
3086    // Detect encoding and BOM
3087    let is_ascii = data.iter().all(|&b| b < 128);
3088    let has_utf8_bom = data.starts_with(&[0xEF, 0xBB, 0xBF]);
3089    let has_utf16le_bom =
3090        data.starts_with(&[0xFF, 0xFE]) && !data.starts_with(&[0xFF, 0xFE, 0x00, 0x00]);
3091    let has_utf16be_bom = data.starts_with(&[0xFE, 0xFF]);
3092    let has_utf32le_bom = data.starts_with(&[0xFF, 0xFE, 0x00, 0x00]);
3093    let has_utf32be_bom = data.starts_with(&[0x00, 0x00, 0xFE, 0xFF]);
3094
3095    // Detect if file has weird non-text control characters (like multi-byte unicode without BOM)
3096    let has_weird_ctrl = data.iter().any(|&b| {
3097        (b <= 0x06) || (0x0e..=0x1a).contains(&b) || (0x1c..=0x1f).contains(&b) || b == 0x7f
3098    });
3099
3100    let (encoding, is_bom, is_utf16) = if has_utf32le_bom {
3101        ("utf-32le", true, false)
3102    } else if has_utf32be_bom {
3103        ("utf-32be", true, false)
3104    } else if has_utf16le_bom {
3105        ("utf-16le", true, true)
3106    } else if has_utf16be_bom {
3107        ("utf-16be", true, true)
3108    } else if has_weird_ctrl {
3109        // Not a text file (has binary-like control chars but no recognized multi-byte marker)
3110        return tags;
3111    } else if is_ascii {
3112        ("us-ascii", false, false)
3113    } else {
3114        // Check UTF-8
3115        let is_valid_utf8 = std::str::from_utf8(data).is_ok();
3116        if is_valid_utf8 {
3117            if has_utf8_bom {
3118                ("utf-8", true, false)
3119            } else {
3120                // Check if it has high bytes suggesting iso-8859-1 vs utf-8
3121                // Perl's IsUTF8: returns >0 if valid UTF-8 with multi-byte, 0 if ASCII, <0 if invalid
3122                // For simplicity: valid UTF-8 without BOM = utf-8
3123                ("utf-8", false, false)
3124            }
3125        } else if !data.iter().any(|&b| (0x80..=0x9f).contains(&b)) {
3126            ("iso-8859-1", false, false)
3127        } else {
3128            ("unknown-8bit", false, false)
3129        }
3130    };
3131
3132    tags.push(mk("MIMEEncoding", encoding.into()));
3133
3134    if is_bom {
3135        tags.push(mk("ByteOrderMark", "Yes".into()));
3136    }
3137
3138    // Count newlines and detect type
3139    let has_cr = data.contains(&b'\r');
3140    let has_lf = data.contains(&b'\n');
3141    let newline_type = if has_cr && has_lf {
3142        "Windows CRLF"
3143    } else if has_lf {
3144        "Unix LF"
3145    } else if has_cr {
3146        "Macintosh CR"
3147    } else {
3148        "(none)"
3149    };
3150    tags.push(mk("Newlines", newline_type.into()));
3151
3152    if is_csv {
3153        // CSV analysis: detect delimiter, quoting, column count, row count
3154        let text = crate::encoding::decode_utf8_or_latin1(data);
3155        let mut delim = "";
3156        let mut quot = "";
3157        let mut ncols = 1usize;
3158        let mut nrows = 0usize;
3159
3160        for line in text.lines() {
3161            if nrows == 0 {
3162                // Detect delimiter from first line
3163                let comma_count = line.matches(',').count();
3164                let semi_count = line.matches(';').count();
3165                let tab_count = line.matches('\t').count();
3166                if comma_count > semi_count && comma_count > tab_count {
3167                    delim = ",";
3168                    ncols = comma_count + 1;
3169                } else if semi_count > tab_count {
3170                    delim = ";";
3171                    ncols = semi_count + 1;
3172                } else if tab_count > 0 {
3173                    delim = "\t";
3174                    ncols = tab_count + 1;
3175                } else {
3176                    delim = "";
3177                    ncols = 1;
3178                }
3179                // Detect quoting
3180                if line.contains('"') {
3181                    quot = "\"";
3182                } else if line.contains('\'') {
3183                    quot = "'";
3184                }
3185            }
3186            nrows += 1;
3187            if nrows >= 1000 {
3188                break;
3189            }
3190        }
3191
3192        let delim_display = match delim {
3193            "," => "Comma",
3194            ";" => "Semicolon",
3195            "\t" => "Tab",
3196            _ => "(none)",
3197        };
3198        let quot_display = match quot {
3199            "\"" => "Double quotes",
3200            "'" => "Single quotes",
3201            _ => "(none)",
3202        };
3203
3204        tags.push(mk("Delimiter", delim_display.into()));
3205        tags.push(mk("Quoting", quot_display.into()));
3206        tags.push(mk("ColumnCount", ncols.to_string()));
3207        if nrows > 0 {
3208            tags.push(mk("RowCount", nrows.to_string()));
3209        }
3210    } else if !is_utf16 {
3211        // Line count and word count for plain text files (not UTF-16/32)
3212        // ExifTool counts each ReadLine, so trailing content without a final newline
3213        // still counts as a line.
3214        let nl_count = data.iter().filter(|&&b| b == b'\n').count();
3215        let line_count = if !data.is_empty() && data.last() != Some(&b'\n') {
3216            nl_count + 1
3217        } else {
3218            nl_count
3219        };
3220        tags.push(mk("LineCount", line_count.to_string()));
3221
3222        let text = crate::encoding::decode_utf8_or_latin1(data);
3223        let word_count = text.split_whitespace().count();
3224        tags.push(mk("WordCount", word_count.to_string()));
3225    }
3226
3227    tags
3228}
3229
#[cfg(test)]
mod tests {
    //! Unit tests for option handling and the queue of pending tag writes.

    use super::*;

    /// A freshly constructed `ExifTool` exposes the default option values.
    #[test]
    fn new_has_default_options() {
        let et = ExifTool::new();
        assert!(!et.options().duplicates);
        assert!(et.options().print_conv);
        assert_eq!(et.options().fast_scan, 0);
        assert!(et.options().requested_tags.is_empty());
        assert_eq!(et.options().extract_embedded, 0);
        assert_eq!(et.options().show_unknown, 0);
        assert!(!et.options().process_compressed);
        assert!(!et.options().use_mwg);
        // Documented on `Options::geolocation` as off by default.
        assert!(!et.options().geolocation);
    }

    /// Every field passed to `with_options` is visible through `options()`.
    #[test]
    fn with_options_preserves_custom() {
        let opts = Options {
            duplicates: true,
            print_conv: false,
            fast_scan: 2,
            requested_tags: vec!["Artist".to_string()],
            extract_embedded: 1,
            show_unknown: 1,
            process_compressed: true,
            use_mwg: true,
            geolocation: true,
        };
        // `opts` is not needed afterwards, so it is moved rather than cloned.
        let et = ExifTool::with_options(opts);
        assert!(et.options().duplicates);
        assert!(!et.options().print_conv);
        assert_eq!(et.options().fast_scan, 2);
        assert_eq!(et.options().requested_tags, vec!["Artist".to_string()]);
        assert_eq!(et.options().extract_embedded, 1);
        assert_eq!(et.options().show_unknown, 1);
        assert!(et.options().process_compressed);
        assert!(et.options().use_mwg);
        assert!(et.options().geolocation);
    }

    /// A tag name without a group prefix is queued with `group == None`.
    #[test]
    fn set_new_value_simple_tag() {
        let mut et = ExifTool::new();
        et.set_new_value("Artist", Some("John"));
        assert_eq!(et.new_values.len(), 1);
        assert_eq!(et.new_values[0].tag, "Artist");
        assert_eq!(et.new_values[0].group, None);
        assert_eq!(et.new_values[0].value, Some("John".to_string()));
    }

    /// A `Group:Tag` name is split into its group and tag parts.
    #[test]
    fn set_new_value_with_group_prefix() {
        let mut et = ExifTool::new();
        et.set_new_value("XMP:Title", Some("Test"));
        assert_eq!(et.new_values.len(), 1);
        assert_eq!(et.new_values[0].tag, "Title");
        assert_eq!(et.new_values[0].group, Some("XMP".to_string()));
        assert_eq!(et.new_values[0].value, Some("Test".to_string()));
    }

    /// Passing `None` as the value queues an entry whose value is `None`.
    #[test]
    fn set_new_value_delete() {
        let mut et = ExifTool::new();
        et.set_new_value("Comment", None);
        assert_eq!(et.new_values.len(), 1);
        assert_eq!(et.new_values[0].tag, "Comment");
        assert_eq!(et.new_values[0].value, None);
    }

    /// `clear_new_values` drops everything queued so far.
    #[test]
    fn clear_new_values_empties_queue() {
        let mut et = ExifTool::new();
        et.set_new_value("Artist", Some("A"));
        et.set_new_value("Copyright", Some("B"));
        assert_eq!(et.new_values.len(), 2);
        et.clear_new_values();
        assert!(et.new_values.is_empty());
    }

    /// Queued values keep their insertion order.
    #[test]
    fn set_new_value_multiple() {
        let mut et = ExifTool::new();
        et.set_new_value("Artist", Some("John"));
        et.set_new_value("IPTC:Keywords", Some("test"));
        et.set_new_value("XMP:Subject", None);
        assert_eq!(et.new_values.len(), 3);
        assert_eq!(et.new_values[1].group, Some("IPTC".to_string()));
        assert_eq!(et.new_values[1].tag, "Keywords");
        assert_eq!(et.new_values[2].value, None);
    }

    /// Changes made through `options_mut` are visible through `options`.
    #[test]
    fn options_mut_modifies() {
        let mut et = ExifTool::new();
        et.options_mut().duplicates = true;
        et.options_mut().fast_scan = 3;
        assert!(et.options().duplicates);
        assert_eq!(et.options().fast_scan, 3);
    }

    /// `Options::default()` yields the documented defaults.
    #[test]
    fn default_options() {
        let opts = Options::default();
        assert!(!opts.duplicates);
        assert!(opts.print_conv);
        assert_eq!(opts.fast_scan, 0);
        assert!(!opts.geolocation);
    }
}