Skip to main content

exiftool_rs/
exiftool.rs

1//! Core ExifTool struct and public API.
2//!
3//! This is the main entry point for reading metadata from files.
4//! Mirrors ExifTool.pm's ImageInfo/ExtractInfo/GetInfo pipeline.
5
6use std::collections::HashMap;
7use std::fs;
8use std::path::Path;
9
10use crate::error::{Error, Result};
11use crate::file_type::{self, FileType};
12use crate::formats;
13use crate::metadata::exif::ByteOrderMark;
14use crate::tag::Tag;
15use crate::value::Value;
16use crate::writer::{
17    exif_writer, iptc_writer, jpeg_writer, matroska_writer, mp4_writer, pdf_writer, png_writer,
18    psd_writer, tiff_writer, webp_writer, xmp_writer,
19};
20
/// Settings that steer how metadata is read from a file.
#[derive(Debug, Clone)]
pub struct Options {
    /// Keep duplicate tags (the same tag name can occur in several groups).
    pub duplicates: bool,
    /// Run print conversions so values come out human-readable.
    pub print_conv: bool,
    /// Fast-scan level: 0 = normal, 1 = skip composite, 2 = skip maker notes,
    /// 3 = skip thumbnails.
    pub fast_scan: u8,
    /// Restrict extraction to these tag names; an empty list means "everything".
    pub requested_tags: Vec<String>,
    /// Embedded document/data extraction (video frames, etc.):
    /// 0 = off, 1 = -ee, 2 = -ee2, 3 = -ee3.
    pub extract_embedded: u8,
    /// Unknown-tag reporting: 0 = off, 1 = -u (show unknown),
    /// 2 = -U (show unknown + binary data).
    pub show_unknown: u8,
    /// Process compressed data inside files (the -z option).
    pub process_compressed: bool,
    /// Use MWG (Metadata Working Group) composite tags when reading/writing.
    pub use_mwg: bool,
}
41
42impl Default for Options {
43    fn default() -> Self {
44        Self {
45            duplicates: false,
46            print_conv: true,
47            fast_scan: 0,
48            requested_tags: Vec::new(),
49            extract_embedded: 0,
50            show_unknown: 0,
51            process_compressed: false,
52            use_mwg: false,
53        }
54    }
55}
56
/// A queued tag change for writing.
///
/// Entries are created by `ExifTool::set_new_value` and
/// `ExifTool::set_new_values_from_file`, and are consumed when the queued
/// changes are written out.
#[derive(Debug, Clone)]
pub struct NewValue {
    /// Tag name without its group prefix (e.g., "Artist", "Copyright", "Title").
    /// `set_new_value` stores everything after the first `:` here.
    pub tag: String,
    /// Group prefix if one was specified (e.g., "EXIF", "XMP", "IPTC").
    pub group: Option<String>,
    /// New value (`None` = delete the tag).
    pub value: Option<String>,
}
79
80/// The main ExifTool engine — read, write, and edit metadata.
81///
82/// # Reading metadata
83/// ```no_run
84/// use exiftool_rs::ExifTool;
85///
86/// let et = ExifTool::new();
87///
88/// // Full tag structs
89/// let tags = et.extract_info("photo.jpg").unwrap();
90/// for tag in &tags {
91///     println!("[{}] {}: {}", tag.group.family0, tag.name, tag.print_value);
92/// }
93///
94/// // Simple name→value map
95/// let info = et.image_info("photo.jpg").unwrap();
96/// println!("Camera: {}", info.get("Model").unwrap_or(&String::new()));
97/// ```
98///
99/// # Writing metadata
100/// ```no_run
101/// use exiftool_rs::ExifTool;
102///
103/// let mut et = ExifTool::new();
104/// et.set_new_value("Artist", Some("John Doe"));
105/// et.set_new_value("Copyright", Some("2024"));
106/// et.write_info("input.jpg", "output.jpg").unwrap();
107/// ```
108pub struct ExifTool {
109    options: Options,
110    new_values: Vec<NewValue>,
111}
112
/// Extraction result as a flat map: tag name → display value.
pub type ImageInfo = HashMap<String, String>;
115
116impl ExifTool {
117    /// Create a new ExifTool instance with default options.
118    pub fn new() -> Self {
119        Self {
120            options: Options::default(),
121            new_values: Vec::new(),
122        }
123    }
124
125    /// Create a new ExifTool instance with custom options.
126    pub fn with_options(options: Options) -> Self {
127        Self {
128            options,
129            new_values: Vec::new(),
130        }
131    }
132
133    /// Get a mutable reference to the options.
134    pub fn options_mut(&mut self) -> &mut Options {
135        &mut self.options
136    }
137
138    /// Get a reference to the options.
139    pub fn options(&self) -> &Options {
140        &self.options
141    }
142
143    // ================================================================
144    // Writing API
145    // ================================================================
146
147    /// Queue a new tag value for writing.
148    ///
149    /// Call this one or more times, then call `write_info()` to apply changes.
150    ///
151    /// # Arguments
152    /// * `tag` - Tag name, optionally prefixed with group (e.g., "Artist", "XMP:Title", "EXIF:Copyright")
153    /// * `value` - New value, or None to delete the tag
154    ///
155    /// # Example
156    /// ```no_run
157    /// use exiftool_rs::ExifTool;
158    /// let mut et = ExifTool::new();
159    /// et.set_new_value("Artist", Some("John Doe"));
160    /// et.set_new_value("Copyright", Some("2024 John Doe"));
161    /// et.set_new_value("XMP:Title", Some("My Photo"));
162    /// et.write_info("photo.jpg", "photo_out.jpg").unwrap();
163    /// ```
164    pub fn set_new_value(&mut self, tag: &str, value: Option<&str>) {
165        let (group, tag_name) = if let Some(colon_pos) = tag.find(':') {
166            (
167                Some(tag[..colon_pos].to_string()),
168                tag[colon_pos + 1..].to_string(),
169            )
170        } else {
171            (None, tag.to_string())
172        };
173
174        self.new_values.push(NewValue {
175            tag: tag_name,
176            group,
177            value: value.map(|v| v.to_string()),
178        });
179    }
180
181    /// Clear all queued new values.
182    pub fn clear_new_values(&mut self) {
183        self.new_values.clear();
184    }
185
186    /// Copy tags from a source file, queuing them as new values.
187    ///
188    /// Reads all tags from `src_path` and queues them for writing.
189    /// Optionally filter by tag names.
190    pub fn set_new_values_from_file<P: AsRef<Path>>(
191        &mut self,
192        src_path: P,
193        tags_to_copy: Option<&[&str]>,
194    ) -> Result<u32> {
195        let src_tags = self.extract_info(src_path)?;
196        let mut count = 0u32;
197
198        for tag in &src_tags {
199            // Skip file-level tags that shouldn't be copied
200            if tag.group.family0 == "File" || tag.group.family0 == "Composite" {
201                continue;
202            }
203            // Skip binary/undefined data and empty values
204            if tag.print_value.starts_with("(Binary") || tag.print_value.starts_with("(Undefined") {
205                continue;
206            }
207            if tag.print_value.is_empty() {
208                continue;
209            }
210
211            // Filter by requested tags
212            if let Some(filter) = tags_to_copy {
213                let name_lower = tag.name.to_lowercase();
214                if !filter.iter().any(|f| f.to_lowercase() == name_lower) {
215                    continue;
216                }
217            }
218
219            let _full_tag = format!("{}:{}", tag.group.family0, tag.name);
220            self.new_values.push(NewValue {
221                tag: tag.name.clone(),
222                group: Some(tag.group.family0.clone()),
223                value: Some(tag.print_value.clone()),
224            });
225            count += 1;
226        }
227
228        Ok(count)
229    }
230
231    /// Set a file's name based on a tag value.
232    pub fn set_file_name_from_tag<P: AsRef<Path>>(
233        &self,
234        path: P,
235        tag_name: &str,
236        template: &str,
237    ) -> Result<String> {
238        let path = path.as_ref();
239        let tags = self.extract_info(path)?;
240
241        let tag_value = tags
242            .iter()
243            .find(|t| t.name.to_lowercase() == tag_name.to_lowercase())
244            .map(|t| &t.print_value)
245            .ok_or_else(|| Error::TagNotFound(tag_name.to_string()))?;
246
247        // Build new filename from template
248        // Template: "prefix%value%suffix.ext" or just use the tag value
249        let new_name = if template.contains('%') {
250            template.replace("%v", value_to_filename(tag_value).as_str())
251        } else {
252            // Default: use tag value as filename, keep extension
253            let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
254            let clean = value_to_filename(tag_value);
255            if ext.is_empty() {
256                clean
257            } else {
258                format!("{}.{}", clean, ext)
259            }
260        };
261
262        let parent = path.parent().unwrap_or(Path::new(""));
263        let new_path = parent.join(&new_name);
264
265        fs::rename(path, &new_path).map_err(Error::Io)?;
266        Ok(new_path.to_string_lossy().to_string())
267    }
268
269    /// Write queued changes to a file.
270    ///
271    /// If `dst_path` is the same as `src_path`, the file is modified in-place
272    /// (via a temporary file).
273    pub fn write_info<P: AsRef<Path>, Q: AsRef<Path>>(
274        &self,
275        src_path: P,
276        dst_path: Q,
277    ) -> Result<u32> {
278        let src_path = src_path.as_ref();
279        let dst_path = dst_path.as_ref();
280        let data = fs::read(src_path).map_err(Error::Io)?;
281
282        let file_type = self.detect_file_type(&data, src_path)?;
283        let output = self.apply_changes(&data, file_type)?;
284
285        // Write to temp file first, then rename (atomic)
286        let temp_path = dst_path.with_extension("exiftool_tmp");
287        fs::write(&temp_path, &output).map_err(Error::Io)?;
288        fs::rename(&temp_path, dst_path).map_err(Error::Io)?;
289
290        Ok(self.new_values.len() as u32)
291    }
292
293    /// Apply queued changes to in-memory data.
294    fn apply_changes(&self, data: &[u8], file_type: FileType) -> Result<Vec<u8>> {
295        match file_type {
296            FileType::Jpeg => self.write_jpeg(data),
297            FileType::Png => self.write_png(data),
298            FileType::Tiff
299            | FileType::Dng
300            | FileType::Cr2
301            | FileType::Nef
302            | FileType::Arw
303            | FileType::Orf
304            | FileType::Pef => self.write_tiff(data),
305            FileType::WebP => self.write_webp(data),
306            FileType::Mp4
307            | FileType::QuickTime
308            | FileType::M4a
309            | FileType::ThreeGP
310            | FileType::F4v => self.write_mp4(data),
311            FileType::Psd => self.write_psd(data),
312            FileType::Pdf => self.write_pdf(data),
313            FileType::Heif | FileType::Avif => self.write_mp4(data),
314            FileType::Mkv | FileType::WebM => self.write_matroska(data),
315            FileType::Gif => {
316                let comment = self
317                    .new_values
318                    .iter()
319                    .find(|nv| nv.tag.to_lowercase() == "comment")
320                    .and_then(|nv| nv.value.clone());
321                crate::writer::gif_writer::write_gif(data, comment.as_deref())
322            }
323            FileType::Flac => {
324                let changes: Vec<(&str, &str)> = self
325                    .new_values
326                    .iter()
327                    .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
328                    .collect();
329                crate::writer::flac_writer::write_flac(data, &changes)
330            }
331            FileType::Mp3 | FileType::Aiff => {
332                let changes: Vec<(&str, &str)> = self
333                    .new_values
334                    .iter()
335                    .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
336                    .collect();
337                crate::writer::id3_writer::write_id3(data, &changes)
338            }
339            FileType::Jp2 | FileType::Jxl => {
340                let new_xmp = if self
341                    .new_values
342                    .iter()
343                    .any(|nv| nv.group.as_deref() == Some("XMP"))
344                {
345                    let refs: Vec<&NewValue> = self
346                        .new_values
347                        .iter()
348                        .filter(|nv| nv.group.as_deref() == Some("XMP"))
349                        .collect();
350                    Some(self.build_new_xmp(&refs))
351                } else {
352                    None
353                };
354                crate::writer::jp2_writer::write_jp2(data, new_xmp.as_deref(), None)
355            }
356            FileType::PostScript => {
357                let changes: Vec<(&str, &str)> = self
358                    .new_values
359                    .iter()
360                    .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
361                    .collect();
362                crate::writer::ps_writer::write_postscript(data, &changes)
363            }
364            FileType::Ogg | FileType::Opus => {
365                let changes: Vec<(&str, &str)> = self
366                    .new_values
367                    .iter()
368                    .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
369                    .collect();
370                crate::writer::ogg_writer::write_ogg(data, &changes)
371            }
372            FileType::Xmp => {
373                let props: Vec<xmp_writer::XmpProperty> = self
374                    .new_values
375                    .iter()
376                    .filter_map(|nv| {
377                        let val = nv.value.as_deref()?;
378                        Some(xmp_writer::XmpProperty {
379                            namespace: nv.group.clone().unwrap_or_else(|| "dc".into()),
380                            property: nv.tag.clone(),
381                            values: vec![val.to_string()],
382                            prop_type: xmp_writer::XmpPropertyType::Simple,
383                        })
384                    })
385                    .collect();
386                Ok(crate::writer::xmp_sidecar_writer::write_xmp_sidecar(&props))
387            }
388            _ => Err(Error::UnsupportedFileType(format!(
389                "writing not yet supported for {}",
390                file_type
391            ))),
392        }
393    }
394
395    /// Returns the set of tag names (lowercase) that are writable for a given file type.
396    /// Returns `None` if any tag is writable (open-ended formats like PNG, FLAC, MKV).
397    /// Returns `Some(empty set)` if the format has no writer.
398    pub fn writable_tags(file_type: FileType) -> Option<std::collections::HashSet<&'static str>> {
399        use std::collections::HashSet;
400
401        // EXIF tags supported by exif_writer
402        const EXIF_TAGS: &[&str] = &[
403            "imagedescription",
404            "make",
405            "model",
406            "orientation",
407            "xresolution",
408            "yresolution",
409            "resolutionunit",
410            "software",
411            "modifydate",
412            "datetime",
413            "artist",
414            "copyright",
415            "datetimeoriginal",
416            "createdate",
417            "datetimedigitized",
418            "usercomment",
419            "imageuniqueid",
420            "ownername",
421            "cameraownername",
422            "serialnumber",
423            "bodyserialnumber",
424            "lensmake",
425            "lensmodel",
426            "lensserialnumber",
427        ];
428
429        // IPTC tags supported by iptc_writer
430        const IPTC_TAGS: &[&str] = &[
431            "objectname",
432            "title",
433            "urgency",
434            "category",
435            "supplementalcategories",
436            "keywords",
437            "specialinstructions",
438            "datecreated",
439            "timecreated",
440            "by-line",
441            "author",
442            "byline",
443            "by-linetitle",
444            "authorsposition",
445            "bylinetitle",
446            "city",
447            "sub-location",
448            "sublocation",
449            "province-state",
450            "state",
451            "provincestate",
452            "country-primarylocationcode",
453            "countrycode",
454            "country-primarylocationname",
455            "country",
456            "headline",
457            "credit",
458            "source",
459            "copyrightnotice",
460            "contact",
461            "caption-abstract",
462            "caption",
463            "description",
464            "writer-editor",
465            "captionwriter",
466        ];
467
468        // XMP auto-detected tags (no group prefix needed)
469        const XMP_AUTO_TAGS: &[&str] = &[
470            "title",
471            "description",
472            "subject",
473            "creator",
474            "rights",
475            "keywords",
476            "rating",
477            "label",
478            "hierarchicalsubject",
479        ];
480
481        // ID3 tags
482        const ID3_TAGS: &[&str] = &[
483            "title",
484            "artist",
485            "album",
486            "year",
487            "date",
488            "track",
489            "genre",
490            "comment",
491            "composer",
492            "albumartist",
493            "encoder",
494            "encodedby",
495            "publisher",
496            "copyright",
497            "bpm",
498            "lyrics",
499        ];
500
501        // MP4/MOV ilst tags
502        const MP4_TAGS: &[&str] = &[
503            "title",
504            "artist",
505            "album",
506            "year",
507            "date",
508            "comment",
509            "genre",
510            "composer",
511            "writer",
512            "encoder",
513            "encodedby",
514            "grouping",
515            "lyrics",
516            "description",
517            "albumartist",
518            "copyright",
519        ];
520
521        // PDF Info dict tags
522        const PDF_TAGS: &[&str] = &[
523            "title", "author", "subject", "keywords", "creator", "producer",
524        ];
525
526        // PostScript DSC tags
527        const PS_TAGS: &[&str] = &[
528            "title",
529            "creator",
530            "author",
531            "for",
532            "creationdate",
533            "createdate",
534        ];
535
536        match file_type {
537            // Open-ended: any tag name accepted
538            FileType::Png
539            | FileType::Flac
540            | FileType::Mkv
541            | FileType::WebM
542            | FileType::Ogg
543            | FileType::Opus
544            | FileType::Xmp => None,
545
546            // JPEG: EXIF + IPTC + XMP auto + comment
547            FileType::Jpeg => {
548                let mut set: HashSet<&str> = HashSet::new();
549                set.extend(EXIF_TAGS);
550                set.extend(IPTC_TAGS);
551                set.extend(XMP_AUTO_TAGS);
552                set.insert("comment");
553                Some(set)
554            }
555
556            // TIFF-based: EXIF only
557            FileType::Tiff
558            | FileType::Dng
559            | FileType::Cr2
560            | FileType::Nef
561            | FileType::Arw
562            | FileType::Orf
563            | FileType::Pef => {
564                let mut set: HashSet<&str> = HashSet::new();
565                set.extend(EXIF_TAGS);
566                Some(set)
567            }
568
569            // WebP: EXIF + XMP auto
570            FileType::WebP => {
571                let mut set: HashSet<&str> = HashSet::new();
572                set.extend(EXIF_TAGS);
573                set.extend(XMP_AUTO_TAGS);
574                Some(set)
575            }
576
577            // MP4/MOV/HEIF: ilst + XMP auto
578            FileType::Mp4
579            | FileType::QuickTime
580            | FileType::M4a
581            | FileType::ThreeGP
582            | FileType::F4v
583            | FileType::Heif
584            | FileType::Avif => {
585                let mut set: HashSet<&str> = HashSet::new();
586                set.extend(MP4_TAGS);
587                set.extend(XMP_AUTO_TAGS);
588                Some(set)
589            }
590
591            // PSD: IPTC + XMP auto
592            FileType::Psd => {
593                let mut set: HashSet<&str> = HashSet::new();
594                set.extend(IPTC_TAGS);
595                set.extend(XMP_AUTO_TAGS);
596                Some(set)
597            }
598
599            FileType::Pdf => Some(PDF_TAGS.iter().copied().collect()),
600            FileType::PostScript => Some(PS_TAGS.iter().copied().collect()),
601
602            FileType::Mp3 | FileType::Aiff => Some(ID3_TAGS.iter().copied().collect()),
603
604            FileType::Gif => {
605                let mut set: HashSet<&str> = HashSet::new();
606                set.insert("comment");
607                Some(set)
608            }
609
610            // JP2/JXL: XMP only (with group prefix)
611            FileType::Jp2 | FileType::Jxl => Some(XMP_AUTO_TAGS.iter().copied().collect()),
612
613            // No writer
614            _ => Some(HashSet::new()),
615        }
616    }
617
618    /// Write metadata changes to JPEG data.
619    fn write_jpeg(&self, data: &[u8]) -> Result<Vec<u8>> {
620        // Classify new values by target group
621        let mut exif_values: Vec<&NewValue> = Vec::new();
622        let mut xmp_values: Vec<&NewValue> = Vec::new();
623        let mut iptc_values: Vec<&NewValue> = Vec::new();
624        let mut comment_value: Option<&str> = None;
625        let mut remove_exif = false;
626        let mut remove_xmp = false;
627        let mut remove_iptc = false;
628        let mut remove_comment = false;
629
630        for nv in &self.new_values {
631            let group = nv.group.as_deref().unwrap_or("");
632            let group_upper = group.to_uppercase();
633
634            // Check for group deletion
635            if nv.value.is_none() && nv.tag == "*" {
636                match group_upper.as_str() {
637                    "EXIF" => {
638                        remove_exif = true;
639                        continue;
640                    }
641                    "XMP" => {
642                        remove_xmp = true;
643                        continue;
644                    }
645                    "IPTC" => {
646                        remove_iptc = true;
647                        continue;
648                    }
649                    _ => {}
650                }
651            }
652
653            match group_upper.as_str() {
654                "XMP" => xmp_values.push(nv),
655                "IPTC" => iptc_values.push(nv),
656                "EXIF" | "IFD0" | "EXIFIFD" | "GPS" => exif_values.push(nv),
657                "" => {
658                    // Auto-detect best group based on tag name
659                    if nv.tag.to_lowercase() == "comment" {
660                        if nv.value.is_none() {
661                            remove_comment = true;
662                        } else {
663                            comment_value = nv.value.as_deref();
664                        }
665                    } else if is_xmp_tag(&nv.tag) {
666                        xmp_values.push(nv);
667                    } else {
668                        exif_values.push(nv);
669                    }
670                }
671                _ => exif_values.push(nv), // default to EXIF
672            }
673        }
674
675        // Build new EXIF data
676        let new_exif = if !exif_values.is_empty() {
677            Some(self.build_new_exif(data, &exif_values)?)
678        } else {
679            None
680        };
681
682        // Build new XMP data
683        let new_xmp = if !xmp_values.is_empty() {
684            Some(self.build_new_xmp(&xmp_values))
685        } else {
686            None
687        };
688
689        // Build new IPTC data
690        let new_iptc_data = if !iptc_values.is_empty() {
691            let records: Vec<iptc_writer::IptcRecord> = iptc_values
692                .iter()
693                .filter_map(|nv| {
694                    let value = nv.value.as_deref()?;
695                    let (record, dataset) = iptc_writer::tag_name_to_iptc(&nv.tag)?;
696                    Some(iptc_writer::IptcRecord {
697                        record,
698                        dataset,
699                        data: value.as_bytes().to_vec(),
700                    })
701                })
702                .collect();
703            if records.is_empty() {
704                None
705            } else {
706                Some(iptc_writer::build_iptc(&records))
707            }
708        } else {
709            None
710        };
711
712        // Rewrite JPEG
713        jpeg_writer::write_jpeg(
714            data,
715            new_exif.as_deref(),
716            new_xmp.as_deref(),
717            new_iptc_data.as_deref(),
718            comment_value,
719            remove_exif,
720            remove_xmp,
721            remove_iptc,
722            remove_comment,
723        )
724    }
725
726    /// Build new EXIF data by merging existing EXIF with queued changes.
727    fn build_new_exif(&self, jpeg_data: &[u8], values: &[&NewValue]) -> Result<Vec<u8>> {
728        let bo = ByteOrderMark::BigEndian;
729        let mut ifd0_entries = Vec::new();
730        let mut exif_entries = Vec::new();
731        let mut gps_entries = Vec::new();
732
733        // Step 1: Extract existing EXIF entries from the JPEG
734        let existing = extract_existing_exif_entries(jpeg_data, bo);
735        for entry in &existing {
736            match classify_exif_tag(entry.tag) {
737                ExifIfdGroup::Ifd0 => ifd0_entries.push(entry.clone()),
738                ExifIfdGroup::ExifIfd => exif_entries.push(entry.clone()),
739                ExifIfdGroup::Gps => gps_entries.push(entry.clone()),
740            }
741        }
742
743        // Step 2: Apply queued changes (add/replace/delete)
744        let deleted_tags: Vec<u16> = values
745            .iter()
746            .filter(|nv| nv.value.is_none())
747            .filter_map(|nv| tag_name_to_id(&nv.tag))
748            .collect();
749
750        // Remove deleted tags
751        ifd0_entries.retain(|e| !deleted_tags.contains(&e.tag));
752        exif_entries.retain(|e| !deleted_tags.contains(&e.tag));
753        gps_entries.retain(|e| !deleted_tags.contains(&e.tag));
754
755        // Add/replace new values
756        for nv in values {
757            if nv.value.is_none() {
758                continue;
759            }
760            let value_str = nv.value.as_deref().unwrap_or("");
761            let group = nv.group.as_deref().unwrap_or("");
762
763            if let Some((tag_id, format, encoded)) = encode_exif_tag(&nv.tag, value_str, group, bo)
764            {
765                let entry = exif_writer::IfdEntry {
766                    tag: tag_id,
767                    format,
768                    data: encoded,
769                };
770
771                let target = match group.to_uppercase().as_str() {
772                    "GPS" => &mut gps_entries,
773                    "EXIFIFD" => &mut exif_entries,
774                    _ => match classify_exif_tag(tag_id) {
775                        ExifIfdGroup::ExifIfd => &mut exif_entries,
776                        ExifIfdGroup::Gps => &mut gps_entries,
777                        ExifIfdGroup::Ifd0 => &mut ifd0_entries,
778                    },
779                };
780
781                // Replace existing or add new
782                if let Some(existing) = target.iter_mut().find(|e| e.tag == tag_id) {
783                    *existing = entry;
784                } else {
785                    target.push(entry);
786                }
787            }
788        }
789
790        // Remove sub-IFD pointers from entries (they'll be rebuilt by build_exif)
791        ifd0_entries.retain(|e| e.tag != 0x8769 && e.tag != 0x8825 && e.tag != 0xA005);
792
793        exif_writer::build_exif(&ifd0_entries, &exif_entries, &gps_entries, bo)
794    }
795
796    /// Write metadata changes to PNG data.
797    fn write_png(&self, data: &[u8]) -> Result<Vec<u8>> {
798        let mut new_text: Vec<(&str, &str)> = Vec::new();
799        let mut remove_text: Vec<&str> = Vec::new();
800
801        // Collect text-based changes
802        // We need to hold the strings in vectors that live long enough
803        let owned_pairs: Vec<(String, String)> = self
804            .new_values
805            .iter()
806            .filter(|nv| nv.value.is_some())
807            .map(|nv| (nv.tag.clone(), nv.value.clone().unwrap()))
808            .collect();
809
810        for (tag, value) in &owned_pairs {
811            new_text.push((tag.as_str(), value.as_str()));
812        }
813
814        for nv in &self.new_values {
815            if nv.value.is_none() {
816                remove_text.push(&nv.tag);
817            }
818        }
819
820        png_writer::write_png(data, &new_text, None, &remove_text)
821    }
822
823    /// Write metadata changes to PSD data.
824    fn write_psd(&self, data: &[u8]) -> Result<Vec<u8>> {
825        let mut iptc_values = Vec::new();
826        let mut xmp_values = Vec::new();
827
828        for nv in &self.new_values {
829            let group = nv.group.as_deref().unwrap_or("").to_uppercase();
830            match group.as_str() {
831                "XMP" => xmp_values.push(nv),
832                "IPTC" => iptc_values.push(nv),
833                _ => {
834                    if is_xmp_tag(&nv.tag) {
835                        xmp_values.push(nv);
836                    } else {
837                        iptc_values.push(nv);
838                    }
839                }
840            }
841        }
842
843        let new_iptc = if !iptc_values.is_empty() {
844            let records: Vec<_> = iptc_values
845                .iter()
846                .filter_map(|nv| {
847                    let value = nv.value.as_deref()?;
848                    let (record, dataset) = iptc_writer::tag_name_to_iptc(&nv.tag)?;
849                    Some(iptc_writer::IptcRecord {
850                        record,
851                        dataset,
852                        data: value.as_bytes().to_vec(),
853                    })
854                })
855                .collect();
856            if records.is_empty() {
857                None
858            } else {
859                Some(iptc_writer::build_iptc(&records))
860            }
861        } else {
862            None
863        };
864
865        let new_xmp = if !xmp_values.is_empty() {
866            let refs: Vec<&NewValue> = xmp_values.to_vec();
867            Some(self.build_new_xmp(&refs))
868        } else {
869            None
870        };
871
872        psd_writer::write_psd(data, new_iptc.as_deref(), new_xmp.as_deref())
873    }
874
875    /// Write metadata changes to Matroska (MKV/WebM) data.
876    fn write_matroska(&self, data: &[u8]) -> Result<Vec<u8>> {
877        let changes: Vec<(&str, &str)> = self
878            .new_values
879            .iter()
880            .filter_map(|nv| {
881                let value = nv.value.as_deref()?;
882                Some((nv.tag.as_str(), value))
883            })
884            .collect();
885
886        matroska_writer::write_matroska(data, &changes)
887    }
888
889    /// Write metadata changes to PDF data.
890    fn write_pdf(&self, data: &[u8]) -> Result<Vec<u8>> {
891        let changes: Vec<(&str, &str)> = self
892            .new_values
893            .iter()
894            .filter_map(|nv| {
895                let value = nv.value.as_deref()?;
896                Some((nv.tag.as_str(), value))
897            })
898            .collect();
899
900        pdf_writer::write_pdf(data, &changes)
901    }
902
903    /// Write metadata changes to MP4/MOV data.
904    fn write_mp4(&self, data: &[u8]) -> Result<Vec<u8>> {
905        let mut ilst_tags: Vec<([u8; 4], String)> = Vec::new();
906        let mut xmp_values: Vec<&NewValue> = Vec::new();
907
908        for nv in &self.new_values {
909            if nv.value.is_none() {
910                continue;
911            }
912            let group = nv.group.as_deref().unwrap_or("").to_uppercase();
913            if group == "XMP" {
914                xmp_values.push(nv);
915            } else if let Some(key) = mp4_writer::tag_to_ilst_key(&nv.tag) {
916                ilst_tags.push((key, nv.value.clone().unwrap()));
917            }
918        }
919
920        let tag_refs: Vec<(&[u8; 4], &str)> =
921            ilst_tags.iter().map(|(k, v)| (k, v.as_str())).collect();
922
923        let new_xmp = if !xmp_values.is_empty() {
924            let refs: Vec<&NewValue> = xmp_values.to_vec();
925            Some(self.build_new_xmp(&refs))
926        } else {
927            None
928        };
929
930        mp4_writer::write_mp4(data, &tag_refs, new_xmp.as_deref())
931    }
932
933    /// Write metadata changes to WebP data.
934    fn write_webp(&self, data: &[u8]) -> Result<Vec<u8>> {
935        let mut exif_values: Vec<&NewValue> = Vec::new();
936        let mut xmp_values: Vec<&NewValue> = Vec::new();
937        let mut remove_exif = false;
938        let mut remove_xmp = false;
939
940        for nv in &self.new_values {
941            let group = nv.group.as_deref().unwrap_or("").to_uppercase();
942            if nv.value.is_none() && nv.tag == "*" {
943                if group == "EXIF" {
944                    remove_exif = true;
945                }
946                if group == "XMP" {
947                    remove_xmp = true;
948                }
949                continue;
950            }
951            match group.as_str() {
952                "XMP" => xmp_values.push(nv),
953                _ => exif_values.push(nv),
954            }
955        }
956
957        let new_exif = if !exif_values.is_empty() {
958            let bo = ByteOrderMark::BigEndian;
959            let mut entries = Vec::new();
960            for nv in &exif_values {
961                if let Some(ref v) = nv.value {
962                    let group = nv.group.as_deref().unwrap_or("");
963                    if let Some((tag_id, format, encoded)) = encode_exif_tag(&nv.tag, v, group, bo)
964                    {
965                        entries.push(exif_writer::IfdEntry {
966                            tag: tag_id,
967                            format,
968                            data: encoded,
969                        });
970                    }
971                }
972            }
973            if !entries.is_empty() {
974                Some(exif_writer::build_exif(&entries, &[], &[], bo)?)
975            } else {
976                None
977            }
978        } else {
979            None
980        };
981
982        let new_xmp = if !xmp_values.is_empty() {
983            Some(self.build_new_xmp(&xmp_values.to_vec()))
984        } else {
985            None
986        };
987
988        webp_writer::write_webp(
989            data,
990            new_exif.as_deref(),
991            new_xmp.as_deref(),
992            remove_exif,
993            remove_xmp,
994        )
995    }
996
997    /// Write metadata changes to TIFF data.
998    fn write_tiff(&self, data: &[u8]) -> Result<Vec<u8>> {
999        let bo = if data.starts_with(b"II") {
1000            ByteOrderMark::LittleEndian
1001        } else {
1002            ByteOrderMark::BigEndian
1003        };
1004
1005        let mut changes: Vec<(u16, Vec<u8>)> = Vec::new();
1006        for nv in &self.new_values {
1007            if let Some(ref value) = nv.value {
1008                let group = nv.group.as_deref().unwrap_or("");
1009                if let Some((tag_id, _format, encoded)) = encode_exif_tag(&nv.tag, value, group, bo)
1010                {
1011                    changes.push((tag_id, encoded));
1012                }
1013            }
1014        }
1015
1016        tiff_writer::write_tiff(data, &changes)
1017    }
1018
1019    /// Build new XMP data from queued values.
1020    fn build_new_xmp(&self, values: &[&NewValue]) -> Vec<u8> {
1021        let mut properties = Vec::new();
1022
1023        for nv in values {
1024            let value_str = match &nv.value {
1025                Some(v) => v.clone(),
1026                None => continue,
1027            };
1028
1029            let ns = nv.group.as_deref().unwrap_or("dc").to_lowercase();
1030            let ns = if ns == "xmp" { "xmp".to_string() } else { ns };
1031
1032            let prop_type = match nv.tag.to_lowercase().as_str() {
1033                "title" | "description" | "rights" => xmp_writer::XmpPropertyType::LangAlt,
1034                "subject" | "keywords" => xmp_writer::XmpPropertyType::Bag,
1035                "creator" => xmp_writer::XmpPropertyType::Seq,
1036                _ => xmp_writer::XmpPropertyType::Simple,
1037            };
1038
1039            let values = if matches!(
1040                prop_type,
1041                xmp_writer::XmpPropertyType::Bag | xmp_writer::XmpPropertyType::Seq
1042            ) {
1043                value_str.split(',').map(|s| s.trim().to_string()).collect()
1044            } else {
1045                vec![value_str]
1046            };
1047
1048            properties.push(xmp_writer::XmpProperty {
1049                namespace: ns,
1050                property: nv.tag.clone(),
1051                values,
1052                prop_type,
1053            });
1054        }
1055
1056        xmp_writer::build_xmp(&properties).into_bytes()
1057    }
1058
1059    // ================================================================
1060    // Reading API
1061    // ================================================================
1062
1063    /// Extract metadata from a file and return a simple name→value map.
1064    ///
1065    /// This is the high-level one-shot API, equivalent to ExifTool's `ImageInfo()`.
1066    pub fn image_info<P: AsRef<Path>>(&self, path: P) -> Result<ImageInfo> {
1067        let tags = self.extract_info(path)?;
1068        Ok(self.get_info(&tags))
1069    }
1070
1071    /// Extract all metadata tags from a file.
1072    ///
1073    /// Returns the full `Tag` structs with groups, raw values, etc.
1074    pub fn extract_info<P: AsRef<Path>>(&self, path: P) -> Result<Vec<Tag>> {
1075        let path = path.as_ref();
1076        let data = fs::read(path).map_err(Error::Io)?;
1077
1078        self.extract_info_from_bytes(&data, path)
1079    }
1080
1081    /// Extract metadata from in-memory data.
1082    pub fn extract_info_from_bytes(&self, data: &[u8], path: &Path) -> Result<Vec<Tag>> {
1083        // Propagate show_unknown to EXIF/MakerNotes parsers via thread-local
1084        crate::metadata::exif::set_show_unknown(self.options.show_unknown);
1085        // Propagate process_compressed to format readers via thread-local
1086        crate::formats::pdf::set_process_compressed(self.options.process_compressed);
1087
1088        let file_type_result = self.detect_file_type(data, path);
1089        let (file_type, mut tags) = match file_type_result {
1090            Ok(ft) => {
1091                let t = self
1092                    .process_file(data, ft)
1093                    .or_else(|_| self.process_by_extension(data, path))?;
1094                (Some(ft), t)
1095            }
1096            Err(_) => {
1097                // File type unknown by magic/extension — try extension-based fallback
1098                let t = self.process_by_extension(data, path)?;
1099                (None, t)
1100            }
1101        };
1102        let file_type = file_type.unwrap_or(FileType::Zip); // placeholder for file-level tags
1103
1104        // Add file-level tags
1105        tags.push(Tag {
1106            id: crate::tag::TagId::Text("FileType".into()),
1107            name: "FileType".into(),
1108            description: "File Type".into(),
1109            group: crate::tag::TagGroup {
1110                family0: "File".into(),
1111                family1: "File".into(),
1112                family2: "Other".into(),
1113            },
1114            raw_value: Value::String(format!("{:?}", file_type)),
1115            print_value: file_type.description().to_string(),
1116            priority: 0,
1117        });
1118
1119        tags.push(Tag {
1120            id: crate::tag::TagId::Text("MIMEType".into()),
1121            name: "MIMEType".into(),
1122            description: "MIME Type".into(),
1123            group: crate::tag::TagGroup {
1124                family0: "File".into(),
1125                family1: "File".into(),
1126                family2: "Other".into(),
1127            },
1128            raw_value: Value::String(file_type.mime_type().to_string()),
1129            print_value: file_type.mime_type().to_string(),
1130            priority: 0,
1131        });
1132
1133        if let Ok(metadata) = fs::metadata(path) {
1134            tags.push(Tag {
1135                id: crate::tag::TagId::Text("FileSize".into()),
1136                name: "FileSize".into(),
1137                description: "File Size".into(),
1138                group: crate::tag::TagGroup {
1139                    family0: "File".into(),
1140                    family1: "File".into(),
1141                    family2: "Other".into(),
1142                },
1143                raw_value: Value::U32(metadata.len() as u32),
1144                print_value: format_file_size(metadata.len()),
1145                priority: 0,
1146            });
1147        }
1148
1149        // Add more file-level tags
1150        let file_tag = |name: &str, val: Value| -> Tag {
1151            Tag {
1152                id: crate::tag::TagId::Text(name.to_string()),
1153                name: name.to_string(),
1154                description: name.to_string(),
1155                group: crate::tag::TagGroup {
1156                    family0: "File".into(),
1157                    family1: "File".into(),
1158                    family2: "Other".into(),
1159                },
1160                raw_value: val.clone(),
1161                print_value: val.to_display_string(),
1162                priority: 0,
1163            }
1164        };
1165
1166        if let Some(fname) = path.file_name().and_then(|n| n.to_str()) {
1167            tags.push(file_tag("FileName", Value::String(fname.to_string())));
1168        }
1169        if let Some(dir) = path.parent().and_then(|p| p.to_str()) {
1170            tags.push(file_tag("Directory", Value::String(dir.to_string())));
1171        }
1172        // Use the canonical (first) extension from the FileType, matching Perl ExifTool behavior.
1173        let canonical_ext = file_type.extensions().first().copied().unwrap_or("");
1174        if !canonical_ext.is_empty() {
1175            tags.push(file_tag(
1176                "FileTypeExtension",
1177                Value::String(canonical_ext.to_string()),
1178            ));
1179        }
1180
1181        #[cfg(unix)]
1182        if let Ok(metadata) = fs::metadata(path) {
1183            use std::os::unix::fs::MetadataExt;
1184            let mode = metadata.mode();
1185            tags.push(file_tag(
1186                "FilePermissions",
1187                Value::String(format!("{:o}", mode & 0o7777)),
1188            ));
1189
1190            // FileModifyDate
1191            if let Ok(modified) = metadata.modified() {
1192                if let Ok(dur) = modified.duration_since(std::time::UNIX_EPOCH) {
1193                    let secs = dur.as_secs() as i64;
1194                    tags.push(file_tag(
1195                        "FileModifyDate",
1196                        Value::String(unix_to_datetime(secs)),
1197                    ));
1198                }
1199            }
1200            // FileAccessDate
1201            if let Ok(accessed) = metadata.accessed() {
1202                if let Ok(dur) = accessed.duration_since(std::time::UNIX_EPOCH) {
1203                    let secs = dur.as_secs() as i64;
1204                    tags.push(file_tag(
1205                        "FileAccessDate",
1206                        Value::String(unix_to_datetime(secs)),
1207                    ));
1208                }
1209            }
1210            // FileInodeChangeDate (ctime on Unix)
1211            let ctime = metadata.ctime();
1212            if ctime > 0 {
1213                tags.push(file_tag(
1214                    "FileInodeChangeDate",
1215                    Value::String(unix_to_datetime(ctime)),
1216                ));
1217            }
1218        }
1219
1220        // ExifByteOrder (from TIFF header)
1221        {
1222            let bo_str = if data.len() > 8 {
1223                // Check EXIF in JPEG or TIFF header or WebP/RIFF EXIF chunk
1224                let check: Option<&[u8]> = if data.starts_with(&[0xFF, 0xD8]) {
1225                    // JPEG: find APP1 EXIF header
1226                    data.windows(6)
1227                        .position(|w| w == b"Exif\0\0")
1228                        .map(|p| &data[p + 6..])
1229                } else if data.starts_with(b"FUJIFILMCCD-RAW") && data.len() >= 0x60 {
1230                    // RAF: look in the embedded JPEG for EXIF byte order
1231                    let jpeg_offset =
1232                        u32::from_be_bytes([data[0x54], data[0x55], data[0x56], data[0x57]])
1233                            as usize;
1234                    let jpeg_length =
1235                        u32::from_be_bytes([data[0x58], data[0x59], data[0x5A], data[0x5B]])
1236                            as usize;
1237                    if jpeg_offset > 0 && jpeg_offset + jpeg_length <= data.len() {
1238                        let jpeg = &data[jpeg_offset..jpeg_offset + jpeg_length];
1239                        jpeg.windows(6)
1240                            .position(|w| w == b"Exif\0\0")
1241                            .map(|p| &jpeg[p + 6..])
1242                    } else {
1243                        None
1244                    }
1245                } else if data.starts_with(b"RIFF") && data.len() >= 12 {
1246                    // RIFF/WebP: find EXIF chunk
1247                    let mut riff_bo: Option<&[u8]> = None;
1248                    let mut pos = 12usize;
1249                    while pos + 8 <= data.len() {
1250                        let cid = &data[pos..pos + 4];
1251                        let csz = u32::from_le_bytes([
1252                            data[pos + 4],
1253                            data[pos + 5],
1254                            data[pos + 6],
1255                            data[pos + 7],
1256                        ]) as usize;
1257                        let cstart = pos + 8;
1258                        let cend = (cstart + csz).min(data.len());
1259                        if cid == b"EXIF" && cend > cstart {
1260                            let exif_data = &data[cstart..cend];
1261                            let tiff = if exif_data.starts_with(b"Exif\0\0") {
1262                                &exif_data[6..]
1263                            } else {
1264                                exif_data
1265                            };
1266                            riff_bo = Some(tiff);
1267                            break;
1268                        }
1269                        // Also check LIST chunks
1270                        if cid == b"LIST" && cend >= cstart + 4 {
1271                            // recurse not needed for this simple scan - just advance
1272                        }
1273                        pos = cend + (csz & 1);
1274                    }
1275                    riff_bo
1276                } else if data.starts_with(&[0x00, 0x00, 0x00, 0x0C, b'J', b'X', b'L', b' ']) {
1277                    // JXL container: scan for brob Exif box and decompress to get byte order
1278                    let mut jxl_bo: Option<String> = None;
1279                    let mut jpos = 12usize; // skip JXL signature box
1280                    while jpos + 8 <= data.len() {
1281                        let bsize = u32::from_be_bytes([
1282                            data[jpos],
1283                            data[jpos + 1],
1284                            data[jpos + 2],
1285                            data[jpos + 3],
1286                        ]) as usize;
1287                        let btype = &data[jpos + 4..jpos + 8];
1288                        if bsize < 8 || jpos + bsize > data.len() {
1289                            break;
1290                        }
1291                        if btype == b"brob" && jpos + bsize > 12 {
1292                            let inner_type = &data[jpos + 8..jpos + 12];
1293                            if inner_type == b"Exif" || inner_type == b"exif" {
1294                                let brotli_payload = &data[jpos + 12..jpos + bsize];
1295                                use std::io::Cursor;
1296                                let mut inp = Cursor::new(brotli_payload);
1297                                let mut out: Vec<u8> = Vec::new();
1298                                if brotli::BrotliDecompress(&mut inp, &mut out).is_ok() {
1299                                    let exif_start = if out.len() > 4 { 4 } else { 0 };
1300                                    if exif_start < out.len() {
1301                                        if out[exif_start..].starts_with(b"MM") {
1302                                            jxl_bo = Some("Big-endian (Motorola, MM)".to_string());
1303                                        } else if out[exif_start..].starts_with(b"II") {
1304                                            jxl_bo = Some("Little-endian (Intel, II)".to_string());
1305                                        }
1306                                    }
1307                                }
1308                                break;
1309                            }
1310                        }
1311                        jpos += bsize;
1312                    }
1313                    if let Some(bo) = jxl_bo {
1314                        if !bo.is_empty() && file_type != FileType::Btf {
1315                            tags.push(file_tag("ExifByteOrder", Value::String(bo)));
1316                        }
1317                    }
1318                    // Return None to skip the generic byte order check below
1319                    None
1320                } else if data.starts_with(&[0x00, b'M', b'R', b'M']) {
1321                    // MRW: find TTW segment which contains TIFF/EXIF data
1322                    let mrw_data_offset = if data.len() >= 8 {
1323                        u32::from_be_bytes([data[4], data[5], data[6], data[7]]) as usize + 8
1324                    } else {
1325                        0
1326                    };
1327                    let mut mrw_bo: Option<&[u8]> = None;
1328                    let mut mpos = 8usize;
1329                    while mpos + 8 <= mrw_data_offset.min(data.len()) {
1330                        let seg_tag = &data[mpos..mpos + 4];
1331                        let seg_len = u32::from_be_bytes([
1332                            data[mpos + 4],
1333                            data[mpos + 5],
1334                            data[mpos + 6],
1335                            data[mpos + 7],
1336                        ]) as usize;
1337                        if seg_tag == b"\x00TTW" && mpos + 8 + seg_len <= data.len() {
1338                            mrw_bo = Some(&data[mpos + 8..mpos + 8 + seg_len]);
1339                            break;
1340                        }
1341                        mpos += 8 + seg_len;
1342                    }
1343                    mrw_bo
1344                } else {
1345                    Some(data)
1346                };
1347                if let Some(tiff) = check {
1348                    if tiff.starts_with(b"II") {
1349                        "Little-endian (Intel, II)"
1350                    } else if tiff.starts_with(b"MM") {
1351                        "Big-endian (Motorola, MM)"
1352                    } else {
1353                        ""
1354                    }
1355                } else {
1356                    ""
1357                }
1358            } else {
1359                ""
1360            };
1361            // Suppress ExifByteOrder for BigTIFF, Canon VRD/DR4 (Perl doesn't output it for these)
1362            // Also skip if already emitted by ExifReader (TIFF-based formats)
1363            let already_has_exifbyteorder = tags.iter().any(|t| t.name == "ExifByteOrder");
1364            if !bo_str.is_empty()
1365                && !already_has_exifbyteorder
1366                && file_type != FileType::Btf
1367                && file_type != FileType::Dr4
1368                && file_type != FileType::Vrd
1369                && file_type != FileType::Crw
1370            {
1371                tags.push(file_tag("ExifByteOrder", Value::String(bo_str.to_string())));
1372            }
1373        }
1374
1375        tags.push(file_tag(
1376            "ExifToolVersion",
1377            Value::String(crate::VERSION.to_string()),
1378        ));
1379
1380        // Compute composite tags
1381        let composite = crate::composite::compute_composite_tags(&tags);
1382        tags.extend(composite);
1383
1384        // MWG (Metadata Working Group) composite tags
1385        if self.options.use_mwg {
1386            let mwg = crate::composite::compute_mwg_composites(&tags);
1387            tags.extend(mwg);
1388        }
1389
1390        // FLIR post-processing: remove LensID composite for FLIR cameras.
1391        // Perl's LensID composite requires LensType EXIF tag (not present in FLIR images),
1392        // and LensID-2 requires LensModel to match /(mm|\d\/F)/ (FLIR names like "FOL7"
1393        // don't match).  Our composite.rs uses a simpler fallback that picks up any non-empty
1394        // LensModel, so we remove LensID when the image is from a FLIR camera with FFF data.
1395        {
1396            let is_flir_fff = tags
1397                .iter()
1398                .any(|t| t.group.family0 == "APP1" && t.group.family1 == "FLIR");
1399            if is_flir_fff {
1400                tags.retain(|t| !(t.name == "LensID" && t.group.family0 == "Composite"));
1401            }
1402        }
1403
1404        // Olympus post-processing: remove the generic "Lens" composite for Olympus cameras.
1405        // In Perl, the "Lens" composite tag requires Canon:MinFocalLength (Canon namespace).
1406        // Our composite.rs generates Lens for any manufacturer that has MinFocalLength +
1407        // MaxFocalLength (e.g., Olympus Equipment sub-IFD).  Remove it for non-Canon cameras.
1408        {
1409            let make = tags
1410                .iter()
1411                .find(|t| t.name == "Make")
1412                .map(|t| t.print_value.clone())
1413                .unwrap_or_default();
1414            if !make.to_uppercase().contains("CANON") {
1415                tags.retain(|t| t.name != "Lens" || t.group.family0 != "Composite");
1416            }
1417        }
1418
1419        // Priority-based deduplication: when the same tag name appears from both RIFF (priority 0)
1420        // and MakerNotes/EXIF (priority 0 but higher-quality source), remove the RIFF copy.
1421        // Mirrors ExifTool's PRIORITY => 0 behavior for RIFF StreamHeader tags.
1422        {
1423            let riff_priority_zero_tags = ["Quality", "SampleSize", "StreamType"];
1424            for tag_name in &riff_priority_zero_tags {
1425                let has_makernotes = tags
1426                    .iter()
1427                    .any(|t| t.name == *tag_name && t.group.family0 != "RIFF");
1428                if has_makernotes {
1429                    tags.retain(|t| !(t.name == *tag_name && t.group.family0 == "RIFF"));
1430                }
1431            }
1432        }
1433
1434        // Priority-based deduplication: when the same tag name appears multiple times,
1435        // keep only the one with the highest priority (e.g., EXIF over JFIF, FFF over MakerNote).
1436        if !self.options.duplicates {
1437            let mut best_priority: HashMap<String, i32> = HashMap::new();
1438            for tag in &tags {
1439                let entry = best_priority
1440                    .entry(tag.name.clone())
1441                    .or_insert(tag.priority);
1442                if tag.priority > *entry {
1443                    *entry = tag.priority;
1444                }
1445            }
1446            tags.retain(|t| t.priority >= *best_priority.get(&t.name).unwrap_or(&0));
1447        }
1448
1449        // Filter by requested tags if specified
1450        if !self.options.requested_tags.is_empty() {
1451            let requested: Vec<String> = self
1452                .options
1453                .requested_tags
1454                .iter()
1455                .map(|t| t.to_lowercase())
1456                .collect();
1457            tags.retain(|t| requested.contains(&t.name.to_lowercase()));
1458        }
1459
1460        Ok(tags)
1461    }
1462
1463    /// Format extracted tags into a simple name→value map.
1464    ///
1465    /// Handles duplicate tag names by appending group info.
1466    fn get_info(&self, tags: &[Tag]) -> ImageInfo {
1467        let mut info = ImageInfo::new();
1468        let mut seen: HashMap<String, (usize, i32)> = HashMap::new(); // (count, best priority)
1469
1470        for tag in tags {
1471            let value = if self.options.print_conv {
1472                &tag.print_value
1473            } else {
1474                &tag.raw_value.to_display_string()
1475            };
1476
1477            let entry = seen.entry(tag.name.clone()).or_insert((0, i32::MIN));
1478            entry.0 += 1;
1479
1480            if entry.0 == 1 {
1481                entry.1 = tag.priority;
1482                info.insert(tag.name.clone(), value.clone());
1483            } else if tag.priority > entry.1 {
1484                // Higher priority tag replaces the previous one
1485                entry.1 = tag.priority;
1486                info.insert(tag.name.clone(), value.clone());
1487            } else if self.options.duplicates {
1488                let key = format!("{} [{}:{}]", tag.name, tag.group.family0, tag.group.family1);
1489                info.insert(key, value.clone());
1490            }
1491        }
1492
1493        info
1494    }
1495
1496    /// Detect file type from magic bytes and extension.
1497    fn detect_file_type(&self, data: &[u8], path: &Path) -> Result<FileType> {
1498        // Try magic bytes first
1499        let header_len = data.len().min(256);
1500        if let Some(ft) = file_type::detect_from_magic(&data[..header_len]) {
1501            // Override ICO to Font if extension is .dfont (Mac resource fork)
1502            if ft == FileType::Ico {
1503                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1504                    if ext.eq_ignore_ascii_case("dfont") {
1505                        return Ok(FileType::Font);
1506                    }
1507                }
1508            }
1509            // Override JPEG to JPS if the file extension is .jps
1510            if ft == FileType::Jpeg {
1511                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1512                    if ext.eq_ignore_ascii_case("jps") {
1513                        return Ok(FileType::Jps);
1514                    }
1515                }
1516            }
1517            // Override PLIST to AAE if extension is .aae
1518            if ft == FileType::Plist {
1519                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1520                    if ext.eq_ignore_ascii_case("aae") {
1521                        return Ok(FileType::Aae);
1522                    }
1523                }
1524            }
1525            // Override XMP to PLIST/AAE if extension is .plist or .aae
1526            if ft == FileType::Xmp {
1527                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1528                    if ext.eq_ignore_ascii_case("plist") {
1529                        return Ok(FileType::Plist);
1530                    }
1531                    if ext.eq_ignore_ascii_case("aae") {
1532                        return Ok(FileType::Aae);
1533                    }
1534                }
1535            }
1536            // Override to PhotoCD if extension is .pcd (file starts with 0xFF padding)
1537            if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1538                if ext.eq_ignore_ascii_case("pcd")
1539                    && data.len() >= 2056
1540                    && &data[2048..2055] == b"PCD_IPI"
1541                {
1542                    return Ok(FileType::PhotoCd);
1543                }
1544            }
1545            // Override MP3 to MPC/APE/WavPack if extension says otherwise
1546            if ft == FileType::Mp3 {
1547                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1548                    if ext.eq_ignore_ascii_case("mpc") {
1549                        return Ok(FileType::Mpc);
1550                    }
1551                    if ext.eq_ignore_ascii_case("ape") {
1552                        return Ok(FileType::Ape);
1553                    }
1554                    if ext.eq_ignore_ascii_case("wv") {
1555                        return Ok(FileType::WavPack);
1556                    }
1557                }
1558            }
1559            // For ZIP files, check if it's an EIP (by extension) or OpenDocument format
1560            if ft == FileType::Zip {
1561                // Check extension first for EIP
1562                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1563                    if ext.eq_ignore_ascii_case("eip") {
1564                        return Ok(FileType::Eip);
1565                    }
1566                }
1567                if let Some(od_type) = detect_opendocument_type(data) {
1568                    return Ok(od_type);
1569                }
1570            }
1571            return Ok(ft);
1572        }
1573
1574        // Fall back to extension
1575        if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1576            if let Some(ft) = file_type::detect_from_extension(ext) {
1577                return Ok(ft);
1578            }
1579        }
1580
1581        let ext_str = path
1582            .extension()
1583            .and_then(|e| e.to_str())
1584            .unwrap_or("unknown");
1585        Err(Error::UnsupportedFileType(ext_str.to_string()))
1586    }
1587
1588    /// Dispatch to the appropriate format reader.
1589    fn process_file(&self, data: &[u8], file_type: FileType) -> Result<Vec<Tag>> {
1590        match file_type {
1591            FileType::Jpeg | FileType::Jps => formats::jpeg::read_jpeg(data),
1592            FileType::Png | FileType::Mng => formats::png::read_png(data),
1593            // All TIFF-based formats (TIFF + most RAW formats)
1594            FileType::Tiff
1595            | FileType::Btf
1596            | FileType::Dng
1597            | FileType::Cr2
1598            | FileType::Nef
1599            | FileType::Arw
1600            | FileType::Sr2
1601            | FileType::Orf
1602            | FileType::Pef
1603            | FileType::Erf
1604            | FileType::Fff
1605            | FileType::Rwl
1606            | FileType::Mef
1607            | FileType::Srw
1608            | FileType::Gpr
1609            | FileType::Arq
1610            | FileType::ThreeFR
1611            | FileType::Dcr
1612            | FileType::Rw2
1613            | FileType::Srf => formats::tiff::read_tiff(data),
1614            // Phase One IIQ: TIFF + PhaseOne maker note block
1615            FileType::Iiq => formats::iiq::read_iiq(data),
1616            // Image formats
1617            FileType::Gif => formats::gif::read_gif(data),
1618            FileType::Bmp => formats::bmp::read_bmp(data),
1619            FileType::WebP | FileType::Avi | FileType::Wav => formats::riff::read_riff(data),
1620            FileType::Psd => formats::psd::read_psd(data),
1621            // Audio formats
1622            FileType::Mp3 => formats::id3::read_mp3(data),
1623            FileType::Flac => formats::flac::read_flac(data),
1624            FileType::Ogg | FileType::Opus => formats::ogg::read_ogg(data),
1625            FileType::Aiff => formats::aiff::read_aiff(data),
1626            // Video formats
1627            FileType::Mp4
1628            | FileType::QuickTime
1629            | FileType::M4a
1630            | FileType::ThreeGP
1631            | FileType::Heif
1632            | FileType::Avif
1633            | FileType::Cr3
1634            | FileType::Crm
1635            | FileType::F4v
1636            | FileType::Mqv
1637            | FileType::Lrv => {
1638                formats::quicktime::read_quicktime_with_ee(data, self.options.extract_embedded)
1639            }
1640            FileType::Mkv | FileType::WebM => formats::matroska::read_matroska(data),
1641            FileType::Asf | FileType::Wmv | FileType::Wma => formats::asf::read_asf(data),
1642            FileType::Wtv => formats::wtv::read_wtv(data),
1643            // RAW formats with custom containers
1644            FileType::Crw => formats::canon_raw::read_crw(data),
1645            FileType::Raf => formats::raf::read_raf(data),
1646            FileType::Mrw => formats::mrw::read_mrw(data),
1647            FileType::Mrc => formats::mrc::read_mrc(data),
1648            // Image formats
1649            FileType::Jp2 => formats::jp2::read_jp2(data),
1650            FileType::J2c => formats::jp2::read_j2c(data),
1651            FileType::Jxl => formats::jp2::read_jxl(data),
1652            FileType::Ico => formats::ico::read_ico(data),
1653            FileType::Icc => formats::icc::read_icc(data),
1654            // Documents
1655            FileType::Pdf => formats::pdf::read_pdf(data),
1656            FileType::PostScript => {
1657                // PFA fonts start with %!PS-AdobeFont or %!FontType1
1658                if data.starts_with(b"%!PS-AdobeFont") || data.starts_with(b"%!FontType1") {
1659                    formats::font::read_pfa(data)
1660                        .or_else(|_| formats::postscript::read_postscript(data))
1661                } else {
1662                    formats::postscript::read_postscript(data)
1663                }
1664            }
1665            FileType::Eip => formats::capture_one::read_eip(data),
1666            FileType::Zip
1667            | FileType::Docx
1668            | FileType::Xlsx
1669            | FileType::Pptx
1670            | FileType::Doc
1671            | FileType::Xls
1672            | FileType::Ppt => formats::zip::read_zip(data),
1673            FileType::Rtf => formats::rtf::read_rtf(data),
1674            FileType::InDesign => formats::indesign::read_indesign(data),
1675            FileType::Pcap => formats::pcap::read_pcap(data),
1676            FileType::Pcapng => formats::pcap::read_pcapng(data),
1677            // Canon VRD / DR4
1678            FileType::Vrd => formats::canon_vrd::read_vrd(data).or_else(|_| Ok(Vec::new())),
1679            FileType::Dr4 => formats::canon_vrd::read_dr4(data).or_else(|_| Ok(Vec::new())),
1680            // Metadata / Other
1681            FileType::Xmp => formats::xmp_file::read_xmp(data),
1682            FileType::Svg => formats::svg::read_svg(data),
1683            FileType::Html => {
1684                // SVG files that weren't detected by magic (e.g., via extension fallback)
1685                let is_svg = data.windows(4).take(512).any(|w| w == b"<svg");
1686                if is_svg {
1687                    formats::svg::read_svg(data)
1688                } else {
1689                    formats::html::read_html(data)
1690                }
1691            }
1692            FileType::Exe => formats::exe::read_exe(data),
1693            FileType::Font => {
1694                // AFM: Adobe Font Metrics text file
1695                if data.starts_with(b"StartFontMetrics") {
1696                    return formats::font::read_afm(data);
1697                }
1698                // PFA: PostScript Type 1 ASCII font
1699                if data.starts_with(b"%!PS-AdobeFont") || data.starts_with(b"%!FontType1") {
1700                    return formats::font::read_pfa(data).or_else(|_| Ok(Vec::new()));
1701                }
1702                // PFB: PostScript Type 1 Binary font
1703                if data.len() >= 2 && data[0] == 0x80 && (data[1] == 0x01 || data[1] == 0x02) {
1704                    return formats::font::read_pfb(data).or_else(|_| Ok(Vec::new()));
1705                }
1706                formats::font::read_font(data)
1707            }
1708            // Audio with ID3
1709            FileType::WavPack | FileType::Dsf => formats::id3::read_mp3(data),
1710            FileType::Ape => formats::ape::read_ape(data),
1711            FileType::Mpc => formats::ape::read_mpc(data),
1712            FileType::Aac => formats::aac::read_aac(data),
1713            FileType::RealAudio => {
1714                formats::real_audio::read_real_audio(data).or_else(|_| Ok(Vec::new()))
1715            }
1716            FileType::RealMedia => {
1717                formats::real_media::read_real_media(data).or_else(|_| Ok(Vec::new()))
1718            }
1719            // Misc formats
1720            FileType::Czi => formats::czi::read_czi(data).or_else(|_| Ok(Vec::new())),
1721            FileType::PhotoCd => formats::photo_cd::read_photo_cd(data).or_else(|_| Ok(Vec::new())),
1722            FileType::Dicom => formats::dicom::read_dicom(data),
1723            FileType::Fits => formats::fits::read_fits(data),
1724            FileType::Flv => formats::flv::read_flv(data),
1725            FileType::Mxf => formats::mxf::read_mxf(data).or_else(|_| Ok(Vec::new())),
1726            FileType::Swf => formats::swf::read_swf(data),
1727            FileType::Hdr => formats::hdr::read_hdr(data),
1728            FileType::DjVu => formats::djvu::read_djvu(data),
1729            FileType::Xcf => formats::gimp::read_xcf(data),
1730            FileType::Mie => formats::mie::read_mie(data),
1731            FileType::Lfp => formats::lytro::read_lfp(data),
1732            // FileType::Miff dispatched via string extension below
1733            FileType::Fpf => formats::flir_fpf::read_fpf(data),
1734            FileType::Flif => formats::flif::read_flif(data),
1735            FileType::Bpg => formats::bpg::read_bpg(data),
1736            FileType::Pcx => formats::pcx::read_pcx(data),
1737            FileType::Pict => formats::pict::read_pict(data),
1738            FileType::Mpeg => formats::mpeg::read_mpeg(data),
1739            FileType::M2ts => formats::m2ts::read_m2ts(data, self.options.extract_embedded),
1740            FileType::Gzip => formats::gzip::read_gzip(data),
1741            FileType::Rar => formats::rar::read_rar(data),
1742            FileType::SevenZ => formats::sevenz::read_7z(data),
1743            FileType::Dss => formats::dss::read_dss(data),
1744            FileType::Moi => formats::moi::read_moi(data),
1745            FileType::MacOs => formats::macos::read_macos(data),
1746            FileType::Json => formats::json_format::read_json(data),
1747            // New formats
1748            FileType::Pgf => formats::pgf::read_pgf(data),
1749            FileType::Xisf => formats::xisf::read_xisf(data),
1750            FileType::Torrent => formats::torrent::read_torrent(data),
1751            FileType::Mobi => formats::palm::read_palm(data),
1752            FileType::Psp => formats::psp::read_psp(data),
1753            FileType::SonyPmp => formats::sony_pmp::read_sony_pmp(data),
1754            FileType::Audible => formats::audible::read_audible(data),
1755            FileType::Exr => formats::openexr::read_openexr(data),
1756            // New formats
1757            FileType::Plist => {
1758                if data.starts_with(b"bplist") {
1759                    formats::plist::read_binary_plist_tags(data)
1760                } else {
1761                    formats::plist::read_xml_plist(data)
1762                }
1763            }
1764            FileType::Aae => {
1765                if data.starts_with(b"bplist") {
1766                    formats::plist::read_binary_plist_tags(data)
1767                } else {
1768                    formats::plist::read_aae_plist(data)
1769                }
1770            }
1771            FileType::KyoceraRaw => formats::kyocera_raw::read_kyocera_raw(data),
1772            FileType::PortableFloatMap => formats::pfm::read_pfm(data),
1773            FileType::Ods
1774            | FileType::Odt
1775            | FileType::Odp
1776            | FileType::Odg
1777            | FileType::Odf
1778            | FileType::Odb
1779            | FileType::Odi
1780            | FileType::Odc => formats::zip::read_zip(data),
1781            FileType::Lif => formats::lif::read_lif(data),
1782            FileType::Rwz => formats::rawzor::read_rawzor(data),
1783            FileType::Jxr => formats::jxr::read_jxr(data),
1784            _ => Err(Error::UnsupportedFileType(format!("{}", file_type))),
1785        }
1786    }
1787
1788    /// Fallback: try to read file based on extension for formats without magic detection.
1789    fn process_by_extension(&self, data: &[u8], path: &Path) -> Result<Vec<Tag>> {
1790        let ext = path
1791            .extension()
1792            .and_then(|e| e.to_str())
1793            .unwrap_or("")
1794            .to_ascii_lowercase();
1795
1796        match ext.as_str() {
1797            "ppm" | "pgm" | "pbm" => formats::ppm::read_ppm(data),
1798            "pfm" => {
1799                // PFM can be Portable Float Map or Printer Font Metrics
1800                if data.len() >= 3 && data[0] == b'P' && (data[1] == b'f' || data[1] == b'F') {
1801                    formats::ppm::read_ppm(data)
1802                } else {
1803                    Ok(Vec::new()) // Printer Font Metrics
1804                }
1805            }
1806            "json" => formats::json_format::read_json(data),
1807            "svg" => formats::svg::read_svg(data),
1808            "ram" => formats::ram::read_ram(data).or_else(|_| Ok(Vec::new())),
1809            "txt" | "log" | "igc" => Ok(compute_text_tags(data, false)),
1810            "csv" => Ok(compute_text_tags(data, true)),
1811            "url" => formats::lnk::read_url(data).or_else(|_| Ok(Vec::new())),
1812            "lnk" => formats::lnk::read_lnk(data).or_else(|_| Ok(Vec::new())),
1813            "gpx" | "kml" | "xml" | "inx" => formats::xmp_file::read_xmp(data),
1814            "plist" => {
1815                if data.starts_with(b"bplist") {
1816                    formats::plist::read_binary_plist_tags(data).or_else(|_| Ok(Vec::new()))
1817                } else {
1818                    formats::plist::read_xml_plist(data).or_else(|_| Ok(Vec::new()))
1819                }
1820            }
1821            "aae" => {
1822                if data.starts_with(b"bplist") {
1823                    formats::plist::read_binary_plist_tags(data).or_else(|_| Ok(Vec::new()))
1824                } else {
1825                    formats::plist::read_aae_plist(data).or_else(|_| Ok(Vec::new()))
1826                }
1827            }
1828            "vcf" | "ics" | "vcard" => {
1829                let s = crate::encoding::decode_utf8_or_latin1(&data[..data.len().min(100)]);
1830                if s.contains("BEGIN:VCALENDAR") {
1831                    formats::vcard::read_ics(data).or_else(|_| Ok(Vec::new()))
1832                } else {
1833                    formats::vcard::read_vcf(data).or_else(|_| Ok(Vec::new()))
1834                }
1835            }
1836            "xcf" => Ok(Vec::new()), // GIMP
1837            "vrd" => formats::canon_vrd::read_vrd(data).or_else(|_| Ok(Vec::new())),
1838            "dr4" => formats::canon_vrd::read_dr4(data).or_else(|_| Ok(Vec::new())),
1839            "indd" | "indt" => Ok(Vec::new()), // InDesign
1840            "x3f" => formats::sigma_raw::read_x3f(data).or_else(|_| Ok(Vec::new())),
1841            "mie" => Ok(Vec::new()), // MIE
1842            "exr" => Ok(Vec::new()), // OpenEXR
1843            "wpg" => formats::wpg::read_wpg(data).or_else(|_| Ok(Vec::new())),
1844            "moi" => formats::moi::read_moi(data).or_else(|_| Ok(Vec::new())),
1845            "macos" => formats::macos::read_macos(data).or_else(|_| Ok(Vec::new())),
1846            "dpx" => formats::dpx::read_dpx(data).or_else(|_| Ok(Vec::new())),
1847            "r3d" => formats::red::read_r3d(data).or_else(|_| Ok(Vec::new())),
1848            "tnef" => formats::tnef::read_tnef(data).or_else(|_| Ok(Vec::new())),
1849            "ppt" | "fpx" => formats::flashpix::read_fpx(data).or_else(|_| Ok(Vec::new())),
1850            "fpf" => formats::flir_fpf::read_fpf(data).or_else(|_| Ok(Vec::new())),
1851            "itc" => formats::itc::read_itc(data).or_else(|_| Ok(Vec::new())),
1852            "mpg" | "mpeg" | "m1v" | "m2v" | "mpv" => {
1853                formats::mpeg::read_mpeg(data).or_else(|_| Ok(Vec::new()))
1854            }
1855            "dv" => formats::dv::read_dv(data, data.len() as u64).or_else(|_| Ok(Vec::new())),
1856            "czi" => formats::czi::read_czi(data).or_else(|_| Ok(Vec::new())),
1857            "miff" => formats::miff::read_miff(data).or_else(|_| Ok(Vec::new())),
1858            "lfp" | "mrc" | "dss" | "mobi" | "psp" | "pgf" | "raw" | "pmp" | "torrent" | "xisf"
1859            | "mxf" | "dfont" => Ok(Vec::new()),
1860            "iso" => formats::iso::read_iso(data).or_else(|_| Ok(Vec::new())),
1861            "afm" => formats::font::read_afm(data).or_else(|_| Ok(Vec::new())),
1862            "pfa" => formats::font::read_pfa(data).or_else(|_| Ok(Vec::new())),
1863            "pfb" => formats::font::read_pfb(data).or_else(|_| Ok(Vec::new())),
1864            _ => Err(Error::UnsupportedFileType(ext)),
1865        }
1866    }
1867}
1868
1869impl Default for ExifTool {
1870    fn default() -> Self {
1871        Self::new()
1872    }
1873}
1874
1875/// Detect OpenDocument file type by reading the `mimetype` entry from a ZIP.
1876/// Returns None if not an OpenDocument file.
1877fn detect_opendocument_type(data: &[u8]) -> Option<FileType> {
1878    // OpenDocument ZIPs have "mimetype" as the FIRST local file entry (uncompressed)
1879    if data.len() < 30 || data[0..4] != [0x50, 0x4B, 0x03, 0x04] {
1880        return None;
1881    }
1882    let compression = u16::from_le_bytes([data[8], data[9]]);
1883    let compressed_size = u32::from_le_bytes([data[18], data[19], data[20], data[21]]) as usize;
1884    let name_len = u16::from_le_bytes([data[26], data[27]]) as usize;
1885    let extra_len = u16::from_le_bytes([data[28], data[29]]) as usize;
1886    let name_start = 30;
1887    if name_start + name_len > data.len() {
1888        return None;
1889    }
1890    let filename = std::str::from_utf8(&data[name_start..name_start + name_len]).unwrap_or("");
1891    if filename != "mimetype" || compression != 0 {
1892        return None;
1893    }
1894    let content_start = name_start + name_len + extra_len;
1895    let content_end = (content_start + compressed_size).min(data.len());
1896    if content_start >= content_end {
1897        return None;
1898    }
1899    let mime = std::str::from_utf8(&data[content_start..content_end])
1900        .unwrap_or("")
1901        .trim();
1902    match mime {
1903        "application/vnd.oasis.opendocument.spreadsheet" => Some(FileType::Ods),
1904        "application/vnd.oasis.opendocument.text" => Some(FileType::Odt),
1905        "application/vnd.oasis.opendocument.presentation" => Some(FileType::Odp),
1906        "application/vnd.oasis.opendocument.graphics" => Some(FileType::Odg),
1907        "application/vnd.oasis.opendocument.formula" => Some(FileType::Odf),
1908        "application/vnd.oasis.opendocument.database" => Some(FileType::Odb),
1909        "application/vnd.oasis.opendocument.image" => Some(FileType::Odi),
1910        "application/vnd.oasis.opendocument.chart" => Some(FileType::Odc),
1911        _ => None,
1912    }
1913}
1914
1915/// Detect the file type of a file at the given path.
1916pub fn get_file_type<P: AsRef<Path>>(path: P) -> Result<FileType> {
1917    let path = path.as_ref();
1918    let mut file = fs::File::open(path).map_err(Error::Io)?;
1919    let mut header = [0u8; 256];
1920    use std::io::Read;
1921    let n = file.read(&mut header).map_err(Error::Io)?;
1922
1923    if let Some(ft) = file_type::detect_from_magic(&header[..n]) {
1924        return Ok(ft);
1925    }
1926
1927    if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1928        if let Some(ft) = file_type::detect_from_extension(ext) {
1929            return Ok(ft);
1930        }
1931    }
1932
1933    Err(Error::UnsupportedFileType("unknown".into()))
1934}
1935
/// Classification of EXIF tags into IFD groups.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ExifIfdGroup {
    /// Default group for every tag ID not claimed by the other two.
    Ifd0,
    /// Tag IDs in the ExifIFD ranges.
    ExifIfd,
    /// Tag IDs `0x0000..=0x001F`.
    Gps,
}

/// Determine which IFD a tag belongs to based on its ID.
///
/// The decision uses the numeric ID only:
/// - `0x0000..=0x001F` → [`ExifIfdGroup::Gps`]
/// - `0x829A..=0x829D`, `0x8822..=0x8827`, `0x8830`, `0x9000..=0x9292`,
///   `0xA000..=0xA435` → [`ExifIfdGroup::ExifIfd`]
/// - anything else → [`ExifIfdGroup::Ifd0`]
fn classify_exif_tag(tag_id: u16) -> ExifIfdGroup {
    match tag_id {
        // GPS tags (the range pattern already bounds the ID; no extra guard needed)
        0x0000..=0x001F => ExifIfdGroup::Gps,
        // ExifIFD tags
        0x829A..=0x829D | 0x8822..=0x8827 | 0x8830 | 0x9000..=0x9292 | 0xA000..=0xA435 => {
            ExifIfdGroup::ExifIfd
        }
        // Everything else → IFD0
        _ => ExifIfdGroup::Ifd0,
    }
}
1956
1957/// Extract existing EXIF entries from a JPEG file's APP1 segment.
1958fn extract_existing_exif_entries(
1959    jpeg_data: &[u8],
1960    target_bo: ByteOrderMark,
1961) -> Vec<exif_writer::IfdEntry> {
1962    let mut entries = Vec::new();
1963
1964    // Find EXIF APP1 segment
1965    let mut pos = 2; // Skip SOI
1966    while pos + 4 <= jpeg_data.len() {
1967        if jpeg_data[pos] != 0xFF {
1968            pos += 1;
1969            continue;
1970        }
1971        let marker = jpeg_data[pos + 1];
1972        pos += 2;
1973
1974        if marker == 0xDA || marker == 0xD9 {
1975            break; // SOS or EOI
1976        }
1977        if marker == 0xFF || marker == 0x00 || marker == 0xD8 || (0xD0..=0xD7).contains(&marker) {
1978            continue;
1979        }
1980
1981        if pos + 2 > jpeg_data.len() {
1982            break;
1983        }
1984        let seg_len = u16::from_be_bytes([jpeg_data[pos], jpeg_data[pos + 1]]) as usize;
1985        if seg_len < 2 || pos + seg_len > jpeg_data.len() {
1986            break;
1987        }
1988
1989        let seg_data = &jpeg_data[pos + 2..pos + seg_len];
1990
1991        // EXIF APP1
1992        if marker == 0xE1 && seg_data.len() > 14 && seg_data.starts_with(b"Exif\0\0") {
1993            let tiff_data = &seg_data[6..];
1994            extract_ifd_entries(tiff_data, target_bo, &mut entries);
1995            break;
1996        }
1997
1998        pos += seg_len;
1999    }
2000
2001    entries
2002}
2003
2004/// Extract IFD entries from TIFF data, re-encoding values in the target byte order.
2005fn extract_ifd_entries(
2006    tiff_data: &[u8],
2007    target_bo: ByteOrderMark,
2008    entries: &mut Vec<exif_writer::IfdEntry>,
2009) {
2010    use crate::metadata::exif::parse_tiff_header;
2011
2012    let header = match parse_tiff_header(tiff_data) {
2013        Ok(h) => h,
2014        Err(_) => return,
2015    };
2016
2017    let src_bo = header.byte_order;
2018
2019    // Read IFD0
2020    read_ifd_for_merge(
2021        tiff_data,
2022        header.ifd0_offset as usize,
2023        src_bo,
2024        target_bo,
2025        entries,
2026    );
2027
2028    // Find ExifIFD and GPS pointers
2029    let ifd0_offset = header.ifd0_offset as usize;
2030    if ifd0_offset + 2 > tiff_data.len() {
2031        return;
2032    }
2033    let count = read_u16_bo(tiff_data, ifd0_offset, src_bo) as usize;
2034    for i in 0..count {
2035        let eoff = ifd0_offset + 2 + i * 12;
2036        if eoff + 12 > tiff_data.len() {
2037            break;
2038        }
2039        let tag = read_u16_bo(tiff_data, eoff, src_bo);
2040        let value_off = read_u32_bo(tiff_data, eoff + 8, src_bo) as usize;
2041
2042        match tag {
2043            0x8769 => read_ifd_for_merge(tiff_data, value_off, src_bo, target_bo, entries),
2044            0x8825 => read_ifd_for_merge(tiff_data, value_off, src_bo, target_bo, entries),
2045            _ => {}
2046        }
2047    }
2048}
2049
2050/// Read a single IFD and extract entries for merge.
2051fn read_ifd_for_merge(
2052    data: &[u8],
2053    offset: usize,
2054    src_bo: ByteOrderMark,
2055    target_bo: ByteOrderMark,
2056    entries: &mut Vec<exif_writer::IfdEntry>,
2057) {
2058    if offset + 2 > data.len() {
2059        return;
2060    }
2061    let count = read_u16_bo(data, offset, src_bo) as usize;
2062
2063    for i in 0..count {
2064        let eoff = offset + 2 + i * 12;
2065        if eoff + 12 > data.len() {
2066            break;
2067        }
2068
2069        let tag = read_u16_bo(data, eoff, src_bo);
2070        let dtype = read_u16_bo(data, eoff + 2, src_bo);
2071        let count_val = read_u32_bo(data, eoff + 4, src_bo);
2072
2073        // Skip sub-IFD pointers and MakerNote
2074        if tag == 0x8769 || tag == 0x8825 || tag == 0xA005 || tag == 0x927C {
2075            continue;
2076        }
2077
2078        let type_size = match dtype {
2079            1 | 2 | 6 | 7 => 1usize,
2080            3 | 8 => 2,
2081            4 | 9 | 11 | 13 => 4,
2082            5 | 10 | 12 => 8,
2083            _ => continue,
2084        };
2085
2086        let total_size = type_size * count_val as usize;
2087        let raw_data = if total_size <= 4 {
2088            data[eoff + 8..eoff + 12].to_vec()
2089        } else {
2090            let voff = read_u32_bo(data, eoff + 8, src_bo) as usize;
2091            if voff + total_size > data.len() {
2092                continue;
2093            }
2094            data[voff..voff + total_size].to_vec()
2095        };
2096
2097        // Re-encode multi-byte values if byte orders differ
2098        let final_data = if src_bo != target_bo && type_size > 1 {
2099            reencode_bytes(&raw_data, dtype, count_val as usize, src_bo, target_bo)
2100        } else {
2101            raw_data[..total_size].to_vec()
2102        };
2103
2104        let format = match dtype {
2105            1 => exif_writer::ExifFormat::Byte,
2106            2 => exif_writer::ExifFormat::Ascii,
2107            3 => exif_writer::ExifFormat::Short,
2108            4 => exif_writer::ExifFormat::Long,
2109            5 => exif_writer::ExifFormat::Rational,
2110            6 => exif_writer::ExifFormat::SByte,
2111            7 => exif_writer::ExifFormat::Undefined,
2112            8 => exif_writer::ExifFormat::SShort,
2113            9 => exif_writer::ExifFormat::SLong,
2114            10 => exif_writer::ExifFormat::SRational,
2115            11 => exif_writer::ExifFormat::Float,
2116            12 => exif_writer::ExifFormat::Double,
2117            _ => continue,
2118        };
2119
2120        entries.push(exif_writer::IfdEntry {
2121            tag,
2122            format,
2123            data: final_data,
2124        });
2125    }
2126}
2127
2128/// Re-encode multi-byte values when converting between byte orders.
2129fn reencode_bytes(
2130    data: &[u8],
2131    dtype: u16,
2132    count: usize,
2133    src_bo: ByteOrderMark,
2134    dst_bo: ByteOrderMark,
2135) -> Vec<u8> {
2136    let mut out = Vec::with_capacity(data.len());
2137    match dtype {
2138        3 | 8 => {
2139            // 16-bit
2140            for i in 0..count {
2141                let v = read_u16_bo(data, i * 2, src_bo);
2142                match dst_bo {
2143                    ByteOrderMark::LittleEndian => out.extend_from_slice(&v.to_le_bytes()),
2144                    ByteOrderMark::BigEndian => out.extend_from_slice(&v.to_be_bytes()),
2145                }
2146            }
2147        }
2148        4 | 9 | 11 | 13 => {
2149            // 32-bit
2150            for i in 0..count {
2151                let v = read_u32_bo(data, i * 4, src_bo);
2152                match dst_bo {
2153                    ByteOrderMark::LittleEndian => out.extend_from_slice(&v.to_le_bytes()),
2154                    ByteOrderMark::BigEndian => out.extend_from_slice(&v.to_be_bytes()),
2155                }
2156            }
2157        }
2158        5 | 10 => {
2159            // Rational (two 32-bit)
2160            for i in 0..count {
2161                let n = read_u32_bo(data, i * 8, src_bo);
2162                let d = read_u32_bo(data, i * 8 + 4, src_bo);
2163                match dst_bo {
2164                    ByteOrderMark::LittleEndian => {
2165                        out.extend_from_slice(&n.to_le_bytes());
2166                        out.extend_from_slice(&d.to_le_bytes());
2167                    }
2168                    ByteOrderMark::BigEndian => {
2169                        out.extend_from_slice(&n.to_be_bytes());
2170                        out.extend_from_slice(&d.to_be_bytes());
2171                    }
2172                }
2173            }
2174        }
2175        12 => {
2176            // 64-bit double
2177            for i in 0..count {
2178                let mut bytes = [0u8; 8];
2179                bytes.copy_from_slice(&data[i * 8..i * 8 + 8]);
2180                if src_bo != dst_bo {
2181                    bytes.reverse();
2182                }
2183                out.extend_from_slice(&bytes);
2184            }
2185        }
2186        _ => out.extend_from_slice(data),
2187    }
2188    out
2189}
2190
2191fn read_u16_bo(data: &[u8], offset: usize, bo: ByteOrderMark) -> u16 {
2192    if offset + 2 > data.len() {
2193        return 0;
2194    }
2195    match bo {
2196        ByteOrderMark::LittleEndian => u16::from_le_bytes([data[offset], data[offset + 1]]),
2197        ByteOrderMark::BigEndian => u16::from_be_bytes([data[offset], data[offset + 1]]),
2198    }
2199}
2200
2201fn read_u32_bo(data: &[u8], offset: usize, bo: ByteOrderMark) -> u32 {
2202    if offset + 4 > data.len() {
2203        return 0;
2204    }
2205    match bo {
2206        ByteOrderMark::LittleEndian => u32::from_le_bytes([
2207            data[offset],
2208            data[offset + 1],
2209            data[offset + 2],
2210            data[offset + 3],
2211        ]),
2212        ByteOrderMark::BigEndian => u32::from_be_bytes([
2213            data[offset],
2214            data[offset + 1],
2215            data[offset + 2],
2216            data[offset + 3],
2217        ]),
2218    }
2219}
2220
2221/// Map tag name to numeric EXIF tag ID.
2222fn tag_name_to_id(name: &str) -> Option<u16> {
2223    encode_exif_tag(name, "", "", ByteOrderMark::BigEndian).map(|(id, _, _)| id)
2224}
2225
/// Turn a tag value into a string that is safe to use as a file name.
///
/// The characters `/ \ : * ? " < > |` and every control character are each
/// replaced by `_`; leading and trailing whitespace is then trimmed.
fn value_to_filename(value: &str) -> String {
    const RESERVED: [char; 9] = ['/', '\\', ':', '*', '?', '"', '<', '>', '|'];

    let mut sanitized = String::with_capacity(value.len());
    for ch in value.chars() {
        if RESERVED.contains(&ch) || ch.is_control() {
            sanitized.push('_');
        } else {
            sanitized.push(ch);
        }
    }
    sanitized.trim().to_string()
}
2239
/// Parse a time shift such as `"+1:0:0"` (add 1 hour) or `"-0:30:0"` (subtract 30 min).
///
/// An optional leading `+` or `-` gives the sign (`+` when absent). The rest is
/// one to three `:`-separated unsigned numbers read as hours, then minutes, then
/// seconds; fields that are left out are 0.
///
/// Returns `(sign, hours, minutes, seconds)` with `sign` being `1` or `-1`, or
/// `None` when a field is not a valid `u32` or there are more than three fields.
pub fn parse_date_shift(shift: &str) -> Option<(i32, u32, u32, u32)> {
    let (sign, magnitude) = match shift.strip_prefix('-') {
        Some(rest) => (-1, rest),
        None => (1, shift.strip_prefix('+').unwrap_or(shift)),
    };

    // Slots for hours, minutes and seconds, filled from left to right.
    let mut fields = [0u32; 3];
    let mut filled = 0;
    for part in magnitude.split(':') {
        if filled == fields.len() {
            return None;
        }
        fields[filled] = part.parse().ok()?;
        filled += 1;
    }

    Some((sign, fields[0], fields[1], fields[2]))
}
2271
2272/// Shift a datetime string by the given amount.
2273/// Input format: "YYYY:MM:DD HH:MM:SS"
2274pub fn shift_datetime(datetime: &str, shift: &str) -> Option<String> {
2275    let (sign, hours, minutes, seconds) = parse_date_shift(shift)?;
2276
2277    // Parse date/time
2278    if datetime.len() < 19 {
2279        return None;
2280    }
2281    let year: i32 = datetime[0..4].parse().ok()?;
2282    let month: u32 = datetime[5..7].parse().ok()?;
2283    let day: u32 = datetime[8..10].parse().ok()?;
2284    let hour: u32 = datetime[11..13].parse().ok()?;
2285    let min: u32 = datetime[14..16].parse().ok()?;
2286    let sec: u32 = datetime[17..19].parse().ok()?;
2287
2288    // Convert to total seconds, shift, convert back
2289    let total_secs = (hour * 3600 + min * 60 + sec) as i64
2290        + sign as i64 * (hours * 3600 + minutes * 60 + seconds) as i64;
2291
2292    let days_shift = if total_secs < 0 {
2293        -1 - (-total_secs - 1) / 86400
2294    } else {
2295        total_secs / 86400
2296    };
2297
2298    let time_secs = ((total_secs % 86400) + 86400) % 86400;
2299    let new_hour = (time_secs / 3600) as u32;
2300    let new_min = ((time_secs % 3600) / 60) as u32;
2301    let new_sec = (time_secs % 60) as u32;
2302
2303    // Simple day shifting (doesn't handle month/year rollover perfectly for large shifts)
2304    let mut new_day = day as i32 + days_shift as i32;
2305    let mut new_month = month;
2306    let mut new_year = year;
2307
2308    let days_in_month = |m: u32, y: i32| -> i32 {
2309        match m {
2310            1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
2311            4 | 6 | 9 | 11 => 30,
2312            2 => {
2313                if (y % 4 == 0 && y % 100 != 0) || y % 400 == 0 {
2314                    29
2315                } else {
2316                    28
2317                }
2318            }
2319            _ => 30,
2320        }
2321    };
2322
2323    while new_day > days_in_month(new_month, new_year) {
2324        new_day -= days_in_month(new_month, new_year);
2325        new_month += 1;
2326        if new_month > 12 {
2327            new_month = 1;
2328            new_year += 1;
2329        }
2330    }
2331    while new_day < 1 {
2332        new_month = if new_month == 1 { 12 } else { new_month - 1 };
2333        if new_month == 12 {
2334            new_year -= 1;
2335        }
2336        new_day += days_in_month(new_month, new_year);
2337    }
2338
2339    Some(format!(
2340        "{:04}:{:02}:{:02} {:02}:{:02}:{:02}",
2341        new_year, new_month, new_day, new_hour, new_min, new_sec
2342    ))
2343}
2344
/// Format a Unix timestamp (seconds relative to 1970-01-01 00:00:00) as
/// `YYYY:MM:DD HH:MM:SS`.
///
/// No time-zone adjustment is applied. Negative timestamps are supported and
/// yield dates before 1970. Leap years follow the usual divisible-by-4 /
/// not-by-100 / by-400 rule.
fn unix_to_datetime(secs: i64) -> String {
    const SECS_PER_DAY: i64 = 86_400;
    /// Days in 400 consecutive years under the leap-year rule used here.
    const DAYS_PER_400_YEARS: i64 = 146_097;

    fn is_leap(year: i64) -> bool {
        (year % 4 == 0 && year % 100 != 0) || year % 400 == 0
    }

    // Euclidean division keeps the time of day in 0..86_400 for negative input.
    let days = secs.div_euclid(SECS_PER_DAY);
    let time = secs.rem_euclid(SECS_PER_DAY);

    // Jump whole 400-year cycles first: this handles negative day counts and
    // keeps the year loop below to at most 400 iterations.
    let mut year = 1970 + 400 * days.div_euclid(DAYS_PER_400_YEARS);
    let mut rem = days.rem_euclid(DAYS_PER_400_YEARS);
    loop {
        let year_len = if is_leap(year) { 366 } else { 365 };
        if rem < year_len {
            break;
        }
        rem -= year_len;
        year += 1;
    }

    let february = if is_leap(year) { 29 } else { 28 };
    let month_lengths: [i64; 12] = [31, february, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
    let mut month = 1;
    for &len in &month_lengths {
        if rem < len {
            break;
        }
        rem -= len;
        month += 1;
    }

    format!(
        "{:04}:{:02}:{:02} {:02}:{:02}:{:02}",
        year,
        month,
        rem + 1, // `rem` is a zero-based day of the month at this point
        time / 3600,
        (time % 3600) / 60,
        time % 60
    )
}
2398
/// Render a byte count as a short human-readable size string.
///
/// Counts below 1024 are printed as whole bytes. Larger counts are divided by
/// the matching power of 1024 and printed with one decimal place, using the
/// suffix `kB`, `MB` or `GB` (everything from 1024^3 upwards is shown in GB).
fn format_file_size(bytes: u64) -> String {
    const KILO: u64 = 1 << 10;
    const MEGA: u64 = 1 << 20;
    const GIGA: u64 = 1 << 30;

    // All divisors are powers of two, so the u64 -> f64 conversion is exact.
    let scaled = |unit: u64| bytes as f64 / unit as f64;

    match bytes {
        b if b < KILO => format!("{} bytes", b),
        b if b < MEGA => format!("{:.1} kB", scaled(KILO)),
        b if b < GIGA => format!("{:.1} MB", scaled(MEGA)),
        _ => format!("{:.1} GB", scaled(GIGA)),
    }
}
2410
/// Report whether `tag` is one of the names this file treats as XMP tags.
///
/// The comparison is case-insensitive: the name is lower-cased and looked up
/// in a fixed list of known names.
fn is_xmp_tag(tag: &str) -> bool {
    const XMP_TAG_NAMES: [&str; 9] = [
        "title",
        "description",
        "subject",
        "creator",
        "rights",
        "keywords",
        "rating",
        "label",
        "hierarchicalsubject",
    ];

    let lowered = tag.to_lowercase();
    XMP_TAG_NAMES.contains(&lowered.as_str())
}
2426
2427/// Encode an EXIF tag value to binary.
2428/// Returns (tag_id, format, encoded_data) or None if tag is unknown.
2429fn encode_exif_tag(
2430    tag_name: &str,
2431    value: &str,
2432    _group: &str,
2433    bo: ByteOrderMark,
2434) -> Option<(u16, exif_writer::ExifFormat, Vec<u8>)> {
2435    let tag_lower = tag_name.to_lowercase();
2436
2437    // Map common tag names to EXIF tag IDs and formats
2438    let (tag_id, format): (u16, exif_writer::ExifFormat) = match tag_lower.as_str() {
2439        // IFD0 string tags
2440        "imagedescription" => (0x010E, exif_writer::ExifFormat::Ascii),
2441        "make" => (0x010F, exif_writer::ExifFormat::Ascii),
2442        "model" => (0x0110, exif_writer::ExifFormat::Ascii),
2443        "software" => (0x0131, exif_writer::ExifFormat::Ascii),
2444        "modifydate" | "datetime" => (0x0132, exif_writer::ExifFormat::Ascii),
2445        "artist" => (0x013B, exif_writer::ExifFormat::Ascii),
2446        "copyright" => (0x8298, exif_writer::ExifFormat::Ascii),
2447        // IFD0 numeric tags
2448        "orientation" => (0x0112, exif_writer::ExifFormat::Short),
2449        "xresolution" => (0x011A, exif_writer::ExifFormat::Rational),
2450        "yresolution" => (0x011B, exif_writer::ExifFormat::Rational),
2451        "resolutionunit" => (0x0128, exif_writer::ExifFormat::Short),
2452        // ExifIFD tags
2453        "datetimeoriginal" => (0x9003, exif_writer::ExifFormat::Ascii),
2454        "createdate" | "datetimedigitized" => (0x9004, exif_writer::ExifFormat::Ascii),
2455        "usercomment" => (0x9286, exif_writer::ExifFormat::Undefined),
2456        "imageuniqueid" => (0xA420, exif_writer::ExifFormat::Ascii),
2457        "ownername" | "cameraownername" => (0xA430, exif_writer::ExifFormat::Ascii),
2458        "serialnumber" | "bodyserialnumber" => (0xA431, exif_writer::ExifFormat::Ascii),
2459        "lensmake" => (0xA433, exif_writer::ExifFormat::Ascii),
2460        "lensmodel" => (0xA434, exif_writer::ExifFormat::Ascii),
2461        "lensserialnumber" => (0xA435, exif_writer::ExifFormat::Ascii),
2462        _ => return None,
2463    };
2464
2465    let encoded = match format {
2466        exif_writer::ExifFormat::Ascii => exif_writer::encode_ascii(value),
2467        exif_writer::ExifFormat::Short => {
2468            let v: u16 = value.parse().ok()?;
2469            exif_writer::encode_u16(v, bo)
2470        }
2471        exif_writer::ExifFormat::Long => {
2472            let v: u32 = value.parse().ok()?;
2473            exif_writer::encode_u32(v, bo)
2474        }
2475        exif_writer::ExifFormat::Rational => {
2476            // Parse "N/D" or just "N"
2477            if let Some(slash) = value.find('/') {
2478                let num: u32 = value[..slash].trim().parse().ok()?;
2479                let den: u32 = value[slash + 1..].trim().parse().ok()?;
2480                exif_writer::encode_urational(num, den, bo)
2481            } else if let Ok(v) = value.parse::<f64>() {
2482                // Convert float to rational
2483                let den = 10000u32;
2484                let num = (v * den as f64).round() as u32;
2485                exif_writer::encode_urational(num, den, bo)
2486            } else {
2487                return None;
2488            }
2489        }
2490        exif_writer::ExifFormat::Undefined => {
2491            // UserComment: 8 bytes charset + data
2492            let mut data = vec![0x41, 0x53, 0x43, 0x49, 0x49, 0x00, 0x00, 0x00]; // "ASCII\0\0\0"
2493            data.extend_from_slice(value.as_bytes());
2494            data
2495        }
2496        _ => return None,
2497    };
2498
2499    Some((tag_id, format, encoded))
2500}
2501
2502/// Compute text file tags (from Perl Text.pm).
2503fn compute_text_tags(data: &[u8], is_csv: bool) -> Vec<Tag> {
2504    let mut tags = Vec::new();
2505    let mk = |name: &str, val: String| Tag {
2506        id: crate::tag::TagId::Text(name.into()),
2507        name: name.into(),
2508        description: name.into(),
2509        group: crate::tag::TagGroup {
2510            family0: "File".into(),
2511            family1: "File".into(),
2512            family2: "Other".into(),
2513        },
2514        raw_value: Value::String(val.clone()),
2515        print_value: val,
2516        priority: 0,
2517    };
2518
2519    // Detect encoding and BOM
2520    let is_ascii = data.iter().all(|&b| b < 128);
2521    let has_utf8_bom = data.starts_with(&[0xEF, 0xBB, 0xBF]);
2522    let has_utf16le_bom =
2523        data.starts_with(&[0xFF, 0xFE]) && !data.starts_with(&[0xFF, 0xFE, 0x00, 0x00]);
2524    let has_utf16be_bom = data.starts_with(&[0xFE, 0xFF]);
2525    let has_utf32le_bom = data.starts_with(&[0xFF, 0xFE, 0x00, 0x00]);
2526    let has_utf32be_bom = data.starts_with(&[0x00, 0x00, 0xFE, 0xFF]);
2527
2528    // Detect if file has weird non-text control characters (like multi-byte unicode without BOM)
2529    let has_weird_ctrl = data.iter().any(|&b| {
2530        (b <= 0x06) || (0x0e..=0x1a).contains(&b) || (0x1c..=0x1f).contains(&b) || b == 0x7f
2531    });
2532
2533    let (encoding, is_bom, is_utf16) = if has_utf32le_bom {
2534        ("utf-32le", true, false)
2535    } else if has_utf32be_bom {
2536        ("utf-32be", true, false)
2537    } else if has_utf16le_bom {
2538        ("utf-16le", true, true)
2539    } else if has_utf16be_bom {
2540        ("utf-16be", true, true)
2541    } else if has_weird_ctrl {
2542        // Not a text file (has binary-like control chars but no recognized multi-byte marker)
2543        return tags;
2544    } else if is_ascii {
2545        ("us-ascii", false, false)
2546    } else {
2547        // Check UTF-8
2548        let is_valid_utf8 = std::str::from_utf8(data).is_ok();
2549        if is_valid_utf8 {
2550            if has_utf8_bom {
2551                ("utf-8", true, false)
2552            } else {
2553                // Check if it has high bytes suggesting iso-8859-1 vs utf-8
2554                // Perl's IsUTF8: returns >0 if valid UTF-8 with multi-byte, 0 if ASCII, <0 if invalid
2555                // For simplicity: valid UTF-8 without BOM = utf-8
2556                ("utf-8", false, false)
2557            }
2558        } else if !data.iter().any(|&b| (0x80..=0x9f).contains(&b)) {
2559            ("iso-8859-1", false, false)
2560        } else {
2561            ("unknown-8bit", false, false)
2562        }
2563    };
2564
2565    tags.push(mk("MIMEEncoding", encoding.into()));
2566
2567    if is_bom {
2568        tags.push(mk("ByteOrderMark", "Yes".into()));
2569    }
2570
2571    // Count newlines and detect type
2572    let has_cr = data.contains(&b'\r');
2573    let has_lf = data.contains(&b'\n');
2574    let newline_type = if has_cr && has_lf {
2575        "Windows CRLF"
2576    } else if has_lf {
2577        "Unix LF"
2578    } else if has_cr {
2579        "Macintosh CR"
2580    } else {
2581        "(none)"
2582    };
2583    tags.push(mk("Newlines", newline_type.into()));
2584
2585    if is_csv {
2586        // CSV analysis: detect delimiter, quoting, column count, row count
2587        let text = crate::encoding::decode_utf8_or_latin1(data);
2588        let mut delim = "";
2589        let mut quot = "";
2590        let mut ncols = 1usize;
2591        let mut nrows = 0usize;
2592
2593        for line in text.lines() {
2594            if nrows == 0 {
2595                // Detect delimiter from first line
2596                let comma_count = line.matches(',').count();
2597                let semi_count = line.matches(';').count();
2598                let tab_count = line.matches('\t').count();
2599                if comma_count > semi_count && comma_count > tab_count {
2600                    delim = ",";
2601                    ncols = comma_count + 1;
2602                } else if semi_count > tab_count {
2603                    delim = ";";
2604                    ncols = semi_count + 1;
2605                } else if tab_count > 0 {
2606                    delim = "\t";
2607                    ncols = tab_count + 1;
2608                } else {
2609                    delim = "";
2610                    ncols = 1;
2611                }
2612                // Detect quoting
2613                if line.contains('"') {
2614                    quot = "\"";
2615                } else if line.contains('\'') {
2616                    quot = "'";
2617                }
2618            }
2619            nrows += 1;
2620            if nrows >= 1000 {
2621                break;
2622            }
2623        }
2624
2625        let delim_display = match delim {
2626            "," => "Comma",
2627            ";" => "Semicolon",
2628            "\t" => "Tab",
2629            _ => "(none)",
2630        };
2631        let quot_display = match quot {
2632            "\"" => "Double quotes",
2633            "'" => "Single quotes",
2634            _ => "(none)",
2635        };
2636
2637        tags.push(mk("Delimiter", delim_display.into()));
2638        tags.push(mk("Quoting", quot_display.into()));
2639        tags.push(mk("ColumnCount", ncols.to_string()));
2640        if nrows > 0 {
2641            tags.push(mk("RowCount", nrows.to_string()));
2642        }
2643    } else if !is_utf16 {
2644        // Line count and word count for plain text files (not UTF-16/32)
2645        let line_count = data.iter().filter(|&&b| b == b'\n').count();
2646        let line_count = if line_count == 0 && !data.is_empty() {
2647            1
2648        } else {
2649            line_count
2650        };
2651        tags.push(mk("LineCount", line_count.to_string()));
2652
2653        let text = crate::encoding::decode_utf8_or_latin1(data);
2654        let word_count = text.split_whitespace().count();
2655        tags.push(mk("WordCount", word_count.to_string()));
2656    }
2657
2658    tags
2659}
2660
#[cfg(test)]
mod tests {
    use super::*;

    /// `ExifTool::new()` starts out with every option at its default.
    #[test]
    fn new_has_default_options() {
        let et = ExifTool::new();
        let opts = et.options();
        assert!(!opts.duplicates);
        assert!(opts.print_conv);
        assert_eq!(opts.fast_scan, 0);
        assert!(opts.requested_tags.is_empty());
        assert_eq!(opts.extract_embedded, 0);
        assert_eq!(opts.show_unknown, 0);
        assert!(!opts.process_compressed);
        assert!(!opts.use_mwg);
    }

    /// Options handed to `with_options` are stored unchanged.
    #[test]
    fn with_options_preserves_custom() {
        let custom = Options {
            duplicates: true,
            print_conv: false,
            fast_scan: 2,
            requested_tags: vec!["Artist".to_string()],
            extract_embedded: 1,
            show_unknown: 1,
            process_compressed: true,
            use_mwg: true,
        };
        let et = ExifTool::with_options(custom.clone());
        let active = et.options();
        assert!(active.duplicates);
        assert!(!active.print_conv);
        assert_eq!(active.fast_scan, 2);
        assert_eq!(active.requested_tags, vec!["Artist".to_string()]);
        assert_eq!(active.extract_embedded, 1);
        assert_eq!(active.show_unknown, 1);
        assert!(active.process_compressed);
        assert!(active.use_mwg);
    }

    /// A bare tag name is queued without a group.
    #[test]
    fn set_new_value_simple_tag() {
        let mut et = ExifTool::new();
        et.set_new_value("Artist", Some("John"));
        assert_eq!(et.new_values.len(), 1);

        let queued = &et.new_values[0];
        assert_eq!(queued.tag, "Artist");
        assert_eq!(queued.group, None);
        assert_eq!(queued.value, Some("John".to_string()));
    }

    /// A `Group:Tag` name is split into its group and tag parts.
    #[test]
    fn set_new_value_with_group_prefix() {
        let mut et = ExifTool::new();
        et.set_new_value("XMP:Title", Some("Test"));
        assert_eq!(et.new_values.len(), 1);

        let queued = &et.new_values[0];
        assert_eq!(queued.tag, "Title");
        assert_eq!(queued.group, Some("XMP".to_string()));
        assert_eq!(queued.value, Some("Test".to_string()));
    }

    /// Passing `None` as the value queues an entry with no value.
    #[test]
    fn set_new_value_delete() {
        let mut et = ExifTool::new();
        et.set_new_value("Comment", None);
        assert_eq!(et.new_values.len(), 1);

        let queued = &et.new_values[0];
        assert_eq!(queued.tag, "Comment");
        assert_eq!(queued.value, None);
    }

    /// `clear_new_values` drops everything that was queued.
    #[test]
    fn clear_new_values_empties_queue() {
        let mut et = ExifTool::new();
        et.set_new_value("Artist", Some("A"));
        et.set_new_value("Copyright", Some("B"));
        assert_eq!(et.new_values.len(), 2);

        et.clear_new_values();
        assert!(et.new_values.is_empty());
    }

    /// Several queued values keep their insertion order and individual fields.
    #[test]
    fn set_new_value_multiple() {
        let mut et = ExifTool::new();
        et.set_new_value("Artist", Some("John"));
        et.set_new_value("IPTC:Keywords", Some("test"));
        et.set_new_value("XMP:Subject", None);
        assert_eq!(et.new_values.len(), 3);

        let second = &et.new_values[1];
        assert_eq!(second.group, Some("IPTC".to_string()));
        assert_eq!(second.tag, "Keywords");

        let third = &et.new_values[2];
        assert_eq!(third.value, None);
    }

    /// Changes made through `options_mut` are visible through `options`.
    #[test]
    fn options_mut_modifies() {
        let mut et = ExifTool::new();
        {
            let opts = et.options_mut();
            opts.duplicates = true;
            opts.fast_scan = 3;
        }
        assert!(et.options().duplicates);
        assert_eq!(et.options().fast_scan, 3);
    }

    /// `Options::default()` has the expected values for the checked fields.
    #[test]
    fn default_options() {
        let Options {
            duplicates,
            print_conv,
            fast_scan,
            ..
        } = Options::default();
        assert!(!duplicates);
        assert!(print_conv);
        assert_eq!(fast_scan, 0);
    }
}