exiftool_rs/
exiftool.rs

1//! Core ExifTool struct and public API.
2//!
3//! This is the main entry point for reading metadata from files.
4//! Mirrors ExifTool.pm's ImageInfo/ExtractInfo/GetInfo pipeline.
5
6use std::collections::HashMap;
7use std::fs;
8use std::path::Path;
9
10use crate::error::{Error, Result};
11use crate::file_type::{self, FileType};
12use crate::formats;
13use crate::metadata::exif::ByteOrderMark;
14use crate::tag::Tag;
15use crate::value::Value;
16use crate::writer::{exif_writer, iptc_writer, jpeg_writer, matroska_writer, mp4_writer, pdf_writer, png_writer, psd_writer, tiff_writer, webp_writer, xmp_writer};
17
/// Settings that control how metadata is extracted from a file.
#[derive(Debug, Clone)]
pub struct Options {
    /// Include duplicate tags (the same tag name may appear in different groups).
    pub duplicates: bool,
    /// Apply print conversions so values come out human-readable.
    pub print_conv: bool,
    /// Fast scan level: 0=normal, 1=skip composite, 2=skip maker notes, 3=skip thumbnails.
    pub fast_scan: u8,
    /// Restrict extraction to these tag names; an empty list means all tags.
    pub requested_tags: Vec<String>,
    /// Embedded document/data extraction (video frames, etc.). Level: 0=off, 1=-ee, 2=-ee2, 3=-ee3.
    pub extract_embedded: u8,
}

impl Default for Options {
    /// Normal scan with print conversion enabled; no duplicates, no tag filter,
    /// and no embedded-data extraction.
    fn default() -> Self {
        let requested_tags = Vec::new();
        Options {
            duplicates: false,
            print_conv: true,
            fast_scan: 0,
            requested_tags,
            extract_embedded: 0,
        }
    }
}
44
/// A queued tag change for writing.
///
/// Entries are created by `ExifTool::set_new_value` and
/// `ExifTool::set_new_values_from_file`, and are read by `ExifTool::write_info`
/// when the changes are applied.
///
/// A `tag` of `"*"` combined with `value == None` is interpreted by the JPEG and
/// WebP writers as "remove the whole group" named in `group`.
#[derive(Debug, Clone)]
pub struct NewValue {
    /// Tag name without its group prefix (e.g., "Artist", "Copyright", "Title").
    /// A name such as "XMP:Title" passed to `set_new_value` is stored as
    /// `group = Some("XMP")`, `tag = "Title"`.
    pub tag: String,
    /// Group prefix if specified (e.g., "EXIF", "XMP", "IPTC")
    pub group: Option<String>,
    /// New value (None = delete tag)
    pub value: Option<String>,
}
67
/// The main ExifTool engine — read, write, and edit metadata.
///
/// An instance holds the processing [`Options`] plus a queue of pending tag
/// changes. Changes are queued with `set_new_value` and applied to a file by
/// `write_info`.
///
/// # Reading metadata
/// ```no_run
/// use exiftool_rs::ExifTool;
///
/// let et = ExifTool::new();
///
/// // Full tag structs
/// let tags = et.extract_info("photo.jpg").unwrap();
/// for tag in &tags {
///     println!("[{}] {}: {}", tag.group.family0, tag.name, tag.print_value);
/// }
///
/// // Simple name→value map
/// let info = et.image_info("photo.jpg").unwrap();
/// println!("Camera: {}", info.get("Model").unwrap_or(&String::new()));
/// ```
///
/// # Writing metadata
/// ```no_run
/// use exiftool_rs::ExifTool;
///
/// let mut et = ExifTool::new();
/// et.set_new_value("Artist", Some("John Doe"));
/// et.set_new_value("Copyright", Some("2024"));
/// et.write_info("input.jpg", "output.jpg").unwrap();
/// ```
pub struct ExifTool {
    /// Processing options for this instance (see `options` / `options_mut`).
    options: Options,
    /// Pending tag changes, in the order they were queued. `write_info` reads
    /// this queue but does not empty it; use `clear_new_values` for that.
    new_values: Vec<NewValue>,
}
100
/// Result of metadata extraction: maps tag names to display values.
///
/// Both keys and values are plain `String`s. Because this is a `HashMap`,
/// iteration order is unspecified.
pub type ImageInfo = HashMap<String, String>;
103
104impl ExifTool {
105    /// Create a new ExifTool instance with default options.
106    pub fn new() -> Self {
107        Self {
108            options: Options::default(),
109            new_values: Vec::new(),
110        }
111    }
112
113    /// Create a new ExifTool instance with custom options.
114    pub fn with_options(options: Options) -> Self {
115        Self {
116            options,
117            new_values: Vec::new(),
118        }
119    }
120
121    /// Get a mutable reference to the options.
122    pub fn options_mut(&mut self) -> &mut Options {
123        &mut self.options
124    }
125
126    /// Get a reference to the options.
127    pub fn options(&self) -> &Options {
128        &self.options
129    }
130
131    // ================================================================
132    // Writing API
133    // ================================================================
134
135    /// Queue a new tag value for writing.
136    ///
137    /// Call this one or more times, then call `write_info()` to apply changes.
138    ///
139    /// # Arguments
140    /// * `tag` - Tag name, optionally prefixed with group (e.g., "Artist", "XMP:Title", "EXIF:Copyright")
141    /// * `value` - New value, or None to delete the tag
142    ///
143    /// # Example
144    /// ```no_run
145    /// use exiftool_rs::ExifTool;
146    /// let mut et = ExifTool::new();
147    /// et.set_new_value("Artist", Some("John Doe"));
148    /// et.set_new_value("Copyright", Some("2024 John Doe"));
149    /// et.set_new_value("XMP:Title", Some("My Photo"));
150    /// et.write_info("photo.jpg", "photo_out.jpg").unwrap();
151    /// ```
152    pub fn set_new_value(&mut self, tag: &str, value: Option<&str>) {
153        let (group, tag_name) = if let Some(colon_pos) = tag.find(':') {
154            (Some(tag[..colon_pos].to_string()), tag[colon_pos + 1..].to_string())
155        } else {
156            (None, tag.to_string())
157        };
158
159        self.new_values.push(NewValue {
160            tag: tag_name,
161            group,
162            value: value.map(|v| v.to_string()),
163        });
164    }
165
166    /// Clear all queued new values.
167    pub fn clear_new_values(&mut self) {
168        self.new_values.clear();
169    }
170
171    /// Copy tags from a source file, queuing them as new values.
172    ///
173    /// Reads all tags from `src_path` and queues them for writing.
174    /// Optionally filter by tag names.
175    pub fn set_new_values_from_file<P: AsRef<Path>>(
176        &mut self,
177        src_path: P,
178        tags_to_copy: Option<&[&str]>,
179    ) -> Result<u32> {
180        let src_tags = self.extract_info(src_path)?;
181        let mut count = 0u32;
182
183        for tag in &src_tags {
184            // Skip file-level tags that shouldn't be copied
185            if tag.group.family0 == "File" || tag.group.family0 == "Composite" {
186                continue;
187            }
188            // Skip binary/undefined data and empty values
189            if tag.print_value.starts_with("(Binary") || tag.print_value.starts_with("(Undefined") {
190                continue;
191            }
192            if tag.print_value.is_empty() {
193                continue;
194            }
195
196            // Filter by requested tags
197            if let Some(filter) = tags_to_copy {
198                let name_lower = tag.name.to_lowercase();
199                if !filter.iter().any(|f| f.to_lowercase() == name_lower) {
200                    continue;
201                }
202            }
203
204            let _full_tag = format!("{}:{}", tag.group.family0, tag.name);
205            self.new_values.push(NewValue {
206                tag: tag.name.clone(),
207                group: Some(tag.group.family0.clone()),
208                value: Some(tag.print_value.clone()),
209            });
210            count += 1;
211        }
212
213        Ok(count)
214    }
215
216    /// Set a file's name based on a tag value.
217    pub fn set_file_name_from_tag<P: AsRef<Path>>(
218        &self,
219        path: P,
220        tag_name: &str,
221        template: &str,
222    ) -> Result<String> {
223        let path = path.as_ref();
224        let tags = self.extract_info(path)?;
225
226        let tag_value = tags
227            .iter()
228            .find(|t| t.name.to_lowercase() == tag_name.to_lowercase())
229            .map(|t| &t.print_value)
230            .ok_or_else(|| Error::TagNotFound(tag_name.to_string()))?;
231
232        // Build new filename from template
233        // Template: "prefix%value%suffix.ext" or just use the tag value
234        let new_name = if template.contains('%') {
235            template.replace("%v", value_to_filename(tag_value).as_str())
236        } else {
237            // Default: use tag value as filename, keep extension
238            let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
239            let clean = value_to_filename(tag_value);
240            if ext.is_empty() {
241                clean
242            } else {
243                format!("{}.{}", clean, ext)
244            }
245        };
246
247        let parent = path.parent().unwrap_or(Path::new(""));
248        let new_path = parent.join(&new_name);
249
250        fs::rename(path, &new_path).map_err(Error::Io)?;
251        Ok(new_path.to_string_lossy().to_string())
252    }
253
254    /// Write queued changes to a file.
255    ///
256    /// If `dst_path` is the same as `src_path`, the file is modified in-place
257    /// (via a temporary file).
258    pub fn write_info<P: AsRef<Path>, Q: AsRef<Path>>(&self, src_path: P, dst_path: Q) -> Result<u32> {
259        let src_path = src_path.as_ref();
260        let dst_path = dst_path.as_ref();
261        let data = fs::read(src_path).map_err(Error::Io)?;
262
263        let file_type = self.detect_file_type(&data, src_path)?;
264        let output = self.apply_changes(&data, file_type)?;
265
266        // Write to temp file first, then rename (atomic)
267        let temp_path = dst_path.with_extension("exiftool_tmp");
268        fs::write(&temp_path, &output).map_err(Error::Io)?;
269        fs::rename(&temp_path, dst_path).map_err(Error::Io)?;
270
271        Ok(self.new_values.len() as u32)
272    }
273
274    /// Apply queued changes to in-memory data.
275    fn apply_changes(&self, data: &[u8], file_type: FileType) -> Result<Vec<u8>> {
276        match file_type {
277            FileType::Jpeg => self.write_jpeg(data),
278            FileType::Png => self.write_png(data),
279            FileType::Tiff | FileType::Dng | FileType::Cr2 | FileType::Nef
280            | FileType::Arw | FileType::Orf | FileType::Pef => self.write_tiff(data),
281            FileType::WebP => self.write_webp(data),
282            FileType::Mp4 | FileType::QuickTime | FileType::M4a
283            | FileType::ThreeGP | FileType::F4v => self.write_mp4(data),
284            FileType::Psd => self.write_psd(data),
285            FileType::Pdf => self.write_pdf(data),
286            FileType::Heif | FileType::Avif => self.write_mp4(data),
287            FileType::Mkv | FileType::WebM => self.write_matroska(data),
288            FileType::Gif => {
289                let comment = self.new_values.iter()
290                    .find(|nv| nv.tag.to_lowercase() == "comment")
291                    .and_then(|nv| nv.value.clone());
292                crate::writer::gif_writer::write_gif(data, comment.as_deref())
293            }
294            FileType::Flac => {
295                let changes: Vec<(&str, &str)> = self.new_values.iter()
296                    .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
297                    .collect();
298                crate::writer::flac_writer::write_flac(data, &changes)
299            }
300            FileType::Mp3 | FileType::Aiff => {
301                let changes: Vec<(&str, &str)> = self.new_values.iter()
302                    .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
303                    .collect();
304                crate::writer::id3_writer::write_id3(data, &changes)
305            }
306            FileType::Jp2 | FileType::Jxl => {
307                let new_xmp = if self.new_values.iter().any(|nv| nv.group.as_deref() == Some("XMP")) {
308                    let refs: Vec<&NewValue> = self.new_values.iter()
309                        .filter(|nv| nv.group.as_deref() == Some("XMP"))
310                        .collect();
311                    Some(self.build_new_xmp(&refs))
312                } else { None };
313                crate::writer::jp2_writer::write_jp2(data, new_xmp.as_deref(), None)
314            }
315            FileType::PostScript => {
316                let changes: Vec<(&str, &str)> = self.new_values.iter()
317                    .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
318                    .collect();
319                crate::writer::ps_writer::write_postscript(data, &changes)
320            }
321            FileType::Ogg | FileType::Opus => {
322                let changes: Vec<(&str, &str)> = self.new_values.iter()
323                    .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
324                    .collect();
325                crate::writer::ogg_writer::write_ogg(data, &changes)
326            }
327            FileType::Xmp => {
328                let props: Vec<xmp_writer::XmpProperty> = self.new_values.iter()
329                    .filter_map(|nv| {
330                        let val = nv.value.as_deref()?;
331                        Some(xmp_writer::XmpProperty {
332                            namespace: nv.group.clone().unwrap_or_else(|| "dc".into()),
333                            property: nv.tag.clone(),
334                            values: vec![val.to_string()],
335                            prop_type: xmp_writer::XmpPropertyType::Simple,
336                        })
337                    })
338                    .collect();
339                Ok(crate::writer::xmp_sidecar_writer::write_xmp_sidecar(&props))
340            }
341            _ => Err(Error::UnsupportedFileType(format!("writing not yet supported for {}", file_type))),
342        }
343    }
344
345    /// Returns the set of tag names (lowercase) that are writable for a given file type.
346    /// Returns `None` if any tag is writable (open-ended formats like PNG, FLAC, MKV).
347    /// Returns `Some(empty set)` if the format has no writer.
348    pub fn writable_tags(file_type: FileType) -> Option<std::collections::HashSet<&'static str>> {
349        use std::collections::HashSet;
350
351        // EXIF tags supported by exif_writer
352        const EXIF_TAGS: &[&str] = &[
353            "imagedescription", "make", "model", "orientation",
354            "xresolution", "yresolution", "resolutionunit", "software",
355            "modifydate", "datetime", "artist", "copyright",
356            "datetimeoriginal", "createdate", "datetimedigitized",
357            "usercomment", "imageuniqueid", "ownername", "cameraownername",
358            "serialnumber", "bodyserialnumber", "lensmake", "lensmodel", "lensserialnumber",
359        ];
360
361        // IPTC tags supported by iptc_writer
362        const IPTC_TAGS: &[&str] = &[
363            "objectname", "title", "urgency", "category", "supplementalcategories",
364            "keywords", "specialinstructions", "datecreated", "timecreated",
365            "by-line", "author", "byline", "by-linetitle", "authorsposition", "bylinetitle",
366            "city", "sub-location", "sublocation", "province-state", "state", "provincestate",
367            "country-primarylocationcode", "countrycode",
368            "country-primarylocationname", "country",
369            "headline", "credit", "source", "copyrightnotice",
370            "contact", "caption-abstract", "caption", "description",
371            "writer-editor", "captionwriter",
372        ];
373
374        // XMP auto-detected tags (no group prefix needed)
375        const XMP_AUTO_TAGS: &[&str] = &[
376            "title", "description", "subject", "creator", "rights",
377            "keywords", "rating", "label", "hierarchicalsubject",
378        ];
379
380        // ID3 tags
381        const ID3_TAGS: &[&str] = &[
382            "title", "artist", "album", "year", "date", "track",
383            "genre", "comment", "composer", "albumartist",
384            "encoder", "encodedby", "publisher", "copyright", "bpm", "lyrics",
385        ];
386
387        // MP4/MOV ilst tags
388        const MP4_TAGS: &[&str] = &[
389            "title", "artist", "album", "year", "date", "comment",
390            "genre", "composer", "writer", "encoder", "encodedby",
391            "grouping", "lyrics", "description", "albumartist", "copyright",
392        ];
393
394        // PDF Info dict tags
395        const PDF_TAGS: &[&str] = &[
396            "title", "author", "subject", "keywords", "creator", "producer",
397        ];
398
399        // PostScript DSC tags
400        const PS_TAGS: &[&str] = &[
401            "title", "creator", "author", "for", "creationdate", "createdate",
402        ];
403
404        match file_type {
405            // Open-ended: any tag name accepted
406            FileType::Png | FileType::Flac | FileType::Mkv | FileType::WebM
407            | FileType::Ogg | FileType::Opus | FileType::Xmp => None,
408
409            // JPEG: EXIF + IPTC + XMP auto + comment
410            FileType::Jpeg => {
411                let mut set: HashSet<&str> = HashSet::new();
412                set.extend(EXIF_TAGS);
413                set.extend(IPTC_TAGS);
414                set.extend(XMP_AUTO_TAGS);
415                set.insert("comment");
416                Some(set)
417            }
418
419            // TIFF-based: EXIF only
420            FileType::Tiff | FileType::Dng | FileType::Cr2 | FileType::Nef
421            | FileType::Arw | FileType::Orf | FileType::Pef => {
422                let mut set: HashSet<&str> = HashSet::new();
423                set.extend(EXIF_TAGS);
424                Some(set)
425            }
426
427            // WebP: EXIF + XMP auto
428            FileType::WebP => {
429                let mut set: HashSet<&str> = HashSet::new();
430                set.extend(EXIF_TAGS);
431                set.extend(XMP_AUTO_TAGS);
432                Some(set)
433            }
434
435            // MP4/MOV/HEIF: ilst + XMP auto
436            FileType::Mp4 | FileType::QuickTime | FileType::M4a
437            | FileType::ThreeGP | FileType::F4v | FileType::Heif | FileType::Avif => {
438                let mut set: HashSet<&str> = HashSet::new();
439                set.extend(MP4_TAGS);
440                set.extend(XMP_AUTO_TAGS);
441                Some(set)
442            }
443
444            // PSD: IPTC + XMP auto
445            FileType::Psd => {
446                let mut set: HashSet<&str> = HashSet::new();
447                set.extend(IPTC_TAGS);
448                set.extend(XMP_AUTO_TAGS);
449                Some(set)
450            }
451
452            FileType::Pdf => Some(PDF_TAGS.iter().copied().collect()),
453            FileType::PostScript => Some(PS_TAGS.iter().copied().collect()),
454
455            FileType::Mp3 | FileType::Aiff => Some(ID3_TAGS.iter().copied().collect()),
456
457            FileType::Gif => {
458                let mut set: HashSet<&str> = HashSet::new();
459                set.insert("comment");
460                Some(set)
461            }
462
463            // JP2/JXL: XMP only (with group prefix)
464            FileType::Jp2 | FileType::Jxl => Some(XMP_AUTO_TAGS.iter().copied().collect()),
465
466            // No writer
467            _ => Some(HashSet::new()),
468        }
469    }
470
471    /// Write metadata changes to JPEG data.
472    fn write_jpeg(&self, data: &[u8]) -> Result<Vec<u8>> {
473        // Classify new values by target group
474        let mut exif_values: Vec<&NewValue> = Vec::new();
475        let mut xmp_values: Vec<&NewValue> = Vec::new();
476        let mut iptc_values: Vec<&NewValue> = Vec::new();
477        let mut comment_value: Option<&str> = None;
478        let mut remove_exif = false;
479        let mut remove_xmp = false;
480        let mut remove_iptc = false;
481        let mut remove_comment = false;
482
483        for nv in &self.new_values {
484            let group = nv.group.as_deref().unwrap_or("");
485            let group_upper = group.to_uppercase();
486
487            // Check for group deletion
488            if nv.value.is_none() && nv.tag == "*" {
489                match group_upper.as_str() {
490                    "EXIF" => { remove_exif = true; continue; }
491                    "XMP" => { remove_xmp = true; continue; }
492                    "IPTC" => { remove_iptc = true; continue; }
493                    _ => {}
494                }
495            }
496
497            match group_upper.as_str() {
498                "XMP" => xmp_values.push(nv),
499                "IPTC" => iptc_values.push(nv),
500                "EXIF" | "IFD0" | "EXIFIFD" | "GPS" => exif_values.push(nv),
501                "" => {
502                    // Auto-detect best group based on tag name
503                    if nv.tag.to_lowercase() == "comment" {
504                        if nv.value.is_none() {
505                            remove_comment = true;
506                        } else {
507                            comment_value = nv.value.as_deref();
508                        }
509                    } else if is_xmp_tag(&nv.tag) {
510                        xmp_values.push(nv);
511                    } else {
512                        exif_values.push(nv);
513                    }
514                }
515                _ => exif_values.push(nv), // default to EXIF
516            }
517        }
518
519        // Build new EXIF data
520        let new_exif = if !exif_values.is_empty() {
521            Some(self.build_new_exif(data, &exif_values)?)
522        } else {
523            None
524        };
525
526        // Build new XMP data
527        let new_xmp = if !xmp_values.is_empty() {
528            Some(self.build_new_xmp(&xmp_values))
529        } else {
530            None
531        };
532
533        // Build new IPTC data
534        let new_iptc_data = if !iptc_values.is_empty() {
535            let records: Vec<iptc_writer::IptcRecord> = iptc_values
536                .iter()
537                .filter_map(|nv| {
538                    let value = nv.value.as_deref()?;
539                    let (record, dataset) = iptc_writer::tag_name_to_iptc(&nv.tag)?;
540                    Some(iptc_writer::IptcRecord {
541                        record,
542                        dataset,
543                        data: value.as_bytes().to_vec(),
544                    })
545                })
546                .collect();
547            if records.is_empty() {
548                None
549            } else {
550                Some(iptc_writer::build_iptc(&records))
551            }
552        } else {
553            None
554        };
555
556        // Rewrite JPEG
557        jpeg_writer::write_jpeg(
558            data,
559            new_exif.as_deref(),
560            new_xmp.as_deref(),
561            new_iptc_data.as_deref(),
562            comment_value,
563            remove_exif,
564            remove_xmp,
565            remove_iptc,
566            remove_comment,
567        )
568    }
569
570    /// Build new EXIF data by merging existing EXIF with queued changes.
571    fn build_new_exif(&self, jpeg_data: &[u8], values: &[&NewValue]) -> Result<Vec<u8>> {
572        let bo = ByteOrderMark::BigEndian;
573        let mut ifd0_entries = Vec::new();
574        let mut exif_entries = Vec::new();
575        let mut gps_entries = Vec::new();
576
577        // Step 1: Extract existing EXIF entries from the JPEG
578        let existing = extract_existing_exif_entries(jpeg_data, bo);
579        for entry in &existing {
580            match classify_exif_tag(entry.tag) {
581                ExifIfdGroup::Ifd0 => ifd0_entries.push(entry.clone()),
582                ExifIfdGroup::ExifIfd => exif_entries.push(entry.clone()),
583                ExifIfdGroup::Gps => gps_entries.push(entry.clone()),
584            }
585        }
586
587        // Step 2: Apply queued changes (add/replace/delete)
588        let deleted_tags: Vec<u16> = values
589            .iter()
590            .filter(|nv| nv.value.is_none())
591            .filter_map(|nv| tag_name_to_id(&nv.tag))
592            .collect();
593
594        // Remove deleted tags
595        ifd0_entries.retain(|e| !deleted_tags.contains(&e.tag));
596        exif_entries.retain(|e| !deleted_tags.contains(&e.tag));
597        gps_entries.retain(|e| !deleted_tags.contains(&e.tag));
598
599        // Add/replace new values
600        for nv in values {
601            if nv.value.is_none() {
602                continue;
603            }
604            let value_str = nv.value.as_deref().unwrap_or("");
605            let group = nv.group.as_deref().unwrap_or("");
606
607            if let Some((tag_id, format, encoded)) = encode_exif_tag(&nv.tag, value_str, group, bo) {
608                let entry = exif_writer::IfdEntry {
609                    tag: tag_id,
610                    format,
611                    data: encoded,
612                };
613
614                let target = match group.to_uppercase().as_str() {
615                    "GPS" => &mut gps_entries,
616                    "EXIFIFD" => &mut exif_entries,
617                    _ => match classify_exif_tag(tag_id) {
618                        ExifIfdGroup::ExifIfd => &mut exif_entries,
619                        ExifIfdGroup::Gps => &mut gps_entries,
620                        ExifIfdGroup::Ifd0 => &mut ifd0_entries,
621                    },
622                };
623
624                // Replace existing or add new
625                if let Some(existing) = target.iter_mut().find(|e| e.tag == tag_id) {
626                    *existing = entry;
627                } else {
628                    target.push(entry);
629                }
630            }
631        }
632
633        // Remove sub-IFD pointers from entries (they'll be rebuilt by build_exif)
634        ifd0_entries.retain(|e| e.tag != 0x8769 && e.tag != 0x8825 && e.tag != 0xA005);
635
636        exif_writer::build_exif(&ifd0_entries, &exif_entries, &gps_entries, bo)
637    }
638
639    /// Write metadata changes to PNG data.
640    fn write_png(&self, data: &[u8]) -> Result<Vec<u8>> {
641        let mut new_text: Vec<(&str, &str)> = Vec::new();
642        let mut remove_text: Vec<&str> = Vec::new();
643
644        // Collect text-based changes
645        // We need to hold the strings in vectors that live long enough
646        let owned_pairs: Vec<(String, String)> = self.new_values.iter()
647            .filter(|nv| nv.value.is_some())
648            .map(|nv| (nv.tag.clone(), nv.value.clone().unwrap()))
649            .collect();
650
651        for (tag, value) in &owned_pairs {
652            new_text.push((tag.as_str(), value.as_str()));
653        }
654
655        for nv in &self.new_values {
656            if nv.value.is_none() {
657                remove_text.push(&nv.tag);
658            }
659        }
660
661        png_writer::write_png(data, &new_text, None, &remove_text)
662    }
663
664    /// Write metadata changes to PSD data.
665    fn write_psd(&self, data: &[u8]) -> Result<Vec<u8>> {
666        let mut iptc_values = Vec::new();
667        let mut xmp_values = Vec::new();
668
669        for nv in &self.new_values {
670            let group = nv.group.as_deref().unwrap_or("").to_uppercase();
671            match group.as_str() {
672                "XMP" => xmp_values.push(nv),
673                "IPTC" => iptc_values.push(nv),
674                _ => {
675                    if is_xmp_tag(&nv.tag) { xmp_values.push(nv); }
676                    else { iptc_values.push(nv); }
677                }
678            }
679        }
680
681        let new_iptc = if !iptc_values.is_empty() {
682            let records: Vec<_> = iptc_values.iter().filter_map(|nv| {
683                let value = nv.value.as_deref()?;
684                let (record, dataset) = iptc_writer::tag_name_to_iptc(&nv.tag)?;
685                Some(iptc_writer::IptcRecord { record, dataset, data: value.as_bytes().to_vec() })
686            }).collect();
687            if records.is_empty() { None } else { Some(iptc_writer::build_iptc(&records)) }
688        } else { None };
689
690        let new_xmp = if !xmp_values.is_empty() {
691            let refs: Vec<&NewValue> = xmp_values.iter().copied().collect();
692            Some(self.build_new_xmp(&refs))
693        } else { None };
694
695        psd_writer::write_psd(data, new_iptc.as_deref(), new_xmp.as_deref())
696    }
697
698    /// Write metadata changes to Matroska (MKV/WebM) data.
699    fn write_matroska(&self, data: &[u8]) -> Result<Vec<u8>> {
700        let changes: Vec<(&str, &str)> = self.new_values.iter()
701            .filter_map(|nv| {
702                let value = nv.value.as_deref()?;
703                Some((nv.tag.as_str(), value))
704            })
705            .collect();
706
707        matroska_writer::write_matroska(data, &changes)
708    }
709
710    /// Write metadata changes to PDF data.
711    fn write_pdf(&self, data: &[u8]) -> Result<Vec<u8>> {
712        let changes: Vec<(&str, &str)> = self.new_values.iter()
713            .filter_map(|nv| {
714                let value = nv.value.as_deref()?;
715                Some((nv.tag.as_str(), value))
716            })
717            .collect();
718
719        pdf_writer::write_pdf(data, &changes)
720    }
721
722    /// Write metadata changes to MP4/MOV data.
723    fn write_mp4(&self, data: &[u8]) -> Result<Vec<u8>> {
724        let mut ilst_tags: Vec<([u8; 4], String)> = Vec::new();
725        let mut xmp_values: Vec<&NewValue> = Vec::new();
726
727        for nv in &self.new_values {
728            if nv.value.is_none() { continue; }
729            let group = nv.group.as_deref().unwrap_or("").to_uppercase();
730            if group == "XMP" {
731                xmp_values.push(nv);
732            } else if let Some(key) = mp4_writer::tag_to_ilst_key(&nv.tag) {
733                ilst_tags.push((key, nv.value.clone().unwrap()));
734            }
735        }
736
737        let tag_refs: Vec<(&[u8; 4], &str)> = ilst_tags.iter()
738            .map(|(k, v)| (k, v.as_str()))
739            .collect();
740
741        let new_xmp = if !xmp_values.is_empty() {
742            let refs: Vec<&NewValue> = xmp_values.iter().copied().collect();
743            Some(self.build_new_xmp(&refs))
744        } else {
745            None
746        };
747
748        mp4_writer::write_mp4(data, &tag_refs, new_xmp.as_deref())
749    }
750
751    /// Write metadata changes to WebP data.
752    fn write_webp(&self, data: &[u8]) -> Result<Vec<u8>> {
753        let mut exif_values: Vec<&NewValue> = Vec::new();
754        let mut xmp_values: Vec<&NewValue> = Vec::new();
755        let mut remove_exif = false;
756        let mut remove_xmp = false;
757
758        for nv in &self.new_values {
759            let group = nv.group.as_deref().unwrap_or("").to_uppercase();
760            if nv.value.is_none() && nv.tag == "*" {
761                if group == "EXIF" { remove_exif = true; }
762                if group == "XMP" { remove_xmp = true; }
763                continue;
764            }
765            match group.as_str() {
766                "XMP" => xmp_values.push(nv),
767                _ => exif_values.push(nv),
768            }
769        }
770
771        let new_exif = if !exif_values.is_empty() {
772            let bo = ByteOrderMark::BigEndian;
773            let mut entries = Vec::new();
774            for nv in &exif_values {
775                if let Some(ref v) = nv.value {
776                    let group = nv.group.as_deref().unwrap_or("");
777                    if let Some((tag_id, format, encoded)) = encode_exif_tag(&nv.tag, v, group, bo) {
778                        entries.push(exif_writer::IfdEntry { tag: tag_id, format, data: encoded });
779                    }
780                }
781            }
782            if !entries.is_empty() {
783                Some(exif_writer::build_exif(&entries, &[], &[], bo)?)
784            } else {
785                None
786            }
787        } else {
788            None
789        };
790
791        let new_xmp = if !xmp_values.is_empty() {
792            Some(self.build_new_xmp(&xmp_values.iter().map(|v| *v).collect::<Vec<_>>()))
793        } else {
794            None
795        };
796
797        webp_writer::write_webp(
798            data,
799            new_exif.as_deref(),
800            new_xmp.as_deref(),
801            remove_exif,
802            remove_xmp,
803        )
804    }
805
806    /// Write metadata changes to TIFF data.
807    fn write_tiff(&self, data: &[u8]) -> Result<Vec<u8>> {
808        let bo = if data.starts_with(b"II") {
809            ByteOrderMark::LittleEndian
810        } else {
811            ByteOrderMark::BigEndian
812        };
813
814        let mut changes: Vec<(u16, Vec<u8>)> = Vec::new();
815        for nv in &self.new_values {
816            if let Some(ref value) = nv.value {
817                let group = nv.group.as_deref().unwrap_or("");
818                if let Some((tag_id, _format, encoded)) = encode_exif_tag(&nv.tag, value, group, bo) {
819                    changes.push((tag_id, encoded));
820                }
821            }
822        }
823
824        tiff_writer::write_tiff(data, &changes)
825    }
826
827    /// Build new XMP data from queued values.
828    fn build_new_xmp(&self, values: &[&NewValue]) -> Vec<u8> {
829        let mut properties = Vec::new();
830
831        for nv in values {
832            let value_str = match &nv.value {
833                Some(v) => v.clone(),
834                None => continue,
835            };
836
837            let ns = nv.group.as_deref().unwrap_or("dc").to_lowercase();
838            let ns = if ns == "xmp" { "xmp".to_string() } else { ns };
839
840            let prop_type = match nv.tag.to_lowercase().as_str() {
841                "title" | "description" | "rights" => xmp_writer::XmpPropertyType::LangAlt,
842                "subject" | "keywords" => xmp_writer::XmpPropertyType::Bag,
843                "creator" => xmp_writer::XmpPropertyType::Seq,
844                _ => xmp_writer::XmpPropertyType::Simple,
845            };
846
847            let values = if matches!(prop_type, xmp_writer::XmpPropertyType::Bag | xmp_writer::XmpPropertyType::Seq) {
848                value_str.split(',').map(|s| s.trim().to_string()).collect()
849            } else {
850                vec![value_str]
851            };
852
853            properties.push(xmp_writer::XmpProperty {
854                namespace: ns,
855                property: nv.tag.clone(),
856                values,
857                prop_type,
858            });
859        }
860
861        xmp_writer::build_xmp(&properties).into_bytes()
862    }
863
864    // ================================================================
865    // Reading API
866    // ================================================================
867
868    /// Extract metadata from a file and return a simple name→value map.
869    ///
870    /// This is the high-level one-shot API, equivalent to ExifTool's `ImageInfo()`.
871    pub fn image_info<P: AsRef<Path>>(&self, path: P) -> Result<ImageInfo> {
872        let tags = self.extract_info(path)?;
873        Ok(self.get_info(&tags))
874    }
875
876    /// Extract all metadata tags from a file.
877    ///
878    /// Returns the full `Tag` structs with groups, raw values, etc.
879    pub fn extract_info<P: AsRef<Path>>(&self, path: P) -> Result<Vec<Tag>> {
880        let path = path.as_ref();
881        let data = fs::read(path).map_err(Error::Io)?;
882
883        self.extract_info_from_bytes(&data, path)
884    }
885
886    /// Extract metadata from in-memory data.
887    pub fn extract_info_from_bytes(&self, data: &[u8], path: &Path) -> Result<Vec<Tag>> {
888        let file_type_result = self.detect_file_type(data, path);
889        let (file_type, mut tags) = match file_type_result {
890            Ok(ft) => {
891                let t = self.process_file(data, ft).or_else(|_| {
892                    self.process_by_extension(data, path)
893                })?;
894                (Some(ft), t)
895            }
896            Err(_) => {
897                // File type unknown by magic/extension — try extension-based fallback
898                let t = self.process_by_extension(data, path)?;
899                (None, t)
900            }
901        };
902        let file_type = file_type.unwrap_or(FileType::Zip); // placeholder for file-level tags
903
904        // Add file-level tags
905        tags.push(Tag {
906            id: crate::tag::TagId::Text("FileType".into()),
907            name: "FileType".into(),
908            description: "File Type".into(),
909            group: crate::tag::TagGroup {
910                family0: "File".into(),
911                family1: "File".into(),
912                family2: "Other".into(),
913            },
914            raw_value: Value::String(format!("{:?}", file_type)),
915            print_value: file_type.description().to_string(),
916            priority: 0,
917        });
918
919        tags.push(Tag {
920            id: crate::tag::TagId::Text("MIMEType".into()),
921            name: "MIMEType".into(),
922            description: "MIME Type".into(),
923            group: crate::tag::TagGroup {
924                family0: "File".into(),
925                family1: "File".into(),
926                family2: "Other".into(),
927            },
928            raw_value: Value::String(file_type.mime_type().to_string()),
929            print_value: file_type.mime_type().to_string(),
930            priority: 0,
931        });
932
933        if let Ok(metadata) = fs::metadata(path) {
934            tags.push(Tag {
935                id: crate::tag::TagId::Text("FileSize".into()),
936                name: "FileSize".into(),
937                description: "File Size".into(),
938                group: crate::tag::TagGroup {
939                    family0: "File".into(),
940                    family1: "File".into(),
941                    family2: "Other".into(),
942                },
943                raw_value: Value::U32(metadata.len() as u32),
944                print_value: format_file_size(metadata.len()),
945                priority: 0,
946            });
947        }
948
949        // Add more file-level tags
950        let file_tag = |name: &str, val: Value| -> Tag {
951            Tag {
952                id: crate::tag::TagId::Text(name.to_string()),
953                name: name.to_string(), description: name.to_string(),
954                group: crate::tag::TagGroup { family0: "File".into(), family1: "File".into(), family2: "Other".into() },
955                raw_value: val.clone(), print_value: val.to_display_string(), priority: 0,
956            }
957        };
958
959        if let Some(fname) = path.file_name().and_then(|n| n.to_str()) {
960            tags.push(file_tag("FileName", Value::String(fname.to_string())));
961        }
962        if let Some(dir) = path.parent().and_then(|p| p.to_str()) {
963            tags.push(file_tag("Directory", Value::String(dir.to_string())));
964        }
965        // Use the canonical (first) extension from the FileType, matching Perl ExifTool behavior.
966        let canonical_ext = file_type.extensions().first().copied().unwrap_or("");
967        if !canonical_ext.is_empty() {
968            tags.push(file_tag("FileTypeExtension", Value::String(canonical_ext.to_string())));
969        }
970
971        #[cfg(unix)]
972        if let Ok(metadata) = fs::metadata(path) {
973            use std::os::unix::fs::MetadataExt;
974            let mode = metadata.mode();
975            tags.push(file_tag("FilePermissions", Value::String(format!("{:o}", mode & 0o7777))));
976
977            // FileModifyDate
978            if let Ok(modified) = metadata.modified() {
979                if let Ok(dur) = modified.duration_since(std::time::UNIX_EPOCH) {
980                    let secs = dur.as_secs() as i64;
981                    tags.push(file_tag("FileModifyDate", Value::String(unix_to_datetime(secs))));
982                }
983            }
984            // FileAccessDate
985            if let Ok(accessed) = metadata.accessed() {
986                if let Ok(dur) = accessed.duration_since(std::time::UNIX_EPOCH) {
987                    let secs = dur.as_secs() as i64;
988                    tags.push(file_tag("FileAccessDate", Value::String(unix_to_datetime(secs))));
989                }
990            }
991            // FileInodeChangeDate (ctime on Unix)
992            let ctime = metadata.ctime();
993            if ctime > 0 {
994                tags.push(file_tag("FileInodeChangeDate", Value::String(unix_to_datetime(ctime))));
995            }
996        }
997
998        // ExifByteOrder (from TIFF header)
999        {
1000            let bo_str = if data.len() > 8 {
1001                // Check EXIF in JPEG or TIFF header or WebP/RIFF EXIF chunk
1002                let check: Option<&[u8]> = if data.starts_with(&[0xFF, 0xD8]) {
1003                    // JPEG: find APP1 EXIF header
1004                    data.windows(6).position(|w| w == b"Exif\0\0")
1005                        .map(|p| &data[p+6..])
1006                } else if data.starts_with(b"FUJIFILMCCD-RAW") && data.len() >= 0x60 {
1007                    // RAF: look in the embedded JPEG for EXIF byte order
1008                    let jpeg_offset = u32::from_be_bytes([data[0x54], data[0x55], data[0x56], data[0x57]]) as usize;
1009                    let jpeg_length = u32::from_be_bytes([data[0x58], data[0x59], data[0x5A], data[0x5B]]) as usize;
1010                    if jpeg_offset > 0 && jpeg_offset + jpeg_length <= data.len() {
1011                        let jpeg = &data[jpeg_offset..jpeg_offset + jpeg_length];
1012                        jpeg.windows(6).position(|w| w == b"Exif\0\0")
1013                            .map(|p| &jpeg[p+6..])
1014                    } else {
1015                        None
1016                    }
1017                } else if data.starts_with(b"RIFF") && data.len() >= 12 {
1018                    // RIFF/WebP: find EXIF chunk
1019                    let mut riff_bo: Option<&[u8]> = None;
1020                    let mut pos = 12usize;
1021                    while pos + 8 <= data.len() {
1022                        let cid = &data[pos..pos+4];
1023                        let csz = u32::from_le_bytes([data[pos+4],data[pos+5],data[pos+6],data[pos+7]]) as usize;
1024                        let cstart = pos + 8;
1025                        let cend = (cstart + csz).min(data.len());
1026                        if cid == b"EXIF" && cend > cstart {
1027                            let exif_data = &data[cstart..cend];
1028                            let tiff = if exif_data.starts_with(b"Exif\0\0") { &exif_data[6..] } else { exif_data };
1029                            riff_bo = Some(tiff);
1030                            break;
1031                        }
1032                        // Also check LIST chunks
1033                        if cid == b"LIST" && cend >= cstart + 4 {
1034                            // recurse not needed for this simple scan - just advance
1035                        }
1036                        pos = cend + (csz & 1);
1037                    }
1038                    riff_bo
1039                } else if data.starts_with(&[0x00, 0x00, 0x00, 0x0C, b'J', b'X', b'L', b' ']) {
1040                    // JXL container: scan for brob Exif box and decompress to get byte order
1041                    let mut jxl_bo: Option<String> = None;
1042                    let mut jpos = 12usize; // skip JXL signature box
1043                    while jpos + 8 <= data.len() {
1044                        let bsize = u32::from_be_bytes([data[jpos], data[jpos+1], data[jpos+2], data[jpos+3]]) as usize;
1045                        let btype = &data[jpos+4..jpos+8];
1046                        if bsize < 8 || jpos + bsize > data.len() { break; }
1047                        if btype == b"brob" && jpos + bsize > 12 {
1048                            let inner_type = &data[jpos+8..jpos+12];
1049                            if inner_type == b"Exif" || inner_type == b"exif" {
1050                                let brotli_payload = &data[jpos+12..jpos+bsize];
1051                                use std::io::Cursor;
1052                                let mut inp = Cursor::new(brotli_payload);
1053                                let mut out: Vec<u8> = Vec::new();
1054                                if brotli::BrotliDecompress(&mut inp, &mut out).is_ok() {
1055                                    let exif_start = if out.len() > 4 { 4 } else { 0 };
1056                                    if exif_start < out.len() {
1057                                        if out[exif_start..].starts_with(b"MM") {
1058                                            jxl_bo = Some("Big-endian (Motorola, MM)".to_string());
1059                                        } else if out[exif_start..].starts_with(b"II") {
1060                                            jxl_bo = Some("Little-endian (Intel, II)".to_string());
1061                                        }
1062                                    }
1063                                }
1064                                break;
1065                            }
1066                        }
1067                        jpos += bsize;
1068                    }
1069                    if let Some(bo) = jxl_bo {
1070                        if !bo.is_empty() && file_type != FileType::Btf {
1071                            tags.push(file_tag("ExifByteOrder", Value::String(bo)));
1072                        }
1073                    }
1074                    // Return None to skip the generic byte order check below
1075                    None
1076                } else if data.starts_with(&[0x00, b'M', b'R', b'M']) {
1077                    // MRW: find TTW segment which contains TIFF/EXIF data
1078                    let mrw_data_offset = if data.len() >= 8 {
1079                        u32::from_be_bytes([data[4], data[5], data[6], data[7]]) as usize + 8
1080                    } else { 0 };
1081                    let mut mrw_bo: Option<&[u8]> = None;
1082                    let mut mpos = 8usize;
1083                    while mpos + 8 <= mrw_data_offset.min(data.len()) {
1084                        let seg_tag = &data[mpos..mpos+4];
1085                        let seg_len = u32::from_be_bytes([data[mpos+4], data[mpos+5], data[mpos+6], data[mpos+7]]) as usize;
1086                        if seg_tag == b"\x00TTW" && mpos + 8 + seg_len <= data.len() {
1087                            mrw_bo = Some(&data[mpos+8..mpos+8+seg_len]);
1088                            break;
1089                        }
1090                        mpos += 8 + seg_len;
1091                    }
1092                    mrw_bo
1093                } else {
1094                    Some(&data[..])
1095                };
1096                if let Some(tiff) = check {
1097                    if tiff.starts_with(b"II") { "Little-endian (Intel, II)" }
1098                    else if tiff.starts_with(b"MM") { "Big-endian (Motorola, MM)" }
1099                    else { "" }
1100                } else { "" }
1101            } else { "" };
1102            // Suppress ExifByteOrder for BigTIFF, Canon VRD/DR4 (Perl doesn't output it for these)
1103            // Also skip if already emitted by ExifReader (TIFF-based formats)
1104            let already_has_exifbyteorder = tags.iter().any(|t| t.name == "ExifByteOrder");
1105            if !bo_str.is_empty() && !already_has_exifbyteorder
1106                && file_type != FileType::Btf
1107                && file_type != FileType::Dr4 && file_type != FileType::Vrd
1108                && file_type != FileType::Crw {
1109                tags.push(file_tag("ExifByteOrder", Value::String(bo_str.to_string())));
1110            }
1111        }
1112
1113        tags.push(file_tag("ExifToolVersion", Value::String(crate::VERSION.to_string())));
1114
1115        // Compute composite tags
1116        let composite = crate::composite::compute_composite_tags(&tags);
1117        tags.extend(composite);
1118
1119        // FLIR post-processing: remove LensID composite for FLIR cameras.
1120        // Perl's LensID composite requires LensType EXIF tag (not present in FLIR images),
1121        // and LensID-2 requires LensModel to match /(mm|\d\/F)/ (FLIR names like "FOL7"
1122        // don't match).  Our composite.rs uses a simpler fallback that picks up any non-empty
1123        // LensModel, so we remove LensID when the image is from a FLIR camera with FFF data.
1124        {
1125            let is_flir_fff = tags.iter().any(|t| t.group.family0 == "APP1"
1126                && t.group.family1 == "FLIR");
1127            if is_flir_fff {
1128                tags.retain(|t| !(t.name == "LensID" && t.group.family0 == "Composite"));
1129            }
1130        }
1131
1132        // Olympus post-processing: remove the generic "Lens" composite for Olympus cameras.
1133        // In Perl, the "Lens" composite tag requires Canon:MinFocalLength (Canon namespace).
1134        // Our composite.rs generates Lens for any manufacturer that has MinFocalLength +
1135        // MaxFocalLength (e.g., Olympus Equipment sub-IFD).  Remove it for non-Canon cameras.
1136        {
1137            let make = tags.iter().find(|t| t.name == "Make")
1138                .map(|t| t.print_value.clone()).unwrap_or_default();
1139            if !make.to_uppercase().contains("CANON") {
1140                tags.retain(|t| t.name != "Lens" || t.group.family0 != "Composite");
1141            }
1142        }
1143
1144        // Priority-based deduplication: when the same tag name appears from both RIFF (priority 0)
1145        // and MakerNotes/EXIF (priority 0 but higher-quality source), remove the RIFF copy.
1146        // Mirrors ExifTool's PRIORITY => 0 behavior for RIFF StreamHeader tags.
1147        {
1148            let riff_priority_zero_tags = ["Quality", "SampleSize", "StreamType"];
1149            for tag_name in &riff_priority_zero_tags {
1150                let has_makernotes = tags.iter().any(|t| t.name == *tag_name
1151                    && t.group.family0 != "RIFF");
1152                if has_makernotes {
1153                    tags.retain(|t| !(t.name == *tag_name && t.group.family0 == "RIFF"));
1154                }
1155            }
1156        }
1157
1158        // Filter by requested tags if specified
1159        if !self.options.requested_tags.is_empty() {
1160            let requested: Vec<String> = self
1161                .options
1162                .requested_tags
1163                .iter()
1164                .map(|t| t.to_lowercase())
1165                .collect();
1166            tags.retain(|t| requested.contains(&t.name.to_lowercase()));
1167        }
1168
1169        Ok(tags)
1170    }
1171
1172    /// Format extracted tags into a simple name→value map.
1173    ///
1174    /// Handles duplicate tag names by appending group info.
1175    fn get_info(&self, tags: &[Tag]) -> ImageInfo {
1176        let mut info = ImageInfo::new();
1177        let mut seen: HashMap<String, usize> = HashMap::new();
1178
1179        for tag in tags {
1180            let value = if self.options.print_conv {
1181                &tag.print_value
1182            } else {
1183                &tag.raw_value.to_display_string()
1184            };
1185
1186            let count = seen.entry(tag.name.clone()).or_insert(0);
1187            *count += 1;
1188
1189            if *count == 1 {
1190                info.insert(tag.name.clone(), value.clone());
1191            } else if self.options.duplicates {
1192                let key = format!("{} [{}:{}]", tag.name, tag.group.family0, tag.group.family1);
1193                info.insert(key, value.clone());
1194            }
1195        }
1196
1197        info
1198    }
1199
1200    /// Detect file type from magic bytes and extension.
1201    fn detect_file_type(&self, data: &[u8], path: &Path) -> Result<FileType> {
1202        // Try magic bytes first
1203        let header_len = data.len().min(256);
1204        if let Some(ft) = file_type::detect_from_magic(&data[..header_len]) {
1205            // Override ICO to Font if extension is .dfont (Mac resource fork)
1206            if ft == FileType::Ico {
1207                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1208                    if ext.eq_ignore_ascii_case("dfont") {
1209                        return Ok(FileType::Font);
1210                    }
1211                }
1212            }
1213            // Override JPEG to JPS if the file extension is .jps
1214            if ft == FileType::Jpeg {
1215                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1216                    if ext.eq_ignore_ascii_case("jps") {
1217                        return Ok(FileType::Jps);
1218                    }
1219                }
1220            }
1221            // Override PLIST to AAE if extension is .aae
1222            if ft == FileType::Plist {
1223                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1224                    if ext.eq_ignore_ascii_case("aae") {
1225                        return Ok(FileType::Aae);
1226                    }
1227                }
1228            }
1229            // Override XMP to PLIST/AAE if extension is .plist or .aae
1230            if ft == FileType::Xmp {
1231                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1232                    if ext.eq_ignore_ascii_case("plist") {
1233                        return Ok(FileType::Plist);
1234                    }
1235                    if ext.eq_ignore_ascii_case("aae") {
1236                        return Ok(FileType::Aae);
1237                    }
1238                }
1239            }
1240            // Override to PhotoCD if extension is .pcd (file starts with 0xFF padding)
1241            if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1242                if ext.eq_ignore_ascii_case("pcd") && data.len() >= 2056
1243                    && &data[2048..2055] == b"PCD_IPI"
1244                {
1245                    return Ok(FileType::PhotoCd);
1246                }
1247            }
1248            // Override MP3 to MPC/APE/WavPack if extension says otherwise
1249            if ft == FileType::Mp3 {
1250                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1251                    if ext.eq_ignore_ascii_case("mpc") {
1252                        return Ok(FileType::Mpc);
1253                    }
1254                    if ext.eq_ignore_ascii_case("ape") {
1255                        return Ok(FileType::Ape);
1256                    }
1257                    if ext.eq_ignore_ascii_case("wv") {
1258                        return Ok(FileType::WavPack);
1259                    }
1260                }
1261            }
1262            // For ZIP files, check if it's an EIP (by extension) or OpenDocument format
1263            if ft == FileType::Zip {
1264                // Check extension first for EIP
1265                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1266                    if ext.eq_ignore_ascii_case("eip") {
1267                        return Ok(FileType::Eip);
1268                    }
1269                }
1270                if let Some(od_type) = detect_opendocument_type(data) {
1271                    return Ok(od_type);
1272                }
1273            }
1274            return Ok(ft);
1275        }
1276
1277        // Fall back to extension
1278        if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1279            if let Some(ft) = file_type::detect_from_extension(ext) {
1280                return Ok(ft);
1281            }
1282        }
1283
1284        let ext_str = path
1285            .extension()
1286            .and_then(|e| e.to_str())
1287            .unwrap_or("unknown");
1288        Err(Error::UnsupportedFileType(ext_str.to_string()))
1289    }
1290
1291    /// Dispatch to the appropriate format reader.
1292
1293    fn process_file(&self, data: &[u8], file_type: FileType) -> Result<Vec<Tag>> {
1294        match file_type {
1295            FileType::Jpeg | FileType::Jps => formats::jpeg::read_jpeg(data),
1296            FileType::Png | FileType::Mng => formats::png::read_png(data),
1297            // All TIFF-based formats (TIFF + most RAW formats)
1298            FileType::Tiff
1299            | FileType::Btf
1300            | FileType::Dng
1301            | FileType::Cr2
1302            | FileType::Nef
1303            | FileType::Arw
1304            | FileType::Sr2
1305            | FileType::Orf
1306            | FileType::Pef
1307            | FileType::Erf
1308            | FileType::Fff
1309            | FileType::Rwl
1310            | FileType::Mef
1311            | FileType::Srw
1312            | FileType::Gpr
1313            | FileType::Arq
1314            | FileType::ThreeFR
1315            | FileType::Dcr
1316            | FileType::Rw2
1317            | FileType::Srf => formats::tiff::read_tiff(data),
1318            // Phase One IIQ: TIFF + PhaseOne maker note block
1319            FileType::Iiq => formats::misc::read_iiq(data),
1320            // Image formats
1321            FileType::Gif => formats::gif::read_gif(data),
1322            FileType::Bmp => formats::bmp::read_bmp(data),
1323            FileType::WebP | FileType::Avi | FileType::Wav => formats::riff::read_riff(data),
1324            FileType::Psd => formats::psd::read_psd(data),
1325            // Audio formats
1326            FileType::Mp3 => formats::id3::read_mp3(data),
1327            FileType::Flac => formats::flac::read_flac(data),
1328            FileType::Ogg | FileType::Opus => formats::ogg::read_ogg(data),
1329            FileType::Aiff => formats::aiff::read_aiff(data),
1330            // Video formats
1331            FileType::Mp4
1332            | FileType::QuickTime
1333            | FileType::M4a
1334            | FileType::ThreeGP
1335            | FileType::Heif
1336            | FileType::Avif
1337            | FileType::Cr3
1338            | FileType::F4v
1339            | FileType::Mqv
1340            | FileType::Lrv => formats::quicktime::read_quicktime(data),
1341            FileType::Mkv | FileType::WebM => formats::matroska::read_matroska(data),
1342            FileType::Asf | FileType::Wmv | FileType::Wma => formats::asf::read_asf(data),
1343            FileType::Wtv => formats::wtv::read_wtv(data),
1344            // RAW formats with custom containers
1345            FileType::Crw => formats::canon_raw::read_crw(data),
1346            FileType::Raf => formats::raf::read_raf(data),
1347            FileType::Mrw => formats::mrw::read_mrw(data),
1348            FileType::Mrc => formats::mrc::read_mrc(data),
1349            // Image formats
1350            FileType::Jp2 => formats::jp2::read_jp2(data),
1351            FileType::J2c => formats::jp2::read_j2c(data),
1352            FileType::Jxl => formats::jp2::read_jxl(data),
1353            FileType::Ico => formats::ico::read_ico(data),
1354            FileType::Icc => formats::icc::read_icc(data),
1355            // Documents
1356            FileType::Pdf => formats::pdf::read_pdf(data),
1357            FileType::PostScript => {
1358                // PFA fonts start with %!PS-AdobeFont or %!FontType1
1359                if data.starts_with(b"%!PS-AdobeFont") || data.starts_with(b"%!FontType1") {
1360                    formats::font::read_pfa(data).or_else(|_| formats::postscript::read_postscript(data))
1361                } else {
1362                    formats::postscript::read_postscript(data)
1363                }
1364            }
1365            FileType::Eip => formats::capture_one::read_eip(data),
1366            FileType::Zip | FileType::Docx | FileType::Xlsx | FileType::Pptx
1367            | FileType::Doc | FileType::Xls | FileType::Ppt => formats::zip::read_zip(data),
1368            FileType::Rtf => formats::rtf::read_rtf(data),
1369            FileType::InDesign => formats::misc::read_indesign(data),
1370            FileType::Pcap => formats::misc::read_pcap(data),
1371            FileType::Pcapng => formats::misc::read_pcapng(data),
1372            // Canon VRD / DR4
1373            FileType::Vrd => formats::canon_vrd::read_vrd(data).or_else(|_| Ok(Vec::new())),
1374            FileType::Dr4 => formats::canon_vrd::read_dr4(data).or_else(|_| Ok(Vec::new())),
1375            // Metadata / Other
1376            FileType::Xmp => formats::xmp_file::read_xmp(data),
1377            FileType::Svg => formats::misc::read_svg(data),
1378            FileType::Html => {
1379                // SVG files that weren't detected by magic (e.g., via extension fallback)
1380                let is_svg = data.windows(4).take(512).any(|w| w == b"<svg");
1381                if is_svg {
1382                    formats::misc::read_svg(data)
1383                } else {
1384                    formats::html::read_html(data)
1385                }
1386            }
1387            FileType::Exe => formats::exe::read_exe(data),
1388            FileType::Font => {
1389                // AFM: Adobe Font Metrics text file
1390                if data.starts_with(b"StartFontMetrics") {
1391                    return formats::font::read_afm(data);
1392                }
1393                // PFA: PostScript Type 1 ASCII font
1394                if data.starts_with(b"%!PS-AdobeFont") || data.starts_with(b"%!FontType1") {
1395                    return formats::font::read_pfa(data).or_else(|_| Ok(Vec::new()));
1396                }
1397                // PFB: PostScript Type 1 Binary font
1398                if data.len() >= 2 && data[0] == 0x80 && (data[1] == 0x01 || data[1] == 0x02) {
1399                    return formats::font::read_pfb(data).or_else(|_| Ok(Vec::new()));
1400                }
1401                formats::font::read_font(data)
1402            }
1403            // Audio with ID3
1404            FileType::WavPack | FileType::Dsf => formats::id3::read_mp3(data),
1405            FileType::Ape => formats::ape::read_ape(data),
1406            FileType::Mpc => formats::ape::read_mpc(data),
1407            FileType::Aac => formats::misc::read_aac(data),
1408            FileType::RealAudio => {
1409                formats::misc::read_real_audio(data).or_else(|_| Ok(Vec::new()))
1410            }
1411            FileType::RealMedia => {
1412                formats::misc::read_real_media(data).or_else(|_| Ok(Vec::new()))
1413            }
1414            // Misc formats
1415            FileType::Czi => formats::misc::read_czi(data).or_else(|_| Ok(Vec::new())),
1416            FileType::PhotoCd => formats::misc::read_photo_cd(data).or_else(|_| Ok(Vec::new())),
1417            FileType::Dicom => formats::dicom::read_dicom(data),
1418            FileType::Fits => formats::misc::read_fits(data),
1419            FileType::Flv => formats::misc::read_flv(data),
1420            FileType::Mxf => formats::misc::read_mxf(data).or_else(|_| Ok(Vec::new())),
1421            FileType::Swf => formats::misc::read_swf(data),
1422            FileType::Hdr => formats::misc::read_hdr(data),
1423            FileType::DjVu => formats::djvu::read_djvu(data),
1424            FileType::Xcf => formats::gimp::read_xcf(data),
1425            FileType::Mie => formats::mie::read_mie(data),
1426            FileType::Lfp => formats::lytro::read_lfp(data),
1427            // FileType::Miff dispatched via string extension below
1428            FileType::Fpf => formats::flir_fpf::read_fpf(data),
1429            FileType::Flif => formats::misc::read_flif(data),
1430            FileType::Bpg => formats::misc::read_bpg(data),
1431            FileType::Pcx => formats::misc::read_pcx(data),
1432            FileType::Pict => formats::misc::read_pict(data),
1433            FileType::M2ts => formats::misc::read_m2ts(data, self.options.extract_embedded),
1434            FileType::Gzip => formats::misc::read_gzip(data),
1435            FileType::Rar => formats::misc::read_rar(data),
1436            FileType::Dss => formats::misc::read_dss(data),
1437            FileType::Moi => formats::misc::read_moi(data),
1438            FileType::MacOs => formats::misc::read_macos(data),
1439            FileType::Json => formats::misc::read_json(data),
1440            // New formats
1441            FileType::Pgf => formats::pgf::read_pgf(data),
1442            FileType::Xisf => formats::xisf::read_xisf(data),
1443            FileType::Torrent => formats::torrent::read_torrent(data),
1444            FileType::Mobi => formats::palm::read_palm(data),
1445            FileType::Psp => formats::psp::read_psp(data),
1446            FileType::SonyPmp => formats::sony_pmp::read_sony_pmp(data),
1447            FileType::Audible => formats::audible::read_audible(data),
1448            FileType::Exr => formats::openexr::read_openexr(data),
1449            // New formats
1450            FileType::Plist => {
1451                if data.starts_with(b"bplist") {
1452                    formats::plist::read_binary_plist_tags(data)
1453                } else {
1454                    formats::plist::read_xml_plist(data)
1455                }
1456            }
1457            FileType::Aae => {
1458                if data.starts_with(b"bplist") {
1459                    formats::plist::read_binary_plist_tags(data)
1460                } else {
1461                    formats::plist::read_aae_plist(data)
1462                }
1463            }
1464            FileType::KyoceraRaw => formats::misc::read_kyocera_raw(data),
1465            FileType::PortableFloatMap => formats::misc::read_pfm(data),
1466            FileType::Ods | FileType::Odt | FileType::Odp | FileType::Odg |
1467            FileType::Odf | FileType::Odb | FileType::Odi | FileType::Odc => formats::zip::read_zip(data),
1468            _ => Err(Error::UnsupportedFileType(format!("{}", file_type))),
1469        }
1470    }
1471
1472    /// Fallback: try to read file based on extension for formats without magic detection.
1473    fn process_by_extension(&self, data: &[u8], path: &Path) -> Result<Vec<Tag>> {
1474        let ext = path
1475            .extension()
1476            .and_then(|e| e.to_str())
1477            .unwrap_or("")
1478            .to_ascii_lowercase();
1479
1480        match ext.as_str() {
1481            "ppm" | "pgm" | "pbm" => formats::misc::read_ppm(data),
1482            "pfm" => {
1483                // PFM can be Portable Float Map or Printer Font Metrics
1484                if data.len() >= 3 && data[0] == b'P' && (data[1] == b'f' || data[1] == b'F') {
1485                    formats::misc::read_ppm(data)
1486                } else {
1487                    Ok(Vec::new()) // Printer Font Metrics
1488                }
1489            }
1490            "json" => formats::misc::read_json(data),
1491            "svg" => formats::misc::read_svg(data),
1492            "ram" => formats::misc::read_ram(data).or_else(|_| Ok(Vec::new())),
1493            "txt" | "log" | "igc" => {
1494                Ok(compute_text_tags(data, false))
1495            }
1496            "csv" => {
1497                Ok(compute_text_tags(data, true))
1498            }
1499            "url" => formats::lnk::read_url(data).or_else(|_| Ok(Vec::new())),
1500            "lnk" => formats::lnk::read_lnk(data).or_else(|_| Ok(Vec::new())),
1501            "gpx" | "kml" | "xml" | "inx" => formats::xmp_file::read_xmp(data),
1502            "plist" => {
1503                if data.starts_with(b"bplist") {
1504                    formats::plist::read_binary_plist_tags(data).or_else(|_| Ok(Vec::new()))
1505                } else {
1506                    formats::plist::read_xml_plist(data).or_else(|_| Ok(Vec::new()))
1507                }
1508            }
1509            "aae" => {
1510                if data.starts_with(b"bplist") {
1511                    formats::plist::read_binary_plist_tags(data).or_else(|_| Ok(Vec::new()))
1512                } else {
1513                    formats::plist::read_aae_plist(data).or_else(|_| Ok(Vec::new()))
1514                }
1515            }
1516            "vcf" | "ics" | "vcard" => {
1517                let s = String::from_utf8_lossy(&data[..data.len().min(100)]);
1518                if s.contains("BEGIN:VCALENDAR") {
1519                    formats::vcard::read_ics(data).or_else(|_| Ok(Vec::new()))
1520                } else {
1521                    formats::vcard::read_vcf(data).or_else(|_| Ok(Vec::new()))
1522                }
1523            }
1524            "xcf" => Ok(Vec::new()),      // GIMP
1525            "vrd" => formats::canon_vrd::read_vrd(data).or_else(|_| Ok(Vec::new())),
1526            "dr4" => formats::canon_vrd::read_dr4(data).or_else(|_| Ok(Vec::new())),
1527            "indd" | "indt" => Ok(Vec::new()), // InDesign
1528            "x3f" => formats::sigma_raw::read_x3f(data).or_else(|_| Ok(Vec::new())),
1529            "mie" => Ok(Vec::new()),       // MIE
1530            "exr" => Ok(Vec::new()),       // OpenEXR
1531            "wpg" => formats::misc::read_wpg(data).or_else(|_| Ok(Vec::new())),
1532            "moi" => formats::misc::read_moi(data).or_else(|_| Ok(Vec::new())),
1533            "macos" => formats::misc::read_macos(data).or_else(|_| Ok(Vec::new())),
1534            "dpx" => formats::dpx::read_dpx(data).or_else(|_| Ok(Vec::new())),
1535            "r3d" => formats::red::read_r3d(data).or_else(|_| Ok(Vec::new())),
1536            "tnef" => formats::tnef::read_tnef(data).or_else(|_| Ok(Vec::new())),
1537            "ppt" | "fpx" => formats::flashpix::read_fpx(data).or_else(|_| Ok(Vec::new())),
1538            "fpf" => formats::flir_fpf::read_fpf(data).or_else(|_| Ok(Vec::new())),
1539            "itc" => formats::misc::read_itc(data).or_else(|_| Ok(Vec::new())),
1540            "dv" => formats::dv::read_dv(data, data.len() as u64).or_else(|_| Ok(Vec::new())),
1541            "czi" => formats::misc::read_czi(data).or_else(|_| Ok(Vec::new())),
1542            "miff" => formats::miff::read_miff(data).or_else(|_| Ok(Vec::new())),
1543            "lfp" | "mrc"
1544            | "dss" | "mobi" | "psp" | "pgf" | "raw"
1545            | "pmp" | "torrent"
1546            | "xisf" | "mxf"
1547            | "dfont" => Ok(Vec::new()),
1548            "iso" => formats::iso::read_iso(data).or_else(|_| Ok(Vec::new())),
1549            "afm" => formats::font::read_afm(data).or_else(|_| Ok(Vec::new())),
1550            "pfa" => formats::font::read_pfa(data).or_else(|_| Ok(Vec::new())),
1551            "pfb" => formats::font::read_pfb(data).or_else(|_| Ok(Vec::new())),
1552            _ => Err(Error::UnsupportedFileType(ext)),
1553        }
1554    }
1555}
1556
1557impl Default for ExifTool {
1558    fn default() -> Self {
1559        Self::new()
1560    }
1561}
1562
1563/// Detect OpenDocument file type by reading the `mimetype` entry from a ZIP.
1564/// Returns None if not an OpenDocument file.
1565fn detect_opendocument_type(data: &[u8]) -> Option<FileType> {
1566    // OpenDocument ZIPs have "mimetype" as the FIRST local file entry (uncompressed)
1567    if data.len() < 30 || data[0..4] != [0x50, 0x4B, 0x03, 0x04] {
1568        return None;
1569    }
1570    let compression = u16::from_le_bytes([data[8], data[9]]);
1571    let compressed_size = u32::from_le_bytes([data[18], data[19], data[20], data[21]]) as usize;
1572    let name_len = u16::from_le_bytes([data[26], data[27]]) as usize;
1573    let extra_len = u16::from_le_bytes([data[28], data[29]]) as usize;
1574    let name_start = 30;
1575    if name_start + name_len > data.len() {
1576        return None;
1577    }
1578    let filename = std::str::from_utf8(&data[name_start..name_start + name_len]).unwrap_or("");
1579    if filename != "mimetype" || compression != 0 {
1580        return None;
1581    }
1582    let content_start = name_start + name_len + extra_len;
1583    let content_end = (content_start + compressed_size).min(data.len());
1584    if content_start >= content_end {
1585        return None;
1586    }
1587    let mime = std::str::from_utf8(&data[content_start..content_end]).unwrap_or("").trim();
1588    match mime {
1589        "application/vnd.oasis.opendocument.spreadsheet" => Some(FileType::Ods),
1590        "application/vnd.oasis.opendocument.text" => Some(FileType::Odt),
1591        "application/vnd.oasis.opendocument.presentation" => Some(FileType::Odp),
1592        "application/vnd.oasis.opendocument.graphics" => Some(FileType::Odg),
1593        "application/vnd.oasis.opendocument.formula" => Some(FileType::Odf),
1594        "application/vnd.oasis.opendocument.database" => Some(FileType::Odb),
1595        "application/vnd.oasis.opendocument.image" => Some(FileType::Odi),
1596        "application/vnd.oasis.opendocument.chart" => Some(FileType::Odc),
1597        _ => None,
1598    }
1599}
1600
1601/// Detect the file type of a file at the given path.
1602pub fn get_file_type<P: AsRef<Path>>(path: P) -> Result<FileType> {
1603    let path = path.as_ref();
1604    let mut file = fs::File::open(path).map_err(Error::Io)?;
1605    let mut header = [0u8; 256];
1606    use std::io::Read;
1607    let n = file.read(&mut header).map_err(Error::Io)?;
1608
1609    if let Some(ft) = file_type::detect_from_magic(&header[..n]) {
1610        return Ok(ft);
1611    }
1612
1613    if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1614        if let Some(ft) = file_type::detect_from_extension(ext) {
1615            return Ok(ft);
1616        }
1617    }
1618
1619    Err(Error::UnsupportedFileType("unknown".into()))
1620}
1621
/// Classification of EXIF tags into IFD groups.
enum ExifIfdGroup {
    /// The main image directory (IFD0).
    Ifd0,
    /// The Exif sub-IFD.
    ExifIfd,
    /// The GPS sub-IFD.
    Gps,
}
1628
1629/// Determine which IFD a tag belongs to based on its ID.
1630fn classify_exif_tag(tag_id: u16) -> ExifIfdGroup {
1631    match tag_id {
1632        // ExifIFD tags
1633        0x829A..=0x829D | 0x8822..=0x8827 | 0x8830 | 0x9000..=0x9292
1634        | 0xA000..=0xA435 => ExifIfdGroup::ExifIfd,
1635        // GPS tags
1636        0x0000..=0x001F if tag_id <= 0x001F => ExifIfdGroup::Gps,
1637        // Everything else → IFD0
1638        _ => ExifIfdGroup::Ifd0,
1639    }
1640}
1641
1642/// Extract existing EXIF entries from a JPEG file's APP1 segment.
1643fn extract_existing_exif_entries(jpeg_data: &[u8], target_bo: ByteOrderMark) -> Vec<exif_writer::IfdEntry> {
1644    let mut entries = Vec::new();
1645
1646    // Find EXIF APP1 segment
1647    let mut pos = 2; // Skip SOI
1648    while pos + 4 <= jpeg_data.len() {
1649        if jpeg_data[pos] != 0xFF {
1650            pos += 1;
1651            continue;
1652        }
1653        let marker = jpeg_data[pos + 1];
1654        pos += 2;
1655
1656        if marker == 0xDA || marker == 0xD9 {
1657            break; // SOS or EOI
1658        }
1659        if marker == 0xFF || marker == 0x00 || marker == 0xD8 || (0xD0..=0xD7).contains(&marker) {
1660            continue;
1661        }
1662
1663        if pos + 2 > jpeg_data.len() {
1664            break;
1665        }
1666        let seg_len = u16::from_be_bytes([jpeg_data[pos], jpeg_data[pos + 1]]) as usize;
1667        if seg_len < 2 || pos + seg_len > jpeg_data.len() {
1668            break;
1669        }
1670
1671        let seg_data = &jpeg_data[pos + 2..pos + seg_len];
1672
1673        // EXIF APP1
1674        if marker == 0xE1 && seg_data.len() > 14 && seg_data.starts_with(b"Exif\0\0") {
1675            let tiff_data = &seg_data[6..];
1676            extract_ifd_entries(tiff_data, target_bo, &mut entries);
1677            break;
1678        }
1679
1680        pos += seg_len;
1681    }
1682
1683    entries
1684}
1685
1686/// Extract IFD entries from TIFF data, re-encoding values in the target byte order.
1687fn extract_ifd_entries(
1688    tiff_data: &[u8],
1689    target_bo: ByteOrderMark,
1690    entries: &mut Vec<exif_writer::IfdEntry>,
1691) {
1692    use crate::metadata::exif::parse_tiff_header;
1693
1694    let header = match parse_tiff_header(tiff_data) {
1695        Ok(h) => h,
1696        Err(_) => return,
1697    };
1698
1699    let src_bo = header.byte_order;
1700
1701    // Read IFD0
1702    read_ifd_for_merge(tiff_data, header.ifd0_offset as usize, src_bo, target_bo, entries);
1703
1704    // Find ExifIFD and GPS pointers
1705    let ifd0_offset = header.ifd0_offset as usize;
1706    if ifd0_offset + 2 > tiff_data.len() {
1707        return;
1708    }
1709    let count = read_u16_bo(tiff_data, ifd0_offset, src_bo) as usize;
1710    for i in 0..count {
1711        let eoff = ifd0_offset + 2 + i * 12;
1712        if eoff + 12 > tiff_data.len() {
1713            break;
1714        }
1715        let tag = read_u16_bo(tiff_data, eoff, src_bo);
1716        let value_off = read_u32_bo(tiff_data, eoff + 8, src_bo) as usize;
1717
1718        match tag {
1719            0x8769 => read_ifd_for_merge(tiff_data, value_off, src_bo, target_bo, entries),
1720            0x8825 => read_ifd_for_merge(tiff_data, value_off, src_bo, target_bo, entries),
1721            _ => {}
1722        }
1723    }
1724}
1725
1726/// Read a single IFD and extract entries for merge.
1727fn read_ifd_for_merge(
1728    data: &[u8],
1729    offset: usize,
1730    src_bo: ByteOrderMark,
1731    target_bo: ByteOrderMark,
1732    entries: &mut Vec<exif_writer::IfdEntry>,
1733) {
1734    if offset + 2 > data.len() {
1735        return;
1736    }
1737    let count = read_u16_bo(data, offset, src_bo) as usize;
1738
1739    for i in 0..count {
1740        let eoff = offset + 2 + i * 12;
1741        if eoff + 12 > data.len() {
1742            break;
1743        }
1744
1745        let tag = read_u16_bo(data, eoff, src_bo);
1746        let dtype = read_u16_bo(data, eoff + 2, src_bo);
1747        let count_val = read_u32_bo(data, eoff + 4, src_bo);
1748
1749        // Skip sub-IFD pointers and MakerNote
1750        if tag == 0x8769 || tag == 0x8825 || tag == 0xA005 || tag == 0x927C {
1751            continue;
1752        }
1753
1754        let type_size = match dtype {
1755            1 | 2 | 6 | 7 => 1usize,
1756            3 | 8 => 2,
1757            4 | 9 | 11 | 13 => 4,
1758            5 | 10 | 12 => 8,
1759            _ => continue,
1760        };
1761
1762        let total_size = type_size * count_val as usize;
1763        let raw_data = if total_size <= 4 {
1764            data[eoff + 8..eoff + 12].to_vec()
1765        } else {
1766            let voff = read_u32_bo(data, eoff + 8, src_bo) as usize;
1767            if voff + total_size > data.len() {
1768                continue;
1769            }
1770            data[voff..voff + total_size].to_vec()
1771        };
1772
1773        // Re-encode multi-byte values if byte orders differ
1774        let final_data = if src_bo != target_bo && type_size > 1 {
1775            reencode_bytes(&raw_data, dtype, count_val as usize, src_bo, target_bo)
1776        } else {
1777            raw_data[..total_size].to_vec()
1778        };
1779
1780        let format = match dtype {
1781            1 => exif_writer::ExifFormat::Byte,
1782            2 => exif_writer::ExifFormat::Ascii,
1783            3 => exif_writer::ExifFormat::Short,
1784            4 => exif_writer::ExifFormat::Long,
1785            5 => exif_writer::ExifFormat::Rational,
1786            6 => exif_writer::ExifFormat::SByte,
1787            7 => exif_writer::ExifFormat::Undefined,
1788            8 => exif_writer::ExifFormat::SShort,
1789            9 => exif_writer::ExifFormat::SLong,
1790            10 => exif_writer::ExifFormat::SRational,
1791            11 => exif_writer::ExifFormat::Float,
1792            12 => exif_writer::ExifFormat::Double,
1793            _ => continue,
1794        };
1795
1796        entries.push(exif_writer::IfdEntry {
1797            tag,
1798            format,
1799            data: final_data,
1800        });
1801    }
1802}
1803
1804/// Re-encode multi-byte values when converting between byte orders.
1805fn reencode_bytes(
1806    data: &[u8],
1807    dtype: u16,
1808    count: usize,
1809    src_bo: ByteOrderMark,
1810    dst_bo: ByteOrderMark,
1811) -> Vec<u8> {
1812    let mut out = Vec::with_capacity(data.len());
1813    match dtype {
1814        3 | 8 => {
1815            // 16-bit
1816            for i in 0..count {
1817                let v = read_u16_bo(data, i * 2, src_bo);
1818                match dst_bo {
1819                    ByteOrderMark::LittleEndian => out.extend_from_slice(&v.to_le_bytes()),
1820                    ByteOrderMark::BigEndian => out.extend_from_slice(&v.to_be_bytes()),
1821                }
1822            }
1823        }
1824        4 | 9 | 11 | 13 => {
1825            // 32-bit
1826            for i in 0..count {
1827                let v = read_u32_bo(data, i * 4, src_bo);
1828                match dst_bo {
1829                    ByteOrderMark::LittleEndian => out.extend_from_slice(&v.to_le_bytes()),
1830                    ByteOrderMark::BigEndian => out.extend_from_slice(&v.to_be_bytes()),
1831                }
1832            }
1833        }
1834        5 | 10 => {
1835            // Rational (two 32-bit)
1836            for i in 0..count {
1837                let n = read_u32_bo(data, i * 8, src_bo);
1838                let d = read_u32_bo(data, i * 8 + 4, src_bo);
1839                match dst_bo {
1840                    ByteOrderMark::LittleEndian => {
1841                        out.extend_from_slice(&n.to_le_bytes());
1842                        out.extend_from_slice(&d.to_le_bytes());
1843                    }
1844                    ByteOrderMark::BigEndian => {
1845                        out.extend_from_slice(&n.to_be_bytes());
1846                        out.extend_from_slice(&d.to_be_bytes());
1847                    }
1848                }
1849            }
1850        }
1851        12 => {
1852            // 64-bit double
1853            for i in 0..count {
1854                let mut bytes = [0u8; 8];
1855                bytes.copy_from_slice(&data[i * 8..i * 8 + 8]);
1856                if src_bo != dst_bo {
1857                    bytes.reverse();
1858                }
1859                out.extend_from_slice(&bytes);
1860            }
1861        }
1862        _ => out.extend_from_slice(data),
1863    }
1864    out
1865}
1866
1867fn read_u16_bo(data: &[u8], offset: usize, bo: ByteOrderMark) -> u16 {
1868    if offset + 2 > data.len() { return 0; }
1869    match bo {
1870        ByteOrderMark::LittleEndian => u16::from_le_bytes([data[offset], data[offset + 1]]),
1871        ByteOrderMark::BigEndian => u16::from_be_bytes([data[offset], data[offset + 1]]),
1872    }
1873}
1874
1875fn read_u32_bo(data: &[u8], offset: usize, bo: ByteOrderMark) -> u32 {
1876    if offset + 4 > data.len() { return 0; }
1877    match bo {
1878        ByteOrderMark::LittleEndian => u32::from_le_bytes([data[offset], data[offset + 1], data[offset + 2], data[offset + 3]]),
1879        ByteOrderMark::BigEndian => u32::from_be_bytes([data[offset], data[offset + 1], data[offset + 2], data[offset + 3]]),
1880    }
1881}
1882
1883/// Map tag name to numeric EXIF tag ID.
1884fn tag_name_to_id(name: &str) -> Option<u16> {
1885    encode_exif_tag(name, "", "", ByteOrderMark::BigEndian).map(|(id, _, _)| id)
1886}
1887
/// Turn a tag value into a string that is safe to use as a file name.
///
/// Path separators, the characters `: * ? " < > |` and all control characters
/// are replaced by `_`; surrounding whitespace is then trimmed.
fn value_to_filename(value: &str) -> String {
    const RESERVED: [char; 9] = ['/', '\\', ':', '*', '?', '"', '<', '>', '|'];

    let mut sanitized = String::with_capacity(value.len());
    for ch in value.chars() {
        let unsafe_char = RESERVED.contains(&ch) || ch.is_control();
        sanitized.push(if unsafe_char { '_' } else { ch });
    }
    sanitized.trim().to_string()
}
1901
/// Parse a time shift such as `"+1:0:0"` (add 1 hour) or `"-0:30:0"`
/// (subtract 30 minutes).
///
/// The string is an optional `+`/`-` sign followed by one to three
/// colon-separated numbers read as hours, then minutes, then seconds; missing
/// trailing fields are zero.
///
/// Returns `(sign, hours, minutes, seconds)` with `sign` being `1` or `-1`,
/// or `None` when a field is not a number or there are more than three fields.
pub fn parse_date_shift(shift: &str) -> Option<(i32, u32, u32, u32)> {
    let (sign, magnitude) = match shift.strip_prefix('-') {
        Some(rest) => (-1, rest),
        None => (1, shift.strip_prefix('+').unwrap_or(shift)),
    };

    // hours, minutes, seconds — filled left to right
    let mut hms = [0u32; 3];
    let mut filled = 0usize;
    for field in magnitude.split(':') {
        if filled == hms.len() {
            return None; // more than three fields
        }
        hms[filled] = field.parse().ok()?;
        filled += 1;
    }

    Some((sign, hms[0], hms[1], hms[2]))
}
1933
1934/// Shift a datetime string by the given amount.
1935/// Input format: "YYYY:MM:DD HH:MM:SS"
1936pub fn shift_datetime(datetime: &str, shift: &str) -> Option<String> {
1937    let (sign, hours, minutes, seconds) = parse_date_shift(shift)?;
1938
1939    // Parse date/time
1940    if datetime.len() < 19 {
1941        return None;
1942    }
1943    let year: i32 = datetime[0..4].parse().ok()?;
1944    let month: u32 = datetime[5..7].parse().ok()?;
1945    let day: u32 = datetime[8..10].parse().ok()?;
1946    let hour: u32 = datetime[11..13].parse().ok()?;
1947    let min: u32 = datetime[14..16].parse().ok()?;
1948    let sec: u32 = datetime[17..19].parse().ok()?;
1949
1950    // Convert to total seconds, shift, convert back
1951    let total_secs = (hour * 3600 + min * 60 + sec) as i64
1952        + sign as i64 * (hours * 3600 + minutes * 60 + seconds) as i64;
1953
1954    let days_shift = if total_secs < 0 {
1955        -1 - (-total_secs - 1) as i64 / 86400
1956    } else {
1957        total_secs / 86400
1958    };
1959
1960    let time_secs = ((total_secs % 86400) + 86400) % 86400;
1961    let new_hour = (time_secs / 3600) as u32;
1962    let new_min = ((time_secs % 3600) / 60) as u32;
1963    let new_sec = (time_secs % 60) as u32;
1964
1965    // Simple day shifting (doesn't handle month/year rollover perfectly for large shifts)
1966    let mut new_day = day as i32 + days_shift as i32;
1967    let mut new_month = month;
1968    let mut new_year = year;
1969
1970    let days_in_month = |m: u32, y: i32| -> i32 {
1971        match m {
1972            1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
1973            4 | 6 | 9 | 11 => 30,
1974            2 => if (y % 4 == 0 && y % 100 != 0) || y % 400 == 0 { 29 } else { 28 },
1975            _ => 30,
1976        }
1977    };
1978
1979    while new_day > days_in_month(new_month, new_year) {
1980        new_day -= days_in_month(new_month, new_year);
1981        new_month += 1;
1982        if new_month > 12 {
1983            new_month = 1;
1984            new_year += 1;
1985        }
1986    }
1987    while new_day < 1 {
1988        new_month = if new_month == 1 { 12 } else { new_month - 1 };
1989        if new_month == 12 {
1990            new_year -= 1;
1991        }
1992        new_day += days_in_month(new_month, new_year);
1993    }
1994
1995    Some(format!(
1996        "{:04}:{:02}:{:02} {:02}:{:02}:{:02}",
1997        new_year, new_month, new_day, new_hour, new_min, new_sec
1998    ))
1999}
2000
2001fn unix_to_datetime(secs: i64) -> String {
2002    let days = secs / 86400;
2003    let time = secs % 86400;
2004    let h = time / 3600;
2005    let m = (time % 3600) / 60;
2006    let s = time % 60;
2007    let mut y = 1970i32;
2008    let mut rem = days;
2009    loop {
2010        let dy = if (y % 4 == 0 && y % 100 != 0) || y % 400 == 0 { 366 } else { 365 };
2011        if rem < dy { break; }
2012        rem -= dy;
2013        y += 1;
2014    }
2015    let leap = (y % 4 == 0 && y % 100 != 0) || y % 400 == 0;
2016    let months = [31, if leap { 29 } else { 28 }, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
2017    let mut mo = 1;
2018    for &dm in &months {
2019        if rem < dm { break; }
2020        rem -= dm;
2021        mo += 1;
2022    }
2023    format!("{:04}:{:02}:{:02} {:02}:{:02}:{:02}", y, mo, rem + 1, h, m, s)
2024}
2025
/// Render a byte count for display.
///
/// Values below 1024 are printed as whole `bytes`; larger values are scaled by
/// powers of 1024 and printed with one decimal as `kB`, `MB` or `GB`.
fn format_file_size(bytes: u64) -> String {
    const KILO: u64 = 1024;
    const MEGA: u64 = KILO * 1024;
    const GIGA: u64 = MEGA * 1024;

    let (unit_size, suffix) = match bytes {
        b if b < KILO => return format!("{} bytes", b),
        b if b < MEGA => (KILO, "kB"),
        b if b < GIGA => (MEGA, "MB"),
        _ => (GIGA, "GB"),
    };
    format!("{:.1} {}", bytes as f64 / unit_size as f64, suffix)
}
2037
/// Report whether a tag name is one that is typically stored in XMP.
///
/// The comparison is case-insensitive (the name is lower-cased first).
fn is_xmp_tag(tag: &str) -> bool {
    const XMP_TAG_NAMES: [&str; 9] = [
        "title",
        "description",
        "subject",
        "creator",
        "rights",
        "keywords",
        "rating",
        "label",
        "hierarchicalsubject",
    ];

    let lowered = tag.to_lowercase();
    XMP_TAG_NAMES.iter().any(|&known| known == lowered)
}
2046
2047/// Encode an EXIF tag value to binary.
2048/// Returns (tag_id, format, encoded_data) or None if tag is unknown.
2049fn encode_exif_tag(
2050    tag_name: &str,
2051    value: &str,
2052    _group: &str,
2053    bo: ByteOrderMark,
2054) -> Option<(u16, exif_writer::ExifFormat, Vec<u8>)> {
2055    let tag_lower = tag_name.to_lowercase();
2056
2057    // Map common tag names to EXIF tag IDs and formats
2058    let (tag_id, format): (u16, exif_writer::ExifFormat) = match tag_lower.as_str() {
2059        // IFD0 string tags
2060        "imagedescription" => (0x010E, exif_writer::ExifFormat::Ascii),
2061        "make" => (0x010F, exif_writer::ExifFormat::Ascii),
2062        "model" => (0x0110, exif_writer::ExifFormat::Ascii),
2063        "software" => (0x0131, exif_writer::ExifFormat::Ascii),
2064        "modifydate" | "datetime" => (0x0132, exif_writer::ExifFormat::Ascii),
2065        "artist" => (0x013B, exif_writer::ExifFormat::Ascii),
2066        "copyright" => (0x8298, exif_writer::ExifFormat::Ascii),
2067        // IFD0 numeric tags
2068        "orientation" => (0x0112, exif_writer::ExifFormat::Short),
2069        "xresolution" => (0x011A, exif_writer::ExifFormat::Rational),
2070        "yresolution" => (0x011B, exif_writer::ExifFormat::Rational),
2071        "resolutionunit" => (0x0128, exif_writer::ExifFormat::Short),
2072        // ExifIFD tags
2073        "datetimeoriginal" => (0x9003, exif_writer::ExifFormat::Ascii),
2074        "createdate" | "datetimedigitized" => (0x9004, exif_writer::ExifFormat::Ascii),
2075        "usercomment" => (0x9286, exif_writer::ExifFormat::Undefined),
2076        "imageuniqueid" => (0xA420, exif_writer::ExifFormat::Ascii),
2077        "ownername" | "cameraownername" => (0xA430, exif_writer::ExifFormat::Ascii),
2078        "serialnumber" | "bodyserialnumber" => (0xA431, exif_writer::ExifFormat::Ascii),
2079        "lensmake" => (0xA433, exif_writer::ExifFormat::Ascii),
2080        "lensmodel" => (0xA434, exif_writer::ExifFormat::Ascii),
2081        "lensserialnumber" => (0xA435, exif_writer::ExifFormat::Ascii),
2082        _ => return None,
2083    };
2084
2085    let encoded = match format {
2086        exif_writer::ExifFormat::Ascii => exif_writer::encode_ascii(value),
2087        exif_writer::ExifFormat::Short => {
2088            let v: u16 = value.parse().ok()?;
2089            exif_writer::encode_u16(v, bo)
2090        }
2091        exif_writer::ExifFormat::Long => {
2092            let v: u32 = value.parse().ok()?;
2093            exif_writer::encode_u32(v, bo)
2094        }
2095        exif_writer::ExifFormat::Rational => {
2096            // Parse "N/D" or just "N"
2097            if let Some(slash) = value.find('/') {
2098                let num: u32 = value[..slash].trim().parse().ok()?;
2099                let den: u32 = value[slash + 1..].trim().parse().ok()?;
2100                exif_writer::encode_urational(num, den, bo)
2101            } else if let Ok(v) = value.parse::<f64>() {
2102                // Convert float to rational
2103                let den = 10000u32;
2104                let num = (v * den as f64).round() as u32;
2105                exif_writer::encode_urational(num, den, bo)
2106            } else {
2107                return None;
2108            }
2109        }
2110        exif_writer::ExifFormat::Undefined => {
2111            // UserComment: 8 bytes charset + data
2112            let mut data = vec![0x41, 0x53, 0x43, 0x49, 0x49, 0x00, 0x00, 0x00]; // "ASCII\0\0\0"
2113            data.extend_from_slice(value.as_bytes());
2114            data
2115        }
2116        _ => return None,
2117    };
2118
2119    Some((tag_id, format, encoded))
2120}
2121
2122/// Compute text file tags (from Perl Text.pm).
2123fn compute_text_tags(data: &[u8], is_csv: bool) -> Vec<Tag> {
2124    let mut tags = Vec::new();
2125    let mk = |name: &str, val: String| Tag {
2126        id: crate::tag::TagId::Text(name.into()),
2127        name: name.into(), description: name.into(),
2128        group: crate::tag::TagGroup { family0: "File".into(), family1: "File".into(), family2: "Other".into() },
2129        raw_value: Value::String(val.clone()), print_value: val, priority: 0,
2130    };
2131
2132    // Detect encoding and BOM
2133    let is_ascii = data.iter().all(|&b| b < 128);
2134    let has_utf8_bom = data.starts_with(&[0xEF, 0xBB, 0xBF]);
2135    let has_utf16le_bom = data.starts_with(&[0xFF, 0xFE]) && !data.starts_with(&[0xFF, 0xFE, 0x00, 0x00]);
2136    let has_utf16be_bom = data.starts_with(&[0xFE, 0xFF]);
2137    let has_utf32le_bom = data.starts_with(&[0xFF, 0xFE, 0x00, 0x00]);
2138    let has_utf32be_bom = data.starts_with(&[0x00, 0x00, 0xFE, 0xFF]);
2139
2140    // Detect if file has weird non-text control characters (like multi-byte unicode without BOM)
2141    let has_weird_ctrl = data.iter().any(|&b| (b <= 0x06) || (b >= 0x0e && b <= 0x1a) || (b >= 0x1c && b <= 0x1f) || b == 0x7f);
2142
2143    let (encoding, is_bom, is_utf16) = if has_utf32le_bom {
2144        ("utf-32le", true, false)
2145    } else if has_utf32be_bom {
2146        ("utf-32be", true, false)
2147    } else if has_utf16le_bom {
2148        ("utf-16le", true, true)
2149    } else if has_utf16be_bom {
2150        ("utf-16be", true, true)
2151    } else if has_weird_ctrl {
2152        // Not a text file (has binary-like control chars but no recognized multi-byte marker)
2153        return tags;
2154    } else if is_ascii {
2155        ("us-ascii", false, false)
2156    } else {
2157        // Check UTF-8
2158        let is_valid_utf8 = std::str::from_utf8(data).is_ok();
2159        if is_valid_utf8 {
2160            if has_utf8_bom {
2161                ("utf-8", true, false)
2162            } else {
2163                // Check if it has high bytes suggesting iso-8859-1 vs utf-8
2164                // Perl's IsUTF8: returns >0 if valid UTF-8 with multi-byte, 0 if ASCII, <0 if invalid
2165                // For simplicity: valid UTF-8 without BOM = utf-8
2166                ("utf-8", false, false)
2167            }
2168        } else if !data.iter().any(|&b| b >= 0x80 && b <= 0x9f) {
2169            ("iso-8859-1", false, false)
2170        } else {
2171            ("unknown-8bit", false, false)
2172        }
2173    };
2174
2175    tags.push(mk("MIMEEncoding", encoding.into()));
2176
2177    if is_bom {
2178        tags.push(mk("ByteOrderMark", "Yes".into()));
2179    }
2180
2181    // Count newlines and detect type
2182    let has_cr = data.contains(&b'\r');
2183    let has_lf = data.contains(&b'\n');
2184    let newline_type = if has_cr && has_lf { "Windows CRLF" }
2185        else if has_lf { "Unix LF" }
2186        else if has_cr { "Macintosh CR" }
2187        else { "(none)" };
2188    tags.push(mk("Newlines", newline_type.into()));
2189
2190    if is_csv {
2191        // CSV analysis: detect delimiter, quoting, column count, row count
2192        let text = String::from_utf8_lossy(data);
2193        let mut delim = "";
2194        let mut quot = "";
2195        let mut ncols = 1usize;
2196        let mut nrows = 0usize;
2197
2198        for line in text.lines() {
2199            if nrows == 0 {
2200                // Detect delimiter from first line
2201                let comma_count = line.matches(',').count();
2202                let semi_count = line.matches(';').count();
2203                let tab_count = line.matches('\t').count();
2204                if comma_count > semi_count && comma_count > tab_count {
2205                    delim = ",";
2206                    ncols = comma_count + 1;
2207                } else if semi_count > tab_count {
2208                    delim = ";";
2209                    ncols = semi_count + 1;
2210                } else if tab_count > 0 {
2211                    delim = "\t";
2212                    ncols = tab_count + 1;
2213                } else {
2214                    delim = "";
2215                    ncols = 1;
2216                }
2217                // Detect quoting
2218                if line.contains('"') { quot = "\""; }
2219                else if line.contains('\'') { quot = "'"; }
2220            }
2221            nrows += 1;
2222            if nrows >= 1000 { break; }
2223        }
2224
2225        let delim_display = match delim {
2226            "," => "Comma",
2227            ";" => "Semicolon",
2228            "\t" => "Tab",
2229            _ => "(none)",
2230        };
2231        let quot_display = match quot {
2232            "\"" => "Double quotes",
2233            "'" => "Single quotes",
2234            _ => "(none)",
2235        };
2236
2237        tags.push(mk("Delimiter", delim_display.into()));
2238        tags.push(mk("Quoting", quot_display.into()));
2239        tags.push(mk("ColumnCount", ncols.to_string()));
2240        if nrows > 0 {
2241            tags.push(mk("RowCount", nrows.to_string()));
2242        }
2243    } else if !is_utf16 {
2244        // Line count and word count for plain text files (not UTF-16/32)
2245        let line_count = data.iter().filter(|&&b| b == b'\n').count();
2246        let line_count = if line_count == 0 && !data.is_empty() { 1 } else { line_count };
2247        tags.push(mk("LineCount", line_count.to_string()));
2248
2249        let text = String::from_utf8_lossy(data);
2250        let word_count = text.split_whitespace().count();
2251        tags.push(mk("WordCount", word_count.to_string()));
2252    }
2253
2254    tags
2255}
exiftool_rs/exiftool.rs

exiftool_rs/
exiftool.rs