exiftool_rs/
exiftool.rs

1//! Core ExifTool struct and public API.
2//!
3//! This is the main entry point for reading metadata from files.
4//! Mirrors ExifTool.pm's ImageInfo/ExtractInfo/GetInfo pipeline.
5
6use std::collections::HashMap;
7use std::fs;
8use std::path::Path;
9
10use crate::error::{Error, Result};
11use crate::file_type::{self, FileType};
12use crate::formats;
13use crate::metadata::exif::ByteOrderMark;
14use crate::tag::Tag;
15use crate::value::Value;
16use crate::writer::{exif_writer, iptc_writer, jpeg_writer, matroska_writer, mp4_writer, pdf_writer, png_writer, psd_writer, tiff_writer, webp_writer, xmp_writer};
17
/// Processing options for metadata extraction.
///
/// The `Default` implementation yields: duplicates off, print conversion on,
/// normal scan (`fast_scan == 0`), no tag filter, embedded extraction off.
#[derive(Debug, Clone)]
pub struct Options {
    /// Include duplicate tags (different groups may have same tag name).
    pub duplicates: bool,
    /// Apply print conversions (human-readable values).
    pub print_conv: bool,
    /// Fast scan level: 0=normal, 1=skip composite, 2=skip maker notes, 3=skip thumbnails.
    pub fast_scan: u8,
    /// Only extract these tag names (empty = all).
    pub requested_tags: Vec<String>,
    /// Extract embedded documents/data (video frames, etc.). Level: 0=off, 1=-ee, 2=-ee2, 3=-ee3.
    pub extract_embedded: u8,
}
32
33impl Default for Options {
34    fn default() -> Self {
35        Self {
36            duplicates: false,
37            print_conv: true,
38            fast_scan: 0,
39            requested_tags: Vec::new(),
40            extract_embedded: 0,
41        }
42    }
43}
44
/// A queued tag change for writing.
///
/// Instances are appended to the `ExifTool` write queue by
/// `set_new_value` and `set_new_values_from_file`.
#[derive(Debug, Clone)]
pub struct NewValue {
    /// Tag name (e.g., "Artist", "Copyright", "XMP:Title")
    pub tag: String,
    /// Group prefix if specified (e.g., "EXIF", "XMP", "IPTC")
    pub group: Option<String>,
    /// New value (None = delete tag)
    pub value: Option<String>,
}
67
/// The main ExifTool engine — read, write, and edit metadata.
///
/// # Reading metadata
/// ```no_run
/// use exiftool_rs::ExifTool;
///
/// let et = ExifTool::new();
///
/// // Full tag structs
/// let tags = et.extract_info("photo.jpg").unwrap();
/// for tag in &tags {
///     println!("[{}] {}: {}", tag.group.family0, tag.name, tag.print_value);
/// }
///
/// // Simple name→value map
/// let info = et.image_info("photo.jpg").unwrap();
/// println!("Camera: {}", info.get("Model").unwrap_or(&String::new()));
/// ```
///
/// # Writing metadata
/// ```no_run
/// use exiftool_rs::ExifTool;
///
/// let mut et = ExifTool::new();
/// et.set_new_value("Artist", Some("John Doe"));
/// et.set_new_value("Copyright", Some("2024"));
/// et.write_info("input.jpg", "output.jpg").unwrap();
/// ```
pub struct ExifTool {
    // Extraction settings; exposed through `options()` / `options_mut()`.
    options: Options,
    // Pending tag changes, in the order they were queued.
    new_values: Vec<NewValue>,
}
100
/// Result of metadata extraction: maps tag names to display values.
///
/// Both keys and values are plain `String`s.
pub type ImageInfo = HashMap<String, String>;
103
104impl ExifTool {
105    /// Create a new ExifTool instance with default options.
106    pub fn new() -> Self {
107        Self {
108            options: Options::default(),
109            new_values: Vec::new(),
110        }
111    }
112
113    /// Create a new ExifTool instance with custom options.
114    pub fn with_options(options: Options) -> Self {
115        Self {
116            options,
117            new_values: Vec::new(),
118        }
119    }
120
    /// Get a mutable reference to the options.
    ///
    /// Use this to adjust settings on an existing instance.
    pub fn options_mut(&mut self) -> &mut Options {
        &mut self.options
    }
125
    /// Get a shared (read-only) reference to the options.
    pub fn options(&self) -> &Options {
        &self.options
    }
130
131    // ================================================================
132    // Writing API
133    // ================================================================
134
135    /// Queue a new tag value for writing.
136    ///
137    /// Call this one or more times, then call `write_info()` to apply changes.
138    ///
139    /// # Arguments
140    /// * `tag` - Tag name, optionally prefixed with group (e.g., "Artist", "XMP:Title", "EXIF:Copyright")
141    /// * `value` - New value, or None to delete the tag
142    ///
143    /// # Example
144    /// ```no_run
145    /// use exiftool_rs::ExifTool;
146    /// let mut et = ExifTool::new();
147    /// et.set_new_value("Artist", Some("John Doe"));
148    /// et.set_new_value("Copyright", Some("2024 John Doe"));
149    /// et.set_new_value("XMP:Title", Some("My Photo"));
150    /// et.write_info("photo.jpg", "photo_out.jpg").unwrap();
151    /// ```
152    pub fn set_new_value(&mut self, tag: &str, value: Option<&str>) {
153        let (group, tag_name) = if let Some(colon_pos) = tag.find(':') {
154            (Some(tag[..colon_pos].to_string()), tag[colon_pos + 1..].to_string())
155        } else {
156            (None, tag.to_string())
157        };
158
159        self.new_values.push(NewValue {
160            tag: tag_name,
161            group,
162            value: value.map(|v| v.to_string()),
163        });
164    }
165
    /// Clear all queued new values.
    ///
    /// After this call the write queue is empty.
    pub fn clear_new_values(&mut self) {
        self.new_values.clear();
    }
170
171    /// Copy tags from a source file, queuing them as new values.
172    ///
173    /// Reads all tags from `src_path` and queues them for writing.
174    /// Optionally filter by tag names.
175    pub fn set_new_values_from_file<P: AsRef<Path>>(
176        &mut self,
177        src_path: P,
178        tags_to_copy: Option<&[&str]>,
179    ) -> Result<u32> {
180        let src_tags = self.extract_info(src_path)?;
181        let mut count = 0u32;
182
183        for tag in &src_tags {
184            // Skip file-level tags that shouldn't be copied
185            if tag.group.family0 == "File" || tag.group.family0 == "Composite" {
186                continue;
187            }
188            // Skip binary/undefined data and empty values
189            if tag.print_value.starts_with("(Binary") || tag.print_value.starts_with("(Undefined") {
190                continue;
191            }
192            if tag.print_value.is_empty() {
193                continue;
194            }
195
196            // Filter by requested tags
197            if let Some(filter) = tags_to_copy {
198                let name_lower = tag.name.to_lowercase();
199                if !filter.iter().any(|f| f.to_lowercase() == name_lower) {
200                    continue;
201                }
202            }
203
204            let _full_tag = format!("{}:{}", tag.group.family0, tag.name);
205            self.new_values.push(NewValue {
206                tag: tag.name.clone(),
207                group: Some(tag.group.family0.clone()),
208                value: Some(tag.print_value.clone()),
209            });
210            count += 1;
211        }
212
213        Ok(count)
214    }
215
216    /// Set a file's name based on a tag value.
217    pub fn set_file_name_from_tag<P: AsRef<Path>>(
218        &self,
219        path: P,
220        tag_name: &str,
221        template: &str,
222    ) -> Result<String> {
223        let path = path.as_ref();
224        let tags = self.extract_info(path)?;
225
226        let tag_value = tags
227            .iter()
228            .find(|t| t.name.to_lowercase() == tag_name.to_lowercase())
229            .map(|t| &t.print_value)
230            .ok_or_else(|| Error::TagNotFound(tag_name.to_string()))?;
231
232        // Build new filename from template
233        // Template: "prefix%value%suffix.ext" or just use the tag value
234        let new_name = if template.contains('%') {
235            template.replace("%v", value_to_filename(tag_value).as_str())
236        } else {
237            // Default: use tag value as filename, keep extension
238            let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
239            let clean = value_to_filename(tag_value);
240            if ext.is_empty() {
241                clean
242            } else {
243                format!("{}.{}", clean, ext)
244            }
245        };
246
247        let parent = path.parent().unwrap_or(Path::new(""));
248        let new_path = parent.join(&new_name);
249
250        fs::rename(path, &new_path).map_err(Error::Io)?;
251        Ok(new_path.to_string_lossy().to_string())
252    }
253
254    /// Write queued changes to a file.
255    ///
256    /// If `dst_path` is the same as `src_path`, the file is modified in-place
257    /// (via a temporary file).
258    pub fn write_info<P: AsRef<Path>, Q: AsRef<Path>>(&self, src_path: P, dst_path: Q) -> Result<u32> {
259        let src_path = src_path.as_ref();
260        let dst_path = dst_path.as_ref();
261        let data = fs::read(src_path).map_err(Error::Io)?;
262
263        let file_type = self.detect_file_type(&data, src_path)?;
264        let output = self.apply_changes(&data, file_type)?;
265
266        // Write to temp file first, then rename (atomic)
267        let temp_path = dst_path.with_extension("exiftool_tmp");
268        fs::write(&temp_path, &output).map_err(Error::Io)?;
269        fs::rename(&temp_path, dst_path).map_err(Error::Io)?;
270
271        Ok(self.new_values.len() as u32)
272    }
273
274    /// Apply queued changes to in-memory data.
275    fn apply_changes(&self, data: &[u8], file_type: FileType) -> Result<Vec<u8>> {
276        match file_type {
277            FileType::Jpeg => self.write_jpeg(data),
278            FileType::Png => self.write_png(data),
279            FileType::Tiff | FileType::Dng | FileType::Cr2 | FileType::Nef
280            | FileType::Arw | FileType::Orf | FileType::Pef => self.write_tiff(data),
281            FileType::WebP => self.write_webp(data),
282            FileType::Mp4 | FileType::QuickTime | FileType::M4a
283            | FileType::ThreeGP | FileType::F4v => self.write_mp4(data),
284            FileType::Psd => self.write_psd(data),
285            FileType::Pdf => self.write_pdf(data),
286            FileType::Heif | FileType::Avif => self.write_mp4(data),
287            FileType::Mkv | FileType::WebM => self.write_matroska(data),
288            FileType::Gif => {
289                let comment = self.new_values.iter()
290                    .find(|nv| nv.tag.to_lowercase() == "comment")
291                    .and_then(|nv| nv.value.clone());
292                crate::writer::gif_writer::write_gif(data, comment.as_deref())
293            }
294            FileType::Flac => {
295                let changes: Vec<(&str, &str)> = self.new_values.iter()
296                    .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
297                    .collect();
298                crate::writer::flac_writer::write_flac(data, &changes)
299            }
300            FileType::Mp3 | FileType::Aiff => {
301                let changes: Vec<(&str, &str)> = self.new_values.iter()
302                    .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
303                    .collect();
304                crate::writer::id3_writer::write_id3(data, &changes)
305            }
306            FileType::Jp2 | FileType::Jxl => {
307                let new_xmp = if self.new_values.iter().any(|nv| nv.group.as_deref() == Some("XMP")) {
308                    let refs: Vec<&NewValue> = self.new_values.iter()
309                        .filter(|nv| nv.group.as_deref() == Some("XMP"))
310                        .collect();
311                    Some(self.build_new_xmp(&refs))
312                } else { None };
313                crate::writer::jp2_writer::write_jp2(data, new_xmp.as_deref(), None)
314            }
315            FileType::PostScript => {
316                let changes: Vec<(&str, &str)> = self.new_values.iter()
317                    .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
318                    .collect();
319                crate::writer::ps_writer::write_postscript(data, &changes)
320            }
321            FileType::Ogg | FileType::Opus => {
322                let changes: Vec<(&str, &str)> = self.new_values.iter()
323                    .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
324                    .collect();
325                crate::writer::ogg_writer::write_ogg(data, &changes)
326            }
327            FileType::Xmp => {
328                let props: Vec<xmp_writer::XmpProperty> = self.new_values.iter()
329                    .filter_map(|nv| {
330                        let val = nv.value.as_deref()?;
331                        Some(xmp_writer::XmpProperty {
332                            namespace: nv.group.clone().unwrap_or_else(|| "dc".into()),
333                            property: nv.tag.clone(),
334                            values: vec![val.to_string()],
335                            prop_type: xmp_writer::XmpPropertyType::Simple,
336                        })
337                    })
338                    .collect();
339                Ok(crate::writer::xmp_sidecar_writer::write_xmp_sidecar(&props))
340            }
341            _ => Err(Error::UnsupportedFileType(format!("writing not yet supported for {}", file_type))),
342        }
343    }
344
345    /// Returns the set of tag names (lowercase) that are writable for a given file type.
346    /// Returns `None` if any tag is writable (open-ended formats like PNG, FLAC, MKV).
347    /// Returns `Some(empty set)` if the format has no writer.
348    pub fn writable_tags(file_type: FileType) -> Option<std::collections::HashSet<&'static str>> {
349        use std::collections::HashSet;
350
351        // EXIF tags supported by exif_writer
352        const EXIF_TAGS: &[&str] = &[
353            "imagedescription", "make", "model", "orientation",
354            "xresolution", "yresolution", "resolutionunit", "software",
355            "modifydate", "datetime", "artist", "copyright",
356            "datetimeoriginal", "createdate", "datetimedigitized",
357            "usercomment", "imageuniqueid", "ownername", "cameraownername",
358            "serialnumber", "bodyserialnumber", "lensmake", "lensmodel", "lensserialnumber",
359        ];
360
361        // IPTC tags supported by iptc_writer
362        const IPTC_TAGS: &[&str] = &[
363            "objectname", "title", "urgency", "category", "supplementalcategories",
364            "keywords", "specialinstructions", "datecreated", "timecreated",
365            "by-line", "author", "byline", "by-linetitle", "authorsposition", "bylinetitle",
366            "city", "sub-location", "sublocation", "province-state", "state", "provincestate",
367            "country-primarylocationcode", "countrycode",
368            "country-primarylocationname", "country",
369            "headline", "credit", "source", "copyrightnotice",
370            "contact", "caption-abstract", "caption", "description",
371            "writer-editor", "captionwriter",
372        ];
373
374        // XMP auto-detected tags (no group prefix needed)
375        const XMP_AUTO_TAGS: &[&str] = &[
376            "title", "description", "subject", "creator", "rights",
377            "keywords", "rating", "label", "hierarchicalsubject",
378        ];
379
380        // ID3 tags
381        const ID3_TAGS: &[&str] = &[
382            "title", "artist", "album", "year", "date", "track",
383            "genre", "comment", "composer", "albumartist",
384            "encoder", "encodedby", "publisher", "copyright", "bpm", "lyrics",
385        ];
386
387        // MP4/MOV ilst tags
388        const MP4_TAGS: &[&str] = &[
389            "title", "artist", "album", "year", "date", "comment",
390            "genre", "composer", "writer", "encoder", "encodedby",
391            "grouping", "lyrics", "description", "albumartist", "copyright",
392        ];
393
394        // PDF Info dict tags
395        const PDF_TAGS: &[&str] = &[
396            "title", "author", "subject", "keywords", "creator", "producer",
397        ];
398
399        // PostScript DSC tags
400        const PS_TAGS: &[&str] = &[
401            "title", "creator", "author", "for", "creationdate", "createdate",
402        ];
403
404        match file_type {
405            // Open-ended: any tag name accepted
406            FileType::Png | FileType::Flac | FileType::Mkv | FileType::WebM
407            | FileType::Ogg | FileType::Opus | FileType::Xmp => None,
408
409            // JPEG: EXIF + IPTC + XMP auto + comment
410            FileType::Jpeg => {
411                let mut set: HashSet<&str> = HashSet::new();
412                set.extend(EXIF_TAGS);
413                set.extend(IPTC_TAGS);
414                set.extend(XMP_AUTO_TAGS);
415                set.insert("comment");
416                Some(set)
417            }
418
419            // TIFF-based: EXIF only
420            FileType::Tiff | FileType::Dng | FileType::Cr2 | FileType::Nef
421            | FileType::Arw | FileType::Orf | FileType::Pef => {
422                let mut set: HashSet<&str> = HashSet::new();
423                set.extend(EXIF_TAGS);
424                Some(set)
425            }
426
427            // WebP: EXIF + XMP auto
428            FileType::WebP => {
429                let mut set: HashSet<&str> = HashSet::new();
430                set.extend(EXIF_TAGS);
431                set.extend(XMP_AUTO_TAGS);
432                Some(set)
433            }
434
435            // MP4/MOV/HEIF: ilst + XMP auto
436            FileType::Mp4 | FileType::QuickTime | FileType::M4a
437            | FileType::ThreeGP | FileType::F4v | FileType::Heif | FileType::Avif => {
438                let mut set: HashSet<&str> = HashSet::new();
439                set.extend(MP4_TAGS);
440                set.extend(XMP_AUTO_TAGS);
441                Some(set)
442            }
443
444            // PSD: IPTC + XMP auto
445            FileType::Psd => {
446                let mut set: HashSet<&str> = HashSet::new();
447                set.extend(IPTC_TAGS);
448                set.extend(XMP_AUTO_TAGS);
449                Some(set)
450            }
451
452            FileType::Pdf => Some(PDF_TAGS.iter().copied().collect()),
453            FileType::PostScript => Some(PS_TAGS.iter().copied().collect()),
454
455            FileType::Mp3 | FileType::Aiff => Some(ID3_TAGS.iter().copied().collect()),
456
457            FileType::Gif => {
458                let mut set: HashSet<&str> = HashSet::new();
459                set.insert("comment");
460                Some(set)
461            }
462
463            // JP2/JXL: XMP only (with group prefix)
464            FileType::Jp2 | FileType::Jxl => Some(XMP_AUTO_TAGS.iter().copied().collect()),
465
466            // No writer
467            _ => Some(HashSet::new()),
468        }
469    }
470
471    /// Write metadata changes to JPEG data.
472    fn write_jpeg(&self, data: &[u8]) -> Result<Vec<u8>> {
473        // Classify new values by target group
474        let mut exif_values: Vec<&NewValue> = Vec::new();
475        let mut xmp_values: Vec<&NewValue> = Vec::new();
476        let mut iptc_values: Vec<&NewValue> = Vec::new();
477        let mut comment_value: Option<&str> = None;
478        let mut remove_exif = false;
479        let mut remove_xmp = false;
480        let mut remove_iptc = false;
481        let mut remove_comment = false;
482
483        for nv in &self.new_values {
484            let group = nv.group.as_deref().unwrap_or("");
485            let group_upper = group.to_uppercase();
486
487            // Check for group deletion
488            if nv.value.is_none() && nv.tag == "*" {
489                match group_upper.as_str() {
490                    "EXIF" => { remove_exif = true; continue; }
491                    "XMP" => { remove_xmp = true; continue; }
492                    "IPTC" => { remove_iptc = true; continue; }
493                    _ => {}
494                }
495            }
496
497            match group_upper.as_str() {
498                "XMP" => xmp_values.push(nv),
499                "IPTC" => iptc_values.push(nv),
500                "EXIF" | "IFD0" | "EXIFIFD" | "GPS" => exif_values.push(nv),
501                "" => {
502                    // Auto-detect best group based on tag name
503                    if nv.tag.to_lowercase() == "comment" {
504                        if nv.value.is_none() {
505                            remove_comment = true;
506                        } else {
507                            comment_value = nv.value.as_deref();
508                        }
509                    } else if is_xmp_tag(&nv.tag) {
510                        xmp_values.push(nv);
511                    } else {
512                        exif_values.push(nv);
513                    }
514                }
515                _ => exif_values.push(nv), // default to EXIF
516            }
517        }
518
519        // Build new EXIF data
520        let new_exif = if !exif_values.is_empty() {
521            Some(self.build_new_exif(data, &exif_values)?)
522        } else {
523            None
524        };
525
526        // Build new XMP data
527        let new_xmp = if !xmp_values.is_empty() {
528            Some(self.build_new_xmp(&xmp_values))
529        } else {
530            None
531        };
532
533        // Build new IPTC data
534        let new_iptc_data = if !iptc_values.is_empty() {
535            let records: Vec<iptc_writer::IptcRecord> = iptc_values
536                .iter()
537                .filter_map(|nv| {
538                    let value = nv.value.as_deref()?;
539                    let (record, dataset) = iptc_writer::tag_name_to_iptc(&nv.tag)?;
540                    Some(iptc_writer::IptcRecord {
541                        record,
542                        dataset,
543                        data: value.as_bytes().to_vec(),
544                    })
545                })
546                .collect();
547            if records.is_empty() {
548                None
549            } else {
550                Some(iptc_writer::build_iptc(&records))
551            }
552        } else {
553            None
554        };
555
556        // Rewrite JPEG
557        jpeg_writer::write_jpeg(
558            data,
559            new_exif.as_deref(),
560            new_xmp.as_deref(),
561            new_iptc_data.as_deref(),
562            comment_value,
563            remove_exif,
564            remove_xmp,
565            remove_iptc,
566            remove_comment,
567        )
568    }
569
570    /// Build new EXIF data by merging existing EXIF with queued changes.
571    fn build_new_exif(&self, jpeg_data: &[u8], values: &[&NewValue]) -> Result<Vec<u8>> {
572        let bo = ByteOrderMark::BigEndian;
573        let mut ifd0_entries = Vec::new();
574        let mut exif_entries = Vec::new();
575        let mut gps_entries = Vec::new();
576
577        // Step 1: Extract existing EXIF entries from the JPEG
578        let existing = extract_existing_exif_entries(jpeg_data, bo);
579        for entry in &existing {
580            match classify_exif_tag(entry.tag) {
581                ExifIfdGroup::Ifd0 => ifd0_entries.push(entry.clone()),
582                ExifIfdGroup::ExifIfd => exif_entries.push(entry.clone()),
583                ExifIfdGroup::Gps => gps_entries.push(entry.clone()),
584            }
585        }
586
587        // Step 2: Apply queued changes (add/replace/delete)
588        let deleted_tags: Vec<u16> = values
589            .iter()
590            .filter(|nv| nv.value.is_none())
591            .filter_map(|nv| tag_name_to_id(&nv.tag))
592            .collect();
593
594        // Remove deleted tags
595        ifd0_entries.retain(|e| !deleted_tags.contains(&e.tag));
596        exif_entries.retain(|e| !deleted_tags.contains(&e.tag));
597        gps_entries.retain(|e| !deleted_tags.contains(&e.tag));
598
599        // Add/replace new values
600        for nv in values {
601            if nv.value.is_none() {
602                continue;
603            }
604            let value_str = nv.value.as_deref().unwrap_or("");
605            let group = nv.group.as_deref().unwrap_or("");
606
607            if let Some((tag_id, format, encoded)) = encode_exif_tag(&nv.tag, value_str, group, bo) {
608                let entry = exif_writer::IfdEntry {
609                    tag: tag_id,
610                    format,
611                    data: encoded,
612                };
613
614                let target = match group.to_uppercase().as_str() {
615                    "GPS" => &mut gps_entries,
616                    "EXIFIFD" => &mut exif_entries,
617                    _ => match classify_exif_tag(tag_id) {
618                        ExifIfdGroup::ExifIfd => &mut exif_entries,
619                        ExifIfdGroup::Gps => &mut gps_entries,
620                        ExifIfdGroup::Ifd0 => &mut ifd0_entries,
621                    },
622                };
623
624                // Replace existing or add new
625                if let Some(existing) = target.iter_mut().find(|e| e.tag == tag_id) {
626                    *existing = entry;
627                } else {
628                    target.push(entry);
629                }
630            }
631        }
632
633        // Remove sub-IFD pointers from entries (they'll be rebuilt by build_exif)
634        ifd0_entries.retain(|e| e.tag != 0x8769 && e.tag != 0x8825 && e.tag != 0xA005);
635
636        exif_writer::build_exif(&ifd0_entries, &exif_entries, &gps_entries, bo)
637    }
638
639    /// Write metadata changes to PNG data.
640    fn write_png(&self, data: &[u8]) -> Result<Vec<u8>> {
641        let mut new_text: Vec<(&str, &str)> = Vec::new();
642        let mut remove_text: Vec<&str> = Vec::new();
643
644        // Collect text-based changes
645        // We need to hold the strings in vectors that live long enough
646        let owned_pairs: Vec<(String, String)> = self.new_values.iter()
647            .filter(|nv| nv.value.is_some())
648            .map(|nv| (nv.tag.clone(), nv.value.clone().unwrap()))
649            .collect();
650
651        for (tag, value) in &owned_pairs {
652            new_text.push((tag.as_str(), value.as_str()));
653        }
654
655        for nv in &self.new_values {
656            if nv.value.is_none() {
657                remove_text.push(&nv.tag);
658            }
659        }
660
661        png_writer::write_png(data, &new_text, None, &remove_text)
662    }
663
664    /// Write metadata changes to PSD data.
665    fn write_psd(&self, data: &[u8]) -> Result<Vec<u8>> {
666        let mut iptc_values = Vec::new();
667        let mut xmp_values = Vec::new();
668
669        for nv in &self.new_values {
670            let group = nv.group.as_deref().unwrap_or("").to_uppercase();
671            match group.as_str() {
672                "XMP" => xmp_values.push(nv),
673                "IPTC" => iptc_values.push(nv),
674                _ => {
675                    if is_xmp_tag(&nv.tag) { xmp_values.push(nv); }
676                    else { iptc_values.push(nv); }
677                }
678            }
679        }
680
681        let new_iptc = if !iptc_values.is_empty() {
682            let records: Vec<_> = iptc_values.iter().filter_map(|nv| {
683                let value = nv.value.as_deref()?;
684                let (record, dataset) = iptc_writer::tag_name_to_iptc(&nv.tag)?;
685                Some(iptc_writer::IptcRecord { record, dataset, data: value.as_bytes().to_vec() })
686            }).collect();
687            if records.is_empty() { None } else { Some(iptc_writer::build_iptc(&records)) }
688        } else { None };
689
690        let new_xmp = if !xmp_values.is_empty() {
691            let refs: Vec<&NewValue> = xmp_values.iter().copied().collect();
692            Some(self.build_new_xmp(&refs))
693        } else { None };
694
695        psd_writer::write_psd(data, new_iptc.as_deref(), new_xmp.as_deref())
696    }
697
698    /// Write metadata changes to Matroska (MKV/WebM) data.
699    fn write_matroska(&self, data: &[u8]) -> Result<Vec<u8>> {
700        let changes: Vec<(&str, &str)> = self.new_values.iter()
701            .filter_map(|nv| {
702                let value = nv.value.as_deref()?;
703                Some((nv.tag.as_str(), value))
704            })
705            .collect();
706
707        matroska_writer::write_matroska(data, &changes)
708    }
709
710    /// Write metadata changes to PDF data.
711    fn write_pdf(&self, data: &[u8]) -> Result<Vec<u8>> {
712        let changes: Vec<(&str, &str)> = self.new_values.iter()
713            .filter_map(|nv| {
714                let value = nv.value.as_deref()?;
715                Some((nv.tag.as_str(), value))
716            })
717            .collect();
718
719        pdf_writer::write_pdf(data, &changes)
720    }
721
722    /// Write metadata changes to MP4/MOV data.
723    fn write_mp4(&self, data: &[u8]) -> Result<Vec<u8>> {
724        let mut ilst_tags: Vec<([u8; 4], String)> = Vec::new();
725        let mut xmp_values: Vec<&NewValue> = Vec::new();
726
727        for nv in &self.new_values {
728            if nv.value.is_none() { continue; }
729            let group = nv.group.as_deref().unwrap_or("").to_uppercase();
730            if group == "XMP" {
731                xmp_values.push(nv);
732            } else if let Some(key) = mp4_writer::tag_to_ilst_key(&nv.tag) {
733                ilst_tags.push((key, nv.value.clone().unwrap()));
734            }
735        }
736
737        let tag_refs: Vec<(&[u8; 4], &str)> = ilst_tags.iter()
738            .map(|(k, v)| (k, v.as_str()))
739            .collect();
740
741        let new_xmp = if !xmp_values.is_empty() {
742            let refs: Vec<&NewValue> = xmp_values.iter().copied().collect();
743            Some(self.build_new_xmp(&refs))
744        } else {
745            None
746        };
747
748        mp4_writer::write_mp4(data, &tag_refs, new_xmp.as_deref())
749    }
750
751    /// Write metadata changes to WebP data.
752    fn write_webp(&self, data: &[u8]) -> Result<Vec<u8>> {
753        let mut exif_values: Vec<&NewValue> = Vec::new();
754        let mut xmp_values: Vec<&NewValue> = Vec::new();
755        let mut remove_exif = false;
756        let mut remove_xmp = false;
757
758        for nv in &self.new_values {
759            let group = nv.group.as_deref().unwrap_or("").to_uppercase();
760            if nv.value.is_none() && nv.tag == "*" {
761                if group == "EXIF" { remove_exif = true; }
762                if group == "XMP" { remove_xmp = true; }
763                continue;
764            }
765            match group.as_str() {
766                "XMP" => xmp_values.push(nv),
767                _ => exif_values.push(nv),
768            }
769        }
770
771        let new_exif = if !exif_values.is_empty() {
772            let bo = ByteOrderMark::BigEndian;
773            let mut entries = Vec::new();
774            for nv in &exif_values {
775                if let Some(ref v) = nv.value {
776                    let group = nv.group.as_deref().unwrap_or("");
777                    if let Some((tag_id, format, encoded)) = encode_exif_tag(&nv.tag, v, group, bo) {
778                        entries.push(exif_writer::IfdEntry { tag: tag_id, format, data: encoded });
779                    }
780                }
781            }
782            if !entries.is_empty() {
783                Some(exif_writer::build_exif(&entries, &[], &[], bo)?)
784            } else {
785                None
786            }
787        } else {
788            None
789        };
790
791        let new_xmp = if !xmp_values.is_empty() {
792            Some(self.build_new_xmp(&xmp_values.iter().map(|v| *v).collect::<Vec<_>>()))
793        } else {
794            None
795        };
796
797        webp_writer::write_webp(
798            data,
799            new_exif.as_deref(),
800            new_xmp.as_deref(),
801            remove_exif,
802            remove_xmp,
803        )
804    }
805
806    /// Write metadata changes to TIFF data.
807    fn write_tiff(&self, data: &[u8]) -> Result<Vec<u8>> {
808        let bo = if data.starts_with(b"II") {
809            ByteOrderMark::LittleEndian
810        } else {
811            ByteOrderMark::BigEndian
812        };
813
814        let mut changes: Vec<(u16, Vec<u8>)> = Vec::new();
815        for nv in &self.new_values {
816            if let Some(ref value) = nv.value {
817                let group = nv.group.as_deref().unwrap_or("");
818                if let Some((tag_id, _format, encoded)) = encode_exif_tag(&nv.tag, value, group, bo) {
819                    changes.push((tag_id, encoded));
820                }
821            }
822        }
823
824        tiff_writer::write_tiff(data, &changes)
825    }
826
827    /// Build new XMP data from queued values.
828    fn build_new_xmp(&self, values: &[&NewValue]) -> Vec<u8> {
829        let mut properties = Vec::new();
830
831        for nv in values {
832            let value_str = match &nv.value {
833                Some(v) => v.clone(),
834                None => continue,
835            };
836
837            let ns = nv.group.as_deref().unwrap_or("dc").to_lowercase();
838            let ns = if ns == "xmp" { "xmp".to_string() } else { ns };
839
840            let prop_type = match nv.tag.to_lowercase().as_str() {
841                "title" | "description" | "rights" => xmp_writer::XmpPropertyType::LangAlt,
842                "subject" | "keywords" => xmp_writer::XmpPropertyType::Bag,
843                "creator" => xmp_writer::XmpPropertyType::Seq,
844                _ => xmp_writer::XmpPropertyType::Simple,
845            };
846
847            let values = if matches!(prop_type, xmp_writer::XmpPropertyType::Bag | xmp_writer::XmpPropertyType::Seq) {
848                value_str.split(',').map(|s| s.trim().to_string()).collect()
849            } else {
850                vec![value_str]
851            };
852
853            properties.push(xmp_writer::XmpProperty {
854                namespace: ns,
855                property: nv.tag.clone(),
856                values,
857                prop_type,
858            });
859        }
860
861        xmp_writer::build_xmp(&properties).into_bytes()
862    }
863
864    // ================================================================
865    // Reading API
866    // ================================================================
867
868    /// Extract metadata from a file and return a simple name→value map.
869    ///
870    /// This is the high-level one-shot API, equivalent to ExifTool's `ImageInfo()`.
871    pub fn image_info<P: AsRef<Path>>(&self, path: P) -> Result<ImageInfo> {
872        let tags = self.extract_info(path)?;
873        Ok(self.get_info(&tags))
874    }
875
876    /// Extract all metadata tags from a file.
877    ///
878    /// Returns the full `Tag` structs with groups, raw values, etc.
879    pub fn extract_info<P: AsRef<Path>>(&self, path: P) -> Result<Vec<Tag>> {
880        let path = path.as_ref();
881        let data = fs::read(path).map_err(Error::Io)?;
882
883        self.extract_info_from_bytes(&data, path)
884    }
885
886    /// Extract metadata from in-memory data.
887    pub fn extract_info_from_bytes(&self, data: &[u8], path: &Path) -> Result<Vec<Tag>> {
888        let file_type_result = self.detect_file_type(data, path);
889        let (file_type, mut tags) = match file_type_result {
890            Ok(ft) => {
891                let t = self.process_file(data, ft).or_else(|_| {
892                    self.process_by_extension(data, path)
893                })?;
894                (Some(ft), t)
895            }
896            Err(_) => {
897                // File type unknown by magic/extension — try extension-based fallback
898                let t = self.process_by_extension(data, path)?;
899                (None, t)
900            }
901        };
902        let file_type = file_type.unwrap_or(FileType::Zip); // placeholder for file-level tags
903
904        // Add file-level tags
905        tags.push(Tag {
906            id: crate::tag::TagId::Text("FileType".into()),
907            name: "FileType".into(),
908            description: "File Type".into(),
909            group: crate::tag::TagGroup {
910                family0: "File".into(),
911                family1: "File".into(),
912                family2: "Other".into(),
913            },
914            raw_value: Value::String(format!("{:?}", file_type)),
915            print_value: file_type.description().to_string(),
916            priority: 0,
917        });
918
919        tags.push(Tag {
920            id: crate::tag::TagId::Text("MIMEType".into()),
921            name: "MIMEType".into(),
922            description: "MIME Type".into(),
923            group: crate::tag::TagGroup {
924                family0: "File".into(),
925                family1: "File".into(),
926                family2: "Other".into(),
927            },
928            raw_value: Value::String(file_type.mime_type().to_string()),
929            print_value: file_type.mime_type().to_string(),
930            priority: 0,
931        });
932
933        if let Ok(metadata) = fs::metadata(path) {
934            tags.push(Tag {
935                id: crate::tag::TagId::Text("FileSize".into()),
936                name: "FileSize".into(),
937                description: "File Size".into(),
938                group: crate::tag::TagGroup {
939                    family0: "File".into(),
940                    family1: "File".into(),
941                    family2: "Other".into(),
942                },
943                raw_value: Value::U32(metadata.len() as u32),
944                print_value: format_file_size(metadata.len()),
945                priority: 0,
946            });
947        }
948
949        // Add more file-level tags
950        let file_tag = |name: &str, val: Value| -> Tag {
951            Tag {
952                id: crate::tag::TagId::Text(name.to_string()),
953                name: name.to_string(), description: name.to_string(),
954                group: crate::tag::TagGroup { family0: "File".into(), family1: "File".into(), family2: "Other".into() },
955                raw_value: val.clone(), print_value: val.to_display_string(), priority: 0,
956            }
957        };
958
959        if let Some(fname) = path.file_name().and_then(|n| n.to_str()) {
960            tags.push(file_tag("FileName", Value::String(fname.to_string())));
961        }
962        if let Some(dir) = path.parent().and_then(|p| p.to_str()) {
963            tags.push(file_tag("Directory", Value::String(dir.to_string())));
964        }
965        // Use the canonical (first) extension from the FileType, matching Perl ExifTool behavior.
966        let canonical_ext = file_type.extensions().first().copied().unwrap_or("");
967        if !canonical_ext.is_empty() {
968            tags.push(file_tag("FileTypeExtension", Value::String(canonical_ext.to_string())));
969        }
970
971        #[cfg(unix)]
972        if let Ok(metadata) = fs::metadata(path) {
973            use std::os::unix::fs::MetadataExt;
974            let mode = metadata.mode();
975            tags.push(file_tag("FilePermissions", Value::String(format!("{:o}", mode & 0o7777))));
976
977            // FileModifyDate
978            if let Ok(modified) = metadata.modified() {
979                if let Ok(dur) = modified.duration_since(std::time::UNIX_EPOCH) {
980                    let secs = dur.as_secs() as i64;
981                    tags.push(file_tag("FileModifyDate", Value::String(unix_to_datetime(secs))));
982                }
983            }
984            // FileAccessDate
985            if let Ok(accessed) = metadata.accessed() {
986                if let Ok(dur) = accessed.duration_since(std::time::UNIX_EPOCH) {
987                    let secs = dur.as_secs() as i64;
988                    tags.push(file_tag("FileAccessDate", Value::String(unix_to_datetime(secs))));
989                }
990            }
991            // FileInodeChangeDate (ctime on Unix)
992            let ctime = metadata.ctime();
993            if ctime > 0 {
994                tags.push(file_tag("FileInodeChangeDate", Value::String(unix_to_datetime(ctime))));
995            }
996        }
997
998        // ExifByteOrder (from TIFF header)
999        {
1000            let bo_str = if data.len() > 8 {
1001                // Check EXIF in JPEG or TIFF header or WebP/RIFF EXIF chunk
1002                let check: Option<&[u8]> = if data.starts_with(&[0xFF, 0xD8]) {
1003                    // JPEG: find APP1 EXIF header
1004                    data.windows(6).position(|w| w == b"Exif\0\0")
1005                        .map(|p| &data[p+6..])
1006                } else if data.starts_with(b"FUJIFILMCCD-RAW") && data.len() >= 0x60 {
1007                    // RAF: look in the embedded JPEG for EXIF byte order
1008                    let jpeg_offset = u32::from_be_bytes([data[0x54], data[0x55], data[0x56], data[0x57]]) as usize;
1009                    let jpeg_length = u32::from_be_bytes([data[0x58], data[0x59], data[0x5A], data[0x5B]]) as usize;
1010                    if jpeg_offset > 0 && jpeg_offset + jpeg_length <= data.len() {
1011                        let jpeg = &data[jpeg_offset..jpeg_offset + jpeg_length];
1012                        jpeg.windows(6).position(|w| w == b"Exif\0\0")
1013                            .map(|p| &jpeg[p+6..])
1014                    } else {
1015                        None
1016                    }
1017                } else if data.starts_with(b"RIFF") && data.len() >= 12 {
1018                    // RIFF/WebP: find EXIF chunk
1019                    let mut riff_bo: Option<&[u8]> = None;
1020                    let mut pos = 12usize;
1021                    while pos + 8 <= data.len() {
1022                        let cid = &data[pos..pos+4];
1023                        let csz = u32::from_le_bytes([data[pos+4],data[pos+5],data[pos+6],data[pos+7]]) as usize;
1024                        let cstart = pos + 8;
1025                        let cend = (cstart + csz).min(data.len());
1026                        if cid == b"EXIF" && cend > cstart {
1027                            let exif_data = &data[cstart..cend];
1028                            let tiff = if exif_data.starts_with(b"Exif\0\0") { &exif_data[6..] } else { exif_data };
1029                            riff_bo = Some(tiff);
1030                            break;
1031                        }
1032                        // Also check LIST chunks
1033                        if cid == b"LIST" && cend >= cstart + 4 {
1034                            // recurse not needed for this simple scan - just advance
1035                        }
1036                        pos = cend + (csz & 1);
1037                    }
1038                    riff_bo
1039                } else if data.starts_with(&[0x00, 0x00, 0x00, 0x0C, b'J', b'X', b'L', b' ']) {
1040                    // JXL container: scan for brob Exif box and decompress to get byte order
1041                    let mut jxl_bo: Option<String> = None;
1042                    let mut jpos = 12usize; // skip JXL signature box
1043                    while jpos + 8 <= data.len() {
1044                        let bsize = u32::from_be_bytes([data[jpos], data[jpos+1], data[jpos+2], data[jpos+3]]) as usize;
1045                        let btype = &data[jpos+4..jpos+8];
1046                        if bsize < 8 || jpos + bsize > data.len() { break; }
1047                        if btype == b"brob" && jpos + bsize > 12 {
1048                            let inner_type = &data[jpos+8..jpos+12];
1049                            if inner_type == b"Exif" || inner_type == b"exif" {
1050                                let brotli_payload = &data[jpos+12..jpos+bsize];
1051                                use std::io::Cursor;
1052                                let mut inp = Cursor::new(brotli_payload);
1053                                let mut out: Vec<u8> = Vec::new();
1054                                if brotli::BrotliDecompress(&mut inp, &mut out).is_ok() {
1055                                    let exif_start = if out.len() > 4 { 4 } else { 0 };
1056                                    if exif_start < out.len() {
1057                                        if out[exif_start..].starts_with(b"MM") {
1058                                            jxl_bo = Some("Big-endian (Motorola, MM)".to_string());
1059                                        } else if out[exif_start..].starts_with(b"II") {
1060                                            jxl_bo = Some("Little-endian (Intel, II)".to_string());
1061                                        }
1062                                    }
1063                                }
1064                                break;
1065                            }
1066                        }
1067                        jpos += bsize;
1068                    }
1069                    if let Some(bo) = jxl_bo {
1070                        if !bo.is_empty() && file_type != FileType::Btf {
1071                            tags.push(file_tag("ExifByteOrder", Value::String(bo)));
1072                        }
1073                    }
1074                    // Return None to skip the generic byte order check below
1075                    None
1076                } else if data.starts_with(&[0x00, b'M', b'R', b'M']) {
1077                    // MRW: find TTW segment which contains TIFF/EXIF data
1078                    let mrw_data_offset = if data.len() >= 8 {
1079                        u32::from_be_bytes([data[4], data[5], data[6], data[7]]) as usize + 8
1080                    } else { 0 };
1081                    let mut mrw_bo: Option<&[u8]> = None;
1082                    let mut mpos = 8usize;
1083                    while mpos + 8 <= mrw_data_offset.min(data.len()) {
1084                        let seg_tag = &data[mpos..mpos+4];
1085                        let seg_len = u32::from_be_bytes([data[mpos+4], data[mpos+5], data[mpos+6], data[mpos+7]]) as usize;
1086                        if seg_tag == b"\x00TTW" && mpos + 8 + seg_len <= data.len() {
1087                            mrw_bo = Some(&data[mpos+8..mpos+8+seg_len]);
1088                            break;
1089                        }
1090                        mpos += 8 + seg_len;
1091                    }
1092                    mrw_bo
1093                } else {
1094                    Some(&data[..])
1095                };
1096                if let Some(tiff) = check {
1097                    if tiff.starts_with(b"II") { "Little-endian (Intel, II)" }
1098                    else if tiff.starts_with(b"MM") { "Big-endian (Motorola, MM)" }
1099                    else { "" }
1100                } else { "" }
1101            } else { "" };
1102            // Suppress ExifByteOrder for BigTIFF, Canon VRD/DR4 (Perl doesn't output it for these)
1103            // Also skip if already emitted by ExifReader (TIFF-based formats)
1104            let already_has_exifbyteorder = tags.iter().any(|t| t.name == "ExifByteOrder");
1105            if !bo_str.is_empty() && !already_has_exifbyteorder
1106                && file_type != FileType::Btf
1107                && file_type != FileType::Dr4 && file_type != FileType::Vrd
1108                && file_type != FileType::Crw {
1109                tags.push(file_tag("ExifByteOrder", Value::String(bo_str.to_string())));
1110            }
1111        }
1112
1113        tags.push(file_tag("ExifToolVersion", Value::String(crate::VERSION.to_string())));
1114
1115        // Compute composite tags
1116        let composite = crate::composite::compute_composite_tags(&tags);
1117        tags.extend(composite);
1118
1119        // FLIR post-processing: remove LensID composite for FLIR cameras.
1120        // Perl's LensID composite requires LensType EXIF tag (not present in FLIR images),
1121        // and LensID-2 requires LensModel to match /(mm|\d\/F)/ (FLIR names like "FOL7"
1122        // don't match).  Our composite.rs uses a simpler fallback that picks up any non-empty
1123        // LensModel, so we remove LensID when the image is from a FLIR camera with FFF data.
1124        {
1125            let is_flir_fff = tags.iter().any(|t| t.group.family0 == "APP1"
1126                && t.group.family1 == "FLIR");
1127            if is_flir_fff {
1128                tags.retain(|t| !(t.name == "LensID" && t.group.family0 == "Composite"));
1129            }
1130        }
1131
1132        // Olympus post-processing: remove the generic "Lens" composite for Olympus cameras.
1133        // In Perl, the "Lens" composite tag requires Canon:MinFocalLength (Canon namespace).
1134        // Our composite.rs generates Lens for any manufacturer that has MinFocalLength +
1135        // MaxFocalLength (e.g., Olympus Equipment sub-IFD).  Remove it for non-Canon cameras.
1136        {
1137            let make = tags.iter().find(|t| t.name == "Make")
1138                .map(|t| t.print_value.clone()).unwrap_or_default();
1139            if !make.to_uppercase().contains("CANON") {
1140                tags.retain(|t| t.name != "Lens" || t.group.family0 != "Composite");
1141            }
1142        }
1143
1144        // Priority-based deduplication: when the same tag name appears from both RIFF (priority 0)
1145        // and MakerNotes/EXIF (priority 0 but higher-quality source), remove the RIFF copy.
1146        // Mirrors ExifTool's PRIORITY => 0 behavior for RIFF StreamHeader tags.
1147        {
1148            let riff_priority_zero_tags = ["Quality", "SampleSize", "StreamType"];
1149            for tag_name in &riff_priority_zero_tags {
1150                let has_makernotes = tags.iter().any(|t| t.name == *tag_name
1151                    && t.group.family0 != "RIFF");
1152                if has_makernotes {
1153                    tags.retain(|t| !(t.name == *tag_name && t.group.family0 == "RIFF"));
1154                }
1155            }
1156        }
1157
1158        // Filter by requested tags if specified
1159        if !self.options.requested_tags.is_empty() {
1160            let requested: Vec<String> = self
1161                .options
1162                .requested_tags
1163                .iter()
1164                .map(|t| t.to_lowercase())
1165                .collect();
1166            tags.retain(|t| requested.contains(&t.name.to_lowercase()));
1167        }
1168
1169        Ok(tags)
1170    }
1171
1172    /// Format extracted tags into a simple name→value map.
1173    ///
1174    /// Handles duplicate tag names by appending group info.
1175    fn get_info(&self, tags: &[Tag]) -> ImageInfo {
1176        let mut info = ImageInfo::new();
1177        let mut seen: HashMap<String, usize> = HashMap::new();
1178
1179        for tag in tags {
1180            let value = if self.options.print_conv {
1181                &tag.print_value
1182            } else {
1183                &tag.raw_value.to_display_string()
1184            };
1185
1186            let count = seen.entry(tag.name.clone()).or_insert(0);
1187            *count += 1;
1188
1189            if *count == 1 {
1190                info.insert(tag.name.clone(), value.clone());
1191            } else if self.options.duplicates {
1192                let key = format!("{} [{}:{}]", tag.name, tag.group.family0, tag.group.family1);
1193                info.insert(key, value.clone());
1194            }
1195        }
1196
1197        info
1198    }
1199
1200    /// Detect file type from magic bytes and extension.
1201    fn detect_file_type(&self, data: &[u8], path: &Path) -> Result<FileType> {
1202        // Try magic bytes first
1203        let header_len = data.len().min(256);
1204        if let Some(ft) = file_type::detect_from_magic(&data[..header_len]) {
1205            // Override ICO to Font if extension is .dfont (Mac resource fork)
1206            if ft == FileType::Ico {
1207                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1208                    if ext.eq_ignore_ascii_case("dfont") {
1209                        return Ok(FileType::Font);
1210                    }
1211                }
1212            }
1213            // Override JPEG to JPS if the file extension is .jps
1214            if ft == FileType::Jpeg {
1215                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1216                    if ext.eq_ignore_ascii_case("jps") {
1217                        return Ok(FileType::Jps);
1218                    }
1219                }
1220            }
1221            // Override PLIST to AAE if extension is .aae
1222            if ft == FileType::Plist {
1223                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1224                    if ext.eq_ignore_ascii_case("aae") {
1225                        return Ok(FileType::Aae);
1226                    }
1227                }
1228            }
1229            // Override XMP to PLIST/AAE if extension is .plist or .aae
1230            if ft == FileType::Xmp {
1231                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1232                    if ext.eq_ignore_ascii_case("plist") {
1233                        return Ok(FileType::Plist);
1234                    }
1235                    if ext.eq_ignore_ascii_case("aae") {
1236                        return Ok(FileType::Aae);
1237                    }
1238                }
1239            }
1240            // Override to PhotoCD if extension is .pcd (file starts with 0xFF padding)
1241            if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1242                if ext.eq_ignore_ascii_case("pcd") && data.len() >= 2056
1243                    && &data[2048..2055] == b"PCD_IPI"
1244                {
1245                    return Ok(FileType::PhotoCd);
1246                }
1247            }
1248            // Override MP3 to MPC/APE/WavPack if extension says otherwise
1249            if ft == FileType::Mp3 {
1250                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1251                    if ext.eq_ignore_ascii_case("mpc") {
1252                        return Ok(FileType::Mpc);
1253                    }
1254                    if ext.eq_ignore_ascii_case("ape") {
1255                        return Ok(FileType::Ape);
1256                    }
1257                    if ext.eq_ignore_ascii_case("wv") {
1258                        return Ok(FileType::WavPack);
1259                    }
1260                }
1261            }
1262            // For ZIP files, check if it's an EIP (by extension) or OpenDocument format
1263            if ft == FileType::Zip {
1264                // Check extension first for EIP
1265                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1266                    if ext.eq_ignore_ascii_case("eip") {
1267                        return Ok(FileType::Eip);
1268                    }
1269                }
1270                if let Some(od_type) = detect_opendocument_type(data) {
1271                    return Ok(od_type);
1272                }
1273            }
1274            return Ok(ft);
1275        }
1276
1277        // Fall back to extension
1278        if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1279            if let Some(ft) = file_type::detect_from_extension(ext) {
1280                return Ok(ft);
1281            }
1282        }
1283
1284        let ext_str = path
1285            .extension()
1286            .and_then(|e| e.to_str())
1287            .unwrap_or("unknown");
1288        Err(Error::UnsupportedFileType(ext_str.to_string()))
1289    }
1290
1291    /// Dispatch to the appropriate format reader.
1292
1293    fn process_file(&self, data: &[u8], file_type: FileType) -> Result<Vec<Tag>> {
1294        match file_type {
1295            FileType::Jpeg | FileType::Jps => formats::jpeg::read_jpeg(data),
1296            FileType::Png | FileType::Mng => formats::png::read_png(data),
1297            // All TIFF-based formats (TIFF + most RAW formats)
1298            FileType::Tiff
1299            | FileType::Btf
1300            | FileType::Dng
1301            | FileType::Cr2
1302            | FileType::Nef
1303            | FileType::Arw
1304            | FileType::Sr2
1305            | FileType::Orf
1306            | FileType::Pef
1307            | FileType::Erf
1308            | FileType::Fff
1309            | FileType::Rwl
1310            | FileType::Mef
1311            | FileType::Srw
1312            | FileType::Gpr
1313            | FileType::Arq
1314            | FileType::ThreeFR
1315            | FileType::Dcr
1316            | FileType::Rw2
1317            | FileType::Srf => formats::tiff::read_tiff(data),
1318            // Phase One IIQ: TIFF + PhaseOne maker note block
1319            FileType::Iiq => formats::misc::read_iiq(data),
1320            // Image formats
1321            FileType::Gif => formats::gif::read_gif(data),
1322            FileType::Bmp => formats::bmp::read_bmp(data),
1323            FileType::WebP | FileType::Avi | FileType::Wav => formats::riff::read_riff(data),
1324            FileType::Psd => formats::psd::read_psd(data),
1325            // Audio formats
1326            FileType::Mp3 => formats::id3::read_mp3(data),
1327            FileType::Flac => formats::flac::read_flac(data),
1328            FileType::Ogg | FileType::Opus => formats::ogg::read_ogg(data),
1329            FileType::Aiff => formats::aiff::read_aiff(data),
1330            // Video formats
1331            FileType::Mp4
1332            | FileType::QuickTime
1333            | FileType::M4a
1334            | FileType::ThreeGP
1335            | FileType::Heif
1336            | FileType::Avif
1337            | FileType::Cr3
1338            | FileType::Crm
1339            | FileType::F4v
1340            | FileType::Mqv
1341            | FileType::Lrv => formats::quicktime::read_quicktime_with_ee(data, self.options.extract_embedded),
1342            FileType::Mkv | FileType::WebM => formats::matroska::read_matroska(data),
1343            FileType::Asf | FileType::Wmv | FileType::Wma => formats::asf::read_asf(data),
1344            FileType::Wtv => formats::wtv::read_wtv(data),
1345            // RAW formats with custom containers
1346            FileType::Crw => formats::canon_raw::read_crw(data),
1347            FileType::Raf => formats::raf::read_raf(data),
1348            FileType::Mrw => formats::mrw::read_mrw(data),
1349            FileType::Mrc => formats::mrc::read_mrc(data),
1350            // Image formats
1351            FileType::Jp2 => formats::jp2::read_jp2(data),
1352            FileType::J2c => formats::jp2::read_j2c(data),
1353            FileType::Jxl => formats::jp2::read_jxl(data),
1354            FileType::Ico => formats::ico::read_ico(data),
1355            FileType::Icc => formats::icc::read_icc(data),
1356            // Documents
1357            FileType::Pdf => formats::pdf::read_pdf(data),
1358            FileType::PostScript => {
1359                // PFA fonts start with %!PS-AdobeFont or %!FontType1
1360                if data.starts_with(b"%!PS-AdobeFont") || data.starts_with(b"%!FontType1") {
1361                    formats::font::read_pfa(data).or_else(|_| formats::postscript::read_postscript(data))
1362                } else {
1363                    formats::postscript::read_postscript(data)
1364                }
1365            }
1366            FileType::Eip => formats::capture_one::read_eip(data),
1367            FileType::Zip | FileType::Docx | FileType::Xlsx | FileType::Pptx
1368            | FileType::Doc | FileType::Xls | FileType::Ppt => formats::zip::read_zip(data),
1369            FileType::Rtf => formats::rtf::read_rtf(data),
1370            FileType::InDesign => formats::misc::read_indesign(data),
1371            FileType::Pcap => formats::misc::read_pcap(data),
1372            FileType::Pcapng => formats::misc::read_pcapng(data),
1373            // Canon VRD / DR4
1374            FileType::Vrd => formats::canon_vrd::read_vrd(data).or_else(|_| Ok(Vec::new())),
1375            FileType::Dr4 => formats::canon_vrd::read_dr4(data).or_else(|_| Ok(Vec::new())),
1376            // Metadata / Other
1377            FileType::Xmp => formats::xmp_file::read_xmp(data),
1378            FileType::Svg => formats::misc::read_svg(data),
1379            FileType::Html => {
1380                // SVG files that weren't detected by magic (e.g., via extension fallback)
1381                let is_svg = data.windows(4).take(512).any(|w| w == b"<svg");
1382                if is_svg {
1383                    formats::misc::read_svg(data)
1384                } else {
1385                    formats::html::read_html(data)
1386                }
1387            }
1388            FileType::Exe => formats::exe::read_exe(data),
1389            FileType::Font => {
1390                // AFM: Adobe Font Metrics text file
1391                if data.starts_with(b"StartFontMetrics") {
1392                    return formats::font::read_afm(data);
1393                }
1394                // PFA: PostScript Type 1 ASCII font
1395                if data.starts_with(b"%!PS-AdobeFont") || data.starts_with(b"%!FontType1") {
1396                    return formats::font::read_pfa(data).or_else(|_| Ok(Vec::new()));
1397                }
1398                // PFB: PostScript Type 1 Binary font
1399                if data.len() >= 2 && data[0] == 0x80 && (data[1] == 0x01 || data[1] == 0x02) {
1400                    return formats::font::read_pfb(data).or_else(|_| Ok(Vec::new()));
1401                }
1402                formats::font::read_font(data)
1403            }
1404            // Audio with ID3
1405            FileType::WavPack | FileType::Dsf => formats::id3::read_mp3(data),
1406            FileType::Ape => formats::ape::read_ape(data),
1407            FileType::Mpc => formats::ape::read_mpc(data),
1408            FileType::Aac => formats::misc::read_aac(data),
1409            FileType::RealAudio => {
1410                formats::misc::read_real_audio(data).or_else(|_| Ok(Vec::new()))
1411            }
1412            FileType::RealMedia => {
1413                formats::misc::read_real_media(data).or_else(|_| Ok(Vec::new()))
1414            }
1415            // Misc formats
1416            FileType::Czi => formats::misc::read_czi(data).or_else(|_| Ok(Vec::new())),
1417            FileType::PhotoCd => formats::misc::read_photo_cd(data).or_else(|_| Ok(Vec::new())),
1418            FileType::Dicom => formats::dicom::read_dicom(data),
1419            FileType::Fits => formats::misc::read_fits(data),
1420            FileType::Flv => formats::misc::read_flv(data),
1421            FileType::Mxf => formats::misc::read_mxf(data).or_else(|_| Ok(Vec::new())),
1422            FileType::Swf => formats::misc::read_swf(data),
1423            FileType::Hdr => formats::misc::read_hdr(data),
1424            FileType::DjVu => formats::djvu::read_djvu(data),
1425            FileType::Xcf => formats::gimp::read_xcf(data),
1426            FileType::Mie => formats::mie::read_mie(data),
1427            FileType::Lfp => formats::lytro::read_lfp(data),
1428            // FileType::Miff dispatched via string extension below
1429            FileType::Fpf => formats::flir_fpf::read_fpf(data),
1430            FileType::Flif => formats::misc::read_flif(data),
1431            FileType::Bpg => formats::misc::read_bpg(data),
1432            FileType::Pcx => formats::misc::read_pcx(data),
1433            FileType::Pict => formats::misc::read_pict(data),
1434            FileType::Mpeg => formats::mpeg::read_mpeg(data),
1435            FileType::M2ts => formats::misc::read_m2ts(data, self.options.extract_embedded),
1436            FileType::Gzip => formats::misc::read_gzip(data),
1437            FileType::Rar => formats::misc::read_rar(data),
1438            FileType::SevenZ => formats::misc::read_7z(data),
1439            FileType::Dss => formats::misc::read_dss(data),
1440            FileType::Moi => formats::misc::read_moi(data),
1441            FileType::MacOs => formats::misc::read_macos(data),
1442            FileType::Json => formats::misc::read_json(data),
1443            // New formats
1444            FileType::Pgf => formats::pgf::read_pgf(data),
1445            FileType::Xisf => formats::xisf::read_xisf(data),
1446            FileType::Torrent => formats::torrent::read_torrent(data),
1447            FileType::Mobi => formats::palm::read_palm(data),
1448            FileType::Psp => formats::psp::read_psp(data),
1449            FileType::SonyPmp => formats::sony_pmp::read_sony_pmp(data),
1450            FileType::Audible => formats::audible::read_audible(data),
1451            FileType::Exr => formats::openexr::read_openexr(data),
1452            // New formats
1453            FileType::Plist => {
1454                if data.starts_with(b"bplist") {
1455                    formats::plist::read_binary_plist_tags(data)
1456                } else {
1457                    formats::plist::read_xml_plist(data)
1458                }
1459            }
1460            FileType::Aae => {
1461                if data.starts_with(b"bplist") {
1462                    formats::plist::read_binary_plist_tags(data)
1463                } else {
1464                    formats::plist::read_aae_plist(data)
1465                }
1466            }
1467            FileType::KyoceraRaw => formats::misc::read_kyocera_raw(data),
1468            FileType::PortableFloatMap => formats::misc::read_pfm(data),
1469            FileType::Ods | FileType::Odt | FileType::Odp | FileType::Odg |
1470            FileType::Odf | FileType::Odb | FileType::Odi | FileType::Odc => formats::zip::read_zip(data),
1471            FileType::Lif => formats::misc::read_lif(data),
1472            FileType::Rwz => formats::misc::read_rawzor(data),
1473            FileType::Jxr => formats::misc::read_jxr(data),
1474            _ => Err(Error::UnsupportedFileType(format!("{}", file_type))),
1475        }
1476    }
1477
1478    /// Fallback: try to read file based on extension for formats without magic detection.
1479    fn process_by_extension(&self, data: &[u8], path: &Path) -> Result<Vec<Tag>> {
1480        let ext = path
1481            .extension()
1482            .and_then(|e| e.to_str())
1483            .unwrap_or("")
1484            .to_ascii_lowercase();
1485
1486        match ext.as_str() {
1487            "ppm" | "pgm" | "pbm" => formats::misc::read_ppm(data),
1488            "pfm" => {
1489                // PFM can be Portable Float Map or Printer Font Metrics
1490                if data.len() >= 3 && data[0] == b'P' && (data[1] == b'f' || data[1] == b'F') {
1491                    formats::misc::read_ppm(data)
1492                } else {
1493                    Ok(Vec::new()) // Printer Font Metrics
1494                }
1495            }
1496            "json" => formats::misc::read_json(data),
1497            "svg" => formats::misc::read_svg(data),
1498            "ram" => formats::misc::read_ram(data).or_else(|_| Ok(Vec::new())),
1499            "txt" | "log" | "igc" => {
1500                Ok(compute_text_tags(data, false))
1501            }
1502            "csv" => {
1503                Ok(compute_text_tags(data, true))
1504            }
1505            "url" => formats::lnk::read_url(data).or_else(|_| Ok(Vec::new())),
1506            "lnk" => formats::lnk::read_lnk(data).or_else(|_| Ok(Vec::new())),
1507            "gpx" | "kml" | "xml" | "inx" => formats::xmp_file::read_xmp(data),
1508            "plist" => {
1509                if data.starts_with(b"bplist") {
1510                    formats::plist::read_binary_plist_tags(data).or_else(|_| Ok(Vec::new()))
1511                } else {
1512                    formats::plist::read_xml_plist(data).or_else(|_| Ok(Vec::new()))
1513                }
1514            }
1515            "aae" => {
1516                if data.starts_with(b"bplist") {
1517                    formats::plist::read_binary_plist_tags(data).or_else(|_| Ok(Vec::new()))
1518                } else {
1519                    formats::plist::read_aae_plist(data).or_else(|_| Ok(Vec::new()))
1520                }
1521            }
1522            "vcf" | "ics" | "vcard" => {
1523                let s = String::from_utf8_lossy(&data[..data.len().min(100)]);
1524                if s.contains("BEGIN:VCALENDAR") {
1525                    formats::vcard::read_ics(data).or_else(|_| Ok(Vec::new()))
1526                } else {
1527                    formats::vcard::read_vcf(data).or_else(|_| Ok(Vec::new()))
1528                }
1529            }
1530            "xcf" => Ok(Vec::new()),      // GIMP
1531            "vrd" => formats::canon_vrd::read_vrd(data).or_else(|_| Ok(Vec::new())),
1532            "dr4" => formats::canon_vrd::read_dr4(data).or_else(|_| Ok(Vec::new())),
1533            "indd" | "indt" => Ok(Vec::new()), // InDesign
1534            "x3f" => formats::sigma_raw::read_x3f(data).or_else(|_| Ok(Vec::new())),
1535            "mie" => Ok(Vec::new()),       // MIE
1536            "exr" => Ok(Vec::new()),       // OpenEXR
1537            "wpg" => formats::misc::read_wpg(data).or_else(|_| Ok(Vec::new())),
1538            "moi" => formats::misc::read_moi(data).or_else(|_| Ok(Vec::new())),
1539            "macos" => formats::misc::read_macos(data).or_else(|_| Ok(Vec::new())),
1540            "dpx" => formats::dpx::read_dpx(data).or_else(|_| Ok(Vec::new())),
1541            "r3d" => formats::red::read_r3d(data).or_else(|_| Ok(Vec::new())),
1542            "tnef" => formats::tnef::read_tnef(data).or_else(|_| Ok(Vec::new())),
1543            "ppt" | "fpx" => formats::flashpix::read_fpx(data).or_else(|_| Ok(Vec::new())),
1544            "fpf" => formats::flir_fpf::read_fpf(data).or_else(|_| Ok(Vec::new())),
1545            "itc" => formats::misc::read_itc(data).or_else(|_| Ok(Vec::new())),
1546            "mpg" | "mpeg" | "m1v" | "m2v" | "mpv" => formats::mpeg::read_mpeg(data).or_else(|_| Ok(Vec::new())),
1547            "dv" => formats::dv::read_dv(data, data.len() as u64).or_else(|_| Ok(Vec::new())),
1548            "czi" => formats::misc::read_czi(data).or_else(|_| Ok(Vec::new())),
1549            "miff" => formats::miff::read_miff(data).or_else(|_| Ok(Vec::new())),
1550            "lfp" | "mrc"
1551            | "dss" | "mobi" | "psp" | "pgf" | "raw"
1552            | "pmp" | "torrent"
1553            | "xisf" | "mxf"
1554            | "dfont" => Ok(Vec::new()),
1555            "iso" => formats::iso::read_iso(data).or_else(|_| Ok(Vec::new())),
1556            "afm" => formats::font::read_afm(data).or_else(|_| Ok(Vec::new())),
1557            "pfa" => formats::font::read_pfa(data).or_else(|_| Ok(Vec::new())),
1558            "pfb" => formats::font::read_pfb(data).or_else(|_| Ok(Vec::new())),
1559            _ => Err(Error::UnsupportedFileType(ext)),
1560        }
1561    }
1562}
1563
1564impl Default for ExifTool {
1565    fn default() -> Self {
1566        Self::new()
1567    }
1568}
1569
1570/// Detect OpenDocument file type by reading the `mimetype` entry from a ZIP.
1571/// Returns None if not an OpenDocument file.
1572fn detect_opendocument_type(data: &[u8]) -> Option<FileType> {
1573    // OpenDocument ZIPs have "mimetype" as the FIRST local file entry (uncompressed)
1574    if data.len() < 30 || data[0..4] != [0x50, 0x4B, 0x03, 0x04] {
1575        return None;
1576    }
1577    let compression = u16::from_le_bytes([data[8], data[9]]);
1578    let compressed_size = u32::from_le_bytes([data[18], data[19], data[20], data[21]]) as usize;
1579    let name_len = u16::from_le_bytes([data[26], data[27]]) as usize;
1580    let extra_len = u16::from_le_bytes([data[28], data[29]]) as usize;
1581    let name_start = 30;
1582    if name_start + name_len > data.len() {
1583        return None;
1584    }
1585    let filename = std::str::from_utf8(&data[name_start..name_start + name_len]).unwrap_or("");
1586    if filename != "mimetype" || compression != 0 {
1587        return None;
1588    }
1589    let content_start = name_start + name_len + extra_len;
1590    let content_end = (content_start + compressed_size).min(data.len());
1591    if content_start >= content_end {
1592        return None;
1593    }
1594    let mime = std::str::from_utf8(&data[content_start..content_end]).unwrap_or("").trim();
1595    match mime {
1596        "application/vnd.oasis.opendocument.spreadsheet" => Some(FileType::Ods),
1597        "application/vnd.oasis.opendocument.text" => Some(FileType::Odt),
1598        "application/vnd.oasis.opendocument.presentation" => Some(FileType::Odp),
1599        "application/vnd.oasis.opendocument.graphics" => Some(FileType::Odg),
1600        "application/vnd.oasis.opendocument.formula" => Some(FileType::Odf),
1601        "application/vnd.oasis.opendocument.database" => Some(FileType::Odb),
1602        "application/vnd.oasis.opendocument.image" => Some(FileType::Odi),
1603        "application/vnd.oasis.opendocument.chart" => Some(FileType::Odc),
1604        _ => None,
1605    }
1606}
1607
1608/// Detect the file type of a file at the given path.
1609pub fn get_file_type<P: AsRef<Path>>(path: P) -> Result<FileType> {
1610    let path = path.as_ref();
1611    let mut file = fs::File::open(path).map_err(Error::Io)?;
1612    let mut header = [0u8; 256];
1613    use std::io::Read;
1614    let n = file.read(&mut header).map_err(Error::Io)?;
1615
1616    if let Some(ft) = file_type::detect_from_magic(&header[..n]) {
1617        return Ok(ft);
1618    }
1619
1620    if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1621        if let Some(ft) = file_type::detect_from_extension(ext) {
1622            return Ok(ft);
1623        }
1624    }
1625
1626    Err(Error::UnsupportedFileType("unknown".into()))
1627}
1628
/// The IFD an EXIF tag is assigned to.
enum ExifIfdGroup {
    /// Main image directory (the default for unrecognised IDs).
    Ifd0,
    /// Exif sub-directory.
    ExifIfd,
    /// GPS sub-directory.
    Gps,
}

/// Decide which IFD a tag belongs to, based purely on its numeric ID.
///
/// IDs inside one of the ExifIFD ranges map to `ExifIfd`, IDs
/// `0x0000..=0x001F` map to `Gps`, and every other ID maps to `Ifd0`.
fn classify_exif_tag(tag_id: u16) -> ExifIfdGroup {
    // Inclusive (first, last) ID ranges that belong to the ExifIFD.
    const EXIF_IFD_RANGES: [(u16, u16); 5] = [
        (0x829A, 0x829D),
        (0x8822, 0x8827),
        (0x8830, 0x8830),
        (0x9000, 0x9292),
        (0xA000, 0xA435),
    ];

    let in_exif_ifd = EXIF_IFD_RANGES
        .iter()
        .any(|&(first, last)| first <= tag_id && tag_id <= last);

    if in_exif_ifd {
        ExifIfdGroup::ExifIfd
    } else if tag_id <= 0x001F {
        ExifIfdGroup::Gps
    } else {
        ExifIfdGroup::Ifd0
    }
}
1648
1649/// Extract existing EXIF entries from a JPEG file's APP1 segment.
1650fn extract_existing_exif_entries(jpeg_data: &[u8], target_bo: ByteOrderMark) -> Vec<exif_writer::IfdEntry> {
1651    let mut entries = Vec::new();
1652
1653    // Find EXIF APP1 segment
1654    let mut pos = 2; // Skip SOI
1655    while pos + 4 <= jpeg_data.len() {
1656        if jpeg_data[pos] != 0xFF {
1657            pos += 1;
1658            continue;
1659        }
1660        let marker = jpeg_data[pos + 1];
1661        pos += 2;
1662
1663        if marker == 0xDA || marker == 0xD9 {
1664            break; // SOS or EOI
1665        }
1666        if marker == 0xFF || marker == 0x00 || marker == 0xD8 || (0xD0..=0xD7).contains(&marker) {
1667            continue;
1668        }
1669
1670        if pos + 2 > jpeg_data.len() {
1671            break;
1672        }
1673        let seg_len = u16::from_be_bytes([jpeg_data[pos], jpeg_data[pos + 1]]) as usize;
1674        if seg_len < 2 || pos + seg_len > jpeg_data.len() {
1675            break;
1676        }
1677
1678        let seg_data = &jpeg_data[pos + 2..pos + seg_len];
1679
1680        // EXIF APP1
1681        if marker == 0xE1 && seg_data.len() > 14 && seg_data.starts_with(b"Exif\0\0") {
1682            let tiff_data = &seg_data[6..];
1683            extract_ifd_entries(tiff_data, target_bo, &mut entries);
1684            break;
1685        }
1686
1687        pos += seg_len;
1688    }
1689
1690    entries
1691}
1692
1693/// Extract IFD entries from TIFF data, re-encoding values in the target byte order.
1694fn extract_ifd_entries(
1695    tiff_data: &[u8],
1696    target_bo: ByteOrderMark,
1697    entries: &mut Vec<exif_writer::IfdEntry>,
1698) {
1699    use crate::metadata::exif::parse_tiff_header;
1700
1701    let header = match parse_tiff_header(tiff_data) {
1702        Ok(h) => h,
1703        Err(_) => return,
1704    };
1705
1706    let src_bo = header.byte_order;
1707
1708    // Read IFD0
1709    read_ifd_for_merge(tiff_data, header.ifd0_offset as usize, src_bo, target_bo, entries);
1710
1711    // Find ExifIFD and GPS pointers
1712    let ifd0_offset = header.ifd0_offset as usize;
1713    if ifd0_offset + 2 > tiff_data.len() {
1714        return;
1715    }
1716    let count = read_u16_bo(tiff_data, ifd0_offset, src_bo) as usize;
1717    for i in 0..count {
1718        let eoff = ifd0_offset + 2 + i * 12;
1719        if eoff + 12 > tiff_data.len() {
1720            break;
1721        }
1722        let tag = read_u16_bo(tiff_data, eoff, src_bo);
1723        let value_off = read_u32_bo(tiff_data, eoff + 8, src_bo) as usize;
1724
1725        match tag {
1726            0x8769 => read_ifd_for_merge(tiff_data, value_off, src_bo, target_bo, entries),
1727            0x8825 => read_ifd_for_merge(tiff_data, value_off, src_bo, target_bo, entries),
1728            _ => {}
1729        }
1730    }
1731}
1732
1733/// Read a single IFD and extract entries for merge.
1734fn read_ifd_for_merge(
1735    data: &[u8],
1736    offset: usize,
1737    src_bo: ByteOrderMark,
1738    target_bo: ByteOrderMark,
1739    entries: &mut Vec<exif_writer::IfdEntry>,
1740) {
1741    if offset + 2 > data.len() {
1742        return;
1743    }
1744    let count = read_u16_bo(data, offset, src_bo) as usize;
1745
1746    for i in 0..count {
1747        let eoff = offset + 2 + i * 12;
1748        if eoff + 12 > data.len() {
1749            break;
1750        }
1751
1752        let tag = read_u16_bo(data, eoff, src_bo);
1753        let dtype = read_u16_bo(data, eoff + 2, src_bo);
1754        let count_val = read_u32_bo(data, eoff + 4, src_bo);
1755
1756        // Skip sub-IFD pointers and MakerNote
1757        if tag == 0x8769 || tag == 0x8825 || tag == 0xA005 || tag == 0x927C {
1758            continue;
1759        }
1760
1761        let type_size = match dtype {
1762            1 | 2 | 6 | 7 => 1usize,
1763            3 | 8 => 2,
1764            4 | 9 | 11 | 13 => 4,
1765            5 | 10 | 12 => 8,
1766            _ => continue,
1767        };
1768
1769        let total_size = type_size * count_val as usize;
1770        let raw_data = if total_size <= 4 {
1771            data[eoff + 8..eoff + 12].to_vec()
1772        } else {
1773            let voff = read_u32_bo(data, eoff + 8, src_bo) as usize;
1774            if voff + total_size > data.len() {
1775                continue;
1776            }
1777            data[voff..voff + total_size].to_vec()
1778        };
1779
1780        // Re-encode multi-byte values if byte orders differ
1781        let final_data = if src_bo != target_bo && type_size > 1 {
1782            reencode_bytes(&raw_data, dtype, count_val as usize, src_bo, target_bo)
1783        } else {
1784            raw_data[..total_size].to_vec()
1785        };
1786
1787        let format = match dtype {
1788            1 => exif_writer::ExifFormat::Byte,
1789            2 => exif_writer::ExifFormat::Ascii,
1790            3 => exif_writer::ExifFormat::Short,
1791            4 => exif_writer::ExifFormat::Long,
1792            5 => exif_writer::ExifFormat::Rational,
1793            6 => exif_writer::ExifFormat::SByte,
1794            7 => exif_writer::ExifFormat::Undefined,
1795            8 => exif_writer::ExifFormat::SShort,
1796            9 => exif_writer::ExifFormat::SLong,
1797            10 => exif_writer::ExifFormat::SRational,
1798            11 => exif_writer::ExifFormat::Float,
1799            12 => exif_writer::ExifFormat::Double,
1800            _ => continue,
1801        };
1802
1803        entries.push(exif_writer::IfdEntry {
1804            tag,
1805            format,
1806            data: final_data,
1807        });
1808    }
1809}
1810
1811/// Re-encode multi-byte values when converting between byte orders.
1812fn reencode_bytes(
1813    data: &[u8],
1814    dtype: u16,
1815    count: usize,
1816    src_bo: ByteOrderMark,
1817    dst_bo: ByteOrderMark,
1818) -> Vec<u8> {
1819    let mut out = Vec::with_capacity(data.len());
1820    match dtype {
1821        3 | 8 => {
1822            // 16-bit
1823            for i in 0..count {
1824                let v = read_u16_bo(data, i * 2, src_bo);
1825                match dst_bo {
1826                    ByteOrderMark::LittleEndian => out.extend_from_slice(&v.to_le_bytes()),
1827                    ByteOrderMark::BigEndian => out.extend_from_slice(&v.to_be_bytes()),
1828                }
1829            }
1830        }
1831        4 | 9 | 11 | 13 => {
1832            // 32-bit
1833            for i in 0..count {
1834                let v = read_u32_bo(data, i * 4, src_bo);
1835                match dst_bo {
1836                    ByteOrderMark::LittleEndian => out.extend_from_slice(&v.to_le_bytes()),
1837                    ByteOrderMark::BigEndian => out.extend_from_slice(&v.to_be_bytes()),
1838                }
1839            }
1840        }
1841        5 | 10 => {
1842            // Rational (two 32-bit)
1843            for i in 0..count {
1844                let n = read_u32_bo(data, i * 8, src_bo);
1845                let d = read_u32_bo(data, i * 8 + 4, src_bo);
1846                match dst_bo {
1847                    ByteOrderMark::LittleEndian => {
1848                        out.extend_from_slice(&n.to_le_bytes());
1849                        out.extend_from_slice(&d.to_le_bytes());
1850                    }
1851                    ByteOrderMark::BigEndian => {
1852                        out.extend_from_slice(&n.to_be_bytes());
1853                        out.extend_from_slice(&d.to_be_bytes());
1854                    }
1855                }
1856            }
1857        }
1858        12 => {
1859            // 64-bit double
1860            for i in 0..count {
1861                let mut bytes = [0u8; 8];
1862                bytes.copy_from_slice(&data[i * 8..i * 8 + 8]);
1863                if src_bo != dst_bo {
1864                    bytes.reverse();
1865                }
1866                out.extend_from_slice(&bytes);
1867            }
1868        }
1869        _ => out.extend_from_slice(data),
1870    }
1871    out
1872}
1873
1874fn read_u16_bo(data: &[u8], offset: usize, bo: ByteOrderMark) -> u16 {
1875    if offset + 2 > data.len() { return 0; }
1876    match bo {
1877        ByteOrderMark::LittleEndian => u16::from_le_bytes([data[offset], data[offset + 1]]),
1878        ByteOrderMark::BigEndian => u16::from_be_bytes([data[offset], data[offset + 1]]),
1879    }
1880}
1881
1882fn read_u32_bo(data: &[u8], offset: usize, bo: ByteOrderMark) -> u32 {
1883    if offset + 4 > data.len() { return 0; }
1884    match bo {
1885        ByteOrderMark::LittleEndian => u32::from_le_bytes([data[offset], data[offset + 1], data[offset + 2], data[offset + 3]]),
1886        ByteOrderMark::BigEndian => u32::from_be_bytes([data[offset], data[offset + 1], data[offset + 2], data[offset + 3]]),
1887    }
1888}
1889
1890/// Map tag name to numeric EXIF tag ID.
1891fn tag_name_to_id(name: &str) -> Option<u16> {
1892    encode_exif_tag(name, "", "", ByteOrderMark::BigEndian).map(|(id, _, _)| id)
1893}
1894
/// Turn a tag value into a string that is safe to use as a file name.
///
/// Path separators, the characters `: * ? " < > |` and all control characters
/// are replaced by `_`; leading and trailing whitespace is then removed.
fn value_to_filename(value: &str) -> String {
    /// Characters that are not allowed in file names.
    const RESERVED: &str = "/\\:*?\"<>|";

    let mut cleaned = String::with_capacity(value.len());
    for ch in value.chars() {
        if RESERVED.contains(ch) || ch.is_control() {
            cleaned.push('_');
        } else {
            cleaned.push(ch);
        }
    }
    cleaned.trim().to_string()
}
1908
/// Parse a time-shift specification such as `"+1:0:0"` (add one hour) or
/// `"-0:30:0"` (subtract thirty minutes).
///
/// An optional leading `+` or `-` gives the direction (default: forward). The
/// remainder is one to three `:`-separated unsigned numbers read as
/// `hours[:minutes[:seconds]]`; omitted fields are zero.
///
/// Returns `(sign, hours, minutes, seconds)` with `sign` being `1` or `-1`,
/// or `None` if a field is not a valid number or there are more than three.
pub fn parse_date_shift(shift: &str) -> Option<(i32, u32, u32, u32)> {
    let (sign, body) = match shift.strip_prefix('-') {
        Some(rest) => (-1, rest),
        None => (1, shift.strip_prefix('+').unwrap_or(shift)),
    };

    let fields = body
        .split(':')
        .map(|part| part.parse::<u32>().ok())
        .collect::<Option<Vec<u32>>>()?;

    match fields.as_slice() {
        &[h] => Some((sign, h, 0, 0)),
        &[h, m] => Some((sign, h, m, 0)),
        &[h, m, s] => Some((sign, h, m, s)),
        _ => None,
    }
}
1940
1941/// Shift a datetime string by the given amount.
1942/// Input format: "YYYY:MM:DD HH:MM:SS"
1943pub fn shift_datetime(datetime: &str, shift: &str) -> Option<String> {
1944    let (sign, hours, minutes, seconds) = parse_date_shift(shift)?;
1945
1946    // Parse date/time
1947    if datetime.len() < 19 {
1948        return None;
1949    }
1950    let year: i32 = datetime[0..4].parse().ok()?;
1951    let month: u32 = datetime[5..7].parse().ok()?;
1952    let day: u32 = datetime[8..10].parse().ok()?;
1953    let hour: u32 = datetime[11..13].parse().ok()?;
1954    let min: u32 = datetime[14..16].parse().ok()?;
1955    let sec: u32 = datetime[17..19].parse().ok()?;
1956
1957    // Convert to total seconds, shift, convert back
1958    let total_secs = (hour * 3600 + min * 60 + sec) as i64
1959        + sign as i64 * (hours * 3600 + minutes * 60 + seconds) as i64;
1960
1961    let days_shift = if total_secs < 0 {
1962        -1 - (-total_secs - 1) as i64 / 86400
1963    } else {
1964        total_secs / 86400
1965    };
1966
1967    let time_secs = ((total_secs % 86400) + 86400) % 86400;
1968    let new_hour = (time_secs / 3600) as u32;
1969    let new_min = ((time_secs % 3600) / 60) as u32;
1970    let new_sec = (time_secs % 60) as u32;
1971
1972    // Simple day shifting (doesn't handle month/year rollover perfectly for large shifts)
1973    let mut new_day = day as i32 + days_shift as i32;
1974    let mut new_month = month;
1975    let mut new_year = year;
1976
1977    let days_in_month = |m: u32, y: i32| -> i32 {
1978        match m {
1979            1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
1980            4 | 6 | 9 | 11 => 30,
1981            2 => if (y % 4 == 0 && y % 100 != 0) || y % 400 == 0 { 29 } else { 28 },
1982            _ => 30,
1983        }
1984    };
1985
1986    while new_day > days_in_month(new_month, new_year) {
1987        new_day -= days_in_month(new_month, new_year);
1988        new_month += 1;
1989        if new_month > 12 {
1990            new_month = 1;
1991            new_year += 1;
1992        }
1993    }
1994    while new_day < 1 {
1995        new_month = if new_month == 1 { 12 } else { new_month - 1 };
1996        if new_month == 12 {
1997            new_year -= 1;
1998        }
1999        new_day += days_in_month(new_month, new_year);
2000    }
2001
2002    Some(format!(
2003        "{:04}:{:02}:{:02} {:02}:{:02}:{:02}",
2004        new_year, new_month, new_day, new_hour, new_min, new_sec
2005    ))
2006}
2007
/// Format a Unix timestamp (seconds since 1970-01-01 00:00:00) as
/// `"YYYY:MM:DD HH:MM:SS"` in the proleptic Gregorian calendar, with no
/// time-zone adjustment.
///
/// Timestamps before the epoch are supported: the seconds are split into
/// whole days and time of day with floor semantics, so `-1` yields
/// `"1969:12:31 23:59:59"`. The date is derived in constant time with the
/// standard "civil from days" algorithm instead of stepping year by year.
fn unix_to_datetime(secs: i64) -> String {
    let days = secs.div_euclid(86_400);
    let time = secs.rem_euclid(86_400);
    let h = time / 3600;
    let m = (time % 3600) / 60;
    let s = time % 60;

    // Shift the epoch to 0000-03-01 so that leap days fall at the end of the
    // (March-based) year, then work within 400-year eras of 146_097 days.
    let z = days + 719_468;
    let era = z.div_euclid(146_097);
    let day_of_era = z.rem_euclid(146_097); // 0..=146_096
    let year_of_era =
        (day_of_era - day_of_era / 1_460 + day_of_era / 36_524 - day_of_era / 146_096) / 365; // 0..=399
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100); // 0..=365
    let month_index = (5 * day_of_year + 2) / 153; // 0 = March .. 11 = February
    let day = day_of_year - (153 * month_index + 2) / 5 + 1;
    let month = if month_index < 10 { month_index + 3 } else { month_index - 9 };
    // January and February belong to the next civil year.
    let year = year_of_era + era * 400 + if month <= 2 { 1 } else { 0 };

    format!("{:04}:{:02}:{:02} {:02}:{:02}:{:02}", year, month, day, h, m, s)
}
2032
/// Render a byte count for display.
///
/// Values below 1024 are printed exactly as `"N bytes"`; larger values are
/// scaled by powers of 1024 and printed with one decimal as `kB`, `MB` or `GB`.
fn format_file_size(bytes: u64) -> String {
    const KILO: u64 = 1024;
    const MEGA: u64 = KILO * KILO;
    const GIGA: u64 = MEGA * KILO;

    let scaled = |unit: u64, suffix: &str| format!("{:.1} {}", bytes as f64 / unit as f64, suffix);

    match bytes {
        b if b < KILO => format!("{} bytes", b),
        b if b < MEGA => scaled(KILO, "kB"),
        b if b < GIGA => scaled(MEGA, "MB"),
        _ => scaled(GIGA, "GB"),
    }
}
2044
/// Report whether `tag` is one of the names treated as XMP by default.
///
/// The comparison is case-insensitive (the name is lower-cased first) and
/// must match the whole name.
fn is_xmp_tag(tag: &str) -> bool {
    /// Lower-case names of the tags considered XMP.
    const XMP_TAG_NAMES: [&str; 9] = [
        "title",
        "description",
        "subject",
        "creator",
        "rights",
        "keywords",
        "rating",
        "label",
        "hierarchicalsubject",
    ];

    let lowered = tag.to_lowercase();
    XMP_TAG_NAMES.iter().any(|&known| known == lowered)
}
2053
2054/// Encode an EXIF tag value to binary.
2055/// Returns (tag_id, format, encoded_data) or None if tag is unknown.
2056fn encode_exif_tag(
2057    tag_name: &str,
2058    value: &str,
2059    _group: &str,
2060    bo: ByteOrderMark,
2061) -> Option<(u16, exif_writer::ExifFormat, Vec<u8>)> {
2062    let tag_lower = tag_name.to_lowercase();
2063
2064    // Map common tag names to EXIF tag IDs and formats
2065    let (tag_id, format): (u16, exif_writer::ExifFormat) = match tag_lower.as_str() {
2066        // IFD0 string tags
2067        "imagedescription" => (0x010E, exif_writer::ExifFormat::Ascii),
2068        "make" => (0x010F, exif_writer::ExifFormat::Ascii),
2069        "model" => (0x0110, exif_writer::ExifFormat::Ascii),
2070        "software" => (0x0131, exif_writer::ExifFormat::Ascii),
2071        "modifydate" | "datetime" => (0x0132, exif_writer::ExifFormat::Ascii),
2072        "artist" => (0x013B, exif_writer::ExifFormat::Ascii),
2073        "copyright" => (0x8298, exif_writer::ExifFormat::Ascii),
2074        // IFD0 numeric tags
2075        "orientation" => (0x0112, exif_writer::ExifFormat::Short),
2076        "xresolution" => (0x011A, exif_writer::ExifFormat::Rational),
2077        "yresolution" => (0x011B, exif_writer::ExifFormat::Rational),
2078        "resolutionunit" => (0x0128, exif_writer::ExifFormat::Short),
2079        // ExifIFD tags
2080        "datetimeoriginal" => (0x9003, exif_writer::ExifFormat::Ascii),
2081        "createdate" | "datetimedigitized" => (0x9004, exif_writer::ExifFormat::Ascii),
2082        "usercomment" => (0x9286, exif_writer::ExifFormat::Undefined),
2083        "imageuniqueid" => (0xA420, exif_writer::ExifFormat::Ascii),
2084        "ownername" | "cameraownername" => (0xA430, exif_writer::ExifFormat::Ascii),
2085        "serialnumber" | "bodyserialnumber" => (0xA431, exif_writer::ExifFormat::Ascii),
2086        "lensmake" => (0xA433, exif_writer::ExifFormat::Ascii),
2087        "lensmodel" => (0xA434, exif_writer::ExifFormat::Ascii),
2088        "lensserialnumber" => (0xA435, exif_writer::ExifFormat::Ascii),
2089        _ => return None,
2090    };
2091
2092    let encoded = match format {
2093        exif_writer::ExifFormat::Ascii => exif_writer::encode_ascii(value),
2094        exif_writer::ExifFormat::Short => {
2095            let v: u16 = value.parse().ok()?;
2096            exif_writer::encode_u16(v, bo)
2097        }
2098        exif_writer::ExifFormat::Long => {
2099            let v: u32 = value.parse().ok()?;
2100            exif_writer::encode_u32(v, bo)
2101        }
2102        exif_writer::ExifFormat::Rational => {
2103            // Parse "N/D" or just "N"
2104            if let Some(slash) = value.find('/') {
2105                let num: u32 = value[..slash].trim().parse().ok()?;
2106                let den: u32 = value[slash + 1..].trim().parse().ok()?;
2107                exif_writer::encode_urational(num, den, bo)
2108            } else if let Ok(v) = value.parse::<f64>() {
2109                // Convert float to rational
2110                let den = 10000u32;
2111                let num = (v * den as f64).round() as u32;
2112                exif_writer::encode_urational(num, den, bo)
2113            } else {
2114                return None;
2115            }
2116        }
2117        exif_writer::ExifFormat::Undefined => {
2118            // UserComment: 8 bytes charset + data
2119            let mut data = vec![0x41, 0x53, 0x43, 0x49, 0x49, 0x00, 0x00, 0x00]; // "ASCII\0\0\0"
2120            data.extend_from_slice(value.as_bytes());
2121            data
2122        }
2123        _ => return None,
2124    };
2125
2126    Some((tag_id, format, encoded))
2127}
2128
2129/// Compute text file tags (from Perl Text.pm).
2130fn compute_text_tags(data: &[u8], is_csv: bool) -> Vec<Tag> {
2131    let mut tags = Vec::new();
2132    let mk = |name: &str, val: String| Tag {
2133        id: crate::tag::TagId::Text(name.into()),
2134        name: name.into(), description: name.into(),
2135        group: crate::tag::TagGroup { family0: "File".into(), family1: "File".into(), family2: "Other".into() },
2136        raw_value: Value::String(val.clone()), print_value: val, priority: 0,
2137    };
2138
2139    // Detect encoding and BOM
2140    let is_ascii = data.iter().all(|&b| b < 128);
2141    let has_utf8_bom = data.starts_with(&[0xEF, 0xBB, 0xBF]);
2142    let has_utf16le_bom = data.starts_with(&[0xFF, 0xFE]) && !data.starts_with(&[0xFF, 0xFE, 0x00, 0x00]);
2143    let has_utf16be_bom = data.starts_with(&[0xFE, 0xFF]);
2144    let has_utf32le_bom = data.starts_with(&[0xFF, 0xFE, 0x00, 0x00]);
2145    let has_utf32be_bom = data.starts_with(&[0x00, 0x00, 0xFE, 0xFF]);
2146
2147    // Detect if file has weird non-text control characters (like multi-byte unicode without BOM)
2148    let has_weird_ctrl = data.iter().any(|&b| (b <= 0x06) || (b >= 0x0e && b <= 0x1a) || (b >= 0x1c && b <= 0x1f) || b == 0x7f);
2149
2150    let (encoding, is_bom, is_utf16) = if has_utf32le_bom {
2151        ("utf-32le", true, false)
2152    } else if has_utf32be_bom {
2153        ("utf-32be", true, false)
2154    } else if has_utf16le_bom {
2155        ("utf-16le", true, true)
2156    } else if has_utf16be_bom {
2157        ("utf-16be", true, true)
2158    } else if has_weird_ctrl {
2159        // Not a text file (has binary-like control chars but no recognized multi-byte marker)
2160        return tags;
2161    } else if is_ascii {
2162        ("us-ascii", false, false)
2163    } else {
2164        // Check UTF-8
2165        let is_valid_utf8 = std::str::from_utf8(data).is_ok();
2166        if is_valid_utf8 {
2167            if has_utf8_bom {
2168                ("utf-8", true, false)
2169            } else {
2170                // Check if it has high bytes suggesting iso-8859-1 vs utf-8
2171                // Perl's IsUTF8: returns >0 if valid UTF-8 with multi-byte, 0 if ASCII, <0 if invalid
2172                // For simplicity: valid UTF-8 without BOM = utf-8
2173                ("utf-8", false, false)
2174            }
2175        } else if !data.iter().any(|&b| b >= 0x80 && b <= 0x9f) {
2176            ("iso-8859-1", false, false)
2177        } else {
2178            ("unknown-8bit", false, false)
2179        }
2180    };
2181
2182    tags.push(mk("MIMEEncoding", encoding.into()));
2183
2184    if is_bom {
2185        tags.push(mk("ByteOrderMark", "Yes".into()));
2186    }
2187
2188    // Count newlines and detect type
2189    let has_cr = data.contains(&b'\r');
2190    let has_lf = data.contains(&b'\n');
2191    let newline_type = if has_cr && has_lf { "Windows CRLF" }
2192        else if has_lf { "Unix LF" }
2193        else if has_cr { "Macintosh CR" }
2194        else { "(none)" };
2195    tags.push(mk("Newlines", newline_type.into()));
2196
2197    if is_csv {
2198        // CSV analysis: detect delimiter, quoting, column count, row count
2199        let text = String::from_utf8_lossy(data);
2200        let mut delim = "";
2201        let mut quot = "";
2202        let mut ncols = 1usize;
2203        let mut nrows = 0usize;
2204
2205        for line in text.lines() {
2206            if nrows == 0 {
2207                // Detect delimiter from first line
2208                let comma_count = line.matches(',').count();
2209                let semi_count = line.matches(';').count();
2210                let tab_count = line.matches('\t').count();
2211                if comma_count > semi_count && comma_count > tab_count {
2212                    delim = ",";
2213                    ncols = comma_count + 1;
2214                } else if semi_count > tab_count {
2215                    delim = ";";
2216                    ncols = semi_count + 1;
2217                } else if tab_count > 0 {
2218                    delim = "\t";
2219                    ncols = tab_count + 1;
2220                } else {
2221                    delim = "";
2222                    ncols = 1;
2223                }
2224                // Detect quoting
2225                if line.contains('"') { quot = "\""; }
2226                else if line.contains('\'') { quot = "'"; }
2227            }
2228            nrows += 1;
2229            if nrows >= 1000 { break; }
2230        }
2231
2232        let delim_display = match delim {
2233            "," => "Comma",
2234            ";" => "Semicolon",
2235            "\t" => "Tab",
2236            _ => "(none)",
2237        };
2238        let quot_display = match quot {
2239            "\"" => "Double quotes",
2240            "'" => "Single quotes",
2241            _ => "(none)",
2242        };
2243
2244        tags.push(mk("Delimiter", delim_display.into()));
2245        tags.push(mk("Quoting", quot_display.into()));
2246        tags.push(mk("ColumnCount", ncols.to_string()));
2247        if nrows > 0 {
2248            tags.push(mk("RowCount", nrows.to_string()));
2249        }
2250    } else if !is_utf16 {
2251        // Line count and word count for plain text files (not UTF-16/32)
2252        let line_count = data.iter().filter(|&&b| b == b'\n').count();
2253        let line_count = if line_count == 0 && !data.is_empty() { 1 } else { line_count };
2254        tags.push(mk("LineCount", line_count.to_string()));
2255
2256        let text = String::from_utf8_lossy(data);
2257        let word_count = text.split_whitespace().count();
2258        tags.push(mk("WordCount", word_count.to_string()));
2259    }
2260
2261    tags
2262}
exiftool_rs/exiftool.rs

exiftool_rs/
exiftool.rs