exiftool_rs/
exiftool.rs

1//! Core ExifTool struct and public API.
2//!
3//! This is the main entry point for reading metadata from files.
4//! Mirrors ExifTool.pm's ImageInfo/ExtractInfo/GetInfo pipeline.
5
6use std::collections::HashMap;
7use std::fs;
8use std::path::Path;
9
10use crate::error::{Error, Result};
11use crate::file_type::{self, FileType};
12use crate::formats;
13use crate::metadata::exif::ByteOrderMark;
14use crate::tag::Tag;
15use crate::value::Value;
16use crate::writer::{exif_writer, iptc_writer, jpeg_writer, matroska_writer, mp4_writer, pdf_writer, png_writer, psd_writer, tiff_writer, webp_writer, xmp_writer};
17
/// Tunable settings that control how metadata is extracted.
#[derive(Debug, Clone)]
pub struct Options {
    /// Keep duplicate tags (the same tag name may occur in different groups).
    pub duplicates: bool,
    /// Run print conversions so values come out human-readable.
    pub print_conv: bool,
    /// Fast scan level: 0=normal, 1=skip composite, 2=skip maker notes, 3=skip thumbnails.
    pub fast_scan: u8,
    /// Restrict extraction to these tag names (empty = all tags).
    pub requested_tags: Vec<String>,
    /// Extract embedded documents/data (video frames, etc.). Level: 0=off, 1=-ee, 2=-ee2, 3=-ee3.
    pub extract_embedded: u8,
}

impl Default for Options {
    /// Defaults: print conversions enabled; every other setting off, zero or empty.
    fn default() -> Self {
        Options {
            print_conv: true,
            duplicates: false,
            fast_scan: 0,
            extract_embedded: 0,
            requested_tags: vec![],
        }
    }
}
44
/// A queued tag change for writing.
///
/// One entry describes a single pending edit: which tag, optionally which
/// group it belongs to, and the replacement text. A `value` of `None`
/// requests deletion of the tag instead of a replacement.
#[derive(Debug, Clone)]
pub struct NewValue {
    /// Tag name (e.g., "Artist", "Copyright", "Title").
    pub tag: String,
    /// Group prefix if specified (e.g., "EXIF", "XMP", "IPTC")
    pub group: Option<String>,
    /// New value (None = delete tag)
    pub value: Option<String>,
}
67
68/// The main ExifTool engine — read, write, and edit metadata.
69///
70/// # Reading metadata
71/// ```no_run
72/// use exiftool_rs::ExifTool;
73///
74/// let et = ExifTool::new();
75///
76/// // Full tag structs
77/// let tags = et.extract_info("photo.jpg").unwrap();
78/// for tag in &tags {
79///     println!("[{}] {}: {}", tag.group.family0, tag.name, tag.print_value);
80/// }
81///
82/// // Simple name→value map
83/// let info = et.image_info("photo.jpg").unwrap();
84/// println!("Camera: {}", info.get("Model").unwrap_or(&String::new()));
85/// ```
86///
87/// # Writing metadata
88/// ```no_run
89/// use exiftool_rs::ExifTool;
90///
91/// let mut et = ExifTool::new();
92/// et.set_new_value("Artist", Some("John Doe"));
93/// et.set_new_value("Copyright", Some("2024"));
94/// et.write_info("input.jpg", "output.jpg").unwrap();
95/// ```
96pub struct ExifTool {
97    options: Options,
98    new_values: Vec<NewValue>,
99}
100
/// Extraction result as a flat map: tag name → display value.
pub type ImageInfo = HashMap<String, String>;
103
104impl ExifTool {
105    /// Create a new ExifTool instance with default options.
106    pub fn new() -> Self {
107        Self {
108            options: Options::default(),
109            new_values: Vec::new(),
110        }
111    }
112
113    /// Create a new ExifTool instance with custom options.
114    pub fn with_options(options: Options) -> Self {
115        Self {
116            options,
117            new_values: Vec::new(),
118        }
119    }
120
121    /// Get a mutable reference to the options.
122    pub fn options_mut(&mut self) -> &mut Options {
123        &mut self.options
124    }
125
126    /// Get a reference to the options.
127    pub fn options(&self) -> &Options {
128        &self.options
129    }
130
131    // ================================================================
132    // Writing API
133    // ================================================================
134
135    /// Queue a new tag value for writing.
136    ///
137    /// Call this one or more times, then call `write_info()` to apply changes.
138    ///
139    /// # Arguments
140    /// * `tag` - Tag name, optionally prefixed with group (e.g., "Artist", "XMP:Title", "EXIF:Copyright")
141    /// * `value` - New value, or None to delete the tag
142    ///
143    /// # Example
144    /// ```no_run
145    /// use exiftool_rs::ExifTool;
146    /// let mut et = ExifTool::new();
147    /// et.set_new_value("Artist", Some("John Doe"));
148    /// et.set_new_value("Copyright", Some("2024 John Doe"));
149    /// et.set_new_value("XMP:Title", Some("My Photo"));
150    /// et.write_info("photo.jpg", "photo_out.jpg").unwrap();
151    /// ```
152    pub fn set_new_value(&mut self, tag: &str, value: Option<&str>) {
153        let (group, tag_name) = if let Some(colon_pos) = tag.find(':') {
154            (Some(tag[..colon_pos].to_string()), tag[colon_pos + 1..].to_string())
155        } else {
156            (None, tag.to_string())
157        };
158
159        self.new_values.push(NewValue {
160            tag: tag_name,
161            group,
162            value: value.map(|v| v.to_string()),
163        });
164    }
165
166    /// Clear all queued new values.
167    pub fn clear_new_values(&mut self) {
168        self.new_values.clear();
169    }
170
171    /// Copy tags from a source file, queuing them as new values.
172    ///
173    /// Reads all tags from `src_path` and queues them for writing.
174    /// Optionally filter by tag names.
175    pub fn set_new_values_from_file<P: AsRef<Path>>(
176        &mut self,
177        src_path: P,
178        tags_to_copy: Option<&[&str]>,
179    ) -> Result<u32> {
180        let src_tags = self.extract_info(src_path)?;
181        let mut count = 0u32;
182
183        for tag in &src_tags {
184            // Skip file-level tags that shouldn't be copied
185            if tag.group.family0 == "File" || tag.group.family0 == "Composite" {
186                continue;
187            }
188            // Skip binary/undefined data and empty values
189            if tag.print_value.starts_with("(Binary") || tag.print_value.starts_with("(Undefined") {
190                continue;
191            }
192            if tag.print_value.is_empty() {
193                continue;
194            }
195
196            // Filter by requested tags
197            if let Some(filter) = tags_to_copy {
198                let name_lower = tag.name.to_lowercase();
199                if !filter.iter().any(|f| f.to_lowercase() == name_lower) {
200                    continue;
201                }
202            }
203
204            let _full_tag = format!("{}:{}", tag.group.family0, tag.name);
205            self.new_values.push(NewValue {
206                tag: tag.name.clone(),
207                group: Some(tag.group.family0.clone()),
208                value: Some(tag.print_value.clone()),
209            });
210            count += 1;
211        }
212
213        Ok(count)
214    }
215
216    /// Set a file's name based on a tag value.
217    pub fn set_file_name_from_tag<P: AsRef<Path>>(
218        &self,
219        path: P,
220        tag_name: &str,
221        template: &str,
222    ) -> Result<String> {
223        let path = path.as_ref();
224        let tags = self.extract_info(path)?;
225
226        let tag_value = tags
227            .iter()
228            .find(|t| t.name.to_lowercase() == tag_name.to_lowercase())
229            .map(|t| &t.print_value)
230            .ok_or_else(|| Error::TagNotFound(tag_name.to_string()))?;
231
232        // Build new filename from template
233        // Template: "prefix%value%suffix.ext" or just use the tag value
234        let new_name = if template.contains('%') {
235            template.replace("%v", value_to_filename(tag_value).as_str())
236        } else {
237            // Default: use tag value as filename, keep extension
238            let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
239            let clean = value_to_filename(tag_value);
240            if ext.is_empty() {
241                clean
242            } else {
243                format!("{}.{}", clean, ext)
244            }
245        };
246
247        let parent = path.parent().unwrap_or(Path::new(""));
248        let new_path = parent.join(&new_name);
249
250        fs::rename(path, &new_path).map_err(Error::Io)?;
251        Ok(new_path.to_string_lossy().to_string())
252    }
253
254    /// Write queued changes to a file.
255    ///
256    /// If `dst_path` is the same as `src_path`, the file is modified in-place
257    /// (via a temporary file).
258    pub fn write_info<P: AsRef<Path>, Q: AsRef<Path>>(&self, src_path: P, dst_path: Q) -> Result<u32> {
259        let src_path = src_path.as_ref();
260        let dst_path = dst_path.as_ref();
261        let data = fs::read(src_path).map_err(Error::Io)?;
262
263        let file_type = self.detect_file_type(&data, src_path)?;
264        let output = self.apply_changes(&data, file_type)?;
265
266        // Write to temp file first, then rename (atomic)
267        let temp_path = dst_path.with_extension("exiftool_tmp");
268        fs::write(&temp_path, &output).map_err(Error::Io)?;
269        fs::rename(&temp_path, dst_path).map_err(Error::Io)?;
270
271        Ok(self.new_values.len() as u32)
272    }
273
274    /// Apply queued changes to in-memory data.
275    fn apply_changes(&self, data: &[u8], file_type: FileType) -> Result<Vec<u8>> {
276        match file_type {
277            FileType::Jpeg => self.write_jpeg(data),
278            FileType::Png => self.write_png(data),
279            FileType::Tiff | FileType::Dng | FileType::Cr2 | FileType::Nef
280            | FileType::Arw | FileType::Orf | FileType::Pef => self.write_tiff(data),
281            FileType::WebP => self.write_webp(data),
282            FileType::Mp4 | FileType::QuickTime | FileType::M4a
283            | FileType::ThreeGP | FileType::F4v => self.write_mp4(data),
284            FileType::Psd => self.write_psd(data),
285            FileType::Pdf => self.write_pdf(data),
286            FileType::Heif | FileType::Avif => self.write_mp4(data),
287            FileType::Mkv | FileType::WebM => self.write_matroska(data),
288            FileType::Gif => {
289                let comment = self.new_values.iter()
290                    .find(|nv| nv.tag.to_lowercase() == "comment")
291                    .and_then(|nv| nv.value.clone());
292                crate::writer::gif_writer::write_gif(data, comment.as_deref())
293            }
294            FileType::Flac => {
295                let changes: Vec<(&str, &str)> = self.new_values.iter()
296                    .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
297                    .collect();
298                crate::writer::flac_writer::write_flac(data, &changes)
299            }
300            FileType::Mp3 | FileType::Aiff => {
301                let changes: Vec<(&str, &str)> = self.new_values.iter()
302                    .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
303                    .collect();
304                crate::writer::id3_writer::write_id3(data, &changes)
305            }
306            FileType::Jp2 | FileType::Jxl => {
307                let new_xmp = if self.new_values.iter().any(|nv| nv.group.as_deref() == Some("XMP")) {
308                    let refs: Vec<&NewValue> = self.new_values.iter()
309                        .filter(|nv| nv.group.as_deref() == Some("XMP"))
310                        .collect();
311                    Some(self.build_new_xmp(&refs))
312                } else { None };
313                crate::writer::jp2_writer::write_jp2(data, new_xmp.as_deref(), None)
314            }
315            FileType::PostScript => {
316                let changes: Vec<(&str, &str)> = self.new_values.iter()
317                    .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
318                    .collect();
319                crate::writer::ps_writer::write_postscript(data, &changes)
320            }
321            FileType::Ogg | FileType::Opus => {
322                let changes: Vec<(&str, &str)> = self.new_values.iter()
323                    .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
324                    .collect();
325                crate::writer::ogg_writer::write_ogg(data, &changes)
326            }
327            FileType::Xmp => {
328                let props: Vec<xmp_writer::XmpProperty> = self.new_values.iter()
329                    .filter_map(|nv| {
330                        let val = nv.value.as_deref()?;
331                        Some(xmp_writer::XmpProperty {
332                            namespace: nv.group.clone().unwrap_or_else(|| "dc".into()),
333                            property: nv.tag.clone(),
334                            values: vec![val.to_string()],
335                            prop_type: xmp_writer::XmpPropertyType::Simple,
336                        })
337                    })
338                    .collect();
339                Ok(crate::writer::xmp_sidecar_writer::write_xmp_sidecar(&props))
340            }
341            _ => Err(Error::UnsupportedFileType(format!("writing not yet supported for {}", file_type))),
342        }
343    }
344
345    /// Returns the set of tag names (lowercase) that are writable for a given file type.
346    /// Returns `None` if any tag is writable (open-ended formats like PNG, FLAC, MKV).
347    /// Returns `Some(empty set)` if the format has no writer.
348    pub fn writable_tags(file_type: FileType) -> Option<std::collections::HashSet<&'static str>> {
349        use std::collections::HashSet;
350
351        // EXIF tags supported by exif_writer
352        const EXIF_TAGS: &[&str] = &[
353            "imagedescription", "make", "model", "orientation",
354            "xresolution", "yresolution", "resolutionunit", "software",
355            "modifydate", "datetime", "artist", "copyright",
356            "datetimeoriginal", "createdate", "datetimedigitized",
357            "usercomment", "imageuniqueid", "ownername", "cameraownername",
358            "serialnumber", "bodyserialnumber", "lensmake", "lensmodel", "lensserialnumber",
359        ];
360
361        // IPTC tags supported by iptc_writer
362        const IPTC_TAGS: &[&str] = &[
363            "objectname", "title", "urgency", "category", "supplementalcategories",
364            "keywords", "specialinstructions", "datecreated", "timecreated",
365            "by-line", "author", "byline", "by-linetitle", "authorsposition", "bylinetitle",
366            "city", "sub-location", "sublocation", "province-state", "state", "provincestate",
367            "country-primarylocationcode", "countrycode",
368            "country-primarylocationname", "country",
369            "headline", "credit", "source", "copyrightnotice",
370            "contact", "caption-abstract", "caption", "description",
371            "writer-editor", "captionwriter",
372        ];
373
374        // XMP auto-detected tags (no group prefix needed)
375        const XMP_AUTO_TAGS: &[&str] = &[
376            "title", "description", "subject", "creator", "rights",
377            "keywords", "rating", "label", "hierarchicalsubject",
378        ];
379
380        // ID3 tags
381        const ID3_TAGS: &[&str] = &[
382            "title", "artist", "album", "year", "date", "track",
383            "genre", "comment", "composer", "albumartist",
384            "encoder", "encodedby", "publisher", "copyright", "bpm", "lyrics",
385        ];
386
387        // MP4/MOV ilst tags
388        const MP4_TAGS: &[&str] = &[
389            "title", "artist", "album", "year", "date", "comment",
390            "genre", "composer", "writer", "encoder", "encodedby",
391            "grouping", "lyrics", "description", "albumartist", "copyright",
392        ];
393
394        // PDF Info dict tags
395        const PDF_TAGS: &[&str] = &[
396            "title", "author", "subject", "keywords", "creator", "producer",
397        ];
398
399        // PostScript DSC tags
400        const PS_TAGS: &[&str] = &[
401            "title", "creator", "author", "for", "creationdate", "createdate",
402        ];
403
404        match file_type {
405            // Open-ended: any tag name accepted
406            FileType::Png | FileType::Flac | FileType::Mkv | FileType::WebM
407            | FileType::Ogg | FileType::Opus | FileType::Xmp => None,
408
409            // JPEG: EXIF + IPTC + XMP auto + comment
410            FileType::Jpeg => {
411                let mut set: HashSet<&str> = HashSet::new();
412                set.extend(EXIF_TAGS);
413                set.extend(IPTC_TAGS);
414                set.extend(XMP_AUTO_TAGS);
415                set.insert("comment");
416                Some(set)
417            }
418
419            // TIFF-based: EXIF only
420            FileType::Tiff | FileType::Dng | FileType::Cr2 | FileType::Nef
421            | FileType::Arw | FileType::Orf | FileType::Pef => {
422                let mut set: HashSet<&str> = HashSet::new();
423                set.extend(EXIF_TAGS);
424                Some(set)
425            }
426
427            // WebP: EXIF + XMP auto
428            FileType::WebP => {
429                let mut set: HashSet<&str> = HashSet::new();
430                set.extend(EXIF_TAGS);
431                set.extend(XMP_AUTO_TAGS);
432                Some(set)
433            }
434
435            // MP4/MOV/HEIF: ilst + XMP auto
436            FileType::Mp4 | FileType::QuickTime | FileType::M4a
437            | FileType::ThreeGP | FileType::F4v | FileType::Heif | FileType::Avif => {
438                let mut set: HashSet<&str> = HashSet::new();
439                set.extend(MP4_TAGS);
440                set.extend(XMP_AUTO_TAGS);
441                Some(set)
442            }
443
444            // PSD: IPTC + XMP auto
445            FileType::Psd => {
446                let mut set: HashSet<&str> = HashSet::new();
447                set.extend(IPTC_TAGS);
448                set.extend(XMP_AUTO_TAGS);
449                Some(set)
450            }
451
452            FileType::Pdf => Some(PDF_TAGS.iter().copied().collect()),
453            FileType::PostScript => Some(PS_TAGS.iter().copied().collect()),
454
455            FileType::Mp3 | FileType::Aiff => Some(ID3_TAGS.iter().copied().collect()),
456
457            FileType::Gif => {
458                let mut set: HashSet<&str> = HashSet::new();
459                set.insert("comment");
460                Some(set)
461            }
462
463            // JP2/JXL: XMP only (with group prefix)
464            FileType::Jp2 | FileType::Jxl => Some(XMP_AUTO_TAGS.iter().copied().collect()),
465
466            // No writer
467            _ => Some(HashSet::new()),
468        }
469    }
470
471    /// Write metadata changes to JPEG data.
472    fn write_jpeg(&self, data: &[u8]) -> Result<Vec<u8>> {
473        // Classify new values by target group
474        let mut exif_values: Vec<&NewValue> = Vec::new();
475        let mut xmp_values: Vec<&NewValue> = Vec::new();
476        let mut iptc_values: Vec<&NewValue> = Vec::new();
477        let mut comment_value: Option<&str> = None;
478        let mut remove_exif = false;
479        let mut remove_xmp = false;
480        let mut remove_iptc = false;
481        let mut remove_comment = false;
482
483        for nv in &self.new_values {
484            let group = nv.group.as_deref().unwrap_or("");
485            let group_upper = group.to_uppercase();
486
487            // Check for group deletion
488            if nv.value.is_none() && nv.tag == "*" {
489                match group_upper.as_str() {
490                    "EXIF" => { remove_exif = true; continue; }
491                    "XMP" => { remove_xmp = true; continue; }
492                    "IPTC" => { remove_iptc = true; continue; }
493                    _ => {}
494                }
495            }
496
497            match group_upper.as_str() {
498                "XMP" => xmp_values.push(nv),
499                "IPTC" => iptc_values.push(nv),
500                "EXIF" | "IFD0" | "EXIFIFD" | "GPS" => exif_values.push(nv),
501                "" => {
502                    // Auto-detect best group based on tag name
503                    if nv.tag.to_lowercase() == "comment" {
504                        if nv.value.is_none() {
505                            remove_comment = true;
506                        } else {
507                            comment_value = nv.value.as_deref();
508                        }
509                    } else if is_xmp_tag(&nv.tag) {
510                        xmp_values.push(nv);
511                    } else {
512                        exif_values.push(nv);
513                    }
514                }
515                _ => exif_values.push(nv), // default to EXIF
516            }
517        }
518
519        // Build new EXIF data
520        let new_exif = if !exif_values.is_empty() {
521            Some(self.build_new_exif(data, &exif_values)?)
522        } else {
523            None
524        };
525
526        // Build new XMP data
527        let new_xmp = if !xmp_values.is_empty() {
528            Some(self.build_new_xmp(&xmp_values))
529        } else {
530            None
531        };
532
533        // Build new IPTC data
534        let new_iptc_data = if !iptc_values.is_empty() {
535            let records: Vec<iptc_writer::IptcRecord> = iptc_values
536                .iter()
537                .filter_map(|nv| {
538                    let value = nv.value.as_deref()?;
539                    let (record, dataset) = iptc_writer::tag_name_to_iptc(&nv.tag)?;
540                    Some(iptc_writer::IptcRecord {
541                        record,
542                        dataset,
543                        data: value.as_bytes().to_vec(),
544                    })
545                })
546                .collect();
547            if records.is_empty() {
548                None
549            } else {
550                Some(iptc_writer::build_iptc(&records))
551            }
552        } else {
553            None
554        };
555
556        // Rewrite JPEG
557        jpeg_writer::write_jpeg(
558            data,
559            new_exif.as_deref(),
560            new_xmp.as_deref(),
561            new_iptc_data.as_deref(),
562            comment_value,
563            remove_exif,
564            remove_xmp,
565            remove_iptc,
566            remove_comment,
567        )
568    }
569
570    /// Build new EXIF data by merging existing EXIF with queued changes.
571    fn build_new_exif(&self, jpeg_data: &[u8], values: &[&NewValue]) -> Result<Vec<u8>> {
572        let bo = ByteOrderMark::BigEndian;
573        let mut ifd0_entries = Vec::new();
574        let mut exif_entries = Vec::new();
575        let mut gps_entries = Vec::new();
576
577        // Step 1: Extract existing EXIF entries from the JPEG
578        let existing = extract_existing_exif_entries(jpeg_data, bo);
579        for entry in &existing {
580            match classify_exif_tag(entry.tag) {
581                ExifIfdGroup::Ifd0 => ifd0_entries.push(entry.clone()),
582                ExifIfdGroup::ExifIfd => exif_entries.push(entry.clone()),
583                ExifIfdGroup::Gps => gps_entries.push(entry.clone()),
584            }
585        }
586
587        // Step 2: Apply queued changes (add/replace/delete)
588        let deleted_tags: Vec<u16> = values
589            .iter()
590            .filter(|nv| nv.value.is_none())
591            .filter_map(|nv| tag_name_to_id(&nv.tag))
592            .collect();
593
594        // Remove deleted tags
595        ifd0_entries.retain(|e| !deleted_tags.contains(&e.tag));
596        exif_entries.retain(|e| !deleted_tags.contains(&e.tag));
597        gps_entries.retain(|e| !deleted_tags.contains(&e.tag));
598
599        // Add/replace new values
600        for nv in values {
601            if nv.value.is_none() {
602                continue;
603            }
604            let value_str = nv.value.as_deref().unwrap_or("");
605            let group = nv.group.as_deref().unwrap_or("");
606
607            if let Some((tag_id, format, encoded)) = encode_exif_tag(&nv.tag, value_str, group, bo) {
608                let entry = exif_writer::IfdEntry {
609                    tag: tag_id,
610                    format,
611                    data: encoded,
612                };
613
614                let target = match group.to_uppercase().as_str() {
615                    "GPS" => &mut gps_entries,
616                    "EXIFIFD" => &mut exif_entries,
617                    _ => match classify_exif_tag(tag_id) {
618                        ExifIfdGroup::ExifIfd => &mut exif_entries,
619                        ExifIfdGroup::Gps => &mut gps_entries,
620                        ExifIfdGroup::Ifd0 => &mut ifd0_entries,
621                    },
622                };
623
624                // Replace existing or add new
625                if let Some(existing) = target.iter_mut().find(|e| e.tag == tag_id) {
626                    *existing = entry;
627                } else {
628                    target.push(entry);
629                }
630            }
631        }
632
633        // Remove sub-IFD pointers from entries (they'll be rebuilt by build_exif)
634        ifd0_entries.retain(|e| e.tag != 0x8769 && e.tag != 0x8825 && e.tag != 0xA005);
635
636        exif_writer::build_exif(&ifd0_entries, &exif_entries, &gps_entries, bo)
637    }
638
639    /// Write metadata changes to PNG data.
640    fn write_png(&self, data: &[u8]) -> Result<Vec<u8>> {
641        let mut new_text: Vec<(&str, &str)> = Vec::new();
642        let mut remove_text: Vec<&str> = Vec::new();
643
644        // Collect text-based changes
645        // We need to hold the strings in vectors that live long enough
646        let owned_pairs: Vec<(String, String)> = self.new_values.iter()
647            .filter(|nv| nv.value.is_some())
648            .map(|nv| (nv.tag.clone(), nv.value.clone().unwrap()))
649            .collect();
650
651        for (tag, value) in &owned_pairs {
652            new_text.push((tag.as_str(), value.as_str()));
653        }
654
655        for nv in &self.new_values {
656            if nv.value.is_none() {
657                remove_text.push(&nv.tag);
658            }
659        }
660
661        png_writer::write_png(data, &new_text, None, &remove_text)
662    }
663
664    /// Write metadata changes to PSD data.
665    fn write_psd(&self, data: &[u8]) -> Result<Vec<u8>> {
666        let mut iptc_values = Vec::new();
667        let mut xmp_values = Vec::new();
668
669        for nv in &self.new_values {
670            let group = nv.group.as_deref().unwrap_or("").to_uppercase();
671            match group.as_str() {
672                "XMP" => xmp_values.push(nv),
673                "IPTC" => iptc_values.push(nv),
674                _ => {
675                    if is_xmp_tag(&nv.tag) { xmp_values.push(nv); }
676                    else { iptc_values.push(nv); }
677                }
678            }
679        }
680
681        let new_iptc = if !iptc_values.is_empty() {
682            let records: Vec<_> = iptc_values.iter().filter_map(|nv| {
683                let value = nv.value.as_deref()?;
684                let (record, dataset) = iptc_writer::tag_name_to_iptc(&nv.tag)?;
685                Some(iptc_writer::IptcRecord { record, dataset, data: value.as_bytes().to_vec() })
686            }).collect();
687            if records.is_empty() { None } else { Some(iptc_writer::build_iptc(&records)) }
688        } else { None };
689
690        let new_xmp = if !xmp_values.is_empty() {
691            let refs: Vec<&NewValue> = xmp_values.iter().copied().collect();
692            Some(self.build_new_xmp(&refs))
693        } else { None };
694
695        psd_writer::write_psd(data, new_iptc.as_deref(), new_xmp.as_deref())
696    }
697
698    /// Write metadata changes to Matroska (MKV/WebM) data.
699    fn write_matroska(&self, data: &[u8]) -> Result<Vec<u8>> {
700        let changes: Vec<(&str, &str)> = self.new_values.iter()
701            .filter_map(|nv| {
702                let value = nv.value.as_deref()?;
703                Some((nv.tag.as_str(), value))
704            })
705            .collect();
706
707        matroska_writer::write_matroska(data, &changes)
708    }
709
710    /// Write metadata changes to PDF data.
711    fn write_pdf(&self, data: &[u8]) -> Result<Vec<u8>> {
712        let changes: Vec<(&str, &str)> = self.new_values.iter()
713            .filter_map(|nv| {
714                let value = nv.value.as_deref()?;
715                Some((nv.tag.as_str(), value))
716            })
717            .collect();
718
719        pdf_writer::write_pdf(data, &changes)
720    }
721
722    /// Write metadata changes to MP4/MOV data.
723    fn write_mp4(&self, data: &[u8]) -> Result<Vec<u8>> {
724        let mut ilst_tags: Vec<([u8; 4], String)> = Vec::new();
725        let mut xmp_values: Vec<&NewValue> = Vec::new();
726
727        for nv in &self.new_values {
728            if nv.value.is_none() { continue; }
729            let group = nv.group.as_deref().unwrap_or("").to_uppercase();
730            if group == "XMP" {
731                xmp_values.push(nv);
732            } else if let Some(key) = mp4_writer::tag_to_ilst_key(&nv.tag) {
733                ilst_tags.push((key, nv.value.clone().unwrap()));
734            }
735        }
736
737        let tag_refs: Vec<(&[u8; 4], &str)> = ilst_tags.iter()
738            .map(|(k, v)| (k, v.as_str()))
739            .collect();
740
741        let new_xmp = if !xmp_values.is_empty() {
742            let refs: Vec<&NewValue> = xmp_values.iter().copied().collect();
743            Some(self.build_new_xmp(&refs))
744        } else {
745            None
746        };
747
748        mp4_writer::write_mp4(data, &tag_refs, new_xmp.as_deref())
749    }
750
751    /// Write metadata changes to WebP data.
752    fn write_webp(&self, data: &[u8]) -> Result<Vec<u8>> {
753        let mut exif_values: Vec<&NewValue> = Vec::new();
754        let mut xmp_values: Vec<&NewValue> = Vec::new();
755        let mut remove_exif = false;
756        let mut remove_xmp = false;
757
758        for nv in &self.new_values {
759            let group = nv.group.as_deref().unwrap_or("").to_uppercase();
760            if nv.value.is_none() && nv.tag == "*" {
761                if group == "EXIF" { remove_exif = true; }
762                if group == "XMP" { remove_xmp = true; }
763                continue;
764            }
765            match group.as_str() {
766                "XMP" => xmp_values.push(nv),
767                _ => exif_values.push(nv),
768            }
769        }
770
771        let new_exif = if !exif_values.is_empty() {
772            let bo = ByteOrderMark::BigEndian;
773            let mut entries = Vec::new();
774            for nv in &exif_values {
775                if let Some(ref v) = nv.value {
776                    let group = nv.group.as_deref().unwrap_or("");
777                    if let Some((tag_id, format, encoded)) = encode_exif_tag(&nv.tag, v, group, bo) {
778                        entries.push(exif_writer::IfdEntry { tag: tag_id, format, data: encoded });
779                    }
780                }
781            }
782            if !entries.is_empty() {
783                Some(exif_writer::build_exif(&entries, &[], &[], bo)?)
784            } else {
785                None
786            }
787        } else {
788            None
789        };
790
791        let new_xmp = if !xmp_values.is_empty() {
792            Some(self.build_new_xmp(&xmp_values.iter().map(|v| *v).collect::<Vec<_>>()))
793        } else {
794            None
795        };
796
797        webp_writer::write_webp(
798            data,
799            new_exif.as_deref(),
800            new_xmp.as_deref(),
801            remove_exif,
802            remove_xmp,
803        )
804    }
805
806    /// Write metadata changes to TIFF data.
807    fn write_tiff(&self, data: &[u8]) -> Result<Vec<u8>> {
808        let bo = if data.starts_with(b"II") {
809            ByteOrderMark::LittleEndian
810        } else {
811            ByteOrderMark::BigEndian
812        };
813
814        let mut changes: Vec<(u16, Vec<u8>)> = Vec::new();
815        for nv in &self.new_values {
816            if let Some(ref value) = nv.value {
817                let group = nv.group.as_deref().unwrap_or("");
818                if let Some((tag_id, _format, encoded)) = encode_exif_tag(&nv.tag, value, group, bo) {
819                    changes.push((tag_id, encoded));
820                }
821            }
822        }
823
824        tiff_writer::write_tiff(data, &changes)
825    }
826
827    /// Build new XMP data from queued values.
828    fn build_new_xmp(&self, values: &[&NewValue]) -> Vec<u8> {
829        let mut properties = Vec::new();
830
831        for nv in values {
832            let value_str = match &nv.value {
833                Some(v) => v.clone(),
834                None => continue,
835            };
836
837            let ns = nv.group.as_deref().unwrap_or("dc").to_lowercase();
838            let ns = if ns == "xmp" { "xmp".to_string() } else { ns };
839
840            let prop_type = match nv.tag.to_lowercase().as_str() {
841                "title" | "description" | "rights" => xmp_writer::XmpPropertyType::LangAlt,
842                "subject" | "keywords" => xmp_writer::XmpPropertyType::Bag,
843                "creator" => xmp_writer::XmpPropertyType::Seq,
844                _ => xmp_writer::XmpPropertyType::Simple,
845            };
846
847            let values = if matches!(prop_type, xmp_writer::XmpPropertyType::Bag | xmp_writer::XmpPropertyType::Seq) {
848                value_str.split(',').map(|s| s.trim().to_string()).collect()
849            } else {
850                vec![value_str]
851            };
852
853            properties.push(xmp_writer::XmpProperty {
854                namespace: ns,
855                property: nv.tag.clone(),
856                values,
857                prop_type,
858            });
859        }
860
861        xmp_writer::build_xmp(&properties).into_bytes()
862    }
863
864    // ================================================================
865    // Reading API
866    // ================================================================
867
868    /// Extract metadata from a file and return a simple name→value map.
869    ///
870    /// This is the high-level one-shot API, equivalent to ExifTool's `ImageInfo()`.
871    pub fn image_info<P: AsRef<Path>>(&self, path: P) -> Result<ImageInfo> {
872        let tags = self.extract_info(path)?;
873        Ok(self.get_info(&tags))
874    }
875
876    /// Extract all metadata tags from a file.
877    ///
878    /// Returns the full `Tag` structs with groups, raw values, etc.
879    pub fn extract_info<P: AsRef<Path>>(&self, path: P) -> Result<Vec<Tag>> {
880        let path = path.as_ref();
881        let data = fs::read(path).map_err(Error::Io)?;
882
883        self.extract_info_from_bytes(&data, path)
884    }
885
886    /// Extract metadata from in-memory data.
887    pub fn extract_info_from_bytes(&self, data: &[u8], path: &Path) -> Result<Vec<Tag>> {
888        let file_type_result = self.detect_file_type(data, path);
889        let (file_type, mut tags) = match file_type_result {
890            Ok(ft) => {
891                let t = self.process_file(data, ft).or_else(|_| {
892                    self.process_by_extension(data, path)
893                })?;
894                (Some(ft), t)
895            }
896            Err(_) => {
897                // File type unknown by magic/extension — try extension-based fallback
898                let t = self.process_by_extension(data, path)?;
899                (None, t)
900            }
901        };
902        let file_type = file_type.unwrap_or(FileType::Zip); // placeholder for file-level tags
903
904        // Add file-level tags
905        tags.push(Tag {
906            id: crate::tag::TagId::Text("FileType".into()),
907            name: "FileType".into(),
908            description: "File Type".into(),
909            group: crate::tag::TagGroup {
910                family0: "File".into(),
911                family1: "File".into(),
912                family2: "Other".into(),
913            },
914            raw_value: Value::String(format!("{:?}", file_type)),
915            print_value: file_type.description().to_string(),
916            priority: 0,
917        });
918
919        tags.push(Tag {
920            id: crate::tag::TagId::Text("MIMEType".into()),
921            name: "MIMEType".into(),
922            description: "MIME Type".into(),
923            group: crate::tag::TagGroup {
924                family0: "File".into(),
925                family1: "File".into(),
926                family2: "Other".into(),
927            },
928            raw_value: Value::String(file_type.mime_type().to_string()),
929            print_value: file_type.mime_type().to_string(),
930            priority: 0,
931        });
932
933        if let Ok(metadata) = fs::metadata(path) {
934            tags.push(Tag {
935                id: crate::tag::TagId::Text("FileSize".into()),
936                name: "FileSize".into(),
937                description: "File Size".into(),
938                group: crate::tag::TagGroup {
939                    family0: "File".into(),
940                    family1: "File".into(),
941                    family2: "Other".into(),
942                },
943                raw_value: Value::U32(metadata.len() as u32),
944                print_value: format_file_size(metadata.len()),
945                priority: 0,
946            });
947        }
948
949        // Add more file-level tags
950        let file_tag = |name: &str, val: Value| -> Tag {
951            Tag {
952                id: crate::tag::TagId::Text(name.to_string()),
953                name: name.to_string(), description: name.to_string(),
954                group: crate::tag::TagGroup { family0: "File".into(), family1: "File".into(), family2: "Other".into() },
955                raw_value: val.clone(), print_value: val.to_display_string(), priority: 0,
956            }
957        };
958
959        if let Some(fname) = path.file_name().and_then(|n| n.to_str()) {
960            tags.push(file_tag("FileName", Value::String(fname.to_string())));
961        }
962        if let Some(dir) = path.parent().and_then(|p| p.to_str()) {
963            tags.push(file_tag("Directory", Value::String(dir.to_string())));
964        }
965        // Use the canonical (first) extension from the FileType, matching Perl ExifTool behavior.
966        let canonical_ext = file_type.extensions().first().copied().unwrap_or("");
967        if !canonical_ext.is_empty() {
968            tags.push(file_tag("FileTypeExtension", Value::String(canonical_ext.to_string())));
969        }
970
971        #[cfg(unix)]
972        if let Ok(metadata) = fs::metadata(path) {
973            use std::os::unix::fs::MetadataExt;
974            let mode = metadata.mode();
975            tags.push(file_tag("FilePermissions", Value::String(format!("{:o}", mode & 0o7777))));
976
977            // FileModifyDate
978            if let Ok(modified) = metadata.modified() {
979                if let Ok(dur) = modified.duration_since(std::time::UNIX_EPOCH) {
980                    let secs = dur.as_secs() as i64;
981                    tags.push(file_tag("FileModifyDate", Value::String(unix_to_datetime(secs))));
982                }
983            }
984            // FileAccessDate
985            if let Ok(accessed) = metadata.accessed() {
986                if let Ok(dur) = accessed.duration_since(std::time::UNIX_EPOCH) {
987                    let secs = dur.as_secs() as i64;
988                    tags.push(file_tag("FileAccessDate", Value::String(unix_to_datetime(secs))));
989                }
990            }
991            // FileInodeChangeDate (ctime on Unix)
992            let ctime = metadata.ctime();
993            if ctime > 0 {
994                tags.push(file_tag("FileInodeChangeDate", Value::String(unix_to_datetime(ctime))));
995            }
996        }
997
998        // ExifByteOrder (from TIFF header)
999        {
1000            let bo_str = if data.len() > 8 {
1001                // Check EXIF in JPEG or TIFF header or WebP/RIFF EXIF chunk
1002                let check: Option<&[u8]> = if data.starts_with(&[0xFF, 0xD8]) {
1003                    // JPEG: find APP1 EXIF header
1004                    data.windows(6).position(|w| w == b"Exif\0\0")
1005                        .map(|p| &data[p+6..])
1006                } else if data.starts_with(b"FUJIFILMCCD-RAW") && data.len() >= 0x60 {
1007                    // RAF: look in the embedded JPEG for EXIF byte order
1008                    let jpeg_offset = u32::from_be_bytes([data[0x54], data[0x55], data[0x56], data[0x57]]) as usize;
1009                    let jpeg_length = u32::from_be_bytes([data[0x58], data[0x59], data[0x5A], data[0x5B]]) as usize;
1010                    if jpeg_offset > 0 && jpeg_offset + jpeg_length <= data.len() {
1011                        let jpeg = &data[jpeg_offset..jpeg_offset + jpeg_length];
1012                        jpeg.windows(6).position(|w| w == b"Exif\0\0")
1013                            .map(|p| &jpeg[p+6..])
1014                    } else {
1015                        None
1016                    }
1017                } else if data.starts_with(b"RIFF") && data.len() >= 12 {
1018                    // RIFF/WebP: find EXIF chunk
1019                    let mut riff_bo: Option<&[u8]> = None;
1020                    let mut pos = 12usize;
1021                    while pos + 8 <= data.len() {
1022                        let cid = &data[pos..pos+4];
1023                        let csz = u32::from_le_bytes([data[pos+4],data[pos+5],data[pos+6],data[pos+7]]) as usize;
1024                        let cstart = pos + 8;
1025                        let cend = (cstart + csz).min(data.len());
1026                        if cid == b"EXIF" && cend > cstart {
1027                            let exif_data = &data[cstart..cend];
1028                            let tiff = if exif_data.starts_with(b"Exif\0\0") { &exif_data[6..] } else { exif_data };
1029                            riff_bo = Some(tiff);
1030                            break;
1031                        }
1032                        // Also check LIST chunks
1033                        if cid == b"LIST" && cend >= cstart + 4 {
1034                            // recurse not needed for this simple scan - just advance
1035                        }
1036                        pos = cend + (csz & 1);
1037                    }
1038                    riff_bo
1039                } else if data.starts_with(&[0x00, 0x00, 0x00, 0x0C, b'J', b'X', b'L', b' ']) {
1040                    // JXL container: scan for brob Exif box and decompress to get byte order
1041                    let mut jxl_bo: Option<String> = None;
1042                    let mut jpos = 12usize; // skip JXL signature box
1043                    while jpos + 8 <= data.len() {
1044                        let bsize = u32::from_be_bytes([data[jpos], data[jpos+1], data[jpos+2], data[jpos+3]]) as usize;
1045                        let btype = &data[jpos+4..jpos+8];
1046                        if bsize < 8 || jpos + bsize > data.len() { break; }
1047                        if btype == b"brob" && jpos + bsize > 12 {
1048                            let inner_type = &data[jpos+8..jpos+12];
1049                            if inner_type == b"Exif" || inner_type == b"exif" {
1050                                let brotli_payload = &data[jpos+12..jpos+bsize];
1051                                use std::io::Cursor;
1052                                let mut inp = Cursor::new(brotli_payload);
1053                                let mut out: Vec<u8> = Vec::new();
1054                                if brotli::BrotliDecompress(&mut inp, &mut out).is_ok() {
1055                                    let exif_start = if out.len() > 4 { 4 } else { 0 };
1056                                    if exif_start < out.len() {
1057                                        if out[exif_start..].starts_with(b"MM") {
1058                                            jxl_bo = Some("Big-endian (Motorola, MM)".to_string());
1059                                        } else if out[exif_start..].starts_with(b"II") {
1060                                            jxl_bo = Some("Little-endian (Intel, II)".to_string());
1061                                        }
1062                                    }
1063                                }
1064                                break;
1065                            }
1066                        }
1067                        jpos += bsize;
1068                    }
1069                    if let Some(bo) = jxl_bo {
1070                        if !bo.is_empty() && file_type != FileType::Btf {
1071                            tags.push(file_tag("ExifByteOrder", Value::String(bo)));
1072                        }
1073                    }
1074                    // Return None to skip the generic byte order check below
1075                    None
1076                } else if data.starts_with(&[0x00, b'M', b'R', b'M']) {
1077                    // MRW: find TTW segment which contains TIFF/EXIF data
1078                    let mrw_data_offset = if data.len() >= 8 {
1079                        u32::from_be_bytes([data[4], data[5], data[6], data[7]]) as usize + 8
1080                    } else { 0 };
1081                    let mut mrw_bo: Option<&[u8]> = None;
1082                    let mut mpos = 8usize;
1083                    while mpos + 8 <= mrw_data_offset.min(data.len()) {
1084                        let seg_tag = &data[mpos..mpos+4];
1085                        let seg_len = u32::from_be_bytes([data[mpos+4], data[mpos+5], data[mpos+6], data[mpos+7]]) as usize;
1086                        if seg_tag == b"\x00TTW" && mpos + 8 + seg_len <= data.len() {
1087                            mrw_bo = Some(&data[mpos+8..mpos+8+seg_len]);
1088                            break;
1089                        }
1090                        mpos += 8 + seg_len;
1091                    }
1092                    mrw_bo
1093                } else {
1094                    Some(&data[..])
1095                };
1096                if let Some(tiff) = check {
1097                    if tiff.starts_with(b"II") { "Little-endian (Intel, II)" }
1098                    else if tiff.starts_with(b"MM") { "Big-endian (Motorola, MM)" }
1099                    else { "" }
1100                } else { "" }
1101            } else { "" };
1102            // Suppress ExifByteOrder for BigTIFF, Canon VRD/DR4 (Perl doesn't output it for these)
1103            // Also skip if already emitted by ExifReader (TIFF-based formats)
1104            let already_has_exifbyteorder = tags.iter().any(|t| t.name == "ExifByteOrder");
1105            if !bo_str.is_empty() && !already_has_exifbyteorder
1106                && file_type != FileType::Btf
1107                && file_type != FileType::Dr4 && file_type != FileType::Vrd
1108                && file_type != FileType::Crw {
1109                tags.push(file_tag("ExifByteOrder", Value::String(bo_str.to_string())));
1110            }
1111        }
1112
1113        tags.push(file_tag("ExifToolVersion", Value::String(crate::VERSION.to_string())));
1114
1115        // Compute composite tags
1116        let composite = crate::composite::compute_composite_tags(&tags);
1117        tags.extend(composite);
1118
1119        // FLIR post-processing: remove LensID composite for FLIR cameras.
1120        // Perl's LensID composite requires LensType EXIF tag (not present in FLIR images),
1121        // and LensID-2 requires LensModel to match /(mm|\d\/F)/ (FLIR names like "FOL7"
1122        // don't match).  Our composite.rs uses a simpler fallback that picks up any non-empty
1123        // LensModel, so we remove LensID when the image is from a FLIR camera with FFF data.
1124        {
1125            let is_flir_fff = tags.iter().any(|t| t.group.family0 == "APP1"
1126                && t.group.family1 == "FLIR");
1127            if is_flir_fff {
1128                tags.retain(|t| !(t.name == "LensID" && t.group.family0 == "Composite"));
1129            }
1130        }
1131
1132        // Olympus post-processing: remove the generic "Lens" composite for Olympus cameras.
1133        // In Perl, the "Lens" composite tag requires Canon:MinFocalLength (Canon namespace).
1134        // Our composite.rs generates Lens for any manufacturer that has MinFocalLength +
1135        // MaxFocalLength (e.g., Olympus Equipment sub-IFD).  Remove it for non-Canon cameras.
1136        {
1137            let make = tags.iter().find(|t| t.name == "Make")
1138                .map(|t| t.print_value.clone()).unwrap_or_default();
1139            if !make.to_uppercase().contains("CANON") {
1140                tags.retain(|t| t.name != "Lens" || t.group.family0 != "Composite");
1141            }
1142        }
1143
1144        // Priority-based deduplication: when the same tag name appears from both RIFF (priority 0)
1145        // and MakerNotes/EXIF (priority 0 but higher-quality source), remove the RIFF copy.
1146        // Mirrors ExifTool's PRIORITY => 0 behavior for RIFF StreamHeader tags.
1147        {
1148            let riff_priority_zero_tags = ["Quality", "SampleSize", "StreamType"];
1149            for tag_name in &riff_priority_zero_tags {
1150                let has_makernotes = tags.iter().any(|t| t.name == *tag_name
1151                    && t.group.family0 != "RIFF");
1152                if has_makernotes {
1153                    tags.retain(|t| !(t.name == *tag_name && t.group.family0 == "RIFF"));
1154                }
1155            }
1156        }
1157
1158        // Priority-based deduplication: when the same tag name appears multiple times,
1159        // keep only the one with the highest priority (e.g., EXIF over JFIF, FFF over MakerNote).
1160        if !self.options.duplicates {
1161            let mut best_priority: HashMap<String, i32> = HashMap::new();
1162            for tag in &tags {
1163                let entry = best_priority.entry(tag.name.clone()).or_insert(tag.priority);
1164                if tag.priority > *entry {
1165                    *entry = tag.priority;
1166                }
1167            }
1168            tags.retain(|t| t.priority >= *best_priority.get(&t.name).unwrap_or(&0));
1169        }
1170
1171        // Filter by requested tags if specified
1172        if !self.options.requested_tags.is_empty() {
1173            let requested: Vec<String> = self
1174                .options
1175                .requested_tags
1176                .iter()
1177                .map(|t| t.to_lowercase())
1178                .collect();
1179            tags.retain(|t| requested.contains(&t.name.to_lowercase()));
1180        }
1181
1182        Ok(tags)
1183    }
1184
1185    /// Format extracted tags into a simple name→value map.
1186    ///
1187    /// Handles duplicate tag names by appending group info.
1188    fn get_info(&self, tags: &[Tag]) -> ImageInfo {
1189        let mut info = ImageInfo::new();
1190        let mut seen: HashMap<String, (usize, i32)> = HashMap::new(); // (count, best priority)
1191
1192        for tag in tags {
1193            let value = if self.options.print_conv {
1194                &tag.print_value
1195            } else {
1196                &tag.raw_value.to_display_string()
1197            };
1198
1199            let entry = seen.entry(tag.name.clone()).or_insert((0, i32::MIN));
1200            entry.0 += 1;
1201
1202            if entry.0 == 1 {
1203                entry.1 = tag.priority;
1204                info.insert(tag.name.clone(), value.clone());
1205            } else if tag.priority > entry.1 {
1206                // Higher priority tag replaces the previous one
1207                entry.1 = tag.priority;
1208                info.insert(tag.name.clone(), value.clone());
1209            } else if self.options.duplicates {
1210                let key = format!("{} [{}:{}]", tag.name, tag.group.family0, tag.group.family1);
1211                info.insert(key, value.clone());
1212            }
1213        }
1214
1215        info
1216    }
1217
1218    /// Detect file type from magic bytes and extension.
1219    fn detect_file_type(&self, data: &[u8], path: &Path) -> Result<FileType> {
1220        // Try magic bytes first
1221        let header_len = data.len().min(256);
1222        if let Some(ft) = file_type::detect_from_magic(&data[..header_len]) {
1223            // Override ICO to Font if extension is .dfont (Mac resource fork)
1224            if ft == FileType::Ico {
1225                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1226                    if ext.eq_ignore_ascii_case("dfont") {
1227                        return Ok(FileType::Font);
1228                    }
1229                }
1230            }
1231            // Override JPEG to JPS if the file extension is .jps
1232            if ft == FileType::Jpeg {
1233                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1234                    if ext.eq_ignore_ascii_case("jps") {
1235                        return Ok(FileType::Jps);
1236                    }
1237                }
1238            }
1239            // Override PLIST to AAE if extension is .aae
1240            if ft == FileType::Plist {
1241                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1242                    if ext.eq_ignore_ascii_case("aae") {
1243                        return Ok(FileType::Aae);
1244                    }
1245                }
1246            }
1247            // Override XMP to PLIST/AAE if extension is .plist or .aae
1248            if ft == FileType::Xmp {
1249                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1250                    if ext.eq_ignore_ascii_case("plist") {
1251                        return Ok(FileType::Plist);
1252                    }
1253                    if ext.eq_ignore_ascii_case("aae") {
1254                        return Ok(FileType::Aae);
1255                    }
1256                }
1257            }
1258            // Override to PhotoCD if extension is .pcd (file starts with 0xFF padding)
1259            if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1260                if ext.eq_ignore_ascii_case("pcd") && data.len() >= 2056
1261                    && &data[2048..2055] == b"PCD_IPI"
1262                {
1263                    return Ok(FileType::PhotoCd);
1264                }
1265            }
1266            // Override MP3 to MPC/APE/WavPack if extension says otherwise
1267            if ft == FileType::Mp3 {
1268                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1269                    if ext.eq_ignore_ascii_case("mpc") {
1270                        return Ok(FileType::Mpc);
1271                    }
1272                    if ext.eq_ignore_ascii_case("ape") {
1273                        return Ok(FileType::Ape);
1274                    }
1275                    if ext.eq_ignore_ascii_case("wv") {
1276                        return Ok(FileType::WavPack);
1277                    }
1278                }
1279            }
1280            // For ZIP files, check if it's an EIP (by extension) or OpenDocument format
1281            if ft == FileType::Zip {
1282                // Check extension first for EIP
1283                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1284                    if ext.eq_ignore_ascii_case("eip") {
1285                        return Ok(FileType::Eip);
1286                    }
1287                }
1288                if let Some(od_type) = detect_opendocument_type(data) {
1289                    return Ok(od_type);
1290                }
1291            }
1292            return Ok(ft);
1293        }
1294
1295        // Fall back to extension
1296        if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1297            if let Some(ft) = file_type::detect_from_extension(ext) {
1298                return Ok(ft);
1299            }
1300        }
1301
1302        let ext_str = path
1303            .extension()
1304            .and_then(|e| e.to_str())
1305            .unwrap_or("unknown");
1306        Err(Error::UnsupportedFileType(ext_str.to_string()))
1307    }
1308
1309    /// Dispatch to the appropriate format reader.
1310
1311    fn process_file(&self, data: &[u8], file_type: FileType) -> Result<Vec<Tag>> {
1312        match file_type {
1313            FileType::Jpeg | FileType::Jps => formats::jpeg::read_jpeg(data),
1314            FileType::Png | FileType::Mng => formats::png::read_png(data),
1315            // All TIFF-based formats (TIFF + most RAW formats)
1316            FileType::Tiff
1317            | FileType::Btf
1318            | FileType::Dng
1319            | FileType::Cr2
1320            | FileType::Nef
1321            | FileType::Arw
1322            | FileType::Sr2
1323            | FileType::Orf
1324            | FileType::Pef
1325            | FileType::Erf
1326            | FileType::Fff
1327            | FileType::Rwl
1328            | FileType::Mef
1329            | FileType::Srw
1330            | FileType::Gpr
1331            | FileType::Arq
1332            | FileType::ThreeFR
1333            | FileType::Dcr
1334            | FileType::Rw2
1335            | FileType::Srf => formats::tiff::read_tiff(data),
1336            // Phase One IIQ: TIFF + PhaseOne maker note block
1337            FileType::Iiq => formats::misc::read_iiq(data),
1338            // Image formats
1339            FileType::Gif => formats::gif::read_gif(data),
1340            FileType::Bmp => formats::bmp::read_bmp(data),
1341            FileType::WebP | FileType::Avi | FileType::Wav => formats::riff::read_riff(data),
1342            FileType::Psd => formats::psd::read_psd(data),
1343            // Audio formats
1344            FileType::Mp3 => formats::id3::read_mp3(data),
1345            FileType::Flac => formats::flac::read_flac(data),
1346            FileType::Ogg | FileType::Opus => formats::ogg::read_ogg(data),
1347            FileType::Aiff => formats::aiff::read_aiff(data),
1348            // Video formats
1349            FileType::Mp4
1350            | FileType::QuickTime
1351            | FileType::M4a
1352            | FileType::ThreeGP
1353            | FileType::Heif
1354            | FileType::Avif
1355            | FileType::Cr3
1356            | FileType::Crm
1357            | FileType::F4v
1358            | FileType::Mqv
1359            | FileType::Lrv => formats::quicktime::read_quicktime_with_ee(data, self.options.extract_embedded),
1360            FileType::Mkv | FileType::WebM => formats::matroska::read_matroska(data),
1361            FileType::Asf | FileType::Wmv | FileType::Wma => formats::asf::read_asf(data),
1362            FileType::Wtv => formats::wtv::read_wtv(data),
1363            // RAW formats with custom containers
1364            FileType::Crw => formats::canon_raw::read_crw(data),
1365            FileType::Raf => formats::raf::read_raf(data),
1366            FileType::Mrw => formats::mrw::read_mrw(data),
1367            FileType::Mrc => formats::mrc::read_mrc(data),
1368            // Image formats
1369            FileType::Jp2 => formats::jp2::read_jp2(data),
1370            FileType::J2c => formats::jp2::read_j2c(data),
1371            FileType::Jxl => formats::jp2::read_jxl(data),
1372            FileType::Ico => formats::ico::read_ico(data),
1373            FileType::Icc => formats::icc::read_icc(data),
1374            // Documents
1375            FileType::Pdf => formats::pdf::read_pdf(data),
1376            FileType::PostScript => {
1377                // PFA fonts start with %!PS-AdobeFont or %!FontType1
1378                if data.starts_with(b"%!PS-AdobeFont") || data.starts_with(b"%!FontType1") {
1379                    formats::font::read_pfa(data).or_else(|_| formats::postscript::read_postscript(data))
1380                } else {
1381                    formats::postscript::read_postscript(data)
1382                }
1383            }
1384            FileType::Eip => formats::capture_one::read_eip(data),
1385            FileType::Zip | FileType::Docx | FileType::Xlsx | FileType::Pptx
1386            | FileType::Doc | FileType::Xls | FileType::Ppt => formats::zip::read_zip(data),
1387            FileType::Rtf => formats::rtf::read_rtf(data),
1388            FileType::InDesign => formats::misc::read_indesign(data),
1389            FileType::Pcap => formats::misc::read_pcap(data),
1390            FileType::Pcapng => formats::misc::read_pcapng(data),
1391            // Canon VRD / DR4
1392            FileType::Vrd => formats::canon_vrd::read_vrd(data).or_else(|_| Ok(Vec::new())),
1393            FileType::Dr4 => formats::canon_vrd::read_dr4(data).or_else(|_| Ok(Vec::new())),
1394            // Metadata / Other
1395            FileType::Xmp => formats::xmp_file::read_xmp(data),
1396            FileType::Svg => formats::misc::read_svg(data),
1397            FileType::Html => {
1398                // SVG files that weren't detected by magic (e.g., via extension fallback)
1399                let is_svg = data.windows(4).take(512).any(|w| w == b"<svg");
1400                if is_svg {
1401                    formats::misc::read_svg(data)
1402                } else {
1403                    formats::html::read_html(data)
1404                }
1405            }
1406            FileType::Exe => formats::exe::read_exe(data),
1407            FileType::Font => {
1408                // AFM: Adobe Font Metrics text file
1409                if data.starts_with(b"StartFontMetrics") {
1410                    return formats::font::read_afm(data);
1411                }
1412                // PFA: PostScript Type 1 ASCII font
1413                if data.starts_with(b"%!PS-AdobeFont") || data.starts_with(b"%!FontType1") {
1414                    return formats::font::read_pfa(data).or_else(|_| Ok(Vec::new()));
1415                }
1416                // PFB: PostScript Type 1 Binary font
1417                if data.len() >= 2 && data[0] == 0x80 && (data[1] == 0x01 || data[1] == 0x02) {
1418                    return formats::font::read_pfb(data).or_else(|_| Ok(Vec::new()));
1419                }
1420                formats::font::read_font(data)
1421            }
1422            // Audio with ID3
1423            FileType::WavPack | FileType::Dsf => formats::id3::read_mp3(data),
1424            FileType::Ape => formats::ape::read_ape(data),
1425            FileType::Mpc => formats::ape::read_mpc(data),
1426            FileType::Aac => formats::misc::read_aac(data),
1427            FileType::RealAudio => {
1428                formats::misc::read_real_audio(data).or_else(|_| Ok(Vec::new()))
1429            }
1430            FileType::RealMedia => {
1431                formats::misc::read_real_media(data).or_else(|_| Ok(Vec::new()))
1432            }
1433            // Misc formats
1434            FileType::Czi => formats::misc::read_czi(data).or_else(|_| Ok(Vec::new())),
1435            FileType::PhotoCd => formats::misc::read_photo_cd(data).or_else(|_| Ok(Vec::new())),
1436            FileType::Dicom => formats::dicom::read_dicom(data),
1437            FileType::Fits => formats::misc::read_fits(data),
1438            FileType::Flv => formats::misc::read_flv(data),
1439            FileType::Mxf => formats::misc::read_mxf(data).or_else(|_| Ok(Vec::new())),
1440            FileType::Swf => formats::misc::read_swf(data),
1441            FileType::Hdr => formats::misc::read_hdr(data),
1442            FileType::DjVu => formats::djvu::read_djvu(data),
1443            FileType::Xcf => formats::gimp::read_xcf(data),
1444            FileType::Mie => formats::mie::read_mie(data),
1445            FileType::Lfp => formats::lytro::read_lfp(data),
1446            // FileType::Miff dispatched via string extension below
1447            FileType::Fpf => formats::flir_fpf::read_fpf(data),
1448            FileType::Flif => formats::misc::read_flif(data),
1449            FileType::Bpg => formats::misc::read_bpg(data),
1450            FileType::Pcx => formats::misc::read_pcx(data),
1451            FileType::Pict => formats::misc::read_pict(data),
1452            FileType::Mpeg => formats::mpeg::read_mpeg(data),
1453            FileType::M2ts => formats::misc::read_m2ts(data, self.options.extract_embedded),
1454            FileType::Gzip => formats::misc::read_gzip(data),
1455            FileType::Rar => formats::misc::read_rar(data),
1456            FileType::SevenZ => formats::misc::read_7z(data),
1457            FileType::Dss => formats::misc::read_dss(data),
1458            FileType::Moi => formats::misc::read_moi(data),
1459            FileType::MacOs => formats::misc::read_macos(data),
1460            FileType::Json => formats::misc::read_json(data),
1461            // New formats
1462            FileType::Pgf => formats::pgf::read_pgf(data),
1463            FileType::Xisf => formats::xisf::read_xisf(data),
1464            FileType::Torrent => formats::torrent::read_torrent(data),
1465            FileType::Mobi => formats::palm::read_palm(data),
1466            FileType::Psp => formats::psp::read_psp(data),
1467            FileType::SonyPmp => formats::sony_pmp::read_sony_pmp(data),
1468            FileType::Audible => formats::audible::read_audible(data),
1469            FileType::Exr => formats::openexr::read_openexr(data),
1470            // New formats
1471            FileType::Plist => {
1472                if data.starts_with(b"bplist") {
1473                    formats::plist::read_binary_plist_tags(data)
1474                } else {
1475                    formats::plist::read_xml_plist(data)
1476                }
1477            }
1478            FileType::Aae => {
1479                if data.starts_with(b"bplist") {
1480                    formats::plist::read_binary_plist_tags(data)
1481                } else {
1482                    formats::plist::read_aae_plist(data)
1483                }
1484            }
1485            FileType::KyoceraRaw => formats::misc::read_kyocera_raw(data),
1486            FileType::PortableFloatMap => formats::misc::read_pfm(data),
1487            FileType::Ods | FileType::Odt | FileType::Odp | FileType::Odg |
1488            FileType::Odf | FileType::Odb | FileType::Odi | FileType::Odc => formats::zip::read_zip(data),
1489            FileType::Lif => formats::misc::read_lif(data),
1490            FileType::Rwz => formats::misc::read_rawzor(data),
1491            FileType::Jxr => formats::misc::read_jxr(data),
1492            _ => Err(Error::UnsupportedFileType(format!("{}", file_type))),
1493        }
1494    }
1495
1496    /// Fallback: try to read file based on extension for formats without magic detection.
1497    fn process_by_extension(&self, data: &[u8], path: &Path) -> Result<Vec<Tag>> {
1498        let ext = path
1499            .extension()
1500            .and_then(|e| e.to_str())
1501            .unwrap_or("")
1502            .to_ascii_lowercase();
1503
1504        match ext.as_str() {
1505            "ppm" | "pgm" | "pbm" => formats::misc::read_ppm(data),
1506            "pfm" => {
1507                // PFM can be Portable Float Map or Printer Font Metrics
1508                if data.len() >= 3 && data[0] == b'P' && (data[1] == b'f' || data[1] == b'F') {
1509                    formats::misc::read_ppm(data)
1510                } else {
1511                    Ok(Vec::new()) // Printer Font Metrics
1512                }
1513            }
1514            "json" => formats::misc::read_json(data),
1515            "svg" => formats::misc::read_svg(data),
1516            "ram" => formats::misc::read_ram(data).or_else(|_| Ok(Vec::new())),
1517            "txt" | "log" | "igc" => {
1518                Ok(compute_text_tags(data, false))
1519            }
1520            "csv" => {
1521                Ok(compute_text_tags(data, true))
1522            }
1523            "url" => formats::lnk::read_url(data).or_else(|_| Ok(Vec::new())),
1524            "lnk" => formats::lnk::read_lnk(data).or_else(|_| Ok(Vec::new())),
1525            "gpx" | "kml" | "xml" | "inx" => formats::xmp_file::read_xmp(data),
1526            "plist" => {
1527                if data.starts_with(b"bplist") {
1528                    formats::plist::read_binary_plist_tags(data).or_else(|_| Ok(Vec::new()))
1529                } else {
1530                    formats::plist::read_xml_plist(data).or_else(|_| Ok(Vec::new()))
1531                }
1532            }
1533            "aae" => {
1534                if data.starts_with(b"bplist") {
1535                    formats::plist::read_binary_plist_tags(data).or_else(|_| Ok(Vec::new()))
1536                } else {
1537                    formats::plist::read_aae_plist(data).or_else(|_| Ok(Vec::new()))
1538                }
1539            }
1540            "vcf" | "ics" | "vcard" => {
1541                let s = String::from_utf8_lossy(&data[..data.len().min(100)]);
1542                if s.contains("BEGIN:VCALENDAR") {
1543                    formats::vcard::read_ics(data).or_else(|_| Ok(Vec::new()))
1544                } else {
1545                    formats::vcard::read_vcf(data).or_else(|_| Ok(Vec::new()))
1546                }
1547            }
1548            "xcf" => Ok(Vec::new()),      // GIMP
1549            "vrd" => formats::canon_vrd::read_vrd(data).or_else(|_| Ok(Vec::new())),
1550            "dr4" => formats::canon_vrd::read_dr4(data).or_else(|_| Ok(Vec::new())),
1551            "indd" | "indt" => Ok(Vec::new()), // InDesign
1552            "x3f" => formats::sigma_raw::read_x3f(data).or_else(|_| Ok(Vec::new())),
1553            "mie" => Ok(Vec::new()),       // MIE
1554            "exr" => Ok(Vec::new()),       // OpenEXR
1555            "wpg" => formats::misc::read_wpg(data).or_else(|_| Ok(Vec::new())),
1556            "moi" => formats::misc::read_moi(data).or_else(|_| Ok(Vec::new())),
1557            "macos" => formats::misc::read_macos(data).or_else(|_| Ok(Vec::new())),
1558            "dpx" => formats::dpx::read_dpx(data).or_else(|_| Ok(Vec::new())),
1559            "r3d" => formats::red::read_r3d(data).or_else(|_| Ok(Vec::new())),
1560            "tnef" => formats::tnef::read_tnef(data).or_else(|_| Ok(Vec::new())),
1561            "ppt" | "fpx" => formats::flashpix::read_fpx(data).or_else(|_| Ok(Vec::new())),
1562            "fpf" => formats::flir_fpf::read_fpf(data).or_else(|_| Ok(Vec::new())),
1563            "itc" => formats::misc::read_itc(data).or_else(|_| Ok(Vec::new())),
1564            "mpg" | "mpeg" | "m1v" | "m2v" | "mpv" => formats::mpeg::read_mpeg(data).or_else(|_| Ok(Vec::new())),
1565            "dv" => formats::dv::read_dv(data, data.len() as u64).or_else(|_| Ok(Vec::new())),
1566            "czi" => formats::misc::read_czi(data).or_else(|_| Ok(Vec::new())),
1567            "miff" => formats::miff::read_miff(data).or_else(|_| Ok(Vec::new())),
1568            "lfp" | "mrc"
1569            | "dss" | "mobi" | "psp" | "pgf" | "raw"
1570            | "pmp" | "torrent"
1571            | "xisf" | "mxf"
1572            | "dfont" => Ok(Vec::new()),
1573            "iso" => formats::iso::read_iso(data).or_else(|_| Ok(Vec::new())),
1574            "afm" => formats::font::read_afm(data).or_else(|_| Ok(Vec::new())),
1575            "pfa" => formats::font::read_pfa(data).or_else(|_| Ok(Vec::new())),
1576            "pfb" => formats::font::read_pfb(data).or_else(|_| Ok(Vec::new())),
1577            _ => Err(Error::UnsupportedFileType(ext)),
1578        }
1579    }
1580}
1581
1582impl Default for ExifTool {
1583    fn default() -> Self {
1584        Self::new()
1585    }
1586}
1587
1588/// Detect OpenDocument file type by reading the `mimetype` entry from a ZIP.
1589/// Returns None if not an OpenDocument file.
1590fn detect_opendocument_type(data: &[u8]) -> Option<FileType> {
1591    // OpenDocument ZIPs have "mimetype" as the FIRST local file entry (uncompressed)
1592    if data.len() < 30 || data[0..4] != [0x50, 0x4B, 0x03, 0x04] {
1593        return None;
1594    }
1595    let compression = u16::from_le_bytes([data[8], data[9]]);
1596    let compressed_size = u32::from_le_bytes([data[18], data[19], data[20], data[21]]) as usize;
1597    let name_len = u16::from_le_bytes([data[26], data[27]]) as usize;
1598    let extra_len = u16::from_le_bytes([data[28], data[29]]) as usize;
1599    let name_start = 30;
1600    if name_start + name_len > data.len() {
1601        return None;
1602    }
1603    let filename = std::str::from_utf8(&data[name_start..name_start + name_len]).unwrap_or("");
1604    if filename != "mimetype" || compression != 0 {
1605        return None;
1606    }
1607    let content_start = name_start + name_len + extra_len;
1608    let content_end = (content_start + compressed_size).min(data.len());
1609    if content_start >= content_end {
1610        return None;
1611    }
1612    let mime = std::str::from_utf8(&data[content_start..content_end]).unwrap_or("").trim();
1613    match mime {
1614        "application/vnd.oasis.opendocument.spreadsheet" => Some(FileType::Ods),
1615        "application/vnd.oasis.opendocument.text" => Some(FileType::Odt),
1616        "application/vnd.oasis.opendocument.presentation" => Some(FileType::Odp),
1617        "application/vnd.oasis.opendocument.graphics" => Some(FileType::Odg),
1618        "application/vnd.oasis.opendocument.formula" => Some(FileType::Odf),
1619        "application/vnd.oasis.opendocument.database" => Some(FileType::Odb),
1620        "application/vnd.oasis.opendocument.image" => Some(FileType::Odi),
1621        "application/vnd.oasis.opendocument.chart" => Some(FileType::Odc),
1622        _ => None,
1623    }
1624}
1625
1626/// Detect the file type of a file at the given path.
1627pub fn get_file_type<P: AsRef<Path>>(path: P) -> Result<FileType> {
1628    let path = path.as_ref();
1629    let mut file = fs::File::open(path).map_err(Error::Io)?;
1630    let mut header = [0u8; 256];
1631    use std::io::Read;
1632    let n = file.read(&mut header).map_err(Error::Io)?;
1633
1634    if let Some(ft) = file_type::detect_from_magic(&header[..n]) {
1635        return Ok(ft);
1636    }
1637
1638    if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1639        if let Some(ft) = file_type::detect_from_extension(ext) {
1640            return Ok(ft);
1641        }
1642    }
1643
1644    Err(Error::UnsupportedFileType("unknown".into()))
1645}
1646
/// The IFD groups that `classify_exif_tag` can assign a tag to.
enum ExifIfdGroup {
    /// IFD0; the fallback for every tag ID not claimed by another group.
    Ifd0,
    /// The Exif sub-IFD.
    ExifIfd,
    /// The GPS sub-IFD.
    Gps,
}

/// Decide which IFD group a tag belongs to, looking only at its numeric ID.
///
/// IDs inside the listed Exif ranges map to [`ExifIfdGroup::ExifIfd`], IDs
/// `0x0000..=0x001F` map to [`ExifIfdGroup::Gps`], and every other ID maps to
/// [`ExifIfdGroup::Ifd0`].
fn classify_exif_tag(tag_id: u16) -> ExifIfdGroup {
    let belongs_to_exif_ifd = matches!(
        tag_id,
        0x829A..=0x829D | 0x8822..=0x8827 | 0x8830 | 0x9000..=0x9292 | 0xA000..=0xA435
    );

    if belongs_to_exif_ifd {
        ExifIfdGroup::ExifIfd
    } else if tag_id <= 0x001F {
        ExifIfdGroup::Gps
    } else {
        ExifIfdGroup::Ifd0
    }
}
1666
1667/// Extract existing EXIF entries from a JPEG file's APP1 segment.
1668fn extract_existing_exif_entries(jpeg_data: &[u8], target_bo: ByteOrderMark) -> Vec<exif_writer::IfdEntry> {
1669    let mut entries = Vec::new();
1670
1671    // Find EXIF APP1 segment
1672    let mut pos = 2; // Skip SOI
1673    while pos + 4 <= jpeg_data.len() {
1674        if jpeg_data[pos] != 0xFF {
1675            pos += 1;
1676            continue;
1677        }
1678        let marker = jpeg_data[pos + 1];
1679        pos += 2;
1680
1681        if marker == 0xDA || marker == 0xD9 {
1682            break; // SOS or EOI
1683        }
1684        if marker == 0xFF || marker == 0x00 || marker == 0xD8 || (0xD0..=0xD7).contains(&marker) {
1685            continue;
1686        }
1687
1688        if pos + 2 > jpeg_data.len() {
1689            break;
1690        }
1691        let seg_len = u16::from_be_bytes([jpeg_data[pos], jpeg_data[pos + 1]]) as usize;
1692        if seg_len < 2 || pos + seg_len > jpeg_data.len() {
1693            break;
1694        }
1695
1696        let seg_data = &jpeg_data[pos + 2..pos + seg_len];
1697
1698        // EXIF APP1
1699        if marker == 0xE1 && seg_data.len() > 14 && seg_data.starts_with(b"Exif\0\0") {
1700            let tiff_data = &seg_data[6..];
1701            extract_ifd_entries(tiff_data, target_bo, &mut entries);
1702            break;
1703        }
1704
1705        pos += seg_len;
1706    }
1707
1708    entries
1709}
1710
1711/// Extract IFD entries from TIFF data, re-encoding values in the target byte order.
1712fn extract_ifd_entries(
1713    tiff_data: &[u8],
1714    target_bo: ByteOrderMark,
1715    entries: &mut Vec<exif_writer::IfdEntry>,
1716) {
1717    use crate::metadata::exif::parse_tiff_header;
1718
1719    let header = match parse_tiff_header(tiff_data) {
1720        Ok(h) => h,
1721        Err(_) => return,
1722    };
1723
1724    let src_bo = header.byte_order;
1725
1726    // Read IFD0
1727    read_ifd_for_merge(tiff_data, header.ifd0_offset as usize, src_bo, target_bo, entries);
1728
1729    // Find ExifIFD and GPS pointers
1730    let ifd0_offset = header.ifd0_offset as usize;
1731    if ifd0_offset + 2 > tiff_data.len() {
1732        return;
1733    }
1734    let count = read_u16_bo(tiff_data, ifd0_offset, src_bo) as usize;
1735    for i in 0..count {
1736        let eoff = ifd0_offset + 2 + i * 12;
1737        if eoff + 12 > tiff_data.len() {
1738            break;
1739        }
1740        let tag = read_u16_bo(tiff_data, eoff, src_bo);
1741        let value_off = read_u32_bo(tiff_data, eoff + 8, src_bo) as usize;
1742
1743        match tag {
1744            0x8769 => read_ifd_for_merge(tiff_data, value_off, src_bo, target_bo, entries),
1745            0x8825 => read_ifd_for_merge(tiff_data, value_off, src_bo, target_bo, entries),
1746            _ => {}
1747        }
1748    }
1749}
1750
1751/// Read a single IFD and extract entries for merge.
1752fn read_ifd_for_merge(
1753    data: &[u8],
1754    offset: usize,
1755    src_bo: ByteOrderMark,
1756    target_bo: ByteOrderMark,
1757    entries: &mut Vec<exif_writer::IfdEntry>,
1758) {
1759    if offset + 2 > data.len() {
1760        return;
1761    }
1762    let count = read_u16_bo(data, offset, src_bo) as usize;
1763
1764    for i in 0..count {
1765        let eoff = offset + 2 + i * 12;
1766        if eoff + 12 > data.len() {
1767            break;
1768        }
1769
1770        let tag = read_u16_bo(data, eoff, src_bo);
1771        let dtype = read_u16_bo(data, eoff + 2, src_bo);
1772        let count_val = read_u32_bo(data, eoff + 4, src_bo);
1773
1774        // Skip sub-IFD pointers and MakerNote
1775        if tag == 0x8769 || tag == 0x8825 || tag == 0xA005 || tag == 0x927C {
1776            continue;
1777        }
1778
1779        let type_size = match dtype {
1780            1 | 2 | 6 | 7 => 1usize,
1781            3 | 8 => 2,
1782            4 | 9 | 11 | 13 => 4,
1783            5 | 10 | 12 => 8,
1784            _ => continue,
1785        };
1786
1787        let total_size = type_size * count_val as usize;
1788        let raw_data = if total_size <= 4 {
1789            data[eoff + 8..eoff + 12].to_vec()
1790        } else {
1791            let voff = read_u32_bo(data, eoff + 8, src_bo) as usize;
1792            if voff + total_size > data.len() {
1793                continue;
1794            }
1795            data[voff..voff + total_size].to_vec()
1796        };
1797
1798        // Re-encode multi-byte values if byte orders differ
1799        let final_data = if src_bo != target_bo && type_size > 1 {
1800            reencode_bytes(&raw_data, dtype, count_val as usize, src_bo, target_bo)
1801        } else {
1802            raw_data[..total_size].to_vec()
1803        };
1804
1805        let format = match dtype {
1806            1 => exif_writer::ExifFormat::Byte,
1807            2 => exif_writer::ExifFormat::Ascii,
1808            3 => exif_writer::ExifFormat::Short,
1809            4 => exif_writer::ExifFormat::Long,
1810            5 => exif_writer::ExifFormat::Rational,
1811            6 => exif_writer::ExifFormat::SByte,
1812            7 => exif_writer::ExifFormat::Undefined,
1813            8 => exif_writer::ExifFormat::SShort,
1814            9 => exif_writer::ExifFormat::SLong,
1815            10 => exif_writer::ExifFormat::SRational,
1816            11 => exif_writer::ExifFormat::Float,
1817            12 => exif_writer::ExifFormat::Double,
1818            _ => continue,
1819        };
1820
1821        entries.push(exif_writer::IfdEntry {
1822            tag,
1823            format,
1824            data: final_data,
1825        });
1826    }
1827}
1828
1829/// Re-encode multi-byte values when converting between byte orders.
1830fn reencode_bytes(
1831    data: &[u8],
1832    dtype: u16,
1833    count: usize,
1834    src_bo: ByteOrderMark,
1835    dst_bo: ByteOrderMark,
1836) -> Vec<u8> {
1837    let mut out = Vec::with_capacity(data.len());
1838    match dtype {
1839        3 | 8 => {
1840            // 16-bit
1841            for i in 0..count {
1842                let v = read_u16_bo(data, i * 2, src_bo);
1843                match dst_bo {
1844                    ByteOrderMark::LittleEndian => out.extend_from_slice(&v.to_le_bytes()),
1845                    ByteOrderMark::BigEndian => out.extend_from_slice(&v.to_be_bytes()),
1846                }
1847            }
1848        }
1849        4 | 9 | 11 | 13 => {
1850            // 32-bit
1851            for i in 0..count {
1852                let v = read_u32_bo(data, i * 4, src_bo);
1853                match dst_bo {
1854                    ByteOrderMark::LittleEndian => out.extend_from_slice(&v.to_le_bytes()),
1855                    ByteOrderMark::BigEndian => out.extend_from_slice(&v.to_be_bytes()),
1856                }
1857            }
1858        }
1859        5 | 10 => {
1860            // Rational (two 32-bit)
1861            for i in 0..count {
1862                let n = read_u32_bo(data, i * 8, src_bo);
1863                let d = read_u32_bo(data, i * 8 + 4, src_bo);
1864                match dst_bo {
1865                    ByteOrderMark::LittleEndian => {
1866                        out.extend_from_slice(&n.to_le_bytes());
1867                        out.extend_from_slice(&d.to_le_bytes());
1868                    }
1869                    ByteOrderMark::BigEndian => {
1870                        out.extend_from_slice(&n.to_be_bytes());
1871                        out.extend_from_slice(&d.to_be_bytes());
1872                    }
1873                }
1874            }
1875        }
1876        12 => {
1877            // 64-bit double
1878            for i in 0..count {
1879                let mut bytes = [0u8; 8];
1880                bytes.copy_from_slice(&data[i * 8..i * 8 + 8]);
1881                if src_bo != dst_bo {
1882                    bytes.reverse();
1883                }
1884                out.extend_from_slice(&bytes);
1885            }
1886        }
1887        _ => out.extend_from_slice(data),
1888    }
1889    out
1890}
1891
1892fn read_u16_bo(data: &[u8], offset: usize, bo: ByteOrderMark) -> u16 {
1893    if offset + 2 > data.len() { return 0; }
1894    match bo {
1895        ByteOrderMark::LittleEndian => u16::from_le_bytes([data[offset], data[offset + 1]]),
1896        ByteOrderMark::BigEndian => u16::from_be_bytes([data[offset], data[offset + 1]]),
1897    }
1898}
1899
1900fn read_u32_bo(data: &[u8], offset: usize, bo: ByteOrderMark) -> u32 {
1901    if offset + 4 > data.len() { return 0; }
1902    match bo {
1903        ByteOrderMark::LittleEndian => u32::from_le_bytes([data[offset], data[offset + 1], data[offset + 2], data[offset + 3]]),
1904        ByteOrderMark::BigEndian => u32::from_be_bytes([data[offset], data[offset + 1], data[offset + 2], data[offset + 3]]),
1905    }
1906}
1907
1908/// Map tag name to numeric EXIF tag ID.
1909fn tag_name_to_id(name: &str) -> Option<u16> {
1910    encode_exif_tag(name, "", "", ByteOrderMark::BigEndian).map(|(id, _, _)| id)
1911}
1912
/// Turn a tag value into a string that is safe to use as a file name.
///
/// Path separators, characters reserved on common file systems and control
/// characters are each replaced with `_`; leading and trailing whitespace is
/// then removed.
fn value_to_filename(value: &str) -> String {
    const RESERVED: &[char] = &['/', '\\', ':', '*', '?', '"', '<', '>', '|'];

    let mut sanitized = String::with_capacity(value.len());
    for ch in value.chars() {
        let needs_replacing = RESERVED.contains(&ch) || ch.is_control();
        sanitized.push(if needs_replacing { '_' } else { ch });
    }
    sanitized.trim().to_string()
}
1926
/// Parse a date shift string such as "+1:0:0" (add 1 hour) or "-0:30:0"
/// (subtract 30 minutes).
///
/// An optional leading `+` or `-` gives the direction (default `+`). The rest
/// holds one to three `:`-separated unsigned numbers read as hours, then
/// minutes, then seconds; omitted fields are zero.
///
/// Returns `(sign, hours, minutes, seconds)` with `sign` being `1` or `-1`,
/// or `None` when a field is not a number or more than three fields are given.
pub fn parse_date_shift(shift: &str) -> Option<(i32, u32, u32, u32)> {
    let (sign, magnitude) = match shift.strip_prefix('-') {
        Some(rest) => (-1, rest),
        None => (1, shift.strip_prefix('+').unwrap_or(shift)),
    };

    // hours, minutes, seconds: fields that are not supplied stay at zero.
    let mut fields = [0u32; 3];
    let mut filled = 0;
    for piece in magnitude.split(':') {
        if filled == fields.len() {
            return None;
        }
        fields[filled] = piece.parse().ok()?;
        filled += 1;
    }

    Some((sign, fields[0], fields[1], fields[2]))
}
1958
1959/// Shift a datetime string by the given amount.
1960/// Input format: "YYYY:MM:DD HH:MM:SS"
1961pub fn shift_datetime(datetime: &str, shift: &str) -> Option<String> {
1962    let (sign, hours, minutes, seconds) = parse_date_shift(shift)?;
1963
1964    // Parse date/time
1965    if datetime.len() < 19 {
1966        return None;
1967    }
1968    let year: i32 = datetime[0..4].parse().ok()?;
1969    let month: u32 = datetime[5..7].parse().ok()?;
1970    let day: u32 = datetime[8..10].parse().ok()?;
1971    let hour: u32 = datetime[11..13].parse().ok()?;
1972    let min: u32 = datetime[14..16].parse().ok()?;
1973    let sec: u32 = datetime[17..19].parse().ok()?;
1974
1975    // Convert to total seconds, shift, convert back
1976    let total_secs = (hour * 3600 + min * 60 + sec) as i64
1977        + sign as i64 * (hours * 3600 + minutes * 60 + seconds) as i64;
1978
1979    let days_shift = if total_secs < 0 {
1980        -1 - (-total_secs - 1) as i64 / 86400
1981    } else {
1982        total_secs / 86400
1983    };
1984
1985    let time_secs = ((total_secs % 86400) + 86400) % 86400;
1986    let new_hour = (time_secs / 3600) as u32;
1987    let new_min = ((time_secs % 3600) / 60) as u32;
1988    let new_sec = (time_secs % 60) as u32;
1989
1990    // Simple day shifting (doesn't handle month/year rollover perfectly for large shifts)
1991    let mut new_day = day as i32 + days_shift as i32;
1992    let mut new_month = month;
1993    let mut new_year = year;
1994
1995    let days_in_month = |m: u32, y: i32| -> i32 {
1996        match m {
1997            1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
1998            4 | 6 | 9 | 11 => 30,
1999            2 => if (y % 4 == 0 && y % 100 != 0) || y % 400 == 0 { 29 } else { 28 },
2000            _ => 30,
2001        }
2002    };
2003
2004    while new_day > days_in_month(new_month, new_year) {
2005        new_day -= days_in_month(new_month, new_year);
2006        new_month += 1;
2007        if new_month > 12 {
2008            new_month = 1;
2009            new_year += 1;
2010        }
2011    }
2012    while new_day < 1 {
2013        new_month = if new_month == 1 { 12 } else { new_month - 1 };
2014        if new_month == 12 {
2015            new_year -= 1;
2016        }
2017        new_day += days_in_month(new_month, new_year);
2018    }
2019
2020    Some(format!(
2021        "{:04}:{:02}:{:02} {:02}:{:02}:{:02}",
2022        new_year, new_month, new_day, new_hour, new_min, new_sec
2023    ))
2024}
2025
/// Format a Unix timestamp (seconds since 1970-01-01 00:00:00) as
/// "YYYY:MM:DD HH:MM:SS".
///
/// The conversion uses Gregorian leap-year rules and applies no time-zone
/// offset. Timestamps before the epoch are supported: the day count is taken
/// with floor division so the time of day is never negative, and the year is
/// walked backwards from 1970.
fn unix_to_datetime(secs: i64) -> String {
    fn is_leap(year: i32) -> bool {
        (year % 4 == 0 && year % 100 != 0) || year % 400 == 0
    }
    fn days_in_year(year: i32) -> i64 {
        if is_leap(year) { 366 } else { 365 }
    }

    // Euclidean division keeps `time` within 0..86400 for negative input.
    let days = secs.div_euclid(86_400);
    let time = secs.rem_euclid(86_400);
    let h = time / 3600;
    let m = (time % 3600) / 60;
    let s = time % 60;

    let mut y = 1970i32;
    let mut rem = days;
    // Before the epoch: borrow whole years until the day offset is non-negative.
    while rem < 0 {
        y -= 1;
        rem += days_in_year(y);
    }
    // After the epoch: consume whole years.
    loop {
        let dy = days_in_year(y);
        if rem < dy {
            break;
        }
        rem -= dy;
        y += 1;
    }

    let months: [i64; 12] = [31, if is_leap(y) { 29 } else { 28 }, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
    let mut mo = 1;
    for &dm in &months {
        if rem < dm {
            break;
        }
        rem -= dm;
        mo += 1;
    }
    format!("{:04}:{:02}:{:02} {:02}:{:02}:{:02}", y, mo, rem + 1, h, m, s)
}
2050
/// Render a byte count as a short human-readable size.
///
/// Values below 1024 are printed as whole bytes; larger values are scaled by
/// powers of 1024 and printed with one decimal place using the unit labels
/// `kB`, `MB` or `GB`.
fn format_file_size(bytes: u64) -> String {
    const KIB: u64 = 1024;
    const MIB: u64 = KIB * KIB;
    const GIB: u64 = MIB * KIB;

    let (divisor, unit) = match bytes {
        small if small < KIB => return format!("{} bytes", small),
        medium if medium < MIB => (KIB, "kB"),
        large if large < GIB => (MIB, "MB"),
        _ => (GIB, "GB"),
    };
    format!("{:.1} {}", bytes as f64 / divisor as f64, unit)
}
2062
/// Report whether a tag name is one of the names this file treats as XMP.
///
/// The comparison is case-insensitive (the name is lowercased first).
fn is_xmp_tag(tag: &str) -> bool {
    const XMP_TAG_NAMES: [&str; 9] = [
        "title",
        "description",
        "subject",
        "creator",
        "rights",
        "keywords",
        "rating",
        "label",
        "hierarchicalsubject",
    ];

    let lowered = tag.to_lowercase();
    XMP_TAG_NAMES.contains(&lowered.as_str())
}
2071
2072/// Encode an EXIF tag value to binary.
2073/// Returns (tag_id, format, encoded_data) or None if tag is unknown.
2074fn encode_exif_tag(
2075    tag_name: &str,
2076    value: &str,
2077    _group: &str,
2078    bo: ByteOrderMark,
2079) -> Option<(u16, exif_writer::ExifFormat, Vec<u8>)> {
2080    let tag_lower = tag_name.to_lowercase();
2081
2082    // Map common tag names to EXIF tag IDs and formats
2083    let (tag_id, format): (u16, exif_writer::ExifFormat) = match tag_lower.as_str() {
2084        // IFD0 string tags
2085        "imagedescription" => (0x010E, exif_writer::ExifFormat::Ascii),
2086        "make" => (0x010F, exif_writer::ExifFormat::Ascii),
2087        "model" => (0x0110, exif_writer::ExifFormat::Ascii),
2088        "software" => (0x0131, exif_writer::ExifFormat::Ascii),
2089        "modifydate" | "datetime" => (0x0132, exif_writer::ExifFormat::Ascii),
2090        "artist" => (0x013B, exif_writer::ExifFormat::Ascii),
2091        "copyright" => (0x8298, exif_writer::ExifFormat::Ascii),
2092        // IFD0 numeric tags
2093        "orientation" => (0x0112, exif_writer::ExifFormat::Short),
2094        "xresolution" => (0x011A, exif_writer::ExifFormat::Rational),
2095        "yresolution" => (0x011B, exif_writer::ExifFormat::Rational),
2096        "resolutionunit" => (0x0128, exif_writer::ExifFormat::Short),
2097        // ExifIFD tags
2098        "datetimeoriginal" => (0x9003, exif_writer::ExifFormat::Ascii),
2099        "createdate" | "datetimedigitized" => (0x9004, exif_writer::ExifFormat::Ascii),
2100        "usercomment" => (0x9286, exif_writer::ExifFormat::Undefined),
2101        "imageuniqueid" => (0xA420, exif_writer::ExifFormat::Ascii),
2102        "ownername" | "cameraownername" => (0xA430, exif_writer::ExifFormat::Ascii),
2103        "serialnumber" | "bodyserialnumber" => (0xA431, exif_writer::ExifFormat::Ascii),
2104        "lensmake" => (0xA433, exif_writer::ExifFormat::Ascii),
2105        "lensmodel" => (0xA434, exif_writer::ExifFormat::Ascii),
2106        "lensserialnumber" => (0xA435, exif_writer::ExifFormat::Ascii),
2107        _ => return None,
2108    };
2109
2110    let encoded = match format {
2111        exif_writer::ExifFormat::Ascii => exif_writer::encode_ascii(value),
2112        exif_writer::ExifFormat::Short => {
2113            let v: u16 = value.parse().ok()?;
2114            exif_writer::encode_u16(v, bo)
2115        }
2116        exif_writer::ExifFormat::Long => {
2117            let v: u32 = value.parse().ok()?;
2118            exif_writer::encode_u32(v, bo)
2119        }
2120        exif_writer::ExifFormat::Rational => {
2121            // Parse "N/D" or just "N"
2122            if let Some(slash) = value.find('/') {
2123                let num: u32 = value[..slash].trim().parse().ok()?;
2124                let den: u32 = value[slash + 1..].trim().parse().ok()?;
2125                exif_writer::encode_urational(num, den, bo)
2126            } else if let Ok(v) = value.parse::<f64>() {
2127                // Convert float to rational
2128                let den = 10000u32;
2129                let num = (v * den as f64).round() as u32;
2130                exif_writer::encode_urational(num, den, bo)
2131            } else {
2132                return None;
2133            }
2134        }
2135        exif_writer::ExifFormat::Undefined => {
2136            // UserComment: 8 bytes charset + data
2137            let mut data = vec![0x41, 0x53, 0x43, 0x49, 0x49, 0x00, 0x00, 0x00]; // "ASCII\0\0\0"
2138            data.extend_from_slice(value.as_bytes());
2139            data
2140        }
2141        _ => return None,
2142    };
2143
2144    Some((tag_id, format, encoded))
2145}
2146
2147/// Compute text file tags (from Perl Text.pm).
2148fn compute_text_tags(data: &[u8], is_csv: bool) -> Vec<Tag> {
2149    let mut tags = Vec::new();
2150    let mk = |name: &str, val: String| Tag {
2151        id: crate::tag::TagId::Text(name.into()),
2152        name: name.into(), description: name.into(),
2153        group: crate::tag::TagGroup { family0: "File".into(), family1: "File".into(), family2: "Other".into() },
2154        raw_value: Value::String(val.clone()), print_value: val, priority: 0,
2155    };
2156
2157    // Detect encoding and BOM
2158    let is_ascii = data.iter().all(|&b| b < 128);
2159    let has_utf8_bom = data.starts_with(&[0xEF, 0xBB, 0xBF]);
2160    let has_utf16le_bom = data.starts_with(&[0xFF, 0xFE]) && !data.starts_with(&[0xFF, 0xFE, 0x00, 0x00]);
2161    let has_utf16be_bom = data.starts_with(&[0xFE, 0xFF]);
2162    let has_utf32le_bom = data.starts_with(&[0xFF, 0xFE, 0x00, 0x00]);
2163    let has_utf32be_bom = data.starts_with(&[0x00, 0x00, 0xFE, 0xFF]);
2164
2165    // Detect if file has weird non-text control characters (like multi-byte unicode without BOM)
2166    let has_weird_ctrl = data.iter().any(|&b| (b <= 0x06) || (b >= 0x0e && b <= 0x1a) || (b >= 0x1c && b <= 0x1f) || b == 0x7f);
2167
2168    let (encoding, is_bom, is_utf16) = if has_utf32le_bom {
2169        ("utf-32le", true, false)
2170    } else if has_utf32be_bom {
2171        ("utf-32be", true, false)
2172    } else if has_utf16le_bom {
2173        ("utf-16le", true, true)
2174    } else if has_utf16be_bom {
2175        ("utf-16be", true, true)
2176    } else if has_weird_ctrl {
2177        // Not a text file (has binary-like control chars but no recognized multi-byte marker)
2178        return tags;
2179    } else if is_ascii {
2180        ("us-ascii", false, false)
2181    } else {
2182        // Check UTF-8
2183        let is_valid_utf8 = std::str::from_utf8(data).is_ok();
2184        if is_valid_utf8 {
2185            if has_utf8_bom {
2186                ("utf-8", true, false)
2187            } else {
2188                // Check if it has high bytes suggesting iso-8859-1 vs utf-8
2189                // Perl's IsUTF8: returns >0 if valid UTF-8 with multi-byte, 0 if ASCII, <0 if invalid
2190                // For simplicity: valid UTF-8 without BOM = utf-8
2191                ("utf-8", false, false)
2192            }
2193        } else if !data.iter().any(|&b| b >= 0x80 && b <= 0x9f) {
2194            ("iso-8859-1", false, false)
2195        } else {
2196            ("unknown-8bit", false, false)
2197        }
2198    };
2199
2200    tags.push(mk("MIMEEncoding", encoding.into()));
2201
2202    if is_bom {
2203        tags.push(mk("ByteOrderMark", "Yes".into()));
2204    }
2205
2206    // Count newlines and detect type
2207    let has_cr = data.contains(&b'\r');
2208    let has_lf = data.contains(&b'\n');
2209    let newline_type = if has_cr && has_lf { "Windows CRLF" }
2210        else if has_lf { "Unix LF" }
2211        else if has_cr { "Macintosh CR" }
2212        else { "(none)" };
2213    tags.push(mk("Newlines", newline_type.into()));
2214
2215    if is_csv {
2216        // CSV analysis: detect delimiter, quoting, column count, row count
2217        let text = String::from_utf8_lossy(data);
2218        let mut delim = "";
2219        let mut quot = "";
2220        let mut ncols = 1usize;
2221        let mut nrows = 0usize;
2222
2223        for line in text.lines() {
2224            if nrows == 0 {
2225                // Detect delimiter from first line
2226                let comma_count = line.matches(',').count();
2227                let semi_count = line.matches(';').count();
2228                let tab_count = line.matches('\t').count();
2229                if comma_count > semi_count && comma_count > tab_count {
2230                    delim = ",";
2231                    ncols = comma_count + 1;
2232                } else if semi_count > tab_count {
2233                    delim = ";";
2234                    ncols = semi_count + 1;
2235                } else if tab_count > 0 {
2236                    delim = "\t";
2237                    ncols = tab_count + 1;
2238                } else {
2239                    delim = "";
2240                    ncols = 1;
2241                }
2242                // Detect quoting
2243                if line.contains('"') { quot = "\""; }
2244                else if line.contains('\'') { quot = "'"; }
2245            }
2246            nrows += 1;
2247            if nrows >= 1000 { break; }
2248        }
2249
2250        let delim_display = match delim {
2251            "," => "Comma",
2252            ";" => "Semicolon",
2253            "\t" => "Tab",
2254            _ => "(none)",
2255        };
2256        let quot_display = match quot {
2257            "\"" => "Double quotes",
2258            "'" => "Single quotes",
2259            _ => "(none)",
2260        };
2261
2262        tags.push(mk("Delimiter", delim_display.into()));
2263        tags.push(mk("Quoting", quot_display.into()));
2264        tags.push(mk("ColumnCount", ncols.to_string()));
2265        if nrows > 0 {
2266            tags.push(mk("RowCount", nrows.to_string()));
2267        }
2268    } else if !is_utf16 {
2269        // Line count and word count for plain text files (not UTF-16/32)
2270        let line_count = data.iter().filter(|&&b| b == b'\n').count();
2271        let line_count = if line_count == 0 && !data.is_empty() { 1 } else { line_count };
2272        tags.push(mk("LineCount", line_count.to_string()));
2273
2274        let text = String::from_utf8_lossy(data);
2275        let word_count = text.split_whitespace().count();
2276        tags.push(mk("WordCount", word_count.to_string()));
2277    }
2278
2279    tags
2280}
exiftool_rs/exiftool.rs

exiftool_rs/
exiftool.rs