exiftool_rs/
exiftool.rs

1//! Core ExifTool struct and public API.
2//!
3//! This is the main entry point for reading metadata from files.
4//! Mirrors ExifTool.pm's ImageInfo/ExtractInfo/GetInfo pipeline.
5
6use std::collections::HashMap;
7use std::fs;
8use std::path::Path;
9
10use crate::error::{Error, Result};
11use crate::file_type::{self, FileType};
12use crate::formats;
13use crate::metadata::exif::ByteOrderMark;
14use crate::tag::Tag;
15use crate::value::Value;
16use crate::writer::{exif_writer, iptc_writer, jpeg_writer, matroska_writer, mp4_writer, pdf_writer, png_writer, psd_writer, tiff_writer, webp_writer, xmp_writer};
17
/// Settings that steer how metadata is extracted from a file.
#[derive(Clone, Debug)]
pub struct Options {
    /// Keep duplicate tags (the same tag name may appear in different groups).
    pub duplicates: bool,
    /// Apply print conversions so values come out human-readable.
    pub print_conv: bool,
    /// Fast scan level: 0=normal, 1=skip composite, 2=skip maker notes, 3=skip thumbnails.
    pub fast_scan: u8,
    /// Restrict extraction to these tag names; an empty list means "all tags".
    pub requested_tags: Vec<String>,
    /// Extract embedded documents/data (video frames, etc.). Level: 0=off, 1=-ee, 2=-ee2, 3=-ee3.
    pub extract_embedded: u8,
}

impl Default for Options {
    /// Print conversion is switched on; every other setting starts off, zero, or empty.
    fn default() -> Self {
        let requested_tags = Vec::new();
        Options {
            duplicates: false,
            print_conv: true,
            fast_scan: 0,
            requested_tags,
            extract_embedded: 0,
        }
    }
}
44
/// A queued tag change for writing.
///
/// Entries are created by `ExifTool::set_new_value` and
/// `ExifTool::set_new_values_from_file`, and consumed when the changes are
/// written out.
#[derive(Debug, Clone)]
pub struct NewValue {
    /// Tag name without any group prefix (e.g., "Artist", "Copyright", "Title").
    pub tag: String,
    /// Group prefix if one was specified (e.g., "EXIF", "XMP", "IPTC").
    pub group: Option<String>,
    /// New value; `None` means "delete the tag".
    pub value: Option<String>,
}
67
68/// The main ExifTool engine — read, write, and edit metadata.
69///
70/// # Reading metadata
71/// ```no_run
72/// use exiftool_rs::ExifTool;
73///
74/// let et = ExifTool::new();
75///
76/// // Full tag structs
77/// let tags = et.extract_info("photo.jpg").unwrap();
78/// for tag in &tags {
79///     println!("[{}] {}: {}", tag.group.family0, tag.name, tag.print_value);
80/// }
81///
82/// // Simple name→value map
83/// let info = et.image_info("photo.jpg").unwrap();
84/// println!("Camera: {}", info.get("Model").unwrap_or(&String::new()));
85/// ```
86///
87/// # Writing metadata
88/// ```no_run
89/// use exiftool_rs::ExifTool;
90///
91/// let mut et = ExifTool::new();
92/// et.set_new_value("Artist", Some("John Doe"));
93/// et.set_new_value("Copyright", Some("2024"));
94/// et.write_info("input.jpg", "output.jpg").unwrap();
95/// ```
96pub struct ExifTool {
97    options: Options,
98    new_values: Vec<NewValue>,
99}
100
/// Outcome of a metadata extraction: each tag name mapped to its display value.
pub type ImageInfo = HashMap<String, String>;
103
104impl ExifTool {
105    /// Create a new ExifTool instance with default options.
106    pub fn new() -> Self {
107        Self {
108            options: Options::default(),
109            new_values: Vec::new(),
110        }
111    }
112
113    /// Create a new ExifTool instance with custom options.
114    pub fn with_options(options: Options) -> Self {
115        Self {
116            options,
117            new_values: Vec::new(),
118        }
119    }
120
121    /// Get a mutable reference to the options.
122    pub fn options_mut(&mut self) -> &mut Options {
123        &mut self.options
124    }
125
126    /// Get a reference to the options.
127    pub fn options(&self) -> &Options {
128        &self.options
129    }
130
131    // ================================================================
132    // Writing API
133    // ================================================================
134
135    /// Queue a new tag value for writing.
136    ///
137    /// Call this one or more times, then call `write_info()` to apply changes.
138    ///
139    /// # Arguments
140    /// * `tag` - Tag name, optionally prefixed with group (e.g., "Artist", "XMP:Title", "EXIF:Copyright")
141    /// * `value` - New value, or None to delete the tag
142    ///
143    /// # Example
144    /// ```no_run
145    /// use exiftool_rs::ExifTool;
146    /// let mut et = ExifTool::new();
147    /// et.set_new_value("Artist", Some("John Doe"));
148    /// et.set_new_value("Copyright", Some("2024 John Doe"));
149    /// et.set_new_value("XMP:Title", Some("My Photo"));
150    /// et.write_info("photo.jpg", "photo_out.jpg").unwrap();
151    /// ```
152    pub fn set_new_value(&mut self, tag: &str, value: Option<&str>) {
153        let (group, tag_name) = if let Some(colon_pos) = tag.find(':') {
154            (Some(tag[..colon_pos].to_string()), tag[colon_pos + 1..].to_string())
155        } else {
156            (None, tag.to_string())
157        };
158
159        self.new_values.push(NewValue {
160            tag: tag_name,
161            group,
162            value: value.map(|v| v.to_string()),
163        });
164    }
165
166    /// Clear all queued new values.
167    pub fn clear_new_values(&mut self) {
168        self.new_values.clear();
169    }
170
171    /// Copy tags from a source file, queuing them as new values.
172    ///
173    /// Reads all tags from `src_path` and queues them for writing.
174    /// Optionally filter by tag names.
175    pub fn set_new_values_from_file<P: AsRef<Path>>(
176        &mut self,
177        src_path: P,
178        tags_to_copy: Option<&[&str]>,
179    ) -> Result<u32> {
180        let src_tags = self.extract_info(src_path)?;
181        let mut count = 0u32;
182
183        for tag in &src_tags {
184            // Skip file-level tags that shouldn't be copied
185            if tag.group.family0 == "File" || tag.group.family0 == "Composite" {
186                continue;
187            }
188            // Skip binary/undefined data and empty values
189            if tag.print_value.starts_with("(Binary") || tag.print_value.starts_with("(Undefined") {
190                continue;
191            }
192            if tag.print_value.is_empty() {
193                continue;
194            }
195
196            // Filter by requested tags
197            if let Some(filter) = tags_to_copy {
198                let name_lower = tag.name.to_lowercase();
199                if !filter.iter().any(|f| f.to_lowercase() == name_lower) {
200                    continue;
201                }
202            }
203
204            let _full_tag = format!("{}:{}", tag.group.family0, tag.name);
205            self.new_values.push(NewValue {
206                tag: tag.name.clone(),
207                group: Some(tag.group.family0.clone()),
208                value: Some(tag.print_value.clone()),
209            });
210            count += 1;
211        }
212
213        Ok(count)
214    }
215
216    /// Set a file's name based on a tag value.
217    pub fn set_file_name_from_tag<P: AsRef<Path>>(
218        &self,
219        path: P,
220        tag_name: &str,
221        template: &str,
222    ) -> Result<String> {
223        let path = path.as_ref();
224        let tags = self.extract_info(path)?;
225
226        let tag_value = tags
227            .iter()
228            .find(|t| t.name.to_lowercase() == tag_name.to_lowercase())
229            .map(|t| &t.print_value)
230            .ok_or_else(|| Error::TagNotFound(tag_name.to_string()))?;
231
232        // Build new filename from template
233        // Template: "prefix%value%suffix.ext" or just use the tag value
234        let new_name = if template.contains('%') {
235            template.replace("%v", value_to_filename(tag_value).as_str())
236        } else {
237            // Default: use tag value as filename, keep extension
238            let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
239            let clean = value_to_filename(tag_value);
240            if ext.is_empty() {
241                clean
242            } else {
243                format!("{}.{}", clean, ext)
244            }
245        };
246
247        let parent = path.parent().unwrap_or(Path::new(""));
248        let new_path = parent.join(&new_name);
249
250        fs::rename(path, &new_path).map_err(Error::Io)?;
251        Ok(new_path.to_string_lossy().to_string())
252    }
253
254    /// Write queued changes to a file.
255    ///
256    /// If `dst_path` is the same as `src_path`, the file is modified in-place
257    /// (via a temporary file).
258    pub fn write_info<P: AsRef<Path>, Q: AsRef<Path>>(&self, src_path: P, dst_path: Q) -> Result<u32> {
259        let src_path = src_path.as_ref();
260        let dst_path = dst_path.as_ref();
261        let data = fs::read(src_path).map_err(Error::Io)?;
262
263        let file_type = self.detect_file_type(&data, src_path)?;
264        let output = self.apply_changes(&data, file_type)?;
265
266        // Write to temp file first, then rename (atomic)
267        let temp_path = dst_path.with_extension("exiftool_tmp");
268        fs::write(&temp_path, &output).map_err(Error::Io)?;
269        fs::rename(&temp_path, dst_path).map_err(Error::Io)?;
270
271        Ok(self.new_values.len() as u32)
272    }
273
274    /// Apply queued changes to in-memory data.
275    fn apply_changes(&self, data: &[u8], file_type: FileType) -> Result<Vec<u8>> {
276        match file_type {
277            FileType::Jpeg => self.write_jpeg(data),
278            FileType::Png => self.write_png(data),
279            FileType::Tiff | FileType::Dng | FileType::Cr2 | FileType::Nef
280            | FileType::Arw | FileType::Orf | FileType::Pef => self.write_tiff(data),
281            FileType::WebP => self.write_webp(data),
282            FileType::Mp4 | FileType::QuickTime | FileType::M4a
283            | FileType::ThreeGP | FileType::F4v => self.write_mp4(data),
284            FileType::Psd => self.write_psd(data),
285            FileType::Pdf => self.write_pdf(data),
286            FileType::Heif | FileType::Avif => self.write_mp4(data),
287            FileType::Mkv | FileType::WebM => self.write_matroska(data),
288            FileType::Gif => {
289                let comment = self.new_values.iter()
290                    .find(|nv| nv.tag.to_lowercase() == "comment")
291                    .and_then(|nv| nv.value.clone());
292                crate::writer::gif_writer::write_gif(data, comment.as_deref())
293            }
294            FileType::Flac => {
295                let changes: Vec<(&str, &str)> = self.new_values.iter()
296                    .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
297                    .collect();
298                crate::writer::flac_writer::write_flac(data, &changes)
299            }
300            FileType::Mp3 | FileType::Aiff => {
301                let changes: Vec<(&str, &str)> = self.new_values.iter()
302                    .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
303                    .collect();
304                crate::writer::id3_writer::write_id3(data, &changes)
305            }
306            FileType::Jp2 | FileType::Jxl => {
307                let new_xmp = if self.new_values.iter().any(|nv| nv.group.as_deref() == Some("XMP")) {
308                    let refs: Vec<&NewValue> = self.new_values.iter()
309                        .filter(|nv| nv.group.as_deref() == Some("XMP"))
310                        .collect();
311                    Some(self.build_new_xmp(&refs))
312                } else { None };
313                crate::writer::jp2_writer::write_jp2(data, new_xmp.as_deref(), None)
314            }
315            FileType::PostScript => {
316                let changes: Vec<(&str, &str)> = self.new_values.iter()
317                    .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
318                    .collect();
319                crate::writer::ps_writer::write_postscript(data, &changes)
320            }
321            FileType::Ogg | FileType::Opus => {
322                let changes: Vec<(&str, &str)> = self.new_values.iter()
323                    .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
324                    .collect();
325                crate::writer::ogg_writer::write_ogg(data, &changes)
326            }
327            FileType::Xmp => {
328                let props: Vec<xmp_writer::XmpProperty> = self.new_values.iter()
329                    .filter_map(|nv| {
330                        let val = nv.value.as_deref()?;
331                        Some(xmp_writer::XmpProperty {
332                            namespace: nv.group.clone().unwrap_or_else(|| "dc".into()),
333                            property: nv.tag.clone(),
334                            values: vec![val.to_string()],
335                            prop_type: xmp_writer::XmpPropertyType::Simple,
336                        })
337                    })
338                    .collect();
339                Ok(crate::writer::xmp_sidecar_writer::write_xmp_sidecar(&props))
340            }
341            _ => Err(Error::UnsupportedFileType(format!("writing not yet supported for {}", file_type))),
342        }
343    }
344
345    /// Write metadata changes to JPEG data.
346    fn write_jpeg(&self, data: &[u8]) -> Result<Vec<u8>> {
347        // Classify new values by target group
348        let mut exif_values: Vec<&NewValue> = Vec::new();
349        let mut xmp_values: Vec<&NewValue> = Vec::new();
350        let mut iptc_values: Vec<&NewValue> = Vec::new();
351        let mut comment_value: Option<&str> = None;
352        let mut remove_exif = false;
353        let mut remove_xmp = false;
354        let mut remove_iptc = false;
355        let mut remove_comment = false;
356
357        for nv in &self.new_values {
358            let group = nv.group.as_deref().unwrap_or("");
359            let group_upper = group.to_uppercase();
360
361            // Check for group deletion
362            if nv.value.is_none() && nv.tag == "*" {
363                match group_upper.as_str() {
364                    "EXIF" => { remove_exif = true; continue; }
365                    "XMP" => { remove_xmp = true; continue; }
366                    "IPTC" => { remove_iptc = true; continue; }
367                    _ => {}
368                }
369            }
370
371            match group_upper.as_str() {
372                "XMP" => xmp_values.push(nv),
373                "IPTC" => iptc_values.push(nv),
374                "EXIF" | "IFD0" | "EXIFIFD" | "GPS" => exif_values.push(nv),
375                "" => {
376                    // Auto-detect best group based on tag name
377                    if nv.tag.to_lowercase() == "comment" {
378                        if nv.value.is_none() {
379                            remove_comment = true;
380                        } else {
381                            comment_value = nv.value.as_deref();
382                        }
383                    } else if is_xmp_tag(&nv.tag) {
384                        xmp_values.push(nv);
385                    } else {
386                        exif_values.push(nv);
387                    }
388                }
389                _ => exif_values.push(nv), // default to EXIF
390            }
391        }
392
393        // Build new EXIF data
394        let new_exif = if !exif_values.is_empty() {
395            Some(self.build_new_exif(data, &exif_values)?)
396        } else {
397            None
398        };
399
400        // Build new XMP data
401        let new_xmp = if !xmp_values.is_empty() {
402            Some(self.build_new_xmp(&xmp_values))
403        } else {
404            None
405        };
406
407        // Build new IPTC data
408        let new_iptc_data = if !iptc_values.is_empty() {
409            let records: Vec<iptc_writer::IptcRecord> = iptc_values
410                .iter()
411                .filter_map(|nv| {
412                    let value = nv.value.as_deref()?;
413                    let (record, dataset) = iptc_writer::tag_name_to_iptc(&nv.tag)?;
414                    Some(iptc_writer::IptcRecord {
415                        record,
416                        dataset,
417                        data: value.as_bytes().to_vec(),
418                    })
419                })
420                .collect();
421            if records.is_empty() {
422                None
423            } else {
424                Some(iptc_writer::build_iptc(&records))
425            }
426        } else {
427            None
428        };
429
430        // Rewrite JPEG
431        jpeg_writer::write_jpeg(
432            data,
433            new_exif.as_deref(),
434            new_xmp.as_deref(),
435            new_iptc_data.as_deref(),
436            comment_value,
437            remove_exif,
438            remove_xmp,
439            remove_iptc,
440            remove_comment,
441        )
442    }
443
444    /// Build new EXIF data by merging existing EXIF with queued changes.
445    fn build_new_exif(&self, jpeg_data: &[u8], values: &[&NewValue]) -> Result<Vec<u8>> {
446        let bo = ByteOrderMark::BigEndian;
447        let mut ifd0_entries = Vec::new();
448        let mut exif_entries = Vec::new();
449        let mut gps_entries = Vec::new();
450
451        // Step 1: Extract existing EXIF entries from the JPEG
452        let existing = extract_existing_exif_entries(jpeg_data, bo);
453        for entry in &existing {
454            match classify_exif_tag(entry.tag) {
455                ExifIfdGroup::Ifd0 => ifd0_entries.push(entry.clone()),
456                ExifIfdGroup::ExifIfd => exif_entries.push(entry.clone()),
457                ExifIfdGroup::Gps => gps_entries.push(entry.clone()),
458            }
459        }
460
461        // Step 2: Apply queued changes (add/replace/delete)
462        let deleted_tags: Vec<u16> = values
463            .iter()
464            .filter(|nv| nv.value.is_none())
465            .filter_map(|nv| tag_name_to_id(&nv.tag))
466            .collect();
467
468        // Remove deleted tags
469        ifd0_entries.retain(|e| !deleted_tags.contains(&e.tag));
470        exif_entries.retain(|e| !deleted_tags.contains(&e.tag));
471        gps_entries.retain(|e| !deleted_tags.contains(&e.tag));
472
473        // Add/replace new values
474        for nv in values {
475            if nv.value.is_none() {
476                continue;
477            }
478            let value_str = nv.value.as_deref().unwrap_or("");
479            let group = nv.group.as_deref().unwrap_or("");
480
481            if let Some((tag_id, format, encoded)) = encode_exif_tag(&nv.tag, value_str, group, bo) {
482                let entry = exif_writer::IfdEntry {
483                    tag: tag_id,
484                    format,
485                    data: encoded,
486                };
487
488                let target = match group.to_uppercase().as_str() {
489                    "GPS" => &mut gps_entries,
490                    "EXIFIFD" => &mut exif_entries,
491                    _ => match classify_exif_tag(tag_id) {
492                        ExifIfdGroup::ExifIfd => &mut exif_entries,
493                        ExifIfdGroup::Gps => &mut gps_entries,
494                        ExifIfdGroup::Ifd0 => &mut ifd0_entries,
495                    },
496                };
497
498                // Replace existing or add new
499                if let Some(existing) = target.iter_mut().find(|e| e.tag == tag_id) {
500                    *existing = entry;
501                } else {
502                    target.push(entry);
503                }
504            }
505        }
506
507        // Remove sub-IFD pointers from entries (they'll be rebuilt by build_exif)
508        ifd0_entries.retain(|e| e.tag != 0x8769 && e.tag != 0x8825 && e.tag != 0xA005);
509
510        exif_writer::build_exif(&ifd0_entries, &exif_entries, &gps_entries, bo)
511    }
512
513    /// Write metadata changes to PNG data.
514    fn write_png(&self, data: &[u8]) -> Result<Vec<u8>> {
515        let mut new_text: Vec<(&str, &str)> = Vec::new();
516        let mut remove_text: Vec<&str> = Vec::new();
517
518        // Collect text-based changes
519        // We need to hold the strings in vectors that live long enough
520        let owned_pairs: Vec<(String, String)> = self.new_values.iter()
521            .filter(|nv| nv.value.is_some())
522            .map(|nv| (nv.tag.clone(), nv.value.clone().unwrap()))
523            .collect();
524
525        for (tag, value) in &owned_pairs {
526            new_text.push((tag.as_str(), value.as_str()));
527        }
528
529        for nv in &self.new_values {
530            if nv.value.is_none() {
531                remove_text.push(&nv.tag);
532            }
533        }
534
535        png_writer::write_png(data, &new_text, None, &remove_text)
536    }
537
538    /// Write metadata changes to PSD data.
539    fn write_psd(&self, data: &[u8]) -> Result<Vec<u8>> {
540        let mut iptc_values = Vec::new();
541        let mut xmp_values = Vec::new();
542
543        for nv in &self.new_values {
544            let group = nv.group.as_deref().unwrap_or("").to_uppercase();
545            match group.as_str() {
546                "XMP" => xmp_values.push(nv),
547                "IPTC" => iptc_values.push(nv),
548                _ => {
549                    if is_xmp_tag(&nv.tag) { xmp_values.push(nv); }
550                    else { iptc_values.push(nv); }
551                }
552            }
553        }
554
555        let new_iptc = if !iptc_values.is_empty() {
556            let records: Vec<_> = iptc_values.iter().filter_map(|nv| {
557                let value = nv.value.as_deref()?;
558                let (record, dataset) = iptc_writer::tag_name_to_iptc(&nv.tag)?;
559                Some(iptc_writer::IptcRecord { record, dataset, data: value.as_bytes().to_vec() })
560            }).collect();
561            if records.is_empty() { None } else { Some(iptc_writer::build_iptc(&records)) }
562        } else { None };
563
564        let new_xmp = if !xmp_values.is_empty() {
565            let refs: Vec<&NewValue> = xmp_values.iter().copied().collect();
566            Some(self.build_new_xmp(&refs))
567        } else { None };
568
569        psd_writer::write_psd(data, new_iptc.as_deref(), new_xmp.as_deref())
570    }
571
572    /// Write metadata changes to Matroska (MKV/WebM) data.
573    fn write_matroska(&self, data: &[u8]) -> Result<Vec<u8>> {
574        let changes: Vec<(&str, &str)> = self.new_values.iter()
575            .filter_map(|nv| {
576                let value = nv.value.as_deref()?;
577                Some((nv.tag.as_str(), value))
578            })
579            .collect();
580
581        matroska_writer::write_matroska(data, &changes)
582    }
583
584    /// Write metadata changes to PDF data.
585    fn write_pdf(&self, data: &[u8]) -> Result<Vec<u8>> {
586        let changes: Vec<(&str, &str)> = self.new_values.iter()
587            .filter_map(|nv| {
588                let value = nv.value.as_deref()?;
589                Some((nv.tag.as_str(), value))
590            })
591            .collect();
592
593        pdf_writer::write_pdf(data, &changes)
594    }
595
596    /// Write metadata changes to MP4/MOV data.
597    fn write_mp4(&self, data: &[u8]) -> Result<Vec<u8>> {
598        let mut ilst_tags: Vec<([u8; 4], String)> = Vec::new();
599        let mut xmp_values: Vec<&NewValue> = Vec::new();
600
601        for nv in &self.new_values {
602            if nv.value.is_none() { continue; }
603            let group = nv.group.as_deref().unwrap_or("").to_uppercase();
604            if group == "XMP" {
605                xmp_values.push(nv);
606            } else if let Some(key) = mp4_writer::tag_to_ilst_key(&nv.tag) {
607                ilst_tags.push((key, nv.value.clone().unwrap()));
608            }
609        }
610
611        let tag_refs: Vec<(&[u8; 4], &str)> = ilst_tags.iter()
612            .map(|(k, v)| (k, v.as_str()))
613            .collect();
614
615        let new_xmp = if !xmp_values.is_empty() {
616            let refs: Vec<&NewValue> = xmp_values.iter().copied().collect();
617            Some(self.build_new_xmp(&refs))
618        } else {
619            None
620        };
621
622        mp4_writer::write_mp4(data, &tag_refs, new_xmp.as_deref())
623    }
624
625    /// Write metadata changes to WebP data.
626    fn write_webp(&self, data: &[u8]) -> Result<Vec<u8>> {
627        let mut exif_values: Vec<&NewValue> = Vec::new();
628        let mut xmp_values: Vec<&NewValue> = Vec::new();
629        let mut remove_exif = false;
630        let mut remove_xmp = false;
631
632        for nv in &self.new_values {
633            let group = nv.group.as_deref().unwrap_or("").to_uppercase();
634            if nv.value.is_none() && nv.tag == "*" {
635                if group == "EXIF" { remove_exif = true; }
636                if group == "XMP" { remove_xmp = true; }
637                continue;
638            }
639            match group.as_str() {
640                "XMP" => xmp_values.push(nv),
641                _ => exif_values.push(nv),
642            }
643        }
644
645        let new_exif = if !exif_values.is_empty() {
646            let bo = ByteOrderMark::BigEndian;
647            let mut entries = Vec::new();
648            for nv in &exif_values {
649                if let Some(ref v) = nv.value {
650                    let group = nv.group.as_deref().unwrap_or("");
651                    if let Some((tag_id, format, encoded)) = encode_exif_tag(&nv.tag, v, group, bo) {
652                        entries.push(exif_writer::IfdEntry { tag: tag_id, format, data: encoded });
653                    }
654                }
655            }
656            if !entries.is_empty() {
657                Some(exif_writer::build_exif(&entries, &[], &[], bo)?)
658            } else {
659                None
660            }
661        } else {
662            None
663        };
664
665        let new_xmp = if !xmp_values.is_empty() {
666            Some(self.build_new_xmp(&xmp_values.iter().map(|v| *v).collect::<Vec<_>>()))
667        } else {
668            None
669        };
670
671        webp_writer::write_webp(
672            data,
673            new_exif.as_deref(),
674            new_xmp.as_deref(),
675            remove_exif,
676            remove_xmp,
677        )
678    }
679
680    /// Write metadata changes to TIFF data.
681    fn write_tiff(&self, data: &[u8]) -> Result<Vec<u8>> {
682        let bo = if data.starts_with(b"II") {
683            ByteOrderMark::LittleEndian
684        } else {
685            ByteOrderMark::BigEndian
686        };
687
688        let mut changes: Vec<(u16, Vec<u8>)> = Vec::new();
689        for nv in &self.new_values {
690            if let Some(ref value) = nv.value {
691                let group = nv.group.as_deref().unwrap_or("");
692                if let Some((tag_id, _format, encoded)) = encode_exif_tag(&nv.tag, value, group, bo) {
693                    changes.push((tag_id, encoded));
694                }
695            }
696        }
697
698        tiff_writer::write_tiff(data, &changes)
699    }
700
701    /// Build new XMP data from queued values.
702    fn build_new_xmp(&self, values: &[&NewValue]) -> Vec<u8> {
703        let mut properties = Vec::new();
704
705        for nv in values {
706            let value_str = match &nv.value {
707                Some(v) => v.clone(),
708                None => continue,
709            };
710
711            let ns = nv.group.as_deref().unwrap_or("dc").to_lowercase();
712            let ns = if ns == "xmp" { "xmp".to_string() } else { ns };
713
714            let prop_type = match nv.tag.to_lowercase().as_str() {
715                "title" | "description" | "rights" => xmp_writer::XmpPropertyType::LangAlt,
716                "subject" | "keywords" => xmp_writer::XmpPropertyType::Bag,
717                "creator" => xmp_writer::XmpPropertyType::Seq,
718                _ => xmp_writer::XmpPropertyType::Simple,
719            };
720
721            let values = if matches!(prop_type, xmp_writer::XmpPropertyType::Bag | xmp_writer::XmpPropertyType::Seq) {
722                value_str.split(',').map(|s| s.trim().to_string()).collect()
723            } else {
724                vec![value_str]
725            };
726
727            properties.push(xmp_writer::XmpProperty {
728                namespace: ns,
729                property: nv.tag.clone(),
730                values,
731                prop_type,
732            });
733        }
734
735        xmp_writer::build_xmp(&properties).into_bytes()
736    }
737
738    // ================================================================
739    // Reading API
740    // ================================================================
741
742    /// Extract metadata from a file and return a simple name→value map.
743    ///
744    /// This is the high-level one-shot API, equivalent to ExifTool's `ImageInfo()`.
745    pub fn image_info<P: AsRef<Path>>(&self, path: P) -> Result<ImageInfo> {
746        let tags = self.extract_info(path)?;
747        Ok(self.get_info(&tags))
748    }
749
750    /// Extract all metadata tags from a file.
751    ///
752    /// Returns the full `Tag` structs with groups, raw values, etc.
753    pub fn extract_info<P: AsRef<Path>>(&self, path: P) -> Result<Vec<Tag>> {
754        let path = path.as_ref();
755        let data = fs::read(path).map_err(Error::Io)?;
756
757        self.extract_info_from_bytes(&data, path)
758    }
759
760    /// Extract metadata from in-memory data.
761    pub fn extract_info_from_bytes(&self, data: &[u8], path: &Path) -> Result<Vec<Tag>> {
762        let file_type_result = self.detect_file_type(data, path);
763        let (file_type, mut tags) = match file_type_result {
764            Ok(ft) => {
765                let t = self.process_file(data, ft).or_else(|_| {
766                    self.process_by_extension(data, path)
767                })?;
768                (Some(ft), t)
769            }
770            Err(_) => {
771                // File type unknown by magic/extension — try extension-based fallback
772                let t = self.process_by_extension(data, path)?;
773                (None, t)
774            }
775        };
776        let file_type = file_type.unwrap_or(FileType::Zip); // placeholder for file-level tags
777
778        // Add file-level tags
779        tags.push(Tag {
780            id: crate::tag::TagId::Text("FileType".into()),
781            name: "FileType".into(),
782            description: "File Type".into(),
783            group: crate::tag::TagGroup {
784                family0: "File".into(),
785                family1: "File".into(),
786                family2: "Other".into(),
787            },
788            raw_value: Value::String(format!("{:?}", file_type)),
789            print_value: file_type.description().to_string(),
790            priority: 0,
791        });
792
793        tags.push(Tag {
794            id: crate::tag::TagId::Text("MIMEType".into()),
795            name: "MIMEType".into(),
796            description: "MIME Type".into(),
797            group: crate::tag::TagGroup {
798                family0: "File".into(),
799                family1: "File".into(),
800                family2: "Other".into(),
801            },
802            raw_value: Value::String(file_type.mime_type().to_string()),
803            print_value: file_type.mime_type().to_string(),
804            priority: 0,
805        });
806
807        if let Ok(metadata) = fs::metadata(path) {
808            tags.push(Tag {
809                id: crate::tag::TagId::Text("FileSize".into()),
810                name: "FileSize".into(),
811                description: "File Size".into(),
812                group: crate::tag::TagGroup {
813                    family0: "File".into(),
814                    family1: "File".into(),
815                    family2: "Other".into(),
816                },
817                raw_value: Value::U32(metadata.len() as u32),
818                print_value: format_file_size(metadata.len()),
819                priority: 0,
820            });
821        }
822
823        // Add more file-level tags
824        let file_tag = |name: &str, val: Value| -> Tag {
825            Tag {
826                id: crate::tag::TagId::Text(name.to_string()),
827                name: name.to_string(), description: name.to_string(),
828                group: crate::tag::TagGroup { family0: "File".into(), family1: "File".into(), family2: "Other".into() },
829                raw_value: val.clone(), print_value: val.to_display_string(), priority: 0,
830            }
831        };
832
833        if let Some(fname) = path.file_name().and_then(|n| n.to_str()) {
834            tags.push(file_tag("FileName", Value::String(fname.to_string())));
835        }
836        if let Some(dir) = path.parent().and_then(|p| p.to_str()) {
837            tags.push(file_tag("Directory", Value::String(dir.to_string())));
838        }
839        // Use the canonical (first) extension from the FileType, matching Perl ExifTool behavior.
840        let canonical_ext = file_type.extensions().first().copied().unwrap_or("");
841        if !canonical_ext.is_empty() {
842            tags.push(file_tag("FileTypeExtension", Value::String(canonical_ext.to_string())));
843        }
844
845        #[cfg(unix)]
846        if let Ok(metadata) = fs::metadata(path) {
847            use std::os::unix::fs::MetadataExt;
848            let mode = metadata.mode();
849            tags.push(file_tag("FilePermissions", Value::String(format!("{:o}", mode & 0o7777))));
850
851            // FileModifyDate
852            if let Ok(modified) = metadata.modified() {
853                if let Ok(dur) = modified.duration_since(std::time::UNIX_EPOCH) {
854                    let secs = dur.as_secs() as i64;
855                    tags.push(file_tag("FileModifyDate", Value::String(unix_to_datetime(secs))));
856                }
857            }
858            // FileAccessDate
859            if let Ok(accessed) = metadata.accessed() {
860                if let Ok(dur) = accessed.duration_since(std::time::UNIX_EPOCH) {
861                    let secs = dur.as_secs() as i64;
862                    tags.push(file_tag("FileAccessDate", Value::String(unix_to_datetime(secs))));
863                }
864            }
865            // FileInodeChangeDate (ctime on Unix)
866            let ctime = metadata.ctime();
867            if ctime > 0 {
868                tags.push(file_tag("FileInodeChangeDate", Value::String(unix_to_datetime(ctime))));
869            }
870        }
871
872        // ExifByteOrder (from TIFF header)
873        {
874            let bo_str = if data.len() > 8 {
875                // Check EXIF in JPEG or TIFF header or WebP/RIFF EXIF chunk
876                let check: Option<&[u8]> = if data.starts_with(&[0xFF, 0xD8]) {
877                    // JPEG: find APP1 EXIF header
878                    data.windows(6).position(|w| w == b"Exif\0\0")
879                        .map(|p| &data[p+6..])
880                } else if data.starts_with(b"FUJIFILMCCD-RAW") && data.len() >= 0x60 {
881                    // RAF: look in the embedded JPEG for EXIF byte order
882                    let jpeg_offset = u32::from_be_bytes([data[0x54], data[0x55], data[0x56], data[0x57]]) as usize;
883                    let jpeg_length = u32::from_be_bytes([data[0x58], data[0x59], data[0x5A], data[0x5B]]) as usize;
884                    if jpeg_offset > 0 && jpeg_offset + jpeg_length <= data.len() {
885                        let jpeg = &data[jpeg_offset..jpeg_offset + jpeg_length];
886                        jpeg.windows(6).position(|w| w == b"Exif\0\0")
887                            .map(|p| &jpeg[p+6..])
888                    } else {
889                        None
890                    }
891                } else if data.starts_with(b"RIFF") && data.len() >= 12 {
892                    // RIFF/WebP: find EXIF chunk
893                    let mut riff_bo: Option<&[u8]> = None;
894                    let mut pos = 12usize;
895                    while pos + 8 <= data.len() {
896                        let cid = &data[pos..pos+4];
897                        let csz = u32::from_le_bytes([data[pos+4],data[pos+5],data[pos+6],data[pos+7]]) as usize;
898                        let cstart = pos + 8;
899                        let cend = (cstart + csz).min(data.len());
900                        if cid == b"EXIF" && cend > cstart {
901                            let exif_data = &data[cstart..cend];
902                            let tiff = if exif_data.starts_with(b"Exif\0\0") { &exif_data[6..] } else { exif_data };
903                            riff_bo = Some(tiff);
904                            break;
905                        }
906                        // Also check LIST chunks
907                        if cid == b"LIST" && cend >= cstart + 4 {
908                            // recurse not needed for this simple scan - just advance
909                        }
910                        pos = cend + (csz & 1);
911                    }
912                    riff_bo
913                } else if data.starts_with(&[0x00, 0x00, 0x00, 0x0C, b'J', b'X', b'L', b' ']) {
914                    // JXL container: scan for brob Exif box and decompress to get byte order
915                    let mut jxl_bo: Option<String> = None;
916                    let mut jpos = 12usize; // skip JXL signature box
917                    while jpos + 8 <= data.len() {
918                        let bsize = u32::from_be_bytes([data[jpos], data[jpos+1], data[jpos+2], data[jpos+3]]) as usize;
919                        let btype = &data[jpos+4..jpos+8];
920                        if bsize < 8 || jpos + bsize > data.len() { break; }
921                        if btype == b"brob" && jpos + bsize > 12 {
922                            let inner_type = &data[jpos+8..jpos+12];
923                            if inner_type == b"Exif" || inner_type == b"exif" {
924                                let brotli_payload = &data[jpos+12..jpos+bsize];
925                                use std::io::Cursor;
926                                let mut inp = Cursor::new(brotli_payload);
927                                let mut out: Vec<u8> = Vec::new();
928                                if brotli::BrotliDecompress(&mut inp, &mut out).is_ok() {
929                                    let exif_start = if out.len() > 4 { 4 } else { 0 };
930                                    if exif_start < out.len() {
931                                        if out[exif_start..].starts_with(b"MM") {
932                                            jxl_bo = Some("Big-endian (Motorola, MM)".to_string());
933                                        } else if out[exif_start..].starts_with(b"II") {
934                                            jxl_bo = Some("Little-endian (Intel, II)".to_string());
935                                        }
936                                    }
937                                }
938                                break;
939                            }
940                        }
941                        jpos += bsize;
942                    }
943                    if let Some(bo) = jxl_bo {
944                        if !bo.is_empty() && file_type != FileType::Btf {
945                            tags.push(file_tag("ExifByteOrder", Value::String(bo)));
946                        }
947                    }
948                    // Return None to skip the generic byte order check below
949                    None
950                } else if data.starts_with(&[0x00, b'M', b'R', b'M']) {
951                    // MRW: find TTW segment which contains TIFF/EXIF data
952                    let mrw_data_offset = if data.len() >= 8 {
953                        u32::from_be_bytes([data[4], data[5], data[6], data[7]]) as usize + 8
954                    } else { 0 };
955                    let mut mrw_bo: Option<&[u8]> = None;
956                    let mut mpos = 8usize;
957                    while mpos + 8 <= mrw_data_offset.min(data.len()) {
958                        let seg_tag = &data[mpos..mpos+4];
959                        let seg_len = u32::from_be_bytes([data[mpos+4], data[mpos+5], data[mpos+6], data[mpos+7]]) as usize;
960                        if seg_tag == b"\x00TTW" && mpos + 8 + seg_len <= data.len() {
961                            mrw_bo = Some(&data[mpos+8..mpos+8+seg_len]);
962                            break;
963                        }
964                        mpos += 8 + seg_len;
965                    }
966                    mrw_bo
967                } else {
968                    Some(&data[..])
969                };
970                if let Some(tiff) = check {
971                    if tiff.starts_with(b"II") { "Little-endian (Intel, II)" }
972                    else if tiff.starts_with(b"MM") { "Big-endian (Motorola, MM)" }
973                    else { "" }
974                } else { "" }
975            } else { "" };
976            // Suppress ExifByteOrder for BigTIFF, Canon VRD/DR4 (Perl doesn't output it for these)
977            // Also skip if already emitted by ExifReader (TIFF-based formats)
978            let already_has_exifbyteorder = tags.iter().any(|t| t.name == "ExifByteOrder");
979            if !bo_str.is_empty() && !already_has_exifbyteorder
980                && file_type != FileType::Btf
981                && file_type != FileType::Dr4 && file_type != FileType::Vrd
982                && file_type != FileType::Crw {
983                tags.push(file_tag("ExifByteOrder", Value::String(bo_str.to_string())));
984            }
985        }
986
987        tags.push(file_tag("ExifToolVersion", Value::String(crate::VERSION.to_string())));
988
989        // Compute composite tags
990        let composite = crate::composite::compute_composite_tags(&tags);
991        tags.extend(composite);
992
993        // FLIR post-processing: remove LensID composite for FLIR cameras.
994        // Perl's LensID composite requires LensType EXIF tag (not present in FLIR images),
995        // and LensID-2 requires LensModel to match /(mm|\d\/F)/ (FLIR names like "FOL7"
996        // don't match).  Our composite.rs uses a simpler fallback that picks up any non-empty
997        // LensModel, so we remove LensID when the image is from a FLIR camera with FFF data.
998        {
999            let is_flir_fff = tags.iter().any(|t| t.group.family0 == "APP1"
1000                && t.group.family1 == "FLIR");
1001            if is_flir_fff {
1002                tags.retain(|t| !(t.name == "LensID" && t.group.family0 == "Composite"));
1003            }
1004        }
1005
1006        // Olympus post-processing: remove the generic "Lens" composite for Olympus cameras.
1007        // In Perl, the "Lens" composite tag requires Canon:MinFocalLength (Canon namespace).
1008        // Our composite.rs generates Lens for any manufacturer that has MinFocalLength +
1009        // MaxFocalLength (e.g., Olympus Equipment sub-IFD).  Remove it for non-Canon cameras.
1010        {
1011            let make = tags.iter().find(|t| t.name == "Make")
1012                .map(|t| t.print_value.clone()).unwrap_or_default();
1013            if !make.to_uppercase().contains("CANON") {
1014                tags.retain(|t| t.name != "Lens" || t.group.family0 != "Composite");
1015            }
1016        }
1017
1018        // Priority-based deduplication: when the same tag name appears from both RIFF (priority 0)
1019        // and MakerNotes/EXIF (priority 0 but higher-quality source), remove the RIFF copy.
1020        // Mirrors ExifTool's PRIORITY => 0 behavior for RIFF StreamHeader tags.
1021        {
1022            let riff_priority_zero_tags = ["Quality", "SampleSize", "StreamType"];
1023            for tag_name in &riff_priority_zero_tags {
1024                let has_makernotes = tags.iter().any(|t| t.name == *tag_name
1025                    && t.group.family0 != "RIFF");
1026                if has_makernotes {
1027                    tags.retain(|t| !(t.name == *tag_name && t.group.family0 == "RIFF"));
1028                }
1029            }
1030        }
1031
1032        // Filter by requested tags if specified
1033        if !self.options.requested_tags.is_empty() {
1034            let requested: Vec<String> = self
1035                .options
1036                .requested_tags
1037                .iter()
1038                .map(|t| t.to_lowercase())
1039                .collect();
1040            tags.retain(|t| requested.contains(&t.name.to_lowercase()));
1041        }
1042
1043        Ok(tags)
1044    }
1045
1046    /// Format extracted tags into a simple name→value map.
1047    ///
1048    /// Handles duplicate tag names by appending group info.
1049    fn get_info(&self, tags: &[Tag]) -> ImageInfo {
1050        let mut info = ImageInfo::new();
1051        let mut seen: HashMap<String, usize> = HashMap::new();
1052
1053        for tag in tags {
1054            let value = if self.options.print_conv {
1055                &tag.print_value
1056            } else {
1057                &tag.raw_value.to_display_string()
1058            };
1059
1060            let count = seen.entry(tag.name.clone()).or_insert(0);
1061            *count += 1;
1062
1063            if *count == 1 {
1064                info.insert(tag.name.clone(), value.clone());
1065            } else if self.options.duplicates {
1066                let key = format!("{} [{}:{}]", tag.name, tag.group.family0, tag.group.family1);
1067                info.insert(key, value.clone());
1068            }
1069        }
1070
1071        info
1072    }
1073
1074    /// Detect file type from magic bytes and extension.
1075    fn detect_file_type(&self, data: &[u8], path: &Path) -> Result<FileType> {
1076        // Try magic bytes first
1077        let header_len = data.len().min(256);
1078        if let Some(ft) = file_type::detect_from_magic(&data[..header_len]) {
1079            // Override ICO to Font if extension is .dfont (Mac resource fork)
1080            if ft == FileType::Ico {
1081                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1082                    if ext.eq_ignore_ascii_case("dfont") {
1083                        return Ok(FileType::Font);
1084                    }
1085                }
1086            }
1087            // Override JPEG to JPS if the file extension is .jps
1088            if ft == FileType::Jpeg {
1089                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1090                    if ext.eq_ignore_ascii_case("jps") {
1091                        return Ok(FileType::Jps);
1092                    }
1093                }
1094            }
1095            // Override PLIST to AAE if extension is .aae
1096            if ft == FileType::Plist {
1097                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1098                    if ext.eq_ignore_ascii_case("aae") {
1099                        return Ok(FileType::Aae);
1100                    }
1101                }
1102            }
1103            // Override XMP to PLIST/AAE if extension is .plist or .aae
1104            if ft == FileType::Xmp {
1105                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1106                    if ext.eq_ignore_ascii_case("plist") {
1107                        return Ok(FileType::Plist);
1108                    }
1109                    if ext.eq_ignore_ascii_case("aae") {
1110                        return Ok(FileType::Aae);
1111                    }
1112                }
1113            }
1114            // Override to PhotoCD if extension is .pcd (file starts with 0xFF padding)
1115            if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1116                if ext.eq_ignore_ascii_case("pcd") && data.len() >= 2056
1117                    && &data[2048..2055] == b"PCD_IPI"
1118                {
1119                    return Ok(FileType::PhotoCd);
1120                }
1121            }
1122            // Override MP3 to MPC/APE/WavPack if extension says otherwise
1123            if ft == FileType::Mp3 {
1124                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1125                    if ext.eq_ignore_ascii_case("mpc") {
1126                        return Ok(FileType::Mpc);
1127                    }
1128                    if ext.eq_ignore_ascii_case("ape") {
1129                        return Ok(FileType::Ape);
1130                    }
1131                    if ext.eq_ignore_ascii_case("wv") {
1132                        return Ok(FileType::WavPack);
1133                    }
1134                }
1135            }
1136            // For ZIP files, check if it's an EIP (by extension) or OpenDocument format
1137            if ft == FileType::Zip {
1138                // Check extension first for EIP
1139                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1140                    if ext.eq_ignore_ascii_case("eip") {
1141                        return Ok(FileType::Eip);
1142                    }
1143                }
1144                if let Some(od_type) = detect_opendocument_type(data) {
1145                    return Ok(od_type);
1146                }
1147            }
1148            return Ok(ft);
1149        }
1150
1151        // Fall back to extension
1152        if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1153            if let Some(ft) = file_type::detect_from_extension(ext) {
1154                return Ok(ft);
1155            }
1156        }
1157
1158        let ext_str = path
1159            .extension()
1160            .and_then(|e| e.to_str())
1161            .unwrap_or("unknown");
1162        Err(Error::UnsupportedFileType(ext_str.to_string()))
1163    }
1164
1165    /// Dispatch to the appropriate format reader.
1166
1167    fn process_file(&self, data: &[u8], file_type: FileType) -> Result<Vec<Tag>> {
1168        match file_type {
1169            FileType::Jpeg | FileType::Jps => formats::jpeg::read_jpeg(data),
1170            FileType::Png | FileType::Mng => formats::png::read_png(data),
1171            // All TIFF-based formats (TIFF + most RAW formats)
1172            FileType::Tiff
1173            | FileType::Btf
1174            | FileType::Dng
1175            | FileType::Cr2
1176            | FileType::Nef
1177            | FileType::Arw
1178            | FileType::Sr2
1179            | FileType::Orf
1180            | FileType::Pef
1181            | FileType::Erf
1182            | FileType::Fff
1183            | FileType::Rwl
1184            | FileType::Mef
1185            | FileType::Srw
1186            | FileType::Gpr
1187            | FileType::Arq
1188            | FileType::ThreeFR
1189            | FileType::Dcr
1190            | FileType::Rw2
1191            | FileType::Srf => formats::tiff::read_tiff(data),
1192            // Phase One IIQ: TIFF + PhaseOne maker note block
1193            FileType::Iiq => formats::misc::read_iiq(data),
1194            // Image formats
1195            FileType::Gif => formats::gif::read_gif(data),
1196            FileType::Bmp => formats::bmp::read_bmp(data),
1197            FileType::WebP | FileType::Avi | FileType::Wav => formats::riff::read_riff(data),
1198            FileType::Psd => formats::psd::read_psd(data),
1199            // Audio formats
1200            FileType::Mp3 => formats::id3::read_mp3(data),
1201            FileType::Flac => formats::flac::read_flac(data),
1202            FileType::Ogg | FileType::Opus => formats::ogg::read_ogg(data),
1203            FileType::Aiff => formats::aiff::read_aiff(data),
1204            // Video formats
1205            FileType::Mp4
1206            | FileType::QuickTime
1207            | FileType::M4a
1208            | FileType::ThreeGP
1209            | FileType::Heif
1210            | FileType::Avif
1211            | FileType::Cr3
1212            | FileType::F4v
1213            | FileType::Mqv
1214            | FileType::Lrv => formats::quicktime::read_quicktime(data),
1215            FileType::Mkv | FileType::WebM => formats::matroska::read_matroska(data),
1216            FileType::Asf | FileType::Wmv | FileType::Wma => formats::asf::read_asf(data),
1217            FileType::Wtv => formats::wtv::read_wtv(data),
1218            // RAW formats with custom containers
1219            FileType::Crw => formats::canon_raw::read_crw(data),
1220            FileType::Raf => formats::raf::read_raf(data),
1221            FileType::Mrw => formats::mrw::read_mrw(data),
1222            FileType::Mrc => formats::mrc::read_mrc(data),
1223            // Image formats
1224            FileType::Jp2 => formats::jp2::read_jp2(data),
1225            FileType::J2c => formats::jp2::read_j2c(data),
1226            FileType::Jxl => formats::jp2::read_jxl(data),
1227            FileType::Ico => formats::ico::read_ico(data),
1228            FileType::Icc => formats::icc::read_icc(data),
1229            // Documents
1230            FileType::Pdf => formats::pdf::read_pdf(data),
1231            FileType::PostScript => {
1232                // PFA fonts start with %!PS-AdobeFont or %!FontType1
1233                if data.starts_with(b"%!PS-AdobeFont") || data.starts_with(b"%!FontType1") {
1234                    formats::font::read_pfa(data).or_else(|_| formats::postscript::read_postscript(data))
1235                } else {
1236                    formats::postscript::read_postscript(data)
1237                }
1238            }
1239            FileType::Eip => formats::capture_one::read_eip(data),
1240            FileType::Zip | FileType::Docx | FileType::Xlsx | FileType::Pptx
1241            | FileType::Doc | FileType::Xls | FileType::Ppt => formats::zip::read_zip(data),
1242            FileType::Rtf => formats::rtf::read_rtf(data),
1243            FileType::InDesign => formats::misc::read_indesign(data),
1244            FileType::Pcap => formats::misc::read_pcap(data),
1245            FileType::Pcapng => formats::misc::read_pcapng(data),
1246            // Canon VRD / DR4
1247            FileType::Vrd => formats::canon_vrd::read_vrd(data).or_else(|_| Ok(Vec::new())),
1248            FileType::Dr4 => formats::canon_vrd::read_dr4(data).or_else(|_| Ok(Vec::new())),
1249            // Metadata / Other
1250            FileType::Xmp => formats::xmp_file::read_xmp(data),
1251            FileType::Svg => formats::misc::read_svg(data),
1252            FileType::Html => {
1253                // SVG files that weren't detected by magic (e.g., via extension fallback)
1254                let is_svg = data.windows(4).take(512).any(|w| w == b"<svg");
1255                if is_svg {
1256                    formats::misc::read_svg(data)
1257                } else {
1258                    formats::html::read_html(data)
1259                }
1260            }
1261            FileType::Exe => formats::exe::read_exe(data),
1262            FileType::Font => {
1263                // AFM: Adobe Font Metrics text file
1264                if data.starts_with(b"StartFontMetrics") {
1265                    return formats::font::read_afm(data);
1266                }
1267                // PFA: PostScript Type 1 ASCII font
1268                if data.starts_with(b"%!PS-AdobeFont") || data.starts_with(b"%!FontType1") {
1269                    return formats::font::read_pfa(data).or_else(|_| Ok(Vec::new()));
1270                }
1271                // PFB: PostScript Type 1 Binary font
1272                if data.len() >= 2 && data[0] == 0x80 && (data[1] == 0x01 || data[1] == 0x02) {
1273                    return formats::font::read_pfb(data).or_else(|_| Ok(Vec::new()));
1274                }
1275                formats::font::read_font(data)
1276            }
1277            // Audio with ID3
1278            FileType::WavPack | FileType::Dsf => formats::id3::read_mp3(data),
1279            FileType::Ape => formats::ape::read_ape(data),
1280            FileType::Mpc => formats::ape::read_mpc(data),
1281            FileType::Aac => formats::misc::read_aac(data),
1282            FileType::RealAudio => {
1283                formats::misc::read_real_audio(data).or_else(|_| Ok(Vec::new()))
1284            }
1285            FileType::RealMedia => {
1286                formats::misc::read_real_media(data).or_else(|_| Ok(Vec::new()))
1287            }
1288            // Misc formats
1289            FileType::Czi => formats::misc::read_czi(data).or_else(|_| Ok(Vec::new())),
1290            FileType::PhotoCd => formats::misc::read_photo_cd(data).or_else(|_| Ok(Vec::new())),
1291            FileType::Dicom => formats::dicom::read_dicom(data),
1292            FileType::Fits => formats::misc::read_fits(data),
1293            FileType::Flv => formats::misc::read_flv(data),
1294            FileType::Mxf => formats::misc::read_mxf(data).or_else(|_| Ok(Vec::new())),
1295            FileType::Swf => formats::misc::read_swf(data),
1296            FileType::Hdr => formats::misc::read_hdr(data),
1297            FileType::DjVu => formats::djvu::read_djvu(data),
1298            FileType::Xcf => formats::gimp::read_xcf(data),
1299            FileType::Mie => formats::mie::read_mie(data),
1300            FileType::Lfp => formats::lytro::read_lfp(data),
1301            // FileType::Miff dispatched via string extension below
1302            FileType::Fpf => formats::flir_fpf::read_fpf(data),
1303            FileType::Flif => formats::misc::read_flif(data),
1304            FileType::Bpg => formats::misc::read_bpg(data),
1305            FileType::Pcx => formats::misc::read_pcx(data),
1306            FileType::Pict => formats::misc::read_pict(data),
1307            FileType::M2ts => formats::misc::read_m2ts(data, self.options.extract_embedded),
1308            FileType::Gzip => formats::misc::read_gzip(data),
1309            FileType::Rar => formats::misc::read_rar(data),
1310            FileType::Dss => formats::misc::read_dss(data),
1311            FileType::Moi => formats::misc::read_moi(data),
1312            FileType::MacOs => formats::misc::read_macos(data),
1313            FileType::Json => formats::misc::read_json(data),
1314            // New formats
1315            FileType::Pgf => formats::pgf::read_pgf(data),
1316            FileType::Xisf => formats::xisf::read_xisf(data),
1317            FileType::Torrent => formats::torrent::read_torrent(data),
1318            FileType::Mobi => formats::palm::read_palm(data),
1319            FileType::Psp => formats::psp::read_psp(data),
1320            FileType::SonyPmp => formats::sony_pmp::read_sony_pmp(data),
1321            FileType::Audible => formats::audible::read_audible(data),
1322            FileType::Exr => formats::openexr::read_openexr(data),
1323            // New formats
1324            FileType::Plist => {
1325                if data.starts_with(b"bplist") {
1326                    formats::plist::read_binary_plist_tags(data)
1327                } else {
1328                    formats::plist::read_xml_plist(data)
1329                }
1330            }
1331            FileType::Aae => {
1332                if data.starts_with(b"bplist") {
1333                    formats::plist::read_binary_plist_tags(data)
1334                } else {
1335                    formats::plist::read_aae_plist(data)
1336                }
1337            }
1338            FileType::KyoceraRaw => formats::misc::read_kyocera_raw(data),
1339            FileType::PortableFloatMap => formats::misc::read_pfm(data),
1340            FileType::Ods | FileType::Odt | FileType::Odp | FileType::Odg |
1341            FileType::Odf | FileType::Odb | FileType::Odi | FileType::Odc => formats::zip::read_zip(data),
1342            _ => Err(Error::UnsupportedFileType(format!("{}", file_type))),
1343        }
1344    }
1345
1346    /// Fallback: try to read file based on extension for formats without magic detection.
1347    fn process_by_extension(&self, data: &[u8], path: &Path) -> Result<Vec<Tag>> {
1348        let ext = path
1349            .extension()
1350            .and_then(|e| e.to_str())
1351            .unwrap_or("")
1352            .to_ascii_lowercase();
1353
1354        match ext.as_str() {
1355            "ppm" | "pgm" | "pbm" => formats::misc::read_ppm(data),
1356            "pfm" => {
1357                // PFM can be Portable Float Map or Printer Font Metrics
1358                if data.len() >= 3 && data[0] == b'P' && (data[1] == b'f' || data[1] == b'F') {
1359                    formats::misc::read_ppm(data)
1360                } else {
1361                    Ok(Vec::new()) // Printer Font Metrics
1362                }
1363            }
1364            "json" => formats::misc::read_json(data),
1365            "svg" => formats::misc::read_svg(data),
1366            "ram" => formats::misc::read_ram(data).or_else(|_| Ok(Vec::new())),
1367            "txt" | "log" | "igc" => {
1368                Ok(compute_text_tags(data, false))
1369            }
1370            "csv" => {
1371                Ok(compute_text_tags(data, true))
1372            }
1373            "url" => formats::lnk::read_url(data).or_else(|_| Ok(Vec::new())),
1374            "lnk" => formats::lnk::read_lnk(data).or_else(|_| Ok(Vec::new())),
1375            "gpx" | "kml" | "xml" | "inx" => formats::xmp_file::read_xmp(data),
1376            "plist" => {
1377                if data.starts_with(b"bplist") {
1378                    formats::plist::read_binary_plist_tags(data).or_else(|_| Ok(Vec::new()))
1379                } else {
1380                    formats::plist::read_xml_plist(data).or_else(|_| Ok(Vec::new()))
1381                }
1382            }
1383            "aae" => {
1384                if data.starts_with(b"bplist") {
1385                    formats::plist::read_binary_plist_tags(data).or_else(|_| Ok(Vec::new()))
1386                } else {
1387                    formats::plist::read_aae_plist(data).or_else(|_| Ok(Vec::new()))
1388                }
1389            }
1390            "vcf" | "ics" | "vcard" => {
1391                let s = String::from_utf8_lossy(&data[..data.len().min(100)]);
1392                if s.contains("BEGIN:VCALENDAR") {
1393                    formats::vcard::read_ics(data).or_else(|_| Ok(Vec::new()))
1394                } else {
1395                    formats::vcard::read_vcf(data).or_else(|_| Ok(Vec::new()))
1396                }
1397            }
1398            "xcf" => Ok(Vec::new()),      // GIMP
1399            "vrd" => formats::canon_vrd::read_vrd(data).or_else(|_| Ok(Vec::new())),
1400            "dr4" => formats::canon_vrd::read_dr4(data).or_else(|_| Ok(Vec::new())),
1401            "indd" | "indt" => Ok(Vec::new()), // InDesign
1402            "x3f" => formats::sigma_raw::read_x3f(data).or_else(|_| Ok(Vec::new())),
1403            "mie" => Ok(Vec::new()),       // MIE
1404            "exr" => Ok(Vec::new()),       // OpenEXR
1405            "wpg" => formats::misc::read_wpg(data).or_else(|_| Ok(Vec::new())),
1406            "moi" => formats::misc::read_moi(data).or_else(|_| Ok(Vec::new())),
1407            "macos" => formats::misc::read_macos(data).or_else(|_| Ok(Vec::new())),
1408            "dpx" => formats::dpx::read_dpx(data).or_else(|_| Ok(Vec::new())),
1409            "r3d" => formats::red::read_r3d(data).or_else(|_| Ok(Vec::new())),
1410            "tnef" => formats::tnef::read_tnef(data).or_else(|_| Ok(Vec::new())),
1411            "ppt" | "fpx" => formats::flashpix::read_fpx(data).or_else(|_| Ok(Vec::new())),
1412            "fpf" => formats::flir_fpf::read_fpf(data).or_else(|_| Ok(Vec::new())),
1413            "itc" => formats::misc::read_itc(data).or_else(|_| Ok(Vec::new())),
1414            "dv" => formats::dv::read_dv(data, data.len() as u64).or_else(|_| Ok(Vec::new())),
1415            "czi" => formats::misc::read_czi(data).or_else(|_| Ok(Vec::new())),
1416            "miff" => formats::miff::read_miff(data).or_else(|_| Ok(Vec::new())),
1417            "lfp" | "mrc"
1418            | "dss" | "mobi" | "psp" | "pgf" | "raw"
1419            | "pmp" | "torrent"
1420            | "xisf" | "mxf"
1421            | "dfont" => Ok(Vec::new()),
1422            "iso" => formats::iso::read_iso(data).or_else(|_| Ok(Vec::new())),
1423            "afm" => formats::font::read_afm(data).or_else(|_| Ok(Vec::new())),
1424            "pfa" => formats::font::read_pfa(data).or_else(|_| Ok(Vec::new())),
1425            "pfb" => formats::font::read_pfb(data).or_else(|_| Ok(Vec::new())),
1426            _ => Err(Error::UnsupportedFileType(ext)),
1427        }
1428    }
1429}
1430
1431impl Default for ExifTool {
1432    fn default() -> Self {
1433        Self::new()
1434    }
1435}
1436
1437/// Detect OpenDocument file type by reading the `mimetype` entry from a ZIP.
1438/// Returns None if not an OpenDocument file.
1439fn detect_opendocument_type(data: &[u8]) -> Option<FileType> {
1440    // OpenDocument ZIPs have "mimetype" as the FIRST local file entry (uncompressed)
1441    if data.len() < 30 || data[0..4] != [0x50, 0x4B, 0x03, 0x04] {
1442        return None;
1443    }
1444    let compression = u16::from_le_bytes([data[8], data[9]]);
1445    let compressed_size = u32::from_le_bytes([data[18], data[19], data[20], data[21]]) as usize;
1446    let name_len = u16::from_le_bytes([data[26], data[27]]) as usize;
1447    let extra_len = u16::from_le_bytes([data[28], data[29]]) as usize;
1448    let name_start = 30;
1449    if name_start + name_len > data.len() {
1450        return None;
1451    }
1452    let filename = std::str::from_utf8(&data[name_start..name_start + name_len]).unwrap_or("");
1453    if filename != "mimetype" || compression != 0 {
1454        return None;
1455    }
1456    let content_start = name_start + name_len + extra_len;
1457    let content_end = (content_start + compressed_size).min(data.len());
1458    if content_start >= content_end {
1459        return None;
1460    }
1461    let mime = std::str::from_utf8(&data[content_start..content_end]).unwrap_or("").trim();
1462    match mime {
1463        "application/vnd.oasis.opendocument.spreadsheet" => Some(FileType::Ods),
1464        "application/vnd.oasis.opendocument.text" => Some(FileType::Odt),
1465        "application/vnd.oasis.opendocument.presentation" => Some(FileType::Odp),
1466        "application/vnd.oasis.opendocument.graphics" => Some(FileType::Odg),
1467        "application/vnd.oasis.opendocument.formula" => Some(FileType::Odf),
1468        "application/vnd.oasis.opendocument.database" => Some(FileType::Odb),
1469        "application/vnd.oasis.opendocument.image" => Some(FileType::Odi),
1470        "application/vnd.oasis.opendocument.chart" => Some(FileType::Odc),
1471        _ => None,
1472    }
1473}
1474
1475/// Detect the file type of a file at the given path.
1476pub fn get_file_type<P: AsRef<Path>>(path: P) -> Result<FileType> {
1477    let path = path.as_ref();
1478    let mut file = fs::File::open(path).map_err(Error::Io)?;
1479    let mut header = [0u8; 256];
1480    use std::io::Read;
1481    let n = file.read(&mut header).map_err(Error::Io)?;
1482
1483    if let Some(ft) = file_type::detect_from_magic(&header[..n]) {
1484        return Ok(ft);
1485    }
1486
1487    if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1488        if let Some(ft) = file_type::detect_from_extension(ext) {
1489            return Ok(ft);
1490        }
1491    }
1492
1493    Err(Error::UnsupportedFileType("unknown".into()))
1494}
1495
/// Classification of EXIF tags into IFD groups.
///
/// Produced by `classify_exif_tag` from a numeric tag ID.
enum ExifIfdGroup {
    /// Main image directory; also the fallback for IDs matching no other range.
    Ifd0,
    /// Exif sub-IFD.
    ExifIfd,
    /// GPS sub-IFD.
    Gps,
}
1502
1503/// Determine which IFD a tag belongs to based on its ID.
1504fn classify_exif_tag(tag_id: u16) -> ExifIfdGroup {
1505    match tag_id {
1506        // ExifIFD tags
1507        0x829A..=0x829D | 0x8822..=0x8827 | 0x8830 | 0x9000..=0x9292
1508        | 0xA000..=0xA435 => ExifIfdGroup::ExifIfd,
1509        // GPS tags
1510        0x0000..=0x001F if tag_id <= 0x001F => ExifIfdGroup::Gps,
1511        // Everything else → IFD0
1512        _ => ExifIfdGroup::Ifd0,
1513    }
1514}
1515
1516/// Extract existing EXIF entries from a JPEG file's APP1 segment.
1517fn extract_existing_exif_entries(jpeg_data: &[u8], target_bo: ByteOrderMark) -> Vec<exif_writer::IfdEntry> {
1518    let mut entries = Vec::new();
1519
1520    // Find EXIF APP1 segment
1521    let mut pos = 2; // Skip SOI
1522    while pos + 4 <= jpeg_data.len() {
1523        if jpeg_data[pos] != 0xFF {
1524            pos += 1;
1525            continue;
1526        }
1527        let marker = jpeg_data[pos + 1];
1528        pos += 2;
1529
1530        if marker == 0xDA || marker == 0xD9 {
1531            break; // SOS or EOI
1532        }
1533        if marker == 0xFF || marker == 0x00 || marker == 0xD8 || (0xD0..=0xD7).contains(&marker) {
1534            continue;
1535        }
1536
1537        if pos + 2 > jpeg_data.len() {
1538            break;
1539        }
1540        let seg_len = u16::from_be_bytes([jpeg_data[pos], jpeg_data[pos + 1]]) as usize;
1541        if seg_len < 2 || pos + seg_len > jpeg_data.len() {
1542            break;
1543        }
1544
1545        let seg_data = &jpeg_data[pos + 2..pos + seg_len];
1546
1547        // EXIF APP1
1548        if marker == 0xE1 && seg_data.len() > 14 && seg_data.starts_with(b"Exif\0\0") {
1549            let tiff_data = &seg_data[6..];
1550            extract_ifd_entries(tiff_data, target_bo, &mut entries);
1551            break;
1552        }
1553
1554        pos += seg_len;
1555    }
1556
1557    entries
1558}
1559
1560/// Extract IFD entries from TIFF data, re-encoding values in the target byte order.
1561fn extract_ifd_entries(
1562    tiff_data: &[u8],
1563    target_bo: ByteOrderMark,
1564    entries: &mut Vec<exif_writer::IfdEntry>,
1565) {
1566    use crate::metadata::exif::parse_tiff_header;
1567
1568    let header = match parse_tiff_header(tiff_data) {
1569        Ok(h) => h,
1570        Err(_) => return,
1571    };
1572
1573    let src_bo = header.byte_order;
1574
1575    // Read IFD0
1576    read_ifd_for_merge(tiff_data, header.ifd0_offset as usize, src_bo, target_bo, entries);
1577
1578    // Find ExifIFD and GPS pointers
1579    let ifd0_offset = header.ifd0_offset as usize;
1580    if ifd0_offset + 2 > tiff_data.len() {
1581        return;
1582    }
1583    let count = read_u16_bo(tiff_data, ifd0_offset, src_bo) as usize;
1584    for i in 0..count {
1585        let eoff = ifd0_offset + 2 + i * 12;
1586        if eoff + 12 > tiff_data.len() {
1587            break;
1588        }
1589        let tag = read_u16_bo(tiff_data, eoff, src_bo);
1590        let value_off = read_u32_bo(tiff_data, eoff + 8, src_bo) as usize;
1591
1592        match tag {
1593            0x8769 => read_ifd_for_merge(tiff_data, value_off, src_bo, target_bo, entries),
1594            0x8825 => read_ifd_for_merge(tiff_data, value_off, src_bo, target_bo, entries),
1595            _ => {}
1596        }
1597    }
1598}
1599
1600/// Read a single IFD and extract entries for merge.
1601fn read_ifd_for_merge(
1602    data: &[u8],
1603    offset: usize,
1604    src_bo: ByteOrderMark,
1605    target_bo: ByteOrderMark,
1606    entries: &mut Vec<exif_writer::IfdEntry>,
1607) {
1608    if offset + 2 > data.len() {
1609        return;
1610    }
1611    let count = read_u16_bo(data, offset, src_bo) as usize;
1612
1613    for i in 0..count {
1614        let eoff = offset + 2 + i * 12;
1615        if eoff + 12 > data.len() {
1616            break;
1617        }
1618
1619        let tag = read_u16_bo(data, eoff, src_bo);
1620        let dtype = read_u16_bo(data, eoff + 2, src_bo);
1621        let count_val = read_u32_bo(data, eoff + 4, src_bo);
1622
1623        // Skip sub-IFD pointers and MakerNote
1624        if tag == 0x8769 || tag == 0x8825 || tag == 0xA005 || tag == 0x927C {
1625            continue;
1626        }
1627
1628        let type_size = match dtype {
1629            1 | 2 | 6 | 7 => 1usize,
1630            3 | 8 => 2,
1631            4 | 9 | 11 | 13 => 4,
1632            5 | 10 | 12 => 8,
1633            _ => continue,
1634        };
1635
1636        let total_size = type_size * count_val as usize;
1637        let raw_data = if total_size <= 4 {
1638            data[eoff + 8..eoff + 12].to_vec()
1639        } else {
1640            let voff = read_u32_bo(data, eoff + 8, src_bo) as usize;
1641            if voff + total_size > data.len() {
1642                continue;
1643            }
1644            data[voff..voff + total_size].to_vec()
1645        };
1646
1647        // Re-encode multi-byte values if byte orders differ
1648        let final_data = if src_bo != target_bo && type_size > 1 {
1649            reencode_bytes(&raw_data, dtype, count_val as usize, src_bo, target_bo)
1650        } else {
1651            raw_data[..total_size].to_vec()
1652        };
1653
1654        let format = match dtype {
1655            1 => exif_writer::ExifFormat::Byte,
1656            2 => exif_writer::ExifFormat::Ascii,
1657            3 => exif_writer::ExifFormat::Short,
1658            4 => exif_writer::ExifFormat::Long,
1659            5 => exif_writer::ExifFormat::Rational,
1660            6 => exif_writer::ExifFormat::SByte,
1661            7 => exif_writer::ExifFormat::Undefined,
1662            8 => exif_writer::ExifFormat::SShort,
1663            9 => exif_writer::ExifFormat::SLong,
1664            10 => exif_writer::ExifFormat::SRational,
1665            11 => exif_writer::ExifFormat::Float,
1666            12 => exif_writer::ExifFormat::Double,
1667            _ => continue,
1668        };
1669
1670        entries.push(exif_writer::IfdEntry {
1671            tag,
1672            format,
1673            data: final_data,
1674        });
1675    }
1676}
1677
1678/// Re-encode multi-byte values when converting between byte orders.
1679fn reencode_bytes(
1680    data: &[u8],
1681    dtype: u16,
1682    count: usize,
1683    src_bo: ByteOrderMark,
1684    dst_bo: ByteOrderMark,
1685) -> Vec<u8> {
1686    let mut out = Vec::with_capacity(data.len());
1687    match dtype {
1688        3 | 8 => {
1689            // 16-bit
1690            for i in 0..count {
1691                let v = read_u16_bo(data, i * 2, src_bo);
1692                match dst_bo {
1693                    ByteOrderMark::LittleEndian => out.extend_from_slice(&v.to_le_bytes()),
1694                    ByteOrderMark::BigEndian => out.extend_from_slice(&v.to_be_bytes()),
1695                }
1696            }
1697        }
1698        4 | 9 | 11 | 13 => {
1699            // 32-bit
1700            for i in 0..count {
1701                let v = read_u32_bo(data, i * 4, src_bo);
1702                match dst_bo {
1703                    ByteOrderMark::LittleEndian => out.extend_from_slice(&v.to_le_bytes()),
1704                    ByteOrderMark::BigEndian => out.extend_from_slice(&v.to_be_bytes()),
1705                }
1706            }
1707        }
1708        5 | 10 => {
1709            // Rational (two 32-bit)
1710            for i in 0..count {
1711                let n = read_u32_bo(data, i * 8, src_bo);
1712                let d = read_u32_bo(data, i * 8 + 4, src_bo);
1713                match dst_bo {
1714                    ByteOrderMark::LittleEndian => {
1715                        out.extend_from_slice(&n.to_le_bytes());
1716                        out.extend_from_slice(&d.to_le_bytes());
1717                    }
1718                    ByteOrderMark::BigEndian => {
1719                        out.extend_from_slice(&n.to_be_bytes());
1720                        out.extend_from_slice(&d.to_be_bytes());
1721                    }
1722                }
1723            }
1724        }
1725        12 => {
1726            // 64-bit double
1727            for i in 0..count {
1728                let mut bytes = [0u8; 8];
1729                bytes.copy_from_slice(&data[i * 8..i * 8 + 8]);
1730                if src_bo != dst_bo {
1731                    bytes.reverse();
1732                }
1733                out.extend_from_slice(&bytes);
1734            }
1735        }
1736        _ => out.extend_from_slice(data),
1737    }
1738    out
1739}
1740
1741fn read_u16_bo(data: &[u8], offset: usize, bo: ByteOrderMark) -> u16 {
1742    if offset + 2 > data.len() { return 0; }
1743    match bo {
1744        ByteOrderMark::LittleEndian => u16::from_le_bytes([data[offset], data[offset + 1]]),
1745        ByteOrderMark::BigEndian => u16::from_be_bytes([data[offset], data[offset + 1]]),
1746    }
1747}
1748
1749fn read_u32_bo(data: &[u8], offset: usize, bo: ByteOrderMark) -> u32 {
1750    if offset + 4 > data.len() { return 0; }
1751    match bo {
1752        ByteOrderMark::LittleEndian => u32::from_le_bytes([data[offset], data[offset + 1], data[offset + 2], data[offset + 3]]),
1753        ByteOrderMark::BigEndian => u32::from_be_bytes([data[offset], data[offset + 1], data[offset + 2], data[offset + 3]]),
1754    }
1755}
1756
1757/// Map tag name to numeric EXIF tag ID.
1758fn tag_name_to_id(name: &str) -> Option<u16> {
1759    encode_exif_tag(name, "", "", ByteOrderMark::BigEndian).map(|(id, _, _)| id)
1760}
1761
/// Turn a tag value into a string usable as a file name.
///
/// Each of `/ \ : * ? " < > |` and every control character is replaced by
/// `_`; afterwards leading and trailing whitespace is trimmed.
fn value_to_filename(value: &str) -> String {
    const RESERVED: &str = "/\\:*?\"<>|";

    let mut cleaned = String::with_capacity(value.len());
    for ch in value.chars() {
        if RESERVED.contains(ch) || ch.is_control() {
            cleaned.push('_');
        } else {
            cleaned.push(ch);
        }
    }
    cleaned.trim().to_string()
}
1775
/// Parse a shift specification such as `"+1:0:0"` (one hour forward) or
/// `"-0:30:0"` (thirty minutes back).
///
/// An optional leading `+` or `-` gives the sign (default `+`). The remainder
/// is one to three `:`-separated unsigned numbers read as hours, then minutes,
/// then seconds; omitted trailing fields are 0.
///
/// Returns `(sign, hours, minutes, seconds)` with `sign` being `1` or `-1`, or
/// `None` if a field is not a valid `u32` or more than three fields are given.
pub fn parse_date_shift(shift: &str) -> Option<(i32, u32, u32, u32)> {
    let (sign, body) = match shift.strip_prefix('-') {
        Some(rest) => (-1, rest),
        None => (1, shift.strip_prefix('+').unwrap_or(shift)),
    };

    let mut fields = [0u32; 3];
    let mut used = 0;
    for piece in body.split(':') {
        if used == fields.len() {
            return None;
        }
        fields[used] = piece.parse().ok()?;
        used += 1;
    }

    let [hours, minutes, seconds] = fields;
    Some((sign, hours, minutes, seconds))
}
1807
1808/// Shift a datetime string by the given amount.
1809/// Input format: "YYYY:MM:DD HH:MM:SS"
1810pub fn shift_datetime(datetime: &str, shift: &str) -> Option<String> {
1811    let (sign, hours, minutes, seconds) = parse_date_shift(shift)?;
1812
1813    // Parse date/time
1814    if datetime.len() < 19 {
1815        return None;
1816    }
1817    let year: i32 = datetime[0..4].parse().ok()?;
1818    let month: u32 = datetime[5..7].parse().ok()?;
1819    let day: u32 = datetime[8..10].parse().ok()?;
1820    let hour: u32 = datetime[11..13].parse().ok()?;
1821    let min: u32 = datetime[14..16].parse().ok()?;
1822    let sec: u32 = datetime[17..19].parse().ok()?;
1823
1824    // Convert to total seconds, shift, convert back
1825    let total_secs = (hour * 3600 + min * 60 + sec) as i64
1826        + sign as i64 * (hours * 3600 + minutes * 60 + seconds) as i64;
1827
1828    let days_shift = if total_secs < 0 {
1829        -1 - (-total_secs - 1) as i64 / 86400
1830    } else {
1831        total_secs / 86400
1832    };
1833
1834    let time_secs = ((total_secs % 86400) + 86400) % 86400;
1835    let new_hour = (time_secs / 3600) as u32;
1836    let new_min = ((time_secs % 3600) / 60) as u32;
1837    let new_sec = (time_secs % 60) as u32;
1838
1839    // Simple day shifting (doesn't handle month/year rollover perfectly for large shifts)
1840    let mut new_day = day as i32 + days_shift as i32;
1841    let mut new_month = month;
1842    let mut new_year = year;
1843
1844    let days_in_month = |m: u32, y: i32| -> i32 {
1845        match m {
1846            1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
1847            4 | 6 | 9 | 11 => 30,
1848            2 => if (y % 4 == 0 && y % 100 != 0) || y % 400 == 0 { 29 } else { 28 },
1849            _ => 30,
1850        }
1851    };
1852
1853    while new_day > days_in_month(new_month, new_year) {
1854        new_day -= days_in_month(new_month, new_year);
1855        new_month += 1;
1856        if new_month > 12 {
1857            new_month = 1;
1858            new_year += 1;
1859        }
1860    }
1861    while new_day < 1 {
1862        new_month = if new_month == 1 { 12 } else { new_month - 1 };
1863        if new_month == 12 {
1864            new_year -= 1;
1865        }
1866        new_day += days_in_month(new_month, new_year);
1867    }
1868
1869    Some(format!(
1870        "{:04}:{:02}:{:02} {:02}:{:02}:{:02}",
1871        new_year, new_month, new_day, new_hour, new_min, new_sec
1872    ))
1873}
1874
/// Format a Unix timestamp (seconds since 1970-01-01 00:00:00) as
/// `"YYYY:MM:DD HH:MM:SS"`.
///
/// No time-zone offset is applied. Negative timestamps are supported and
/// resolve to dates before 1970. The calendar date is derived in constant
/// time with the usual days-to-civil conversion on the proleptic Gregorian
/// calendar instead of stepping through the years one by one.
fn unix_to_datetime(secs: i64) -> String {
    // Euclidean division keeps the time of day non-negative for secs < 0.
    let days = secs.div_euclid(86_400);
    let time = secs.rem_euclid(86_400);
    let h = time / 3600;
    let m = (time % 3600) / 60;
    let s = time % 60;

    // Re-base the day count to 0000-03-01 so that leap days fall at the end
    // of each (March-based) year, then peel off 400-year eras.
    let z = days + 719_468;
    let era = z.div_euclid(146_097);
    let day_of_era = z.rem_euclid(146_097); // 0..=146096
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36_524 - day_of_era / 146_096) / 365; // 0..=399
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100); // 0..=365
    let march_month = (5 * day_of_year + 2) / 153; // 0 = March .. 11 = February
    let day = day_of_year - (153 * march_month + 2) / 5 + 1; // 1..=31
    let month = if march_month < 10 { march_month + 3 } else { march_month - 9 }; // 1..=12
    // January and February belong to the following civil year.
    let year = year_of_era + era * 400 + if month <= 2 { 1 } else { 0 };

    format!("{:04}:{:02}:{:02} {:02}:{:02}:{:02}", year, month, day, h, m, s)
}
1899
/// Render a byte count for display.
///
/// Values below 1024 are printed as `"N bytes"`; larger values are scaled by
/// powers of 1024 and printed with one decimal as `kB`, `MB` or `GB`.
fn format_file_size(bytes: u64) -> String {
    const KILO: u64 = 1024;
    const MEGA: u64 = KILO * 1024;
    const GIGA: u64 = MEGA * 1024;

    let scaled = |unit: u64| bytes as f64 / unit as f64;
    match bytes {
        _ if bytes < KILO => format!("{} bytes", bytes),
        _ if bytes < MEGA => format!("{:.1} kB", scaled(KILO)),
        _ if bytes < GIGA => format!("{:.1} MB", scaled(MEGA)),
        _ => format!("{:.1} GB", scaled(GIGA)),
    }
}
1911
/// Report whether `tag` names one of the tags this module treats as XMP.
///
/// The comparison is done on the lowercased name against a fixed list.
fn is_xmp_tag(tag: &str) -> bool {
    const XMP_TAG_NAMES: [&str; 9] = [
        "title",
        "description",
        "subject",
        "creator",
        "rights",
        "keywords",
        "rating",
        "label",
        "hierarchicalsubject",
    ];

    let lowered = tag.to_lowercase();
    XMP_TAG_NAMES.contains(&lowered.as_str())
}
1920
1921/// Encode an EXIF tag value to binary.
1922/// Returns (tag_id, format, encoded_data) or None if tag is unknown.
1923fn encode_exif_tag(
1924    tag_name: &str,
1925    value: &str,
1926    _group: &str,
1927    bo: ByteOrderMark,
1928) -> Option<(u16, exif_writer::ExifFormat, Vec<u8>)> {
1929    let tag_lower = tag_name.to_lowercase();
1930
1931    // Map common tag names to EXIF tag IDs and formats
1932    let (tag_id, format): (u16, exif_writer::ExifFormat) = match tag_lower.as_str() {
1933        // IFD0 string tags
1934        "imagedescription" => (0x010E, exif_writer::ExifFormat::Ascii),
1935        "make" => (0x010F, exif_writer::ExifFormat::Ascii),
1936        "model" => (0x0110, exif_writer::ExifFormat::Ascii),
1937        "software" => (0x0131, exif_writer::ExifFormat::Ascii),
1938        "modifydate" | "datetime" => (0x0132, exif_writer::ExifFormat::Ascii),
1939        "artist" => (0x013B, exif_writer::ExifFormat::Ascii),
1940        "copyright" => (0x8298, exif_writer::ExifFormat::Ascii),
1941        // IFD0 numeric tags
1942        "orientation" => (0x0112, exif_writer::ExifFormat::Short),
1943        "xresolution" => (0x011A, exif_writer::ExifFormat::Rational),
1944        "yresolution" => (0x011B, exif_writer::ExifFormat::Rational),
1945        "resolutionunit" => (0x0128, exif_writer::ExifFormat::Short),
1946        // ExifIFD tags
1947        "datetimeoriginal" => (0x9003, exif_writer::ExifFormat::Ascii),
1948        "createdate" | "datetimedigitized" => (0x9004, exif_writer::ExifFormat::Ascii),
1949        "usercomment" => (0x9286, exif_writer::ExifFormat::Undefined),
1950        "imageuniqueid" => (0xA420, exif_writer::ExifFormat::Ascii),
1951        "ownername" | "cameraownername" => (0xA430, exif_writer::ExifFormat::Ascii),
1952        "serialnumber" | "bodyserialnumber" => (0xA431, exif_writer::ExifFormat::Ascii),
1953        "lensmake" => (0xA433, exif_writer::ExifFormat::Ascii),
1954        "lensmodel" => (0xA434, exif_writer::ExifFormat::Ascii),
1955        "lensserialnumber" => (0xA435, exif_writer::ExifFormat::Ascii),
1956        _ => return None,
1957    };
1958
1959    let encoded = match format {
1960        exif_writer::ExifFormat::Ascii => exif_writer::encode_ascii(value),
1961        exif_writer::ExifFormat::Short => {
1962            let v: u16 = value.parse().ok()?;
1963            exif_writer::encode_u16(v, bo)
1964        }
1965        exif_writer::ExifFormat::Long => {
1966            let v: u32 = value.parse().ok()?;
1967            exif_writer::encode_u32(v, bo)
1968        }
1969        exif_writer::ExifFormat::Rational => {
1970            // Parse "N/D" or just "N"
1971            if let Some(slash) = value.find('/') {
1972                let num: u32 = value[..slash].trim().parse().ok()?;
1973                let den: u32 = value[slash + 1..].trim().parse().ok()?;
1974                exif_writer::encode_urational(num, den, bo)
1975            } else if let Ok(v) = value.parse::<f64>() {
1976                // Convert float to rational
1977                let den = 10000u32;
1978                let num = (v * den as f64).round() as u32;
1979                exif_writer::encode_urational(num, den, bo)
1980            } else {
1981                return None;
1982            }
1983        }
1984        exif_writer::ExifFormat::Undefined => {
1985            // UserComment: 8 bytes charset + data
1986            let mut data = vec![0x41, 0x53, 0x43, 0x49, 0x49, 0x00, 0x00, 0x00]; // "ASCII\0\0\0"
1987            data.extend_from_slice(value.as_bytes());
1988            data
1989        }
1990        _ => return None,
1991    };
1992
1993    Some((tag_id, format, encoded))
1994}
1995
1996/// Compute text file tags (from Perl Text.pm).
1997fn compute_text_tags(data: &[u8], is_csv: bool) -> Vec<Tag> {
1998    let mut tags = Vec::new();
1999    let mk = |name: &str, val: String| Tag {
2000        id: crate::tag::TagId::Text(name.into()),
2001        name: name.into(), description: name.into(),
2002        group: crate::tag::TagGroup { family0: "File".into(), family1: "File".into(), family2: "Other".into() },
2003        raw_value: Value::String(val.clone()), print_value: val, priority: 0,
2004    };
2005
2006    // Detect encoding and BOM
2007    let is_ascii = data.iter().all(|&b| b < 128);
2008    let has_utf8_bom = data.starts_with(&[0xEF, 0xBB, 0xBF]);
2009    let has_utf16le_bom = data.starts_with(&[0xFF, 0xFE]) && !data.starts_with(&[0xFF, 0xFE, 0x00, 0x00]);
2010    let has_utf16be_bom = data.starts_with(&[0xFE, 0xFF]);
2011    let has_utf32le_bom = data.starts_with(&[0xFF, 0xFE, 0x00, 0x00]);
2012    let has_utf32be_bom = data.starts_with(&[0x00, 0x00, 0xFE, 0xFF]);
2013
2014    // Detect if file has weird non-text control characters (like multi-byte unicode without BOM)
2015    let has_weird_ctrl = data.iter().any(|&b| (b <= 0x06) || (b >= 0x0e && b <= 0x1a) || (b >= 0x1c && b <= 0x1f) || b == 0x7f);
2016
2017    let (encoding, is_bom, is_utf16) = if has_utf32le_bom {
2018        ("utf-32le", true, false)
2019    } else if has_utf32be_bom {
2020        ("utf-32be", true, false)
2021    } else if has_utf16le_bom {
2022        ("utf-16le", true, true)
2023    } else if has_utf16be_bom {
2024        ("utf-16be", true, true)
2025    } else if has_weird_ctrl {
2026        // Not a text file (has binary-like control chars but no recognized multi-byte marker)
2027        return tags;
2028    } else if is_ascii {
2029        ("us-ascii", false, false)
2030    } else {
2031        // Check UTF-8
2032        let is_valid_utf8 = std::str::from_utf8(data).is_ok();
2033        if is_valid_utf8 {
2034            if has_utf8_bom {
2035                ("utf-8", true, false)
2036            } else {
2037                // Check if it has high bytes suggesting iso-8859-1 vs utf-8
2038                // Perl's IsUTF8: returns >0 if valid UTF-8 with multi-byte, 0 if ASCII, <0 if invalid
2039                // For simplicity: valid UTF-8 without BOM = utf-8
2040                ("utf-8", false, false)
2041            }
2042        } else if !data.iter().any(|&b| b >= 0x80 && b <= 0x9f) {
2043            ("iso-8859-1", false, false)
2044        } else {
2045            ("unknown-8bit", false, false)
2046        }
2047    };
2048
2049    tags.push(mk("MIMEEncoding", encoding.into()));
2050
2051    if is_bom {
2052        tags.push(mk("ByteOrderMark", "Yes".into()));
2053    }
2054
2055    // Count newlines and detect type
2056    let has_cr = data.contains(&b'\r');
2057    let has_lf = data.contains(&b'\n');
2058    let newline_type = if has_cr && has_lf { "Windows CRLF" }
2059        else if has_lf { "Unix LF" }
2060        else if has_cr { "Macintosh CR" }
2061        else { "(none)" };
2062    tags.push(mk("Newlines", newline_type.into()));
2063
2064    if is_csv {
2065        // CSV analysis: detect delimiter, quoting, column count, row count
2066        let text = String::from_utf8_lossy(data);
2067        let mut delim = "";
2068        let mut quot = "";
2069        let mut ncols = 1usize;
2070        let mut nrows = 0usize;
2071
2072        for line in text.lines() {
2073            if nrows == 0 {
2074                // Detect delimiter from first line
2075                let comma_count = line.matches(',').count();
2076                let semi_count = line.matches(';').count();
2077                let tab_count = line.matches('\t').count();
2078                if comma_count > semi_count && comma_count > tab_count {
2079                    delim = ",";
2080                    ncols = comma_count + 1;
2081                } else if semi_count > tab_count {
2082                    delim = ";";
2083                    ncols = semi_count + 1;
2084                } else if tab_count > 0 {
2085                    delim = "\t";
2086                    ncols = tab_count + 1;
2087                } else {
2088                    delim = "";
2089                    ncols = 1;
2090                }
2091                // Detect quoting
2092                if line.contains('"') { quot = "\""; }
2093                else if line.contains('\'') { quot = "'"; }
2094            }
2095            nrows += 1;
2096            if nrows >= 1000 { break; }
2097        }
2098
2099        let delim_display = match delim {
2100            "," => "Comma",
2101            ";" => "Semicolon",
2102            "\t" => "Tab",
2103            _ => "(none)",
2104        };
2105        let quot_display = match quot {
2106            "\"" => "Double quotes",
2107            "'" => "Single quotes",
2108            _ => "(none)",
2109        };
2110
2111        tags.push(mk("Delimiter", delim_display.into()));
2112        tags.push(mk("Quoting", quot_display.into()));
2113        tags.push(mk("ColumnCount", ncols.to_string()));
2114        if nrows > 0 {
2115            tags.push(mk("RowCount", nrows.to_string()));
2116        }
2117    } else if !is_utf16 {
2118        // Line count and word count for plain text files (not UTF-16/32)
2119        let line_count = data.iter().filter(|&&b| b == b'\n').count();
2120        let line_count = if line_count == 0 && !data.is_empty() { 1 } else { line_count };
2121        tags.push(mk("LineCount", line_count.to_string()));
2122
2123        let text = String::from_utf8_lossy(data);
2124        let word_count = text.split_whitespace().count();
2125        tags.push(mk("WordCount", word_count.to_string()));
2126    }
2127
2128    tags
2129}
exiftool_rs/exiftool.rs

exiftool_rs/
exiftool.rs