exiftool_rs/
exiftool.rs

1//! Core ExifTool struct and public API.
2//!
3//! This is the main entry point for reading metadata from files.
4//! Mirrors ExifTool.pm's ImageInfo/ExtractInfo/GetInfo pipeline.
5
6use std::collections::HashMap;
7use std::fs;
8use std::path::Path;
9
10use crate::error::{Error, Result};
11use crate::file_type::{self, FileType};
12use crate::formats;
13use crate::metadata::exif::ByteOrderMark;
14use crate::tag::Tag;
15use crate::value::Value;
16use crate::writer::{exif_writer, iptc_writer, jpeg_writer, matroska_writer, mp4_writer, pdf_writer, png_writer, psd_writer, tiff_writer, webp_writer, xmp_writer};
17
/// Settings that control how metadata is extracted and presented.
#[derive(Debug, Clone)]
pub struct Options {
    /// Include duplicate tags (the same tag name may occur in different groups).
    pub duplicates: bool,
    /// Apply print conversions so values come out human-readable.
    pub print_conv: bool,
    /// Fast scan level: 0=normal, 1=skip composite, 2=skip maker notes, 3=skip thumbnails.
    pub fast_scan: u8,
    /// Tag names to extract; an empty list means all tags.
    pub requested_tags: Vec<String>,
}

impl Default for Options {
    /// Defaults: duplicates off, print conversions on, normal scan, no tag filter.
    fn default() -> Self {
        Options {
            requested_tags: Vec::new(),
            fast_scan: 0,
            print_conv: true,
            duplicates: false,
        }
    }
}
41
/// A queued tag change for writing.
///
/// `value` tells an assignment (`Some`) apart from a deletion (`None`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NewValue {
    /// Tag name (e.g., "Artist", "Copyright", "Title"); a group prefix, if any,
    /// is kept separately in `group`.
    pub tag: String,
    /// Group prefix if specified (e.g., "EXIF", "XMP", "IPTC")
    pub group: Option<String>,
    /// New value (None = delete tag)
    pub value: Option<String>,
}
64
/// The main ExifTool engine — read, write, and edit metadata.
///
/// An instance carries the processing [`Options`] and the list of queued
/// [`NewValue`] changes that the writing API applies.
///
/// # Reading metadata
/// ```no_run
/// use exiftool_rs::ExifTool;
///
/// let et = ExifTool::new();
///
/// // Full tag structs
/// let tags = et.extract_info("photo.jpg").unwrap();
/// for tag in &tags {
///     println!("[{}] {}: {}", tag.group.family0, tag.name, tag.print_value);
/// }
///
/// // Simple name→value map
/// let info = et.image_info("photo.jpg").unwrap();
/// println!("Camera: {}", info.get("Model").unwrap_or(&String::new()));
/// ```
///
/// # Writing metadata
/// ```no_run
/// use exiftool_rs::ExifTool;
///
/// let mut et = ExifTool::new();
/// et.set_new_value("Artist", Some("John Doe"));
/// et.set_new_value("Copyright", Some("2024"));
/// et.write_info("input.jpg", "output.jpg").unwrap();
/// ```
pub struct ExifTool {
    // Processing options used by this instance.
    options: Options,
    // Tag changes queued by the writing API, in the order they were added.
    new_values: Vec<NewValue>,
}
97
/// Result of metadata extraction: maps tag names to display values.
///
/// This is the map type returned by `ExifTool::image_info`.
pub type ImageInfo = HashMap<String, String>;
100
101impl ExifTool {
102    /// Create a new ExifTool instance with default options.
103    pub fn new() -> Self {
104        Self {
105            options: Options::default(),
106            new_values: Vec::new(),
107        }
108    }
109
110    /// Create a new ExifTool instance with custom options.
111    pub fn with_options(options: Options) -> Self {
112        Self {
113            options,
114            new_values: Vec::new(),
115        }
116    }
117
118    /// Get a mutable reference to the options.
119    pub fn options_mut(&mut self) -> &mut Options {
120        &mut self.options
121    }
122
123    /// Get a reference to the options.
124    pub fn options(&self) -> &Options {
125        &self.options
126    }
127
128    // ================================================================
129    // Writing API
130    // ================================================================
131
132    /// Queue a new tag value for writing.
133    ///
134    /// Call this one or more times, then call `write_info()` to apply changes.
135    ///
136    /// # Arguments
137    /// * `tag` - Tag name, optionally prefixed with group (e.g., "Artist", "XMP:Title", "EXIF:Copyright")
138    /// * `value` - New value, or None to delete the tag
139    ///
140    /// # Example
141    /// ```no_run
142    /// use exiftool_rs::ExifTool;
143    /// let mut et = ExifTool::new();
144    /// et.set_new_value("Artist", Some("John Doe"));
145    /// et.set_new_value("Copyright", Some("2024 John Doe"));
146    /// et.set_new_value("XMP:Title", Some("My Photo"));
147    /// et.write_info("photo.jpg", "photo_out.jpg").unwrap();
148    /// ```
149    pub fn set_new_value(&mut self, tag: &str, value: Option<&str>) {
150        let (group, tag_name) = if let Some(colon_pos) = tag.find(':') {
151            (Some(tag[..colon_pos].to_string()), tag[colon_pos + 1..].to_string())
152        } else {
153            (None, tag.to_string())
154        };
155
156        self.new_values.push(NewValue {
157            tag: tag_name,
158            group,
159            value: value.map(|v| v.to_string()),
160        });
161    }
162
163    /// Clear all queued new values.
164    pub fn clear_new_values(&mut self) {
165        self.new_values.clear();
166    }
167
168    /// Copy tags from a source file, queuing them as new values.
169    ///
170    /// Reads all tags from `src_path` and queues them for writing.
171    /// Optionally filter by tag names.
172    pub fn set_new_values_from_file<P: AsRef<Path>>(
173        &mut self,
174        src_path: P,
175        tags_to_copy: Option<&[&str]>,
176    ) -> Result<u32> {
177        let src_tags = self.extract_info(src_path)?;
178        let mut count = 0u32;
179
180        for tag in &src_tags {
181            // Skip file-level tags that shouldn't be copied
182            if tag.group.family0 == "File" || tag.group.family0 == "Composite" {
183                continue;
184            }
185            // Skip binary/undefined data and empty values
186            if tag.print_value.starts_with("(Binary") || tag.print_value.starts_with("(Undefined") {
187                continue;
188            }
189            if tag.print_value.is_empty() {
190                continue;
191            }
192
193            // Filter by requested tags
194            if let Some(filter) = tags_to_copy {
195                let name_lower = tag.name.to_lowercase();
196                if !filter.iter().any(|f| f.to_lowercase() == name_lower) {
197                    continue;
198                }
199            }
200
201            let _full_tag = format!("{}:{}", tag.group.family0, tag.name);
202            self.new_values.push(NewValue {
203                tag: tag.name.clone(),
204                group: Some(tag.group.family0.clone()),
205                value: Some(tag.print_value.clone()),
206            });
207            count += 1;
208        }
209
210        Ok(count)
211    }
212
213    /// Set a file's name based on a tag value.
214    pub fn set_file_name_from_tag<P: AsRef<Path>>(
215        &self,
216        path: P,
217        tag_name: &str,
218        template: &str,
219    ) -> Result<String> {
220        let path = path.as_ref();
221        let tags = self.extract_info(path)?;
222
223        let tag_value = tags
224            .iter()
225            .find(|t| t.name.to_lowercase() == tag_name.to_lowercase())
226            .map(|t| &t.print_value)
227            .ok_or_else(|| Error::TagNotFound(tag_name.to_string()))?;
228
229        // Build new filename from template
230        // Template: "prefix%value%suffix.ext" or just use the tag value
231        let new_name = if template.contains('%') {
232            template.replace("%v", value_to_filename(tag_value).as_str())
233        } else {
234            // Default: use tag value as filename, keep extension
235            let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
236            let clean = value_to_filename(tag_value);
237            if ext.is_empty() {
238                clean
239            } else {
240                format!("{}.{}", clean, ext)
241            }
242        };
243
244        let parent = path.parent().unwrap_or(Path::new(""));
245        let new_path = parent.join(&new_name);
246
247        fs::rename(path, &new_path).map_err(Error::Io)?;
248        Ok(new_path.to_string_lossy().to_string())
249    }
250
251    /// Write queued changes to a file.
252    ///
253    /// If `dst_path` is the same as `src_path`, the file is modified in-place
254    /// (via a temporary file).
255    pub fn write_info<P: AsRef<Path>, Q: AsRef<Path>>(&self, src_path: P, dst_path: Q) -> Result<u32> {
256        let src_path = src_path.as_ref();
257        let dst_path = dst_path.as_ref();
258        let data = fs::read(src_path).map_err(Error::Io)?;
259
260        let file_type = self.detect_file_type(&data, src_path)?;
261        let output = self.apply_changes(&data, file_type)?;
262
263        // Write to temp file first, then rename (atomic)
264        let temp_path = dst_path.with_extension("exiftool_tmp");
265        fs::write(&temp_path, &output).map_err(Error::Io)?;
266        fs::rename(&temp_path, dst_path).map_err(Error::Io)?;
267
268        Ok(self.new_values.len() as u32)
269    }
270
271    /// Apply queued changes to in-memory data.
272    fn apply_changes(&self, data: &[u8], file_type: FileType) -> Result<Vec<u8>> {
273        match file_type {
274            FileType::Jpeg => self.write_jpeg(data),
275            FileType::Png => self.write_png(data),
276            FileType::Tiff | FileType::Dng | FileType::Cr2 | FileType::Nef
277            | FileType::Arw | FileType::Orf | FileType::Pef => self.write_tiff(data),
278            FileType::WebP => self.write_webp(data),
279            FileType::Mp4 | FileType::QuickTime | FileType::M4a
280            | FileType::ThreeGP | FileType::F4v => self.write_mp4(data),
281            FileType::Psd => self.write_psd(data),
282            FileType::Pdf => self.write_pdf(data),
283            FileType::Heif | FileType::Avif => self.write_mp4(data),
284            FileType::Mkv | FileType::WebM => self.write_matroska(data),
285            FileType::Gif => {
286                let comment = self.new_values.iter()
287                    .find(|nv| nv.tag.to_lowercase() == "comment")
288                    .and_then(|nv| nv.value.clone());
289                crate::writer::gif_writer::write_gif(data, comment.as_deref())
290            }
291            FileType::Flac => {
292                let changes: Vec<(&str, &str)> = self.new_values.iter()
293                    .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
294                    .collect();
295                crate::writer::flac_writer::write_flac(data, &changes)
296            }
297            FileType::Mp3 | FileType::Aiff => {
298                let changes: Vec<(&str, &str)> = self.new_values.iter()
299                    .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
300                    .collect();
301                crate::writer::id3_writer::write_id3(data, &changes)
302            }
303            FileType::Jp2 | FileType::Jxl => {
304                let new_xmp = if self.new_values.iter().any(|nv| nv.group.as_deref() == Some("XMP")) {
305                    let refs: Vec<&NewValue> = self.new_values.iter()
306                        .filter(|nv| nv.group.as_deref() == Some("XMP"))
307                        .collect();
308                    Some(self.build_new_xmp(&refs))
309                } else { None };
310                crate::writer::jp2_writer::write_jp2(data, new_xmp.as_deref(), None)
311            }
312            FileType::PostScript => {
313                let changes: Vec<(&str, &str)> = self.new_values.iter()
314                    .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
315                    .collect();
316                crate::writer::ps_writer::write_postscript(data, &changes)
317            }
318            FileType::Ogg | FileType::Opus => {
319                let changes: Vec<(&str, &str)> = self.new_values.iter()
320                    .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
321                    .collect();
322                crate::writer::ogg_writer::write_ogg(data, &changes)
323            }
324            FileType::Xmp => {
325                let props: Vec<xmp_writer::XmpProperty> = self.new_values.iter()
326                    .filter_map(|nv| {
327                        let val = nv.value.as_deref()?;
328                        Some(xmp_writer::XmpProperty {
329                            namespace: nv.group.clone().unwrap_or_else(|| "dc".into()),
330                            property: nv.tag.clone(),
331                            values: vec![val.to_string()],
332                            prop_type: xmp_writer::XmpPropertyType::Simple,
333                        })
334                    })
335                    .collect();
336                Ok(crate::writer::xmp_sidecar_writer::write_xmp_sidecar(&props))
337            }
338            _ => Err(Error::UnsupportedFileType(format!("writing not yet supported for {}", file_type))),
339        }
340    }
341
342    /// Write metadata changes to JPEG data.
343    fn write_jpeg(&self, data: &[u8]) -> Result<Vec<u8>> {
344        // Classify new values by target group
345        let mut exif_values: Vec<&NewValue> = Vec::new();
346        let mut xmp_values: Vec<&NewValue> = Vec::new();
347        let mut iptc_values: Vec<&NewValue> = Vec::new();
348        let mut comment_value: Option<&str> = None;
349        let mut remove_exif = false;
350        let mut remove_xmp = false;
351        let mut remove_iptc = false;
352        let mut remove_comment = false;
353
354        for nv in &self.new_values {
355            let group = nv.group.as_deref().unwrap_or("");
356            let group_upper = group.to_uppercase();
357
358            // Check for group deletion
359            if nv.value.is_none() && nv.tag == "*" {
360                match group_upper.as_str() {
361                    "EXIF" => { remove_exif = true; continue; }
362                    "XMP" => { remove_xmp = true; continue; }
363                    "IPTC" => { remove_iptc = true; continue; }
364                    _ => {}
365                }
366            }
367
368            match group_upper.as_str() {
369                "XMP" => xmp_values.push(nv),
370                "IPTC" => iptc_values.push(nv),
371                "EXIF" | "IFD0" | "EXIFIFD" | "GPS" => exif_values.push(nv),
372                "" => {
373                    // Auto-detect best group based on tag name
374                    if nv.tag.to_lowercase() == "comment" {
375                        if nv.value.is_none() {
376                            remove_comment = true;
377                        } else {
378                            comment_value = nv.value.as_deref();
379                        }
380                    } else if is_xmp_tag(&nv.tag) {
381                        xmp_values.push(nv);
382                    } else {
383                        exif_values.push(nv);
384                    }
385                }
386                _ => exif_values.push(nv), // default to EXIF
387            }
388        }
389
390        // Build new EXIF data
391        let new_exif = if !exif_values.is_empty() {
392            Some(self.build_new_exif(data, &exif_values)?)
393        } else {
394            None
395        };
396
397        // Build new XMP data
398        let new_xmp = if !xmp_values.is_empty() {
399            Some(self.build_new_xmp(&xmp_values))
400        } else {
401            None
402        };
403
404        // Build new IPTC data
405        let new_iptc_data = if !iptc_values.is_empty() {
406            let records: Vec<iptc_writer::IptcRecord> = iptc_values
407                .iter()
408                .filter_map(|nv| {
409                    let value = nv.value.as_deref()?;
410                    let (record, dataset) = iptc_writer::tag_name_to_iptc(&nv.tag)?;
411                    Some(iptc_writer::IptcRecord {
412                        record,
413                        dataset,
414                        data: value.as_bytes().to_vec(),
415                    })
416                })
417                .collect();
418            if records.is_empty() {
419                None
420            } else {
421                Some(iptc_writer::build_iptc(&records))
422            }
423        } else {
424            None
425        };
426
427        // Rewrite JPEG
428        jpeg_writer::write_jpeg(
429            data,
430            new_exif.as_deref(),
431            new_xmp.as_deref(),
432            new_iptc_data.as_deref(),
433            comment_value,
434            remove_exif,
435            remove_xmp,
436            remove_iptc,
437            remove_comment,
438        )
439    }
440
441    /// Build new EXIF data by merging existing EXIF with queued changes.
442    fn build_new_exif(&self, jpeg_data: &[u8], values: &[&NewValue]) -> Result<Vec<u8>> {
443        let bo = ByteOrderMark::BigEndian;
444        let mut ifd0_entries = Vec::new();
445        let mut exif_entries = Vec::new();
446        let mut gps_entries = Vec::new();
447
448        // Step 1: Extract existing EXIF entries from the JPEG
449        let existing = extract_existing_exif_entries(jpeg_data, bo);
450        for entry in &existing {
451            match classify_exif_tag(entry.tag) {
452                ExifIfdGroup::Ifd0 => ifd0_entries.push(entry.clone()),
453                ExifIfdGroup::ExifIfd => exif_entries.push(entry.clone()),
454                ExifIfdGroup::Gps => gps_entries.push(entry.clone()),
455            }
456        }
457
458        // Step 2: Apply queued changes (add/replace/delete)
459        let deleted_tags: Vec<u16> = values
460            .iter()
461            .filter(|nv| nv.value.is_none())
462            .filter_map(|nv| tag_name_to_id(&nv.tag))
463            .collect();
464
465        // Remove deleted tags
466        ifd0_entries.retain(|e| !deleted_tags.contains(&e.tag));
467        exif_entries.retain(|e| !deleted_tags.contains(&e.tag));
468        gps_entries.retain(|e| !deleted_tags.contains(&e.tag));
469
470        // Add/replace new values
471        for nv in values {
472            if nv.value.is_none() {
473                continue;
474            }
475            let value_str = nv.value.as_deref().unwrap_or("");
476            let group = nv.group.as_deref().unwrap_or("");
477
478            if let Some((tag_id, format, encoded)) = encode_exif_tag(&nv.tag, value_str, group, bo) {
479                let entry = exif_writer::IfdEntry {
480                    tag: tag_id,
481                    format,
482                    data: encoded,
483                };
484
485                let target = match group.to_uppercase().as_str() {
486                    "GPS" => &mut gps_entries,
487                    "EXIFIFD" => &mut exif_entries,
488                    _ => match classify_exif_tag(tag_id) {
489                        ExifIfdGroup::ExifIfd => &mut exif_entries,
490                        ExifIfdGroup::Gps => &mut gps_entries,
491                        ExifIfdGroup::Ifd0 => &mut ifd0_entries,
492                    },
493                };
494
495                // Replace existing or add new
496                if let Some(existing) = target.iter_mut().find(|e| e.tag == tag_id) {
497                    *existing = entry;
498                } else {
499                    target.push(entry);
500                }
501            }
502        }
503
504        // Remove sub-IFD pointers from entries (they'll be rebuilt by build_exif)
505        ifd0_entries.retain(|e| e.tag != 0x8769 && e.tag != 0x8825 && e.tag != 0xA005);
506
507        exif_writer::build_exif(&ifd0_entries, &exif_entries, &gps_entries, bo)
508    }
509
510    /// Write metadata changes to PNG data.
511    fn write_png(&self, data: &[u8]) -> Result<Vec<u8>> {
512        let mut new_text: Vec<(&str, &str)> = Vec::new();
513        let mut remove_text: Vec<&str> = Vec::new();
514
515        // Collect text-based changes
516        // We need to hold the strings in vectors that live long enough
517        let owned_pairs: Vec<(String, String)> = self.new_values.iter()
518            .filter(|nv| nv.value.is_some())
519            .map(|nv| (nv.tag.clone(), nv.value.clone().unwrap()))
520            .collect();
521
522        for (tag, value) in &owned_pairs {
523            new_text.push((tag.as_str(), value.as_str()));
524        }
525
526        for nv in &self.new_values {
527            if nv.value.is_none() {
528                remove_text.push(&nv.tag);
529            }
530        }
531
532        png_writer::write_png(data, &new_text, None, &remove_text)
533    }
534
535    /// Write metadata changes to PSD data.
536    fn write_psd(&self, data: &[u8]) -> Result<Vec<u8>> {
537        let mut iptc_values = Vec::new();
538        let mut xmp_values = Vec::new();
539
540        for nv in &self.new_values {
541            let group = nv.group.as_deref().unwrap_or("").to_uppercase();
542            match group.as_str() {
543                "XMP" => xmp_values.push(nv),
544                "IPTC" => iptc_values.push(nv),
545                _ => {
546                    if is_xmp_tag(&nv.tag) { xmp_values.push(nv); }
547                    else { iptc_values.push(nv); }
548                }
549            }
550        }
551
552        let new_iptc = if !iptc_values.is_empty() {
553            let records: Vec<_> = iptc_values.iter().filter_map(|nv| {
554                let value = nv.value.as_deref()?;
555                let (record, dataset) = iptc_writer::tag_name_to_iptc(&nv.tag)?;
556                Some(iptc_writer::IptcRecord { record, dataset, data: value.as_bytes().to_vec() })
557            }).collect();
558            if records.is_empty() { None } else { Some(iptc_writer::build_iptc(&records)) }
559        } else { None };
560
561        let new_xmp = if !xmp_values.is_empty() {
562            let refs: Vec<&NewValue> = xmp_values.iter().copied().collect();
563            Some(self.build_new_xmp(&refs))
564        } else { None };
565
566        psd_writer::write_psd(data, new_iptc.as_deref(), new_xmp.as_deref())
567    }
568
569    /// Write metadata changes to Matroska (MKV/WebM) data.
570    fn write_matroska(&self, data: &[u8]) -> Result<Vec<u8>> {
571        let changes: Vec<(&str, &str)> = self.new_values.iter()
572            .filter_map(|nv| {
573                let value = nv.value.as_deref()?;
574                Some((nv.tag.as_str(), value))
575            })
576            .collect();
577
578        matroska_writer::write_matroska(data, &changes)
579    }
580
581    /// Write metadata changes to PDF data.
582    fn write_pdf(&self, data: &[u8]) -> Result<Vec<u8>> {
583        let changes: Vec<(&str, &str)> = self.new_values.iter()
584            .filter_map(|nv| {
585                let value = nv.value.as_deref()?;
586                Some((nv.tag.as_str(), value))
587            })
588            .collect();
589
590        pdf_writer::write_pdf(data, &changes)
591    }
592
593    /// Write metadata changes to MP4/MOV data.
594    fn write_mp4(&self, data: &[u8]) -> Result<Vec<u8>> {
595        let mut ilst_tags: Vec<([u8; 4], String)> = Vec::new();
596        let mut xmp_values: Vec<&NewValue> = Vec::new();
597
598        for nv in &self.new_values {
599            if nv.value.is_none() { continue; }
600            let group = nv.group.as_deref().unwrap_or("").to_uppercase();
601            if group == "XMP" {
602                xmp_values.push(nv);
603            } else if let Some(key) = mp4_writer::tag_to_ilst_key(&nv.tag) {
604                ilst_tags.push((key, nv.value.clone().unwrap()));
605            }
606        }
607
608        let tag_refs: Vec<(&[u8; 4], &str)> = ilst_tags.iter()
609            .map(|(k, v)| (k, v.as_str()))
610            .collect();
611
612        let new_xmp = if !xmp_values.is_empty() {
613            let refs: Vec<&NewValue> = xmp_values.iter().copied().collect();
614            Some(self.build_new_xmp(&refs))
615        } else {
616            None
617        };
618
619        mp4_writer::write_mp4(data, &tag_refs, new_xmp.as_deref())
620    }
621
622    /// Write metadata changes to WebP data.
623    fn write_webp(&self, data: &[u8]) -> Result<Vec<u8>> {
624        let mut exif_values: Vec<&NewValue> = Vec::new();
625        let mut xmp_values: Vec<&NewValue> = Vec::new();
626        let mut remove_exif = false;
627        let mut remove_xmp = false;
628
629        for nv in &self.new_values {
630            let group = nv.group.as_deref().unwrap_or("").to_uppercase();
631            if nv.value.is_none() && nv.tag == "*" {
632                if group == "EXIF" { remove_exif = true; }
633                if group == "XMP" { remove_xmp = true; }
634                continue;
635            }
636            match group.as_str() {
637                "XMP" => xmp_values.push(nv),
638                _ => exif_values.push(nv),
639            }
640        }
641
642        let new_exif = if !exif_values.is_empty() {
643            let bo = ByteOrderMark::BigEndian;
644            let mut entries = Vec::new();
645            for nv in &exif_values {
646                if let Some(ref v) = nv.value {
647                    let group = nv.group.as_deref().unwrap_or("");
648                    if let Some((tag_id, format, encoded)) = encode_exif_tag(&nv.tag, v, group, bo) {
649                        entries.push(exif_writer::IfdEntry { tag: tag_id, format, data: encoded });
650                    }
651                }
652            }
653            if !entries.is_empty() {
654                Some(exif_writer::build_exif(&entries, &[], &[], bo)?)
655            } else {
656                None
657            }
658        } else {
659            None
660        };
661
662        let new_xmp = if !xmp_values.is_empty() {
663            Some(self.build_new_xmp(&xmp_values.iter().map(|v| *v).collect::<Vec<_>>()))
664        } else {
665            None
666        };
667
668        webp_writer::write_webp(
669            data,
670            new_exif.as_deref(),
671            new_xmp.as_deref(),
672            remove_exif,
673            remove_xmp,
674        )
675    }
676
677    /// Write metadata changes to TIFF data.
678    fn write_tiff(&self, data: &[u8]) -> Result<Vec<u8>> {
679        let bo = if data.starts_with(b"II") {
680            ByteOrderMark::LittleEndian
681        } else {
682            ByteOrderMark::BigEndian
683        };
684
685        let mut changes: Vec<(u16, Vec<u8>)> = Vec::new();
686        for nv in &self.new_values {
687            if let Some(ref value) = nv.value {
688                let group = nv.group.as_deref().unwrap_or("");
689                if let Some((tag_id, _format, encoded)) = encode_exif_tag(&nv.tag, value, group, bo) {
690                    changes.push((tag_id, encoded));
691                }
692            }
693        }
694
695        tiff_writer::write_tiff(data, &changes)
696    }
697
698    /// Build new XMP data from queued values.
699    fn build_new_xmp(&self, values: &[&NewValue]) -> Vec<u8> {
700        let mut properties = Vec::new();
701
702        for nv in values {
703            let value_str = match &nv.value {
704                Some(v) => v.clone(),
705                None => continue,
706            };
707
708            let ns = nv.group.as_deref().unwrap_or("dc").to_lowercase();
709            let ns = if ns == "xmp" { "xmp".to_string() } else { ns };
710
711            let prop_type = match nv.tag.to_lowercase().as_str() {
712                "title" | "description" | "rights" => xmp_writer::XmpPropertyType::LangAlt,
713                "subject" | "keywords" => xmp_writer::XmpPropertyType::Bag,
714                "creator" => xmp_writer::XmpPropertyType::Seq,
715                _ => xmp_writer::XmpPropertyType::Simple,
716            };
717
718            let values = if matches!(prop_type, xmp_writer::XmpPropertyType::Bag | xmp_writer::XmpPropertyType::Seq) {
719                value_str.split(',').map(|s| s.trim().to_string()).collect()
720            } else {
721                vec![value_str]
722            };
723
724            properties.push(xmp_writer::XmpProperty {
725                namespace: ns,
726                property: nv.tag.clone(),
727                values,
728                prop_type,
729            });
730        }
731
732        xmp_writer::build_xmp(&properties).into_bytes()
733    }
734
735    // ================================================================
736    // Reading API
737    // ================================================================
738
739    /// Extract metadata from a file and return a simple name→value map.
740    ///
741    /// This is the high-level one-shot API, equivalent to ExifTool's `ImageInfo()`.
742    pub fn image_info<P: AsRef<Path>>(&self, path: P) -> Result<ImageInfo> {
743        let tags = self.extract_info(path)?;
744        Ok(self.get_info(&tags))
745    }
746
747    /// Extract all metadata tags from a file.
748    ///
749    /// Returns the full `Tag` structs with groups, raw values, etc.
750    pub fn extract_info<P: AsRef<Path>>(&self, path: P) -> Result<Vec<Tag>> {
751        let path = path.as_ref();
752        let data = fs::read(path).map_err(Error::Io)?;
753
754        self.extract_info_from_bytes(&data, path)
755    }
756
757    /// Extract metadata from in-memory data.
758    pub fn extract_info_from_bytes(&self, data: &[u8], path: &Path) -> Result<Vec<Tag>> {
759        let file_type_result = self.detect_file_type(data, path);
760        let (file_type, mut tags) = match file_type_result {
761            Ok(ft) => {
762                let t = self.process_file(data, ft).or_else(|_| {
763                    self.process_by_extension(data, path)
764                })?;
765                (Some(ft), t)
766            }
767            Err(_) => {
768                // File type unknown by magic/extension — try extension-based fallback
769                let t = self.process_by_extension(data, path)?;
770                (None, t)
771            }
772        };
773        let file_type = file_type.unwrap_or(FileType::Zip); // placeholder for file-level tags
774
775        // Add file-level tags
776        tags.push(Tag {
777            id: crate::tag::TagId::Text("FileType".into()),
778            name: "FileType".into(),
779            description: "File Type".into(),
780            group: crate::tag::TagGroup {
781                family0: "File".into(),
782                family1: "File".into(),
783                family2: "Other".into(),
784            },
785            raw_value: Value::String(format!("{:?}", file_type)),
786            print_value: file_type.description().to_string(),
787            priority: 0,
788        });
789
790        tags.push(Tag {
791            id: crate::tag::TagId::Text("MIMEType".into()),
792            name: "MIMEType".into(),
793            description: "MIME Type".into(),
794            group: crate::tag::TagGroup {
795                family0: "File".into(),
796                family1: "File".into(),
797                family2: "Other".into(),
798            },
799            raw_value: Value::String(file_type.mime_type().to_string()),
800            print_value: file_type.mime_type().to_string(),
801            priority: 0,
802        });
803
804        if let Ok(metadata) = fs::metadata(path) {
805            tags.push(Tag {
806                id: crate::tag::TagId::Text("FileSize".into()),
807                name: "FileSize".into(),
808                description: "File Size".into(),
809                group: crate::tag::TagGroup {
810                    family0: "File".into(),
811                    family1: "File".into(),
812                    family2: "Other".into(),
813                },
814                raw_value: Value::U32(metadata.len() as u32),
815                print_value: format_file_size(metadata.len()),
816                priority: 0,
817            });
818        }
819
820        // Add more file-level tags
821        let file_tag = |name: &str, val: Value| -> Tag {
822            Tag {
823                id: crate::tag::TagId::Text(name.to_string()),
824                name: name.to_string(), description: name.to_string(),
825                group: crate::tag::TagGroup { family0: "File".into(), family1: "File".into(), family2: "Other".into() },
826                raw_value: val.clone(), print_value: val.to_display_string(), priority: 0,
827            }
828        };
829
830        if let Some(fname) = path.file_name().and_then(|n| n.to_str()) {
831            tags.push(file_tag("FileName", Value::String(fname.to_string())));
832        }
833        if let Some(dir) = path.parent().and_then(|p| p.to_str()) {
834            tags.push(file_tag("Directory", Value::String(dir.to_string())));
835        }
836        // Use the canonical (first) extension from the FileType, matching Perl ExifTool behavior.
837        let canonical_ext = file_type.extensions().first().copied().unwrap_or("");
838        if !canonical_ext.is_empty() {
839            tags.push(file_tag("FileTypeExtension", Value::String(canonical_ext.to_string())));
840        }
841
842        #[cfg(unix)]
843        if let Ok(metadata) = fs::metadata(path) {
844            use std::os::unix::fs::MetadataExt;
845            let mode = metadata.mode();
846            tags.push(file_tag("FilePermissions", Value::String(format!("{:o}", mode & 0o7777))));
847
848            // FileModifyDate
849            if let Ok(modified) = metadata.modified() {
850                if let Ok(dur) = modified.duration_since(std::time::UNIX_EPOCH) {
851                    let secs = dur.as_secs() as i64;
852                    tags.push(file_tag("FileModifyDate", Value::String(unix_to_datetime(secs))));
853                }
854            }
855            // FileAccessDate
856            if let Ok(accessed) = metadata.accessed() {
857                if let Ok(dur) = accessed.duration_since(std::time::UNIX_EPOCH) {
858                    let secs = dur.as_secs() as i64;
859                    tags.push(file_tag("FileAccessDate", Value::String(unix_to_datetime(secs))));
860                }
861            }
862            // FileInodeChangeDate (ctime on Unix)
863            let ctime = metadata.ctime();
864            if ctime > 0 {
865                tags.push(file_tag("FileInodeChangeDate", Value::String(unix_to_datetime(ctime))));
866            }
867        }
868
869        // ExifByteOrder (from TIFF header)
870        {
871            let bo_str = if data.len() > 8 {
872                // Check EXIF in JPEG or TIFF header or WebP/RIFF EXIF chunk
873                let check: Option<&[u8]> = if data.starts_with(&[0xFF, 0xD8]) {
874                    // JPEG: find APP1 EXIF header
875                    data.windows(6).position(|w| w == b"Exif\0\0")
876                        .map(|p| &data[p+6..])
877                } else if data.starts_with(b"FUJIFILMCCD-RAW") && data.len() >= 0x60 {
878                    // RAF: look in the embedded JPEG for EXIF byte order
879                    let jpeg_offset = u32::from_be_bytes([data[0x54], data[0x55], data[0x56], data[0x57]]) as usize;
880                    let jpeg_length = u32::from_be_bytes([data[0x58], data[0x59], data[0x5A], data[0x5B]]) as usize;
881                    if jpeg_offset > 0 && jpeg_offset + jpeg_length <= data.len() {
882                        let jpeg = &data[jpeg_offset..jpeg_offset + jpeg_length];
883                        jpeg.windows(6).position(|w| w == b"Exif\0\0")
884                            .map(|p| &jpeg[p+6..])
885                    } else {
886                        None
887                    }
888                } else if data.starts_with(b"RIFF") && data.len() >= 12 {
889                    // RIFF/WebP: find EXIF chunk
890                    let mut riff_bo: Option<&[u8]> = None;
891                    let mut pos = 12usize;
892                    while pos + 8 <= data.len() {
893                        let cid = &data[pos..pos+4];
894                        let csz = u32::from_le_bytes([data[pos+4],data[pos+5],data[pos+6],data[pos+7]]) as usize;
895                        let cstart = pos + 8;
896                        let cend = (cstart + csz).min(data.len());
897                        if cid == b"EXIF" && cend > cstart {
898                            let exif_data = &data[cstart..cend];
899                            let tiff = if exif_data.starts_with(b"Exif\0\0") { &exif_data[6..] } else { exif_data };
900                            riff_bo = Some(tiff);
901                            break;
902                        }
903                        // Also check LIST chunks
904                        if cid == b"LIST" && cend >= cstart + 4 {
905                            // recurse not needed for this simple scan - just advance
906                        }
907                        pos = cend + (csz & 1);
908                    }
909                    riff_bo
910                } else if data.starts_with(&[0x00, 0x00, 0x00, 0x0C, b'J', b'X', b'L', b' ']) {
911                    // JXL container: scan for brob Exif box and decompress to get byte order
912                    let mut jxl_bo: Option<String> = None;
913                    let mut jpos = 12usize; // skip JXL signature box
914                    while jpos + 8 <= data.len() {
915                        let bsize = u32::from_be_bytes([data[jpos], data[jpos+1], data[jpos+2], data[jpos+3]]) as usize;
916                        let btype = &data[jpos+4..jpos+8];
917                        if bsize < 8 || jpos + bsize > data.len() { break; }
918                        if btype == b"brob" && jpos + bsize > 12 {
919                            let inner_type = &data[jpos+8..jpos+12];
920                            if inner_type == b"Exif" || inner_type == b"exif" {
921                                let brotli_payload = &data[jpos+12..jpos+bsize];
922                                use std::io::Cursor;
923                                let mut inp = Cursor::new(brotli_payload);
924                                let mut out: Vec<u8> = Vec::new();
925                                if brotli::BrotliDecompress(&mut inp, &mut out).is_ok() {
926                                    let exif_start = if out.len() > 4 { 4 } else { 0 };
927                                    if exif_start < out.len() {
928                                        if out[exif_start..].starts_with(b"MM") {
929                                            jxl_bo = Some("Big-endian (Motorola, MM)".to_string());
930                                        } else if out[exif_start..].starts_with(b"II") {
931                                            jxl_bo = Some("Little-endian (Intel, II)".to_string());
932                                        }
933                                    }
934                                }
935                                break;
936                            }
937                        }
938                        jpos += bsize;
939                    }
940                    if let Some(bo) = jxl_bo {
941                        if !bo.is_empty() && file_type != FileType::Btf {
942                            tags.push(file_tag("ExifByteOrder", Value::String(bo)));
943                        }
944                    }
945                    // Return None to skip the generic byte order check below
946                    None
947                } else if data.starts_with(&[0x00, b'M', b'R', b'M']) {
948                    // MRW: find TTW segment which contains TIFF/EXIF data
949                    let mrw_data_offset = if data.len() >= 8 {
950                        u32::from_be_bytes([data[4], data[5], data[6], data[7]]) as usize + 8
951                    } else { 0 };
952                    let mut mrw_bo: Option<&[u8]> = None;
953                    let mut mpos = 8usize;
954                    while mpos + 8 <= mrw_data_offset.min(data.len()) {
955                        let seg_tag = &data[mpos..mpos+4];
956                        let seg_len = u32::from_be_bytes([data[mpos+4], data[mpos+5], data[mpos+6], data[mpos+7]]) as usize;
957                        if seg_tag == b"\x00TTW" && mpos + 8 + seg_len <= data.len() {
958                            mrw_bo = Some(&data[mpos+8..mpos+8+seg_len]);
959                            break;
960                        }
961                        mpos += 8 + seg_len;
962                    }
963                    mrw_bo
964                } else {
965                    Some(&data[..])
966                };
967                if let Some(tiff) = check {
968                    if tiff.starts_with(b"II") { "Little-endian (Intel, II)" }
969                    else if tiff.starts_with(b"MM") { "Big-endian (Motorola, MM)" }
970                    else { "" }
971                } else { "" }
972            } else { "" };
973            // Suppress ExifByteOrder for BigTIFF, Canon VRD/DR4 (Perl doesn't output it for these)
974            // Also skip if already emitted by ExifReader (TIFF-based formats)
975            let already_has_exifbyteorder = tags.iter().any(|t| t.name == "ExifByteOrder");
976            if !bo_str.is_empty() && !already_has_exifbyteorder
977                && file_type != FileType::Btf
978                && file_type != FileType::Dr4 && file_type != FileType::Vrd
979                && file_type != FileType::Crw {
980                tags.push(file_tag("ExifByteOrder", Value::String(bo_str.to_string())));
981            }
982        }
983
984        tags.push(file_tag("ExifToolVersion", Value::String(crate::VERSION.to_string())));
985
986        // Compute composite tags
987        let composite = crate::composite::compute_composite_tags(&tags);
988        tags.extend(composite);
989
990        // FLIR post-processing: remove LensID composite for FLIR cameras.
991        // Perl's LensID composite requires LensType EXIF tag (not present in FLIR images),
992        // and LensID-2 requires LensModel to match /(mm|\d\/F)/ (FLIR names like "FOL7"
993        // don't match).  Our composite.rs uses a simpler fallback that picks up any non-empty
994        // LensModel, so we remove LensID when the image is from a FLIR camera with FFF data.
995        {
996            let is_flir_fff = tags.iter().any(|t| t.group.family0 == "APP1"
997                && t.group.family1 == "FLIR");
998            if is_flir_fff {
999                tags.retain(|t| !(t.name == "LensID" && t.group.family0 == "Composite"));
1000            }
1001        }
1002
1003        // Olympus post-processing: remove the generic "Lens" composite for Olympus cameras.
1004        // In Perl, the "Lens" composite tag requires Canon:MinFocalLength (Canon namespace).
1005        // Our composite.rs generates Lens for any manufacturer that has MinFocalLength +
1006        // MaxFocalLength (e.g., Olympus Equipment sub-IFD).  Remove it for non-Canon cameras.
1007        {
1008            let make = tags.iter().find(|t| t.name == "Make")
1009                .map(|t| t.print_value.clone()).unwrap_or_default();
1010            if !make.to_uppercase().contains("CANON") {
1011                tags.retain(|t| t.name != "Lens" || t.group.family0 != "Composite");
1012            }
1013        }
1014
1015        // Priority-based deduplication: when the same tag name appears from both RIFF (priority 0)
1016        // and MakerNotes/EXIF (priority 0 but higher-quality source), remove the RIFF copy.
1017        // Mirrors ExifTool's PRIORITY => 0 behavior for RIFF StreamHeader tags.
1018        {
1019            let riff_priority_zero_tags = ["Quality", "SampleSize", "StreamType"];
1020            for tag_name in &riff_priority_zero_tags {
1021                let has_makernotes = tags.iter().any(|t| t.name == *tag_name
1022                    && t.group.family0 != "RIFF");
1023                if has_makernotes {
1024                    tags.retain(|t| !(t.name == *tag_name && t.group.family0 == "RIFF"));
1025                }
1026            }
1027        }
1028
1029        // Filter by requested tags if specified
1030        if !self.options.requested_tags.is_empty() {
1031            let requested: Vec<String> = self
1032                .options
1033                .requested_tags
1034                .iter()
1035                .map(|t| t.to_lowercase())
1036                .collect();
1037            tags.retain(|t| requested.contains(&t.name.to_lowercase()));
1038        }
1039
1040        Ok(tags)
1041    }
1042
1043    /// Format extracted tags into a simple name→value map.
1044    ///
1045    /// Handles duplicate tag names by appending group info.
1046    fn get_info(&self, tags: &[Tag]) -> ImageInfo {
1047        let mut info = ImageInfo::new();
1048        let mut seen: HashMap<String, usize> = HashMap::new();
1049
1050        for tag in tags {
1051            let value = if self.options.print_conv {
1052                &tag.print_value
1053            } else {
1054                &tag.raw_value.to_display_string()
1055            };
1056
1057            let count = seen.entry(tag.name.clone()).or_insert(0);
1058            *count += 1;
1059
1060            if *count == 1 {
1061                info.insert(tag.name.clone(), value.clone());
1062            } else if self.options.duplicates {
1063                let key = format!("{} [{}:{}]", tag.name, tag.group.family0, tag.group.family1);
1064                info.insert(key, value.clone());
1065            }
1066        }
1067
1068        info
1069    }
1070
1071    /// Detect file type from magic bytes and extension.
1072    fn detect_file_type(&self, data: &[u8], path: &Path) -> Result<FileType> {
1073        // Try magic bytes first
1074        let header_len = data.len().min(256);
1075        if let Some(ft) = file_type::detect_from_magic(&data[..header_len]) {
1076            // Override ICO to Font if extension is .dfont (Mac resource fork)
1077            if ft == FileType::Ico {
1078                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1079                    if ext.eq_ignore_ascii_case("dfont") {
1080                        return Ok(FileType::Font);
1081                    }
1082                }
1083            }
1084            // Override JPEG to JPS if the file extension is .jps
1085            if ft == FileType::Jpeg {
1086                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1087                    if ext.eq_ignore_ascii_case("jps") {
1088                        return Ok(FileType::Jps);
1089                    }
1090                }
1091            }
1092            // Override PLIST to AAE if extension is .aae
1093            if ft == FileType::Plist {
1094                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1095                    if ext.eq_ignore_ascii_case("aae") {
1096                        return Ok(FileType::Aae);
1097                    }
1098                }
1099            }
1100            // Override XMP to PLIST/AAE if extension is .plist or .aae
1101            if ft == FileType::Xmp {
1102                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1103                    if ext.eq_ignore_ascii_case("plist") {
1104                        return Ok(FileType::Plist);
1105                    }
1106                    if ext.eq_ignore_ascii_case("aae") {
1107                        return Ok(FileType::Aae);
1108                    }
1109                }
1110            }
1111            // Override to PhotoCD if extension is .pcd (file starts with 0xFF padding)
1112            if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1113                if ext.eq_ignore_ascii_case("pcd") && data.len() >= 2056
1114                    && &data[2048..2055] == b"PCD_IPI"
1115                {
1116                    return Ok(FileType::PhotoCd);
1117                }
1118            }
1119            // Override MP3 to MPC/APE/WavPack if extension says otherwise
1120            if ft == FileType::Mp3 {
1121                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1122                    if ext.eq_ignore_ascii_case("mpc") {
1123                        return Ok(FileType::Mpc);
1124                    }
1125                    if ext.eq_ignore_ascii_case("ape") {
1126                        return Ok(FileType::Ape);
1127                    }
1128                    if ext.eq_ignore_ascii_case("wv") {
1129                        return Ok(FileType::WavPack);
1130                    }
1131                }
1132            }
1133            // For ZIP files, check if it's an EIP (by extension) or OpenDocument format
1134            if ft == FileType::Zip {
1135                // Check extension first for EIP
1136                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1137                    if ext.eq_ignore_ascii_case("eip") {
1138                        return Ok(FileType::Eip);
1139                    }
1140                }
1141                if let Some(od_type) = detect_opendocument_type(data) {
1142                    return Ok(od_type);
1143                }
1144            }
1145            return Ok(ft);
1146        }
1147
1148        // Fall back to extension
1149        if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1150            if let Some(ft) = file_type::detect_from_extension(ext) {
1151                return Ok(ft);
1152            }
1153        }
1154
1155        let ext_str = path
1156            .extension()
1157            .and_then(|e| e.to_str())
1158            .unwrap_or("unknown");
1159        Err(Error::UnsupportedFileType(ext_str.to_string()))
1160    }
1161
1162    /// Dispatch to the appropriate format reader.
1163
1164    fn process_file(&self, data: &[u8], file_type: FileType) -> Result<Vec<Tag>> {
1165        match file_type {
1166            FileType::Jpeg | FileType::Jps => formats::jpeg::read_jpeg(data),
1167            FileType::Png | FileType::Mng => formats::png::read_png(data),
1168            // All TIFF-based formats (TIFF + most RAW formats)
1169            FileType::Tiff
1170            | FileType::Btf
1171            | FileType::Dng
1172            | FileType::Cr2
1173            | FileType::Nef
1174            | FileType::Arw
1175            | FileType::Sr2
1176            | FileType::Orf
1177            | FileType::Pef
1178            | FileType::Erf
1179            | FileType::Fff
1180            | FileType::Rwl
1181            | FileType::Mef
1182            | FileType::Srw
1183            | FileType::Gpr
1184            | FileType::Arq
1185            | FileType::ThreeFR
1186            | FileType::Dcr
1187            | FileType::Rw2
1188            | FileType::Srf => formats::tiff::read_tiff(data),
1189            // Phase One IIQ: TIFF + PhaseOne maker note block
1190            FileType::Iiq => formats::misc::read_iiq(data),
1191            // Image formats
1192            FileType::Gif => formats::gif::read_gif(data),
1193            FileType::Bmp => formats::bmp::read_bmp(data),
1194            FileType::WebP | FileType::Avi | FileType::Wav => formats::riff::read_riff(data),
1195            FileType::Psd => formats::psd::read_psd(data),
1196            // Audio formats
1197            FileType::Mp3 => formats::id3::read_mp3(data),
1198            FileType::Flac => formats::flac::read_flac(data),
1199            FileType::Ogg | FileType::Opus => formats::ogg::read_ogg(data),
1200            FileType::Aiff => formats::aiff::read_aiff(data),
1201            // Video formats
1202            FileType::Mp4
1203            | FileType::QuickTime
1204            | FileType::M4a
1205            | FileType::ThreeGP
1206            | FileType::Heif
1207            | FileType::Avif
1208            | FileType::Cr3
1209            | FileType::F4v
1210            | FileType::Mqv
1211            | FileType::Lrv => formats::quicktime::read_quicktime(data),
1212            FileType::Mkv | FileType::WebM => formats::matroska::read_matroska(data),
1213            FileType::Asf | FileType::Wmv | FileType::Wma => formats::asf::read_asf(data),
1214            FileType::Wtv => formats::wtv::read_wtv(data),
1215            // RAW formats with custom containers
1216            FileType::Crw => formats::canon_raw::read_crw(data),
1217            FileType::Raf => formats::raf::read_raf(data),
1218            FileType::Mrw => formats::mrw::read_mrw(data),
1219            FileType::Mrc => formats::mrc::read_mrc(data),
1220            // Image formats
1221            FileType::Jp2 => formats::jp2::read_jp2(data),
1222            FileType::J2c => formats::jp2::read_j2c(data),
1223            FileType::Jxl => formats::jp2::read_jxl(data),
1224            FileType::Ico => formats::ico::read_ico(data),
1225            FileType::Icc => formats::icc::read_icc(data),
1226            // Documents
1227            FileType::Pdf => formats::pdf::read_pdf(data),
1228            FileType::PostScript => {
1229                // PFA fonts start with %!PS-AdobeFont or %!FontType1
1230                if data.starts_with(b"%!PS-AdobeFont") || data.starts_with(b"%!FontType1") {
1231                    formats::font::read_pfa(data).or_else(|_| formats::postscript::read_postscript(data))
1232                } else {
1233                    formats::postscript::read_postscript(data)
1234                }
1235            }
1236            FileType::Eip => formats::capture_one::read_eip(data),
1237            FileType::Zip | FileType::Docx | FileType::Xlsx | FileType::Pptx
1238            | FileType::Doc | FileType::Xls | FileType::Ppt => formats::zip::read_zip(data),
1239            FileType::Rtf => formats::rtf::read_rtf(data),
1240            FileType::InDesign => formats::misc::read_indesign(data),
1241            FileType::Pcap => formats::misc::read_pcap(data),
1242            FileType::Pcapng => formats::misc::read_pcapng(data),
1243            // Canon VRD / DR4
1244            FileType::Vrd => formats::canon_vrd::read_vrd(data).or_else(|_| Ok(Vec::new())),
1245            FileType::Dr4 => formats::canon_vrd::read_dr4(data).or_else(|_| Ok(Vec::new())),
1246            // Metadata / Other
1247            FileType::Xmp => formats::xmp_file::read_xmp(data),
1248            FileType::Svg => formats::misc::read_svg(data),
1249            FileType::Html => {
1250                // SVG files that weren't detected by magic (e.g., via extension fallback)
1251                let is_svg = data.windows(4).take(512).any(|w| w == b"<svg");
1252                if is_svg {
1253                    formats::misc::read_svg(data)
1254                } else {
1255                    formats::html::read_html(data)
1256                }
1257            }
1258            FileType::Exe => formats::exe::read_exe(data),
1259            FileType::Font => {
1260                // AFM: Adobe Font Metrics text file
1261                if data.starts_with(b"StartFontMetrics") {
1262                    return formats::font::read_afm(data);
1263                }
1264                // PFA: PostScript Type 1 ASCII font
1265                if data.starts_with(b"%!PS-AdobeFont") || data.starts_with(b"%!FontType1") {
1266                    return formats::font::read_pfa(data).or_else(|_| Ok(Vec::new()));
1267                }
1268                // PFB: PostScript Type 1 Binary font
1269                if data.len() >= 2 && data[0] == 0x80 && (data[1] == 0x01 || data[1] == 0x02) {
1270                    return formats::font::read_pfb(data).or_else(|_| Ok(Vec::new()));
1271                }
1272                formats::font::read_font(data)
1273            }
1274            // Audio with ID3
1275            FileType::WavPack | FileType::Dsf => formats::id3::read_mp3(data),
1276            FileType::Ape => formats::ape::read_ape(data),
1277            FileType::Mpc => formats::ape::read_mpc(data),
1278            FileType::Aac => formats::misc::read_aac(data),
1279            FileType::RealAudio => {
1280                formats::misc::read_real_audio(data).or_else(|_| Ok(Vec::new()))
1281            }
1282            FileType::RealMedia => {
1283                formats::misc::read_real_media(data).or_else(|_| Ok(Vec::new()))
1284            }
1285            // Misc formats
1286            FileType::Czi => formats::misc::read_czi(data).or_else(|_| Ok(Vec::new())),
1287            FileType::PhotoCd => formats::misc::read_photo_cd(data).or_else(|_| Ok(Vec::new())),
1288            FileType::Dicom => formats::dicom::read_dicom(data),
1289            FileType::Fits => formats::misc::read_fits(data),
1290            FileType::Flv => formats::misc::read_flv(data),
1291            FileType::Mxf => formats::misc::read_mxf(data).or_else(|_| Ok(Vec::new())),
1292            FileType::Swf => formats::misc::read_swf(data),
1293            FileType::Hdr => formats::misc::read_hdr(data),
1294            FileType::DjVu => formats::djvu::read_djvu(data),
1295            FileType::Xcf => formats::gimp::read_xcf(data),
1296            FileType::Mie => formats::mie::read_mie(data),
1297            FileType::Lfp => formats::lytro::read_lfp(data),
1298            // FileType::Miff dispatched via string extension below
1299            FileType::Fpf => formats::flir_fpf::read_fpf(data),
1300            FileType::Flif => formats::misc::read_flif(data),
1301            FileType::Bpg => formats::misc::read_bpg(data),
1302            FileType::Pcx => formats::misc::read_pcx(data),
1303            FileType::Pict => formats::misc::read_pict(data),
1304            FileType::M2ts => formats::misc::read_m2ts(data),
1305            FileType::Gzip => formats::misc::read_gzip(data),
1306            FileType::Rar => formats::misc::read_rar(data),
1307            FileType::Dss => formats::misc::read_dss(data),
1308            FileType::Moi => formats::misc::read_moi(data),
1309            FileType::MacOs => formats::misc::read_macos(data),
1310            FileType::Json => formats::misc::read_json(data),
1311            // New formats
1312            FileType::Pgf => formats::pgf::read_pgf(data),
1313            FileType::Xisf => formats::xisf::read_xisf(data),
1314            FileType::Torrent => formats::torrent::read_torrent(data),
1315            FileType::Mobi => formats::palm::read_palm(data),
1316            FileType::Psp => formats::psp::read_psp(data),
1317            FileType::SonyPmp => formats::sony_pmp::read_sony_pmp(data),
1318            FileType::Audible => formats::audible::read_audible(data),
1319            FileType::Exr => formats::openexr::read_openexr(data),
1320            // New formats
1321            FileType::Plist => {
1322                if data.starts_with(b"bplist") {
1323                    formats::plist::read_binary_plist_tags(data)
1324                } else {
1325                    formats::plist::read_xml_plist(data)
1326                }
1327            }
1328            FileType::Aae => {
1329                if data.starts_with(b"bplist") {
1330                    formats::plist::read_binary_plist_tags(data)
1331                } else {
1332                    formats::plist::read_aae_plist(data)
1333                }
1334            }
1335            FileType::KyoceraRaw => formats::misc::read_kyocera_raw(data),
1336            FileType::PortableFloatMap => formats::misc::read_pfm(data),
1337            FileType::Ods | FileType::Odt | FileType::Odp | FileType::Odg |
1338            FileType::Odf | FileType::Odb | FileType::Odi | FileType::Odc => formats::zip::read_zip(data),
1339            _ => Err(Error::UnsupportedFileType(format!("{}", file_type))),
1340        }
1341    }
1342
1343    /// Fallback: try to read file based on extension for formats without magic detection.
1344    fn process_by_extension(&self, data: &[u8], path: &Path) -> Result<Vec<Tag>> {
1345        let ext = path
1346            .extension()
1347            .and_then(|e| e.to_str())
1348            .unwrap_or("")
1349            .to_ascii_lowercase();
1350
1351        match ext.as_str() {
1352            "ppm" | "pgm" | "pbm" => formats::misc::read_ppm(data),
1353            "pfm" => {
1354                // PFM can be Portable Float Map or Printer Font Metrics
1355                if data.len() >= 3 && data[0] == b'P' && (data[1] == b'f' || data[1] == b'F') {
1356                    formats::misc::read_ppm(data)
1357                } else {
1358                    Ok(Vec::new()) // Printer Font Metrics
1359                }
1360            }
1361            "json" => formats::misc::read_json(data),
1362            "svg" => formats::misc::read_svg(data),
1363            "ram" => formats::misc::read_ram(data).or_else(|_| Ok(Vec::new())),
1364            "txt" | "log" | "igc" => {
1365                Ok(compute_text_tags(data, false))
1366            }
1367            "csv" => {
1368                Ok(compute_text_tags(data, true))
1369            }
1370            "url" => formats::lnk::read_url(data).or_else(|_| Ok(Vec::new())),
1371            "lnk" => formats::lnk::read_lnk(data).or_else(|_| Ok(Vec::new())),
1372            "gpx" | "kml" | "xml" | "inx" => formats::xmp_file::read_xmp(data),
1373            "plist" => {
1374                if data.starts_with(b"bplist") {
1375                    formats::plist::read_binary_plist_tags(data).or_else(|_| Ok(Vec::new()))
1376                } else {
1377                    formats::plist::read_xml_plist(data).or_else(|_| Ok(Vec::new()))
1378                }
1379            }
1380            "aae" => {
1381                if data.starts_with(b"bplist") {
1382                    formats::plist::read_binary_plist_tags(data).or_else(|_| Ok(Vec::new()))
1383                } else {
1384                    formats::plist::read_aae_plist(data).or_else(|_| Ok(Vec::new()))
1385                }
1386            }
1387            "vcf" | "ics" | "vcard" => {
1388                let s = String::from_utf8_lossy(&data[..data.len().min(100)]);
1389                if s.contains("BEGIN:VCALENDAR") {
1390                    formats::vcard::read_ics(data).or_else(|_| Ok(Vec::new()))
1391                } else {
1392                    formats::vcard::read_vcf(data).or_else(|_| Ok(Vec::new()))
1393                }
1394            }
1395            "xcf" => Ok(Vec::new()),      // GIMP
1396            "vrd" => formats::canon_vrd::read_vrd(data).or_else(|_| Ok(Vec::new())),
1397            "dr4" => formats::canon_vrd::read_dr4(data).or_else(|_| Ok(Vec::new())),
1398            "indd" | "indt" => Ok(Vec::new()), // InDesign
1399            "x3f" => formats::sigma_raw::read_x3f(data).or_else(|_| Ok(Vec::new())),
1400            "mie" => Ok(Vec::new()),       // MIE
1401            "exr" => Ok(Vec::new()),       // OpenEXR
1402            "wpg" => formats::misc::read_wpg(data).or_else(|_| Ok(Vec::new())),
1403            "moi" => formats::misc::read_moi(data).or_else(|_| Ok(Vec::new())),
1404            "macos" => formats::misc::read_macos(data).or_else(|_| Ok(Vec::new())),
1405            "dpx" => formats::dpx::read_dpx(data).or_else(|_| Ok(Vec::new())),
1406            "r3d" => formats::red::read_r3d(data).or_else(|_| Ok(Vec::new())),
1407            "tnef" => formats::tnef::read_tnef(data).or_else(|_| Ok(Vec::new())),
1408            "ppt" | "fpx" => formats::flashpix::read_fpx(data).or_else(|_| Ok(Vec::new())),
1409            "fpf" => formats::flir_fpf::read_fpf(data).or_else(|_| Ok(Vec::new())),
1410            "itc" => formats::misc::read_itc(data).or_else(|_| Ok(Vec::new())),
1411            "dv" => formats::dv::read_dv(data, data.len() as u64).or_else(|_| Ok(Vec::new())),
1412            "czi" => formats::misc::read_czi(data).or_else(|_| Ok(Vec::new())),
1413            "miff" => formats::miff::read_miff(data).or_else(|_| Ok(Vec::new())),
1414            "lfp" | "mrc"
1415            | "dss" | "mobi" | "psp" | "pgf" | "raw"
1416            | "pmp" | "torrent"
1417            | "xisf" | "mxf"
1418            | "dfont" => Ok(Vec::new()),
1419            "iso" => formats::iso::read_iso(data).or_else(|_| Ok(Vec::new())),
1420            "afm" => formats::font::read_afm(data).or_else(|_| Ok(Vec::new())),
1421            "pfa" => formats::font::read_pfa(data).or_else(|_| Ok(Vec::new())),
1422            "pfb" => formats::font::read_pfb(data).or_else(|_| Ok(Vec::new())),
1423            _ => Err(Error::UnsupportedFileType(ext)),
1424        }
1425    }
1426}
1427
1428impl Default for ExifTool {
1429    fn default() -> Self {
1430        Self::new()
1431    }
1432}
1433
1434/// Detect OpenDocument file type by reading the `mimetype` entry from a ZIP.
1435/// Returns None if not an OpenDocument file.
1436fn detect_opendocument_type(data: &[u8]) -> Option<FileType> {
1437    // OpenDocument ZIPs have "mimetype" as the FIRST local file entry (uncompressed)
1438    if data.len() < 30 || data[0..4] != [0x50, 0x4B, 0x03, 0x04] {
1439        return None;
1440    }
1441    let compression = u16::from_le_bytes([data[8], data[9]]);
1442    let compressed_size = u32::from_le_bytes([data[18], data[19], data[20], data[21]]) as usize;
1443    let name_len = u16::from_le_bytes([data[26], data[27]]) as usize;
1444    let extra_len = u16::from_le_bytes([data[28], data[29]]) as usize;
1445    let name_start = 30;
1446    if name_start + name_len > data.len() {
1447        return None;
1448    }
1449    let filename = std::str::from_utf8(&data[name_start..name_start + name_len]).unwrap_or("");
1450    if filename != "mimetype" || compression != 0 {
1451        return None;
1452    }
1453    let content_start = name_start + name_len + extra_len;
1454    let content_end = (content_start + compressed_size).min(data.len());
1455    if content_start >= content_end {
1456        return None;
1457    }
1458    let mime = std::str::from_utf8(&data[content_start..content_end]).unwrap_or("").trim();
1459    match mime {
1460        "application/vnd.oasis.opendocument.spreadsheet" => Some(FileType::Ods),
1461        "application/vnd.oasis.opendocument.text" => Some(FileType::Odt),
1462        "application/vnd.oasis.opendocument.presentation" => Some(FileType::Odp),
1463        "application/vnd.oasis.opendocument.graphics" => Some(FileType::Odg),
1464        "application/vnd.oasis.opendocument.formula" => Some(FileType::Odf),
1465        "application/vnd.oasis.opendocument.database" => Some(FileType::Odb),
1466        "application/vnd.oasis.opendocument.image" => Some(FileType::Odi),
1467        "application/vnd.oasis.opendocument.chart" => Some(FileType::Odc),
1468        _ => None,
1469    }
1470}
1471
1472/// Detect the file type of a file at the given path.
1473pub fn get_file_type<P: AsRef<Path>>(path: P) -> Result<FileType> {
1474    let path = path.as_ref();
1475    let mut file = fs::File::open(path).map_err(Error::Io)?;
1476    let mut header = [0u8; 256];
1477    use std::io::Read;
1478    let n = file.read(&mut header).map_err(Error::Io)?;
1479
1480    if let Some(ft) = file_type::detect_from_magic(&header[..n]) {
1481        return Ok(ft);
1482    }
1483
1484    if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1485        if let Some(ft) = file_type::detect_from_extension(ext) {
1486            return Ok(ft);
1487        }
1488    }
1489
1490    Err(Error::UnsupportedFileType("unknown".into()))
1491}
1492
/// Classification of EXIF tags into IFD groups.
///
/// Values are produced by `classify_exif_tag`, which chooses a group purely
/// from the numeric tag ID.
enum ExifIfdGroup {
    /// Fallback group: any tag ID not classified as ExifIFD or GPS.
    Ifd0,
    /// Tag IDs that fall in the ExifIFD ranges listed in `classify_exif_tag`.
    ExifIfd,
    /// Tag IDs in `0x0000..=0x001F`.
    Gps,
}
1499
1500/// Determine which IFD a tag belongs to based on its ID.
1501fn classify_exif_tag(tag_id: u16) -> ExifIfdGroup {
1502    match tag_id {
1503        // ExifIFD tags
1504        0x829A..=0x829D | 0x8822..=0x8827 | 0x8830 | 0x9000..=0x9292
1505        | 0xA000..=0xA435 => ExifIfdGroup::ExifIfd,
1506        // GPS tags
1507        0x0000..=0x001F if tag_id <= 0x001F => ExifIfdGroup::Gps,
1508        // Everything else → IFD0
1509        _ => ExifIfdGroup::Ifd0,
1510    }
1511}
1512
1513/// Extract existing EXIF entries from a JPEG file's APP1 segment.
1514fn extract_existing_exif_entries(jpeg_data: &[u8], target_bo: ByteOrderMark) -> Vec<exif_writer::IfdEntry> {
1515    let mut entries = Vec::new();
1516
1517    // Find EXIF APP1 segment
1518    let mut pos = 2; // Skip SOI
1519    while pos + 4 <= jpeg_data.len() {
1520        if jpeg_data[pos] != 0xFF {
1521            pos += 1;
1522            continue;
1523        }
1524        let marker = jpeg_data[pos + 1];
1525        pos += 2;
1526
1527        if marker == 0xDA || marker == 0xD9 {
1528            break; // SOS or EOI
1529        }
1530        if marker == 0xFF || marker == 0x00 || marker == 0xD8 || (0xD0..=0xD7).contains(&marker) {
1531            continue;
1532        }
1533
1534        if pos + 2 > jpeg_data.len() {
1535            break;
1536        }
1537        let seg_len = u16::from_be_bytes([jpeg_data[pos], jpeg_data[pos + 1]]) as usize;
1538        if seg_len < 2 || pos + seg_len > jpeg_data.len() {
1539            break;
1540        }
1541
1542        let seg_data = &jpeg_data[pos + 2..pos + seg_len];
1543
1544        // EXIF APP1
1545        if marker == 0xE1 && seg_data.len() > 14 && seg_data.starts_with(b"Exif\0\0") {
1546            let tiff_data = &seg_data[6..];
1547            extract_ifd_entries(tiff_data, target_bo, &mut entries);
1548            break;
1549        }
1550
1551        pos += seg_len;
1552    }
1553
1554    entries
1555}
1556
1557/// Extract IFD entries from TIFF data, re-encoding values in the target byte order.
1558fn extract_ifd_entries(
1559    tiff_data: &[u8],
1560    target_bo: ByteOrderMark,
1561    entries: &mut Vec<exif_writer::IfdEntry>,
1562) {
1563    use crate::metadata::exif::parse_tiff_header;
1564
1565    let header = match parse_tiff_header(tiff_data) {
1566        Ok(h) => h,
1567        Err(_) => return,
1568    };
1569
1570    let src_bo = header.byte_order;
1571
1572    // Read IFD0
1573    read_ifd_for_merge(tiff_data, header.ifd0_offset as usize, src_bo, target_bo, entries);
1574
1575    // Find ExifIFD and GPS pointers
1576    let ifd0_offset = header.ifd0_offset as usize;
1577    if ifd0_offset + 2 > tiff_data.len() {
1578        return;
1579    }
1580    let count = read_u16_bo(tiff_data, ifd0_offset, src_bo) as usize;
1581    for i in 0..count {
1582        let eoff = ifd0_offset + 2 + i * 12;
1583        if eoff + 12 > tiff_data.len() {
1584            break;
1585        }
1586        let tag = read_u16_bo(tiff_data, eoff, src_bo);
1587        let value_off = read_u32_bo(tiff_data, eoff + 8, src_bo) as usize;
1588
1589        match tag {
1590            0x8769 => read_ifd_for_merge(tiff_data, value_off, src_bo, target_bo, entries),
1591            0x8825 => read_ifd_for_merge(tiff_data, value_off, src_bo, target_bo, entries),
1592            _ => {}
1593        }
1594    }
1595}
1596
1597/// Read a single IFD and extract entries for merge.
1598fn read_ifd_for_merge(
1599    data: &[u8],
1600    offset: usize,
1601    src_bo: ByteOrderMark,
1602    target_bo: ByteOrderMark,
1603    entries: &mut Vec<exif_writer::IfdEntry>,
1604) {
1605    if offset + 2 > data.len() {
1606        return;
1607    }
1608    let count = read_u16_bo(data, offset, src_bo) as usize;
1609
1610    for i in 0..count {
1611        let eoff = offset + 2 + i * 12;
1612        if eoff + 12 > data.len() {
1613            break;
1614        }
1615
1616        let tag = read_u16_bo(data, eoff, src_bo);
1617        let dtype = read_u16_bo(data, eoff + 2, src_bo);
1618        let count_val = read_u32_bo(data, eoff + 4, src_bo);
1619
1620        // Skip sub-IFD pointers and MakerNote
1621        if tag == 0x8769 || tag == 0x8825 || tag == 0xA005 || tag == 0x927C {
1622            continue;
1623        }
1624
1625        let type_size = match dtype {
1626            1 | 2 | 6 | 7 => 1usize,
1627            3 | 8 => 2,
1628            4 | 9 | 11 | 13 => 4,
1629            5 | 10 | 12 => 8,
1630            _ => continue,
1631        };
1632
1633        let total_size = type_size * count_val as usize;
1634        let raw_data = if total_size <= 4 {
1635            data[eoff + 8..eoff + 12].to_vec()
1636        } else {
1637            let voff = read_u32_bo(data, eoff + 8, src_bo) as usize;
1638            if voff + total_size > data.len() {
1639                continue;
1640            }
1641            data[voff..voff + total_size].to_vec()
1642        };
1643
1644        // Re-encode multi-byte values if byte orders differ
1645        let final_data = if src_bo != target_bo && type_size > 1 {
1646            reencode_bytes(&raw_data, dtype, count_val as usize, src_bo, target_bo)
1647        } else {
1648            raw_data[..total_size].to_vec()
1649        };
1650
1651        let format = match dtype {
1652            1 => exif_writer::ExifFormat::Byte,
1653            2 => exif_writer::ExifFormat::Ascii,
1654            3 => exif_writer::ExifFormat::Short,
1655            4 => exif_writer::ExifFormat::Long,
1656            5 => exif_writer::ExifFormat::Rational,
1657            6 => exif_writer::ExifFormat::SByte,
1658            7 => exif_writer::ExifFormat::Undefined,
1659            8 => exif_writer::ExifFormat::SShort,
1660            9 => exif_writer::ExifFormat::SLong,
1661            10 => exif_writer::ExifFormat::SRational,
1662            11 => exif_writer::ExifFormat::Float,
1663            12 => exif_writer::ExifFormat::Double,
1664            _ => continue,
1665        };
1666
1667        entries.push(exif_writer::IfdEntry {
1668            tag,
1669            format,
1670            data: final_data,
1671        });
1672    }
1673}
1674
1675/// Re-encode multi-byte values when converting between byte orders.
1676fn reencode_bytes(
1677    data: &[u8],
1678    dtype: u16,
1679    count: usize,
1680    src_bo: ByteOrderMark,
1681    dst_bo: ByteOrderMark,
1682) -> Vec<u8> {
1683    let mut out = Vec::with_capacity(data.len());
1684    match dtype {
1685        3 | 8 => {
1686            // 16-bit
1687            for i in 0..count {
1688                let v = read_u16_bo(data, i * 2, src_bo);
1689                match dst_bo {
1690                    ByteOrderMark::LittleEndian => out.extend_from_slice(&v.to_le_bytes()),
1691                    ByteOrderMark::BigEndian => out.extend_from_slice(&v.to_be_bytes()),
1692                }
1693            }
1694        }
1695        4 | 9 | 11 | 13 => {
1696            // 32-bit
1697            for i in 0..count {
1698                let v = read_u32_bo(data, i * 4, src_bo);
1699                match dst_bo {
1700                    ByteOrderMark::LittleEndian => out.extend_from_slice(&v.to_le_bytes()),
1701                    ByteOrderMark::BigEndian => out.extend_from_slice(&v.to_be_bytes()),
1702                }
1703            }
1704        }
1705        5 | 10 => {
1706            // Rational (two 32-bit)
1707            for i in 0..count {
1708                let n = read_u32_bo(data, i * 8, src_bo);
1709                let d = read_u32_bo(data, i * 8 + 4, src_bo);
1710                match dst_bo {
1711                    ByteOrderMark::LittleEndian => {
1712                        out.extend_from_slice(&n.to_le_bytes());
1713                        out.extend_from_slice(&d.to_le_bytes());
1714                    }
1715                    ByteOrderMark::BigEndian => {
1716                        out.extend_from_slice(&n.to_be_bytes());
1717                        out.extend_from_slice(&d.to_be_bytes());
1718                    }
1719                }
1720            }
1721        }
1722        12 => {
1723            // 64-bit double
1724            for i in 0..count {
1725                let mut bytes = [0u8; 8];
1726                bytes.copy_from_slice(&data[i * 8..i * 8 + 8]);
1727                if src_bo != dst_bo {
1728                    bytes.reverse();
1729                }
1730                out.extend_from_slice(&bytes);
1731            }
1732        }
1733        _ => out.extend_from_slice(data),
1734    }
1735    out
1736}
1737
1738fn read_u16_bo(data: &[u8], offset: usize, bo: ByteOrderMark) -> u16 {
1739    if offset + 2 > data.len() { return 0; }
1740    match bo {
1741        ByteOrderMark::LittleEndian => u16::from_le_bytes([data[offset], data[offset + 1]]),
1742        ByteOrderMark::BigEndian => u16::from_be_bytes([data[offset], data[offset + 1]]),
1743    }
1744}
1745
1746fn read_u32_bo(data: &[u8], offset: usize, bo: ByteOrderMark) -> u32 {
1747    if offset + 4 > data.len() { return 0; }
1748    match bo {
1749        ByteOrderMark::LittleEndian => u32::from_le_bytes([data[offset], data[offset + 1], data[offset + 2], data[offset + 3]]),
1750        ByteOrderMark::BigEndian => u32::from_be_bytes([data[offset], data[offset + 1], data[offset + 2], data[offset + 3]]),
1751    }
1752}
1753
1754/// Map tag name to numeric EXIF tag ID.
1755fn tag_name_to_id(name: &str) -> Option<u16> {
1756    encode_exif_tag(name, "", "", ByteOrderMark::BigEndian).map(|(id, _, _)| id)
1757}
1758
/// Convert a tag value to a safe filename.
///
/// Path separators, the characters `: * ? " < > |` and all control characters
/// are replaced by `_`; afterwards leading and trailing whitespace is trimmed.
fn value_to_filename(value: &str) -> String {
    const RESERVED: [char; 9] = ['/', '\\', ':', '*', '?', '"', '<', '>', '|'];

    let mut sanitized = String::with_capacity(value.len());
    for ch in value.chars() {
        let unsafe_char = RESERVED.contains(&ch) || ch.is_control();
        sanitized.push(if unsafe_char { '_' } else { ch });
    }
    // Trimming happens after replacement, so control characters such as a
    // trailing newline have already become '_' and are kept.
    sanitized.trim().to_string()
}
1772
/// Parse a date shift string like "+1:0:0" (add 1 hour) or "-0:30:0" (subtract 30 min).
///
/// Accepted forms are `H`, `H:M` and `H:M:S`, optionally preceded by `+` or
/// `-`; omitted fields are zero and a missing sign means positive.
///
/// Returns (sign, hours, minutes, seconds) with `sign` being `1` or `-1`, or
/// `None` when a field is not an unsigned integer or more than three fields
/// are given.
pub fn parse_date_shift(shift: &str) -> Option<(i32, u32, u32, u32)> {
    let (sign, magnitude) = match shift.strip_prefix('-') {
        Some(rest) => (-1, rest),
        None => (1, shift.strip_prefix('+').unwrap_or(shift)),
    };

    // hours, minutes, seconds; fields that are not supplied stay at zero.
    let mut hms = [0u32; 3];
    for (index, field) in magnitude.split(':').enumerate() {
        // A fourth field has no slot, which rejects the whole string.
        let slot = hms.get_mut(index)?;
        *slot = field.parse().ok()?;
    }

    Some((sign, hms[0], hms[1], hms[2]))
}
1804
1805/// Shift a datetime string by the given amount.
1806/// Input format: "YYYY:MM:DD HH:MM:SS"
1807pub fn shift_datetime(datetime: &str, shift: &str) -> Option<String> {
1808    let (sign, hours, minutes, seconds) = parse_date_shift(shift)?;
1809
1810    // Parse date/time
1811    if datetime.len() < 19 {
1812        return None;
1813    }
1814    let year: i32 = datetime[0..4].parse().ok()?;
1815    let month: u32 = datetime[5..7].parse().ok()?;
1816    let day: u32 = datetime[8..10].parse().ok()?;
1817    let hour: u32 = datetime[11..13].parse().ok()?;
1818    let min: u32 = datetime[14..16].parse().ok()?;
1819    let sec: u32 = datetime[17..19].parse().ok()?;
1820
1821    // Convert to total seconds, shift, convert back
1822    let total_secs = (hour * 3600 + min * 60 + sec) as i64
1823        + sign as i64 * (hours * 3600 + minutes * 60 + seconds) as i64;
1824
1825    let days_shift = if total_secs < 0 {
1826        -1 - (-total_secs - 1) as i64 / 86400
1827    } else {
1828        total_secs / 86400
1829    };
1830
1831    let time_secs = ((total_secs % 86400) + 86400) % 86400;
1832    let new_hour = (time_secs / 3600) as u32;
1833    let new_min = ((time_secs % 3600) / 60) as u32;
1834    let new_sec = (time_secs % 60) as u32;
1835
1836    // Simple day shifting (doesn't handle month/year rollover perfectly for large shifts)
1837    let mut new_day = day as i32 + days_shift as i32;
1838    let mut new_month = month;
1839    let mut new_year = year;
1840
1841    let days_in_month = |m: u32, y: i32| -> i32 {
1842        match m {
1843            1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
1844            4 | 6 | 9 | 11 => 30,
1845            2 => if (y % 4 == 0 && y % 100 != 0) || y % 400 == 0 { 29 } else { 28 },
1846            _ => 30,
1847        }
1848    };
1849
1850    while new_day > days_in_month(new_month, new_year) {
1851        new_day -= days_in_month(new_month, new_year);
1852        new_month += 1;
1853        if new_month > 12 {
1854            new_month = 1;
1855            new_year += 1;
1856        }
1857    }
1858    while new_day < 1 {
1859        new_month = if new_month == 1 { 12 } else { new_month - 1 };
1860        if new_month == 12 {
1861            new_year -= 1;
1862        }
1863        new_day += days_in_month(new_month, new_year);
1864    }
1865
1866    Some(format!(
1867        "{:04}:{:02}:{:02} {:02}:{:02}:{:02}",
1868        new_year, new_month, new_day, new_hour, new_min, new_sec
1869    ))
1870}
1871
/// Format a Unix timestamp as "YYYY:MM:DD HH:MM:SS".
///
/// `secs` counts seconds from 1970-01-01 00:00:00; no time-zone offset is
/// applied. Negative values are supported and give dates before 1970.
fn unix_to_datetime(secs: i64) -> String {
    let is_leap = |y: i32| (y % 4 == 0 && y % 100 != 0) || y % 400 == 0;
    let days_in_year = |y: i32| -> i64 {
        if is_leap(y) {
            366
        } else {
            365
        }
    };

    // Euclidean division keeps the time of day in 0..86400 for negative
    // timestamps and pushes the borrow into the day count.
    let time = secs.rem_euclid(86_400);
    let mut rem = secs.div_euclid(86_400);
    let h = time / 3600;
    let m = (time % 3600) / 60;
    let s = time % 60;

    // Walk whole years backward (dates before 1970) or forward until `rem` is
    // the zero-based day of year `y`.
    let mut y = 1970i32;
    while rem < 0 {
        y -= 1;
        rem += days_in_year(y);
    }
    while rem >= days_in_year(y) {
        rem -= days_in_year(y);
        y += 1;
    }

    let months: [i64; 12] = [
        31,
        if is_leap(y) { 29 } else { 28 },
        31,
        30,
        31,
        30,
        31,
        31,
        30,
        31,
        30,
        31,
    ];
    let mut mo = 1;
    for &dm in &months {
        if rem < dm {
            break;
        }
        rem -= dm;
        mo += 1;
    }
    format!("{:04}:{:02}:{:02} {:02}:{:02}:{:02}", y, mo, rem + 1, h, m, s)
}
1896
/// Render a byte count as a short human-readable size.
///
/// Values below 1024 are printed as whole "bytes"; larger values are divided
/// by powers of 1024 and printed with one decimal as "kB", "MB" or "GB".
fn format_file_size(bytes: u64) -> String {
    const KILO: u64 = 1024;
    const MEGA: u64 = KILO * KILO;
    const GIGA: u64 = MEGA * KILO;

    if bytes < KILO {
        return format!("{} bytes", bytes);
    }

    let (divisor, unit) = if bytes < MEGA {
        (KILO, "kB")
    } else if bytes < GIGA {
        (MEGA, "MB")
    } else {
        (GIGA, "GB")
    };
    format!("{:.1} {}", bytes as f64 / divisor as f64, unit)
}
1908
/// Check if a tag name is typically XMP.
///
/// The comparison is case-insensitive (the name is lower-cased first) against
/// a fixed list of names.
fn is_xmp_tag(tag: &str) -> bool {
    const XMP_NAMES: [&str; 9] = [
        "title",
        "description",
        "subject",
        "creator",
        "rights",
        "keywords",
        "rating",
        "label",
        "hierarchicalsubject",
    ];

    let lowered = tag.to_lowercase();
    XMP_NAMES.iter().any(|&known| known == lowered)
}
1917
1918/// Encode an EXIF tag value to binary.
1919/// Returns (tag_id, format, encoded_data) or None if tag is unknown.
1920fn encode_exif_tag(
1921    tag_name: &str,
1922    value: &str,
1923    _group: &str,
1924    bo: ByteOrderMark,
1925) -> Option<(u16, exif_writer::ExifFormat, Vec<u8>)> {
1926    let tag_lower = tag_name.to_lowercase();
1927
1928    // Map common tag names to EXIF tag IDs and formats
1929    let (tag_id, format): (u16, exif_writer::ExifFormat) = match tag_lower.as_str() {
1930        // IFD0 string tags
1931        "imagedescription" => (0x010E, exif_writer::ExifFormat::Ascii),
1932        "make" => (0x010F, exif_writer::ExifFormat::Ascii),
1933        "model" => (0x0110, exif_writer::ExifFormat::Ascii),
1934        "software" => (0x0131, exif_writer::ExifFormat::Ascii),
1935        "modifydate" | "datetime" => (0x0132, exif_writer::ExifFormat::Ascii),
1936        "artist" => (0x013B, exif_writer::ExifFormat::Ascii),
1937        "copyright" => (0x8298, exif_writer::ExifFormat::Ascii),
1938        // IFD0 numeric tags
1939        "orientation" => (0x0112, exif_writer::ExifFormat::Short),
1940        "xresolution" => (0x011A, exif_writer::ExifFormat::Rational),
1941        "yresolution" => (0x011B, exif_writer::ExifFormat::Rational),
1942        "resolutionunit" => (0x0128, exif_writer::ExifFormat::Short),
1943        // ExifIFD tags
1944        "datetimeoriginal" => (0x9003, exif_writer::ExifFormat::Ascii),
1945        "createdate" | "datetimedigitized" => (0x9004, exif_writer::ExifFormat::Ascii),
1946        "usercomment" => (0x9286, exif_writer::ExifFormat::Undefined),
1947        "imageuniqueid" => (0xA420, exif_writer::ExifFormat::Ascii),
1948        "ownername" | "cameraownername" => (0xA430, exif_writer::ExifFormat::Ascii),
1949        "serialnumber" | "bodyserialnumber" => (0xA431, exif_writer::ExifFormat::Ascii),
1950        "lensmake" => (0xA433, exif_writer::ExifFormat::Ascii),
1951        "lensmodel" => (0xA434, exif_writer::ExifFormat::Ascii),
1952        "lensserialnumber" => (0xA435, exif_writer::ExifFormat::Ascii),
1953        _ => return None,
1954    };
1955
1956    let encoded = match format {
1957        exif_writer::ExifFormat::Ascii => exif_writer::encode_ascii(value),
1958        exif_writer::ExifFormat::Short => {
1959            let v: u16 = value.parse().ok()?;
1960            exif_writer::encode_u16(v, bo)
1961        }
1962        exif_writer::ExifFormat::Long => {
1963            let v: u32 = value.parse().ok()?;
1964            exif_writer::encode_u32(v, bo)
1965        }
1966        exif_writer::ExifFormat::Rational => {
1967            // Parse "N/D" or just "N"
1968            if let Some(slash) = value.find('/') {
1969                let num: u32 = value[..slash].trim().parse().ok()?;
1970                let den: u32 = value[slash + 1..].trim().parse().ok()?;
1971                exif_writer::encode_urational(num, den, bo)
1972            } else if let Ok(v) = value.parse::<f64>() {
1973                // Convert float to rational
1974                let den = 10000u32;
1975                let num = (v * den as f64).round() as u32;
1976                exif_writer::encode_urational(num, den, bo)
1977            } else {
1978                return None;
1979            }
1980        }
1981        exif_writer::ExifFormat::Undefined => {
1982            // UserComment: 8 bytes charset + data
1983            let mut data = vec![0x41, 0x53, 0x43, 0x49, 0x49, 0x00, 0x00, 0x00]; // "ASCII\0\0\0"
1984            data.extend_from_slice(value.as_bytes());
1985            data
1986        }
1987        _ => return None,
1988    };
1989
1990    Some((tag_id, format, encoded))
1991}
1992
1993/// Compute text file tags (from Perl Text.pm).
1994fn compute_text_tags(data: &[u8], is_csv: bool) -> Vec<Tag> {
1995    let mut tags = Vec::new();
1996    let mk = |name: &str, val: String| Tag {
1997        id: crate::tag::TagId::Text(name.into()),
1998        name: name.into(), description: name.into(),
1999        group: crate::tag::TagGroup { family0: "File".into(), family1: "File".into(), family2: "Other".into() },
2000        raw_value: Value::String(val.clone()), print_value: val, priority: 0,
2001    };
2002
2003    // Detect encoding and BOM
2004    let is_ascii = data.iter().all(|&b| b < 128);
2005    let has_utf8_bom = data.starts_with(&[0xEF, 0xBB, 0xBF]);
2006    let has_utf16le_bom = data.starts_with(&[0xFF, 0xFE]) && !data.starts_with(&[0xFF, 0xFE, 0x00, 0x00]);
2007    let has_utf16be_bom = data.starts_with(&[0xFE, 0xFF]);
2008    let has_utf32le_bom = data.starts_with(&[0xFF, 0xFE, 0x00, 0x00]);
2009    let has_utf32be_bom = data.starts_with(&[0x00, 0x00, 0xFE, 0xFF]);
2010
2011    // Detect if file has weird non-text control characters (like multi-byte unicode without BOM)
2012    let has_weird_ctrl = data.iter().any(|&b| (b <= 0x06) || (b >= 0x0e && b <= 0x1a) || (b >= 0x1c && b <= 0x1f) || b == 0x7f);
2013
2014    let (encoding, is_bom, is_utf16) = if has_utf32le_bom {
2015        ("utf-32le", true, false)
2016    } else if has_utf32be_bom {
2017        ("utf-32be", true, false)
2018    } else if has_utf16le_bom {
2019        ("utf-16le", true, true)
2020    } else if has_utf16be_bom {
2021        ("utf-16be", true, true)
2022    } else if has_weird_ctrl {
2023        // Not a text file (has binary-like control chars but no recognized multi-byte marker)
2024        return tags;
2025    } else if is_ascii {
2026        ("us-ascii", false, false)
2027    } else {
2028        // Check UTF-8
2029        let is_valid_utf8 = std::str::from_utf8(data).is_ok();
2030        if is_valid_utf8 {
2031            if has_utf8_bom {
2032                ("utf-8", true, false)
2033            } else {
2034                // Check if it has high bytes suggesting iso-8859-1 vs utf-8
2035                // Perl's IsUTF8: returns >0 if valid UTF-8 with multi-byte, 0 if ASCII, <0 if invalid
2036                // For simplicity: valid UTF-8 without BOM = utf-8
2037                ("utf-8", false, false)
2038            }
2039        } else if !data.iter().any(|&b| b >= 0x80 && b <= 0x9f) {
2040            ("iso-8859-1", false, false)
2041        } else {
2042            ("unknown-8bit", false, false)
2043        }
2044    };
2045
2046    tags.push(mk("MIMEEncoding", encoding.into()));
2047
2048    if is_bom {
2049        tags.push(mk("ByteOrderMark", "Yes".into()));
2050    }
2051
2052    // Count newlines and detect type
2053    let has_cr = data.contains(&b'\r');
2054    let has_lf = data.contains(&b'\n');
2055    let newline_type = if has_cr && has_lf { "Windows CRLF" }
2056        else if has_lf { "Unix LF" }
2057        else if has_cr { "Macintosh CR" }
2058        else { "(none)" };
2059    tags.push(mk("Newlines", newline_type.into()));
2060
2061    if is_csv {
2062        // CSV analysis: detect delimiter, quoting, column count, row count
2063        let text = String::from_utf8_lossy(data);
2064        let mut delim = "";
2065        let mut quot = "";
2066        let mut ncols = 1usize;
2067        let mut nrows = 0usize;
2068
2069        for line in text.lines() {
2070            if nrows == 0 {
2071                // Detect delimiter from first line
2072                let comma_count = line.matches(',').count();
2073                let semi_count = line.matches(';').count();
2074                let tab_count = line.matches('\t').count();
2075                if comma_count > semi_count && comma_count > tab_count {
2076                    delim = ",";
2077                    ncols = comma_count + 1;
2078                } else if semi_count > tab_count {
2079                    delim = ";";
2080                    ncols = semi_count + 1;
2081                } else if tab_count > 0 {
2082                    delim = "\t";
2083                    ncols = tab_count + 1;
2084                } else {
2085                    delim = "";
2086                    ncols = 1;
2087                }
2088                // Detect quoting
2089                if line.contains('"') { quot = "\""; }
2090                else if line.contains('\'') { quot = "'"; }
2091            }
2092            nrows += 1;
2093            if nrows >= 1000 { break; }
2094        }
2095
2096        let delim_display = match delim {
2097            "," => "Comma",
2098            ";" => "Semicolon",
2099            "\t" => "Tab",
2100            _ => "(none)",
2101        };
2102        let quot_display = match quot {
2103            "\"" => "Double quotes",
2104            "'" => "Single quotes",
2105            _ => "(none)",
2106        };
2107
2108        tags.push(mk("Delimiter", delim_display.into()));
2109        tags.push(mk("Quoting", quot_display.into()));
2110        tags.push(mk("ColumnCount", ncols.to_string()));
2111        if nrows > 0 {
2112            tags.push(mk("RowCount", nrows.to_string()));
2113        }
2114    } else if !is_utf16 {
2115        // Line count and word count for plain text files (not UTF-16/32)
2116        let line_count = data.iter().filter(|&&b| b == b'\n').count();
2117        let line_count = if line_count == 0 && !data.is_empty() { 1 } else { line_count };
2118        tags.push(mk("LineCount", line_count.to_string()));
2119
2120        let text = String::from_utf8_lossy(data);
2121        let word_count = text.split_whitespace().count();
2122        tags.push(mk("WordCount", word_count.to_string()));
2123    }
2124
2125    tags
2126}
exiftool_rs/exiftool.rs

exiftool_rs/
exiftool.rs