1use std::collections::HashMap;
7use std::fs;
8use std::path::Path;
9
10use crate::error::{Error, Result};
11use crate::file_type::{self, FileType};
12use crate::formats;
13use crate::metadata::exif::ByteOrderMark;
14use crate::tag::Tag;
15use crate::value::Value;
16use crate::writer::{exif_writer, iptc_writer, jpeg_writer, matroska_writer, mp4_writer, pdf_writer, png_writer, psd_writer, tiff_writer, webp_writer, xmp_writer};
17
/// Settings that control how metadata is extracted and reported.
#[derive(Debug, Clone)]
pub struct Options {
    /// When `true`, `get_info` also reports repeated tag names, keyed as
    /// `"Name [family0:family1]"`; when `false` only the first occurrence is kept.
    pub duplicates: bool,
    /// When `true`, reported values come from each tag's `print_value`;
    /// otherwise the raw value's display string is used.
    pub print_conv: bool,
    /// NOTE(review): not read anywhere in the visible part of this file;
    /// presumably a scan-depth level — confirm against the readers.
    pub fast_scan: u8,
    /// Tag names to keep after extraction (compared case-insensitively).
    /// An empty list keeps every tag.
    pub requested_tags: Vec<String>,
    /// NOTE(review): not read anywhere in the visible part of this file;
    /// presumably an embedded-data extraction level — confirm against the readers.
    pub extract_embedded: u8,
}
32
33impl Default for Options {
34 fn default() -> Self {
35 Self {
36 duplicates: false,
37 print_conv: true,
38 fast_scan: 0,
39 requested_tags: Vec::new(),
40 extract_embedded: 0,
41 }
42 }
43}
44
/// A pending change to a single tag, queued until a file is written.
#[derive(Debug, Clone)]
pub struct NewValue {
    /// Tag name without any group prefix.
    pub tag: String,
    /// Optional group qualifier (the part before `:` when queued through
    /// `set_new_value`).
    pub group: Option<String>,
    /// Value to write. `None` carries no value: the JPEG and PNG paths treat it
    /// as a removal request, while several other writers simply skip the entry.
    pub value: Option<String>,
}
67
/// Reads metadata from files and queues/writes tag changes.
pub struct ExifTool {
    /// Extraction and reporting settings.
    options: Options,
    /// Changes queued by `set_new_value` / `set_new_values_from_file`; consumed
    /// (but not cleared) by `write_info`.
    new_values: Vec<NewValue>,
}
100
/// Flat map from tag name (or `"Name [family0:family1]"` for reported
/// duplicates) to the tag's value rendered as a string.
pub type ImageInfo = HashMap<String, String>;
103
104impl ExifTool {
105 pub fn new() -> Self {
107 Self {
108 options: Options::default(),
109 new_values: Vec::new(),
110 }
111 }
112
113 pub fn with_options(options: Options) -> Self {
115 Self {
116 options,
117 new_values: Vec::new(),
118 }
119 }
120
    /// Returns a mutable reference to the current options so callers can
    /// adjust them in place.
    pub fn options_mut(&mut self) -> &mut Options {
        &mut self.options
    }
125
    /// Returns a shared reference to the current options.
    pub fn options(&self) -> &Options {
        &self.options
    }
130
131 pub fn set_new_value(&mut self, tag: &str, value: Option<&str>) {
153 let (group, tag_name) = if let Some(colon_pos) = tag.find(':') {
154 (Some(tag[..colon_pos].to_string()), tag[colon_pos + 1..].to_string())
155 } else {
156 (None, tag.to_string())
157 };
158
159 self.new_values.push(NewValue {
160 tag: tag_name,
161 group,
162 value: value.map(|v| v.to_string()),
163 });
164 }
165
    /// Discards every queued change.
    pub fn clear_new_values(&mut self) {
        self.new_values.clear();
    }
170
171 pub fn set_new_values_from_file<P: AsRef<Path>>(
176 &mut self,
177 src_path: P,
178 tags_to_copy: Option<&[&str]>,
179 ) -> Result<u32> {
180 let src_tags = self.extract_info(src_path)?;
181 let mut count = 0u32;
182
183 for tag in &src_tags {
184 if tag.group.family0 == "File" || tag.group.family0 == "Composite" {
186 continue;
187 }
188 if tag.print_value.starts_with("(Binary") || tag.print_value.starts_with("(Undefined") {
190 continue;
191 }
192 if tag.print_value.is_empty() {
193 continue;
194 }
195
196 if let Some(filter) = tags_to_copy {
198 let name_lower = tag.name.to_lowercase();
199 if !filter.iter().any(|f| f.to_lowercase() == name_lower) {
200 continue;
201 }
202 }
203
204 let _full_tag = format!("{}:{}", tag.group.family0, tag.name);
205 self.new_values.push(NewValue {
206 tag: tag.name.clone(),
207 group: Some(tag.group.family0.clone()),
208 value: Some(tag.print_value.clone()),
209 });
210 count += 1;
211 }
212
213 Ok(count)
214 }
215
216 pub fn set_file_name_from_tag<P: AsRef<Path>>(
218 &self,
219 path: P,
220 tag_name: &str,
221 template: &str,
222 ) -> Result<String> {
223 let path = path.as_ref();
224 let tags = self.extract_info(path)?;
225
226 let tag_value = tags
227 .iter()
228 .find(|t| t.name.to_lowercase() == tag_name.to_lowercase())
229 .map(|t| &t.print_value)
230 .ok_or_else(|| Error::TagNotFound(tag_name.to_string()))?;
231
232 let new_name = if template.contains('%') {
235 template.replace("%v", value_to_filename(tag_value).as_str())
236 } else {
237 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
239 let clean = value_to_filename(tag_value);
240 if ext.is_empty() {
241 clean
242 } else {
243 format!("{}.{}", clean, ext)
244 }
245 };
246
247 let parent = path.parent().unwrap_or(Path::new(""));
248 let new_path = parent.join(&new_name);
249
250 fs::rename(path, &new_path).map_err(Error::Io)?;
251 Ok(new_path.to_string_lossy().to_string())
252 }
253
254 pub fn write_info<P: AsRef<Path>, Q: AsRef<Path>>(&self, src_path: P, dst_path: Q) -> Result<u32> {
259 let src_path = src_path.as_ref();
260 let dst_path = dst_path.as_ref();
261 let data = fs::read(src_path).map_err(Error::Io)?;
262
263 let file_type = self.detect_file_type(&data, src_path)?;
264 let output = self.apply_changes(&data, file_type)?;
265
266 let temp_path = dst_path.with_extension("exiftool_tmp");
268 fs::write(&temp_path, &output).map_err(Error::Io)?;
269 fs::rename(&temp_path, dst_path).map_err(Error::Io)?;
270
271 Ok(self.new_values.len() as u32)
272 }
273
274 fn apply_changes(&self, data: &[u8], file_type: FileType) -> Result<Vec<u8>> {
276 match file_type {
277 FileType::Jpeg => self.write_jpeg(data),
278 FileType::Png => self.write_png(data),
279 FileType::Tiff | FileType::Dng | FileType::Cr2 | FileType::Nef
280 | FileType::Arw | FileType::Orf | FileType::Pef => self.write_tiff(data),
281 FileType::WebP => self.write_webp(data),
282 FileType::Mp4 | FileType::QuickTime | FileType::M4a
283 | FileType::ThreeGP | FileType::F4v => self.write_mp4(data),
284 FileType::Psd => self.write_psd(data),
285 FileType::Pdf => self.write_pdf(data),
286 FileType::Heif | FileType::Avif => self.write_mp4(data),
287 FileType::Mkv | FileType::WebM => self.write_matroska(data),
288 FileType::Gif => {
289 let comment = self.new_values.iter()
290 .find(|nv| nv.tag.to_lowercase() == "comment")
291 .and_then(|nv| nv.value.clone());
292 crate::writer::gif_writer::write_gif(data, comment.as_deref())
293 }
294 FileType::Flac => {
295 let changes: Vec<(&str, &str)> = self.new_values.iter()
296 .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
297 .collect();
298 crate::writer::flac_writer::write_flac(data, &changes)
299 }
300 FileType::Mp3 | FileType::Aiff => {
301 let changes: Vec<(&str, &str)> = self.new_values.iter()
302 .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
303 .collect();
304 crate::writer::id3_writer::write_id3(data, &changes)
305 }
306 FileType::Jp2 | FileType::Jxl => {
307 let new_xmp = if self.new_values.iter().any(|nv| nv.group.as_deref() == Some("XMP")) {
308 let refs: Vec<&NewValue> = self.new_values.iter()
309 .filter(|nv| nv.group.as_deref() == Some("XMP"))
310 .collect();
311 Some(self.build_new_xmp(&refs))
312 } else { None };
313 crate::writer::jp2_writer::write_jp2(data, new_xmp.as_deref(), None)
314 }
315 FileType::PostScript => {
316 let changes: Vec<(&str, &str)> = self.new_values.iter()
317 .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
318 .collect();
319 crate::writer::ps_writer::write_postscript(data, &changes)
320 }
321 FileType::Ogg | FileType::Opus => {
322 let changes: Vec<(&str, &str)> = self.new_values.iter()
323 .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
324 .collect();
325 crate::writer::ogg_writer::write_ogg(data, &changes)
326 }
327 FileType::Xmp => {
328 let props: Vec<xmp_writer::XmpProperty> = self.new_values.iter()
329 .filter_map(|nv| {
330 let val = nv.value.as_deref()?;
331 Some(xmp_writer::XmpProperty {
332 namespace: nv.group.clone().unwrap_or_else(|| "dc".into()),
333 property: nv.tag.clone(),
334 values: vec![val.to_string()],
335 prop_type: xmp_writer::XmpPropertyType::Simple,
336 })
337 })
338 .collect();
339 Ok(crate::writer::xmp_sidecar_writer::write_xmp_sidecar(&props))
340 }
341 _ => Err(Error::UnsupportedFileType(format!("writing not yet supported for {}", file_type))),
342 }
343 }
344
345 fn write_jpeg(&self, data: &[u8]) -> Result<Vec<u8>> {
347 let mut exif_values: Vec<&NewValue> = Vec::new();
349 let mut xmp_values: Vec<&NewValue> = Vec::new();
350 let mut iptc_values: Vec<&NewValue> = Vec::new();
351 let mut comment_value: Option<&str> = None;
352 let mut remove_exif = false;
353 let mut remove_xmp = false;
354 let mut remove_iptc = false;
355 let mut remove_comment = false;
356
357 for nv in &self.new_values {
358 let group = nv.group.as_deref().unwrap_or("");
359 let group_upper = group.to_uppercase();
360
361 if nv.value.is_none() && nv.tag == "*" {
363 match group_upper.as_str() {
364 "EXIF" => { remove_exif = true; continue; }
365 "XMP" => { remove_xmp = true; continue; }
366 "IPTC" => { remove_iptc = true; continue; }
367 _ => {}
368 }
369 }
370
371 match group_upper.as_str() {
372 "XMP" => xmp_values.push(nv),
373 "IPTC" => iptc_values.push(nv),
374 "EXIF" | "IFD0" | "EXIFIFD" | "GPS" => exif_values.push(nv),
375 "" => {
376 if nv.tag.to_lowercase() == "comment" {
378 if nv.value.is_none() {
379 remove_comment = true;
380 } else {
381 comment_value = nv.value.as_deref();
382 }
383 } else if is_xmp_tag(&nv.tag) {
384 xmp_values.push(nv);
385 } else {
386 exif_values.push(nv);
387 }
388 }
389 _ => exif_values.push(nv), }
391 }
392
393 let new_exif = if !exif_values.is_empty() {
395 Some(self.build_new_exif(data, &exif_values)?)
396 } else {
397 None
398 };
399
400 let new_xmp = if !xmp_values.is_empty() {
402 Some(self.build_new_xmp(&xmp_values))
403 } else {
404 None
405 };
406
407 let new_iptc_data = if !iptc_values.is_empty() {
409 let records: Vec<iptc_writer::IptcRecord> = iptc_values
410 .iter()
411 .filter_map(|nv| {
412 let value = nv.value.as_deref()?;
413 let (record, dataset) = iptc_writer::tag_name_to_iptc(&nv.tag)?;
414 Some(iptc_writer::IptcRecord {
415 record,
416 dataset,
417 data: value.as_bytes().to_vec(),
418 })
419 })
420 .collect();
421 if records.is_empty() {
422 None
423 } else {
424 Some(iptc_writer::build_iptc(&records))
425 }
426 } else {
427 None
428 };
429
430 jpeg_writer::write_jpeg(
432 data,
433 new_exif.as_deref(),
434 new_xmp.as_deref(),
435 new_iptc_data.as_deref(),
436 comment_value,
437 remove_exif,
438 remove_xmp,
439 remove_iptc,
440 remove_comment,
441 )
442 }
443
444 fn build_new_exif(&self, jpeg_data: &[u8], values: &[&NewValue]) -> Result<Vec<u8>> {
446 let bo = ByteOrderMark::BigEndian;
447 let mut ifd0_entries = Vec::new();
448 let mut exif_entries = Vec::new();
449 let mut gps_entries = Vec::new();
450
451 let existing = extract_existing_exif_entries(jpeg_data, bo);
453 for entry in &existing {
454 match classify_exif_tag(entry.tag) {
455 ExifIfdGroup::Ifd0 => ifd0_entries.push(entry.clone()),
456 ExifIfdGroup::ExifIfd => exif_entries.push(entry.clone()),
457 ExifIfdGroup::Gps => gps_entries.push(entry.clone()),
458 }
459 }
460
461 let deleted_tags: Vec<u16> = values
463 .iter()
464 .filter(|nv| nv.value.is_none())
465 .filter_map(|nv| tag_name_to_id(&nv.tag))
466 .collect();
467
468 ifd0_entries.retain(|e| !deleted_tags.contains(&e.tag));
470 exif_entries.retain(|e| !deleted_tags.contains(&e.tag));
471 gps_entries.retain(|e| !deleted_tags.contains(&e.tag));
472
473 for nv in values {
475 if nv.value.is_none() {
476 continue;
477 }
478 let value_str = nv.value.as_deref().unwrap_or("");
479 let group = nv.group.as_deref().unwrap_or("");
480
481 if let Some((tag_id, format, encoded)) = encode_exif_tag(&nv.tag, value_str, group, bo) {
482 let entry = exif_writer::IfdEntry {
483 tag: tag_id,
484 format,
485 data: encoded,
486 };
487
488 let target = match group.to_uppercase().as_str() {
489 "GPS" => &mut gps_entries,
490 "EXIFIFD" => &mut exif_entries,
491 _ => match classify_exif_tag(tag_id) {
492 ExifIfdGroup::ExifIfd => &mut exif_entries,
493 ExifIfdGroup::Gps => &mut gps_entries,
494 ExifIfdGroup::Ifd0 => &mut ifd0_entries,
495 },
496 };
497
498 if let Some(existing) = target.iter_mut().find(|e| e.tag == tag_id) {
500 *existing = entry;
501 } else {
502 target.push(entry);
503 }
504 }
505 }
506
507 ifd0_entries.retain(|e| e.tag != 0x8769 && e.tag != 0x8825 && e.tag != 0xA005);
509
510 exif_writer::build_exif(&ifd0_entries, &exif_entries, &gps_entries, bo)
511 }
512
513 fn write_png(&self, data: &[u8]) -> Result<Vec<u8>> {
515 let mut new_text: Vec<(&str, &str)> = Vec::new();
516 let mut remove_text: Vec<&str> = Vec::new();
517
518 let owned_pairs: Vec<(String, String)> = self.new_values.iter()
521 .filter(|nv| nv.value.is_some())
522 .map(|nv| (nv.tag.clone(), nv.value.clone().unwrap()))
523 .collect();
524
525 for (tag, value) in &owned_pairs {
526 new_text.push((tag.as_str(), value.as_str()));
527 }
528
529 for nv in &self.new_values {
530 if nv.value.is_none() {
531 remove_text.push(&nv.tag);
532 }
533 }
534
535 png_writer::write_png(data, &new_text, None, &remove_text)
536 }
537
538 fn write_psd(&self, data: &[u8]) -> Result<Vec<u8>> {
540 let mut iptc_values = Vec::new();
541 let mut xmp_values = Vec::new();
542
543 for nv in &self.new_values {
544 let group = nv.group.as_deref().unwrap_or("").to_uppercase();
545 match group.as_str() {
546 "XMP" => xmp_values.push(nv),
547 "IPTC" => iptc_values.push(nv),
548 _ => {
549 if is_xmp_tag(&nv.tag) { xmp_values.push(nv); }
550 else { iptc_values.push(nv); }
551 }
552 }
553 }
554
555 let new_iptc = if !iptc_values.is_empty() {
556 let records: Vec<_> = iptc_values.iter().filter_map(|nv| {
557 let value = nv.value.as_deref()?;
558 let (record, dataset) = iptc_writer::tag_name_to_iptc(&nv.tag)?;
559 Some(iptc_writer::IptcRecord { record, dataset, data: value.as_bytes().to_vec() })
560 }).collect();
561 if records.is_empty() { None } else { Some(iptc_writer::build_iptc(&records)) }
562 } else { None };
563
564 let new_xmp = if !xmp_values.is_empty() {
565 let refs: Vec<&NewValue> = xmp_values.iter().copied().collect();
566 Some(self.build_new_xmp(&refs))
567 } else { None };
568
569 psd_writer::write_psd(data, new_iptc.as_deref(), new_xmp.as_deref())
570 }
571
572 fn write_matroska(&self, data: &[u8]) -> Result<Vec<u8>> {
574 let changes: Vec<(&str, &str)> = self.new_values.iter()
575 .filter_map(|nv| {
576 let value = nv.value.as_deref()?;
577 Some((nv.tag.as_str(), value))
578 })
579 .collect();
580
581 matroska_writer::write_matroska(data, &changes)
582 }
583
584 fn write_pdf(&self, data: &[u8]) -> Result<Vec<u8>> {
586 let changes: Vec<(&str, &str)> = self.new_values.iter()
587 .filter_map(|nv| {
588 let value = nv.value.as_deref()?;
589 Some((nv.tag.as_str(), value))
590 })
591 .collect();
592
593 pdf_writer::write_pdf(data, &changes)
594 }
595
596 fn write_mp4(&self, data: &[u8]) -> Result<Vec<u8>> {
598 let mut ilst_tags: Vec<([u8; 4], String)> = Vec::new();
599 let mut xmp_values: Vec<&NewValue> = Vec::new();
600
601 for nv in &self.new_values {
602 if nv.value.is_none() { continue; }
603 let group = nv.group.as_deref().unwrap_or("").to_uppercase();
604 if group == "XMP" {
605 xmp_values.push(nv);
606 } else if let Some(key) = mp4_writer::tag_to_ilst_key(&nv.tag) {
607 ilst_tags.push((key, nv.value.clone().unwrap()));
608 }
609 }
610
611 let tag_refs: Vec<(&[u8; 4], &str)> = ilst_tags.iter()
612 .map(|(k, v)| (k, v.as_str()))
613 .collect();
614
615 let new_xmp = if !xmp_values.is_empty() {
616 let refs: Vec<&NewValue> = xmp_values.iter().copied().collect();
617 Some(self.build_new_xmp(&refs))
618 } else {
619 None
620 };
621
622 mp4_writer::write_mp4(data, &tag_refs, new_xmp.as_deref())
623 }
624
625 fn write_webp(&self, data: &[u8]) -> Result<Vec<u8>> {
627 let mut exif_values: Vec<&NewValue> = Vec::new();
628 let mut xmp_values: Vec<&NewValue> = Vec::new();
629 let mut remove_exif = false;
630 let mut remove_xmp = false;
631
632 for nv in &self.new_values {
633 let group = nv.group.as_deref().unwrap_or("").to_uppercase();
634 if nv.value.is_none() && nv.tag == "*" {
635 if group == "EXIF" { remove_exif = true; }
636 if group == "XMP" { remove_xmp = true; }
637 continue;
638 }
639 match group.as_str() {
640 "XMP" => xmp_values.push(nv),
641 _ => exif_values.push(nv),
642 }
643 }
644
645 let new_exif = if !exif_values.is_empty() {
646 let bo = ByteOrderMark::BigEndian;
647 let mut entries = Vec::new();
648 for nv in &exif_values {
649 if let Some(ref v) = nv.value {
650 let group = nv.group.as_deref().unwrap_or("");
651 if let Some((tag_id, format, encoded)) = encode_exif_tag(&nv.tag, v, group, bo) {
652 entries.push(exif_writer::IfdEntry { tag: tag_id, format, data: encoded });
653 }
654 }
655 }
656 if !entries.is_empty() {
657 Some(exif_writer::build_exif(&entries, &[], &[], bo)?)
658 } else {
659 None
660 }
661 } else {
662 None
663 };
664
665 let new_xmp = if !xmp_values.is_empty() {
666 Some(self.build_new_xmp(&xmp_values.iter().map(|v| *v).collect::<Vec<_>>()))
667 } else {
668 None
669 };
670
671 webp_writer::write_webp(
672 data,
673 new_exif.as_deref(),
674 new_xmp.as_deref(),
675 remove_exif,
676 remove_xmp,
677 )
678 }
679
680 fn write_tiff(&self, data: &[u8]) -> Result<Vec<u8>> {
682 let bo = if data.starts_with(b"II") {
683 ByteOrderMark::LittleEndian
684 } else {
685 ByteOrderMark::BigEndian
686 };
687
688 let mut changes: Vec<(u16, Vec<u8>)> = Vec::new();
689 for nv in &self.new_values {
690 if let Some(ref value) = nv.value {
691 let group = nv.group.as_deref().unwrap_or("");
692 if let Some((tag_id, _format, encoded)) = encode_exif_tag(&nv.tag, value, group, bo) {
693 changes.push((tag_id, encoded));
694 }
695 }
696 }
697
698 tiff_writer::write_tiff(data, &changes)
699 }
700
701 fn build_new_xmp(&self, values: &[&NewValue]) -> Vec<u8> {
703 let mut properties = Vec::new();
704
705 for nv in values {
706 let value_str = match &nv.value {
707 Some(v) => v.clone(),
708 None => continue,
709 };
710
711 let ns = nv.group.as_deref().unwrap_or("dc").to_lowercase();
712 let ns = if ns == "xmp" { "xmp".to_string() } else { ns };
713
714 let prop_type = match nv.tag.to_lowercase().as_str() {
715 "title" | "description" | "rights" => xmp_writer::XmpPropertyType::LangAlt,
716 "subject" | "keywords" => xmp_writer::XmpPropertyType::Bag,
717 "creator" => xmp_writer::XmpPropertyType::Seq,
718 _ => xmp_writer::XmpPropertyType::Simple,
719 };
720
721 let values = if matches!(prop_type, xmp_writer::XmpPropertyType::Bag | xmp_writer::XmpPropertyType::Seq) {
722 value_str.split(',').map(|s| s.trim().to_string()).collect()
723 } else {
724 vec![value_str]
725 };
726
727 properties.push(xmp_writer::XmpProperty {
728 namespace: ns,
729 property: nv.tag.clone(),
730 values,
731 prop_type,
732 });
733 }
734
735 xmp_writer::build_xmp(&properties).into_bytes()
736 }
737
738 pub fn image_info<P: AsRef<Path>>(&self, path: P) -> Result<ImageInfo> {
746 let tags = self.extract_info(path)?;
747 Ok(self.get_info(&tags))
748 }
749
750 pub fn extract_info<P: AsRef<Path>>(&self, path: P) -> Result<Vec<Tag>> {
754 let path = path.as_ref();
755 let data = fs::read(path).map_err(Error::Io)?;
756
757 self.extract_info_from_bytes(&data, path)
758 }
759
760 pub fn extract_info_from_bytes(&self, data: &[u8], path: &Path) -> Result<Vec<Tag>> {
762 let file_type_result = self.detect_file_type(data, path);
763 let (file_type, mut tags) = match file_type_result {
764 Ok(ft) => {
765 let t = self.process_file(data, ft).or_else(|_| {
766 self.process_by_extension(data, path)
767 })?;
768 (Some(ft), t)
769 }
770 Err(_) => {
771 let t = self.process_by_extension(data, path)?;
773 (None, t)
774 }
775 };
776 let file_type = file_type.unwrap_or(FileType::Zip); tags.push(Tag {
780 id: crate::tag::TagId::Text("FileType".into()),
781 name: "FileType".into(),
782 description: "File Type".into(),
783 group: crate::tag::TagGroup {
784 family0: "File".into(),
785 family1: "File".into(),
786 family2: "Other".into(),
787 },
788 raw_value: Value::String(format!("{:?}", file_type)),
789 print_value: file_type.description().to_string(),
790 priority: 0,
791 });
792
793 tags.push(Tag {
794 id: crate::tag::TagId::Text("MIMEType".into()),
795 name: "MIMEType".into(),
796 description: "MIME Type".into(),
797 group: crate::tag::TagGroup {
798 family0: "File".into(),
799 family1: "File".into(),
800 family2: "Other".into(),
801 },
802 raw_value: Value::String(file_type.mime_type().to_string()),
803 print_value: file_type.mime_type().to_string(),
804 priority: 0,
805 });
806
807 if let Ok(metadata) = fs::metadata(path) {
808 tags.push(Tag {
809 id: crate::tag::TagId::Text("FileSize".into()),
810 name: "FileSize".into(),
811 description: "File Size".into(),
812 group: crate::tag::TagGroup {
813 family0: "File".into(),
814 family1: "File".into(),
815 family2: "Other".into(),
816 },
817 raw_value: Value::U32(metadata.len() as u32),
818 print_value: format_file_size(metadata.len()),
819 priority: 0,
820 });
821 }
822
823 let file_tag = |name: &str, val: Value| -> Tag {
825 Tag {
826 id: crate::tag::TagId::Text(name.to_string()),
827 name: name.to_string(), description: name.to_string(),
828 group: crate::tag::TagGroup { family0: "File".into(), family1: "File".into(), family2: "Other".into() },
829 raw_value: val.clone(), print_value: val.to_display_string(), priority: 0,
830 }
831 };
832
833 if let Some(fname) = path.file_name().and_then(|n| n.to_str()) {
834 tags.push(file_tag("FileName", Value::String(fname.to_string())));
835 }
836 if let Some(dir) = path.parent().and_then(|p| p.to_str()) {
837 tags.push(file_tag("Directory", Value::String(dir.to_string())));
838 }
839 let canonical_ext = file_type.extensions().first().copied().unwrap_or("");
841 if !canonical_ext.is_empty() {
842 tags.push(file_tag("FileTypeExtension", Value::String(canonical_ext.to_string())));
843 }
844
845 #[cfg(unix)]
846 if let Ok(metadata) = fs::metadata(path) {
847 use std::os::unix::fs::MetadataExt;
848 let mode = metadata.mode();
849 tags.push(file_tag("FilePermissions", Value::String(format!("{:o}", mode & 0o7777))));
850
851 if let Ok(modified) = metadata.modified() {
853 if let Ok(dur) = modified.duration_since(std::time::UNIX_EPOCH) {
854 let secs = dur.as_secs() as i64;
855 tags.push(file_tag("FileModifyDate", Value::String(unix_to_datetime(secs))));
856 }
857 }
858 if let Ok(accessed) = metadata.accessed() {
860 if let Ok(dur) = accessed.duration_since(std::time::UNIX_EPOCH) {
861 let secs = dur.as_secs() as i64;
862 tags.push(file_tag("FileAccessDate", Value::String(unix_to_datetime(secs))));
863 }
864 }
865 let ctime = metadata.ctime();
867 if ctime > 0 {
868 tags.push(file_tag("FileInodeChangeDate", Value::String(unix_to_datetime(ctime))));
869 }
870 }
871
872 {
874 let bo_str = if data.len() > 8 {
875 let check: Option<&[u8]> = if data.starts_with(&[0xFF, 0xD8]) {
877 data.windows(6).position(|w| w == b"Exif\0\0")
879 .map(|p| &data[p+6..])
880 } else if data.starts_with(b"FUJIFILMCCD-RAW") && data.len() >= 0x60 {
881 let jpeg_offset = u32::from_be_bytes([data[0x54], data[0x55], data[0x56], data[0x57]]) as usize;
883 let jpeg_length = u32::from_be_bytes([data[0x58], data[0x59], data[0x5A], data[0x5B]]) as usize;
884 if jpeg_offset > 0 && jpeg_offset + jpeg_length <= data.len() {
885 let jpeg = &data[jpeg_offset..jpeg_offset + jpeg_length];
886 jpeg.windows(6).position(|w| w == b"Exif\0\0")
887 .map(|p| &jpeg[p+6..])
888 } else {
889 None
890 }
891 } else if data.starts_with(b"RIFF") && data.len() >= 12 {
892 let mut riff_bo: Option<&[u8]> = None;
894 let mut pos = 12usize;
895 while pos + 8 <= data.len() {
896 let cid = &data[pos..pos+4];
897 let csz = u32::from_le_bytes([data[pos+4],data[pos+5],data[pos+6],data[pos+7]]) as usize;
898 let cstart = pos + 8;
899 let cend = (cstart + csz).min(data.len());
900 if cid == b"EXIF" && cend > cstart {
901 let exif_data = &data[cstart..cend];
902 let tiff = if exif_data.starts_with(b"Exif\0\0") { &exif_data[6..] } else { exif_data };
903 riff_bo = Some(tiff);
904 break;
905 }
906 if cid == b"LIST" && cend >= cstart + 4 {
908 }
910 pos = cend + (csz & 1);
911 }
912 riff_bo
913 } else if data.starts_with(&[0x00, 0x00, 0x00, 0x0C, b'J', b'X', b'L', b' ']) {
914 let mut jxl_bo: Option<String> = None;
916 let mut jpos = 12usize; while jpos + 8 <= data.len() {
918 let bsize = u32::from_be_bytes([data[jpos], data[jpos+1], data[jpos+2], data[jpos+3]]) as usize;
919 let btype = &data[jpos+4..jpos+8];
920 if bsize < 8 || jpos + bsize > data.len() { break; }
921 if btype == b"brob" && jpos + bsize > 12 {
922 let inner_type = &data[jpos+8..jpos+12];
923 if inner_type == b"Exif" || inner_type == b"exif" {
924 let brotli_payload = &data[jpos+12..jpos+bsize];
925 use std::io::Cursor;
926 let mut inp = Cursor::new(brotli_payload);
927 let mut out: Vec<u8> = Vec::new();
928 if brotli::BrotliDecompress(&mut inp, &mut out).is_ok() {
929 let exif_start = if out.len() > 4 { 4 } else { 0 };
930 if exif_start < out.len() {
931 if out[exif_start..].starts_with(b"MM") {
932 jxl_bo = Some("Big-endian (Motorola, MM)".to_string());
933 } else if out[exif_start..].starts_with(b"II") {
934 jxl_bo = Some("Little-endian (Intel, II)".to_string());
935 }
936 }
937 }
938 break;
939 }
940 }
941 jpos += bsize;
942 }
943 if let Some(bo) = jxl_bo {
944 if !bo.is_empty() && file_type != FileType::Btf {
945 tags.push(file_tag("ExifByteOrder", Value::String(bo)));
946 }
947 }
948 None
950 } else if data.starts_with(&[0x00, b'M', b'R', b'M']) {
951 let mrw_data_offset = if data.len() >= 8 {
953 u32::from_be_bytes([data[4], data[5], data[6], data[7]]) as usize + 8
954 } else { 0 };
955 let mut mrw_bo: Option<&[u8]> = None;
956 let mut mpos = 8usize;
957 while mpos + 8 <= mrw_data_offset.min(data.len()) {
958 let seg_tag = &data[mpos..mpos+4];
959 let seg_len = u32::from_be_bytes([data[mpos+4], data[mpos+5], data[mpos+6], data[mpos+7]]) as usize;
960 if seg_tag == b"\x00TTW" && mpos + 8 + seg_len <= data.len() {
961 mrw_bo = Some(&data[mpos+8..mpos+8+seg_len]);
962 break;
963 }
964 mpos += 8 + seg_len;
965 }
966 mrw_bo
967 } else {
968 Some(&data[..])
969 };
970 if let Some(tiff) = check {
971 if tiff.starts_with(b"II") { "Little-endian (Intel, II)" }
972 else if tiff.starts_with(b"MM") { "Big-endian (Motorola, MM)" }
973 else { "" }
974 } else { "" }
975 } else { "" };
976 let already_has_exifbyteorder = tags.iter().any(|t| t.name == "ExifByteOrder");
979 if !bo_str.is_empty() && !already_has_exifbyteorder
980 && file_type != FileType::Btf
981 && file_type != FileType::Dr4 && file_type != FileType::Vrd
982 && file_type != FileType::Crw {
983 tags.push(file_tag("ExifByteOrder", Value::String(bo_str.to_string())));
984 }
985 }
986
987 tags.push(file_tag("ExifToolVersion", Value::String(crate::VERSION.to_string())));
988
989 let composite = crate::composite::compute_composite_tags(&tags);
991 tags.extend(composite);
992
993 {
999 let is_flir_fff = tags.iter().any(|t| t.group.family0 == "APP1"
1000 && t.group.family1 == "FLIR");
1001 if is_flir_fff {
1002 tags.retain(|t| !(t.name == "LensID" && t.group.family0 == "Composite"));
1003 }
1004 }
1005
1006 {
1011 let make = tags.iter().find(|t| t.name == "Make")
1012 .map(|t| t.print_value.clone()).unwrap_or_default();
1013 if !make.to_uppercase().contains("CANON") {
1014 tags.retain(|t| t.name != "Lens" || t.group.family0 != "Composite");
1015 }
1016 }
1017
1018 {
1022 let riff_priority_zero_tags = ["Quality", "SampleSize", "StreamType"];
1023 for tag_name in &riff_priority_zero_tags {
1024 let has_makernotes = tags.iter().any(|t| t.name == *tag_name
1025 && t.group.family0 != "RIFF");
1026 if has_makernotes {
1027 tags.retain(|t| !(t.name == *tag_name && t.group.family0 == "RIFF"));
1028 }
1029 }
1030 }
1031
1032 if !self.options.requested_tags.is_empty() {
1034 let requested: Vec<String> = self
1035 .options
1036 .requested_tags
1037 .iter()
1038 .map(|t| t.to_lowercase())
1039 .collect();
1040 tags.retain(|t| requested.contains(&t.name.to_lowercase()));
1041 }
1042
1043 Ok(tags)
1044 }
1045
1046 fn get_info(&self, tags: &[Tag]) -> ImageInfo {
1050 let mut info = ImageInfo::new();
1051 let mut seen: HashMap<String, usize> = HashMap::new();
1052
1053 for tag in tags {
1054 let value = if self.options.print_conv {
1055 &tag.print_value
1056 } else {
1057 &tag.raw_value.to_display_string()
1058 };
1059
1060 let count = seen.entry(tag.name.clone()).or_insert(0);
1061 *count += 1;
1062
1063 if *count == 1 {
1064 info.insert(tag.name.clone(), value.clone());
1065 } else if self.options.duplicates {
1066 let key = format!("{} [{}:{}]", tag.name, tag.group.family0, tag.group.family1);
1067 info.insert(key, value.clone());
1068 }
1069 }
1070
1071 info
1072 }
1073
1074 fn detect_file_type(&self, data: &[u8], path: &Path) -> Result<FileType> {
1076 let header_len = data.len().min(256);
1078 if let Some(ft) = file_type::detect_from_magic(&data[..header_len]) {
1079 if ft == FileType::Ico {
1081 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1082 if ext.eq_ignore_ascii_case("dfont") {
1083 return Ok(FileType::Font);
1084 }
1085 }
1086 }
1087 if ft == FileType::Jpeg {
1089 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1090 if ext.eq_ignore_ascii_case("jps") {
1091 return Ok(FileType::Jps);
1092 }
1093 }
1094 }
1095 if ft == FileType::Plist {
1097 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1098 if ext.eq_ignore_ascii_case("aae") {
1099 return Ok(FileType::Aae);
1100 }
1101 }
1102 }
1103 if ft == FileType::Xmp {
1105 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1106 if ext.eq_ignore_ascii_case("plist") {
1107 return Ok(FileType::Plist);
1108 }
1109 if ext.eq_ignore_ascii_case("aae") {
1110 return Ok(FileType::Aae);
1111 }
1112 }
1113 }
1114 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1116 if ext.eq_ignore_ascii_case("pcd") && data.len() >= 2056
1117 && &data[2048..2055] == b"PCD_IPI"
1118 {
1119 return Ok(FileType::PhotoCd);
1120 }
1121 }
1122 if ft == FileType::Mp3 {
1124 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1125 if ext.eq_ignore_ascii_case("mpc") {
1126 return Ok(FileType::Mpc);
1127 }
1128 if ext.eq_ignore_ascii_case("ape") {
1129 return Ok(FileType::Ape);
1130 }
1131 if ext.eq_ignore_ascii_case("wv") {
1132 return Ok(FileType::WavPack);
1133 }
1134 }
1135 }
1136 if ft == FileType::Zip {
1138 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1140 if ext.eq_ignore_ascii_case("eip") {
1141 return Ok(FileType::Eip);
1142 }
1143 }
1144 if let Some(od_type) = detect_opendocument_type(data) {
1145 return Ok(od_type);
1146 }
1147 }
1148 return Ok(ft);
1149 }
1150
1151 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1153 if let Some(ft) = file_type::detect_from_extension(ext) {
1154 return Ok(ft);
1155 }
1156 }
1157
1158 let ext_str = path
1159 .extension()
1160 .and_then(|e| e.to_str())
1161 .unwrap_or("unknown");
1162 Err(Error::UnsupportedFileType(ext_str.to_string()))
1163 }
1164
1165 fn process_file(&self, data: &[u8], file_type: FileType) -> Result<Vec<Tag>> {
1168 match file_type {
1169 FileType::Jpeg | FileType::Jps => formats::jpeg::read_jpeg(data),
1170 FileType::Png | FileType::Mng => formats::png::read_png(data),
1171 FileType::Tiff
1173 | FileType::Btf
1174 | FileType::Dng
1175 | FileType::Cr2
1176 | FileType::Nef
1177 | FileType::Arw
1178 | FileType::Sr2
1179 | FileType::Orf
1180 | FileType::Pef
1181 | FileType::Erf
1182 | FileType::Fff
1183 | FileType::Rwl
1184 | FileType::Mef
1185 | FileType::Srw
1186 | FileType::Gpr
1187 | FileType::Arq
1188 | FileType::ThreeFR
1189 | FileType::Dcr
1190 | FileType::Rw2
1191 | FileType::Srf => formats::tiff::read_tiff(data),
1192 FileType::Iiq => formats::misc::read_iiq(data),
1194 FileType::Gif => formats::gif::read_gif(data),
1196 FileType::Bmp => formats::bmp::read_bmp(data),
1197 FileType::WebP | FileType::Avi | FileType::Wav => formats::riff::read_riff(data),
1198 FileType::Psd => formats::psd::read_psd(data),
1199 FileType::Mp3 => formats::id3::read_mp3(data),
1201 FileType::Flac => formats::flac::read_flac(data),
1202 FileType::Ogg | FileType::Opus => formats::ogg::read_ogg(data),
1203 FileType::Aiff => formats::aiff::read_aiff(data),
1204 FileType::Mp4
1206 | FileType::QuickTime
1207 | FileType::M4a
1208 | FileType::ThreeGP
1209 | FileType::Heif
1210 | FileType::Avif
1211 | FileType::Cr3
1212 | FileType::F4v
1213 | FileType::Mqv
1214 | FileType::Lrv => formats::quicktime::read_quicktime(data),
1215 FileType::Mkv | FileType::WebM => formats::matroska::read_matroska(data),
1216 FileType::Asf | FileType::Wmv | FileType::Wma => formats::asf::read_asf(data),
1217 FileType::Wtv => formats::wtv::read_wtv(data),
1218 FileType::Crw => formats::canon_raw::read_crw(data),
1220 FileType::Raf => formats::raf::read_raf(data),
1221 FileType::Mrw => formats::mrw::read_mrw(data),
1222 FileType::Mrc => formats::mrc::read_mrc(data),
1223 FileType::Jp2 => formats::jp2::read_jp2(data),
1225 FileType::J2c => formats::jp2::read_j2c(data),
1226 FileType::Jxl => formats::jp2::read_jxl(data),
1227 FileType::Ico => formats::ico::read_ico(data),
1228 FileType::Icc => formats::icc::read_icc(data),
1229 FileType::Pdf => formats::pdf::read_pdf(data),
1231 FileType::PostScript => {
1232 if data.starts_with(b"%!PS-AdobeFont") || data.starts_with(b"%!FontType1") {
1234 formats::font::read_pfa(data).or_else(|_| formats::postscript::read_postscript(data))
1235 } else {
1236 formats::postscript::read_postscript(data)
1237 }
1238 }
1239 FileType::Eip => formats::capture_one::read_eip(data),
1240 FileType::Zip | FileType::Docx | FileType::Xlsx | FileType::Pptx
1241 | FileType::Doc | FileType::Xls | FileType::Ppt => formats::zip::read_zip(data),
1242 FileType::Rtf => formats::rtf::read_rtf(data),
1243 FileType::InDesign => formats::misc::read_indesign(data),
1244 FileType::Pcap => formats::misc::read_pcap(data),
1245 FileType::Pcapng => formats::misc::read_pcapng(data),
1246 FileType::Vrd => formats::canon_vrd::read_vrd(data).or_else(|_| Ok(Vec::new())),
1248 FileType::Dr4 => formats::canon_vrd::read_dr4(data).or_else(|_| Ok(Vec::new())),
1249 FileType::Xmp => formats::xmp_file::read_xmp(data),
1251 FileType::Svg => formats::misc::read_svg(data),
1252 FileType::Html => {
1253 let is_svg = data.windows(4).take(512).any(|w| w == b"<svg");
1255 if is_svg {
1256 formats::misc::read_svg(data)
1257 } else {
1258 formats::html::read_html(data)
1259 }
1260 }
1261 FileType::Exe => formats::exe::read_exe(data),
1262 FileType::Font => {
1263 if data.starts_with(b"StartFontMetrics") {
1265 return formats::font::read_afm(data);
1266 }
1267 if data.starts_with(b"%!PS-AdobeFont") || data.starts_with(b"%!FontType1") {
1269 return formats::font::read_pfa(data).or_else(|_| Ok(Vec::new()));
1270 }
1271 if data.len() >= 2 && data[0] == 0x80 && (data[1] == 0x01 || data[1] == 0x02) {
1273 return formats::font::read_pfb(data).or_else(|_| Ok(Vec::new()));
1274 }
1275 formats::font::read_font(data)
1276 }
1277 FileType::WavPack | FileType::Dsf => formats::id3::read_mp3(data),
1279 FileType::Ape => formats::ape::read_ape(data),
1280 FileType::Mpc => formats::ape::read_mpc(data),
1281 FileType::Aac => formats::misc::read_aac(data),
1282 FileType::RealAudio => {
1283 formats::misc::read_real_audio(data).or_else(|_| Ok(Vec::new()))
1284 }
1285 FileType::RealMedia => {
1286 formats::misc::read_real_media(data).or_else(|_| Ok(Vec::new()))
1287 }
1288 FileType::Czi => formats::misc::read_czi(data).or_else(|_| Ok(Vec::new())),
1290 FileType::PhotoCd => formats::misc::read_photo_cd(data).or_else(|_| Ok(Vec::new())),
1291 FileType::Dicom => formats::dicom::read_dicom(data),
1292 FileType::Fits => formats::misc::read_fits(data),
1293 FileType::Flv => formats::misc::read_flv(data),
1294 FileType::Mxf => formats::misc::read_mxf(data).or_else(|_| Ok(Vec::new())),
1295 FileType::Swf => formats::misc::read_swf(data),
1296 FileType::Hdr => formats::misc::read_hdr(data),
1297 FileType::DjVu => formats::djvu::read_djvu(data),
1298 FileType::Xcf => formats::gimp::read_xcf(data),
1299 FileType::Mie => formats::mie::read_mie(data),
1300 FileType::Lfp => formats::lytro::read_lfp(data),
1301 FileType::Fpf => formats::flir_fpf::read_fpf(data),
1303 FileType::Flif => formats::misc::read_flif(data),
1304 FileType::Bpg => formats::misc::read_bpg(data),
1305 FileType::Pcx => formats::misc::read_pcx(data),
1306 FileType::Pict => formats::misc::read_pict(data),
1307 FileType::M2ts => formats::misc::read_m2ts(data, self.options.extract_embedded),
1308 FileType::Gzip => formats::misc::read_gzip(data),
1309 FileType::Rar => formats::misc::read_rar(data),
1310 FileType::Dss => formats::misc::read_dss(data),
1311 FileType::Moi => formats::misc::read_moi(data),
1312 FileType::MacOs => formats::misc::read_macos(data),
1313 FileType::Json => formats::misc::read_json(data),
1314 FileType::Pgf => formats::pgf::read_pgf(data),
1316 FileType::Xisf => formats::xisf::read_xisf(data),
1317 FileType::Torrent => formats::torrent::read_torrent(data),
1318 FileType::Mobi => formats::palm::read_palm(data),
1319 FileType::Psp => formats::psp::read_psp(data),
1320 FileType::SonyPmp => formats::sony_pmp::read_sony_pmp(data),
1321 FileType::Audible => formats::audible::read_audible(data),
1322 FileType::Exr => formats::openexr::read_openexr(data),
1323 FileType::Plist => {
1325 if data.starts_with(b"bplist") {
1326 formats::plist::read_binary_plist_tags(data)
1327 } else {
1328 formats::plist::read_xml_plist(data)
1329 }
1330 }
1331 FileType::Aae => {
1332 if data.starts_with(b"bplist") {
1333 formats::plist::read_binary_plist_tags(data)
1334 } else {
1335 formats::plist::read_aae_plist(data)
1336 }
1337 }
1338 FileType::KyoceraRaw => formats::misc::read_kyocera_raw(data),
1339 FileType::PortableFloatMap => formats::misc::read_pfm(data),
1340 FileType::Ods | FileType::Odt | FileType::Odp | FileType::Odg |
1341 FileType::Odf | FileType::Odb | FileType::Odi | FileType::Odc => formats::zip::read_zip(data),
1342 _ => Err(Error::UnsupportedFileType(format!("{}", file_type))),
1343 }
1344 }
1345
1346 fn process_by_extension(&self, data: &[u8], path: &Path) -> Result<Vec<Tag>> {
1348 let ext = path
1349 .extension()
1350 .and_then(|e| e.to_str())
1351 .unwrap_or("")
1352 .to_ascii_lowercase();
1353
1354 match ext.as_str() {
1355 "ppm" | "pgm" | "pbm" => formats::misc::read_ppm(data),
1356 "pfm" => {
1357 if data.len() >= 3 && data[0] == b'P' && (data[1] == b'f' || data[1] == b'F') {
1359 formats::misc::read_ppm(data)
1360 } else {
1361 Ok(Vec::new()) }
1363 }
1364 "json" => formats::misc::read_json(data),
1365 "svg" => formats::misc::read_svg(data),
1366 "ram" => formats::misc::read_ram(data).or_else(|_| Ok(Vec::new())),
1367 "txt" | "log" | "igc" => {
1368 Ok(compute_text_tags(data, false))
1369 }
1370 "csv" => {
1371 Ok(compute_text_tags(data, true))
1372 }
1373 "url" => formats::lnk::read_url(data).or_else(|_| Ok(Vec::new())),
1374 "lnk" => formats::lnk::read_lnk(data).or_else(|_| Ok(Vec::new())),
1375 "gpx" | "kml" | "xml" | "inx" => formats::xmp_file::read_xmp(data),
1376 "plist" => {
1377 if data.starts_with(b"bplist") {
1378 formats::plist::read_binary_plist_tags(data).or_else(|_| Ok(Vec::new()))
1379 } else {
1380 formats::plist::read_xml_plist(data).or_else(|_| Ok(Vec::new()))
1381 }
1382 }
1383 "aae" => {
1384 if data.starts_with(b"bplist") {
1385 formats::plist::read_binary_plist_tags(data).or_else(|_| Ok(Vec::new()))
1386 } else {
1387 formats::plist::read_aae_plist(data).or_else(|_| Ok(Vec::new()))
1388 }
1389 }
1390 "vcf" | "ics" | "vcard" => {
1391 let s = String::from_utf8_lossy(&data[..data.len().min(100)]);
1392 if s.contains("BEGIN:VCALENDAR") {
1393 formats::vcard::read_ics(data).or_else(|_| Ok(Vec::new()))
1394 } else {
1395 formats::vcard::read_vcf(data).or_else(|_| Ok(Vec::new()))
1396 }
1397 }
1398 "xcf" => Ok(Vec::new()), "vrd" => formats::canon_vrd::read_vrd(data).or_else(|_| Ok(Vec::new())),
1400 "dr4" => formats::canon_vrd::read_dr4(data).or_else(|_| Ok(Vec::new())),
1401 "indd" | "indt" => Ok(Vec::new()), "x3f" => formats::sigma_raw::read_x3f(data).or_else(|_| Ok(Vec::new())),
1403 "mie" => Ok(Vec::new()), "exr" => Ok(Vec::new()), "wpg" => formats::misc::read_wpg(data).or_else(|_| Ok(Vec::new())),
1406 "moi" => formats::misc::read_moi(data).or_else(|_| Ok(Vec::new())),
1407 "macos" => formats::misc::read_macos(data).or_else(|_| Ok(Vec::new())),
1408 "dpx" => formats::dpx::read_dpx(data).or_else(|_| Ok(Vec::new())),
1409 "r3d" => formats::red::read_r3d(data).or_else(|_| Ok(Vec::new())),
1410 "tnef" => formats::tnef::read_tnef(data).or_else(|_| Ok(Vec::new())),
1411 "ppt" | "fpx" => formats::flashpix::read_fpx(data).or_else(|_| Ok(Vec::new())),
1412 "fpf" => formats::flir_fpf::read_fpf(data).or_else(|_| Ok(Vec::new())),
1413 "itc" => formats::misc::read_itc(data).or_else(|_| Ok(Vec::new())),
1414 "dv" => formats::dv::read_dv(data, data.len() as u64).or_else(|_| Ok(Vec::new())),
1415 "czi" => formats::misc::read_czi(data).or_else(|_| Ok(Vec::new())),
1416 "miff" => formats::miff::read_miff(data).or_else(|_| Ok(Vec::new())),
1417 "lfp" | "mrc"
1418 | "dss" | "mobi" | "psp" | "pgf" | "raw"
1419 | "pmp" | "torrent"
1420 | "xisf" | "mxf"
1421 | "dfont" => Ok(Vec::new()),
1422 "iso" => formats::iso::read_iso(data).or_else(|_| Ok(Vec::new())),
1423 "afm" => formats::font::read_afm(data).or_else(|_| Ok(Vec::new())),
1424 "pfa" => formats::font::read_pfa(data).or_else(|_| Ok(Vec::new())),
1425 "pfb" => formats::font::read_pfb(data).or_else(|_| Ok(Vec::new())),
1426 _ => Err(Error::UnsupportedFileType(ext)),
1427 }
1428 }
1429}
1430
1431impl Default for ExifTool {
1432 fn default() -> Self {
1433 Self::new()
1434 }
1435}
1436
1437fn detect_opendocument_type(data: &[u8]) -> Option<FileType> {
1440 if data.len() < 30 || data[0..4] != [0x50, 0x4B, 0x03, 0x04] {
1442 return None;
1443 }
1444 let compression = u16::from_le_bytes([data[8], data[9]]);
1445 let compressed_size = u32::from_le_bytes([data[18], data[19], data[20], data[21]]) as usize;
1446 let name_len = u16::from_le_bytes([data[26], data[27]]) as usize;
1447 let extra_len = u16::from_le_bytes([data[28], data[29]]) as usize;
1448 let name_start = 30;
1449 if name_start + name_len > data.len() {
1450 return None;
1451 }
1452 let filename = std::str::from_utf8(&data[name_start..name_start + name_len]).unwrap_or("");
1453 if filename != "mimetype" || compression != 0 {
1454 return None;
1455 }
1456 let content_start = name_start + name_len + extra_len;
1457 let content_end = (content_start + compressed_size).min(data.len());
1458 if content_start >= content_end {
1459 return None;
1460 }
1461 let mime = std::str::from_utf8(&data[content_start..content_end]).unwrap_or("").trim();
1462 match mime {
1463 "application/vnd.oasis.opendocument.spreadsheet" => Some(FileType::Ods),
1464 "application/vnd.oasis.opendocument.text" => Some(FileType::Odt),
1465 "application/vnd.oasis.opendocument.presentation" => Some(FileType::Odp),
1466 "application/vnd.oasis.opendocument.graphics" => Some(FileType::Odg),
1467 "application/vnd.oasis.opendocument.formula" => Some(FileType::Odf),
1468 "application/vnd.oasis.opendocument.database" => Some(FileType::Odb),
1469 "application/vnd.oasis.opendocument.image" => Some(FileType::Odi),
1470 "application/vnd.oasis.opendocument.chart" => Some(FileType::Odc),
1471 _ => None,
1472 }
1473}
1474
1475pub fn get_file_type<P: AsRef<Path>>(path: P) -> Result<FileType> {
1477 let path = path.as_ref();
1478 let mut file = fs::File::open(path).map_err(Error::Io)?;
1479 let mut header = [0u8; 256];
1480 use std::io::Read;
1481 let n = file.read(&mut header).map_err(Error::Io)?;
1482
1483 if let Some(ft) = file_type::detect_from_magic(&header[..n]) {
1484 return Ok(ft);
1485 }
1486
1487 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1488 if let Some(ft) = file_type::detect_from_extension(ext) {
1489 return Ok(ft);
1490 }
1491 }
1492
1493 Err(Error::UnsupportedFileType("unknown".into()))
1494}
1495
/// The IFD an EXIF tag is assigned to by [`classify_exif_tag`].
enum ExifIfdGroup {
    /// Fallback for every tag ID not claimed by the other two groups.
    Ifd0,
    /// Tags whose IDs fall in the Exif sub-IFD ranges.
    ExifIfd,
    /// Tags with IDs `0x0000..=0x001F`.
    Gps,
}

/// Assigns a tag ID to an IFD based solely on its numeric value.
///
/// The Exif sub-IFD ranges cover the exposure and sensitivity tags
/// (`0x829A..=0x829D`, `0x8822..=0x8828`, `0x8830..=0x8835`) plus the
/// `0x9000..=0x9292` and `0xA000..=0xA435` blocks. IDs up to `0x001F` are
/// treated as GPS tags; everything else goes to IFD0.
fn classify_exif_tag(tag_id: u16) -> ExifIfdGroup {
    match tag_id {
        // 0x8828 (OECF) and 0x8831..=0x8835 (the remaining Exif 2.3
        // sensitivity tags that accompany SensitivityType 0x8830) are Exif-IFD
        // tags as well; leaving them out would file them under IFD0.
        0x829A..=0x829D
        | 0x8822..=0x8828
        | 0x8830..=0x8835
        | 0x9000..=0x9292
        | 0xA000..=0xA435 => ExifIfdGroup::ExifIfd,
        0x0000..=0x001F => ExifIfdGroup::Gps,
        _ => ExifIfdGroup::Ifd0,
    }
}
1515
1516fn extract_existing_exif_entries(jpeg_data: &[u8], target_bo: ByteOrderMark) -> Vec<exif_writer::IfdEntry> {
1518 let mut entries = Vec::new();
1519
1520 let mut pos = 2; while pos + 4 <= jpeg_data.len() {
1523 if jpeg_data[pos] != 0xFF {
1524 pos += 1;
1525 continue;
1526 }
1527 let marker = jpeg_data[pos + 1];
1528 pos += 2;
1529
1530 if marker == 0xDA || marker == 0xD9 {
1531 break; }
1533 if marker == 0xFF || marker == 0x00 || marker == 0xD8 || (0xD0..=0xD7).contains(&marker) {
1534 continue;
1535 }
1536
1537 if pos + 2 > jpeg_data.len() {
1538 break;
1539 }
1540 let seg_len = u16::from_be_bytes([jpeg_data[pos], jpeg_data[pos + 1]]) as usize;
1541 if seg_len < 2 || pos + seg_len > jpeg_data.len() {
1542 break;
1543 }
1544
1545 let seg_data = &jpeg_data[pos + 2..pos + seg_len];
1546
1547 if marker == 0xE1 && seg_data.len() > 14 && seg_data.starts_with(b"Exif\0\0") {
1549 let tiff_data = &seg_data[6..];
1550 extract_ifd_entries(tiff_data, target_bo, &mut entries);
1551 break;
1552 }
1553
1554 pos += seg_len;
1555 }
1556
1557 entries
1558}
1559
1560fn extract_ifd_entries(
1562 tiff_data: &[u8],
1563 target_bo: ByteOrderMark,
1564 entries: &mut Vec<exif_writer::IfdEntry>,
1565) {
1566 use crate::metadata::exif::parse_tiff_header;
1567
1568 let header = match parse_tiff_header(tiff_data) {
1569 Ok(h) => h,
1570 Err(_) => return,
1571 };
1572
1573 let src_bo = header.byte_order;
1574
1575 read_ifd_for_merge(tiff_data, header.ifd0_offset as usize, src_bo, target_bo, entries);
1577
1578 let ifd0_offset = header.ifd0_offset as usize;
1580 if ifd0_offset + 2 > tiff_data.len() {
1581 return;
1582 }
1583 let count = read_u16_bo(tiff_data, ifd0_offset, src_bo) as usize;
1584 for i in 0..count {
1585 let eoff = ifd0_offset + 2 + i * 12;
1586 if eoff + 12 > tiff_data.len() {
1587 break;
1588 }
1589 let tag = read_u16_bo(tiff_data, eoff, src_bo);
1590 let value_off = read_u32_bo(tiff_data, eoff + 8, src_bo) as usize;
1591
1592 match tag {
1593 0x8769 => read_ifd_for_merge(tiff_data, value_off, src_bo, target_bo, entries),
1594 0x8825 => read_ifd_for_merge(tiff_data, value_off, src_bo, target_bo, entries),
1595 _ => {}
1596 }
1597 }
1598}
1599
1600fn read_ifd_for_merge(
1602 data: &[u8],
1603 offset: usize,
1604 src_bo: ByteOrderMark,
1605 target_bo: ByteOrderMark,
1606 entries: &mut Vec<exif_writer::IfdEntry>,
1607) {
1608 if offset + 2 > data.len() {
1609 return;
1610 }
1611 let count = read_u16_bo(data, offset, src_bo) as usize;
1612
1613 for i in 0..count {
1614 let eoff = offset + 2 + i * 12;
1615 if eoff + 12 > data.len() {
1616 break;
1617 }
1618
1619 let tag = read_u16_bo(data, eoff, src_bo);
1620 let dtype = read_u16_bo(data, eoff + 2, src_bo);
1621 let count_val = read_u32_bo(data, eoff + 4, src_bo);
1622
1623 if tag == 0x8769 || tag == 0x8825 || tag == 0xA005 || tag == 0x927C {
1625 continue;
1626 }
1627
1628 let type_size = match dtype {
1629 1 | 2 | 6 | 7 => 1usize,
1630 3 | 8 => 2,
1631 4 | 9 | 11 | 13 => 4,
1632 5 | 10 | 12 => 8,
1633 _ => continue,
1634 };
1635
1636 let total_size = type_size * count_val as usize;
1637 let raw_data = if total_size <= 4 {
1638 data[eoff + 8..eoff + 12].to_vec()
1639 } else {
1640 let voff = read_u32_bo(data, eoff + 8, src_bo) as usize;
1641 if voff + total_size > data.len() {
1642 continue;
1643 }
1644 data[voff..voff + total_size].to_vec()
1645 };
1646
1647 let final_data = if src_bo != target_bo && type_size > 1 {
1649 reencode_bytes(&raw_data, dtype, count_val as usize, src_bo, target_bo)
1650 } else {
1651 raw_data[..total_size].to_vec()
1652 };
1653
1654 let format = match dtype {
1655 1 => exif_writer::ExifFormat::Byte,
1656 2 => exif_writer::ExifFormat::Ascii,
1657 3 => exif_writer::ExifFormat::Short,
1658 4 => exif_writer::ExifFormat::Long,
1659 5 => exif_writer::ExifFormat::Rational,
1660 6 => exif_writer::ExifFormat::SByte,
1661 7 => exif_writer::ExifFormat::Undefined,
1662 8 => exif_writer::ExifFormat::SShort,
1663 9 => exif_writer::ExifFormat::SLong,
1664 10 => exif_writer::ExifFormat::SRational,
1665 11 => exif_writer::ExifFormat::Float,
1666 12 => exif_writer::ExifFormat::Double,
1667 _ => continue,
1668 };
1669
1670 entries.push(exif_writer::IfdEntry {
1671 tag,
1672 format,
1673 data: final_data,
1674 });
1675 }
1676}
1677
1678fn reencode_bytes(
1680 data: &[u8],
1681 dtype: u16,
1682 count: usize,
1683 src_bo: ByteOrderMark,
1684 dst_bo: ByteOrderMark,
1685) -> Vec<u8> {
1686 let mut out = Vec::with_capacity(data.len());
1687 match dtype {
1688 3 | 8 => {
1689 for i in 0..count {
1691 let v = read_u16_bo(data, i * 2, src_bo);
1692 match dst_bo {
1693 ByteOrderMark::LittleEndian => out.extend_from_slice(&v.to_le_bytes()),
1694 ByteOrderMark::BigEndian => out.extend_from_slice(&v.to_be_bytes()),
1695 }
1696 }
1697 }
1698 4 | 9 | 11 | 13 => {
1699 for i in 0..count {
1701 let v = read_u32_bo(data, i * 4, src_bo);
1702 match dst_bo {
1703 ByteOrderMark::LittleEndian => out.extend_from_slice(&v.to_le_bytes()),
1704 ByteOrderMark::BigEndian => out.extend_from_slice(&v.to_be_bytes()),
1705 }
1706 }
1707 }
1708 5 | 10 => {
1709 for i in 0..count {
1711 let n = read_u32_bo(data, i * 8, src_bo);
1712 let d = read_u32_bo(data, i * 8 + 4, src_bo);
1713 match dst_bo {
1714 ByteOrderMark::LittleEndian => {
1715 out.extend_from_slice(&n.to_le_bytes());
1716 out.extend_from_slice(&d.to_le_bytes());
1717 }
1718 ByteOrderMark::BigEndian => {
1719 out.extend_from_slice(&n.to_be_bytes());
1720 out.extend_from_slice(&d.to_be_bytes());
1721 }
1722 }
1723 }
1724 }
1725 12 => {
1726 for i in 0..count {
1728 let mut bytes = [0u8; 8];
1729 bytes.copy_from_slice(&data[i * 8..i * 8 + 8]);
1730 if src_bo != dst_bo {
1731 bytes.reverse();
1732 }
1733 out.extend_from_slice(&bytes);
1734 }
1735 }
1736 _ => out.extend_from_slice(data),
1737 }
1738 out
1739}
1740
1741fn read_u16_bo(data: &[u8], offset: usize, bo: ByteOrderMark) -> u16 {
1742 if offset + 2 > data.len() { return 0; }
1743 match bo {
1744 ByteOrderMark::LittleEndian => u16::from_le_bytes([data[offset], data[offset + 1]]),
1745 ByteOrderMark::BigEndian => u16::from_be_bytes([data[offset], data[offset + 1]]),
1746 }
1747}
1748
1749fn read_u32_bo(data: &[u8], offset: usize, bo: ByteOrderMark) -> u32 {
1750 if offset + 4 > data.len() { return 0; }
1751 match bo {
1752 ByteOrderMark::LittleEndian => u32::from_le_bytes([data[offset], data[offset + 1], data[offset + 2], data[offset + 3]]),
1753 ByteOrderMark::BigEndian => u32::from_be_bytes([data[offset], data[offset + 1], data[offset + 2], data[offset + 3]]),
1754 }
1755}
1756
1757fn tag_name_to_id(name: &str) -> Option<u16> {
1759 encode_exif_tag(name, "", "", ByteOrderMark::BigEndian).map(|(id, _, _)| id)
1760}
1761
/// Makes a tag value usable as a file name.
///
/// The characters `/ \ : * ? " < > |` and all control characters are replaced
/// by `_`; afterwards leading and trailing whitespace is trimmed.
fn value_to_filename(value: &str) -> String {
    const RESERVED: &[char] = &['/', '\\', ':', '*', '?', '"', '<', '>', '|'];

    let mut sanitized = String::with_capacity(value.len());
    for ch in value.chars() {
        let needs_replacing = RESERVED.contains(&ch) || ch.is_control();
        sanitized.push(if needs_replacing { '_' } else { ch });
    }
    // Control characters were already turned into '_' above, so trimming only
    // removes remaining whitespace such as spaces.
    sanitized.trim().to_string()
}
1775
/// Parses a time shift written as `[+|-]H[:M[:S]]`.
///
/// Returns `(sign, hours, minutes, seconds)` where `sign` is `1` or `-1` and
/// omitted fields are zero. Returns `None` if there are more than three
/// colon-separated fields or any field is not an unsigned integer.
pub fn parse_date_shift(shift: &str) -> Option<(i32, u32, u32, u32)> {
    let (sign, rest) = match shift.strip_prefix('-') {
        Some(rest) => (-1, rest),
        None => (1, shift.strip_prefix('+').unwrap_or(shift)),
    };

    // Fields fill hours, minutes, seconds in that order; a fourth field has no
    // slot and rejects the whole string.
    let mut fields = [0u32; 3];
    for (slot, part) in rest.split(':').enumerate() {
        let target = fields.get_mut(slot)?;
        *target = part.parse().ok()?;
    }
    let [hours, minutes, seconds] = fields;
    Some((sign, hours, minutes, seconds))
}

/// Shifts a `YYYY:MM:DD HH:MM:SS` timestamp by the amount described in `shift`
/// (see [`parse_date_shift`]).
///
/// Only the first 19 bytes of `datetime` are interpreted; anything after them
/// (sub-seconds, time zone) is dropped from the result. Day, month and year
/// roll over as needed.
///
/// Returns `None` when `shift` cannot be parsed, when `datetime` is too short
/// or its numeric fields do not parse, or when the month is outside `1..=12`
/// (for example the all-zero placeholder `0000:00:00 00:00:00`).
pub fn shift_datetime(datetime: &str, shift: &str) -> Option<String> {
    const SECS_PER_DAY: i64 = 86_400;

    let (sign, hours, minutes, seconds) = parse_date_shift(shift)?;

    // `str::get` yields None instead of panicking when the input is shorter
    // than the range or the range would split a multi-byte character.
    let field = |range: std::ops::Range<usize>| datetime.get(range);
    let year: i32 = field(0..4)?.parse().ok()?;
    let month: u32 = field(5..7)?.parse().ok()?;
    let day: u32 = field(8..10)?.parse().ok()?;
    let hour: u32 = field(11..13)?.parse().ok()?;
    let min: u32 = field(14..16)?.parse().ok()?;
    let sec: u32 = field(17..19)?.parse().ok()?;

    // The month arithmetic below relies on a real calendar month; month 0
    // would underflow when stepping backwards.
    if !(1..=12).contains(&month) {
        return None;
    }

    // Widen before multiplying so that large shifts cannot overflow.
    let clock = i64::from(hour) * 3600 + i64::from(min) * 60 + i64::from(sec);
    let delta = i64::from(hours) * 3600 + i64::from(minutes) * 60 + i64::from(seconds);
    let total_secs = clock + i64::from(sign) * delta;

    // Euclidean division floors towards negative infinity, so a negative total
    // borrows whole days and leaves a time of day in 0..86400.
    let days_shift = total_secs.div_euclid(SECS_PER_DAY);
    let time_secs = total_secs.rem_euclid(SECS_PER_DAY);
    let new_hour = time_secs / 3600;
    let new_min = (time_secs % 3600) / 60;
    let new_sec = time_secs % 60;

    let days_in_month = |m: u32, y: i32| -> i64 {
        match m {
            4 | 6 | 9 | 11 => 30,
            2 => {
                if (y % 4 == 0 && y % 100 != 0) || y % 400 == 0 {
                    29
                } else {
                    28
                }
            }
            _ => 31,
        }
    };

    let mut new_day = i64::from(day) + days_shift;
    let mut new_month = month;
    let mut new_year = year;

    // Carry surplus days forward month by month.
    while new_day > days_in_month(new_month, new_year) {
        new_day -= days_in_month(new_month, new_year);
        new_month += 1;
        if new_month > 12 {
            new_month = 1;
            new_year += 1;
        }
    }
    // Borrow days from preceding months.
    while new_day < 1 {
        if new_month == 1 {
            new_month = 12;
            new_year -= 1;
        } else {
            new_month -= 1;
        }
        new_day += days_in_month(new_month, new_year);
    }

    Some(format!(
        "{:04}:{:02}:{:02} {:02}:{:02}:{:02}",
        new_year, new_month, new_day, new_hour, new_min, new_sec
    ))
}
1874
/// Formats a count of seconds since 1970-01-01 00:00:00 as
/// `YYYY:MM:DD HH:MM:SS`.
///
/// No time-zone offset is applied. Negative values (instants before 1970) are
/// supported, and the conversion takes constant time regardless of how far the
/// timestamp is from the epoch.
fn unix_to_datetime(secs: i64) -> String {
    // Euclidean division keeps the time of day in 0..86400 for negative
    // timestamps; plain `/` and `%` would round towards zero and produce
    // negative hour/minute/second fields.
    let days = secs.div_euclid(86_400);
    let time = secs.rem_euclid(86_400);
    let (h, m, s) = (time / 3600, (time % 3600) / 60, time % 60);

    // Closed-form conversion from a day count to a proleptic Gregorian date.
    // The day count is rebased to 0000-03-01 so that the leap day is the last
    // day of the shifted year, then split into 400-year eras of 146097 days.
    let z = days + 719_468;
    let era = z.div_euclid(146_097);
    let day_of_era = z.rem_euclid(146_097); // 0..=146096
    let year_of_era =
        (day_of_era - day_of_era / 1_460 + day_of_era / 36_524 - day_of_era / 146_096) / 365; // 0..=399
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100); // 0..=365
    let shifted_month = (5 * day_of_year + 2) / 153; // 0 = March .. 11 = February
    let day = day_of_year - (153 * shifted_month + 2) / 5 + 1;
    let month = if shifted_month < 10 {
        shifted_month + 3
    } else {
        shifted_month - 9
    };
    // January and February belong to the next calendar year of the shifted one.
    let year = year_of_era + era * 400 + i64::from(month <= 2);

    format!("{:04}:{:02}:{:02} {:02}:{:02}:{:02}", year, month, day, h, m, s)
}
1899
/// Renders a byte count for display.
///
/// Values below 1024 are printed as whole bytes; larger values are scaled by
/// powers of 1024 and shown with one decimal as `kB`, `MB` or `GB`.
fn format_file_size(bytes: u64) -> String {
    const KIB: u64 = 1024;
    const MIB: u64 = KIB * KIB;
    const GIB: u64 = MIB * KIB;

    let scaled = |unit: u64, suffix: &str| {
        format!("{:.1} {}", bytes as f64 / unit as f64, suffix)
    };

    match bytes {
        b if b < KIB => format!("{} bytes", b),
        b if b < MIB => scaled(KIB, "kB"),
        b if b < GIB => scaled(MIB, "MB"),
        _ => scaled(GIB, "GB"),
    }
}
1911
/// Reports whether `tag` (compared case-insensitively) is one of the tag names
/// listed in `XMP_TAGS` below.
fn is_xmp_tag(tag: &str) -> bool {
    const XMP_TAGS: [&str; 9] = [
        "title",
        "description",
        "subject",
        "creator",
        "rights",
        "keywords",
        "rating",
        "label",
        "hierarchicalsubject",
    ];

    let lowered = tag.to_lowercase();
    XMP_TAGS.contains(&lowered.as_str())
}
1920
1921fn encode_exif_tag(
1924 tag_name: &str,
1925 value: &str,
1926 _group: &str,
1927 bo: ByteOrderMark,
1928) -> Option<(u16, exif_writer::ExifFormat, Vec<u8>)> {
1929 let tag_lower = tag_name.to_lowercase();
1930
1931 let (tag_id, format): (u16, exif_writer::ExifFormat) = match tag_lower.as_str() {
1933 "imagedescription" => (0x010E, exif_writer::ExifFormat::Ascii),
1935 "make" => (0x010F, exif_writer::ExifFormat::Ascii),
1936 "model" => (0x0110, exif_writer::ExifFormat::Ascii),
1937 "software" => (0x0131, exif_writer::ExifFormat::Ascii),
1938 "modifydate" | "datetime" => (0x0132, exif_writer::ExifFormat::Ascii),
1939 "artist" => (0x013B, exif_writer::ExifFormat::Ascii),
1940 "copyright" => (0x8298, exif_writer::ExifFormat::Ascii),
1941 "orientation" => (0x0112, exif_writer::ExifFormat::Short),
1943 "xresolution" => (0x011A, exif_writer::ExifFormat::Rational),
1944 "yresolution" => (0x011B, exif_writer::ExifFormat::Rational),
1945 "resolutionunit" => (0x0128, exif_writer::ExifFormat::Short),
1946 "datetimeoriginal" => (0x9003, exif_writer::ExifFormat::Ascii),
1948 "createdate" | "datetimedigitized" => (0x9004, exif_writer::ExifFormat::Ascii),
1949 "usercomment" => (0x9286, exif_writer::ExifFormat::Undefined),
1950 "imageuniqueid" => (0xA420, exif_writer::ExifFormat::Ascii),
1951 "ownername" | "cameraownername" => (0xA430, exif_writer::ExifFormat::Ascii),
1952 "serialnumber" | "bodyserialnumber" => (0xA431, exif_writer::ExifFormat::Ascii),
1953 "lensmake" => (0xA433, exif_writer::ExifFormat::Ascii),
1954 "lensmodel" => (0xA434, exif_writer::ExifFormat::Ascii),
1955 "lensserialnumber" => (0xA435, exif_writer::ExifFormat::Ascii),
1956 _ => return None,
1957 };
1958
1959 let encoded = match format {
1960 exif_writer::ExifFormat::Ascii => exif_writer::encode_ascii(value),
1961 exif_writer::ExifFormat::Short => {
1962 let v: u16 = value.parse().ok()?;
1963 exif_writer::encode_u16(v, bo)
1964 }
1965 exif_writer::ExifFormat::Long => {
1966 let v: u32 = value.parse().ok()?;
1967 exif_writer::encode_u32(v, bo)
1968 }
1969 exif_writer::ExifFormat::Rational => {
1970 if let Some(slash) = value.find('/') {
1972 let num: u32 = value[..slash].trim().parse().ok()?;
1973 let den: u32 = value[slash + 1..].trim().parse().ok()?;
1974 exif_writer::encode_urational(num, den, bo)
1975 } else if let Ok(v) = value.parse::<f64>() {
1976 let den = 10000u32;
1978 let num = (v * den as f64).round() as u32;
1979 exif_writer::encode_urational(num, den, bo)
1980 } else {
1981 return None;
1982 }
1983 }
1984 exif_writer::ExifFormat::Undefined => {
1985 let mut data = vec![0x41, 0x53, 0x43, 0x49, 0x49, 0x00, 0x00, 0x00]; data.extend_from_slice(value.as_bytes());
1988 data
1989 }
1990 _ => return None,
1991 };
1992
1993 Some((tag_id, format, encoded))
1994}
1995
1996fn compute_text_tags(data: &[u8], is_csv: bool) -> Vec<Tag> {
1998 let mut tags = Vec::new();
1999 let mk = |name: &str, val: String| Tag {
2000 id: crate::tag::TagId::Text(name.into()),
2001 name: name.into(), description: name.into(),
2002 group: crate::tag::TagGroup { family0: "File".into(), family1: "File".into(), family2: "Other".into() },
2003 raw_value: Value::String(val.clone()), print_value: val, priority: 0,
2004 };
2005
2006 let is_ascii = data.iter().all(|&b| b < 128);
2008 let has_utf8_bom = data.starts_with(&[0xEF, 0xBB, 0xBF]);
2009 let has_utf16le_bom = data.starts_with(&[0xFF, 0xFE]) && !data.starts_with(&[0xFF, 0xFE, 0x00, 0x00]);
2010 let has_utf16be_bom = data.starts_with(&[0xFE, 0xFF]);
2011 let has_utf32le_bom = data.starts_with(&[0xFF, 0xFE, 0x00, 0x00]);
2012 let has_utf32be_bom = data.starts_with(&[0x00, 0x00, 0xFE, 0xFF]);
2013
2014 let has_weird_ctrl = data.iter().any(|&b| (b <= 0x06) || (b >= 0x0e && b <= 0x1a) || (b >= 0x1c && b <= 0x1f) || b == 0x7f);
2016
2017 let (encoding, is_bom, is_utf16) = if has_utf32le_bom {
2018 ("utf-32le", true, false)
2019 } else if has_utf32be_bom {
2020 ("utf-32be", true, false)
2021 } else if has_utf16le_bom {
2022 ("utf-16le", true, true)
2023 } else if has_utf16be_bom {
2024 ("utf-16be", true, true)
2025 } else if has_weird_ctrl {
2026 return tags;
2028 } else if is_ascii {
2029 ("us-ascii", false, false)
2030 } else {
2031 let is_valid_utf8 = std::str::from_utf8(data).is_ok();
2033 if is_valid_utf8 {
2034 if has_utf8_bom {
2035 ("utf-8", true, false)
2036 } else {
2037 ("utf-8", false, false)
2041 }
2042 } else if !data.iter().any(|&b| b >= 0x80 && b <= 0x9f) {
2043 ("iso-8859-1", false, false)
2044 } else {
2045 ("unknown-8bit", false, false)
2046 }
2047 };
2048
2049 tags.push(mk("MIMEEncoding", encoding.into()));
2050
2051 if is_bom {
2052 tags.push(mk("ByteOrderMark", "Yes".into()));
2053 }
2054
2055 let has_cr = data.contains(&b'\r');
2057 let has_lf = data.contains(&b'\n');
2058 let newline_type = if has_cr && has_lf { "Windows CRLF" }
2059 else if has_lf { "Unix LF" }
2060 else if has_cr { "Macintosh CR" }
2061 else { "(none)" };
2062 tags.push(mk("Newlines", newline_type.into()));
2063
2064 if is_csv {
2065 let text = String::from_utf8_lossy(data);
2067 let mut delim = "";
2068 let mut quot = "";
2069 let mut ncols = 1usize;
2070 let mut nrows = 0usize;
2071
2072 for line in text.lines() {
2073 if nrows == 0 {
2074 let comma_count = line.matches(',').count();
2076 let semi_count = line.matches(';').count();
2077 let tab_count = line.matches('\t').count();
2078 if comma_count > semi_count && comma_count > tab_count {
2079 delim = ",";
2080 ncols = comma_count + 1;
2081 } else if semi_count > tab_count {
2082 delim = ";";
2083 ncols = semi_count + 1;
2084 } else if tab_count > 0 {
2085 delim = "\t";
2086 ncols = tab_count + 1;
2087 } else {
2088 delim = "";
2089 ncols = 1;
2090 }
2091 if line.contains('"') { quot = "\""; }
2093 else if line.contains('\'') { quot = "'"; }
2094 }
2095 nrows += 1;
2096 if nrows >= 1000 { break; }
2097 }
2098
2099 let delim_display = match delim {
2100 "," => "Comma",
2101 ";" => "Semicolon",
2102 "\t" => "Tab",
2103 _ => "(none)",
2104 };
2105 let quot_display = match quot {
2106 "\"" => "Double quotes",
2107 "'" => "Single quotes",
2108 _ => "(none)",
2109 };
2110
2111 tags.push(mk("Delimiter", delim_display.into()));
2112 tags.push(mk("Quoting", quot_display.into()));
2113 tags.push(mk("ColumnCount", ncols.to_string()));
2114 if nrows > 0 {
2115 tags.push(mk("RowCount", nrows.to_string()));
2116 }
2117 } else if !is_utf16 {
2118 let line_count = data.iter().filter(|&&b| b == b'\n').count();
2120 let line_count = if line_count == 0 && !data.is_empty() { 1 } else { line_count };
2121 tags.push(mk("LineCount", line_count.to_string()));
2122
2123 let text = String::from_utf8_lossy(data);
2124 let word_count = text.split_whitespace().count();
2125 tags.push(mk("WordCount", word_count.to_string()));
2126 }
2127
2128 tags
2129}