1use std::collections::HashMap;
7use std::fs;
8use std::path::Path;
9
10use crate::error::{Error, Result};
11use crate::file_type::{self, FileType};
12use crate::formats;
13use crate::metadata::exif::ByteOrderMark;
14use crate::tag::Tag;
15use crate::value::Value;
16use crate::writer::{exif_writer, iptc_writer, jpeg_writer, matroska_writer, mp4_writer, pdf_writer, png_writer, psd_writer, tiff_writer, webp_writer, xmp_writer};
17
/// Settings that control how metadata is extracted and reported.
#[derive(Debug, Clone)]
pub struct Options {
    /// When `true`, a repeated tag name is kept in the resulting `ImageInfo`
    /// under a `"Name [family0:family1]"` key instead of being dropped.
    pub duplicates: bool,
    /// When `true`, `ImageInfo` values use each tag's print value; otherwise
    /// the raw value's display string is used.
    pub print_conv: bool,
    /// Fast-scan level.
    /// NOTE(review): not read anywhere in the visible part of this file —
    /// confirm its meaning against the code that consumes it.
    pub fast_scan: u8,
    /// Tag names to keep, compared case-insensitively. An empty list keeps
    /// every extracted tag.
    pub requested_tags: Vec<String>,
}
30
31impl Default for Options {
32 fn default() -> Self {
33 Self {
34 duplicates: false,
35 print_conv: true,
36 fast_scan: 0,
37 requested_tags: Vec::new(),
38 }
39 }
40}
41
/// A pending change to a single tag, queued until the file is written.
#[derive(Debug, Clone)]
pub struct NewValue {
    /// Tag name without any group prefix.
    pub tag: String,
    /// Group taken from the text before the first `:` of a `"Group:Tag"`
    /// specification, when one was supplied.
    pub group: Option<String>,
    /// Value to write; the writers in this file treat `None` as a deletion.
    pub value: Option<String>,
}
64
/// Reads metadata from files and applies queued tag changes when writing.
pub struct ExifTool {
    /// Extraction and reporting settings.
    options: Options,
    /// Changes queued by the `set_new_value*` methods, in insertion order.
    new_values: Vec<NewValue>,
}
97
/// Flat map from tag name to the tag's value rendered as a string.
pub type ImageInfo = HashMap<String, String>;
100
101impl ExifTool {
102 pub fn new() -> Self {
104 Self {
105 options: Options::default(),
106 new_values: Vec::new(),
107 }
108 }
109
110 pub fn with_options(options: Options) -> Self {
112 Self {
113 options,
114 new_values: Vec::new(),
115 }
116 }
117
118 pub fn options_mut(&mut self) -> &mut Options {
120 &mut self.options
121 }
122
123 pub fn options(&self) -> &Options {
125 &self.options
126 }
127
128 pub fn set_new_value(&mut self, tag: &str, value: Option<&str>) {
150 let (group, tag_name) = if let Some(colon_pos) = tag.find(':') {
151 (Some(tag[..colon_pos].to_string()), tag[colon_pos + 1..].to_string())
152 } else {
153 (None, tag.to_string())
154 };
155
156 self.new_values.push(NewValue {
157 tag: tag_name,
158 group,
159 value: value.map(|v| v.to_string()),
160 });
161 }
162
    /// Discards every queued change.
    pub fn clear_new_values(&mut self) {
        self.new_values.clear();
    }
167
168 pub fn set_new_values_from_file<P: AsRef<Path>>(
173 &mut self,
174 src_path: P,
175 tags_to_copy: Option<&[&str]>,
176 ) -> Result<u32> {
177 let src_tags = self.extract_info(src_path)?;
178 let mut count = 0u32;
179
180 for tag in &src_tags {
181 if tag.group.family0 == "File" || tag.group.family0 == "Composite" {
183 continue;
184 }
185 if tag.print_value.starts_with("(Binary") || tag.print_value.starts_with("(Undefined") {
187 continue;
188 }
189 if tag.print_value.is_empty() {
190 continue;
191 }
192
193 if let Some(filter) = tags_to_copy {
195 let name_lower = tag.name.to_lowercase();
196 if !filter.iter().any(|f| f.to_lowercase() == name_lower) {
197 continue;
198 }
199 }
200
201 let _full_tag = format!("{}:{}", tag.group.family0, tag.name);
202 self.new_values.push(NewValue {
203 tag: tag.name.clone(),
204 group: Some(tag.group.family0.clone()),
205 value: Some(tag.print_value.clone()),
206 });
207 count += 1;
208 }
209
210 Ok(count)
211 }
212
213 pub fn set_file_name_from_tag<P: AsRef<Path>>(
215 &self,
216 path: P,
217 tag_name: &str,
218 template: &str,
219 ) -> Result<String> {
220 let path = path.as_ref();
221 let tags = self.extract_info(path)?;
222
223 let tag_value = tags
224 .iter()
225 .find(|t| t.name.to_lowercase() == tag_name.to_lowercase())
226 .map(|t| &t.print_value)
227 .ok_or_else(|| Error::TagNotFound(tag_name.to_string()))?;
228
229 let new_name = if template.contains('%') {
232 template.replace("%v", value_to_filename(tag_value).as_str())
233 } else {
234 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
236 let clean = value_to_filename(tag_value);
237 if ext.is_empty() {
238 clean
239 } else {
240 format!("{}.{}", clean, ext)
241 }
242 };
243
244 let parent = path.parent().unwrap_or(Path::new(""));
245 let new_path = parent.join(&new_name);
246
247 fs::rename(path, &new_path).map_err(Error::Io)?;
248 Ok(new_path.to_string_lossy().to_string())
249 }
250
251 pub fn write_info<P: AsRef<Path>, Q: AsRef<Path>>(&self, src_path: P, dst_path: Q) -> Result<u32> {
256 let src_path = src_path.as_ref();
257 let dst_path = dst_path.as_ref();
258 let data = fs::read(src_path).map_err(Error::Io)?;
259
260 let file_type = self.detect_file_type(&data, src_path)?;
261 let output = self.apply_changes(&data, file_type)?;
262
263 let temp_path = dst_path.with_extension("exiftool_tmp");
265 fs::write(&temp_path, &output).map_err(Error::Io)?;
266 fs::rename(&temp_path, dst_path).map_err(Error::Io)?;
267
268 Ok(self.new_values.len() as u32)
269 }
270
271 fn apply_changes(&self, data: &[u8], file_type: FileType) -> Result<Vec<u8>> {
273 match file_type {
274 FileType::Jpeg => self.write_jpeg(data),
275 FileType::Png => self.write_png(data),
276 FileType::Tiff | FileType::Dng | FileType::Cr2 | FileType::Nef
277 | FileType::Arw | FileType::Orf | FileType::Pef => self.write_tiff(data),
278 FileType::WebP => self.write_webp(data),
279 FileType::Mp4 | FileType::QuickTime | FileType::M4a
280 | FileType::ThreeGP | FileType::F4v => self.write_mp4(data),
281 FileType::Psd => self.write_psd(data),
282 FileType::Pdf => self.write_pdf(data),
283 FileType::Heif | FileType::Avif => self.write_mp4(data),
284 FileType::Mkv | FileType::WebM => self.write_matroska(data),
285 FileType::Gif => {
286 let comment = self.new_values.iter()
287 .find(|nv| nv.tag.to_lowercase() == "comment")
288 .and_then(|nv| nv.value.clone());
289 crate::writer::gif_writer::write_gif(data, comment.as_deref())
290 }
291 FileType::Flac => {
292 let changes: Vec<(&str, &str)> = self.new_values.iter()
293 .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
294 .collect();
295 crate::writer::flac_writer::write_flac(data, &changes)
296 }
297 FileType::Mp3 | FileType::Aiff => {
298 let changes: Vec<(&str, &str)> = self.new_values.iter()
299 .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
300 .collect();
301 crate::writer::id3_writer::write_id3(data, &changes)
302 }
303 FileType::Jp2 | FileType::Jxl => {
304 let new_xmp = if self.new_values.iter().any(|nv| nv.group.as_deref() == Some("XMP")) {
305 let refs: Vec<&NewValue> = self.new_values.iter()
306 .filter(|nv| nv.group.as_deref() == Some("XMP"))
307 .collect();
308 Some(self.build_new_xmp(&refs))
309 } else { None };
310 crate::writer::jp2_writer::write_jp2(data, new_xmp.as_deref(), None)
311 }
312 FileType::PostScript => {
313 let changes: Vec<(&str, &str)> = self.new_values.iter()
314 .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
315 .collect();
316 crate::writer::ps_writer::write_postscript(data, &changes)
317 }
318 FileType::Ogg | FileType::Opus => {
319 let changes: Vec<(&str, &str)> = self.new_values.iter()
320 .filter_map(|nv| Some((nv.tag.as_str(), nv.value.as_deref()?)))
321 .collect();
322 crate::writer::ogg_writer::write_ogg(data, &changes)
323 }
324 FileType::Xmp => {
325 let props: Vec<xmp_writer::XmpProperty> = self.new_values.iter()
326 .filter_map(|nv| {
327 let val = nv.value.as_deref()?;
328 Some(xmp_writer::XmpProperty {
329 namespace: nv.group.clone().unwrap_or_else(|| "dc".into()),
330 property: nv.tag.clone(),
331 values: vec![val.to_string()],
332 prop_type: xmp_writer::XmpPropertyType::Simple,
333 })
334 })
335 .collect();
336 Ok(crate::writer::xmp_sidecar_writer::write_xmp_sidecar(&props))
337 }
338 _ => Err(Error::UnsupportedFileType(format!("writing not yet supported for {}", file_type))),
339 }
340 }
341
342 fn write_jpeg(&self, data: &[u8]) -> Result<Vec<u8>> {
344 let mut exif_values: Vec<&NewValue> = Vec::new();
346 let mut xmp_values: Vec<&NewValue> = Vec::new();
347 let mut iptc_values: Vec<&NewValue> = Vec::new();
348 let mut comment_value: Option<&str> = None;
349 let mut remove_exif = false;
350 let mut remove_xmp = false;
351 let mut remove_iptc = false;
352 let mut remove_comment = false;
353
354 for nv in &self.new_values {
355 let group = nv.group.as_deref().unwrap_or("");
356 let group_upper = group.to_uppercase();
357
358 if nv.value.is_none() && nv.tag == "*" {
360 match group_upper.as_str() {
361 "EXIF" => { remove_exif = true; continue; }
362 "XMP" => { remove_xmp = true; continue; }
363 "IPTC" => { remove_iptc = true; continue; }
364 _ => {}
365 }
366 }
367
368 match group_upper.as_str() {
369 "XMP" => xmp_values.push(nv),
370 "IPTC" => iptc_values.push(nv),
371 "EXIF" | "IFD0" | "EXIFIFD" | "GPS" => exif_values.push(nv),
372 "" => {
373 if nv.tag.to_lowercase() == "comment" {
375 if nv.value.is_none() {
376 remove_comment = true;
377 } else {
378 comment_value = nv.value.as_deref();
379 }
380 } else if is_xmp_tag(&nv.tag) {
381 xmp_values.push(nv);
382 } else {
383 exif_values.push(nv);
384 }
385 }
386 _ => exif_values.push(nv), }
388 }
389
390 let new_exif = if !exif_values.is_empty() {
392 Some(self.build_new_exif(data, &exif_values)?)
393 } else {
394 None
395 };
396
397 let new_xmp = if !xmp_values.is_empty() {
399 Some(self.build_new_xmp(&xmp_values))
400 } else {
401 None
402 };
403
404 let new_iptc_data = if !iptc_values.is_empty() {
406 let records: Vec<iptc_writer::IptcRecord> = iptc_values
407 .iter()
408 .filter_map(|nv| {
409 let value = nv.value.as_deref()?;
410 let (record, dataset) = iptc_writer::tag_name_to_iptc(&nv.tag)?;
411 Some(iptc_writer::IptcRecord {
412 record,
413 dataset,
414 data: value.as_bytes().to_vec(),
415 })
416 })
417 .collect();
418 if records.is_empty() {
419 None
420 } else {
421 Some(iptc_writer::build_iptc(&records))
422 }
423 } else {
424 None
425 };
426
427 jpeg_writer::write_jpeg(
429 data,
430 new_exif.as_deref(),
431 new_xmp.as_deref(),
432 new_iptc_data.as_deref(),
433 comment_value,
434 remove_exif,
435 remove_xmp,
436 remove_iptc,
437 remove_comment,
438 )
439 }
440
441 fn build_new_exif(&self, jpeg_data: &[u8], values: &[&NewValue]) -> Result<Vec<u8>> {
443 let bo = ByteOrderMark::BigEndian;
444 let mut ifd0_entries = Vec::new();
445 let mut exif_entries = Vec::new();
446 let mut gps_entries = Vec::new();
447
448 let existing = extract_existing_exif_entries(jpeg_data, bo);
450 for entry in &existing {
451 match classify_exif_tag(entry.tag) {
452 ExifIfdGroup::Ifd0 => ifd0_entries.push(entry.clone()),
453 ExifIfdGroup::ExifIfd => exif_entries.push(entry.clone()),
454 ExifIfdGroup::Gps => gps_entries.push(entry.clone()),
455 }
456 }
457
458 let deleted_tags: Vec<u16> = values
460 .iter()
461 .filter(|nv| nv.value.is_none())
462 .filter_map(|nv| tag_name_to_id(&nv.tag))
463 .collect();
464
465 ifd0_entries.retain(|e| !deleted_tags.contains(&e.tag));
467 exif_entries.retain(|e| !deleted_tags.contains(&e.tag));
468 gps_entries.retain(|e| !deleted_tags.contains(&e.tag));
469
470 for nv in values {
472 if nv.value.is_none() {
473 continue;
474 }
475 let value_str = nv.value.as_deref().unwrap_or("");
476 let group = nv.group.as_deref().unwrap_or("");
477
478 if let Some((tag_id, format, encoded)) = encode_exif_tag(&nv.tag, value_str, group, bo) {
479 let entry = exif_writer::IfdEntry {
480 tag: tag_id,
481 format,
482 data: encoded,
483 };
484
485 let target = match group.to_uppercase().as_str() {
486 "GPS" => &mut gps_entries,
487 "EXIFIFD" => &mut exif_entries,
488 _ => match classify_exif_tag(tag_id) {
489 ExifIfdGroup::ExifIfd => &mut exif_entries,
490 ExifIfdGroup::Gps => &mut gps_entries,
491 ExifIfdGroup::Ifd0 => &mut ifd0_entries,
492 },
493 };
494
495 if let Some(existing) = target.iter_mut().find(|e| e.tag == tag_id) {
497 *existing = entry;
498 } else {
499 target.push(entry);
500 }
501 }
502 }
503
504 ifd0_entries.retain(|e| e.tag != 0x8769 && e.tag != 0x8825 && e.tag != 0xA005);
506
507 exif_writer::build_exif(&ifd0_entries, &exif_entries, &gps_entries, bo)
508 }
509
510 fn write_png(&self, data: &[u8]) -> Result<Vec<u8>> {
512 let mut new_text: Vec<(&str, &str)> = Vec::new();
513 let mut remove_text: Vec<&str> = Vec::new();
514
515 let owned_pairs: Vec<(String, String)> = self.new_values.iter()
518 .filter(|nv| nv.value.is_some())
519 .map(|nv| (nv.tag.clone(), nv.value.clone().unwrap()))
520 .collect();
521
522 for (tag, value) in &owned_pairs {
523 new_text.push((tag.as_str(), value.as_str()));
524 }
525
526 for nv in &self.new_values {
527 if nv.value.is_none() {
528 remove_text.push(&nv.tag);
529 }
530 }
531
532 png_writer::write_png(data, &new_text, None, &remove_text)
533 }
534
535 fn write_psd(&self, data: &[u8]) -> Result<Vec<u8>> {
537 let mut iptc_values = Vec::new();
538 let mut xmp_values = Vec::new();
539
540 for nv in &self.new_values {
541 let group = nv.group.as_deref().unwrap_or("").to_uppercase();
542 match group.as_str() {
543 "XMP" => xmp_values.push(nv),
544 "IPTC" => iptc_values.push(nv),
545 _ => {
546 if is_xmp_tag(&nv.tag) { xmp_values.push(nv); }
547 else { iptc_values.push(nv); }
548 }
549 }
550 }
551
552 let new_iptc = if !iptc_values.is_empty() {
553 let records: Vec<_> = iptc_values.iter().filter_map(|nv| {
554 let value = nv.value.as_deref()?;
555 let (record, dataset) = iptc_writer::tag_name_to_iptc(&nv.tag)?;
556 Some(iptc_writer::IptcRecord { record, dataset, data: value.as_bytes().to_vec() })
557 }).collect();
558 if records.is_empty() { None } else { Some(iptc_writer::build_iptc(&records)) }
559 } else { None };
560
561 let new_xmp = if !xmp_values.is_empty() {
562 let refs: Vec<&NewValue> = xmp_values.iter().copied().collect();
563 Some(self.build_new_xmp(&refs))
564 } else { None };
565
566 psd_writer::write_psd(data, new_iptc.as_deref(), new_xmp.as_deref())
567 }
568
569 fn write_matroska(&self, data: &[u8]) -> Result<Vec<u8>> {
571 let changes: Vec<(&str, &str)> = self.new_values.iter()
572 .filter_map(|nv| {
573 let value = nv.value.as_deref()?;
574 Some((nv.tag.as_str(), value))
575 })
576 .collect();
577
578 matroska_writer::write_matroska(data, &changes)
579 }
580
581 fn write_pdf(&self, data: &[u8]) -> Result<Vec<u8>> {
583 let changes: Vec<(&str, &str)> = self.new_values.iter()
584 .filter_map(|nv| {
585 let value = nv.value.as_deref()?;
586 Some((nv.tag.as_str(), value))
587 })
588 .collect();
589
590 pdf_writer::write_pdf(data, &changes)
591 }
592
593 fn write_mp4(&self, data: &[u8]) -> Result<Vec<u8>> {
595 let mut ilst_tags: Vec<([u8; 4], String)> = Vec::new();
596 let mut xmp_values: Vec<&NewValue> = Vec::new();
597
598 for nv in &self.new_values {
599 if nv.value.is_none() { continue; }
600 let group = nv.group.as_deref().unwrap_or("").to_uppercase();
601 if group == "XMP" {
602 xmp_values.push(nv);
603 } else if let Some(key) = mp4_writer::tag_to_ilst_key(&nv.tag) {
604 ilst_tags.push((key, nv.value.clone().unwrap()));
605 }
606 }
607
608 let tag_refs: Vec<(&[u8; 4], &str)> = ilst_tags.iter()
609 .map(|(k, v)| (k, v.as_str()))
610 .collect();
611
612 let new_xmp = if !xmp_values.is_empty() {
613 let refs: Vec<&NewValue> = xmp_values.iter().copied().collect();
614 Some(self.build_new_xmp(&refs))
615 } else {
616 None
617 };
618
619 mp4_writer::write_mp4(data, &tag_refs, new_xmp.as_deref())
620 }
621
622 fn write_webp(&self, data: &[u8]) -> Result<Vec<u8>> {
624 let mut exif_values: Vec<&NewValue> = Vec::new();
625 let mut xmp_values: Vec<&NewValue> = Vec::new();
626 let mut remove_exif = false;
627 let mut remove_xmp = false;
628
629 for nv in &self.new_values {
630 let group = nv.group.as_deref().unwrap_or("").to_uppercase();
631 if nv.value.is_none() && nv.tag == "*" {
632 if group == "EXIF" { remove_exif = true; }
633 if group == "XMP" { remove_xmp = true; }
634 continue;
635 }
636 match group.as_str() {
637 "XMP" => xmp_values.push(nv),
638 _ => exif_values.push(nv),
639 }
640 }
641
642 let new_exif = if !exif_values.is_empty() {
643 let bo = ByteOrderMark::BigEndian;
644 let mut entries = Vec::new();
645 for nv in &exif_values {
646 if let Some(ref v) = nv.value {
647 let group = nv.group.as_deref().unwrap_or("");
648 if let Some((tag_id, format, encoded)) = encode_exif_tag(&nv.tag, v, group, bo) {
649 entries.push(exif_writer::IfdEntry { tag: tag_id, format, data: encoded });
650 }
651 }
652 }
653 if !entries.is_empty() {
654 Some(exif_writer::build_exif(&entries, &[], &[], bo)?)
655 } else {
656 None
657 }
658 } else {
659 None
660 };
661
662 let new_xmp = if !xmp_values.is_empty() {
663 Some(self.build_new_xmp(&xmp_values.iter().map(|v| *v).collect::<Vec<_>>()))
664 } else {
665 None
666 };
667
668 webp_writer::write_webp(
669 data,
670 new_exif.as_deref(),
671 new_xmp.as_deref(),
672 remove_exif,
673 remove_xmp,
674 )
675 }
676
677 fn write_tiff(&self, data: &[u8]) -> Result<Vec<u8>> {
679 let bo = if data.starts_with(b"II") {
680 ByteOrderMark::LittleEndian
681 } else {
682 ByteOrderMark::BigEndian
683 };
684
685 let mut changes: Vec<(u16, Vec<u8>)> = Vec::new();
686 for nv in &self.new_values {
687 if let Some(ref value) = nv.value {
688 let group = nv.group.as_deref().unwrap_or("");
689 if let Some((tag_id, _format, encoded)) = encode_exif_tag(&nv.tag, value, group, bo) {
690 changes.push((tag_id, encoded));
691 }
692 }
693 }
694
695 tiff_writer::write_tiff(data, &changes)
696 }
697
698 fn build_new_xmp(&self, values: &[&NewValue]) -> Vec<u8> {
700 let mut properties = Vec::new();
701
702 for nv in values {
703 let value_str = match &nv.value {
704 Some(v) => v.clone(),
705 None => continue,
706 };
707
708 let ns = nv.group.as_deref().unwrap_or("dc").to_lowercase();
709 let ns = if ns == "xmp" { "xmp".to_string() } else { ns };
710
711 let prop_type = match nv.tag.to_lowercase().as_str() {
712 "title" | "description" | "rights" => xmp_writer::XmpPropertyType::LangAlt,
713 "subject" | "keywords" => xmp_writer::XmpPropertyType::Bag,
714 "creator" => xmp_writer::XmpPropertyType::Seq,
715 _ => xmp_writer::XmpPropertyType::Simple,
716 };
717
718 let values = if matches!(prop_type, xmp_writer::XmpPropertyType::Bag | xmp_writer::XmpPropertyType::Seq) {
719 value_str.split(',').map(|s| s.trim().to_string()).collect()
720 } else {
721 vec![value_str]
722 };
723
724 properties.push(xmp_writer::XmpProperty {
725 namespace: ns,
726 property: nv.tag.clone(),
727 values,
728 prop_type,
729 });
730 }
731
732 xmp_writer::build_xmp(&properties).into_bytes()
733 }
734
735 pub fn image_info<P: AsRef<Path>>(&self, path: P) -> Result<ImageInfo> {
743 let tags = self.extract_info(path)?;
744 Ok(self.get_info(&tags))
745 }
746
747 pub fn extract_info<P: AsRef<Path>>(&self, path: P) -> Result<Vec<Tag>> {
751 let path = path.as_ref();
752 let data = fs::read(path).map_err(Error::Io)?;
753
754 self.extract_info_from_bytes(&data, path)
755 }
756
757 pub fn extract_info_from_bytes(&self, data: &[u8], path: &Path) -> Result<Vec<Tag>> {
759 let file_type_result = self.detect_file_type(data, path);
760 let (file_type, mut tags) = match file_type_result {
761 Ok(ft) => {
762 let t = self.process_file(data, ft).or_else(|_| {
763 self.process_by_extension(data, path)
764 })?;
765 (Some(ft), t)
766 }
767 Err(_) => {
768 let t = self.process_by_extension(data, path)?;
770 (None, t)
771 }
772 };
773 let file_type = file_type.unwrap_or(FileType::Zip); tags.push(Tag {
777 id: crate::tag::TagId::Text("FileType".into()),
778 name: "FileType".into(),
779 description: "File Type".into(),
780 group: crate::tag::TagGroup {
781 family0: "File".into(),
782 family1: "File".into(),
783 family2: "Other".into(),
784 },
785 raw_value: Value::String(format!("{:?}", file_type)),
786 print_value: file_type.description().to_string(),
787 priority: 0,
788 });
789
790 tags.push(Tag {
791 id: crate::tag::TagId::Text("MIMEType".into()),
792 name: "MIMEType".into(),
793 description: "MIME Type".into(),
794 group: crate::tag::TagGroup {
795 family0: "File".into(),
796 family1: "File".into(),
797 family2: "Other".into(),
798 },
799 raw_value: Value::String(file_type.mime_type().to_string()),
800 print_value: file_type.mime_type().to_string(),
801 priority: 0,
802 });
803
804 if let Ok(metadata) = fs::metadata(path) {
805 tags.push(Tag {
806 id: crate::tag::TagId::Text("FileSize".into()),
807 name: "FileSize".into(),
808 description: "File Size".into(),
809 group: crate::tag::TagGroup {
810 family0: "File".into(),
811 family1: "File".into(),
812 family2: "Other".into(),
813 },
814 raw_value: Value::U32(metadata.len() as u32),
815 print_value: format_file_size(metadata.len()),
816 priority: 0,
817 });
818 }
819
820 let file_tag = |name: &str, val: Value| -> Tag {
822 Tag {
823 id: crate::tag::TagId::Text(name.to_string()),
824 name: name.to_string(), description: name.to_string(),
825 group: crate::tag::TagGroup { family0: "File".into(), family1: "File".into(), family2: "Other".into() },
826 raw_value: val.clone(), print_value: val.to_display_string(), priority: 0,
827 }
828 };
829
830 if let Some(fname) = path.file_name().and_then(|n| n.to_str()) {
831 tags.push(file_tag("FileName", Value::String(fname.to_string())));
832 }
833 if let Some(dir) = path.parent().and_then(|p| p.to_str()) {
834 tags.push(file_tag("Directory", Value::String(dir.to_string())));
835 }
836 let canonical_ext = file_type.extensions().first().copied().unwrap_or("");
838 if !canonical_ext.is_empty() {
839 tags.push(file_tag("FileTypeExtension", Value::String(canonical_ext.to_string())));
840 }
841
842 #[cfg(unix)]
843 if let Ok(metadata) = fs::metadata(path) {
844 use std::os::unix::fs::MetadataExt;
845 let mode = metadata.mode();
846 tags.push(file_tag("FilePermissions", Value::String(format!("{:o}", mode & 0o7777))));
847
848 if let Ok(modified) = metadata.modified() {
850 if let Ok(dur) = modified.duration_since(std::time::UNIX_EPOCH) {
851 let secs = dur.as_secs() as i64;
852 tags.push(file_tag("FileModifyDate", Value::String(unix_to_datetime(secs))));
853 }
854 }
855 if let Ok(accessed) = metadata.accessed() {
857 if let Ok(dur) = accessed.duration_since(std::time::UNIX_EPOCH) {
858 let secs = dur.as_secs() as i64;
859 tags.push(file_tag("FileAccessDate", Value::String(unix_to_datetime(secs))));
860 }
861 }
862 let ctime = metadata.ctime();
864 if ctime > 0 {
865 tags.push(file_tag("FileInodeChangeDate", Value::String(unix_to_datetime(ctime))));
866 }
867 }
868
869 {
871 let bo_str = if data.len() > 8 {
872 let check: Option<&[u8]> = if data.starts_with(&[0xFF, 0xD8]) {
874 data.windows(6).position(|w| w == b"Exif\0\0")
876 .map(|p| &data[p+6..])
877 } else if data.starts_with(b"FUJIFILMCCD-RAW") && data.len() >= 0x60 {
878 let jpeg_offset = u32::from_be_bytes([data[0x54], data[0x55], data[0x56], data[0x57]]) as usize;
880 let jpeg_length = u32::from_be_bytes([data[0x58], data[0x59], data[0x5A], data[0x5B]]) as usize;
881 if jpeg_offset > 0 && jpeg_offset + jpeg_length <= data.len() {
882 let jpeg = &data[jpeg_offset..jpeg_offset + jpeg_length];
883 jpeg.windows(6).position(|w| w == b"Exif\0\0")
884 .map(|p| &jpeg[p+6..])
885 } else {
886 None
887 }
888 } else if data.starts_with(b"RIFF") && data.len() >= 12 {
889 let mut riff_bo: Option<&[u8]> = None;
891 let mut pos = 12usize;
892 while pos + 8 <= data.len() {
893 let cid = &data[pos..pos+4];
894 let csz = u32::from_le_bytes([data[pos+4],data[pos+5],data[pos+6],data[pos+7]]) as usize;
895 let cstart = pos + 8;
896 let cend = (cstart + csz).min(data.len());
897 if cid == b"EXIF" && cend > cstart {
898 let exif_data = &data[cstart..cend];
899 let tiff = if exif_data.starts_with(b"Exif\0\0") { &exif_data[6..] } else { exif_data };
900 riff_bo = Some(tiff);
901 break;
902 }
903 if cid == b"LIST" && cend >= cstart + 4 {
905 }
907 pos = cend + (csz & 1);
908 }
909 riff_bo
910 } else if data.starts_with(&[0x00, 0x00, 0x00, 0x0C, b'J', b'X', b'L', b' ']) {
911 let mut jxl_bo: Option<String> = None;
913 let mut jpos = 12usize; while jpos + 8 <= data.len() {
915 let bsize = u32::from_be_bytes([data[jpos], data[jpos+1], data[jpos+2], data[jpos+3]]) as usize;
916 let btype = &data[jpos+4..jpos+8];
917 if bsize < 8 || jpos + bsize > data.len() { break; }
918 if btype == b"brob" && jpos + bsize > 12 {
919 let inner_type = &data[jpos+8..jpos+12];
920 if inner_type == b"Exif" || inner_type == b"exif" {
921 let brotli_payload = &data[jpos+12..jpos+bsize];
922 use std::io::Cursor;
923 let mut inp = Cursor::new(brotli_payload);
924 let mut out: Vec<u8> = Vec::new();
925 if brotli::BrotliDecompress(&mut inp, &mut out).is_ok() {
926 let exif_start = if out.len() > 4 { 4 } else { 0 };
927 if exif_start < out.len() {
928 if out[exif_start..].starts_with(b"MM") {
929 jxl_bo = Some("Big-endian (Motorola, MM)".to_string());
930 } else if out[exif_start..].starts_with(b"II") {
931 jxl_bo = Some("Little-endian (Intel, II)".to_string());
932 }
933 }
934 }
935 break;
936 }
937 }
938 jpos += bsize;
939 }
940 if let Some(bo) = jxl_bo {
941 if !bo.is_empty() && file_type != FileType::Btf {
942 tags.push(file_tag("ExifByteOrder", Value::String(bo)));
943 }
944 }
945 None
947 } else if data.starts_with(&[0x00, b'M', b'R', b'M']) {
948 let mrw_data_offset = if data.len() >= 8 {
950 u32::from_be_bytes([data[4], data[5], data[6], data[7]]) as usize + 8
951 } else { 0 };
952 let mut mrw_bo: Option<&[u8]> = None;
953 let mut mpos = 8usize;
954 while mpos + 8 <= mrw_data_offset.min(data.len()) {
955 let seg_tag = &data[mpos..mpos+4];
956 let seg_len = u32::from_be_bytes([data[mpos+4], data[mpos+5], data[mpos+6], data[mpos+7]]) as usize;
957 if seg_tag == b"\x00TTW" && mpos + 8 + seg_len <= data.len() {
958 mrw_bo = Some(&data[mpos+8..mpos+8+seg_len]);
959 break;
960 }
961 mpos += 8 + seg_len;
962 }
963 mrw_bo
964 } else {
965 Some(&data[..])
966 };
967 if let Some(tiff) = check {
968 if tiff.starts_with(b"II") { "Little-endian (Intel, II)" }
969 else if tiff.starts_with(b"MM") { "Big-endian (Motorola, MM)" }
970 else { "" }
971 } else { "" }
972 } else { "" };
973 let already_has_exifbyteorder = tags.iter().any(|t| t.name == "ExifByteOrder");
976 if !bo_str.is_empty() && !already_has_exifbyteorder
977 && file_type != FileType::Btf
978 && file_type != FileType::Dr4 && file_type != FileType::Vrd
979 && file_type != FileType::Crw {
980 tags.push(file_tag("ExifByteOrder", Value::String(bo_str.to_string())));
981 }
982 }
983
984 tags.push(file_tag("ExifToolVersion", Value::String(crate::VERSION.to_string())));
985
986 let composite = crate::composite::compute_composite_tags(&tags);
988 tags.extend(composite);
989
990 {
996 let is_flir_fff = tags.iter().any(|t| t.group.family0 == "APP1"
997 && t.group.family1 == "FLIR");
998 if is_flir_fff {
999 tags.retain(|t| !(t.name == "LensID" && t.group.family0 == "Composite"));
1000 }
1001 }
1002
1003 {
1008 let make = tags.iter().find(|t| t.name == "Make")
1009 .map(|t| t.print_value.clone()).unwrap_or_default();
1010 if !make.to_uppercase().contains("CANON") {
1011 tags.retain(|t| t.name != "Lens" || t.group.family0 != "Composite");
1012 }
1013 }
1014
1015 {
1019 let riff_priority_zero_tags = ["Quality", "SampleSize", "StreamType"];
1020 for tag_name in &riff_priority_zero_tags {
1021 let has_makernotes = tags.iter().any(|t| t.name == *tag_name
1022 && t.group.family0 != "RIFF");
1023 if has_makernotes {
1024 tags.retain(|t| !(t.name == *tag_name && t.group.family0 == "RIFF"));
1025 }
1026 }
1027 }
1028
1029 if !self.options.requested_tags.is_empty() {
1031 let requested: Vec<String> = self
1032 .options
1033 .requested_tags
1034 .iter()
1035 .map(|t| t.to_lowercase())
1036 .collect();
1037 tags.retain(|t| requested.contains(&t.name.to_lowercase()));
1038 }
1039
1040 Ok(tags)
1041 }
1042
1043 fn get_info(&self, tags: &[Tag]) -> ImageInfo {
1047 let mut info = ImageInfo::new();
1048 let mut seen: HashMap<String, usize> = HashMap::new();
1049
1050 for tag in tags {
1051 let value = if self.options.print_conv {
1052 &tag.print_value
1053 } else {
1054 &tag.raw_value.to_display_string()
1055 };
1056
1057 let count = seen.entry(tag.name.clone()).or_insert(0);
1058 *count += 1;
1059
1060 if *count == 1 {
1061 info.insert(tag.name.clone(), value.clone());
1062 } else if self.options.duplicates {
1063 let key = format!("{} [{}:{}]", tag.name, tag.group.family0, tag.group.family1);
1064 info.insert(key, value.clone());
1065 }
1066 }
1067
1068 info
1069 }
1070
1071 fn detect_file_type(&self, data: &[u8], path: &Path) -> Result<FileType> {
1073 let header_len = data.len().min(256);
1075 if let Some(ft) = file_type::detect_from_magic(&data[..header_len]) {
1076 if ft == FileType::Ico {
1078 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1079 if ext.eq_ignore_ascii_case("dfont") {
1080 return Ok(FileType::Font);
1081 }
1082 }
1083 }
1084 if ft == FileType::Jpeg {
1086 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1087 if ext.eq_ignore_ascii_case("jps") {
1088 return Ok(FileType::Jps);
1089 }
1090 }
1091 }
1092 if ft == FileType::Plist {
1094 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1095 if ext.eq_ignore_ascii_case("aae") {
1096 return Ok(FileType::Aae);
1097 }
1098 }
1099 }
1100 if ft == FileType::Xmp {
1102 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1103 if ext.eq_ignore_ascii_case("plist") {
1104 return Ok(FileType::Plist);
1105 }
1106 if ext.eq_ignore_ascii_case("aae") {
1107 return Ok(FileType::Aae);
1108 }
1109 }
1110 }
1111 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1113 if ext.eq_ignore_ascii_case("pcd") && data.len() >= 2056
1114 && &data[2048..2055] == b"PCD_IPI"
1115 {
1116 return Ok(FileType::PhotoCd);
1117 }
1118 }
1119 if ft == FileType::Mp3 {
1121 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1122 if ext.eq_ignore_ascii_case("mpc") {
1123 return Ok(FileType::Mpc);
1124 }
1125 if ext.eq_ignore_ascii_case("ape") {
1126 return Ok(FileType::Ape);
1127 }
1128 if ext.eq_ignore_ascii_case("wv") {
1129 return Ok(FileType::WavPack);
1130 }
1131 }
1132 }
1133 if ft == FileType::Zip {
1135 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1137 if ext.eq_ignore_ascii_case("eip") {
1138 return Ok(FileType::Eip);
1139 }
1140 }
1141 if let Some(od_type) = detect_opendocument_type(data) {
1142 return Ok(od_type);
1143 }
1144 }
1145 return Ok(ft);
1146 }
1147
1148 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1150 if let Some(ft) = file_type::detect_from_extension(ext) {
1151 return Ok(ft);
1152 }
1153 }
1154
1155 let ext_str = path
1156 .extension()
1157 .and_then(|e| e.to_str())
1158 .unwrap_or("unknown");
1159 Err(Error::UnsupportedFileType(ext_str.to_string()))
1160 }
1161
1162 fn process_file(&self, data: &[u8], file_type: FileType) -> Result<Vec<Tag>> {
1165 match file_type {
1166 FileType::Jpeg | FileType::Jps => formats::jpeg::read_jpeg(data),
1167 FileType::Png | FileType::Mng => formats::png::read_png(data),
1168 FileType::Tiff
1170 | FileType::Btf
1171 | FileType::Dng
1172 | FileType::Cr2
1173 | FileType::Nef
1174 | FileType::Arw
1175 | FileType::Sr2
1176 | FileType::Orf
1177 | FileType::Pef
1178 | FileType::Erf
1179 | FileType::Fff
1180 | FileType::Rwl
1181 | FileType::Mef
1182 | FileType::Srw
1183 | FileType::Gpr
1184 | FileType::Arq
1185 | FileType::ThreeFR
1186 | FileType::Dcr
1187 | FileType::Rw2
1188 | FileType::Srf => formats::tiff::read_tiff(data),
1189 FileType::Iiq => formats::misc::read_iiq(data),
1191 FileType::Gif => formats::gif::read_gif(data),
1193 FileType::Bmp => formats::bmp::read_bmp(data),
1194 FileType::WebP | FileType::Avi | FileType::Wav => formats::riff::read_riff(data),
1195 FileType::Psd => formats::psd::read_psd(data),
1196 FileType::Mp3 => formats::id3::read_mp3(data),
1198 FileType::Flac => formats::flac::read_flac(data),
1199 FileType::Ogg | FileType::Opus => formats::ogg::read_ogg(data),
1200 FileType::Aiff => formats::aiff::read_aiff(data),
1201 FileType::Mp4
1203 | FileType::QuickTime
1204 | FileType::M4a
1205 | FileType::ThreeGP
1206 | FileType::Heif
1207 | FileType::Avif
1208 | FileType::Cr3
1209 | FileType::F4v
1210 | FileType::Mqv
1211 | FileType::Lrv => formats::quicktime::read_quicktime(data),
1212 FileType::Mkv | FileType::WebM => formats::matroska::read_matroska(data),
1213 FileType::Asf | FileType::Wmv | FileType::Wma => formats::asf::read_asf(data),
1214 FileType::Wtv => formats::wtv::read_wtv(data),
1215 FileType::Crw => formats::canon_raw::read_crw(data),
1217 FileType::Raf => formats::raf::read_raf(data),
1218 FileType::Mrw => formats::mrw::read_mrw(data),
1219 FileType::Mrc => formats::mrc::read_mrc(data),
1220 FileType::Jp2 => formats::jp2::read_jp2(data),
1222 FileType::J2c => formats::jp2::read_j2c(data),
1223 FileType::Jxl => formats::jp2::read_jxl(data),
1224 FileType::Ico => formats::ico::read_ico(data),
1225 FileType::Icc => formats::icc::read_icc(data),
1226 FileType::Pdf => formats::pdf::read_pdf(data),
1228 FileType::PostScript => {
1229 if data.starts_with(b"%!PS-AdobeFont") || data.starts_with(b"%!FontType1") {
1231 formats::font::read_pfa(data).or_else(|_| formats::postscript::read_postscript(data))
1232 } else {
1233 formats::postscript::read_postscript(data)
1234 }
1235 }
1236 FileType::Eip => formats::capture_one::read_eip(data),
1237 FileType::Zip | FileType::Docx | FileType::Xlsx | FileType::Pptx
1238 | FileType::Doc | FileType::Xls | FileType::Ppt => formats::zip::read_zip(data),
1239 FileType::Rtf => formats::rtf::read_rtf(data),
1240 FileType::InDesign => formats::misc::read_indesign(data),
1241 FileType::Pcap => formats::misc::read_pcap(data),
1242 FileType::Pcapng => formats::misc::read_pcapng(data),
1243 FileType::Vrd => formats::canon_vrd::read_vrd(data).or_else(|_| Ok(Vec::new())),
1245 FileType::Dr4 => formats::canon_vrd::read_dr4(data).or_else(|_| Ok(Vec::new())),
1246 FileType::Xmp => formats::xmp_file::read_xmp(data),
1248 FileType::Svg => formats::misc::read_svg(data),
1249 FileType::Html => {
1250 let is_svg = data.windows(4).take(512).any(|w| w == b"<svg");
1252 if is_svg {
1253 formats::misc::read_svg(data)
1254 } else {
1255 formats::html::read_html(data)
1256 }
1257 }
1258 FileType::Exe => formats::exe::read_exe(data),
1259 FileType::Font => {
1260 if data.starts_with(b"StartFontMetrics") {
1262 return formats::font::read_afm(data);
1263 }
1264 if data.starts_with(b"%!PS-AdobeFont") || data.starts_with(b"%!FontType1") {
1266 return formats::font::read_pfa(data).or_else(|_| Ok(Vec::new()));
1267 }
1268 if data.len() >= 2 && data[0] == 0x80 && (data[1] == 0x01 || data[1] == 0x02) {
1270 return formats::font::read_pfb(data).or_else(|_| Ok(Vec::new()));
1271 }
1272 formats::font::read_font(data)
1273 }
1274 FileType::WavPack | FileType::Dsf => formats::id3::read_mp3(data),
1276 FileType::Ape => formats::ape::read_ape(data),
1277 FileType::Mpc => formats::ape::read_mpc(data),
1278 FileType::Aac => formats::misc::read_aac(data),
1279 FileType::RealAudio => {
1280 formats::misc::read_real_audio(data).or_else(|_| Ok(Vec::new()))
1281 }
1282 FileType::RealMedia => {
1283 formats::misc::read_real_media(data).or_else(|_| Ok(Vec::new()))
1284 }
1285 FileType::Czi => formats::misc::read_czi(data).or_else(|_| Ok(Vec::new())),
1287 FileType::PhotoCd => formats::misc::read_photo_cd(data).or_else(|_| Ok(Vec::new())),
1288 FileType::Dicom => formats::dicom::read_dicom(data),
1289 FileType::Fits => formats::misc::read_fits(data),
1290 FileType::Flv => formats::misc::read_flv(data),
1291 FileType::Mxf => formats::misc::read_mxf(data).or_else(|_| Ok(Vec::new())),
1292 FileType::Swf => formats::misc::read_swf(data),
1293 FileType::Hdr => formats::misc::read_hdr(data),
1294 FileType::DjVu => formats::djvu::read_djvu(data),
1295 FileType::Xcf => formats::gimp::read_xcf(data),
1296 FileType::Mie => formats::mie::read_mie(data),
1297 FileType::Lfp => formats::lytro::read_lfp(data),
1298 FileType::Fpf => formats::flir_fpf::read_fpf(data),
1300 FileType::Flif => formats::misc::read_flif(data),
1301 FileType::Bpg => formats::misc::read_bpg(data),
1302 FileType::Pcx => formats::misc::read_pcx(data),
1303 FileType::Pict => formats::misc::read_pict(data),
1304 FileType::M2ts => formats::misc::read_m2ts(data),
1305 FileType::Gzip => formats::misc::read_gzip(data),
1306 FileType::Rar => formats::misc::read_rar(data),
1307 FileType::Dss => formats::misc::read_dss(data),
1308 FileType::Moi => formats::misc::read_moi(data),
1309 FileType::MacOs => formats::misc::read_macos(data),
1310 FileType::Json => formats::misc::read_json(data),
1311 FileType::Pgf => formats::pgf::read_pgf(data),
1313 FileType::Xisf => formats::xisf::read_xisf(data),
1314 FileType::Torrent => formats::torrent::read_torrent(data),
1315 FileType::Mobi => formats::palm::read_palm(data),
1316 FileType::Psp => formats::psp::read_psp(data),
1317 FileType::SonyPmp => formats::sony_pmp::read_sony_pmp(data),
1318 FileType::Audible => formats::audible::read_audible(data),
1319 FileType::Exr => formats::openexr::read_openexr(data),
1320 FileType::Plist => {
1322 if data.starts_with(b"bplist") {
1323 formats::plist::read_binary_plist_tags(data)
1324 } else {
1325 formats::plist::read_xml_plist(data)
1326 }
1327 }
1328 FileType::Aae => {
1329 if data.starts_with(b"bplist") {
1330 formats::plist::read_binary_plist_tags(data)
1331 } else {
1332 formats::plist::read_aae_plist(data)
1333 }
1334 }
1335 FileType::KyoceraRaw => formats::misc::read_kyocera_raw(data),
1336 FileType::PortableFloatMap => formats::misc::read_pfm(data),
1337 FileType::Fpf => formats::flir_fpf::read_fpf(data),
1338 FileType::Ods | FileType::Odt | FileType::Odp | FileType::Odg |
1339 FileType::Odf | FileType::Odb | FileType::Odi | FileType::Odc => formats::zip::read_zip(data),
1340 _ => Err(Error::UnsupportedFileType(format!("{}", file_type))),
1341 }
1342 }
1343
1344 fn process_by_extension(&self, data: &[u8], path: &Path) -> Result<Vec<Tag>> {
1346 let ext = path
1347 .extension()
1348 .and_then(|e| e.to_str())
1349 .unwrap_or("")
1350 .to_ascii_lowercase();
1351
1352 match ext.as_str() {
1353 "ppm" | "pgm" | "pbm" => formats::misc::read_ppm(data),
1354 "pfm" => {
1355 if data.len() >= 3 && data[0] == b'P' && (data[1] == b'f' || data[1] == b'F') {
1357 formats::misc::read_ppm(data)
1358 } else {
1359 Ok(Vec::new()) }
1361 }
1362 "json" => formats::misc::read_json(data),
1363 "svg" => formats::misc::read_svg(data),
1364 "ram" => formats::misc::read_ram(data).or_else(|_| Ok(Vec::new())),
1365 "txt" | "log" | "igc" => {
1366 Ok(compute_text_tags(data, false))
1367 }
1368 "csv" => {
1369 Ok(compute_text_tags(data, true))
1370 }
1371 "url" => formats::lnk::read_url(data).or_else(|_| Ok(Vec::new())),
1372 "lnk" => formats::lnk::read_lnk(data).or_else(|_| Ok(Vec::new())),
1373 "gpx" | "kml" | "xml" | "inx" => formats::xmp_file::read_xmp(data),
1374 "plist" => {
1375 if data.starts_with(b"bplist") {
1376 formats::plist::read_binary_plist_tags(data).or_else(|_| Ok(Vec::new()))
1377 } else {
1378 formats::plist::read_xml_plist(data).or_else(|_| Ok(Vec::new()))
1379 }
1380 }
1381 "aae" => {
1382 if data.starts_with(b"bplist") {
1383 formats::plist::read_binary_plist_tags(data).or_else(|_| Ok(Vec::new()))
1384 } else {
1385 formats::plist::read_aae_plist(data).or_else(|_| Ok(Vec::new()))
1386 }
1387 }
1388 "vcf" | "ics" | "vcard" => {
1389 let s = String::from_utf8_lossy(&data[..data.len().min(100)]);
1390 if s.contains("BEGIN:VCALENDAR") {
1391 formats::vcard::read_ics(data).or_else(|_| Ok(Vec::new()))
1392 } else {
1393 formats::vcard::read_vcf(data).or_else(|_| Ok(Vec::new()))
1394 }
1395 }
1396 "xcf" => Ok(Vec::new()), "vrd" => formats::canon_vrd::read_vrd(data).or_else(|_| Ok(Vec::new())),
1398 "dr4" => formats::canon_vrd::read_dr4(data).or_else(|_| Ok(Vec::new())),
1399 "indd" | "indt" => Ok(Vec::new()), "x3f" => formats::sigma_raw::read_x3f(data).or_else(|_| Ok(Vec::new())),
1401 "mie" => Ok(Vec::new()), "exr" => Ok(Vec::new()), "wpg" => formats::misc::read_wpg(data).or_else(|_| Ok(Vec::new())),
1404 "moi" => formats::misc::read_moi(data).or_else(|_| Ok(Vec::new())),
1405 "macos" => formats::misc::read_macos(data).or_else(|_| Ok(Vec::new())),
1406 "json" => formats::misc::read_json(data).or_else(|_| Ok(Vec::new())),
1407 "dpx" => formats::dpx::read_dpx(data).or_else(|_| Ok(Vec::new())),
1408 "r3d" => formats::red::read_r3d(data).or_else(|_| Ok(Vec::new())),
1409 "tnef" => formats::tnef::read_tnef(data).or_else(|_| Ok(Vec::new())),
1410 "ppt" | "fpx" => formats::flashpix::read_fpx(data).or_else(|_| Ok(Vec::new())),
1411 "fpf" => formats::flir_fpf::read_fpf(data).or_else(|_| Ok(Vec::new())),
1412 "itc" => formats::misc::read_itc(data).or_else(|_| Ok(Vec::new())),
1413 "dv" => formats::dv::read_dv(data, data.len() as u64).or_else(|_| Ok(Vec::new())),
1414 "czi" => formats::misc::read_czi(data).or_else(|_| Ok(Vec::new())),
1415 "miff" => formats::miff::read_miff(data).or_else(|_| Ok(Vec::new())),
1416 "lfp" | "mrc"
1417 | "dss" | "mobi" | "psp" | "pgf" | "raw"
1418 | "pmp" | "torrent"
1419 | "xisf" | "mxf"
1420 | "dfont" => Ok(Vec::new()),
1421 "iso" => formats::iso::read_iso(data).or_else(|_| Ok(Vec::new())),
1422 "afm" => formats::font::read_afm(data).or_else(|_| Ok(Vec::new())),
1423 "pfa" => formats::font::read_pfa(data).or_else(|_| Ok(Vec::new())),
1424 "pfb" => formats::font::read_pfb(data).or_else(|_| Ok(Vec::new())),
1425 _ => Err(Error::UnsupportedFileType(ext)),
1426 }
1427 }
1428}
1429
1430impl Default for ExifTool {
1431 fn default() -> Self {
1432 Self::new()
1433 }
1434}
1435
1436fn detect_opendocument_type(data: &[u8]) -> Option<FileType> {
1439 if data.len() < 30 || data[0..4] != [0x50, 0x4B, 0x03, 0x04] {
1441 return None;
1442 }
1443 let compression = u16::from_le_bytes([data[8], data[9]]);
1444 let compressed_size = u32::from_le_bytes([data[18], data[19], data[20], data[21]]) as usize;
1445 let name_len = u16::from_le_bytes([data[26], data[27]]) as usize;
1446 let extra_len = u16::from_le_bytes([data[28], data[29]]) as usize;
1447 let name_start = 30;
1448 if name_start + name_len > data.len() {
1449 return None;
1450 }
1451 let filename = std::str::from_utf8(&data[name_start..name_start + name_len]).unwrap_or("");
1452 if filename != "mimetype" || compression != 0 {
1453 return None;
1454 }
1455 let content_start = name_start + name_len + extra_len;
1456 let content_end = (content_start + compressed_size).min(data.len());
1457 if content_start >= content_end {
1458 return None;
1459 }
1460 let mime = std::str::from_utf8(&data[content_start..content_end]).unwrap_or("").trim();
1461 match mime {
1462 "application/vnd.oasis.opendocument.spreadsheet" => Some(FileType::Ods),
1463 "application/vnd.oasis.opendocument.text" => Some(FileType::Odt),
1464 "application/vnd.oasis.opendocument.presentation" => Some(FileType::Odp),
1465 "application/vnd.oasis.opendocument.graphics" => Some(FileType::Odg),
1466 "application/vnd.oasis.opendocument.formula" => Some(FileType::Odf),
1467 "application/vnd.oasis.opendocument.database" => Some(FileType::Odb),
1468 "application/vnd.oasis.opendocument.image" => Some(FileType::Odi),
1469 "application/vnd.oasis.opendocument.chart" => Some(FileType::Odc),
1470 _ => None,
1471 }
1472}
1473
1474pub fn get_file_type<P: AsRef<Path>>(path: P) -> Result<FileType> {
1476 let path = path.as_ref();
1477 let mut file = fs::File::open(path).map_err(Error::Io)?;
1478 let mut header = [0u8; 256];
1479 use std::io::Read;
1480 let n = file.read(&mut header).map_err(Error::Io)?;
1481
1482 if let Some(ft) = file_type::detect_from_magic(&header[..n]) {
1483 return Ok(ft);
1484 }
1485
1486 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1487 if let Some(ft) = file_type::detect_from_extension(ext) {
1488 return Ok(ft);
1489 }
1490 }
1491
1492 Err(Error::UnsupportedFileType("unknown".into()))
1493}
1494
/// Group that `classify_exif_tag` assigns to a numeric EXIF tag ID.
enum ExifIfdGroup {
    /// Fallback for every tag ID not claimed by the other two groups.
    Ifd0,
    /// Tag IDs in the ranges `classify_exif_tag` lists for the Exif sub-IFD.
    ExifIfd,
    /// Tag IDs `0x0000..=0x001F`.
    Gps,
}
1501
1502fn classify_exif_tag(tag_id: u16) -> ExifIfdGroup {
1504 match tag_id {
1505 0x829A..=0x829D | 0x8822..=0x8827 | 0x8830 | 0x9000..=0x9292
1507 | 0xA000..=0xA435 => ExifIfdGroup::ExifIfd,
1508 0x0000..=0x001F if tag_id <= 0x001F => ExifIfdGroup::Gps,
1510 _ => ExifIfdGroup::Ifd0,
1512 }
1513}
1514
1515fn extract_existing_exif_entries(jpeg_data: &[u8], target_bo: ByteOrderMark) -> Vec<exif_writer::IfdEntry> {
1517 let mut entries = Vec::new();
1518
1519 let mut pos = 2; while pos + 4 <= jpeg_data.len() {
1522 if jpeg_data[pos] != 0xFF {
1523 pos += 1;
1524 continue;
1525 }
1526 let marker = jpeg_data[pos + 1];
1527 pos += 2;
1528
1529 if marker == 0xDA || marker == 0xD9 {
1530 break; }
1532 if marker == 0xFF || marker == 0x00 || marker == 0xD8 || (0xD0..=0xD7).contains(&marker) {
1533 continue;
1534 }
1535
1536 if pos + 2 > jpeg_data.len() {
1537 break;
1538 }
1539 let seg_len = u16::from_be_bytes([jpeg_data[pos], jpeg_data[pos + 1]]) as usize;
1540 if seg_len < 2 || pos + seg_len > jpeg_data.len() {
1541 break;
1542 }
1543
1544 let seg_data = &jpeg_data[pos + 2..pos + seg_len];
1545
1546 if marker == 0xE1 && seg_data.len() > 14 && seg_data.starts_with(b"Exif\0\0") {
1548 let tiff_data = &seg_data[6..];
1549 extract_ifd_entries(tiff_data, target_bo, &mut entries);
1550 break;
1551 }
1552
1553 pos += seg_len;
1554 }
1555
1556 entries
1557}
1558
1559fn extract_ifd_entries(
1561 tiff_data: &[u8],
1562 target_bo: ByteOrderMark,
1563 entries: &mut Vec<exif_writer::IfdEntry>,
1564) {
1565 use crate::metadata::exif::parse_tiff_header;
1566
1567 let header = match parse_tiff_header(tiff_data) {
1568 Ok(h) => h,
1569 Err(_) => return,
1570 };
1571
1572 let src_bo = header.byte_order;
1573
1574 read_ifd_for_merge(tiff_data, header.ifd0_offset as usize, src_bo, target_bo, entries);
1576
1577 let ifd0_offset = header.ifd0_offset as usize;
1579 if ifd0_offset + 2 > tiff_data.len() {
1580 return;
1581 }
1582 let count = read_u16_bo(tiff_data, ifd0_offset, src_bo) as usize;
1583 for i in 0..count {
1584 let eoff = ifd0_offset + 2 + i * 12;
1585 if eoff + 12 > tiff_data.len() {
1586 break;
1587 }
1588 let tag = read_u16_bo(tiff_data, eoff, src_bo);
1589 let value_off = read_u32_bo(tiff_data, eoff + 8, src_bo) as usize;
1590
1591 match tag {
1592 0x8769 => read_ifd_for_merge(tiff_data, value_off, src_bo, target_bo, entries),
1593 0x8825 => read_ifd_for_merge(tiff_data, value_off, src_bo, target_bo, entries),
1594 _ => {}
1595 }
1596 }
1597}
1598
1599fn read_ifd_for_merge(
1601 data: &[u8],
1602 offset: usize,
1603 src_bo: ByteOrderMark,
1604 target_bo: ByteOrderMark,
1605 entries: &mut Vec<exif_writer::IfdEntry>,
1606) {
1607 if offset + 2 > data.len() {
1608 return;
1609 }
1610 let count = read_u16_bo(data, offset, src_bo) as usize;
1611
1612 for i in 0..count {
1613 let eoff = offset + 2 + i * 12;
1614 if eoff + 12 > data.len() {
1615 break;
1616 }
1617
1618 let tag = read_u16_bo(data, eoff, src_bo);
1619 let dtype = read_u16_bo(data, eoff + 2, src_bo);
1620 let count_val = read_u32_bo(data, eoff + 4, src_bo);
1621
1622 if tag == 0x8769 || tag == 0x8825 || tag == 0xA005 || tag == 0x927C {
1624 continue;
1625 }
1626
1627 let type_size = match dtype {
1628 1 | 2 | 6 | 7 => 1usize,
1629 3 | 8 => 2,
1630 4 | 9 | 11 | 13 => 4,
1631 5 | 10 | 12 => 8,
1632 _ => continue,
1633 };
1634
1635 let total_size = type_size * count_val as usize;
1636 let raw_data = if total_size <= 4 {
1637 data[eoff + 8..eoff + 12].to_vec()
1638 } else {
1639 let voff = read_u32_bo(data, eoff + 8, src_bo) as usize;
1640 if voff + total_size > data.len() {
1641 continue;
1642 }
1643 data[voff..voff + total_size].to_vec()
1644 };
1645
1646 let final_data = if src_bo != target_bo && type_size > 1 {
1648 reencode_bytes(&raw_data, dtype, count_val as usize, src_bo, target_bo)
1649 } else {
1650 raw_data[..total_size].to_vec()
1651 };
1652
1653 let format = match dtype {
1654 1 => exif_writer::ExifFormat::Byte,
1655 2 => exif_writer::ExifFormat::Ascii,
1656 3 => exif_writer::ExifFormat::Short,
1657 4 => exif_writer::ExifFormat::Long,
1658 5 => exif_writer::ExifFormat::Rational,
1659 6 => exif_writer::ExifFormat::SByte,
1660 7 => exif_writer::ExifFormat::Undefined,
1661 8 => exif_writer::ExifFormat::SShort,
1662 9 => exif_writer::ExifFormat::SLong,
1663 10 => exif_writer::ExifFormat::SRational,
1664 11 => exif_writer::ExifFormat::Float,
1665 12 => exif_writer::ExifFormat::Double,
1666 _ => continue,
1667 };
1668
1669 entries.push(exif_writer::IfdEntry {
1670 tag,
1671 format,
1672 data: final_data,
1673 });
1674 }
1675}
1676
1677fn reencode_bytes(
1679 data: &[u8],
1680 dtype: u16,
1681 count: usize,
1682 src_bo: ByteOrderMark,
1683 dst_bo: ByteOrderMark,
1684) -> Vec<u8> {
1685 let mut out = Vec::with_capacity(data.len());
1686 match dtype {
1687 3 | 8 => {
1688 for i in 0..count {
1690 let v = read_u16_bo(data, i * 2, src_bo);
1691 match dst_bo {
1692 ByteOrderMark::LittleEndian => out.extend_from_slice(&v.to_le_bytes()),
1693 ByteOrderMark::BigEndian => out.extend_from_slice(&v.to_be_bytes()),
1694 }
1695 }
1696 }
1697 4 | 9 | 11 | 13 => {
1698 for i in 0..count {
1700 let v = read_u32_bo(data, i * 4, src_bo);
1701 match dst_bo {
1702 ByteOrderMark::LittleEndian => out.extend_from_slice(&v.to_le_bytes()),
1703 ByteOrderMark::BigEndian => out.extend_from_slice(&v.to_be_bytes()),
1704 }
1705 }
1706 }
1707 5 | 10 => {
1708 for i in 0..count {
1710 let n = read_u32_bo(data, i * 8, src_bo);
1711 let d = read_u32_bo(data, i * 8 + 4, src_bo);
1712 match dst_bo {
1713 ByteOrderMark::LittleEndian => {
1714 out.extend_from_slice(&n.to_le_bytes());
1715 out.extend_from_slice(&d.to_le_bytes());
1716 }
1717 ByteOrderMark::BigEndian => {
1718 out.extend_from_slice(&n.to_be_bytes());
1719 out.extend_from_slice(&d.to_be_bytes());
1720 }
1721 }
1722 }
1723 }
1724 12 => {
1725 for i in 0..count {
1727 let mut bytes = [0u8; 8];
1728 bytes.copy_from_slice(&data[i * 8..i * 8 + 8]);
1729 if src_bo != dst_bo {
1730 bytes.reverse();
1731 }
1732 out.extend_from_slice(&bytes);
1733 }
1734 }
1735 _ => out.extend_from_slice(data),
1736 }
1737 out
1738}
1739
1740fn read_u16_bo(data: &[u8], offset: usize, bo: ByteOrderMark) -> u16 {
1741 if offset + 2 > data.len() { return 0; }
1742 match bo {
1743 ByteOrderMark::LittleEndian => u16::from_le_bytes([data[offset], data[offset + 1]]),
1744 ByteOrderMark::BigEndian => u16::from_be_bytes([data[offset], data[offset + 1]]),
1745 }
1746}
1747
1748fn read_u32_bo(data: &[u8], offset: usize, bo: ByteOrderMark) -> u32 {
1749 if offset + 4 > data.len() { return 0; }
1750 match bo {
1751 ByteOrderMark::LittleEndian => u32::from_le_bytes([data[offset], data[offset + 1], data[offset + 2], data[offset + 3]]),
1752 ByteOrderMark::BigEndian => u32::from_be_bytes([data[offset], data[offset + 1], data[offset + 2], data[offset + 3]]),
1753 }
1754}
1755
1756fn tag_name_to_id(name: &str) -> Option<u16> {
1758 encode_exif_tag(name, "", "", ByteOrderMark::BigEndian).map(|(id, _, _)| id)
1759}
1760
/// Turns a tag value into a string usable as a file name.
///
/// The characters `/ \ : * ? " < > |` and all control characters are each
/// replaced with `_`; surrounding whitespace is then trimmed.
fn value_to_filename(value: &str) -> String {
    const RESERVED: [char; 9] = ['/', '\\', ':', '*', '?', '"', '<', '>', '|'];

    let mut sanitized = String::with_capacity(value.len());
    for ch in value.chars() {
        let unsafe_char = RESERVED.contains(&ch) || ch.is_control();
        sanitized.push(if unsafe_char { '_' } else { ch });
    }
    sanitized.trim().to_string()
}
1774
/// Parses a time shift written as `[+|-]H`, `[+|-]H:M` or `[+|-]H:M:S`.
///
/// Returns `(sign, hours, minutes, seconds)` where `sign` is `-1` for a
/// leading `-` and `1` otherwise; omitted fields are 0. Returns `None` when a
/// field is not a valid `u32` or when there are more than three fields.
pub fn parse_date_shift(shift: &str) -> Option<(i32, u32, u32, u32)> {
    let (sign, magnitude) = match shift.chars().next() {
        Some('-') => (-1, &shift[1..]),
        Some('+') => (1, &shift[1..]),
        _ => (1, shift),
    };

    // Fill hours, minutes, seconds in order; missing trailing fields stay 0.
    let mut hms = [0u32; 3];
    let mut filled = 0;
    for field in magnitude.split(':') {
        if filled == hms.len() {
            return None;
        }
        hms[filled] = field.parse().ok()?;
        filled += 1;
    }

    Some((sign, hms[0], hms[1], hms[2]))
}
1806
1807pub fn shift_datetime(datetime: &str, shift: &str) -> Option<String> {
1810 let (sign, hours, minutes, seconds) = parse_date_shift(shift)?;
1811
1812 if datetime.len() < 19 {
1814 return None;
1815 }
1816 let year: i32 = datetime[0..4].parse().ok()?;
1817 let month: u32 = datetime[5..7].parse().ok()?;
1818 let day: u32 = datetime[8..10].parse().ok()?;
1819 let hour: u32 = datetime[11..13].parse().ok()?;
1820 let min: u32 = datetime[14..16].parse().ok()?;
1821 let sec: u32 = datetime[17..19].parse().ok()?;
1822
1823 let total_secs = (hour * 3600 + min * 60 + sec) as i64
1825 + sign as i64 * (hours * 3600 + minutes * 60 + seconds) as i64;
1826
1827 let days_shift = if total_secs < 0 {
1828 -1 - (-total_secs - 1) as i64 / 86400
1829 } else {
1830 total_secs / 86400
1831 };
1832
1833 let time_secs = ((total_secs % 86400) + 86400) % 86400;
1834 let new_hour = (time_secs / 3600) as u32;
1835 let new_min = ((time_secs % 3600) / 60) as u32;
1836 let new_sec = (time_secs % 60) as u32;
1837
1838 let mut new_day = day as i32 + days_shift as i32;
1840 let mut new_month = month;
1841 let mut new_year = year;
1842
1843 let days_in_month = |m: u32, y: i32| -> i32 {
1844 match m {
1845 1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
1846 4 | 6 | 9 | 11 => 30,
1847 2 => if (y % 4 == 0 && y % 100 != 0) || y % 400 == 0 { 29 } else { 28 },
1848 _ => 30,
1849 }
1850 };
1851
1852 while new_day > days_in_month(new_month, new_year) {
1853 new_day -= days_in_month(new_month, new_year);
1854 new_month += 1;
1855 if new_month > 12 {
1856 new_month = 1;
1857 new_year += 1;
1858 }
1859 }
1860 while new_day < 1 {
1861 new_month = if new_month == 1 { 12 } else { new_month - 1 };
1862 if new_month == 12 {
1863 new_year -= 1;
1864 }
1865 new_day += days_in_month(new_month, new_year);
1866 }
1867
1868 Some(format!(
1869 "{:04}:{:02}:{:02} {:02}:{:02}:{:02}",
1870 new_year, new_month, new_day, new_hour, new_min, new_sec
1871 ))
1872}
1873
/// Formats a Unix timestamp (seconds relative to 1970-01-01 00:00:00, no time
/// zone applied) as `YYYY:MM:DD HH:MM:SS` in the proleptic Gregorian calendar.
///
/// Negative timestamps are supported and resolve to dates before 1970; they
/// previously produced negative time fields and out-of-range days. The year
/// is found by stepping one year at a time, so the cost grows with the
/// distance from 1970.
fn unix_to_datetime(secs: i64) -> String {
    const SECS_PER_DAY: i64 = 86_400;

    fn is_leap(year: i32) -> bool {
        (year % 4 == 0 && year % 100 != 0) || year % 400 == 0
    }
    fn days_in_year(year: i32) -> i64 {
        if is_leap(year) { 366 } else { 365 }
    }

    // Euclidean division keeps the time of day in 0..86400 for negative input.
    let mut rem = secs.div_euclid(SECS_PER_DAY);
    let time = secs.rem_euclid(SECS_PER_DAY);
    let h = time / 3600;
    let m = (time % 3600) / 60;
    let s = time % 60;

    let mut y = 1970i32;
    // Walk backwards for dates before the epoch ...
    while rem < 0 {
        y -= 1;
        rem += days_in_year(y);
    }
    // ... and forwards for dates on or after it.
    loop {
        let dy = days_in_year(y);
        if rem < dy {
            break;
        }
        rem -= dy;
        y += 1;
    }

    let months: [i64; 12] = [
        31,
        if is_leap(y) { 29 } else { 28 },
        31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
    ];
    let mut mo = 1;
    for &dm in &months {
        if rem < dm {
            break;
        }
        rem -= dm;
        mo += 1;
    }

    // `rem` is now the zero-based day within the month.
    format!("{:04}:{:02}:{:02} {:02}:{:02}:{:02}", y, mo, rem + 1, h, m, s)
}
1898
/// Renders a byte count for display using 1024-based units.
///
/// Values below 1024 are printed as `"<n> bytes"`; larger values are scaled
/// to kB, MB or GB and printed with one decimal place.
fn format_file_size(bytes: u64) -> String {
    const KIB: u64 = 1024;
    const MIB: u64 = KIB * 1024;
    const GIB: u64 = MIB * 1024;

    let scaled_by = |unit: u64| bytes as f64 / unit as f64;

    match bytes {
        b if b < KIB => format!("{} bytes", b),
        b if b < MIB => format!("{:.1} kB", scaled_by(KIB)),
        b if b < GIB => format!("{:.1} MB", scaled_by(MIB)),
        _ => format!("{:.1} GB", scaled_by(GIB)),
    }
}
1910
/// Reports whether `tag`, compared case-insensitively, is one of the tag
/// names in the fixed XMP list below.
fn is_xmp_tag(tag: &str) -> bool {
    const XMP_TAG_NAMES: [&str; 9] = [
        "title",
        "description",
        "subject",
        "creator",
        "rights",
        "keywords",
        "rating",
        "label",
        "hierarchicalsubject",
    ];

    let lowered = tag.to_lowercase();
    XMP_TAG_NAMES.iter().any(|&known| known == lowered.as_str())
}
1919
1920fn encode_exif_tag(
1923 tag_name: &str,
1924 value: &str,
1925 _group: &str,
1926 bo: ByteOrderMark,
1927) -> Option<(u16, exif_writer::ExifFormat, Vec<u8>)> {
1928 let tag_lower = tag_name.to_lowercase();
1929
1930 let (tag_id, format): (u16, exif_writer::ExifFormat) = match tag_lower.as_str() {
1932 "imagedescription" => (0x010E, exif_writer::ExifFormat::Ascii),
1934 "make" => (0x010F, exif_writer::ExifFormat::Ascii),
1935 "model" => (0x0110, exif_writer::ExifFormat::Ascii),
1936 "software" => (0x0131, exif_writer::ExifFormat::Ascii),
1937 "modifydate" | "datetime" => (0x0132, exif_writer::ExifFormat::Ascii),
1938 "artist" => (0x013B, exif_writer::ExifFormat::Ascii),
1939 "copyright" => (0x8298, exif_writer::ExifFormat::Ascii),
1940 "orientation" => (0x0112, exif_writer::ExifFormat::Short),
1942 "xresolution" => (0x011A, exif_writer::ExifFormat::Rational),
1943 "yresolution" => (0x011B, exif_writer::ExifFormat::Rational),
1944 "resolutionunit" => (0x0128, exif_writer::ExifFormat::Short),
1945 "datetimeoriginal" => (0x9003, exif_writer::ExifFormat::Ascii),
1947 "createdate" | "datetimedigitized" => (0x9004, exif_writer::ExifFormat::Ascii),
1948 "usercomment" => (0x9286, exif_writer::ExifFormat::Undefined),
1949 "imageuniqueid" => (0xA420, exif_writer::ExifFormat::Ascii),
1950 "ownername" | "cameraownername" => (0xA430, exif_writer::ExifFormat::Ascii),
1951 "serialnumber" | "bodyserialnumber" => (0xA431, exif_writer::ExifFormat::Ascii),
1952 "lensmake" => (0xA433, exif_writer::ExifFormat::Ascii),
1953 "lensmodel" => (0xA434, exif_writer::ExifFormat::Ascii),
1954 "lensserialnumber" => (0xA435, exif_writer::ExifFormat::Ascii),
1955 _ => return None,
1956 };
1957
1958 let encoded = match format {
1959 exif_writer::ExifFormat::Ascii => exif_writer::encode_ascii(value),
1960 exif_writer::ExifFormat::Short => {
1961 let v: u16 = value.parse().ok()?;
1962 exif_writer::encode_u16(v, bo)
1963 }
1964 exif_writer::ExifFormat::Long => {
1965 let v: u32 = value.parse().ok()?;
1966 exif_writer::encode_u32(v, bo)
1967 }
1968 exif_writer::ExifFormat::Rational => {
1969 if let Some(slash) = value.find('/') {
1971 let num: u32 = value[..slash].trim().parse().ok()?;
1972 let den: u32 = value[slash + 1..].trim().parse().ok()?;
1973 exif_writer::encode_urational(num, den, bo)
1974 } else if let Ok(v) = value.parse::<f64>() {
1975 let den = 10000u32;
1977 let num = (v * den as f64).round() as u32;
1978 exif_writer::encode_urational(num, den, bo)
1979 } else {
1980 return None;
1981 }
1982 }
1983 exif_writer::ExifFormat::Undefined => {
1984 let mut data = vec![0x41, 0x53, 0x43, 0x49, 0x49, 0x00, 0x00, 0x00]; data.extend_from_slice(value.as_bytes());
1987 data
1988 }
1989 _ => return None,
1990 };
1991
1992 Some((tag_id, format, encoded))
1993}
1994
1995fn compute_text_tags(data: &[u8], is_csv: bool) -> Vec<Tag> {
1997 let mut tags = Vec::new();
1998 let mk = |name: &str, val: String| Tag {
1999 id: crate::tag::TagId::Text(name.into()),
2000 name: name.into(), description: name.into(),
2001 group: crate::tag::TagGroup { family0: "File".into(), family1: "File".into(), family2: "Other".into() },
2002 raw_value: Value::String(val.clone()), print_value: val, priority: 0,
2003 };
2004
2005 let is_ascii = data.iter().all(|&b| b < 128);
2007 let has_utf8_bom = data.starts_with(&[0xEF, 0xBB, 0xBF]);
2008 let has_utf16le_bom = data.starts_with(&[0xFF, 0xFE]) && !data.starts_with(&[0xFF, 0xFE, 0x00, 0x00]);
2009 let has_utf16be_bom = data.starts_with(&[0xFE, 0xFF]);
2010 let has_utf32le_bom = data.starts_with(&[0xFF, 0xFE, 0x00, 0x00]);
2011 let has_utf32be_bom = data.starts_with(&[0x00, 0x00, 0xFE, 0xFF]);
2012
2013 let has_weird_ctrl = data.iter().any(|&b| (b <= 0x06) || (b >= 0x0e && b <= 0x1a) || (b >= 0x1c && b <= 0x1f) || b == 0x7f);
2015
2016 let (encoding, is_bom, is_utf16) = if has_utf32le_bom {
2017 ("utf-32le", true, false)
2018 } else if has_utf32be_bom {
2019 ("utf-32be", true, false)
2020 } else if has_utf16le_bom {
2021 ("utf-16le", true, true)
2022 } else if has_utf16be_bom {
2023 ("utf-16be", true, true)
2024 } else if has_weird_ctrl {
2025 return tags;
2027 } else if is_ascii {
2028 ("us-ascii", false, false)
2029 } else {
2030 let is_valid_utf8 = std::str::from_utf8(data).is_ok();
2032 if is_valid_utf8 {
2033 if has_utf8_bom {
2034 ("utf-8", true, false)
2035 } else {
2036 ("utf-8", false, false)
2040 }
2041 } else if !data.iter().any(|&b| b >= 0x80 && b <= 0x9f) {
2042 ("iso-8859-1", false, false)
2043 } else {
2044 ("unknown-8bit", false, false)
2045 }
2046 };
2047
2048 tags.push(mk("MIMEEncoding", encoding.into()));
2049
2050 if is_bom {
2051 tags.push(mk("ByteOrderMark", "Yes".into()));
2052 }
2053
2054 let has_cr = data.contains(&b'\r');
2056 let has_lf = data.contains(&b'\n');
2057 let newline_type = if has_cr && has_lf { "Windows CRLF" }
2058 else if has_lf { "Unix LF" }
2059 else if has_cr { "Macintosh CR" }
2060 else { "(none)" };
2061 tags.push(mk("Newlines", newline_type.into()));
2062
2063 if is_csv {
2064 let text = String::from_utf8_lossy(data);
2066 let mut delim = "";
2067 let mut quot = "";
2068 let mut ncols = 1usize;
2069 let mut nrows = 0usize;
2070
2071 for line in text.lines() {
2072 if nrows == 0 {
2073 let comma_count = line.matches(',').count();
2075 let semi_count = line.matches(';').count();
2076 let tab_count = line.matches('\t').count();
2077 if comma_count > semi_count && comma_count > tab_count {
2078 delim = ",";
2079 ncols = comma_count + 1;
2080 } else if semi_count > tab_count {
2081 delim = ";";
2082 ncols = semi_count + 1;
2083 } else if tab_count > 0 {
2084 delim = "\t";
2085 ncols = tab_count + 1;
2086 } else {
2087 delim = "";
2088 ncols = 1;
2089 }
2090 if line.contains('"') { quot = "\""; }
2092 else if line.contains('\'') { quot = "'"; }
2093 }
2094 nrows += 1;
2095 if nrows >= 1000 { break; }
2096 }
2097
2098 let delim_display = match delim {
2099 "," => "Comma",
2100 ";" => "Semicolon",
2101 "\t" => "Tab",
2102 _ => "(none)",
2103 };
2104 let quot_display = match quot {
2105 "\"" => "Double quotes",
2106 "'" => "Single quotes",
2107 _ => "(none)",
2108 };
2109
2110 tags.push(mk("Delimiter", delim_display.into()));
2111 tags.push(mk("Quoting", quot_display.into()));
2112 tags.push(mk("ColumnCount", ncols.to_string()));
2113 if nrows > 0 {
2114 tags.push(mk("RowCount", nrows.to_string()));
2115 }
2116 } else if !is_utf16 {
2117 let line_count = data.iter().filter(|&&b| b == b'\n').count();
2119 let line_count = if line_count == 0 && !data.is_empty() { 1 } else { line_count };
2120 tags.push(mk("LineCount", line_count.to_string()));
2121
2122 let text = String::from_utf8_lossy(data);
2123 let word_count = text.split_whitespace().count();
2124 tags.push(mk("WordCount", word_count.to_string()));
2125 }
2126
2127 tags
2128}