1use crate::types::{ExifError, Result};
8use std::collections::{BTreeMap, HashMap};
9use std::io::{Read, Seek, SeekFrom};
10
/// Classification of a JPEG marker, keyed on the byte that follows `0xFF`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum JpegSegment {
    /// Start of image (`0xD8`).
    Soi,
    /// Application segment; the payload is the marker byte minus `0xE0` (0..=15 when
    /// produced by `from_marker`).
    App(u8),
    /// Start of frame; only marker `0xC0` is mapped to this variant.
    Sof,
    /// Marker `0xC4` (DHT).
    Dht,
    /// Start of scan (`0xDA`).
    Sos,
    /// End of image (`0xD9`).
    Eoi,
    /// Any marker without a dedicated variant, carrying the raw marker byte.
    Other(u8),
}

impl JpegSegment {
    /// Maps a raw marker byte to its segment classification.
    fn from_marker(marker: u8) -> Self {
        // The sixteen APPn markers form one contiguous range, so peel them off first.
        if (0xE0..=0xEF).contains(&marker) {
            return Self::App(marker - 0xE0);
        }

        match marker {
            0xC0 => Self::Sof,
            0xC4 => Self::Dht,
            0xD8 => Self::Soi,
            0xD9 => Self::Eoi,
            0xDA => Self::Sos,
            unmapped => Self::Other(unmapped),
        }
    }

    /// Returns `true` only for the APP1 segment.
    // No non-test caller is visible in this module, hence the lint opt-out.
    #[allow(dead_code)]
    fn is_app1(&self) -> bool {
        *self == Self::App(1)
    }

    /// Returns the marker byte corresponding to this classification
    /// (the inverse of `from_marker` for every value it produces).
    // No non-test caller is visible in this module, hence the lint opt-out.
    #[allow(dead_code)]
    fn marker_byte(&self) -> u8 {
        match *self {
            Self::App(index) => 0xE0 + index,
            Self::Other(raw) => raw,
            Self::Sof => 0xC0,
            Self::Dht => 0xC4,
            Self::Soi => 0xD8,
            Self::Eoi => 0xD9,
            Self::Sos => 0xDA,
        }
    }
}
63
/// Location of a metadata payload (EXIF or XMP) found inside a JPEG APP segment.
#[derive(Debug)]
pub struct JpegSegmentInfo {
    /// Marker classification of the segment that holds the payload.
    pub segment_type: JpegSegment,
    /// Position of the first payload byte, just past the identifier header, as tracked
    /// by the scanner (which starts counting at the JPEG magic bytes).
    pub offset: u64,
    /// Payload size in bytes: the segment's declared length minus the two length bytes
    /// and the identifier header.
    pub length: u16,
    /// Set when the payload was found behind an `Exif\0\0` header.
    pub has_exif: bool,
    /// Set when the payload was found behind the `http://ns.adobe.com/xap/1.0/\0` header.
    pub has_xmp: bool,
}
73
/// One extended-XMP chunk located by `scan_jpeg_xmp_segments`.
#[derive(Debug)]
pub struct ExtendedXmpInfo {
    /// 32-character ASCII-alphanumeric GUID read from the chunk header.
    pub guid: String,
    /// Total-size field of the chunk header (read as a big-endian `u32`);
    /// `extract_jpeg_xmp` compares it against the summed chunk lengths.
    pub total_size: u32,
    /// Offset field of the chunk header (read as a big-endian `u32`);
    /// `extract_jpeg_xmp` uses it to order chunks and detect gaps.
    pub chunk_offset: u32,
    /// Position of the chunk payload: 75 bytes past the start of the segment data
    /// (35-byte identifier + 32-byte GUID + two 4-byte fields).
    pub segment_offset: u64,
    /// Payload size in bytes: the segment's declared length minus 77.
    pub chunk_length: u16,
}
92
93pub fn scan_jpeg_segments<R: Read + Seek>(mut reader: R) -> Result<Option<JpegSegmentInfo>> {
98 let mut magic = [0u8; 2];
100 reader.read_exact(&mut magic)?;
101 if magic != [0xFF, 0xD8] {
102 return Err(ExifError::InvalidFormat(
103 "Not a valid JPEG file (missing 0xFFD8 magic bytes)".to_string(),
104 ));
105 }
106
107 let mut current_pos = 2u64; let mut found_exif: Option<JpegSegmentInfo> = None;
109 let mut found_xmp: Option<JpegSegmentInfo> = None;
110
111 loop {
112 let mut marker_bytes = [0u8; 2];
114 if reader.read_exact(&mut marker_bytes).is_err() {
115 break;
117 }
118
119 if marker_bytes[0] != 0xFF {
120 return Err(ExifError::ParseError(
121 "Invalid JPEG segment marker".to_string(),
122 ));
123 }
124
125 let segment = JpegSegment::from_marker(marker_bytes[1]);
126 current_pos += 2;
127
128 match segment {
129 JpegSegment::Soi => {
130 continue;
132 }
133 JpegSegment::Eoi => {
134 break;
136 }
137 JpegSegment::Sos => {
138 break;
140 }
141 JpegSegment::App(app_num) => {
142 let mut length_bytes = [0u8; 2];
144 reader.read_exact(&mut length_bytes)?;
145 let length = u16::from_be_bytes(length_bytes);
146 current_pos += 2;
147
148 if app_num == 1 {
149 let segment_start = current_pos; let mut exif_header = [0u8; 6];
154 if reader.read_exact(&mut exif_header).is_ok()
155 && &exif_header[0..4] == b"Exif"
156 && exif_header[4] == 0
157 && exif_header[5] == 0
158 {
159 found_exif = Some(JpegSegmentInfo {
161 segment_type: segment,
162 offset: current_pos + 6, length: length - 8, has_exif: true,
165 has_xmp: false,
166 });
167 } else {
168 reader.seek(SeekFrom::Start(segment_start))?;
170 let mut xmp_header = [0u8; 29];
171 if reader.read_exact(&mut xmp_header).is_ok()
172 && &xmp_header == b"http://ns.adobe.com/xap/1.0/\0"
173 {
174 found_xmp = Some(JpegSegmentInfo {
176 segment_type: segment,
177 offset: current_pos + 29, length: length - 31, has_exif: false,
180 has_xmp: true,
181 });
182 }
183 }
184
185 reader.seek(SeekFrom::Start(segment_start))?;
187 }
188
189 let segment_data_length = length.saturating_sub(2) as u64;
191 reader.seek(SeekFrom::Current(segment_data_length as i64))?;
192 current_pos += segment_data_length;
193 }
194 _ => {
195 let mut length_bytes = [0u8; 2];
197 if reader.read_exact(&mut length_bytes).is_ok() {
198 let length = u16::from_be_bytes(length_bytes);
199 let segment_data_length = length.saturating_sub(2) as u64;
200 reader.seek(SeekFrom::Current(segment_data_length as i64))?;
201 current_pos += 2 + segment_data_length;
202 } else {
203 break;
204 }
205 }
206 }
207 }
208
209 Ok(found_exif.or(found_xmp))
211}
212
213pub struct XmpScanResult {
215 pub regular_xmp: Option<JpegSegmentInfo>,
216 pub extended_xmp: Vec<ExtendedXmpInfo>,
217}
218
219pub fn scan_jpeg_xmp_segments<R: Read + Seek>(mut reader: R) -> Result<XmpScanResult> {
224 let mut magic = [0u8; 2];
226 reader.read_exact(&mut magic)?;
227 if magic != [0xFF, 0xD8] {
228 return Err(ExifError::InvalidFormat(
229 "Not a valid JPEG file (missing 0xFFD8 magic bytes)".to_string(),
230 ));
231 }
232
233 let mut regular_xmp = None;
234 let mut extended_xmp = Vec::new();
235 let mut current_pos = 2u64; loop {
238 let mut marker_bytes = [0u8; 2];
240 if reader.read_exact(&mut marker_bytes).is_err() {
241 break;
242 }
243
244 if marker_bytes[0] != 0xFF {
245 return Err(ExifError::ParseError(
246 "Invalid JPEG segment marker".to_string(),
247 ));
248 }
249
250 let segment = JpegSegment::from_marker(marker_bytes[1]);
251 current_pos += 2;
252
253 match segment {
254 JpegSegment::Soi => continue,
255 JpegSegment::Eoi | JpegSegment::Sos => break,
256 JpegSegment::App(1) => {
257 let mut length_bytes = [0u8; 2];
259 reader.read_exact(&mut length_bytes)?;
260 let length = u16::from_be_bytes(length_bytes);
261 current_pos += 2;
262
263 let segment_start = current_pos;
264
265 let mut xmp_header = [0u8; 29];
267 if reader.read_exact(&mut xmp_header).is_ok()
268 && &xmp_header == b"http://ns.adobe.com/xap/1.0/\0"
269 {
270 if regular_xmp.is_none() {
272 regular_xmp = Some(JpegSegmentInfo {
273 segment_type: segment,
274 offset: current_pos + 29,
275 length: length - 31, has_exif: false,
277 has_xmp: true,
278 });
279 }
280
281 let remaining = (length - 31) as u64;
284 reader.seek(SeekFrom::Current(remaining as i64))?;
285 current_pos = segment_start + (length - 2) as u64;
286 continue;
287 }
288
289 reader.seek(SeekFrom::Start(segment_start))?;
291 let mut ext_xmp_header = [0u8; 35];
292 if reader.read_exact(&mut ext_xmp_header).is_ok()
293 && &ext_xmp_header[0..35] == b"http://ns.adobe.com/xmp/extension/\0"
294 {
295 let mut guid_bytes = [0u8; 32];
305 reader.read_exact(&mut guid_bytes)?;
306 let guid = String::from_utf8_lossy(&guid_bytes).to_string();
307
308 if !guid.chars().all(|c| c.is_ascii_alphanumeric()) {
311 reader.seek(SeekFrom::Start(segment_start))?;
313 let segment_data_length = length.saturating_sub(2) as u64;
314 reader.seek(SeekFrom::Current(segment_data_length as i64))?;
315 current_pos = segment_start + segment_data_length;
316 continue;
317 }
318
319 let mut size_bytes = [0u8; 4];
322 reader.read_exact(&mut size_bytes)?;
323 let total_size = u32::from_be_bytes(size_bytes);
324
325 let mut offset_bytes = [0u8; 4];
327 reader.read_exact(&mut offset_bytes)?;
328 let chunk_offset = u32::from_be_bytes(offset_bytes);
329
330 extended_xmp.push(ExtendedXmpInfo {
333 guid,
334 total_size,
335 chunk_offset,
336 segment_offset: current_pos + 75, chunk_length: length - 77, });
339
340 let remaining = (length - 77) as u64;
343 reader.seek(SeekFrom::Current(remaining as i64))?;
344 current_pos = segment_start + (length - 2) as u64;
345 continue;
346 }
347
348 reader.seek(SeekFrom::Start(segment_start))?;
350 let segment_data_length = length.saturating_sub(2) as u64;
351 reader.seek(SeekFrom::Current(segment_data_length as i64))?;
352 current_pos = segment_start + segment_data_length;
353 }
354 _ => {
355 let mut length_bytes = [0u8; 2];
357 if reader.read_exact(&mut length_bytes).is_ok() {
358 let length = u16::from_be_bytes(length_bytes);
359 let segment_data_length = length.saturating_sub(2) as u64;
360 reader.seek(SeekFrom::Current(segment_data_length as i64))?;
361 current_pos += 2 + segment_data_length;
362 } else {
363 break;
364 }
365 }
366 }
367 }
368
369 Ok(XmpScanResult {
370 regular_xmp,
371 extended_xmp,
372 })
373}
374
/// Extracts the `HasExtendedXMP` GUID from a main XMP packet, if one is present.
///
/// The packet must be valid UTF-8. The GUID is searched for in element form
/// (`<xmpNote:HasExtendedXMP>…</xmpNote:HasExtendedXMP>`, then without the prefix)
/// and then in attribute form (prefixed or not, double- or single-quoted). For each
/// form only its first occurrence is examined, and a candidate is accepted only when
/// it is exactly 32 ASCII alphanumeric characters; otherwise the next form is tried.
///
/// Returns `None` when the data is not UTF-8 or no form yields a valid GUID.
fn extract_has_extended_xmp_guid(xmp_data: &[u8]) -> Option<String> {
    // (text that precedes the GUID, text that terminates it), in priority order.
    const DELIMITERS: [(&str, &str); 6] = [
        ("<xmpNote:HasExtendedXMP>", "</xmpNote:HasExtendedXMP>"),
        ("<HasExtendedXMP>", "</HasExtendedXMP>"),
        ("xmpNote:HasExtendedXMP=\"", "\""),
        ("xmpNote:HasExtendedXMP='", "'"),
        ("HasExtendedXMP=\"", "\""),
        ("HasExtendedXMP='", "'"),
    ];

    let text = std::str::from_utf8(xmp_data).ok()?;

    DELIMITERS.iter().find_map(|&(opening, closing)| {
        let value_start = text.find(opening)? + opening.len();
        let tail = &text[value_start..];
        let candidate = &tail[..tail.find(closing)?];

        let looks_like_guid =
            candidate.len() == 32 && candidate.bytes().all(|b| b.is_ascii_alphanumeric());
        if looks_like_guid {
            Some(candidate.to_string())
        } else {
            None
        }
    })
}
443
444pub fn extract_jpeg_xmp<R: Read + Seek>(mut reader: R) -> Result<Vec<u8>> {
451 let scan_result = scan_jpeg_xmp_segments(&mut reader)?;
452
453 if let Some(regular_xmp) = &scan_result.regular_xmp {
455 reader.seek(SeekFrom::Start(regular_xmp.offset))?;
457 let mut xmp_data = vec![0u8; regular_xmp.length as usize];
458 reader.read_exact(&mut xmp_data)?;
459
460 if let Some(has_extended_guid) = extract_has_extended_xmp_guid(&xmp_data) {
462 if !scan_result.extended_xmp.is_empty() {
465 let mut guid_chunks: HashMap<String, BTreeMap<u32, Vec<u8>>> = HashMap::new();
467
468 for ext_info in &scan_result.extended_xmp {
469 if ext_info.guid == has_extended_guid {
471 reader.seek(SeekFrom::Start(ext_info.segment_offset))?;
473 let mut chunk_data = vec![0u8; ext_info.chunk_length as usize];
474 reader.read_exact(&mut chunk_data)?;
475
476 guid_chunks
479 .entry(ext_info.guid.clone())
480 .or_default()
481 .insert(ext_info.chunk_offset, chunk_data);
482 }
483 }
484
485 if let Some(chunks) = guid_chunks.get(&has_extended_guid) {
487 let mut expected_offset = 0u32;
489 let mut total_size = 0u32;
490 let mut is_complete = true;
491
492 for ext_info in &scan_result.extended_xmp {
494 if ext_info.guid == has_extended_guid {
495 total_size = ext_info.total_size;
496 break;
497 }
498 }
499
500 for (offset, chunk) in chunks {
503 if *offset != expected_offset {
504 is_complete = false;
505 break;
506 }
507 expected_offset += chunk.len() as u32;
508 }
509
510 if is_complete && expected_offset == total_size {
511 let mut combined_xmp = xmp_data;
515 for chunk in chunks.values() {
516 combined_xmp.extend_from_slice(chunk);
517 }
518 return Ok(combined_xmp);
519 }
520 }
521 }
522 }
523
524 return Ok(xmp_data);
526 }
527
528 if !scan_result.extended_xmp.is_empty() {
530 let mut guid_chunks: HashMap<String, BTreeMap<u32, Vec<u8>>> = HashMap::new();
532
533 for ext_info in &scan_result.extended_xmp {
534 reader.seek(SeekFrom::Start(ext_info.segment_offset))?;
536 let mut chunk_data = vec![0u8; ext_info.chunk_length as usize];
537 reader.read_exact(&mut chunk_data)?;
538
539 guid_chunks
541 .entry(ext_info.guid.clone())
542 .or_default()
543 .insert(ext_info.chunk_offset, chunk_data);
544 }
545
546 for (guid, chunks) in guid_chunks {
549 let mut expected_offset = 0u32;
551 let mut total_size = 0u32;
552 let mut is_complete = true;
553
554 for ext_info in &scan_result.extended_xmp {
556 if ext_info.guid == guid {
557 total_size = ext_info.total_size;
558 break;
559 }
560 }
561
562 for (offset, chunk) in &chunks {
564 if *offset != expected_offset {
565 is_complete = false;
566 break;
567 }
568 expected_offset += chunk.len() as u32;
569 }
570
571 if is_complete && expected_offset == total_size {
572 let mut reassembled = Vec::with_capacity(total_size as usize);
575 for (_, chunk) in chunks {
576 reassembled.extend_from_slice(&chunk);
577 }
578 return Ok(reassembled);
579 }
580 }
581
582 return Err(ExifError::InvalidFormat(
583 "Incomplete Extended XMP data".to_string(),
584 ));
585 }
586
587 Err(ExifError::InvalidFormat(
588 "No XMP data found in JPEG file".to_string(),
589 ))
590}
591
592pub fn extract_jpeg_exif<R: Read + Seek>(mut reader: R) -> Result<Vec<u8>> {
597 reader.seek(SeekFrom::Start(0))?;
599 let segment_info = scan_jpeg_segments(&mut reader)?;
600
601 match segment_info {
602 Some(info) if info.has_exif => {
603 reader.seek(SeekFrom::Start(info.offset))?;
605 let mut exif_data = vec![0u8; info.length as usize];
606 reader.read_exact(&mut exif_data)?;
607 Ok(exif_data)
608 }
609 _ => Err(ExifError::InvalidFormat(
610 "No EXIF data found in JPEG file".to_string(),
611 )),
612 }
613}
614
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Identifier that opens a standard XMP APP1 segment (29 bytes including the NUL).
    const XMP_IDENTIFIER: &[u8] = b"http://ns.adobe.com/xap/1.0/\0";
    /// Minimal 30-byte stand-in for an XMP packet.
    const XMP_PACKET: &[u8] = b"<?xml?><x:xmpmeta></x:xmpmeta>";

    /// Builds `SOI, APP1(XMP identifier + packet), EOI`.
    fn jpeg_with_xmp_segment() -> Vec<u8> {
        // Declared length covers the two length bytes plus the segment data.
        let segment_length = 2 + XMP_IDENTIFIER.len() + XMP_PACKET.len();

        let mut jpeg = vec![
            0xFF,
            0xD8,
            0xFF,
            0xE1,
            (segment_length >> 8) as u8,
            (segment_length & 0xFF) as u8,
        ];
        jpeg.extend_from_slice(XMP_IDENTIFIER);
        jpeg.extend_from_slice(XMP_PACKET);
        jpeg.extend_from_slice(&[0xFF, 0xD9]);
        jpeg
    }

    #[test]
    fn test_jpeg_segment_from_marker() {
        let expectations = [
            (0xD8, JpegSegment::Soi),
            (0xE1, JpegSegment::App(1)),
            (0xC0, JpegSegment::Sof),
            (0xDA, JpegSegment::Sos),
            (0xD9, JpegSegment::Eoi),
        ];
        for (marker, expected) in expectations.iter() {
            assert_eq!(JpegSegment::from_marker(*marker), *expected);
        }
    }

    #[test]
    fn test_jpeg_segment_is_app1() {
        assert!(JpegSegment::App(1).is_app1());
        assert!(!JpegSegment::App(0).is_app1());
        assert!(!JpegSegment::Soi.is_app1());
    }

    #[test]
    fn test_jpeg_segment_marker_byte() {
        assert_eq!(JpegSegment::Soi.marker_byte(), 0xD8);
        assert_eq!(JpegSegment::App(1).marker_byte(), 0xE1);
        assert_eq!(JpegSegment::Eoi.marker_byte(), 0xD9);
    }

    #[test]
    fn test_scan_jpeg_segments_invalid_magic() {
        let not_a_jpeg = Cursor::new([0x12, 0x34, 0x56, 0x78]);
        assert!(scan_jpeg_segments(not_a_jpeg).is_err());
    }

    #[test]
    fn test_scan_jpeg_segments_minimal() {
        // SOI immediately followed by EOI: nothing to find.
        let soi_eoi = Cursor::new([0xFF, 0xD8, 0xFF, 0xD9]);
        let found = scan_jpeg_segments(soi_eoi).unwrap();
        assert!(found.is_none());
    }

    #[test]
    fn test_scan_jpeg_segments_with_app1_exif() {
        let mut jpeg_data: Vec<u8> = Vec::new();
        jpeg_data.extend_from_slice(&[0xFF, 0xD8]); // SOI
        jpeg_data.extend_from_slice(&[0xFF, 0xE1]); // APP1 marker
        jpeg_data.extend_from_slice(&[0x00, 0x10]); // declared length: 16
        jpeg_data.extend_from_slice(&[0x45, 0x78, 0x69, 0x66, 0x00, 0x00]); // "Exif\0\0"
        jpeg_data.extend_from_slice(&[0x49, 0x49, 0x2A, 0x00, 0x08, 0x00, 0x00, 0x00]); // 8 payload bytes
        jpeg_data.extend_from_slice(&[0xFF, 0xD9]); // EOI

        let found = scan_jpeg_segments(Cursor::new(&jpeg_data)).unwrap();
        assert!(found.is_some());

        let exif = found.unwrap();
        assert!(exif.has_exif);
        assert!(!exif.has_xmp);
        assert_eq!(exif.offset, 12);
        assert_eq!(exif.length, 8);
    }

    #[test]
    fn test_scan_jpeg_segments_with_app1_xmp() {
        let jpeg_data = jpeg_with_xmp_segment();

        let found = scan_jpeg_segments(Cursor::new(&jpeg_data)).unwrap();
        assert!(found.is_some());

        let xmp = found.unwrap();
        assert!(!xmp.has_exif);
        assert!(xmp.has_xmp);
        assert_eq!(xmp.offset, 35);
        assert_eq!(xmp.length, 30);
    }

    #[test]
    fn test_scan_jpeg_xmp_segments() {
        let jpeg_data = jpeg_with_xmp_segment();

        let scan = scan_jpeg_xmp_segments(Cursor::new(&jpeg_data)).unwrap();
        assert!(scan.regular_xmp.is_some());
        assert!(scan.extended_xmp.is_empty());

        let xmp = scan.regular_xmp.unwrap();
        assert!(xmp.has_xmp);
        assert_eq!(xmp.length, 30);
    }

    #[test]
    fn test_extract_jpeg_xmp() {
        let jpeg_data = jpeg_with_xmp_segment();

        let extracted = extract_jpeg_xmp(Cursor::new(&jpeg_data));
        assert!(extracted.is_ok());

        let xmp_data = extracted.unwrap();
        assert_eq!(xmp_data, XMP_PACKET);
    }
}