use crate::{ExifIter, MediaParser, MediaSource};
use std::io::{Read, Seek};
use nom::{bytes::streaming, combinator::fail, number, sequence::tuple, IResult};
use crate::exif::{check_exif_header, Exif};
#[deprecated(since = "2.0.0")]
pub fn parse_jpeg_exif<R: Read + Seek>(reader: R) -> crate::Result<Option<Exif>> {
let mut parser = MediaParser::new();
let iter: ExifIter = parser.parse(MediaSource::unseekable(reader)?)?;
Ok(Some(iter.into()))
}
pub(crate) fn extract_exif_data(input: &[u8]) -> IResult<&[u8], Option<&[u8]>> {
let (remain, segment) = find_exif_segment(input)?;
let data = segment.and_then(|segment| {
if segment.payload_len() <= 6 {
None
} else {
Some(&segment.payload[6..]) }
});
Ok((remain, data))
}
struct Segment<'a> {
marker_code: u8,
payload: &'a [u8],
}
impl Segment<'_> {
pub fn payload_len(&self) -> usize {
self.payload.len()
}
}
fn find_exif_segment(input: &[u8]) -> IResult<&[u8], Option<Segment<'_>>> {
let mut remain = input;
let (remain, segment) = loop {
let (rem, (_, code)) = tuple((streaming::tag([0xFF]), number::streaming::u8))(remain)?;
let (rem, segment) = parse_segment(code, rem)?;
assert!(rem.len() < remain.len());
remain = rem;
tracing::debug!(
marker = format!("0x{:04x}", segment.marker_code),
size = format!("0x{:04x}", segment.payload.len()),
"got segment"
);
let s = &segment;
if (s.marker_code == MarkerCode::APP1.code() && check_exif_header(s.payload)?)
|| s.marker_code == MarkerCode::Sos.code()
{
break (remain, segment);
}
};
if segment.marker_code != MarkerCode::Sos.code() {
Ok((remain, Some(segment)))
} else {
Ok((remain, None))
}
}
pub fn check_jpeg(input: &[u8]) -> crate::Result<()> {
let (_, (_, code)) = tuple((nom::bytes::complete::tag([0xFF]), number::complete::u8))(input)?;
if code != MarkerCode::Soi.code() {
return Err("invalid JPEG file; SOI marker not found".into());
}
let (_, (_, _)) = tuple((nom::bytes::complete::tag([0xFF]), number::complete::u8))(input)?;
Ok(())
}
fn parse_segment(marker_code: u8, input: &[u8]) -> IResult<&[u8], Segment<'_>> {
let remain = input;
if marker_code == MarkerCode::Soi.code() {
Ok((
remain,
Segment {
marker_code,
payload: b"",
},
))
} else {
let (remain, size) = number::streaming::be_u16(remain)?;
if size < 2 {
return fail(remain);
}
let (remain, data) = streaming::take(size - 2)(remain)?;
Ok((
remain,
Segment {
marker_code,
payload: data,
},
))
}
}
#[allow(dead_code)]
fn read_image_data<T: Read + Seek>(mut reader: T) -> crate::Result<Vec<u8>> {
let mut header = [0u8; 2];
loop {
reader.read_exact(&mut header)?;
let (tag, marker) = (header[0], header[1]);
if tag != 0xFF {
return Err("".into());
}
if marker == MarkerCode::Soi.code() {
continue;
}
if marker == MarkerCode::Eoi.code() {
return Err("exif not found".into());
}
if marker == MarkerCode::Sos.code() {
let mut data = Vec::new();
reader.read_to_end(&mut data)?;
loop {
let Some(tail) = data.pop() else {
break;
};
if tail == MarkerCode::Eoi.code() {
if let Some(tail) = data.pop() {
if tail == 0xFF {
break;
}
}
}
}
return Ok(data);
} else {
reader.read_exact(&mut header)?;
let len = u16::from_be_bytes([header[0], header[1]]);
reader.seek(std::io::SeekFrom::Current(len as i64 - 2))?;
}
}
}
enum MarkerCode {
Soi = 0xD8,
APP1 = 0xE1,
Sos = 0xDA,
Eoi = 0xD9,
}
impl MarkerCode {
fn code(self) -> u8 {
self as u8
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::exif::ExifTag::*;
use crate::testkit::*;
use test_case::test_case;
#[test_case("exif.jpg", true)]
#[test_case("broken.jpg", true)]
#[test_case("no-exif.jpg", false)]
fn test_check_jpeg(path: &str, has_exif: bool) {
let data = read_sample(path).unwrap();
check_jpeg(&data).unwrap();
let (_, data) = extract_exif_data(&data).unwrap();
if has_exif {
data.unwrap();
}
}
#[test_case("exif.jpg")]
#[allow(deprecated)]
fn jpeg(path: &str) {
let _ = tracing_subscriber::fmt().with_test_writer().try_init();
let f = open_sample(path).unwrap();
let exif = parse_jpeg_exif(f).unwrap().unwrap();
assert_eq!(exif.get_value(&Make).unwrap().unwrap().to_string(), "vivo");
assert_eq!(
exif.get_values(&[DateTimeOriginal, CreateDate, ModifyDate])
.into_iter()
.map(|x| (x.0.to_string(), x.1.to_string()))
.collect::<Vec<_>>(),
[
("DateTimeOriginal", "2023-07-09T20:36:33+08:00"),
("CreateDate", "2023-07-09T20:36:33+08:00"),
("ModifyDate", "2023-07-09T20:36:33+08:00")
]
.into_iter()
.map(|x| (x.0.to_string(), x.1.to_string()))
.collect::<Vec<_>>()
);
let mut entries = exif
.get_values(&[ImageWidth, ImageHeight])
.into_iter()
.map(|x| (x.0.to_string(), x.1.to_string()))
.collect::<Vec<_>>();
entries.sort();
assert_eq!(
entries,
[("ImageHeight", "4096"), ("ImageWidth", "3072")]
.into_iter()
.map(|x| (x.0.to_string(), x.1.to_string()))
.collect::<Vec<_>>()
);
}
#[test_case("no-exif.jpg", 0)]
#[test_case("exif.jpg", 0x4569-2)]
fn jpeg_find_exif(path: &str, exif_size: usize) {
let _ = tracing_subscriber::fmt().with_test_writer().try_init();
let buf = read_sample(path).unwrap();
let (_, segment) = find_exif_segment(&buf[..]).unwrap();
if exif_size == 0 {
assert!(segment.is_none());
} else {
assert_eq!(segment.unwrap().payload_len(), exif_size);
}
}
#[test_case("no-exif.jpg", 0)]
#[test_case("exif.jpg", 0x4569-8)]
fn jpeg_exif_data(path: &str, exif_size: usize) {
let _ = tracing_subscriber::fmt().with_test_writer().try_init();
let buf = read_sample(path).unwrap();
let (_, exif) = extract_exif_data(&buf[..]).unwrap();
if exif_size == 0 {
assert!(exif.is_none());
} else {
assert_eq!(exif.unwrap().len(), exif_size);
}
}
#[test_case("no-exif.jpg", 4089704, 0x000c0301, 0xb3b3e43f)]
#[test_case("exif.jpg", 3564768, 0x000c0301, 0x84a297a9)]
fn jpeg_image_data(path: &str, len: usize, start: u32, end: u32) {
let _ = tracing_subscriber::fmt().with_test_writer().try_init();
let f = open_sample(path).unwrap();
let data = read_image_data(f).unwrap();
assert_eq!(data.len(), len);
assert_eq!(u32::from_be_bytes(data[..4].try_into().unwrap()), start); assert_eq!(
u32::from_be_bytes(data[data.len() - 4..].try_into().unwrap()), end
);
}
#[allow(deprecated)]
#[test]
fn broken_jpg() {
let _ = tracing_subscriber::fmt().with_test_writer().try_init();
let f = open_sample("broken.jpg").unwrap();
parse_jpeg_exif(f).unwrap();
}
}