use std::{collections::HashSet, fmt::Debug, ops::Range, sync::Arc};
use nom::{number::complete, sequence::tuple};
use thiserror::Error;
use crate::{
partial_vec::{AssociatedInput, PartialVec},
slice::SliceChecked,
values::{DataFormat, EntryData, IRational, ParseEntryError, URational},
EntryValue, ExifTag,
};
use super::{exif_exif::IFD_ENTRY_SIZE, tags::ExifTagCode, GPSInfo, TiffHeader};
/// Describes an extra TIFF-formatted region of the shared input buffer that is
/// iterated once the IFDs of the current block have been exhausted.
#[derive(Clone)]
pub(crate) struct TiffDataBlock {
/// Identifier of the block; within this file it is only emitted in log output.
#[allow(dead_code)]
pub block_id: String,
/// Byte range of the block inside the underlying input data.
pub data_range: Range<usize>,
/// Already-parsed TIFF header for the block. When `None`, the header is
/// parsed from the beginning of the block's data when the block is loaded.
pub header: Option<TiffHeader>,
}
/// Builds an [`ExifIter`] over TIFF-formatted `input`.
///
/// If `state` holds a header it is used as is; otherwise the TIFF header is
/// parsed from the start of `input`. IFD0 is then opened at the header's
/// `ifd0_offset`, and the time zone offset found through IFD0 (if any) is
/// stored both on IFD0 and on the returned iterator.
///
/// # Errors
///
/// Fails when the header cannot be parsed, when `ifd0_offset` lies beyond the
/// end of `input`, or when IFD0 cannot be created.
#[tracing::instrument]
pub(crate) fn input_into_iter(
    input: impl Into<PartialVec> + Debug,
    state: Option<TiffHeader>,
) -> crate::Result<ExifIter> {
    let input: PartialVec = input.into();

    let header = if let Some(known) = state {
        known
    } else {
        let (_, header) = TiffHeader::parse(&input[..])?;
        tracing::debug!(
            ?header,
            data_len = format!("{:#x}", input.len()),
            "TIFF header parsed"
        );
        header
    };

    let start = header.ifd0_offset as usize;
    if input.len() < start {
        return Err(crate::Error::ParseFailed("no enough bytes".into()));
    }
    tracing::debug!(?header, offset = start);

    let mut ifd0 = IfdIter::try_new(0, input.to_owned(), header.to_owned(), start, None)?;
    // Resolve the time zone once and share it between IFD0 and the iterator.
    ifd0.tz = ifd0.find_tz_offset();
    let tz = ifd0.tz.clone();

    let iter: ExifIter = ExifIter::new(input, header, tz, ifd0);
    tracing::debug!(?iter, "got IFD0");
    Ok(iter)
}
/// Iterator that walks the IFDs of a TIFF/Exif buffer and yields one
/// [`ParsedExifEntry`] per tag.
pub struct ExifIter {
/// Shared input buffer the entries are parsed from.
input: Arc<PartialVec>,
/// TIFF header used to interpret the buffer.
tiff_header: TiffHeader,
/// Time zone offset string handed to the iterator at construction, if any.
tz: Option<String>,
/// IFD0 iterator kept aside so iteration can be restarted from it.
ifd0: IfdIter,
/// Stack of IFD iterators currently being walked; the last element is
/// advanced first.
ifds: Vec<IfdIter>,
/// Non-zero IFD offsets that have already been entered.
visited_offsets: HashSet<usize>,
/// Extra TIFF blocks to iterate after the current IFD stack is drained.
additional_blocks: Vec<TiffDataBlock>,
/// Index into `additional_blocks` of the next block to load.
current_block_index: usize,
/// `(ifd index, tag code)` pairs already yielded, used to drop duplicates.
encountered_tags: HashSet<(usize, u16)>,
}
/// Compact debug view: prints sizes and cursor positions instead of the raw
/// buffer and the full IFD stack.
impl Debug for ExifIter {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `(index, pos)` of the first iterator on the stack, if any.
        let cursor = self.ifds.first().map(|ifd| (ifd.index, ifd.pos));

        let mut out = f.debug_struct("ExifIter");
        out.field("data len", &self.input.len());
        out.field("tiff_header", &self.tiff_header);
        out.field("ifd0", &self.ifd0);
        out.field("state", &cursor);
        out.field("ifds num", &self.ifds.len());
        out.field("additional_blocks", &self.additional_blocks.len());
        out.field("current_block_index", &self.current_block_index);
        out.finish_non_exhaustive()
    }
}
impl Clone for ExifIter {
fn clone(&self) -> Self {
self.clone_and_rewind()
}
}
impl ExifIter {
pub(crate) fn new(
input: impl Into<PartialVec>,
tiff_header: TiffHeader,
tz: Option<String>,
ifd0: IfdIter,
) -> ExifIter {
let ifds = vec![ifd0.clone()];
ExifIter {
input: Arc::new(input.into()),
tiff_header,
tz,
ifd0,
ifds,
visited_offsets: HashSet::new(),
additional_blocks: Vec::new(),
current_block_index: 0,
encountered_tags: HashSet::new(),
}
}
/// Returns a copy of this iterator that starts over from a rewound IFD0.
///
/// The input buffer is shared (the `Arc` is cloned), the registered
/// additional blocks are copied, and all progress tracking (visited offsets,
/// seen tags, current block index) is reset.
pub fn clone_and_rewind(&self) -> Self {
    let rewound = self.ifd0.clone_and_rewind();
    Self {
        input: self.input.clone(),
        tiff_header: self.tiff_header.clone(),
        tz: self.tz.clone(),
        ifds: vec![rewound.clone()],
        ifd0: rewound,
        visited_offsets: HashSet::new(),
        additional_blocks: self.additional_blocks.clone(),
        current_block_index: 0,
        encountered_tags: HashSet::new(),
    }
}
#[tracing::instrument(skip_all)]
pub fn parse_gps_info(&self) -> crate::Result<Option<GPSInfo>> {
let mut iter = self.clone_and_rewind();
let Some(gps) = iter.find(|x| {
tracing::info!(?x, "find");
x.tag.tag().is_some_and(|t| t == ExifTag::GPSInfo)
}) else {
tracing::warn!(ifd0 = ?iter.ifds.first(), "GPSInfo not found");
return Ok(None);
};
let offset = match gps.get_result() {
Ok(v) => {
if let Some(offset) = v.as_u32() {
offset
} else {
return Err(EntryError(ParseEntryError::InvalidData(
"invalid gps offset".into(),
))
.into());
}
}
Err(e) => return Err(e.clone().into()),
};
if offset as usize >= iter.input.len() {
return Err(crate::Error::ParseFailed(
"GPSInfo offset is out of range".into(),
));
}
let mut gps_subifd = match IfdIter::try_new(
gps.ifd,
iter.input.partial(&iter.input[..]),
iter.tiff_header,
offset as usize,
iter.tz.clone(),
) {
Ok(ifd0) => ifd0.tag_code(ExifTag::GPSInfo.code()),
Err(e) => return Err(e),
};
Ok(gps_subifd.parse_gps_info())
}
/// Creates a rewound iterator whose input is a fresh copy of this
/// iterator's bytes (`to_vec`), carrying over the header, time zone and the
/// registered additional blocks.
pub(crate) fn to_owned(&self) -> ExifIter {
    let bytes = self.input.to_vec();
    let ifd0 = self.ifd0.clone_and_rewind();
    let mut owned = ExifIter::new(bytes, self.tiff_header.clone(), self.tz.clone(), ifd0);
    owned.additional_blocks = self.additional_blocks.clone();
    owned
}
/// Registers an additional TIFF block to be iterated after the blocks that
/// are already queued.
///
/// `data_range` is the block's byte range in the underlying input data and
/// `header` an optional pre-parsed TIFF header for it.
pub(crate) fn add_tiff_block(
    &mut self,
    block_id: String,
    data_range: Range<usize>,
    header: Option<TiffHeader>,
) {
    let block = TiffDataBlock {
        block_id,
        data_range,
        header,
    };
    self.additional_blocks.push(block);
}
}
/// Error attached to a single IFD entry; wraps the underlying
/// [`ParseEntryError`] and displays as `ifd entry error: <inner>`.
#[derive(Debug, Clone, Error)]
#[error("ifd entry error: {0}")]
pub struct EntryError(ParseEntryError);
impl From<EntryError> for crate::Error {
fn from(value: EntryError) -> Self {
Self::ParseFailed(value.into())
}
}
/// A single entry produced by [`ExifIter`]: the IFD it came from, its tag,
/// and the parsed value or the error met while parsing it.
#[derive(Clone)]
pub struct ParsedExifEntry {
/// Index of the IFD this entry belongs to.
ifd: usize,
/// Tag of the entry, either a recognised tag or a raw code.
tag: ExifTagCode,
/// Parsing outcome; becomes `None` once the result has been taken out.
res: Option<Result<EntryValue, EntryError>>,
}
impl ParsedExifEntry {
pub fn ifd_index(&self) -> usize {
self.ifd
}
pub fn tag(&self) -> Option<ExifTag> {
match self.tag {
ExifTagCode::Tag(t) => Some(t),
ExifTagCode::Code(_) => None,
}
}
pub fn tag_code(&self) -> u16 {
self.tag.code()
}
pub fn has_value(&self) -> bool {
self.res.as_ref().map(|e| e.is_ok()).is_some_and(|b| b)
}
pub fn get_value(&self) -> Option<&EntryValue> {
match self.res.as_ref() {
Some(Ok(v)) => Some(v),
Some(Err(_)) | None => None,
}
}
pub fn take_value(&mut self) -> Option<EntryValue> {
match self.res.take() {
Some(v) => v.ok(),
None => None,
}
}
#[allow(rustdoc::private_intra_doc_links)]
pub fn get_result(&self) -> Result<&EntryValue, &EntryError> {
match self.res {
Some(ref v) => v.as_ref(),
None => panic!("take result of entry twice"),
}
}
pub fn take_result(&mut self) -> Result<EntryValue, EntryError> {
match self.res.take() {
Some(v) => v,
None => panic!("take result of entry twice"),
}
}
fn make_ok(ifd: usize, tag: ExifTagCode, v: EntryValue) -> Self {
Self {
ifd,
tag,
res: Some(Ok(v)),
}
}
}
/// Debug view showing the IFD, the tag and the value (or the error).
///
/// Formatting never panics: an entry whose result has already been taken is
/// rendered with the placeholder value `<taken>` instead of going through
/// `get_result`, which would panic in that state.
impl Debug for ParsedExifEntry {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let value = match &self.res {
            Some(Ok(v)) => format!("{v}"),
            Some(Err(e)) => format!("{e:?}"),
            None => "<taken>".to_owned(),
        };
        f.debug_struct("IfdEntryResult")
            .field("ifd", &format!("ifd{}", self.ifd))
            .field("tag", &self.tag)
            .field("value", &value)
            .finish()
    }
}
/// Largest number of IFD iterators `ExifIter` keeps on its stack; when the
/// stack grows beyond this, it is cleared and iteration goes back to IFD0.
const MAX_IFD_DEPTH: usize = 8;
impl ExifIter {
/// Switches iteration to the next registered additional TIFF block.
///
/// Blocks that fail to parse are logged and skipped. On success the block's
/// IFD0 replaces the current one, the IFD stack is reset to it and the set
/// of visited offsets is cleared.
///
/// Returns `true` when a block was loaded and `false` when no blocks remain.
fn load_next_block(&mut self) -> bool {
    loop {
        let block_index = self.current_block_index;
        let Some(block) = self.additional_blocks.get(block_index) else {
            return false;
        };

        tracing::debug!(
            block_id = block.block_id,
            block_index,
            "Loading additional TIFF block"
        );

        let header = block.header.clone();
        let block_data = PartialVec::new(self.input.data.clone(), block.data_range.clone());

        match input_into_iter(block_data, header) {
            Ok(iter) => {
                self.ifd0 = iter.ifd0;
                self.ifds = vec![self.ifd0.clone()];
                self.visited_offsets.clear();
                self.current_block_index += 1;
                tracing::debug!(block_index, "Successfully loaded additional TIFF block");
                return true;
            }
            Err(e) => {
                tracing::warn!(
                    block_index,
                    error = %e,
                    "Failed to load additional TIFF block, skipping"
                );
                // Move on to the following block on the next loop turn.
                self.current_block_index += 1;
            }
        }
    }
}
/// Records `(ifd_index, tag_code)` as seen and reports whether it is new.
///
/// Returns `true` the first time a pair is passed in and `false` (after
/// logging) for every later occurrence, so callers can drop duplicate tags.
fn should_include_tag(&mut self, ifd_index: usize, tag_code: u16) -> bool {
    // `insert` already tells us whether the key was absent, so a separate
    // `contains` lookup is unnecessary.
    let first_time = self.encountered_tags.insert((ifd_index, tag_code));
    if !first_time {
        tracing::debug!(ifd_index, tag_code, "Skipping duplicate tag");
    }
    first_time
}
}
impl Iterator for ExifIter {
type Item = ParsedExifEntry;
#[tracing::instrument(skip_all)]
fn next(&mut self) -> Option<Self::Item> {
loop {
if self.ifds.is_empty() {
if !self.load_next_block() {
tracing::debug!(?self, "all IFDs and blocks have been parsed");
return None;
}
continue;
}
if self.ifds.len() > MAX_IFD_DEPTH {
self.ifds.clear();
tracing::error!(
ifds_depth = self.ifds.len(),
"ifd depth is too deep, just go back to ifd0"
);
self.ifds.push(self.ifd0.clone_with_state());
}
let mut ifd = self.ifds.pop()?;
let cur_ifd_idx = ifd.ifd_idx;
match ifd.next() {
Some((tag_code, entry)) => {
tracing::debug!(ifd = ifd.ifd_idx, ?tag_code, "next tag entry");
match entry {
IfdEntry::IfdNew(new_ifd) => {
if new_ifd.offset > 0 {
if self.visited_offsets.contains(&new_ifd.offset) {
continue;
}
self.visited_offsets.insert(new_ifd.offset);
}
let is_subifd = if new_ifd.ifd_idx == ifd.ifd_idx {
self.ifds.push(ifd);
tracing::debug!(?tag_code, ?new_ifd, "got new SUB-IFD");
true
} else {
tracing::debug!("IFD{} parsing completed", cur_ifd_idx);
tracing::debug!(?new_ifd, "got new IFD");
false
};
let (ifd_idx, offset) = (new_ifd.ifd_idx, new_ifd.offset);
self.ifds.push(new_ifd);
if is_subifd {
let tc = tag_code.unwrap();
if !self.should_include_tag(ifd_idx, tc.code()) {
continue;
}
return Some(ParsedExifEntry::make_ok(
ifd_idx,
tc,
EntryValue::U32(offset as u32),
));
}
}
IfdEntry::Entry(v) => {
let tc = tag_code.unwrap();
if !self.should_include_tag(ifd.ifd_idx, tc.code()) {
self.ifds.push(ifd);
continue;
}
let res = Some(ParsedExifEntry::make_ok(ifd.ifd_idx, tc, v));
self.ifds.push(ifd);
return res;
}
IfdEntry::Err(e) => {
tracing::warn!(?tag_code, ?e, "parse ifd entry error");
self.ifds.push(ifd);
continue;
}
}
}
None => continue,
}
}
}
}
/// Iterator over the entries of a single IFD.
#[derive(Clone)]
pub(crate) struct IfdIter {
/// Index of the IFD this iterator walks; a sub-IFD reuses its parent's index.
ifd_idx: usize,
/// Tag code attached through the `tag*` builder methods (e.g. the tag that
/// referenced this IFD), if any.
tag_code: Option<ExifTagCode>,
/// Input buffer the IFD is read from.
input: AssociatedInput,
/// Offset of the IFD (its entry-count field) within `input`.
offset: usize,
/// TIFF header providing the byte order.
header: TiffHeader,
/// Number of entries declared by the IFD.
entry_num: u16,
/// Time zone offset string passed on when parsing entry values.
pub tz: Option<String>,
/// Count of entries consumed so far.
index: u16,
/// Position in `input` of the next entry to read; starts at `offset + 2`.
pos: usize,
}
/// Debug view that reports the input length instead of dumping the buffer.
impl Debug for IfdIter {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("IfdIter");
        out.field("ifd_idx", &self.ifd_idx);
        out.field("tag", &self.tag_code);
        out.field("data len", &self.input.len());
        out.field("tz", &self.tz);
        out.field("header", &self.header);
        out.field("entry_num", &self.entry_num);
        out.field("index", &self.index);
        out.field("pos", &self.pos);
        out.finish()
    }
}
impl IfdIter {
/// Moves the iterator back to the first entry of the IFD.
pub fn rewind(&mut self) {
    // The first entry follows the entry-count field located at `offset`.
    self.pos = self.offset + 2;
    self.index = 0;
}
/// Returns a copy of this iterator positioned at the first entry.
pub fn clone_and_rewind(&self) -> Self {
    let mut rewound = self.clone();
    rewound.rewind();
    rewound
}
/// Builder-style setter: replaces the attached tag code with `code`
/// (clearing it when `code` is `None`) and returns the iterator.
pub fn tag_code_maybe(mut self, code: Option<u16>) -> Self {
    self.tag_code = code.map(Into::into);
    self
}
/// Builder-style setter: attaches the raw tag `code` and returns the iterator.
pub fn tag_code(self, code: u16) -> Self {
    self.tag_code_maybe(Some(code))
}
#[allow(unused)]
pub fn tag(mut self, tag: ExifTagCode) -> Self {
self.tag_code = Some(tag);
self
}
/// Creates an iterator for the IFD located at `offset` within `input`.
///
/// The entry count is read from `offset` using the byte order of `header`;
/// iteration starts right after it (`offset + 2`).
///
/// # Errors
///
/// Returns `ParseFailed` when `input` is shorter than two bytes or when
/// `offset` lies beyond the end of `input`, and propagates the error when the
/// entry count cannot be parsed. An out-of-range `offset` is reported as an
/// error rather than a panic because offsets originate from file data.
#[tracing::instrument(skip(input))]
pub fn try_new(
    ifd_idx: usize,
    input: AssociatedInput,
    header: TiffHeader,
    offset: usize,
    tz: Option<String>,
) -> crate::Result<Self> {
    if input.len() < 2 {
        return Err(crate::Error::ParseFailed(
            "ifd data is too small to decode entry num".into(),
        ));
    }
    if offset > input.len() {
        return Err(crate::Error::ParseFailed(
            "ifd offset is out of range".into(),
        ));
    }

    let ifd_data = input.partial(&input[offset..]);
    let (_, entry_num) = TiffHeader::parse_ifd_entry_num(&ifd_data, header.endian)?;

    Ok(Self {
        ifd_idx,
        tag_code: None,
        input,
        offset,
        header,
        entry_num,
        tz,
        pos: offset + 2,
        index: 0,
    })
}
fn parse_tag_entry(&self, entry_data: &[u8]) -> Option<(u16, IfdEntry)> {
let endian = self.header.endian;
let (_, (tag, data_format, components_num, value_or_offset)) = tuple((
complete::u16::<_, nom::error::Error<_>>(endian),
complete::u16(endian),
complete::u32(endian),
complete::u32(endian),
))(entry_data)
.ok()?;
if tag == 0 {
return None;
}
let df: DataFormat = match data_format.try_into() {
Ok(df) => df,
Err(e) => {
let t: ExifTagCode = tag.into();
tracing::warn!(tag = ?t, ?e, "invalid entry data format");
return Some((tag, IfdEntry::Err(e)));
}
};
let (tag, res) = self.parse_entry(tag, df, components_num, entry_data, value_or_offset);
Some((tag, res))
}
/// Converts an entry's value-or-offset field into a position within `input`.
fn get_data_pos(&self, value_or_offset: u32) -> usize {
    value_or_offset as usize
}

/// Parses the value of one IFD entry.
///
/// Values of at most four bytes are read inline from bytes `8..` of
/// `entry_data`; larger values are read from `input` at the position given by
/// `value_or_offset`. Tags listed in `SUBIFD_TAGS` are turned into a new
/// sub-IFD iterator when one can be created, otherwise they are parsed as an
/// ordinary value.
///
/// Returns the tag together with the parsed entry; a value that does not fit
/// inside `input` yields `ParseEntryError::EntrySizeTooBig`.
fn parse_entry(
    &self,
    tag: u16,
    data_format: DataFormat,
    components_num: u32,
    entry_data: &[u8],
    value_or_offset: u32,
) -> (u16, IfdEntry) {
    let component_size = data_format.component_size();
    // `components_num` comes from the file. Saturate instead of overflowing
    // (possible on 32-bit targets) so that absurd sizes end up in the
    // out-of-range path below instead of panicking or wrapping.
    let size = (components_num as usize).saturating_mul(component_size);

    let data = if size <= 4 {
        &entry_data[8..8 + size]
    } else {
        let start = self.get_data_pos(value_or_offset);
        let end = start.saturating_add(size);
        let Some(data) = self.input.slice_checked(start..end) else {
            tracing::warn!(
                "entry data overflow, tag: {:04x} start: {:08x} end: {:08x} ifd data len {:08x}",
                tag,
                start,
                end,
                self.input.len(),
            );
            return (tag, IfdEntry::Err(ParseEntryError::EntrySizeTooBig));
        };
        data
    };

    if SUBIFD_TAGS.contains(&tag) {
        if let Some(value) = self.new_ifd_iter(self.ifd_idx, value_or_offset, Some(tag)) {
            return (tag, value);
        }
    }

    let entry = EntryData {
        endian: self.header.endian,
        tag,
        data,
        data_format,
        components_num,
    };
    match EntryValue::parse(&entry, &self.tz) {
        Ok(v) => (tag, IfdEntry::Entry(v)),
        Err(e) => (tag, IfdEntry::Err(e)),
    }
}
/// Tries to open the IFD at `value_or_offset` as a new iterator with index
/// `ifd_idx`, tagged with `tag`.
///
/// Returns `None` when the offset is not inside `input` or when creating the
/// iterator fails (the failure is logged).
fn new_ifd_iter(
    &self,
    ifd_idx: usize,
    value_or_offset: u32,
    tag: Option<u16>,
) -> Option<IfdEntry> {
    let offset = self.get_data_pos(value_or_offset);
    if offset >= self.input.len() {
        return None;
    }

    let created = IfdIter::try_new(
        ifd_idx,
        self.input.partial(&self.input[..]),
        self.header.to_owned(),
        offset,
        self.tz.clone(),
    );
    match created {
        Ok(iter) => Some(IfdEntry::IfdNew(iter.tag_code_maybe(tag))),
        Err(e) => {
            tracing::warn!(?tag, ?e, "Create next/sub IFD failed");
            None
        }
    }
}
pub fn find_exif_iter(&self) -> Option<IfdIter> {
let endian = self.header.endian;
for i in 0..self.entry_num {
let pos = self.pos + i as usize * IFD_ENTRY_SIZE;
let (_, tag) =
complete::u16::<_, nom::error::Error<_>>(endian)(&self.input[pos..]).ok()?;
if tag == ExifTag::ExifOffset.code() {
let entry_data = self.input.slice_checked(pos..pos + IFD_ENTRY_SIZE)?;
let (_, entry) = self.parse_tag_entry(entry_data)?;
match entry {
IfdEntry::IfdNew(iter) => return Some(iter),
IfdEntry::Entry(_) | IfdEntry::Err(_) => return None,
}
}
}
None
}
/// Looks up the time zone offset string in the Exif sub-IFD.
///
/// The first `OffsetTimeOriginal` or `OffsetTimeDigitized` entry met decides
/// the result immediately (including `None` when it holds no string). If
/// neither occurs, the last `OffsetTime` value seen is returned.
pub fn find_tz_offset(&self) -> Option<String> {
    let exif_ifd = self.find_exif_iter()?;

    let original = ExifTag::OffsetTimeOriginal.code();
    let digitized = ExifTag::OffsetTimeDigitized.code();
    let generic = ExifTag::OffsetTime.code();

    let mut fallback = None;
    for (tag, value) in exif_ifd {
        let Some(tag) = tag else {
            continue;
        };
        let code = tag.code();
        if code == original || code == digitized {
            return value.as_str().map(str::to_owned);
        }
        if code == generic {
            fallback = value.as_str().map(str::to_owned);
        }
    }
    fallback
}
/// Consumes the remaining entries of this IFD and collects the GPS fields
/// they carry into a [`GPSInfo`].
///
/// Returns `None` when no entry with a recognised tag is met, or when a
/// latitude/longitude array cannot be converted.
pub fn parse_gps_info(&mut self) -> Option<GPSInfo> {
    let mut info = GPSInfo::default();
    let mut saw_known_tag = false;

    for (code, value) in self.by_ref() {
        // Entries without a recognised tag carry nothing of interest here.
        let tag = match code.and_then(|c| c.tag()) {
            Some(tag) => tag,
            None => continue,
        };
        saw_known_tag = true;

        match tag {
            ExifTag::GPSLatitudeRef => {
                if let Some(c) = value.as_char() {
                    info.latitude_ref = c;
                }
            }
            ExifTag::GPSLongitudeRef => {
                if let Some(c) = value.as_char() {
                    info.longitude_ref = c;
                }
            }
            ExifTag::GPSAltitudeRef => {
                if let Some(c) = value.as_u8() {
                    info.altitude_ref = c;
                }
            }
            ExifTag::GPSLatitude => {
                if let Some(v) = value.as_urational_array() {
                    info.latitude = v.try_into().ok()?;
                } else if let Some(v) = value.as_irational_array() {
                    info.latitude = v.try_into().ok()?;
                }
            }
            ExifTag::GPSLongitude => {
                if let Some(v) = value.as_urational_array() {
                    info.longitude = v.try_into().ok()?;
                } else if let Some(v) = value.as_irational_array() {
                    info.longitude = v.try_into().ok()?;
                }
            }
            ExifTag::GPSAltitude => {
                if let Some(v) = value.as_urational() {
                    info.altitude = *v;
                } else if let Some(v) = value.as_irational() {
                    info.altitude = (*v).into();
                }
            }
            ExifTag::GPSSpeedRef => {
                if let Some(c) = value.as_char() {
                    info.speed_ref = Some(c);
                }
            }
            ExifTag::GPSSpeed => {
                if let Some(v) = value.as_urational() {
                    info.speed = Some(*v);
                } else if let Some(v) = value.as_irational() {
                    info.speed = Some((*v).into());
                }
            }
            _ => (),
        }
    }

    if !saw_known_tag {
        tracing::warn!("GPSInfo data not found");
        return None;
    }
    Some(info)
}
/// Returns a copy of this iterator that keeps the current position
/// (`index` and `pos`), as opposed to [`Self::clone_and_rewind`].
fn clone_with_state(&self) -> IfdIter {
    // The derived `Clone` already copies `index` and `pos`.
    self.clone()
}
}
/// Outcome of reading one item from an [`IfdIter`].
///
/// - `IfdNew`: the item refers to another IFD (a sub-IFD or the next IFD in
///   the chain), given as an iterator over it.
/// - `Entry`: an ordinary, successfully parsed value.
/// - `Err`: the entry could not be parsed.
#[derive(Debug)]
pub(crate) enum IfdEntry {
IfdNew(IfdIter), Entry(EntryValue),
Err(ParseEntryError),
}
/// Typed accessors: each returns `Some` only when the entry is an
/// `IfdEntry::Entry` holding the matching kind of value.
impl IfdEntry {
    /// The value as `u8`, if it is a `U8` entry.
    pub fn as_u8(&self) -> Option<u8> {
        match self {
            IfdEntry::Entry(EntryValue::U8(v)) => Some(*v),
            _ => None,
        }
    }

    /// The first character of a `Text` entry (`None` for empty text).
    pub fn as_char(&self) -> Option<char> {
        match self {
            IfdEntry::Entry(EntryValue::Text(s)) => s.chars().next(),
            _ => None,
        }
    }

    /// The value as a signed rational, if it is an `IRational` entry.
    fn as_irational(&self) -> Option<&IRational> {
        match self {
            IfdEntry::Entry(EntryValue::IRational(v)) => Some(v),
            _ => None,
        }
    }

    /// The value as signed rationals, if it is an `IRationalArray` entry.
    fn as_irational_array(&self) -> Option<&Vec<IRational>> {
        match self {
            IfdEntry::Entry(EntryValue::IRationalArray(v)) => Some(v),
            _ => None,
        }
    }

    /// The value as an unsigned rational, if it is a `URational` entry.
    fn as_urational(&self) -> Option<&URational> {
        match self {
            IfdEntry::Entry(EntryValue::URational(v)) => Some(v),
            _ => None,
        }
    }

    /// The value as unsigned rationals, if it is a `URationalArray` entry.
    fn as_urational_array(&self) -> Option<&Vec<URational>> {
        match self {
            IfdEntry::Entry(EntryValue::URationalArray(v)) => Some(v),
            _ => None,
        }
    }

    /// The string form of the value, as provided by `EntryValue::as_str`.
    fn as_str(&self) -> Option<&str> {
        match self {
            IfdEntry::Entry(e) => e.as_str(),
            _ => None,
        }
    }
}
/// Tag codes whose value is treated as the offset of a sub-IFD rather than
/// as an ordinary value.
pub(crate) const SUBIFD_TAGS: &[u16] = &[ExifTag::ExifOffset.code(), ExifTag::GPSInfo.code()];
impl Iterator for IfdIter {
type Item = (Option<ExifTagCode>, IfdEntry);
#[tracing::instrument(skip(self))]
fn next(&mut self) -> Option<Self::Item> {
tracing::debug!(
ifd = self.ifd_idx,
index = self.index,
entry_num = self.entry_num,
offset = format!("{:08x}", self.offset),
pos = format!("{:08x}", self.pos),
"next IFD entry"
);
if self.input.len() < self.pos + IFD_ENTRY_SIZE {
return None;
}
let endian = self.header.endian;
if self.index > self.entry_num {
return None;
}
if self.index == self.entry_num {
tracing::debug!(
self.ifd_idx,
self.index,
pos = self.pos,
"try to get next ifd"
);
self.index += 1;
let (_, offset) =
complete::u32::<_, nom::error::Error<_>>(endian)(&self.input[self.pos..]).ok()?;
if offset == 0 {
tracing::debug!(?self, "IFD parsing completed");
return None;
}
return self
.new_ifd_iter(self.ifd_idx + 1, offset, None)
.map(|x| (None, x));
}
let entry_data = self
.input
.slice_checked(self.pos..self.pos + IFD_ENTRY_SIZE)?;
self.index += 1;
self.pos += IFD_ENTRY_SIZE;
let (tag, res) = self.parse_tag_entry(entry_data)?;
Some((Some(tag.into()), res)) }
}
#[cfg(test)]
mod tests {
    use crate::exif::extract_exif_with_mime;
    use crate::exif::input_into_iter;
    use crate::file::MimeImage;
    use crate::slice::SubsliceRange;
    use crate::testkit::read_sample;
    use crate::Exif;
    use test_case::test_case;

    // Checks the detected time zone and the rendered `DateTimeOriginal` for
    // each sample file. A "-" in `tz` means no time zone is expected; a "-"
    // in `time` means the tag must be absent.
    #[test_case("exif.jpg", "+08:00", "2023-07-09T20:36:33+08:00", MimeImage::Jpeg)]
    #[test_case("exif-no-tz.jpg", "", "2023-07-09 20:36:33", MimeImage::Jpeg)]
    #[test_case("broken.jpg", "-", "2014-09-21 15:51:22", MimeImage::Jpeg)]
    #[test_case("exif.heic", "+08:00", "2022-07-22T21:26:32+08:00", MimeImage::Heic)]
    #[test_case("tif.tif", "-", "-", MimeImage::Tiff)]
    #[test_case(
        "fujifilm_x_t1_01.raf.meta",
        "-",
        "2014-01-30 12:49:13",
        MimeImage::Raf
    )]
    fn exif_iter_tz(path: &str, tz: &str, time: &str, img_type: MimeImage) {
        let buf = read_sample(path).unwrap();
        let (exif_data, _) = extract_exif_with_mime(img_type, &buf, None).unwrap();
        let exif_range = exif_data.and_then(|x| buf.subslice_in_range(x)).unwrap();
        let iter = input_into_iter((buf, exif_range), None).unwrap();

        let expected_tz = (tz != "-").then(|| tz.to_string());
        assert_eq!(iter.tz, expected_tz);

        let exif: Exif = iter.into();
        let value = exif.get(crate::ExifTag::DateTimeOriginal);
        if time != "-" {
            assert_eq!(value.unwrap().to_string(), time);
        } else {
            assert!(value.is_none());
        }
    }
}