Skip to main content

nom_exif/
jpeg.rs

1use crate::{ExifIter, MediaParser, MediaSource};
2use std::io::{Read, Seek};
3
4use nom::{bytes::streaming, combinator::fail, number, sequence::tuple, IResult};
5
6use crate::exif::{check_exif_header, Exif};
7
8/// *Deprecated*: Please use [`MediaParser`] + [`MediaSource`] instead.
9///
10/// Analyze the byte stream in the `reader` as a JPEG file, attempting to
11/// extract Exif data it may contain.
12///
13/// Please note that the parsing routine itself provides a buffer, so the
14/// `reader` may not need to be wrapped with `BufRead`.
15///
16/// # Usage
17///
18/// ```rust
19/// use nom_exif::*;
20/// use nom_exif::ExifTag::*;
21///
22/// use std::fs::File;
23/// use std::path::Path;
24///
25/// let f = File::open(Path::new("./testdata/exif.jpg")).unwrap();
26/// let exif = parse_jpeg_exif(f).unwrap().unwrap();
27///
28/// assert_eq!(exif.get_value(&Make).unwrap().unwrap().to_string(), "vivo");
29///
30/// assert_eq!(
31///     exif.get_values(&[DateTimeOriginal, CreateDate, ModifyDate])
32///         .into_iter()
33///         .map(|x| (x.0.to_string(), x.1.to_string()))
34///         .collect::<Vec<_>>(),
35///     [
36///         ("DateTimeOriginal", "2023-07-09T20:36:33+08:00"),
37///         ("CreateDate", "2023-07-09T20:36:33+08:00"),
38///         ("ModifyDate", "2023-07-09T20:36:33+08:00")
39///     ]
40///     .into_iter()
41///     .map(|x| (x.0.to_string(), x.1.to_string()))
42///     .collect::<Vec<_>>()
43/// );
44/// ```
45#[deprecated(since = "2.0.0")]
46pub fn parse_jpeg_exif<R: Read + Seek>(reader: R) -> crate::Result<Option<Exif>> {
47    let mut parser = MediaParser::new();
48    let iter: ExifIter = parser.parse(MediaSource::unseekable(reader)?)?;
49    Ok(Some(iter.into()))
50}
51
52/// Extract Exif TIFF data from the bytes of a JPEG file.
53pub(crate) fn extract_exif_data(input: &[u8]) -> IResult<&[u8], Option<&[u8]>> {
54    let (remain, segment) = find_exif_segment(input)?;
55    let data = segment.and_then(|segment| {
56        if segment.payload_len() <= 6 {
57            None
58        } else {
59            Some(&segment.payload[6..]) // Safe-slice
60        }
61    });
62    Ok((remain, data))
63}
64
/// One JPEG segment: its marker code together with a borrowed view of the
/// payload bytes.
struct Segment<'a> {
    /// The code byte that followed `0xFF` in the marker.
    marker_code: u8,
    /// Payload bytes of the segment, borrowed from the parsed input.
    payload: &'a [u8],
}

impl Segment<'_> {
    /// Number of bytes in the payload.
    pub fn payload_len(&self) -> usize {
        let Self { payload, .. } = self;
        payload.len()
    }
}
75
76fn find_exif_segment(input: &[u8]) -> IResult<&[u8], Option<Segment<'_>>> {
77    let mut remain = input;
78
79    let (remain, segment) = loop {
80        let (rem, (_, code)) = tuple((streaming::tag([0xFF]), number::streaming::u8))(remain)?;
81        let (rem, segment) = parse_segment(code, rem)?;
82        // Sanity check
83        assert!(rem.len() < remain.len());
84        remain = rem;
85        tracing::debug!(
86            marker = format!("0x{:04x}", segment.marker_code),
87            size = format!("0x{:04x}", segment.payload.len()),
88            "got segment"
89        );
90
91        let s = &segment;
92        if (s.marker_code == MarkerCode::APP1.code() && check_exif_header(s.payload)?)
93            || s.marker_code == MarkerCode::Sos.code()
94        // searching stop at SOS
95        {
96            break (remain, segment);
97        }
98    };
99
100    if segment.marker_code != MarkerCode::Sos.code() {
101        Ok((remain, Some(segment)))
102    } else {
103        Ok((remain, None))
104    }
105}
106
107pub fn check_jpeg(input: &[u8]) -> crate::Result<()> {
108    // check soi marker [0xff, 0xd8]
109    let (_, (_, code)) = tuple((nom::bytes::complete::tag([0xFF]), number::complete::u8))(input)?;
110
111    // SOI has no payload
112    if code != MarkerCode::Soi.code() {
113        return Err("invalid JPEG file; SOI marker not found".into());
114    }
115
116    // check next marker [0xff, *]
117    let (_, (_, _)) = tuple((nom::bytes::complete::tag([0xFF]), number::complete::u8))(input)?;
118    Ok(())
119}
120
121fn parse_segment(marker_code: u8, input: &[u8]) -> IResult<&[u8], Segment<'_>> {
122    let remain = input;
123
124    // SOI has no payload
125    if marker_code == MarkerCode::Soi.code() {
126        Ok((
127            remain,
128            Segment {
129                marker_code,
130                payload: b"",
131            },
132        ))
133    } else {
134        let (remain, size) = number::streaming::be_u16(remain)?;
135        if size < 2 {
136            return fail(remain);
137        }
138        // size contains the two bytes of `size` itself
139        let (remain, data) = streaming::take(size - 2)(remain)?;
140        Ok((
141            remain,
142            Segment {
143                marker_code,
144                payload: data,
145            },
146        ))
147    }
148}
149
150/// Read all image data after the first SOS marker & before EOI marker.
151///
152/// The returned data might include several other SOS markers if the image is a
153/// progressive JPEG.
154#[allow(dead_code)]
155fn read_image_data<T: Read + Seek>(mut reader: T) -> crate::Result<Vec<u8>> {
156    let mut header = [0u8; 2];
157    loop {
158        reader.read_exact(&mut header)?;
159        let (tag, marker) = (header[0], header[1]);
160        if tag != 0xFF {
161            return Err("".into());
162        }
163
164        if marker == MarkerCode::Soi.code() {
165            // SOI has no body
166            continue;
167        }
168        if marker == MarkerCode::Eoi.code() {
169            return Err("exif not found".into());
170        }
171
172        if marker == MarkerCode::Sos.code() {
173            // found it
174            let mut data = Vec::new();
175            reader.read_to_end(&mut data)?;
176
177            // remove tail data
178            loop {
179                let Some(tail) = data.pop() else {
180                    // empty
181                    break;
182                };
183                if tail == MarkerCode::Eoi.code() {
184                    if let Some(tail) = data.pop() {
185                        if tail == 0xFF {
186                            // EOI marker has been popped
187                            break;
188                        }
189                    }
190                }
191            }
192            return Ok(data);
193        } else {
194            // skip other markers
195            reader.read_exact(&mut header)?;
196            let len = u16::from_be_bytes([header[0], header[1]]);
197            reader.seek(std::io::SeekFrom::Current(len as i64 - 2))?;
198        }
199    }
200}
201
/// A marker code is a byte following 0xFF that indicates the kind of marker.
///
/// Only the markers this module needs are listed; each discriminant is the
/// marker's code byte.
enum MarkerCode {
    /// Start of Image
    Soi = 0xD8,

    /// End of Image
    Eoi = 0xD9,

    /// Start of Scan
    Sos = 0xDA,

    /// APP1 marker
    APP1 = 0xE1,
}

impl MarkerCode {
    /// Returns the code byte of this marker (its discriminant).
    fn code(self) -> u8 {
        self as u8
    }
}
222
#[cfg(test)]
mod tests {
    use super::*;
    use crate::exif::ExifTag::*;
    use crate::testkit::*;
    use test_case::test_case;

    /// Installs the tracing subscriber used by the tests. The result is
    /// ignored on purpose: initialisation fails once a subscriber is set.
    fn init_tracing() {
        let _ = tracing_subscriber::fmt().with_test_writer().try_init();
    }

    /// Turns borrowed `(name, value)` pairs into owned ones so they can be
    /// compared with the stringified entries read from the Exif data.
    fn owned_pairs(pairs: &[(&str, &str)]) -> Vec<(String, String)> {
        pairs
            .iter()
            .map(|(name, value)| (name.to_string(), value.to_string()))
            .collect()
    }

    /// Every sample must pass `check_jpeg`; samples flagged with `has_exif`
    /// must also yield Exif data.
    #[test_case("exif.jpg", true)]
    #[test_case("broken.jpg", true)]
    #[test_case("no-exif.jpg", false)]
    fn test_check_jpeg(path: &str, has_exif: bool) {
        let bytes = read_sample(path).unwrap();
        check_jpeg(&bytes).unwrap();
        let (_, exif_data) = extract_exif_data(&bytes).unwrap();
        if has_exif {
            exif_data.unwrap();
        }
    }

    /// End-to-end check of the deprecated `parse_jpeg_exif` entry point.
    #[test_case("exif.jpg")]
    #[allow(deprecated)]
    fn jpeg(path: &str) {
        init_tracing();

        let file = open_sample(path).unwrap();
        let exif = parse_jpeg_exif(file).unwrap().unwrap();

        // TODO: also compare the complete, sorted list of Exif entries.

        assert_eq!(exif.get_value(&Make).unwrap().unwrap().to_string(), "vivo");

        let dates = exif
            .get_values(&[DateTimeOriginal, CreateDate, ModifyDate])
            .into_iter()
            .map(|x| (x.0.to_string(), x.1.to_string()))
            .collect::<Vec<_>>();
        assert_eq!(
            dates,
            owned_pairs(&[
                ("DateTimeOriginal", "2023-07-09T20:36:33+08:00"),
                ("CreateDate", "2023-07-09T20:36:33+08:00"),
                ("ModifyDate", "2023-07-09T20:36:33+08:00"),
            ])
        );

        let mut dimensions = exif
            .get_values(&[ImageWidth, ImageHeight])
            .into_iter()
            .map(|x| (x.0.to_string(), x.1.to_string()))
            .collect::<Vec<_>>();
        dimensions.sort();
        assert_eq!(
            dimensions,
            owned_pairs(&[("ImageHeight", "4096"), ("ImageWidth", "3072")])
        );
    }

    /// An `exif_size` of 0 means "no Exif segment expected".
    #[test_case("no-exif.jpg", 0)]
    #[test_case("exif.jpg", 0x4569-2)]
    fn jpeg_find_exif(path: &str, exif_size: usize) {
        init_tracing();

        let bytes = read_sample(path).unwrap();
        let (_, segment) = find_exif_segment(&bytes[..]).unwrap();

        match exif_size {
            0 => assert!(segment.is_none()),
            expected => assert_eq!(segment.unwrap().payload_len(), expected),
        }
    }

    /// An `exif_size` of 0 means "no Exif data expected".
    #[test_case("no-exif.jpg", 0)]
    #[test_case("exif.jpg", 0x4569-8)]
    fn jpeg_exif_data(path: &str, exif_size: usize) {
        init_tracing();

        let bytes = read_sample(path).unwrap();
        let (_, exif) = extract_exif_data(&bytes[..]).unwrap();

        match exif_size {
            0 => assert!(exif.is_none()),
            expected => assert_eq!(exif.unwrap().len(), expected),
        }
    }

    /// Checks the length of the extracted image data together with its first
    /// and last four bytes.
    #[test_case("no-exif.jpg", 4089704, 0x000c0301, 0xb3b3e43f)]
    #[test_case("exif.jpg", 3564768, 0x000c0301, 0x84a297a9)]
    fn jpeg_image_data(path: &str, len: usize, start: u32, end: u32) {
        init_tracing();

        let file = open_sample(path).unwrap();
        let image = read_image_data(file).unwrap();
        assert_eq!(image.len(), len);

        let head = u32::from_be_bytes(image[..4].try_into().unwrap()); // Safe-slice in test_case
        assert_eq!(head, start);

        let tail = u32::from_be_bytes(image[image.len() - 4..].try_into().unwrap()); // Safe-slice in test_case
        assert_eq!(tail, end);
    }

    /// Parsing the broken sample must still succeed.
    #[allow(deprecated)]
    #[test]
    fn broken_jpg() {
        init_tracing();

        let file = open_sample("broken.jpg").unwrap();
        parse_jpeg_exif(file).unwrap();
    }
}