// phasm_core/codec/jpeg/marker.rs

// Copyright (c) 2026 Christoph Gaffga
// SPDX-License-Identifier: GPL-3.0-only
// https://github.com/cgaffga/phasmcore

//! JPEG marker parsing and iteration.
//!
//! Walks the marker segments in a JPEG byte stream, extracting headers
//! (DQT, DHT, SOF, DRI, SOS) and preserving unknown markers verbatim.
//! Stops at the SOS marker, returning the byte offset where entropy-coded
//! scan data begins.

use super::error::{JpegError, Result};

// JPEG marker constants. Each value is the marker code byte that follows the
// 0xFF prefix in the stream.

/// SOI — start of image.
pub const SOI: u8 = 0xD8;
/// EOI — end of image.
pub const EOI: u8 = 0xD9;
/// SOF0 — start of frame, baseline DCT.
pub const SOF0: u8 = 0xC0;
/// SOF2 — start of frame, progressive DCT.
pub const SOF2: u8 = 0xC2;
/// DHT — define Huffman table(s).
pub const DHT: u8 = 0xC4;
/// DQT — define quantization table(s).
pub const DQT: u8 = 0xDB;
/// DRI — define restart interval.
pub const DRI: u8 = 0xDD;
/// SOS — start of scan.
pub const SOS: u8 = 0xDA;
/// COM — comment.
pub const COM: u8 = 0xFE;

/// A raw marker segment preserving the original bytes.
#[derive(Debug, Clone)]
pub struct MarkerSegment {
    /// The marker byte (e.g., 0xDB for DQT). Does NOT include the 0xFF prefix.
    pub marker: u8,
    /// The segment data NOT including the marker or the 2-byte length field.
    pub data: Vec<u8>,
}

/// Parsed marker with position information.
#[derive(Debug, Clone)]
pub struct MarkerEntry {
    /// The marker byte, without the 0xFF prefix.
    pub marker: u8,
    /// Segment data (empty for standalone markers like SOI, EOI, RST).
    pub data: Vec<u8>,
    /// Byte offset of the marker (the 0xFF byte) in the original data.
    pub offset: usize,
}

43/// Iterate over JPEG markers from a byte slice.
44///
45/// Returns markers and their segment data in order.
46/// Stops when SOS is encountered (caller handles entropy-coded data).
47pub fn iterate_markers(data: &[u8]) -> Result<(Vec<MarkerEntry>, usize)> {
48    let mut entries = Vec::new();
49    // Check SOI
50    if data.len() < 2 || data[0] != 0xFF || data[1] != SOI {
51        return Err(JpegError::InvalidSoi);
52    }
53    entries.push(MarkerEntry {
54        marker: SOI,
55        data: Vec::new(),
56        offset: 0,
57    });
58    let mut pos = 2;
59
60    loop {
61        // Find next 0xFF
62        while pos < data.len() && data[pos] != 0xFF {
63            pos += 1;
64        }
65        if pos + 1 >= data.len() {
66            return Err(JpegError::UnexpectedEof);
67        }
68
69        // Skip padding 0xFF bytes
70        while pos + 1 < data.len() && data[pos + 1] == 0xFF {
71            pos += 1;
72        }
73        if pos + 1 >= data.len() {
74            return Err(JpegError::UnexpectedEof);
75        }
76
77        let marker_offset = pos;
78        let marker = data[pos + 1];
79        pos += 2;
80
81        // Skip 0xFF00 (byte-stuffed, shouldn't appear outside scan but handle gracefully)
82        if marker == 0x00 {
83            continue;
84        }
85
86        // Standalone markers (no length field)
87        if marker == EOI || (0xD0..=0xD7).contains(&marker) {
88            entries.push(MarkerEntry {
89                marker,
90                data: Vec::new(),
91                offset: marker_offset,
92            });
93            if marker == EOI {
94                return Ok((entries, pos));
95            }
96            continue;
97        }
98
99        // Check for unsupported markers
100        if is_unsupported(marker) {
101            return Err(JpegError::UnsupportedMarker(marker));
102        }
103
104        // Read segment length
105        if pos + 2 > data.len() {
106            return Err(JpegError::UnexpectedEof);
107        }
108        let length = u16::from_be_bytes([data[pos], data[pos + 1]]) as usize;
109        if length < 2 || pos + length > data.len() {
110            return Err(JpegError::InvalidMarkerData("invalid segment length"));
111        }
112        let segment_data = data[pos + 2..pos + length].to_vec();
113
114        entries.push(MarkerEntry {
115            marker,
116            data: segment_data,
117            offset: marker_offset,
118        });
119
120        pos += length;
121
122        // Stop at SOS — scan data follows
123        if marker == SOS {
124            return Ok((entries, pos));
125        }
126    }
127}
128
/// Returns `true` for start-of-frame marker codes that the marker iterators
/// reject with `JpegError::UnsupportedMarker`.
fn is_unsupported(marker: u8) -> bool {
    matches!(
        marker,
        0xC1 // SOF1 extended sequential
        | 0xC3 // SOF3 lossless
        | 0xC5..=0xC7 // SOF5-7 differential
        | 0xC9..=0xCB // SOF9-11 arithmetic
        | 0xCD..=0xCF // SOF13-15 differential arithmetic
    )
}

/// Spectral selection and successive approximation parameters from an SOS header.
#[derive(Debug, Clone, Copy)]
pub struct SosParams {
    /// Start of spectral selection (zigzag index 0-63).
    pub ss: u8,
    /// End of spectral selection (zigzag index 0-63).
    pub se: u8,
    /// Successive approximation high bit (0 = first scan for this band).
    pub ah: u8,
    /// Successive approximation low bit (point transform).
    pub al: u8,
}

153/// Parse an SOS (Start of Scan) header.
154/// Returns component selectors: (component_id, dc_table_id, ac_table_id) per scan component.
155pub fn parse_sos(data: &[u8]) -> Result<Vec<(u8, u8, u8)>> {
156    if data.is_empty() {
157        return Err(JpegError::InvalidMarkerData("empty SOS"));
158    }
159    let num_components = data[0] as usize;
160    if data.len() < 1 + num_components * 2 + 3 {
161        return Err(JpegError::UnexpectedEof);
162    }
163
164    let mut selectors = Vec::with_capacity(num_components);
165    for i in 0..num_components {
166        let offset = 1 + i * 2;
167        let comp_id = data[offset];
168        let td_ta = data[offset + 1];
169        let dc_id = td_ta >> 4;
170        let ac_id = td_ta & 0x0F;
171        selectors.push((comp_id, dc_id, ac_id));
172    }
173
174    Ok(selectors)
175}
176
177/// Parse the spectral selection / successive approximation parameters from an SOS header.
178/// These are the last 3 bytes of the SOS header data: Ss, Se, Ah_Al.
179pub fn parse_sos_params(data: &[u8]) -> Result<SosParams> {
180    if data.is_empty() {
181        return Err(JpegError::InvalidMarkerData("empty SOS"));
182    }
183    let num_components = data[0] as usize;
184    let params_offset = 1 + num_components * 2;
185    if data.len() < params_offset + 3 {
186        return Err(JpegError::UnexpectedEof);
187    }
188    let ss = data[params_offset];
189    let se = data[params_offset + 1];
190    let ah_al = data[params_offset + 2];
191    let ah = ah_al >> 4;
192    let al = ah_al & 0x0F;
193    Ok(SosParams { ss, se, ah, al })
194}
195
196/// Skip past entropy-coded scan data to find the next marker.
197///
198/// Starting from `pos` (the first byte of entropy-coded data after an SOS header),
199/// scans forward looking for a 0xFF byte followed by a non-zero, non-RST marker byte.
200/// Returns the byte offset of the 0xFF byte of the next marker.
201pub fn skip_scan_data(data: &[u8], mut pos: usize) -> Result<usize> {
202    while pos < data.len() {
203        if data[pos] != 0xFF {
204            pos += 1;
205            continue;
206        }
207        // Found 0xFF — check what follows
208        if pos + 1 >= data.len() {
209            return Err(JpegError::UnexpectedEof);
210        }
211        let next = data[pos + 1];
212        if next == 0x00 {
213            // Byte-stuffed 0xFF — skip both bytes
214            pos += 2;
215            continue;
216        }
217        if (0xD0..=0xD7).contains(&next) {
218            // Restart marker — skip it
219            pos += 2;
220            continue;
221        }
222        if next == 0xFF {
223            // Fill byte — skip one 0xFF
224            pos += 1;
225            continue;
226        }
227        // Found a real marker — return position of the 0xFF
228        return Ok(pos);
229    }
230    Err(JpegError::UnexpectedEof)
231}
232
233/// Iterate markers for a progressive JPEG file, handling multiple scans.
234///
235/// Returns all marker entries (including multiple SOS markers) and, for each SOS,
236/// the byte offset where its entropy-coded data begins. The returned `scan_starts`
237/// vector has one entry per SOS marker found, giving the byte offset right after
238/// that SOS header where scan data begins.
239pub fn iterate_markers_all(data: &[u8]) -> Result<(Vec<MarkerEntry>, Vec<usize>)> {
240    let mut entries = Vec::new();
241    let mut scan_starts = Vec::new();
242
243    // Check SOI
244    if data.len() < 2 || data[0] != 0xFF || data[1] != SOI {
245        return Err(JpegError::InvalidSoi);
246    }
247    entries.push(MarkerEntry {
248        marker: SOI,
249        data: Vec::new(),
250        offset: 0,
251    });
252    let mut pos = 2;
253
254    loop {
255        // Find next 0xFF
256        while pos < data.len() && data[pos] != 0xFF {
257            pos += 1;
258        }
259        if pos + 1 >= data.len() {
260            return Err(JpegError::UnexpectedEof);
261        }
262
263        // Skip padding 0xFF bytes
264        while pos + 1 < data.len() && data[pos + 1] == 0xFF {
265            pos += 1;
266        }
267        if pos + 1 >= data.len() {
268            return Err(JpegError::UnexpectedEof);
269        }
270
271        let marker_offset = pos;
272        let marker = data[pos + 1];
273        pos += 2;
274
275        // Skip 0xFF00 (byte-stuffed)
276        if marker == 0x00 {
277            continue;
278        }
279
280        // Standalone markers (no length field)
281        if marker == EOI || (0xD0..=0xD7).contains(&marker) {
282            entries.push(MarkerEntry {
283                marker,
284                data: Vec::new(),
285                offset: marker_offset,
286            });
287            if marker == EOI {
288                return Ok((entries, scan_starts));
289            }
290            continue;
291        }
292
293        // Check for unsupported markers
294        if is_unsupported(marker) {
295            return Err(JpegError::UnsupportedMarker(marker));
296        }
297
298        // Read segment length
299        if pos + 2 > data.len() {
300            return Err(JpegError::UnexpectedEof);
301        }
302        let length = u16::from_be_bytes([data[pos], data[pos + 1]]) as usize;
303        if length < 2 || pos + length > data.len() {
304            return Err(JpegError::InvalidMarkerData("invalid segment length"));
305        }
306        let segment_data = data[pos + 2..pos + length].to_vec();
307
308        entries.push(MarkerEntry {
309            marker,
310            data: segment_data,
311            offset: marker_offset,
312        });
313
314        pos += length;
315
316        // For SOS: record scan start and skip past entropy-coded data
317        if marker == SOS {
318            scan_starts.push(pos);
319            // Skip past the entropy-coded scan data to find the next marker
320            pos = skip_scan_data(data, pos)?;
321        }
322    }
323}
324
325/// Parse DRI (Define Restart Interval) marker data.
326pub fn parse_dri(data: &[u8]) -> Result<u16> {
327    if data.len() < 2 {
328        return Err(JpegError::UnexpectedEof);
329    }
330    Ok(u16::from_be_bytes([data[0], data[1]]))
331}
332
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn iterate_minimal_jpeg() {
        // Minimal: SOI + EOI
        let data = [0xFF, 0xD8, 0xFF, 0xD9];
        let (entries, end_pos) = iterate_markers(&data).unwrap();
        assert_eq!(entries.len(), 2);
        assert_eq!(entries[0].marker, SOI);
        assert_eq!(entries[1].marker, EOI);
        assert_eq!(end_pos, 4);
    }

    #[test]
    fn invalid_soi() {
        let data = [0x00, 0x00];
        assert!(matches!(iterate_markers(&data), Err(JpegError::InvalidSoi)));
    }

    #[test]
    fn accept_progressive_sof2() {
        // SOI then SOF2 (progressive) — should be accepted now
        let data = [
            0xFF, 0xD8, // SOI
            0xFF, 0xC2, // SOF2
            0x00, 0x0B, // length = 11
            8, 0, 8, 0, 8, 1, // precision=8, 8x8, 1 component
            1, 0x11, 0, // comp 1, 1x1, qt=0
            0xFF, 0xD9, // EOI
        ];
        let (entries, _) = iterate_markers(&data).unwrap();
        assert!(entries.iter().any(|e| e.marker == SOF2));
    }

    #[test]
    fn reject_lossless() {
        // SOI then SOF3 (lossless) — should still be rejected
        let data = [
            0xFF, 0xD8, // SOI
            0xFF, 0xC3, // SOF3
            0x00, 0x02, // length = 2 (minimal)
        ];
        assert!(matches!(
            iterate_markers(&data),
            Err(JpegError::UnsupportedMarker(0xC3))
        ));
    }

    #[test]
    fn iterate_stops_after_sos_header() {
        // SOI, SOS header (length 8), then scan data that must not be parsed.
        let data = [
            0xFF, 0xD8, // SOI
            0xFF, 0xDA, // SOS
            0x00, 0x08, // length = 8
            1, 1, 0x00, 0, 63, 0, // 1 component, Ss=0, Se=63, Ah/Al=0
            0xAB, 0xFF, 0xD9, // scan byte + EOI
        ];
        let (entries, end_pos) = iterate_markers(&data).unwrap();
        assert_eq!(entries.len(), 2);
        assert_eq!(entries[1].marker, SOS);
        assert_eq!(end_pos, 12);
    }

    #[test]
    fn parse_sos_header() {
        // 2 components: comp1 uses DC0/AC0, comp2 uses DC1/AC1
        let data = [2, 1, 0x00, 2, 0x11, 0, 63, 0]; // Ss=0, Se=63, Ah/Al=0
        let sels = parse_sos(&data).unwrap();
        assert_eq!(sels.len(), 2);
        assert_eq!(sels[0], (1, 0, 0));
        assert_eq!(sels[1], (2, 1, 1));
    }

    #[test]
    fn parse_sos_params_header() {
        // 1 component, then Ss=1, Se=5, Ah=2, Al=1
        let data = [1, 1, 0x00, 1, 5, 0x21];
        let params = parse_sos_params(&data).unwrap();
        assert_eq!(params.ss, 1);
        assert_eq!(params.se, 5);
        assert_eq!(params.ah, 2);
        assert_eq!(params.al, 1);
    }

    #[test]
    fn skip_scan_data_finds_next_marker() {
        // Stuffed FF 00 and RST0 are skipped; EOI at offset 7 ends the scan.
        let data = [0x12, 0xFF, 0x00, 0x34, 0xFF, 0xD0, 0x56, 0xFF, 0xD9];
        assert_eq!(skip_scan_data(&data, 0).unwrap(), 7);
    }

    #[test]
    fn iterate_all_records_scan_start() {
        let data = [
            0xFF, 0xD8, // SOI
            0xFF, 0xDA, // SOS
            0x00, 0x08, // length = 8
            1, 1, 0x00, 0, 63, 0, // 1 component, Ss=0, Se=63, Ah/Al=0
            0xAB, 0xFF, 0x00, 0xCD, // scan data with a stuffed byte
            0xFF, 0xD9, // EOI
        ];
        let (entries, scan_starts) = iterate_markers_all(&data).unwrap();
        assert_eq!(entries.len(), 3);
        assert_eq!(entries[1].marker, SOS);
        assert_eq!(entries[2].marker, EOI);
        assert_eq!(scan_starts, vec![12]);
    }

    #[test]
    fn parse_dri_value() {
        let data = [0x00, 0x0A]; // restart interval = 10
        assert_eq!(parse_dri(&data).unwrap(), 10);
    }
}