Skip to main content

phasm_core/codec/jpeg/
bitio.rs

1// Copyright (c) 2026 Christoph Gaffga
2// SPDX-License-Identifier: GPL-3.0-only
3// https://github.com/cgaffga/phasmcore
4
5//! Bit-level I/O for JPEG entropy-coded data.
6//!
7//! Provides [`BitReader`] for decoding and [`BitWriter`] for encoding the
8//! entropy-coded scan data. Both handle JPEG byte-stuffing (0xFF -> 0xFF 0x00)
9//! and operate in MSB-first bit order.
10
11use super::error::{JpegError, Result};
12
13/// Bit-level reader for JPEG entropy-coded data.
14///
15/// Handles JPEG byte-stuffing (0xFF00 → 0xFF) and marker detection.
16/// Bits are read MSB-first from a 32-bit internal buffer.
17pub struct BitReader<'a> {
18    data: &'a [u8],
19    pos: usize,
20    /// Bit buffer, MSB-aligned. Valid bits are in the top `bits_left` positions.
21    buf: u32,
22    bits_left: u8,
23    /// Set when a marker (0xFF followed by non-zero byte) is found in the stream.
24    marker_found: Option<u8>,
25}
26
27impl<'a> BitReader<'a> {
28    /// Create a new BitReader over the given byte slice.
29    /// `pos` should point to the first byte of entropy-coded data (after SOS header).
30    pub fn new(data: &'a [u8], pos: usize) -> Self {
31        Self {
32            data,
33            pos,
34            buf: 0,
35            bits_left: 0,
36            marker_found: None,
37        }
38    }
39
40    /// Read `count` bits (1–16) and return them right-aligned.
41    pub fn read_bits(&mut self, count: u8) -> Result<u16> {
42        debug_assert!((1..=16).contains(&count));
43        while self.bits_left < count {
44            self.fill_byte()?;
45        }
46        self.bits_left -= count;
47        let val = (self.buf >> self.bits_left) & ((1u32 << count) - 1);
48        Ok(val as u16)
49    }
50
51    /// Peek at the top `count` bits without consuming them.
52    pub fn peek_bits(&mut self, count: u8) -> Result<u16> {
53        debug_assert!((1..=16).contains(&count));
54        while self.bits_left < count {
55            self.fill_byte()?;
56        }
57        let val = (self.buf >> (self.bits_left - count)) & ((1u32 << count) - 1);
58        Ok(val as u16)
59    }
60
61    /// Discard `count` bits (must have been peeked already).
62    pub fn skip_bits(&mut self, count: u8) {
63        debug_assert!(count <= self.bits_left);
64        self.bits_left -= count;
65    }
66
67    /// Align to the next byte boundary by discarding remaining bits in the current byte.
68    pub fn byte_align(&mut self) {
69        self.bits_left = 0;
70        self.buf = 0;
71    }
72
73    /// Current byte position in the underlying data.
74    pub fn position(&self) -> usize {
75        self.pos
76    }
77
78    /// Returns the marker byte if a marker was encountered during reading.
79    pub fn marker_found(&self) -> Option<u8> {
80        self.marker_found
81    }
82
83    /// Check if a restart marker (0xFFD0–0xFFD7) is present.
84    /// Checks both the `marker_found` flag (set if `fill_byte` already consumed
85    /// a RST marker during Huffman decoding) and the next bytes in the stream.
86    /// If found, consume the marker and return the marker's low nibble (0–7).
87    pub fn check_restart_marker(&mut self) -> Result<Option<u8>> {
88        self.byte_align();
89
90        // Case 1: fill_byte already consumed a RST marker during Huffman decoding
91        if let Some(m) = self.marker_found
92            && (m & 0xF8) == 0xD0 {
93                self.marker_found = None;
94                return Ok(Some(m & 0x07));
95            }
96
97        // Case 2: RST marker is at the current position in the stream
98        // Also skip any fill 0xFF bytes before the marker
99        while self.pos + 1 < self.data.len() && self.data[self.pos] == 0xFF {
100            let next = self.data[self.pos + 1];
101            if next == 0xFF {
102                // Fill byte — skip it
103                self.pos += 1;
104                continue;
105            }
106            if (next & 0xF8) == 0xD0 {
107                let rst = next & 0x07;
108                self.pos += 2;
109                return Ok(Some(rst));
110            }
111            break;
112        }
113
114        Ok(None)
115    }
116
117    fn fill_byte(&mut self) -> Result<()> {
118        if self.pos >= self.data.len() {
119            return Err(JpegError::UnexpectedEof);
120        }
121        let byte = self.data[self.pos];
122        self.pos += 1;
123
124        if byte == 0xFF {
125            if self.pos >= self.data.len() {
126                return Err(JpegError::UnexpectedEof);
127            }
128            let next = self.data[self.pos];
129            if next == 0x00 {
130                // Byte-stuffed 0xFF
131                self.pos += 1;
132            } else {
133                // This is a marker — signal it
134                self.marker_found = Some(next);
135                self.pos += 1;
136                // Treat as zero-fill for remaining reads
137                self.buf = (self.buf << 8) | 0xFF;
138                self.bits_left += 8;
139                return Ok(());
140            }
141        }
142
143        self.buf = (self.buf << 8) | (byte as u32);
144        self.bits_left += 8;
145        Ok(())
146    }
147}
148
/// MSB-first bit sink producing JPEG entropy-coded bytes.
///
/// Every completed `0xFF` byte is followed by a stuffed `0x00` in the output.
pub struct BitWriter {
    /// Finished bytes, including stuffing.
    output: Vec<u8>,
    /// Partially assembled byte; the pending bits sit in its low end.
    buf: u8,
    /// How many bits of `buf` are currently occupied (always below 8).
    bits_used: u8,
}

impl Default for BitWriter {
    fn default() -> Self {
        Self::new()
    }
}

impl BitWriter {
    /// Create an empty writer with no pending bits.
    pub fn new() -> Self {
        BitWriter {
            output: Vec::new(),
            buf: 0,
            bits_used: 0,
        }
    }

    /// Append the low `count` bits (1–16) of `value`, most significant first.
    pub fn write_bits(&mut self, value: u16, count: u8) {
        debug_assert!((1..=16).contains(&count));
        let mut shift = count;
        while shift > 0 {
            shift -= 1;
            self.push_bit((value >> shift) & 1 != 0);
        }
    }

    /// Finish the stream and hand back the bytes.
    ///
    /// A trailing partial byte is completed with 1-bits, as the JPEG format
    /// requires, before it is emitted.
    pub fn flush(mut self) -> Vec<u8> {
        if self.bits_used == 0 {
            return self.output;
        }
        let pad = 8 - self.bits_used;
        // `0xFF >> bits_used` has exactly the low `pad` bits set.
        let last = (self.buf << pad) | (0xFFu8 >> self.bits_used);
        self.emit_byte(last);
        self.output
    }

    /// Shift a single bit into the pending byte, emitting it once it is full.
    fn push_bit(&mut self, bit: bool) {
        self.buf = (self.buf << 1) | u8::from(bit);
        self.bits_used += 1;
        if self.bits_used == 8 {
            let full = std::mem::take(&mut self.buf);
            self.bits_used = 0;
            self.emit_byte(full);
        }
    }

    /// Store a finished byte, inserting the stuffing byte after `0xFF`.
    fn emit_byte(&mut self, byte: u8) {
        if byte == 0xFF {
            self.output.extend_from_slice(&[0xFF, 0x00]);
        } else {
            self.output.push(byte);
        }
    }
}
207
#[cfg(test)]
mod tests {
    use super::*;

    /// Feed each `(value, count)` pair to a fresh writer and return the flushed bytes.
    fn written(ops: &[(u16, u8)]) -> Vec<u8> {
        let mut writer = BitWriter::new();
        for &(value, count) in ops {
            writer.write_bits(value, count);
        }
        writer.flush()
    }

    #[test]
    fn read_basic_bits() {
        // 0xA5 = 1010_0101, consumed as two nibbles.
        let bytes = [0xA5];
        let mut reader = BitReader::new(&bytes, 0);
        let high = reader.read_bits(4).unwrap();
        let low = reader.read_bits(4).unwrap();
        assert_eq!(high, 0b1010);
        assert_eq!(low, 0b0101);
    }

    #[test]
    fn read_cross_byte() {
        // 0xFF00 0x80 de-stuffs to 0xFF, 0x80.
        let bytes = [0xFF, 0x00, 0x80];
        let mut reader = BitReader::new(&bytes, 0);
        // Twelve bits span the byte boundary: 1111_1111_1000.
        let twelve = reader.read_bits(12).unwrap();
        assert_eq!(twelve, 0xFF8);
    }

    #[test]
    fn byte_stuffing_decode() {
        // The pair 0xFF 0x00 must come out as the single byte 0xFF.
        let bytes = [0xFF, 0x00];
        let mut reader = BitReader::new(&bytes, 0);
        let byte = reader.read_bits(8).unwrap();
        assert_eq!(byte, 0xFF);
    }

    #[test]
    fn marker_detection() {
        // 0xFF 0xD9 is the EOI marker, not byte-stuffed data.
        let bytes = [0xAB, 0xFF, 0xD9];
        let mut reader = BitReader::new(&bytes, 0);
        let first = reader.read_bits(8).unwrap();
        assert_eq!(first, 0xAB);
        // The following read runs into the marker; only the flag is checked.
        let _ = reader.read_bits(8);
        assert_eq!(reader.marker_found(), Some(0xD9));
    }

    #[test]
    fn write_basic() {
        let out = written(&[(0b1010, 4), (0b0101, 4)]);
        assert_eq!(out, vec![0xA5]);
    }

    #[test]
    fn write_byte_stuffing() {
        let out = written(&[(0xFF, 8)]);
        assert_eq!(out, vec![0xFF, 0x00]);
    }

    #[test]
    fn write_padding() {
        // Padding uses 1-bits: 110_11111 = 0xDF.
        let out = written(&[(0b110, 3)]);
        assert_eq!(out, vec![0xDF]);
    }

    #[test]
    fn write_cross_byte() {
        // First byte is 0xFF (stuffed), the rest is 1000 padded to 1000_1111.
        let out = written(&[(0b1111_1111_1000, 12)]);
        assert_eq!(out, vec![0xFF, 0x00, 0x8F]);
    }

    #[test]
    fn peek_then_skip() {
        // 0xA5 = 1010_0101
        let bytes = [0xA5];
        let mut reader = BitReader::new(&bytes, 0);
        let peeked = reader.peek_bits(4).unwrap();
        assert_eq!(peeked, 0b1010);
        reader.skip_bits(4);
        let rest = reader.read_bits(4).unwrap();
        assert_eq!(rest, 0b0101);
    }
}