hayro_ccitt/
lib.rs

1//! A decoder for CCITT fax-encoded images.
2//!
3//! This crate implements the CCITT Group 3 and Group 4 fax compression algorithms
4//! as defined in ITU-T Recommendations T.4 and T.6. These encodings are commonly
5//! used for bi-level (black and white) images in PDF documents and fax transmissions.
6//!
7//! The main entry point is the [`decode`] function, which takes encoded data and
8//! decoding settings, and outputs the decoded pixels through a [`Decoder`] trait
9//! that can be implemented according to your needs.
10//!
11//! The crate is `no_std` compatible but requires an allocator to be available.
12//!
13//! # Safety
14//! Unsafe code is forbidden via a crate-level attribute.
15//!
16//! # License
17//! Licensed under either of
18//!
19//! - Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or <http://www.apache.org/licenses/LICENSE-2.0>)
20//! - MIT license ([LICENSE-MIT](LICENSE-MIT) or <http://opensource.org/licenses/MIT>)
21//!
22//! at your option.
23//!
24//! [`decode`]: crate::decode
25//! [`Decoder`]: crate::Decoder
26
27#![no_std]
28#![forbid(unsafe_code)]
29#![forbid(missing_docs)]
30
31extern crate alloc;
32
33use crate::bit_reader::BitReader;
34
35use crate::decode::{EOFB, Mode};
36use alloc::vec;
37use alloc::vec::Vec;
38
39mod bit_reader;
40mod decode;
41mod state_machine;
42
/// Convenience alias for results whose error type is [`DecodeError`].
pub type Result<T> = core::result::Result<T, DecodeError>;

/// The ways in which decoding a CCITT stream can fail.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DecodeError {
    /// The input ended while more bits were still required.
    UnexpectedEof,
    /// An invalid Huffman code sequence was encountered during decoding.
    InvalidCode,
    /// A scanline did not contain the expected number of pixels.
    LineLengthMismatch,
    /// Arithmetic overflow (or underflow) in a run length or position
    /// calculation.
    Overflow,
}

impl core::fmt::Display for DecodeError {
    /// Writes a short, lowercase, human-readable description of the error.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let message = match self {
            Self::UnexpectedEof => "unexpected end of input",
            Self::InvalidCode => "invalid CCITT code sequence",
            Self::LineLengthMismatch => "scanline length mismatch",
            Self::Overflow => "arithmetic overflow in position calculation",
        };

        f.write_str(message)
    }
}

impl core::error::Error for DecodeError {}
71
/// Selects which CCITT coding scheme the input stream uses.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EncodingMode {
    /// Group 4 (MMR): every line is coded two-dimensionally.
    Group4,
    /// Group 3 1D (MH): every line is coded one-dimensionally.
    Group3_1D,
    /// Group 3 2D (MR): one- and two-dimensionally coded lines are mixed.
    Group3_2D {
        /// The K parameter of the two-dimensional coding scheme.
        ///
        /// The decoding routines in this file pick the mode of each line from
        /// the tag bit found in the stream and do not read this value.
        k: u32,
    },
}
85
86/// Settings to apply during decoding.
87#[derive(Copy, Clone, Debug)]
88pub struct DecodeSettings {
89    /// How many columns the image has (i.e. its width).
90    pub columns: u32,
91    /// How many rows the image has (i.e. its height).
92    ///
93    /// In case `end_of_block` has been set to true, decoding will run until
94    /// the given number of rows have been decoded, or the `end_of_block` marker
95    /// has been encountered, whichever occurs first.
96    pub rows: u32,
97    /// Whether the stream _MAY_ contain an end-of-block marker
98    /// (It doesn't have to. In that case this is set to `true` but there are
99    /// no end-of-block markers, hayro-ccitt will still use the value of `rows`
100    /// to determine when to stop decoding).
101    pub end_of_block: bool,
102    /// Whether the stream contains end-of-line markers.
103    pub end_of_line: bool,
104    /// Whether the data in the stream for each row is aligned to the byte
105    /// boundary.
106    pub rows_are_byte_aligned: bool,
107    /// The encoding mode used by the image.
108    pub encoding: EncodingMode,
109    /// Whether black and white should be inverted.
110    pub invert_black: bool,
111}
112
/// A sink that receives the decoded pixels of a CCITT image.
///
/// The decoding routines call these methods in row order: the pixels of a row
/// are pushed from left to right, followed by one call to
/// [`next_line`](Decoder::next_line) once the row is complete.
pub trait Decoder {
    /// Push a single pixel with the given color.
    ///
    /// `white` is the final output color, i.e. any black/white inversion
    /// requested in the settings has already been applied.
    fn push_pixel(&mut self, white: bool);
    /// Push multiple chunks of 8 pixels of the same color.
    ///
    /// The `chunk_count` parameter indicates how many 8-pixel chunks to push.
    /// For example, if this method is called with `white = true` and
    /// `chunk_count = 10`, 80 white pixels are pushed (10 × 8 = 80).
    ///
    /// You can assume that this method is only called if the number of pixels
    /// already pushed in the current row is a multiple of 8 (i.e. the row is
    /// byte-aligned at that point).
    fn push_pixel_chunk(&mut self, white: bool, chunk_count: u32);
    /// Called when a row has been completed, before any pixel of the
    /// following row is pushed.
    fn next_line(&mut self);
}
129
/// One of the two colors a pixel of a bi-level image can have.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum Color {
    /// A white pixel.
    White,
    /// A black pixel.
    Black,
}

impl Color {
    /// Flips the color: white becomes black and vice versa.
    #[inline(always)]
    fn opposite(self) -> Self {
        if self.is_white() {
            Self::Black
        } else {
            Self::White
        }
    }

    /// Whether this is [`Color::White`].
    #[inline(always)]
    fn is_white(self) -> bool {
        self == Self::White
    }
}
155
/// Represents a color change at a specific index in a line.
///
/// `idx` is the pixel position at which a run begins and `color` is the color
/// of that run.
#[derive(Clone, Copy)]
struct ColorChange {
    idx: u32,
    color: Color,
}
162
163/// Decode the given image using the provided settings and the decoder.
164///
165/// If decoding was successful, the number of bytes that have been read in total
166/// is returned.
167///
168/// If an error is returned, it means that the file is somehow malformed.
169/// However, even if that's the case, it is possible that a number
170/// of rows were decoded successfully and written into the decoder, so those
171/// can still be used, but the image might be truncated.
172pub fn decode(data: &[u8], decoder: &mut impl Decoder, settings: &DecodeSettings) -> Result<usize> {
173    let mut ctx = DecoderContext::new(decoder, settings);
174    let mut reader = BitReader::new(data);
175
176    match settings.encoding {
177        EncodingMode::Group4 => decode_group4(&mut ctx, &mut reader)?,
178        EncodingMode::Group3_1D => decode_group3_1d(&mut ctx, &mut reader)?,
179        EncodingMode::Group3_2D { .. } => decode_group3_2d(&mut ctx, &mut reader)?,
180    }
181
182    reader.align();
183    Ok(reader.byte_pos())
184}
185
186/// Group 3 1D decoding (T.4 Section 4.1).
187fn decode_group3_1d<T: Decoder>(
188    ctx: &mut DecoderContext<'_, T>,
189    reader: &mut BitReader<'_>,
190) -> Result<()> {
191    // It seems like PDF producers are a bit sloppy with the `end_of_line` flag,
192    // so we just always try to read one.
193    let _ = reader.read_eol_if_available();
194
195    loop {
196        decode_1d_line(ctx, reader)?;
197        ctx.next_line(reader)?;
198
199        if group3_check_eob(ctx, reader) {
200            break;
201        }
202    }
203
204    Ok(())
205}
206
207/// Group 3 2D decoding (T.4 Section 4.2).
208fn decode_group3_2d<T: Decoder>(
209    ctx: &mut DecoderContext<'_, T>,
210    reader: &mut BitReader<'_>,
211) -> Result<()> {
212    // It seems like PDF producers are a bit sloppy with the `end_of_line` flag,
213    // so we just always try to read one.
214    let _ = reader.read_eol_if_available();
215
216    loop {
217        let tag_bit = reader.read_bit()?;
218
219        if tag_bit == 1 {
220            decode_1d_line(ctx, reader)?;
221        } else {
222            decode_2d_line(ctx, reader)?;
223        }
224
225        ctx.next_line(reader)?;
226
227        if group3_check_eob(ctx, reader) {
228            break;
229        }
230    }
231
232    Ok(())
233}
234
235/// Check for end-of-block, including RTC (T.4 Section 4.1.4).
236fn group3_check_eob<T: Decoder>(
237    ctx: &mut DecoderContext<'_, T>,
238    reader: &mut BitReader<'_>,
239) -> bool {
240    let eol_count = reader.read_eol_if_available();
241
242    // T.4 Section 4.1.4: "The end of a document transmission is indicated by
243    // sending six consecutive EOLs."
244    // PDFBOX-2778 has 7 EOL, although it should only be 6. Let's be lenient
245    // and check with >=.
246    if ctx.settings.end_of_block && eol_count >= 6 {
247        return true;
248    }
249
250    if ctx.decoded_rows == ctx.settings.rows || reader.at_end() {
251        return true;
252    }
253
254    false
255}
256
257fn decode_group4<T: Decoder>(
258    ctx: &mut DecoderContext<'_, T>,
259    reader: &mut BitReader<'_>,
260) -> Result<()> {
261    loop {
262        if ctx.settings.end_of_block && reader.peak_bits(24) == Ok(EOFB) {
263            reader.read_bits(24)?;
264            break;
265        }
266
267        if ctx.decoded_rows == ctx.settings.rows || reader.at_end() {
268            break;
269        }
270
271        decode_2d_line(ctx, reader)?;
272        ctx.next_line(reader)?;
273    }
274
275    Ok(())
276}
277
278/// Decode a single 1D-coded line (T.4 Section 4.1.1, T.6 Section 2.2.4).
279#[inline(always)]
280fn decode_1d_line<T: Decoder>(
281    ctx: &mut DecoderContext<'_, T>,
282    reader: &mut BitReader<'_>,
283) -> Result<()> {
284    while !ctx.at_eol() {
285        let run_length = reader.decode_run(ctx.color)?;
286        ctx.push_pixels(run_length);
287        ctx.color = ctx.color.opposite();
288    }
289
290    Ok(())
291}
292
293/// Decode a single 2D-coded line (T.4 Section 4.2, T.6 Section 2.2).
294#[inline(always)]
295fn decode_2d_line<T: Decoder>(
296    ctx: &mut DecoderContext<'_, T>,
297    reader: &mut BitReader<'_>,
298) -> Result<()> {
299    while !ctx.at_eol() {
300        let mode = reader.decode_mode()?;
301
302        match mode {
303            // Pass mode (T.4 Section 4.2.1.3.2a, T.6 Section 2.2.3.1).
304            Mode::Pass => {
305                ctx.push_pixels(ctx.b2() - ctx.a0().unwrap_or(0));
306                ctx.update_b();
307                // No color change happens in pass mode.
308            }
309            // Vertical mode (T.4 Section 4.2.1.3.2b, T.6 Section 2.2.3.2).
310            Mode::Vertical(i) => {
311                let b1 = ctx.b1();
312                let a1 = if i >= 0 {
313                    b1.checked_add(i as u32).ok_or(DecodeError::Overflow)?
314                } else {
315                    b1.checked_sub((-i) as u32).ok_or(DecodeError::Overflow)?
316                };
317
318                let a0 = ctx.a0().unwrap_or(0);
319
320                ctx.push_pixels(a1.checked_sub(a0).ok_or(DecodeError::Overflow)?);
321                ctx.color = ctx.color.opposite();
322
323                ctx.update_b();
324            }
325            // Horizontal mode (T.4 Section 4.2.1.3.2c, T.6 Section 2.2.3.3).
326            Mode::Horizontal => {
327                let a0a1 = reader.decode_run(ctx.color)?;
328                ctx.push_pixels(a0a1);
329                ctx.color = ctx.color.opposite();
330
331                let a1a2 = reader.decode_run(ctx.color)?;
332                ctx.push_pixels(a1a2);
333                ctx.color = ctx.color.opposite();
334
335                ctx.update_b();
336            }
337        }
338    }
339
340    Ok(())
341}
342
/// The mutable state shared by all line decoding routines: the color changes
/// of the reference and coding line, the position within the coding line and
/// the sink that receives the decoded pixels.
struct DecoderContext<'a, T: Decoder> {
    /// Color changes in the reference line (previous line).
    ref_changes: Vec<ColorChange>,
    /// The minimum index into `ref_changes` we need to start from when
    /// searching for b1.
    ref_pos: u32,
    /// The index of b1 within `ref_changes`. If it does not refer to an
    /// existing entry, b1 is taken to be the line width.
    b1_idx: u32,
    /// Color changes in the coding line (current line being decoded).
    coding_changes: Vec<ColorChange>,
    /// Current position in the coding line (number of pixels decoded).
    pixels_decoded: u32,
    /// The decoder sink.
    decoder: &'a mut T,
    /// The width of a line in pixels (i.e. number of columns).
    line_width: u32,
    /// The color of the next run to be decoded.
    color: Color,
    /// How many rows have been decoded so far.
    decoded_rows: u32,
    /// The settings to apply during decoding.
    settings: &'a DecodeSettings,
    /// Whether to invert black and white.
    invert_black: bool,
}
367
368impl<'a, T: Decoder> DecoderContext<'a, T> {
369    fn new(decoder: &'a mut T, settings: &'a DecodeSettings) -> Self {
370        Self {
371            ref_changes: vec![],
372            ref_pos: 0,
373            b1_idx: 0,
374            coding_changes: Vec::new(),
375            pixels_decoded: 0,
376            decoder,
377            line_width: settings.columns,
378            // Each run starts with an imaginary white pixel on the left.
379            color: Color::White,
380            decoded_rows: 0,
381            settings,
382            invert_black: settings.invert_black,
383        }
384    }
385
386    /// `a0` refers to the first changing element on the current line.
387    fn a0(&self) -> Option<u32> {
388        if self.pixels_decoded == 0 {
389            // If we haven't coded anything yet, a0 conceptually points at the
390            // index -1. This is a bit of an edge case, and we therefore require
391            // callers of this method to handle the case themselves.
392            None
393        } else {
394            // Otherwise, the index points to the next element to be decoded.
395            Some(self.pixels_decoded)
396        }
397    }
398
399    /// "The first changing element on the reference line to the right of a0 and
400    /// of opposite color to a0."
401    fn b1(&self) -> u32 {
402        self.ref_changes
403            .get(self.b1_idx as usize)
404            .map_or(self.line_width, |c| c.idx)
405    }
406
407    /// "The next changing element to the right of b1, on the reference line."
408    fn b2(&self) -> u32 {
409        self.ref_changes
410            .get(self.b1_idx as usize + 1)
411            .map_or(self.line_width, |c| c.idx)
412    }
413
414    /// Compute the new position of b1 (and implicitly b2).
415    #[inline(always)]
416    fn update_b(&mut self) {
417        // b1 refers to an element of the opposite color.
418        let target_color = self.color.opposite();
419        // b1 must be strictly greater than a0.
420        let min_idx = self.a0().map_or(0, |a| a + 1);
421
422        self.b1_idx = self.line_width;
423
424        for i in self.ref_pos..self.ref_changes.len() as u32 {
425            let change = &self.ref_changes[i as usize];
426
427            if change.idx < min_idx {
428                self.ref_pos = i + 1;
429                continue;
430            }
431
432            if change.color == target_color {
433                self.b1_idx = i;
434                break;
435            }
436        }
437    }
438
439    #[inline(always)]
440    fn push_pixels(&mut self, count: u32) {
441        // Make sure we don't have too many pixels (for invalid files).
442        let count = count.min(self.line_width - self.pixels_decoded);
443        let white = self.color.is_white() ^ self.invert_black;
444        let mut remaining = count;
445
446        // Push individual pixels until we reach an 8-pixel boundary.
447        let pixels_to_boundary = (8 - (self.pixels_decoded % 8)) % 8;
448        let unaligned_pixels = remaining.min(pixels_to_boundary);
449        for _ in 0..unaligned_pixels {
450            self.decoder.push_pixel(white);
451            remaining -= 1;
452        }
453
454        // Push full chunks of 8 pixels.
455        let full_chunks = remaining / 8;
456        if full_chunks > 0 {
457            self.decoder.push_pixel_chunk(white, full_chunks);
458            remaining %= 8;
459        }
460
461        // Push remaining individual pixels.
462        for _ in 0..remaining {
463            self.decoder.push_pixel(white);
464        }
465
466        // Track the color change:
467        // - At start of line (no previous changes): only add if color differs from
468        //   imaginary white, i.e., only add if black.
469        // - Mid-line: only add if color differs from previous.
470        if count > 0 {
471            let is_change = self
472                .coding_changes
473                .last()
474                .map_or(!self.color.is_white(), |last| last.color != self.color);
475            if is_change {
476                self.coding_changes.push(ColorChange {
477                    idx: self.pixels_decoded,
478                    color: self.color,
479                });
480            }
481            self.pixels_decoded += count;
482        }
483    }
484
485    fn at_eol(&self) -> bool {
486        self.a0().unwrap_or(0) == self.line_width
487    }
488
489    #[inline(always)]
490    fn next_line(&mut self, reader: &mut BitReader<'_>) -> Result<()> {
491        if self.pixels_decoded != self.settings.columns {
492            return Err(DecodeError::LineLengthMismatch);
493        }
494
495        core::mem::swap(&mut self.ref_changes, &mut self.coding_changes);
496        self.coding_changes.clear();
497        self.pixels_decoded = 0;
498        self.ref_pos = 0;
499        self.b1_idx = 0;
500        self.color = Color::White;
501        self.decoded_rows += 1;
502        self.decoder.next_line();
503
504        if self.settings.rows_are_byte_aligned {
505            reader.align();
506        }
507
508        self.update_b();
509
510        Ok(())
511    }
512}