Skip to main content

hayro_ccitt/
lib.rs

1//! A decoder for CCITT fax-encoded images.
2//!
3//! This crate implements the CCITT Group 3 and Group 4 fax compression algorithms
4//! as defined in ITU-T Recommendations T.4 and T.6. These encodings are commonly
5//! used for bi-level (black and white) images in PDF documents and fax transmissions.
6//!
7//! The main entry point is the [`decode`] function, which takes encoded data, a
8//! [`DecoderContext`], and outputs the decoded pixels through a [`Decoder`] trait
9//! that can be implemented according to your needs.
10//!
11//! The crate is `no_std` compatible but requires an allocator to be available.
12//!
13//! # Safety
14//! Unsafe code is forbidden via a crate-level attribute.
15//!
16//! # License
17//! Licensed under either of
18//!
19//! - Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or <http://www.apache.org/licenses/LICENSE-2.0>)
20//! - MIT license ([LICENSE-MIT](LICENSE-MIT) or <http://opensource.org/licenses/MIT>)
21//!
22//! at your option.
23//!
24//! [`decode`]: crate::decode
25//! [`Decoder`]: crate::Decoder
26
27#![no_std]
28#![forbid(unsafe_code)]
29#![forbid(missing_docs)]
30
31extern crate alloc;
32
33use crate::bit_reader::BitReader;
34
35use crate::decode::{EOFB, Mode};
36use alloc::vec;
37use alloc::vec::Vec;
38
39mod bit_reader;
40mod decode;
41mod state_machine;
42
43/// A specialized Result type for CCITT decoding operations.
44pub type Result<T> = core::result::Result<T, DecodeError>;
45
46/// An error that can occur during CCITT decoding.
47#[derive(Debug, Clone, Copy, PartialEq, Eq)]
48pub enum DecodeError {
49    /// Unexpected end of input while reading bits.
50    UnexpectedEof,
51    /// Invalid Huffman code sequence was encountered during decoding.
52    InvalidCode,
53    /// A scanline didn't have the expected number of pixels.
54    LineLengthMismatch,
55    /// Arithmetic overflow in run length or position calculation.
56    Overflow,
57}
58
59impl core::fmt::Display for DecodeError {
60    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
61        match self {
62            Self::UnexpectedEof => write!(f, "unexpected end of input"),
63            Self::InvalidCode => write!(f, "invalid CCITT code sequence"),
64            Self::LineLengthMismatch => write!(f, "scanline length mismatch"),
65            Self::Overflow => write!(f, "arithmetic overflow in position calculation"),
66        }
67    }
68}
69
70impl core::error::Error for DecodeError {}
71
72/// The encoding mode for CCITT fax decoding.
73#[derive(Copy, Clone, Debug, PartialEq, Eq)]
74pub enum EncodingMode {
75    /// Group 4 (MMR).
76    Group4,
77    /// Group 3 1D (MH).
78    Group3_1D,
79    /// Group 3 2D (MR).
80    Group3_2D {
81        /// The K parameter.
82        k: u32,
83    },
84}
85
86/// Settings to apply during decoding.
87#[derive(Copy, Clone, Debug)]
88pub struct DecodeSettings {
89    /// How many columns the image has (i.e. its width).
90    pub columns: u32,
91    /// How many rows the image has (i.e. its height).
92    ///
93    /// In case `end_of_block` has been set to true, decoding will run until
94    /// the given number of rows have been decoded, or the `end_of_block` marker
95    /// has been encountered, whichever occurs first.
96    pub rows: u32,
97    /// Whether the stream _MAY_ contain an end-of-block marker
98    /// (It doesn't have to. In that case this is set to `true` but there are
99    /// no end-of-block markers, hayro-ccitt will still use the value of `rows`
100    /// to determine when to stop decoding).
101    pub end_of_block: bool,
102    /// Whether the stream contains end-of-line markers.
103    pub end_of_line: bool,
104    /// Whether the data in the stream for each row is aligned to the byte
105    /// boundary.
106    pub rows_are_byte_aligned: bool,
107    /// The encoding mode used by the image.
108    pub encoding: EncodingMode,
109    /// Whether black and white should be inverted.
110    pub invert_black: bool,
111}
112
113/// A decoder for CCITT images.
114pub trait Decoder {
115    /// Push a single pixel with the given color.
116    fn push_pixel(&mut self, white: bool);
117    /// Push multiple chunks of 8 pixels of the same color.
118    ///
119    /// The `chunk_count` parameter indicates how many 8-pixel chunks to push.
120    /// For example, if this method is called with `white = true` and
121    /// `chunk_count = 10`, 80 white pixels are pushed (10 × 8 = 80).
122    ///
123    /// You can assume that this method is only called if the number of already
124    /// pushed pixels is a multiple of 8 (i.e. byte-aligned).
125    fn push_pixel_chunk(&mut self, white: bool, chunk_count: u32);
126    /// Called when a row has been completed.
127    fn next_line(&mut self);
128}
129
130/// Pixel color in a bi-level (black and white) image.
131#[derive(Debug, Clone, Copy, PartialEq, Eq)]
132pub(crate) enum Color {
133    /// White pixel.
134    White,
135    /// Black pixel.
136    Black,
137}
138
139impl Color {
140    /// Returns the opposite color.
141    #[inline(always)]
142    fn opposite(self) -> Self {
143        match self {
144            Self::White => Self::Black,
145            Self::Black => Self::White,
146        }
147    }
148
149    /// Returns true if this color is white.
150    #[inline(always)]
151    fn is_white(self) -> bool {
152        matches!(self, Self::White)
153    }
154}
155
156/// Represents a color change at a specific index in a line.
157#[derive(Clone, Copy)]
158struct ColorChange {
159    idx: u32,
160    color: Color,
161}
162
163/// Decode the given image using the provided decoder context and decoder.
164///
165/// If decoding was successful, the number of bytes that have been read in total
166/// is returned.
167///
168/// If an error is returned, it means that the file is somehow malformed.
169/// However, even if that's the case, it is possible that a number
170/// of rows were decoded successfully and written into the decoder, so those
171/// can still be used, but the image might be truncated.
172pub fn decode(data: &[u8], decoder: &mut impl Decoder, ctx: &mut DecoderContext) -> Result<usize> {
173    ctx.reset();
174    let mut reader = BitReader::new(data);
175
176    match ctx.settings.encoding {
177        EncodingMode::Group4 => decode_group4(ctx, &mut reader, decoder)?,
178        EncodingMode::Group3_1D => decode_group3_1d(ctx, &mut reader, decoder)?,
179        EncodingMode::Group3_2D { .. } => decode_group3_2d(ctx, &mut reader, decoder)?,
180    }
181
182    reader.align();
183    Ok(reader.byte_pos())
184}
185
186/// Group 3 1D decoding (T.4 Section 4.1).
187fn decode_group3_1d(
188    ctx: &mut DecoderContext,
189    reader: &mut BitReader<'_>,
190    decoder: &mut impl Decoder,
191) -> Result<()> {
192    // It seems like PDF producers are a bit sloppy with the `end_of_line` flag,
193    // so we just always try to read one.
194    let _ = reader.read_eol_if_available();
195
196    loop {
197        decode_1d_line(ctx, reader, decoder)?;
198        ctx.next_line(reader, decoder)?;
199
200        if group3_check_eob(ctx, reader) {
201            break;
202        }
203    }
204
205    Ok(())
206}
207
208/// Group 3 2D decoding (T.4 Section 4.2).
209fn decode_group3_2d(
210    ctx: &mut DecoderContext,
211    reader: &mut BitReader<'_>,
212    decoder: &mut impl Decoder,
213) -> Result<()> {
214    // It seems like PDF producers are a bit sloppy with the `end_of_line` flag,
215    // so we just always try to read one.
216    let _ = reader.read_eol_if_available();
217
218    loop {
219        let tag_bit = reader.read_bit()?;
220
221        if tag_bit == 1 {
222            decode_1d_line(ctx, reader, decoder)?;
223        } else {
224            decode_2d_line(ctx, reader, decoder)?;
225        }
226
227        ctx.next_line(reader, decoder)?;
228
229        if group3_check_eob(ctx, reader) {
230            break;
231        }
232    }
233
234    Ok(())
235}
236
237/// Check for end-of-block, including RTC (T.4 Section 4.1.4).
238fn group3_check_eob(ctx: &mut DecoderContext, reader: &mut BitReader<'_>) -> bool {
239    let eol_count = reader.read_eol_if_available();
240
241    // T.4 Section 4.1.4: "The end of a document transmission is indicated by
242    // sending six consecutive EOLs."
243    // PDFBOX-2778 has 7 EOL, although it should only be 6. Let's be lenient
244    // and check with >=.
245    if ctx.settings.end_of_block && eol_count >= 6 {
246        return true;
247    }
248
249    if ctx.decoded_rows == ctx.settings.rows || reader.at_end() {
250        return true;
251    }
252
253    false
254}
255
256fn decode_group4(
257    ctx: &mut DecoderContext,
258    reader: &mut BitReader<'_>,
259    decoder: &mut impl Decoder,
260) -> Result<()> {
261    loop {
262        if ctx.settings.end_of_block && reader.peak_bits(24) == Ok(EOFB) {
263            reader.read_bits(24)?;
264            break;
265        }
266
267        if ctx.decoded_rows == ctx.settings.rows || reader.at_end() {
268            break;
269        }
270
271        decode_2d_line(ctx, reader, decoder)?;
272        ctx.next_line(reader, decoder)?;
273    }
274
275    Ok(())
276}
277
278/// Decode a single 1D-coded line (T.4 Section 4.1.1, T.6 Section 2.2.4).
279#[inline(always)]
280fn decode_1d_line(
281    ctx: &mut DecoderContext,
282    reader: &mut BitReader<'_>,
283    decoder: &mut impl Decoder,
284) -> Result<()> {
285    while !ctx.at_eol() {
286        let run_length = reader.decode_run(ctx.color)?;
287        ctx.push_pixels(decoder, run_length);
288        ctx.color = ctx.color.opposite();
289    }
290
291    Ok(())
292}
293
294/// Decode a single 2D-coded line (T.4 Section 4.2, T.6 Section 2.2).
295#[inline(always)]
296fn decode_2d_line(
297    ctx: &mut DecoderContext,
298    reader: &mut BitReader<'_>,
299    decoder: &mut impl Decoder,
300) -> Result<()> {
301    while !ctx.at_eol() {
302        let mode = reader.decode_mode()?;
303
304        match mode {
305            // Pass mode (T.4 Section 4.2.1.3.2a, T.6 Section 2.2.3.1).
306            Mode::Pass => {
307                ctx.push_pixels(decoder, ctx.b2() - ctx.a0().unwrap_or(0));
308                ctx.update_b();
309                // No color change happens in pass mode.
310            }
311            // Vertical mode (T.4 Section 4.2.1.3.2b, T.6 Section 2.2.3.2).
312            Mode::Vertical(i) => {
313                let b1 = ctx.b1();
314                let a1 = if i >= 0 {
315                    b1.checked_add(i as u32).ok_or(DecodeError::Overflow)?
316                } else {
317                    b1.checked_sub((-i) as u32).ok_or(DecodeError::Overflow)?
318                };
319
320                let a0 = ctx.a0().unwrap_or(0);
321
322                ctx.push_pixels(decoder, a1.checked_sub(a0).ok_or(DecodeError::Overflow)?);
323                ctx.color = ctx.color.opposite();
324
325                ctx.update_b();
326            }
327            // Horizontal mode (T.4 Section 4.2.1.3.2c, T.6 Section 2.2.3.3).
328            Mode::Horizontal => {
329                let a0a1 = reader.decode_run(ctx.color)?;
330                ctx.push_pixels(decoder, a0a1);
331                ctx.color = ctx.color.opposite();
332
333                let a1a2 = reader.decode_run(ctx.color)?;
334                ctx.push_pixels(decoder, a1a2);
335                ctx.color = ctx.color.opposite();
336
337                ctx.update_b();
338            }
339        }
340    }
341
342    Ok(())
343}
344
345/// A reusable context for decoding CCITT images.
346pub struct DecoderContext {
347    /// Color changes in the reference line (previous line).
348    ref_changes: Vec<ColorChange>,
349    /// The minimum index we need to start from when searching for b1.
350    ref_pos: u32,
351    /// The current index of b1.
352    b1_idx: u32,
353    /// Color changes in the coding line (current line being decoded).
354    coding_changes: Vec<ColorChange>,
355    /// Current position in the coding line (number of pixels decoded).
356    pixels_decoded: u32,
357    /// The width of a line in pixels (i.e. number of columns).
358    line_width: u32,
359    /// The color of the next run to be decoded.
360    color: Color,
361    /// How many rows have been decoded so far.
362    decoded_rows: u32,
363    /// The settings to apply during decoding.
364    settings: DecodeSettings,
365    /// Whether to invert black and white.
366    invert_black: bool,
367}
368
369impl DecoderContext {
370    /// Creates a new decoder context using the provided decode settings.
371    pub fn new(settings: DecodeSettings) -> Self {
372        Self {
373            ref_changes: vec![],
374            ref_pos: 0,
375            b1_idx: 0,
376            coding_changes: Vec::new(),
377            pixels_decoded: 0,
378            line_width: settings.columns,
379            // Each run starts with an imaginary white pixel on the left.
380            color: Color::White,
381            decoded_rows: 0,
382            settings,
383            invert_black: settings.invert_black,
384        }
385    }
386
387    fn reset(&mut self) {
388        self.ref_changes.clear();
389        self.ref_pos = 0;
390        self.b1_idx = 0;
391        self.coding_changes.clear();
392        self.pixels_decoded = 0;
393        self.line_width = self.settings.columns;
394        self.color = Color::White;
395        self.decoded_rows = 0;
396        self.invert_black = self.settings.invert_black;
397    }
398
399    /// `a0` refers to the first changing element on the current line.
400    #[inline(always)]
401    fn a0(&self) -> Option<u32> {
402        if self.pixels_decoded == 0 {
403            // If we haven't coded anything yet, a0 conceptually points at the
404            // index -1. This is a bit of an edge case, and we therefore require
405            // callers of this method to handle the case themselves.
406            None
407        } else {
408            // Otherwise, the index points to the next element to be decoded.
409            Some(self.pixels_decoded)
410        }
411    }
412
413    /// "The first changing element on the reference line to the right of a0 and
414    /// of opposite color to a0."
415    #[inline(always)]
416    fn b1(&self) -> u32 {
417        self.ref_changes
418            .get(self.b1_idx as usize)
419            .map_or(self.line_width, |c| c.idx)
420    }
421
422    /// "The next changing element to the right of b1, on the reference line."
423    #[inline(always)]
424    fn b2(&self) -> u32 {
425        self.ref_changes
426            .get(self.b1_idx as usize + 1)
427            .map_or(self.line_width, |c| c.idx)
428    }
429
430    /// Compute the new position of b1 (and implicitly b2).
431    #[inline(always)]
432    fn update_b(&mut self) {
433        // b1 refers to an element of the opposite color.
434        let target_color = self.color.opposite();
435        // b1 must be strictly greater than a0.
436        let min_idx = self.a0().map_or(0, |a| a + 1);
437
438        self.b1_idx = self.line_width;
439
440        for i in self.ref_pos..self.ref_changes.len() as u32 {
441            let change = &self.ref_changes[i as usize];
442
443            if change.idx < min_idx {
444                self.ref_pos = i + 1;
445                continue;
446            }
447
448            if change.color == target_color {
449                self.b1_idx = i;
450                break;
451            }
452        }
453    }
454
455    #[inline(always)]
456    fn push_pixels(&mut self, decoder: &mut impl Decoder, count: u32) {
457        // Make sure we don't have too many pixels (for invalid files).
458        let count = count.min(self.line_width - self.pixels_decoded);
459        let white = self.color.is_white() ^ self.invert_black;
460        let mut remaining = count;
461
462        // Push individual pixels until we reach an 8-pixel boundary.
463        let pixels_to_boundary = (8 - (self.pixels_decoded % 8)) % 8;
464        let unaligned_pixels = remaining.min(pixels_to_boundary);
465        for _ in 0..unaligned_pixels {
466            decoder.push_pixel(white);
467            remaining -= 1;
468        }
469
470        // Push full chunks of 8 pixels.
471        let full_chunks = remaining / 8;
472        if full_chunks > 0 {
473            decoder.push_pixel_chunk(white, full_chunks);
474            remaining %= 8;
475        }
476
477        // Push remaining individual pixels.
478        for _ in 0..remaining {
479            decoder.push_pixel(white);
480        }
481
482        // Track the color change:
483        // - At start of line (no previous changes): only add if color differs from
484        //   imaginary white, i.e., only add if black.
485        // - Mid-line: only add if color differs from previous.
486        if count > 0 {
487            let is_change = self
488                .coding_changes
489                .last()
490                .map_or(!self.color.is_white(), |last| last.color != self.color);
491            if is_change {
492                self.coding_changes.push(ColorChange {
493                    idx: self.pixels_decoded,
494                    color: self.color,
495                });
496            }
497            self.pixels_decoded += count;
498        }
499    }
500
501    #[inline(always)]
502    fn at_eol(&self) -> bool {
503        self.a0().unwrap_or(0) == self.line_width
504    }
505
506    #[inline(always)]
507    fn next_line(&mut self, reader: &mut BitReader<'_>, decoder: &mut impl Decoder) -> Result<()> {
508        if self.pixels_decoded != self.settings.columns {
509            return Err(DecodeError::LineLengthMismatch);
510        }
511
512        core::mem::swap(&mut self.ref_changes, &mut self.coding_changes);
513        self.coding_changes.clear();
514        self.pixels_decoded = 0;
515        self.ref_pos = 0;
516        self.b1_idx = 0;
517        self.color = Color::White;
518        self.decoded_rows += 1;
519        decoder.next_line();
520
521        if self.settings.rows_are_byte_aligned {
522            reader.align();
523        }
524
525        self.update_b();
526
527        Ok(())
528    }
529}