hayro_ccitt/lib.rs
1//! A decoder for CCITT fax-encoded images.
2//!
3//! This crate implements the CCITT Group 3 and Group 4 fax compression algorithms
4//! as defined in ITU-T Recommendations T.4 and T.6. These encodings are commonly
5//! used for bi-level (black and white) images in PDF documents and fax transmissions.
6//!
7//! The main entry point is the [`decode`] function, which takes encoded data and
8//! decoding settings, and outputs the decoded pixels through a [`Decoder`] trait
9//! that can be implemented according to your needs.
10//!
11//! The crate is `no_std` compatible but requires an allocator to be available.
12//!
13//! # Safety
14//! Unsafe code is forbidden via a crate-level attribute.
15//!
16//! # License
17//! Licensed under either of
18//!
19//! - Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or <http://www.apache.org/licenses/LICENSE-2.0>)
20//! - MIT license ([LICENSE-MIT](LICENSE-MIT) or <http://opensource.org/licenses/MIT>)
21//!
22//! at your option.
23//!
24//! [`decode`]: crate::decode
25//! [`Decoder`]: crate::Decoder
26
27#![no_std]
28#![forbid(unsafe_code)]
29#![forbid(missing_docs)]
30
31extern crate alloc;
32
33use crate::bit_reader::BitReader;
34
35use crate::decode::{EOFB, Mode};
36use alloc::vec;
37use alloc::vec::Vec;
38
39mod bit_reader;
40mod decode;
41mod state_machine;
42
/// A specialized `Result` type for CCITT decoding operations.
///
/// This is `core::result::Result` with the error type fixed to
/// [`DecodeError`], which is what every fallible routine in this crate root
/// returns.
pub type Result<T> = core::result::Result<T, DecodeError>;
45
/// The ways in which decoding a CCITT stream can fail.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DecodeError {
    /// The input ended while more bits were still required.
    UnexpectedEof,
    /// The bit stream contained a sequence that is not a valid code word.
    InvalidCode,
    /// A decoded scanline did not contain the expected number of pixels.
    LineLengthMismatch,
    /// A run length or position computation overflowed.
    Overflow,
}

impl core::fmt::Display for DecodeError {
    /// Writes a short, lowercase, human-readable description of the error.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let message = match *self {
            Self::UnexpectedEof => "unexpected end of input",
            Self::InvalidCode => "invalid CCITT code sequence",
            Self::LineLengthMismatch => "scanline length mismatch",
            Self::Overflow => "arithmetic overflow in position calculation",
        };

        f.write_str(message)
    }
}

impl core::error::Error for DecodeError {}
71
/// The encoding mode for CCITT fax decoding.
///
/// The mode selects which decoding routine `decode` dispatches to.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum EncodingMode {
    /// Group 4 (MMR).
    ///
    /// Every line is decoded as a two-dimensionally coded line.
    Group4,
    /// Group 3 1D (MH).
    ///
    /// Every line is decoded as a one-dimensionally coded line.
    Group3_1D,
    /// Group 3 2D (MR).
    ///
    /// Each line is preceded by a tag bit: `1` means the line is coded
    /// one-dimensionally, `0` means it is coded two-dimensionally.
    Group3_2D {
        /// The K parameter.
        ///
        /// NOTE(review): `decode` matches this variant with `..` and the
        /// per-line coding is chosen from the tag bit, so this value does not
        /// appear to be consulted in this file; confirm whether it is used
        /// elsewhere.
        k: u32,
    },
}
85
/// Settings to apply during decoding.
#[derive(Copy, Clone, Debug)]
pub struct DecodeSettings {
    /// How many columns the image has (i.e. its width).
    ///
    /// Every decoded row must contain exactly this many pixels, otherwise
    /// decoding stops with [`DecodeError::LineLengthMismatch`].
    pub columns: u32,
    /// How many rows the image has (i.e. its height).
    ///
    /// In case `end_of_block` has been set to true, decoding will run until
    /// the given number of rows have been decoded, or the `end_of_block` marker
    /// has been encountered, whichever occurs first.
    ///
    /// Decoding also stops once the input has been exhausted.
    pub rows: u32,
    /// Whether the stream _may_ contain an end-of-block marker.
    ///
    /// The marker does not have to be present: if this is set to `true` but
    /// the stream contains no end-of-block marker, hayro-ccitt will still use
    /// the value of `rows` to determine when to stop decoding.
    pub end_of_block: bool,
    /// Whether the stream contains end-of-line markers.
    ///
    /// Note that the Group 3 routines in this file always try to consume an
    /// end-of-line marker when one is available, regardless of this flag,
    /// because PDF producers do not set it reliably.
    pub end_of_line: bool,
    /// Whether the data in the stream for each row is aligned to the byte
    /// boundary.
    ///
    /// If set, the bit reader is aligned after every completed row.
    pub rows_are_byte_aligned: bool,
    /// The encoding mode used by the image.
    pub encoding: EncodingMode,
    /// Whether black and white should be inverted.
    ///
    /// If set, the `white` flag handed to the [`Decoder`] is flipped for every
    /// pixel.
    pub invert_black: bool,
}
112
/// A decoder for CCITT images.
///
/// This is the sink that receives decoded pixels. Within one row, pixels are
/// delivered from left to right through [`Decoder::push_pixel`] and
/// [`Decoder::push_pixel_chunk`]; the end of the row is signalled through
/// [`Decoder::next_line`].
pub trait Decoder {
    /// Push a single pixel with the given color.
    ///
    /// `white` is `true` for a white pixel and `false` for a black one, after
    /// `DecodeSettings::invert_black` has been applied.
    fn push_pixel(&mut self, white: bool);
    /// Push multiple chunks of 8 pixels of the same color.
    ///
    /// The `chunk_count` parameter indicates how many 8-pixel chunks to push.
    /// For example, if this method is called with `white = true` and
    /// `chunk_count = 10`, 80 white pixels are pushed (10 × 8 = 80).
    ///
    /// You can assume that this method is only called if the number of already
    /// pushed pixels is a multiple of 8 (i.e. byte-aligned), and that
    /// `chunk_count` is never zero.
    fn push_pixel_chunk(&mut self, white: bool, chunk_count: u32);
    /// Called when a row has been completed.
    ///
    /// This is only invoked after the row has been verified to contain exactly
    /// `DecodeSettings::columns` pixels.
    fn next_line(&mut self);
}
129
/// One of the two colors a pixel of a bi-level image can have.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum Color {
    /// A white pixel.
    White,
    /// A black pixel.
    Black,
}

impl Color {
    /// Flips the color: white becomes black and black becomes white.
    #[inline(always)]
    fn opposite(self) -> Self {
        if self.is_white() {
            Self::Black
        } else {
            Self::White
        }
    }

    /// Tells whether this color is [`Color::White`].
    #[inline(always)]
    fn is_white(self) -> bool {
        self == Self::White
    }
}
155
/// Represents a color change at a specific index in a line.
///
/// A line is described by the list of positions at which a run of a new
/// color begins.
#[derive(Clone, Copy)]
struct ColorChange {
    /// The pixel index at which the run of the new color starts.
    idx: u32,
    /// The color of the run that starts at `idx`.
    color: Color,
}
162
163/// Decode the given image using the provided settings and the decoder.
164///
165/// If decoding was successful, the number of bytes that have been read in total
166/// is returned.
167///
168/// If an error is returned, it means that the file is somehow malformed.
169/// However, even if that's the case, it is possible that a number
170/// of rows were decoded successfully and written into the decoder, so those
171/// can still be used, but the image might be truncated.
172pub fn decode(data: &[u8], decoder: &mut impl Decoder, settings: &DecodeSettings) -> Result<usize> {
173 let mut ctx = DecoderContext::new(decoder, settings);
174 let mut reader = BitReader::new(data);
175
176 match settings.encoding {
177 EncodingMode::Group4 => decode_group4(&mut ctx, &mut reader)?,
178 EncodingMode::Group3_1D => decode_group3_1d(&mut ctx, &mut reader)?,
179 EncodingMode::Group3_2D { .. } => decode_group3_2d(&mut ctx, &mut reader)?,
180 }
181
182 reader.align();
183 Ok(reader.byte_pos())
184}
185
186/// Group 3 1D decoding (T.4 Section 4.1).
187fn decode_group3_1d<T: Decoder>(
188 ctx: &mut DecoderContext<'_, T>,
189 reader: &mut BitReader<'_>,
190) -> Result<()> {
191 // It seems like PDF producers are a bit sloppy with the `end_of_line` flag,
192 // so we just always try to read one.
193 let _ = reader.read_eol_if_available();
194
195 loop {
196 decode_1d_line(ctx, reader)?;
197 ctx.next_line(reader)?;
198
199 if group3_check_eob(ctx, reader) {
200 break;
201 }
202 }
203
204 Ok(())
205}
206
207/// Group 3 2D decoding (T.4 Section 4.2).
208fn decode_group3_2d<T: Decoder>(
209 ctx: &mut DecoderContext<'_, T>,
210 reader: &mut BitReader<'_>,
211) -> Result<()> {
212 // It seems like PDF producers are a bit sloppy with the `end_of_line` flag,
213 // so we just always try to read one.
214 let _ = reader.read_eol_if_available();
215
216 loop {
217 let tag_bit = reader.read_bit()?;
218
219 if tag_bit == 1 {
220 decode_1d_line(ctx, reader)?;
221 } else {
222 decode_2d_line(ctx, reader)?;
223 }
224
225 ctx.next_line(reader)?;
226
227 if group3_check_eob(ctx, reader) {
228 break;
229 }
230 }
231
232 Ok(())
233}
234
235/// Check for end-of-block, including RTC (T.4 Section 4.1.4).
236fn group3_check_eob<T: Decoder>(
237 ctx: &mut DecoderContext<'_, T>,
238 reader: &mut BitReader<'_>,
239) -> bool {
240 let eol_count = reader.read_eol_if_available();
241
242 // T.4 Section 4.1.4: "The end of a document transmission is indicated by
243 // sending six consecutive EOLs."
244 // PDFBOX-2778 has 7 EOL, although it should only be 6. Let's be lenient
245 // and check with >=.
246 if ctx.settings.end_of_block && eol_count >= 6 {
247 return true;
248 }
249
250 if ctx.decoded_rows == ctx.settings.rows || reader.at_end() {
251 return true;
252 }
253
254 false
255}
256
257fn decode_group4<T: Decoder>(
258 ctx: &mut DecoderContext<'_, T>,
259 reader: &mut BitReader<'_>,
260) -> Result<()> {
261 loop {
262 if ctx.settings.end_of_block && reader.peak_bits(24) == Ok(EOFB) {
263 reader.read_bits(24)?;
264 break;
265 }
266
267 if ctx.decoded_rows == ctx.settings.rows || reader.at_end() {
268 break;
269 }
270
271 decode_2d_line(ctx, reader)?;
272 ctx.next_line(reader)?;
273 }
274
275 Ok(())
276}
277
278/// Decode a single 1D-coded line (T.4 Section 4.1.1, T.6 Section 2.2.4).
279#[inline(always)]
280fn decode_1d_line<T: Decoder>(
281 ctx: &mut DecoderContext<'_, T>,
282 reader: &mut BitReader<'_>,
283) -> Result<()> {
284 while !ctx.at_eol() {
285 let run_length = reader.decode_run(ctx.color)?;
286 ctx.push_pixels(run_length);
287 ctx.color = ctx.color.opposite();
288 }
289
290 Ok(())
291}
292
293/// Decode a single 2D-coded line (T.4 Section 4.2, T.6 Section 2.2).
294#[inline(always)]
295fn decode_2d_line<T: Decoder>(
296 ctx: &mut DecoderContext<'_, T>,
297 reader: &mut BitReader<'_>,
298) -> Result<()> {
299 while !ctx.at_eol() {
300 let mode = reader.decode_mode()?;
301
302 match mode {
303 // Pass mode (T.4 Section 4.2.1.3.2a, T.6 Section 2.2.3.1).
304 Mode::Pass => {
305 ctx.push_pixels(ctx.b2() - ctx.a0().unwrap_or(0));
306 ctx.update_b();
307 // No color change happens in pass mode.
308 }
309 // Vertical mode (T.4 Section 4.2.1.3.2b, T.6 Section 2.2.3.2).
310 Mode::Vertical(i) => {
311 let b1 = ctx.b1();
312 let a1 = if i >= 0 {
313 b1.checked_add(i as u32).ok_or(DecodeError::Overflow)?
314 } else {
315 b1.checked_sub((-i) as u32).ok_or(DecodeError::Overflow)?
316 };
317
318 let a0 = ctx.a0().unwrap_or(0);
319
320 ctx.push_pixels(a1.checked_sub(a0).ok_or(DecodeError::Overflow)?);
321 ctx.color = ctx.color.opposite();
322
323 ctx.update_b();
324 }
325 // Horizontal mode (T.4 Section 4.2.1.3.2c, T.6 Section 2.2.3.3).
326 Mode::Horizontal => {
327 let a0a1 = reader.decode_run(ctx.color)?;
328 ctx.push_pixels(a0a1);
329 ctx.color = ctx.color.opposite();
330
331 let a1a2 = reader.decode_run(ctx.color)?;
332 ctx.push_pixels(a1a2);
333 ctx.color = ctx.color.opposite();
334
335 ctx.update_b();
336 }
337 }
338 }
339
340 Ok(())
341}
342
/// The mutable state shared by all decoding routines.
///
/// It tracks the position within the line that is currently being decoded
/// (the coding line), the color changes of the previous line (the reference
/// line) and the sink that receives the decoded pixels.
struct DecoderContext<'a, T: Decoder> {
    /// Color changes in the reference line (previous line).
    ref_changes: Vec<ColorChange>,
    /// The minimum index we need to start from when searching for b1.
    ///
    /// This is an index into `ref_changes`, not a pixel position.
    ref_pos: u32,
    /// The current index of b1.
    ///
    /// This is an index into `ref_changes`, not a pixel position. If no
    /// suitable change exists, it is set to `line_width`, in which case the
    /// lookup falls back to `line_width` as the position of b1.
    b1_idx: u32,
    /// Color changes in the coding line (current line being decoded).
    coding_changes: Vec<ColorChange>,
    /// Current position in the coding line (number of pixels decoded).
    pixels_decoded: u32,
    /// The decoder sink.
    decoder: &'a mut T,
    /// The width of a line in pixels (i.e. number of columns).
    line_width: u32,
    /// The color of the next run to be decoded.
    color: Color,
    /// How many rows have been decoded so far.
    decoded_rows: u32,
    /// The settings to apply during decoding.
    settings: &'a DecodeSettings,
    /// Whether to invert black and white.
    invert_black: bool,
}
367
368impl<'a, T: Decoder> DecoderContext<'a, T> {
369 fn new(decoder: &'a mut T, settings: &'a DecodeSettings) -> Self {
370 Self {
371 ref_changes: vec![],
372 ref_pos: 0,
373 b1_idx: 0,
374 coding_changes: Vec::new(),
375 pixels_decoded: 0,
376 decoder,
377 line_width: settings.columns,
378 // Each run starts with an imaginary white pixel on the left.
379 color: Color::White,
380 decoded_rows: 0,
381 settings,
382 invert_black: settings.invert_black,
383 }
384 }
385
386 /// `a0` refers to the first changing element on the current line.
387 fn a0(&self) -> Option<u32> {
388 if self.pixels_decoded == 0 {
389 // If we haven't coded anything yet, a0 conceptually points at the
390 // index -1. This is a bit of an edge case, and we therefore require
391 // callers of this method to handle the case themselves.
392 None
393 } else {
394 // Otherwise, the index points to the next element to be decoded.
395 Some(self.pixels_decoded)
396 }
397 }
398
399 /// "The first changing element on the reference line to the right of a0 and
400 /// of opposite color to a0."
401 fn b1(&self) -> u32 {
402 self.ref_changes
403 .get(self.b1_idx as usize)
404 .map_or(self.line_width, |c| c.idx)
405 }
406
407 /// "The next changing element to the right of b1, on the reference line."
408 fn b2(&self) -> u32 {
409 self.ref_changes
410 .get(self.b1_idx as usize + 1)
411 .map_or(self.line_width, |c| c.idx)
412 }
413
414 /// Compute the new position of b1 (and implicitly b2).
415 #[inline(always)]
416 fn update_b(&mut self) {
417 // b1 refers to an element of the opposite color.
418 let target_color = self.color.opposite();
419 // b1 must be strictly greater than a0.
420 let min_idx = self.a0().map_or(0, |a| a + 1);
421
422 self.b1_idx = self.line_width;
423
424 for i in self.ref_pos..self.ref_changes.len() as u32 {
425 let change = &self.ref_changes[i as usize];
426
427 if change.idx < min_idx {
428 self.ref_pos = i + 1;
429 continue;
430 }
431
432 if change.color == target_color {
433 self.b1_idx = i;
434 break;
435 }
436 }
437 }
438
439 #[inline(always)]
440 fn push_pixels(&mut self, count: u32) {
441 // Make sure we don't have too many pixels (for invalid files).
442 let count = count.min(self.line_width - self.pixels_decoded);
443 let white = self.color.is_white() ^ self.invert_black;
444 let mut remaining = count;
445
446 // Push individual pixels until we reach an 8-pixel boundary.
447 let pixels_to_boundary = (8 - (self.pixels_decoded % 8)) % 8;
448 let unaligned_pixels = remaining.min(pixels_to_boundary);
449 for _ in 0..unaligned_pixels {
450 self.decoder.push_pixel(white);
451 remaining -= 1;
452 }
453
454 // Push full chunks of 8 pixels.
455 let full_chunks = remaining / 8;
456 if full_chunks > 0 {
457 self.decoder.push_pixel_chunk(white, full_chunks);
458 remaining %= 8;
459 }
460
461 // Push remaining individual pixels.
462 for _ in 0..remaining {
463 self.decoder.push_pixel(white);
464 }
465
466 // Track the color change:
467 // - At start of line (no previous changes): only add if color differs from
468 // imaginary white, i.e., only add if black.
469 // - Mid-line: only add if color differs from previous.
470 if count > 0 {
471 let is_change = self
472 .coding_changes
473 .last()
474 .map_or(!self.color.is_white(), |last| last.color != self.color);
475 if is_change {
476 self.coding_changes.push(ColorChange {
477 idx: self.pixels_decoded,
478 color: self.color,
479 });
480 }
481 self.pixels_decoded += count;
482 }
483 }
484
485 fn at_eol(&self) -> bool {
486 self.a0().unwrap_or(0) == self.line_width
487 }
488
489 #[inline(always)]
490 fn next_line(&mut self, reader: &mut BitReader<'_>) -> Result<()> {
491 if self.pixels_decoded != self.settings.columns {
492 return Err(DecodeError::LineLengthMismatch);
493 }
494
495 core::mem::swap(&mut self.ref_changes, &mut self.coding_changes);
496 self.coding_changes.clear();
497 self.pixels_decoded = 0;
498 self.ref_pos = 0;
499 self.b1_idx = 0;
500 self.color = Color::White;
501 self.decoded_rows += 1;
502 self.decoder.next_line();
503
504 if self.settings.rows_are_byte_aligned {
505 reader.align();
506 }
507
508 self.update_b();
509
510 Ok(())
511 }
512}