Skip to main content

vigb_decoder/
lib.rs

1//! Decoder for PaperPort 2 (`.max`) image scans.
2//!
3//! The PaperPort 2 file format ("ViGBe") is a proprietary container used
4//! by ScanSoft's PaperPort 2 (1996) for 1-bit scanned documents. Each
5//! image chunk wraps a CCITT-T.6 (Group 4 fax) compressed bitmap with a
6//! custom per-line marker dispatcher.
7//!
8//! # Quick start
9//!
10//! ```no_run
11//! use vigb_decoder::{decode_max_file, write_pdf, Config};
12//! use std::path::Path;
13//!
14//! let pages = decode_max_file("scan.max", &Config::default())?;
15//! write_pdf(&pages, Path::new("scan.pdf"))?;
16//! # Ok::<(), vigb_decoder::MaxError>(())
17//! ```
18//!
19//! # Format documentation
20//!
21//! See `docs/format.md` and `docs/decoder.md` in the repo for the format
22//! specification and the canonical decoder behaviour.
23//!
24//! # Output bitmap polarity
25//!
26//! [`Page::bitmap`] is 1-bit packed, MSB-first per byte. **Bit value 1
27//! means BLACK.** This matches the PDF `/Indexed [/DeviceGray 1 <FF 00>]`
28//! convention used by [`write_pdf`]. If you're comparing against a PNG
29//! ground-truth in PIL `'1'` mode, be aware that PIL `'1'` uses the
30//! opposite convention (bit 1 = white) — invert before comparing.
31
32#![forbid(unsafe_code)]
33#![warn(missing_docs)]
34
35mod error;
36pub use error::{MaxError, Result};
37
38mod config;
39pub use config::{Config, ConfigBuilder, DispatchKind, T0DropMode};
40
41mod bitstream;
42mod ccitt;
43mod chunks;
44pub use chunks::{MAX_IMAGE_PIXELS, MAX_PREVIEW_PIXELS};
45
46mod decoder;
47mod dispatch;
48mod pdf;
49mod preview;
50
51pub use decoder::{DecodeStats, Page, Preview};
52pub use pdf::{write_pdf, write_pdf_bytes, PdfOptions};
53
54/// Decode all image chunks in a `.max` byte buffer.
55///
56/// Returns one [`Page`] per image chunk in document order.
57///
58/// # Errors
59///
60/// - [`MaxError::BadMagic`] if the input does not begin with the
61///   `ViGBe` magic.
62/// - [`MaxError::Truncated`] if no valid image chunks are found.
63/// - [`MaxError::ImageTooLarge`] if any chunk's declared dimensions
64///   exceed [`MAX_IMAGE_PIXELS`].
65/// - [`MaxError::TooManyPages`] if the file claims more image chunks
66///   than [`Config::max_pages`] allows (SEC-M04).
67pub fn decode_max(data: &[u8], cfg: &Config) -> Result<Vec<Page>> {
68    if data.len() < 5 || &data[..5] != b"ViGBe" {
69        return Err(MaxError::BadMagic { offset: 0u64 });
70    }
71    let chunks = chunks::find_image_chunks(data);
72    if chunks.is_empty() {
73        return Err(MaxError::Truncated {
74            offset: 0u64,
75            need: 0x40,
76            have: data.len(),
77        });
78    }
79    // SEC-M04: bound total page count before iterating. Each Page allocates
80    // up to MAX_IMAGE_PIXELS / 8 bytes (~25 MiB) and is retained in `out`
81    // until this function returns; an N-chunk file can request N × 25 MiB
82    // resident memory without this cap.
83    if (chunks.len() as u64) > cfg.max_pages as u64 {
84        return Err(MaxError::TooManyPages {
85            count: chunks.len(),
86            max: cfg.max_pages,
87        });
88    }
89    let mut out = Vec::with_capacity(chunks.len());
90    for chunk in chunks {
91        out.push(dispatch::decode_image_chunk(
92            data,
93            chunk.offset,
94            chunk.length,
95            cfg,
96        )?);
97    }
98    Ok(out)
99}
100
101/// Decode a `.max` file from disk. Convenience wrapper for [`decode_max`].
102///
103/// # Errors
104///
105/// Returns an I/O error (wrapped as [`MaxError`]) if the file cannot be read,
106/// or any error that [`decode_max`] returns.
107pub fn decode_max_file<P: AsRef<std::path::Path>>(path: P, cfg: &Config) -> Result<Vec<Page>> {
108    let data = std::fs::read(path)?;
109    decode_max(&data, cfg)
110}