Skip to main content

jbig2enc_rust/
lib.rs

1//! JBIG2 Encoder in Rust
2//!
3//! This crate provides functionality to encode binary images into the JBIG2 format.
4//! It supports both standalone JBIG2 files and PDF-embedded fragments with proper
5//! global dictionary handling.
6
7#![allow(missing_docs)]
8#![allow(dead_code)]
9#![allow(unused_variables)]
10#![allow(unused_mut)]
11
12// Re-export commonly used types
13pub use ndarray::Array2;
14
15/// Errors that can occur during JBIG2 encoding
16#[derive(Debug)]
17pub enum Jbig2Error {
18    /// Input buffer size mismatch
19    BufferSizeMismatch {
20        expected: usize,
21        actual: usize,
22        width: u32,
23        height: u32,
24        ratio: f64,
25    },
26
27    /// Buffer too small for operation
28    BufferTooSmall { expected: usize, actual: usize },
29
30    /// Array shape error during conversion
31    ArrayShapeError { source: ndarray::ShapeError },
32
33    /// Encoding failed
34    EncodingFailed { message: String },
35
36    /// Dictionary creation failed  
37    DictionaryFailed { message: String },
38
39    /// Packed binary data detected when unpacked expected
40    PackedDataDetected,
41
42    /// Stream count mismatch
43    StreamCountMismatch { expected: usize, actual: usize },
44
45    /// Segment writing failed
46    SegmentWriteFailed {
47        segment_type: String,
48        message: String,
49    },
50}
51
52impl std::fmt::Display for Jbig2Error {
53    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
54        match self {
55            Jbig2Error::BufferSizeMismatch {
56                expected,
57                actual,
58                width,
59                height,
60                ratio,
61            } => {
62                write!(
63                    f,
64                    "Input buffer size mismatch: expected {}, got {} for {}x{} image (ratio: {:.3})",
65                    expected, actual, width, height, ratio
66                )
67            }
68            Jbig2Error::BufferTooSmall { expected, actual } => {
69                write!(f, "Buffer too small: expected {}, got {}", expected, actual)
70            }
71            Jbig2Error::ArrayShapeError { source } => {
72                write!(f, "Array shape error: {}", source)
73            }
74            Jbig2Error::EncodingFailed { message } => {
75                write!(f, "Encoding failed: {}", message)
76            }
77            Jbig2Error::DictionaryFailed { message } => {
78                write!(f, "Dictionary creation failed: {}", message)
79            }
80            Jbig2Error::PackedDataDetected => {
81                write!(
82                    f,
83                    "Input appears to be packed binary data (1 bit per pixel), but JBIG2 encoder expects unpacked data (1 byte per pixel)"
84                )
85            }
86            Jbig2Error::StreamCountMismatch { expected, actual } => {
87                write!(
88                    f,
89                    "Expected {} stream(s) for single image encoding, got {}",
90                    expected, actual
91                )
92            }
93            Jbig2Error::SegmentWriteFailed {
94                segment_type,
95                message,
96            } => {
97                write!(f, "Failed to write {} segment: {}", segment_type, message)
98            }
99        }
100    }
101}
102
103impl std::error::Error for Jbig2Error {
104    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
105        match self {
106            Jbig2Error::ArrayShapeError { source } => Some(source),
107            _ => None,
108        }
109    }
110}
111
112impl From<ndarray::ShapeError> for Jbig2Error {
113    fn from(source: ndarray::ShapeError) -> Self {
114        Jbig2Error::ArrayShapeError { source }
115    }
116}
117
118// Module declarations
119pub mod jbig2arith;
120#[cfg(feature = "cc-analysis")]
121pub mod jbig2cc;
122pub mod jbig2classify;
123pub mod jbig2comparator;
124pub mod jbig2context;
125pub mod jbig2cost;
126pub mod jbig2enc;
127pub mod jbig2halftone;
128pub mod jbig2shared;
129pub mod jbig2structs;
130pub mod jbig2sym;
131pub mod jbig2unify;
132
133// Re-export the main encode functions and config
134pub use crate::jbig2arith::Jbig2ArithCoder;
135#[cfg(feature = "cc-analysis")]
136pub use jbig2cc::{BBox, CC, CCImage, Run, analyze_page, extract_symbols_for_jbig2};
137pub use jbig2enc::encode_document;
138pub use jbig2structs::Jbig2Config;
139
140use jbig2enc::Jbig2Encoder;
141use jbig2sym::binary_pixels_to_bitimage;
142use log::info;
143use std::env;
144
// Default threshold values; per the original note these matter only for
// symbol classification.
/// Default for the `threshold` field of `Jbig2Context`.
const JBIG2_THRESHOLD_DEF: f32 = 0.92;
/// Default for the `weight` field of `Jbig2Context`.
const JBIG2_WEIGHT_DEF: f32 = 0.5;
148
/// Output of a single-image encode, split so the two parts can be embedded
/// separately in a PDF.
#[derive(Debug, Clone)]
pub struct Jbig2EncodeResult {
    /// Global dictionary bytes, when one was produced; intended to be stored as
    /// its own PDF object.
    pub global_data: Option<Vec<u8>>,
    /// Bytes of the page stream itself (the encoded image).
    pub page_data: Vec<u8>,
}
157
158/// Context for JBIG2 encoding operations
159#[derive(Debug, Clone)]
160pub struct Jbig2Context {
161    /// The underlying configuration
162    config: Jbig2Config,
163
164    // Legacy fields for backward compatibility
165    threshold: f32,
166    weight: f32,
167    pdf_mode: bool,
168}
169
170impl Default for Jbig2Context {
171    fn default() -> Self {
172        Self {
173            config: Jbig2Config::default(),
174            threshold: JBIG2_THRESHOLD_DEF,
175            weight: JBIG2_WEIGHT_DEF,
176            pdf_mode: false,
177        }
178    }
179}
180
181impl Jbig2Context {
182    /// Create a new context with default values
183    pub fn new() -> Self {
184        Self::default()
185    }
186
187    /// Create a new context with specified PDF mode
188    pub fn with_pdf_mode(pdf_mode: bool) -> Self {
189        Self {
190            config: Jbig2Config::default(),
191            threshold: JBIG2_THRESHOLD_DEF,
192            weight: JBIG2_WEIGHT_DEF,
193            pdf_mode,
194        }
195    }
196
197    /// Create a new context with custom configuration
198    pub fn with_config(config: Jbig2Config, pdf_mode: bool) -> Self {
199        Self {
200            config,
201            threshold: JBIG2_THRESHOLD_DEF,
202            weight: JBIG2_WEIGHT_DEF,
203            pdf_mode,
204        }
205    }
206
207    /// Create a new context with lossless configuration (no symbol dictionaries)
208    /// This is useful for PDF embedding when symbol dictionaries cause display issues
209    pub fn with_lossless_config(pdf_mode: bool) -> Self {
210        Self {
211            config: Jbig2Config::lossless(),
212            threshold: JBIG2_THRESHOLD_DEF,
213            weight: JBIG2_WEIGHT_DEF,
214            pdf_mode,
215        }
216    }
217
218    /// Get the PDF mode setting
219    pub fn get_pdf_mode(&self) -> bool {
220        self.pdf_mode
221    }
222
223    /// Get the symbol mode setting
224    pub fn get_symbol_mode(&self) -> bool {
225        self.config.symbol_mode
226    }
227
228    /// Get the DPI setting
229    pub fn get_dpi(&self) -> u32 {
230        if self.config.generic.dpi == 0 {
231            300
232        } else {
233            self.config.generic.dpi
234        }
235    }
236}
237
238/// Main encoding function that handles both standalone and PDF fragment modes
239///
240/// This function encodes a single binary image into JBIG2 format. When PDF mode is enabled,
241/// it returns separate global dictionary and page data that can be properly embedded in PDF.
242///
243/// # Arguments
244/// * `input` - Binary image data (0/1 values)
245/// * `width` - Image width in pixels
246/// * `height` - Image height in pixels
247/// * `pdf_mode` - Whether to create PDF fragments (true) or standalone file (false)
248///
249/// # Returns
250/// A `Jbig2EncodeResult` containing separate global and page data for PDF embedding,
251/// or combined data for standalone files.
252pub fn encode_single_image(
253    input: &[u8],
254    width: u32,
255    height: u32,
256    pdf_mode: bool,
257) -> Result<Jbig2EncodeResult, Jbig2Error> {
258    let bitimage = validate_and_build_bitimage(input, width, height)?;
259    encode_single_bitimage(bitimage, Jbig2Context::with_pdf_mode(pdf_mode))
260}
261
262/// Encodes a single binary image into JBIG2 format using lossless configuration.
263///
264/// This function forces symbol_mode = false to create standalone JBIG2 streams
265/// without global dictionaries, which can resolve PDF display issues.
266///
267/// # Arguments
268/// * `input` - Binary image data (0/1 values)
269/// * `width` - Image width in pixels
270/// * `height` - Image height in pixels
271/// * `pdf_mode` - Whether to create PDF fragments (true) or standalone file (false)
272///
273/// # Returns
274/// A `Jbig2EncodeResult` containing page data without global dictionary
275pub fn encode_single_image_lossless(
276    input: &[u8],
277    width: u32,
278    height: u32,
279    pdf_mode: bool,
280) -> Result<Jbig2EncodeResult, Jbig2Error> {
281    let bitimage = validate_and_build_bitimage(input, width, height)?;
282    encode_single_bitimage(bitimage, Jbig2Context::with_lossless_config(pdf_mode))
283}
284
285fn validate_and_build_bitimage(
286    input: &[u8],
287    width: u32,
288    height: u32,
289) -> Result<jbig2sym::BitImage, Jbig2Error> {
290    let expected_len = width as usize * height as usize;
291    if input.len() < expected_len {
292        let packed_size = (width as usize * height as usize).div_ceil(8);
293        if input.len() == packed_size {
294            return Err(Jbig2Error::PackedDataDetected);
295        }
296
297        return Err(Jbig2Error::BufferSizeMismatch {
298            expected: expected_len,
299            actual: input.len(),
300            width,
301            height,
302            ratio: input.len() as f64 / expected_len as f64,
303        });
304    }
305
306    binary_pixels_to_bitimage(&input[..expected_len], width as usize, height as usize)
307        .map_err(|message| Jbig2Error::EncodingFailed { message })
308}
309
310fn encode_single_bitimage(
311    bitimage: jbig2sym::BitImage,
312    ctx: Jbig2Context,
313) -> Result<Jbig2EncodeResult, Jbig2Error> {
314    let mut enc_config = ctx.config.clone();
315    enc_config.want_full_headers = !ctx.get_pdf_mode();
316    if !enc_config.symbol_mode {
317        enc_config.refine = false;
318        enc_config.text_refine = false;
319    }
320
321    let global_data = if ctx.get_symbol_mode() && ctx.get_pdf_mode() {
322        let mut dict_encoder = Jbig2Encoder::new(&enc_config).dict_only();
323        dict_encoder
324            .add_page_bitimage(bitimage.clone())
325            .map_err(|e| Jbig2Error::DictionaryFailed {
326                message: e.to_string(),
327            })?;
328        Some(
329            dict_encoder
330                .flush_dict()
331                .map_err(|e| Jbig2Error::DictionaryFailed {
332                    message: e.to_string(),
333                })?,
334        )
335    } else {
336        None
337    };
338
339    let mut encoder = Jbig2Encoder::new(&enc_config);
340    encoder
341        .add_page_bitimage(bitimage)
342        .map_err(|e| Jbig2Error::EncodingFailed {
343            message: e.to_string(),
344        })?;
345    let page_data = encoder.flush().map_err(|e| Jbig2Error::EncodingFailed {
346        message: e.to_string(),
347    })?;
348
349    Ok(Jbig2EncodeResult {
350        global_data,
351        page_data,
352    })
353}
354
355/// Encodes a list of text-only binary PBM ROIs into JBIG2 streams.
356///
357/// # Arguments
358/// * `rois` - A slice of 2D arrays where each array represents a binary image (0/255 or 0/1 values)
359/// * `ctx` - JBIG2 encoding context with configuration
360pub fn encode_rois(
361    rois: &[Array2<u8>],
362    ctx: Jbig2Context,
363) -> Result<(Option<Vec<u8>>, Vec<Vec<u8>>), Box<dyn std::error::Error>> {
364    if rois.is_empty() {
365        return Ok((None, Vec::new()));
366    }
367
368    info!(
369        "Processing {} ROIs in PDF mode: {}",
370        rois.len(),
371        ctx.get_pdf_mode()
372    );
373
374    // Initialize encoder configuration - use the context's config instead of default
375    let mut enc_config = ctx.config.clone();
376    enc_config.want_full_headers = !ctx.get_pdf_mode(); // PDF mode shouldn't have file headers
377    if !enc_config.symbol_mode {
378        enc_config.refine = false;
379        enc_config.text_refine = false;
380    }
381
382    // For PDF mode with symbol encoding, create global dictionary
383    let global_dict = if ctx.get_symbol_mode() && ctx.get_pdf_mode() {
384        let dict_data =
385            build_page_dict(rois, &enc_config, &ctx).map_err(|e| Jbig2Error::DictionaryFailed {
386                message: e.to_string(),
387            })?;
388        Some(dict_data)
389    } else {
390        None
391    };
392
393    let mut roi_streams = Vec::with_capacity(rois.len());
394
395    for roi in rois {
396        let mut encoder = Jbig2Encoder::new(&enc_config);
397
398        // Add the image data to the encoder.
399        encoder.add_page(roi).map_err(|e| e.to_string())?;
400
401        // Encode the document to get the final stream.
402        // This will produce a stream with or without headers based on `enc_config`.
403        let stream = encoder.flush().map_err(|e| e.to_string())?;
404        roi_streams.push(stream);
405    }
406
407    Ok((global_dict, roi_streams))
408}
409
410/// Encodes a dictionary covering every ROI on a page.
411fn build_page_dict(
412    rois: &[Array2<u8>],
413    cfg: &Jbig2Config,
414    ctx: &Jbig2Context,
415) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
416    // Create encoder with the configuration
417    let mut encoder = Jbig2Encoder::new(cfg);
418
419    // Set PDF mode if needed
420    if ctx.get_pdf_mode() {
421        encoder = encoder.dict_only();
422    }
423
424    // Add all ROIs to the encoder
425    for roi in rois {
426        encoder
427            .add_page(roi)
428            .map_err(|e| format!("Failed to add page: {}", e))?;
429    }
430
431    encoder
432        .flush_dict()
433        .map_err(|e| format!("Failed to flush dictionary: {}", e).into())
434}
435
436/// Get the version string for the crate
437pub fn get_version() -> String {
438    let enc_version = option_env!("JBIG2ENC_VERSION").unwrap_or("unknown");
439    format!(
440        "jbig2-rs {}, jbig2enc {}",
441        env!("CARGO_PKG_VERSION"),
442        enc_version
443    )
444}
445
/// Returns a short description of the build: timestamp and build type.
///
/// The timestamp comes from the compile-time `VERGEN_BUILD_TIMESTAMP` environment
/// variable (`unknown` if unset); the build type is `debug` when debug assertions
/// are enabled and `release` otherwise.
pub fn get_build_info() -> String {
    let profile = if cfg!(debug_assertions) {
        "debug"
    } else {
        "release"
    };
    let timestamp = match option_env!("VERGEN_BUILD_TIMESTAMP") {
        Some(ts) => ts,
        None => "unknown",
    };
    format!("{} (built with {})", timestamp, profile)
}
455}