Skip to main content

pdfluent_jbig2/
lib.rs

1/*!
2A memory-safe, pure-Rust JBIG2 decoder.
3
4`hayro-jbig2` decodes JBIG2 images as specified in ITU-T T.88 (also known as
5ISO/IEC 14492). JBIG2 is a bi-level image compression standard commonly used
6in PDF documents for compressing scanned text documents.
7
8The crate is `no_std` compatible but requires an allocator to be available.
9
10# Safety
11This crate forbids unsafe code via a crate-level attribute.
12*/
13
14#![cfg_attr(not(feature = "std"), no_std)]
15#![forbid(unsafe_code)]
16#![allow(missing_docs)]
17
18extern crate alloc;
19
20use alloc::vec::Vec;
21
/// A sink that receives the pixels of a decoded JBIG2 image, row by row.
pub trait Decoder {
    /// Push a single pixel to the output.
    ///
    /// `black` is `true` for a black pixel and `false` for a white one.
    fn push_pixel(&mut self, black: bool);
    /// Push multiple chunks of 8 pixels of the same color.
    ///
    /// The `chunk_count` parameter indicates how many 8-pixel chunks to push.
    /// For example, if this method is called with `black = false` and
    /// `chunk_count = 10`, 80 white pixels are pushed (10 × 8 = 80).
    ///
    /// You can assume that this method is only called if the number of already
    /// pushed pixels is a multiple of 8 (i.e. byte-aligned).
    fn push_pixel_chunk(&mut self, black: bool, chunk_count: u32);
    /// Called when a row has been completed.
    fn next_line(&mut self);
}
38
39mod arithmetic_decoder;
40mod bitmap;
41mod decode;
42mod error;
43mod file;
44mod gray_scale;
45mod huffman_table;
46mod integer_decoder;
47mod lazy;
48mod page_info;
49mod reader;
50mod segment;
51mod symbol_id_decoder;
52
53use error::bail;
54pub use error::{
55    DecodeError, FormatError, HuffmanError, ParseError, RegionError, Result, SegmentError,
56    SymbolError, TemplateError,
57};
58
59use crate::file::parse_segments_sequential;
60use bitmap::Bitmap;
61use decode::CombinationOperator;
62use decode::generic;
63use decode::generic_refinement;
64use decode::halftone;
65use decode::pattern;
66use decode::pattern::PatternDictionary;
67use decode::symbol;
68use decode::symbol::SymbolDictionary;
69use decode::text;
70use file::parse_file;
71use huffman_table::{HuffmanTable, StandardHuffmanTables};
72use page_info::{PageInformation, parse_page_information};
73use reader::Reader;
74use segment::SegmentType;
75
76/// A decoded JBIG2 image.
77#[derive(Debug, Clone)]
78pub struct Image {
79    /// The width of the image in pixels.
80    pub width: u32,
81    /// The height of the image in pixels.
82    pub height: u32,
83    /// Number of u32 words per row.
84    stride: u32,
85    /// The packed pixel data.
86    data: Vec<u32>,
87}
88
89impl Image {
90    /// Decode the image data into the decoder.
91    pub fn decode<D: Decoder>(&self, decoder: &mut D) {
92        let bytes_per_row = self.width.div_ceil(8) as usize;
93
94        for row in self.data.chunks_exact(self.stride as usize) {
95            let mut x = 0_u32;
96            let mut chunk_byte: Option<u8> = None;
97            let mut chunk_count = 0_u32;
98
99            let bytes = row.iter().flat_map(|w| w.to_be_bytes()).take(bytes_per_row);
100
101            for byte in bytes {
102                let remaining = self.width - x;
103
104                if remaining >= 8 && (byte == 0x00 || byte == 0xFF) {
105                    // Continue the previous chunk.
106                    if chunk_byte == Some(byte) {
107                        chunk_count += 1;
108                        x += 8;
109                        continue;
110                    }
111
112                    // Flush previous chunk if any, then start new one.
113                    if let Some(b) = chunk_byte {
114                        decoder.push_pixel_chunk(b == 0xFF, chunk_count);
115                    }
116
117                    chunk_byte = Some(byte);
118                    chunk_count = 1;
119                    x += 8;
120
121                    continue;
122                }
123
124                // Can't continue/start chunk, flush any existing chunk first.
125                if let Some(b) = chunk_byte.take() {
126                    decoder.push_pixel_chunk(b == 0xFF, chunk_count);
127                    chunk_count = 0;
128                }
129
130                // Emit individual pixels.
131                let count = remaining.min(8);
132                for i in 0..count {
133                    decoder.push_pixel((byte >> (7 - i)) & 1 != 0);
134                }
135                x += count;
136            }
137
138            // Flush any remaining chunk at end of row.
139            if let Some(b) = chunk_byte {
140                decoder.push_pixel_chunk(b == 0xFF, chunk_count);
141            }
142
143            decoder.next_line();
144        }
145    }
146}
147
148/// Decode a JBIG2 file from the given data.
149///
150/// The file is expected to use the sequential or random-access organization,
151/// as defined in Annex D.1 and D.2.
152pub fn decode(data: &[u8]) -> Result<Image> {
153    let file = parse_file(data)?;
154    decode_with_segments(&file.segments)
155}
156
157/// Decode an embedded JBIG2 image. with the given global segments.
158///
159/// The file is expected to use the embedded organization defined in
160/// Annex D.3.
161pub fn decode_embedded(data: &[u8], globals: Option<&[u8]>) -> Result<Image> {
162    let mut segments = Vec::new();
163    if let Some(globals_data) = globals {
164        let mut reader = Reader::new(globals_data);
165        parse_segments_sequential(&mut reader, &mut segments)?;
166    };
167
168    let mut reader = Reader::new(data);
169    parse_segments_sequential(&mut reader, &mut segments)?;
170
171    segments.sort_by_key(|seg| seg.header.segment_number);
172
173    decode_with_segments(&segments)
174}
175
176fn decode_with_segments(segments: &[segment::Segment<'_>]) -> Result<Image> {
177    // Pre-scan for stripe height from EndOfStripe segments.
178    let height_from_stripes = segments
179        .iter()
180        .filter(|seg| seg.header.segment_type == SegmentType::EndOfStripe)
181        .filter_map(|seg| u32::from_be_bytes(seg.data.try_into().ok()?).checked_add(1))
182        .max();
183
184    // Find and parse page information segment first.
185    let (mut ctx, mut page_bitmap) = if let Some(page_info) = segments
186        .iter()
187        .find(|s| s.header.segment_type == SegmentType::PageInformation)
188    {
189        let mut reader = Reader::new(page_info.data);
190        get_ctx(&mut reader, height_from_stripes)?
191    } else {
192        bail!(FormatError::MissingPageInfo);
193    };
194
195    // Process all segments.
196    for seg in segments {
197        let mut reader = Reader::new(seg.data);
198
199        match seg.header.segment_type {
200            SegmentType::PageInformation => {
201                // Already processed above, skip.
202            }
203            SegmentType::ImmediateGenericRegion | SegmentType::ImmediateLosslessGenericRegion => {
204                let had_unknown_length = seg.header.data_length.is_none();
205                let header = generic::parse(&mut reader, had_unknown_length)?;
206
207                if ctx.can_decode_directly(&page_bitmap, &header.region_info, false) {
208                    generic::decode_into(&header, &mut page_bitmap)?;
209                } else {
210                    let region = generic::decode(&header)?;
211                    page_bitmap.combine(
212                        &region.bitmap,
213                        region.bitmap.x_location as i32,
214                        region.bitmap.y_location as i32,
215                        region.combination_operator,
216                    );
217                }
218                ctx.page_pristine = false;
219            }
220            SegmentType::IntermediateGenericRegion => {
221                // Intermediate segments cannot have unknown length.
222                let header = generic::parse(&mut reader, false)?;
223                let region = generic::decode(&header)?;
224                ctx.store_region(seg.header.segment_number, region.bitmap);
225            }
226            SegmentType::PatternDictionary => {
227                let header = pattern::parse(&mut reader)?;
228                let dictionary = pattern::decode(&header)?;
229                ctx.store_pattern_dictionary(seg.header.segment_number, dictionary);
230            }
231            SegmentType::SymbolDictionary => {
232                // "1) Concatenate all the input symbol dictionaries to form SDINSYMS."
233                // (6.5.5, step 1)
234                // Collect references to avoid cloning; symbols are only cloned if re-exported.
235                let input_symbols: Vec<&Bitmap> = seg
236                    .header
237                    .referred_to_segments
238                    .iter()
239                    .filter_map(|&num| ctx.get_symbol_dictionary(num))
240                    .flat_map(|dict| dict.exported_symbols.iter())
241                    .collect();
242
243                // Collect Huffman tables from referred table segments.
244                let referred_tables: Vec<HuffmanTable> = seg
245                    .header
246                    .referred_to_segments
247                    .iter()
248                    .filter_map(|&num| ctx.get_huffman_table(num))
249                    .cloned()
250                    .collect();
251
252                // Get retained contexts from the last referred symbol dictionary (7.4.2.2 step 3).
253                let retained_contexts = seg
254                    .header
255                    .referred_to_segments
256                    .last()
257                    .and_then(|&num| ctx.get_symbol_dictionary(num))
258                    .and_then(|dict| dict.retained_contexts.as_ref());
259
260                let header = symbol::parse(&mut reader)?;
261                let dictionary = symbol::decode(
262                    &header,
263                    &input_symbols,
264                    &referred_tables,
265                    &ctx.standard_tables,
266                    retained_contexts,
267                )?;
268                ctx.store_symbol_dictionary(seg.header.segment_number, dictionary);
269            }
270            SegmentType::ImmediateTextRegion | SegmentType::ImmediateLosslessTextRegion => {
271                // Collect symbols from referred symbol dictionaries (SBSYMS).
272                let symbols: Vec<&Bitmap> = seg
273                    .header
274                    .referred_to_segments
275                    .iter()
276                    .filter_map(|&num| ctx.get_symbol_dictionary(num))
277                    .flat_map(|dict| dict.exported_symbols.iter())
278                    .collect();
279
280                // Collect Huffman tables from referred table segments.
281                // "These user-supplied Huffman decoding tables may be supplied either
282                // as a Tables segment..." (7.4.3.1.6)
283                let referred_tables: Vec<HuffmanTable> = seg
284                    .header
285                    .referred_to_segments
286                    .iter()
287                    .filter_map(|&num| ctx.get_huffman_table(num))
288                    .cloned()
289                    .collect();
290
291                let header = text::parse(&mut reader, symbols.len() as u32)?;
292
293                if ctx.can_decode_directly(
294                    &page_bitmap,
295                    &header.region_info,
296                    header.flags.default_pixel,
297                ) {
298                    text::decode_into(
299                        &header,
300                        &symbols,
301                        &referred_tables,
302                        &ctx.standard_tables,
303                        &mut page_bitmap,
304                    )?;
305                } else {
306                    let region =
307                        text::decode(&header, &symbols, &referred_tables, &ctx.standard_tables)?;
308                    page_bitmap.combine(
309                        &region.bitmap,
310                        region.bitmap.x_location as i32,
311                        region.bitmap.y_location as i32,
312                        region.combination_operator,
313                    );
314                }
315                ctx.page_pristine = false;
316            }
317            SegmentType::IntermediateTextRegion => {
318                // Collect symbols from referred symbol dictionaries (SBSYMS).
319                let symbols: Vec<&Bitmap> = seg
320                    .header
321                    .referred_to_segments
322                    .iter()
323                    .filter_map(|&num| ctx.get_symbol_dictionary(num))
324                    .flat_map(|dict| dict.exported_symbols.iter())
325                    .collect();
326
327                // Collect Huffman tables from referred table segments.
328                let referred_tables: Vec<HuffmanTable> = seg
329                    .header
330                    .referred_to_segments
331                    .iter()
332                    .filter_map(|&num| ctx.get_huffman_table(num))
333                    .cloned()
334                    .collect();
335
336                let header = text::parse(&mut reader, symbols.len() as u32)?;
337                let region =
338                    text::decode(&header, &symbols, &referred_tables, &ctx.standard_tables)?;
339                ctx.store_region(seg.header.segment_number, region.bitmap);
340            }
341            SegmentType::ImmediateHalftoneRegion | SegmentType::ImmediateLosslessHalftoneRegion => {
342                let pattern_dict = seg
343                    .header
344                    .referred_to_segments
345                    .first()
346                    .and_then(|&num| ctx.get_pattern_dictionary(num))
347                    .ok_or(SegmentError::MissingPatternDictionary)?;
348
349                let header = halftone::parse(&mut reader)?;
350
351                if ctx.can_decode_directly(
352                    &page_bitmap,
353                    &header.region_info,
354                    header.flags.initial_pixel_color,
355                ) {
356                    halftone::decode_into(&header, pattern_dict, &mut page_bitmap)?;
357                } else {
358                    let region = halftone::decode(&header, pattern_dict)?;
359                    page_bitmap.combine(
360                        &region.bitmap,
361                        region.bitmap.x_location as i32,
362                        region.bitmap.y_location as i32,
363                        region.combination_operator,
364                    );
365                }
366                ctx.page_pristine = false;
367            }
368            SegmentType::IntermediateHalftoneRegion => {
369                let pattern_dict = seg
370                    .header
371                    .referred_to_segments
372                    .first()
373                    .and_then(|&num| ctx.get_pattern_dictionary(num))
374                    .ok_or(SegmentError::MissingPatternDictionary)?;
375
376                let header = halftone::parse(&mut reader)?;
377                let region = halftone::decode(&header, pattern_dict)?;
378                ctx.store_region(seg.header.segment_number, region.bitmap);
379            }
380            SegmentType::IntermediateGenericRefinementRegion => {
381                // Same logic as immediate refinement, but store result instead of combining.
382                let reference = seg
383                    .header
384                    .referred_to_segments
385                    .first()
386                    .and_then(|&num| ctx.get_referred_segment(num))
387                    .unwrap_or(&page_bitmap);
388
389                let header = generic_refinement::parse(&mut reader)?;
390                let region = generic_refinement::decode(&header, reference)?;
391                ctx.store_region(seg.header.segment_number, region.bitmap);
392            }
393            SegmentType::ImmediateGenericRefinementRegion
394            | SegmentType::ImmediateLosslessGenericRefinementRegion => {
395                // "3) Determine the buffer associated with the region segment that
396                // this segment refers to." (7.4.7.5)
397                //
398                // "2) If there are no referred-to segments, then use the page
399                // bitmap as the reference buffer." (7.4.7.5)
400                let referred_segment = seg
401                    .header
402                    .referred_to_segments
403                    .first()
404                    .and_then(|&num| ctx.get_referred_segment(num));
405
406                let header = generic_refinement::parse(&mut reader)?;
407
408                if let Some(referred_segment) = referred_segment
409                    && ctx.can_decode_directly(&page_bitmap, &header.region_info, false)
410                {
411                    generic_refinement::decode_into(&header, referred_segment, &mut page_bitmap)?;
412                } else {
413                    let reference = referred_segment.unwrap_or(&page_bitmap);
414                    let region = generic_refinement::decode(&header, reference)?;
415                    page_bitmap.combine(
416                        &region.bitmap,
417                        region.bitmap.x_location as i32,
418                        region.bitmap.y_location as i32,
419                        region.combination_operator,
420                    );
421                }
422                ctx.page_pristine = false;
423            }
424            SegmentType::Tables => {
425                // "Tables – see 7.4.13." (type 53)
426                // "This segment contains data which defines one or more user-supplied
427                // Huffman coding tables." (7.4.13)
428                let table = HuffmanTable::read_custom(&mut reader)?;
429                ctx.store_huffman_table(seg.header.segment_number, table);
430            }
431            SegmentType::EndOfPage | SegmentType::EndOfFile => {
432                break;
433            }
434            // Other segment types not yet implemented.
435            _ => {}
436        }
437    }
438
439    Ok(Image {
440        width: page_bitmap.width,
441        height: page_bitmap.height,
442        stride: page_bitmap.stride,
443        data: page_bitmap.data,
444    })
445}
446
447/// Decoding context for a JBIG2 page.
448///
449/// This holds the page information and the page bitmap that regions are
450/// decoded into.
451pub(crate) struct DecodeContext {
452    /// The parsed page information.
453    pub(crate) page_info: PageInformation,
454    /// Whether the page bitmap is still in its initial state (not yet painted to).
455    pub(crate) page_pristine: bool,
456    /// Decoded intermediate regions, stored as (`segment_number`, region) pairs.
457    pub(crate) referred_segments: Vec<(u32, Bitmap)>,
458    /// Decoded pattern dictionaries, stored as (`segment_number`, dictionary) pairs.
459    pub(crate) pattern_dictionaries: Vec<(u32, PatternDictionary)>,
460    /// Decoded symbol dictionaries, stored as (`segment_number`, dictionary) pairs.
461    pub(crate) symbol_dictionaries: Vec<(u32, SymbolDictionary)>,
462    /// Decoded Huffman tables from table segments, stored as (`segment_number`, table) pairs.
463    /// "Tables – see 7.4.13." (type 53)
464    pub(crate) huffman_tables: Vec<(u32, HuffmanTable)>,
465    /// Standard Huffman tables (`TABLE_A` through `TABLE_O`).
466    pub(crate) standard_tables: StandardHuffmanTables,
467}
468
469impl DecodeContext {
470    /// Check if an immediate region can be decoded directly into the page bitmap.
471    fn can_decode_directly(
472        &self,
473        page_bitmap: &Bitmap,
474        region_info: &decode::RegionSegmentInfo,
475        region_default_pixel: bool,
476    ) -> bool {
477        if !self.page_pristine {
478            return false;
479        }
480
481        let covers_page = region_info.x_location == 0
482            && region_info.y_location == 0
483            && region_info.width == page_bitmap.width
484            && region_info.height == page_bitmap.height;
485
486        if !covers_page {
487            return false;
488        }
489
490        let page_default_is_zero = self.page_info.flags.default_pixel == 0;
491
492        if region_default_pixel == page_default_is_zero {
493            return false;
494        }
495
496        let op = region_info.combination_operator;
497        match op {
498            CombinationOperator::Replace => true,
499            CombinationOperator::Or | CombinationOperator::Xor => page_default_is_zero,
500            CombinationOperator::And | CombinationOperator::Xnor => !page_default_is_zero,
501        }
502    }
503
504    /// Store a decoded region for later reference.
505    fn store_region(&mut self, segment_number: u32, region: Bitmap) {
506        self.referred_segments.push((segment_number, region));
507    }
508
509    /// Look up a referred segment by number.
510    fn get_referred_segment(&self, segment_number: u32) -> Option<&Bitmap> {
511        self.referred_segments
512            .binary_search_by_key(&segment_number, |(num, _)| *num)
513            .ok()
514            .map(|idx| &self.referred_segments[idx].1)
515    }
516
517    /// Store a decoded pattern dictionary for later reference.
518    fn store_pattern_dictionary(&mut self, segment_number: u32, dictionary: PatternDictionary) {
519        self.pattern_dictionaries.push((segment_number, dictionary));
520    }
521
522    /// Look up a pattern dictionary by segment number.
523    fn get_pattern_dictionary(&self, segment_number: u32) -> Option<&PatternDictionary> {
524        self.pattern_dictionaries
525            .binary_search_by_key(&segment_number, |(num, _)| *num)
526            .ok()
527            .map(|idx| &self.pattern_dictionaries[idx].1)
528    }
529
530    /// Store a decoded symbol dictionary for later reference.
531    fn store_symbol_dictionary(&mut self, segment_number: u32, dictionary: SymbolDictionary) {
532        self.symbol_dictionaries.push((segment_number, dictionary));
533    }
534
535    /// Look up a symbol dictionary by segment number.
536    fn get_symbol_dictionary(&self, segment_number: u32) -> Option<&SymbolDictionary> {
537        self.symbol_dictionaries
538            .binary_search_by_key(&segment_number, |(num, _)| *num)
539            .ok()
540            .map(|idx| &self.symbol_dictionaries[idx].1)
541    }
542
543    /// Store a decoded Huffman table for later reference.
544    fn store_huffman_table(&mut self, segment_number: u32, table: HuffmanTable) {
545        self.huffman_tables.push((segment_number, table));
546    }
547
548    /// Look up a Huffman table by segment number.
549    fn get_huffman_table(&self, segment_number: u32) -> Option<&HuffmanTable> {
550        self.huffman_tables
551            .binary_search_by_key(&segment_number, |(num, _)| *num)
552            .ok()
553            .map(|idx| &self.huffman_tables[idx].1)
554    }
555}
556
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::vec::Vec;

    /// Test sink that records every decoded pixel, grouped into finished rows.
    struct PixelSink {
        rows: Vec<Vec<bool>>,
        current: Vec<bool>,
    }

    impl PixelSink {
        /// Create a sink with no rows and an empty in-progress row.
        fn new() -> Self {
            let rows = Vec::new();
            let current = Vec::new();
            Self { rows, current }
        }
    }

    impl Decoder for PixelSink {
        fn push_pixel(&mut self, black: bool) {
            self.current.push(black);
        }

        fn push_pixel_chunk(&mut self, black: bool, chunk_count: u32) {
            // Expand the run into individual pixels, 8 per chunk.
            self.current.extend((0..chunk_count * 8).map(|_| black));
        }

        fn next_line(&mut self) {
            // Move the in-progress row out, leaving an empty one behind.
            let finished = core::mem::take(&mut self.current);
            self.rows.push(finished);
        }
    }

    // Minimal sequential JBIG2 file describing a 4×4 all-white, MMR-coded image.
    //
    // Layout:
    //   File header (sequential, 1 page)
    //   Segment 0: PageInformation — 4×4, default pixel = white (0)
    //   Segment 1: ImmediateGenericRegion — 4×4 MMR, all-white via V(0)×4 = 0xF0
    //   Segment 2: EndOfPage
    //   Segment 3: EndOfFile
    //
    // The MMR payload encodes 4 all-white rows: each row is a single V(0) bit
    // (`1`), 4 bits in total → 0xF0 (MSB-first). With `invert_black: true`,
    // CCITT "white" maps to JBIG2 pixel value 0 (white).
    #[rustfmt::skip]
    const MINIMAL_JBIG2: &[u8] = &[
        // File header
        0x97, 0x4A, 0x42, 0x32, 0x0D, 0x0A, 0x1A, 0x0A, // magic
        0x01,                                              // flags: sequential, page count known
        0x00, 0x00, 0x00, 0x01,                            // 1 page

        // Segment 0: PageInformation (type 48), data_length = 19 bytes
        0x00, 0x00, 0x00, 0x00,  // segment_number = 0
        0x30,                    // flags: type = 48, page_assoc 1-byte
        0x00,                    // count_and_retention = 0 (0 referred segments)
        0x01,                    // page_association = 1
        0x00, 0x00, 0x00, 0x13,  // data_length = 19
        // PageInformation data (19 bytes):
        0x00, 0x00, 0x00, 0x04,  // width  = 4
        0x00, 0x00, 0x00, 0x04,  // height = 4
        0x00, 0x00, 0x00, 0x00,  // x_resolution = unknown
        0x00, 0x00, 0x00, 0x00,  // y_resolution = unknown
        0x00,                    // flags: default_pixel = 0 (white), operator = OR
        0x00, 0x00,              // striping = 0

        // Segment 1: ImmediateGenericRegion (type 38), data_length = 19 bytes
        0x00, 0x00, 0x00, 0x01,  // segment_number = 1
        0x26,                    // flags: type = 38, page_assoc 1-byte
        0x00,                    // count_and_retention = 0
        0x01,                    // page_association = 1
        0x00, 0x00, 0x00, 0x13,  // data_length = 19
        // RegionSegmentInfo (17 bytes):
        0x00, 0x00, 0x00, 0x04,  // width       = 4
        0x00, 0x00, 0x00, 0x04,  // height      = 4
        0x00, 0x00, 0x00, 0x00,  // x_location  = 0
        0x00, 0x00, 0x00, 0x00,  // y_location  = 0
        0x04,                    // region_flags: CombinationOperator::Replace (4)
        // GenericRegion data (2 bytes):
        0x01,                    // generic_region_flags: mmr = 1
        0xF0,                    // MMR data: 4 × V(0) = `1111` → 0xF0 (4 all-white rows)

        // Segment 2: EndOfPage (type 49)
        0x00, 0x00, 0x00, 0x02,  // segment_number = 2
        0x31,                    // flags: type = 49
        0x00,                    // count_and_retention = 0
        0x01,                    // page_association = 1
        0x00, 0x00, 0x00, 0x00,  // data_length = 0

        // Segment 3: EndOfFile (type 51)
        0x00, 0x00, 0x00, 0x03,  // segment_number = 3
        0x33,                    // flags: type = 51
        0x00,                    // count_and_retention = 0
        0x00,                    // page_association = 0 (not page-specific)
        0x00, 0x00, 0x00, 0x00,  // data_length = 0
    ];

    #[test]
    fn decode_minimal_jbig2_succeeds() {
        let outcome = decode(MINIMAL_JBIG2);
        assert!(outcome.is_ok());
    }

    #[test]
    fn decode_minimal_jbig2_dimensions() {
        let image = decode(MINIMAL_JBIG2).expect("JBIG2 should decode");
        assert_eq!(image.width, 4);
        assert_eq!(image.height, 4);
    }

    #[test]
    fn decode_minimal_jbig2_all_white() {
        let image = decode(MINIMAL_JBIG2).expect("JBIG2 should decode");

        let mut sink = PixelSink::new();
        image.decode(&mut sink);

        assert_eq!(sink.rows.len(), 4);
        for row in &sink.rows {
            assert_eq!(row.len(), 4);
            assert!(
                row.iter().all(|&black| !black),
                "expected white (non-black) pixel"
            );
        }
    }

    #[test]
    fn decode_empty_data_returns_error() {
        let outcome = decode(&[]);
        assert!(outcome.is_err());
    }

    #[test]
    fn decode_embedded_no_globals() {
        // An embedded stream has no file header, so MINIMAL_JBIG2 cannot be
        // reused as-is. This only checks that an empty embedded stream with
        // no globals is rejected with an error.
        let outcome = decode_embedded(&[], None);
        assert!(outcome.is_err());
    }
}
696
697/// Create a decode context from page information segment data.
698///
699/// This parses the page information and creates the initial page bitmap
700/// with the default pixel value.
701pub(crate) fn get_ctx(
702    reader: &mut Reader<'_>,
703    height_from_stripes: Option<u32>,
704) -> Result<(DecodeContext, Bitmap)> {
705    let page_info = parse_page_information(reader)?;
706
707    // "A page's bitmap height may be declared in its page information segment
708    // to be unknown (by specifying a height of 0xFFFFFFFF). In this case, the
709    // page must be striped." (7.4.8.2)
710    let height = if page_info.height == 0xFFFF_FFFF {
711        height_from_stripes.ok_or(FormatError::UnknownPageHeight)?
712    } else {
713        page_info.height
714    };
715
716    // "Bit 2: Page default pixel value. This bit contains the initial value
717    // for every pixel in the page, before any region segments are decoded
718    // or drawn." (7.4.8.5)
719    let page_bitmap = Bitmap::new_with(
720        page_info.width,
721        height,
722        0,
723        0,
724        page_info.flags.default_pixel != 0,
725    );
726
727    let ctx = DecodeContext {
728        page_info,
729        page_pristine: true,
730        referred_segments: Vec::new(),
731        pattern_dictionaries: Vec::new(),
732        symbol_dictionaries: Vec::new(),
733        huffman_tables: Vec::new(),
734        standard_tables: StandardHuffmanTables::new(),
735    };
736
737    Ok((ctx, page_bitmap))
738}