1#![cfg_attr(not(feature = "std"), no_std)]
15#![forbid(unsafe_code)]
16#![allow(missing_docs)]
17
18extern crate alloc;
19
20use alloc::vec::Vec;
21
/// Receiver for the pixels of a decoded [`Image`].
///
/// [`Image::decode`] walks the image row by row and reports every pixel
/// through this trait, either one at a time or as runs of identical pixels.
pub trait Decoder {
    /// Receives a single pixel; `black` is `true` for a black pixel.
    fn push_pixel(&mut self, black: bool);

    /// Receives a run of identically coloured pixels.
    ///
    /// `chunk_count` is the number of 8-pixel groups in the run, so one call
    /// stands for `chunk_count * 8` pixels of the colour given by `black`.
    fn push_pixel_chunk(&mut self, black: bool, chunk_count: u32);

    /// Marks the end of the current row.
    fn next_line(&mut self);
}
38
39mod arithmetic_decoder;
40mod bitmap;
41mod decode;
42mod error;
43mod file;
44mod gray_scale;
45mod huffman_table;
46mod integer_decoder;
47mod lazy;
48mod page_info;
49mod reader;
50mod segment;
51mod symbol_id_decoder;
52
53use error::bail;
54pub use error::{
55 DecodeError, FormatError, HuffmanError, ParseError, RegionError, Result, SegmentError,
56 SymbolError, TemplateError,
57};
58
59use crate::file::parse_segments_sequential;
60use bitmap::Bitmap;
61use decode::CombinationOperator;
62use decode::generic;
63use decode::generic_refinement;
64use decode::halftone;
65use decode::pattern;
66use decode::pattern::PatternDictionary;
67use decode::symbol;
68use decode::symbol::SymbolDictionary;
69use decode::text;
70use file::parse_file;
71use huffman_table::{HuffmanTable, StandardHuffmanTables};
72use page_info::{PageInformation, parse_page_information};
73use reader::Reader;
74use segment::SegmentType;
75
/// A decoded bi-level page image.
///
/// Pixels are stored packed, one bit per pixel, in rows of `stride` 32-bit
/// words; use [`Image::decode`] to read them back out.
#[derive(Clone, Debug)]
pub struct Image {
    /// Width of the image in pixels.
    pub width: u32,
    /// Height of the image in pixels.
    pub height: u32,
    /// Number of `u32` words that make up one row of `data`.
    stride: u32,
    /// Packed pixel rows; within each word the most significant bit comes
    /// first, and a set bit is reported as a black pixel.
    data: Vec<u32>,
}
88
89impl Image {
90 pub fn decode<D: Decoder>(&self, decoder: &mut D) {
92 let bytes_per_row = self.width.div_ceil(8) as usize;
93
94 for row in self.data.chunks_exact(self.stride as usize) {
95 let mut x = 0_u32;
96 let mut chunk_byte: Option<u8> = None;
97 let mut chunk_count = 0_u32;
98
99 let bytes = row.iter().flat_map(|w| w.to_be_bytes()).take(bytes_per_row);
100
101 for byte in bytes {
102 let remaining = self.width - x;
103
104 if remaining >= 8 && (byte == 0x00 || byte == 0xFF) {
105 if chunk_byte == Some(byte) {
107 chunk_count += 1;
108 x += 8;
109 continue;
110 }
111
112 if let Some(b) = chunk_byte {
114 decoder.push_pixel_chunk(b == 0xFF, chunk_count);
115 }
116
117 chunk_byte = Some(byte);
118 chunk_count = 1;
119 x += 8;
120
121 continue;
122 }
123
124 if let Some(b) = chunk_byte.take() {
126 decoder.push_pixel_chunk(b == 0xFF, chunk_count);
127 chunk_count = 0;
128 }
129
130 let count = remaining.min(8);
132 for i in 0..count {
133 decoder.push_pixel((byte >> (7 - i)) & 1 != 0);
134 }
135 x += count;
136 }
137
138 if let Some(b) = chunk_byte {
140 decoder.push_pixel_chunk(b == 0xFF, chunk_count);
141 }
142
143 decoder.next_line();
144 }
145 }
146}
147
148pub fn decode(data: &[u8]) -> Result<Image> {
153 let file = parse_file(data)?;
154 decode_with_segments(&file.segments)
155}
156
157pub fn decode_embedded(data: &[u8], globals: Option<&[u8]>) -> Result<Image> {
162 let mut segments = Vec::new();
163 if let Some(globals_data) = globals {
164 let mut reader = Reader::new(globals_data);
165 parse_segments_sequential(&mut reader, &mut segments)?;
166 };
167
168 let mut reader = Reader::new(data);
169 parse_segments_sequential(&mut reader, &mut segments)?;
170
171 segments.sort_by_key(|seg| seg.header.segment_number);
172
173 decode_with_segments(&segments)
174}
175
176fn decode_with_segments(segments: &[segment::Segment<'_>]) -> Result<Image> {
177 let height_from_stripes = segments
179 .iter()
180 .filter(|seg| seg.header.segment_type == SegmentType::EndOfStripe)
181 .filter_map(|seg| u32::from_be_bytes(seg.data.try_into().ok()?).checked_add(1))
182 .max();
183
184 let (mut ctx, mut page_bitmap) = if let Some(page_info) = segments
186 .iter()
187 .find(|s| s.header.segment_type == SegmentType::PageInformation)
188 {
189 let mut reader = Reader::new(page_info.data);
190 get_ctx(&mut reader, height_from_stripes)?
191 } else {
192 bail!(FormatError::MissingPageInfo);
193 };
194
195 for seg in segments {
197 let mut reader = Reader::new(seg.data);
198
199 match seg.header.segment_type {
200 SegmentType::PageInformation => {
201 }
203 SegmentType::ImmediateGenericRegion | SegmentType::ImmediateLosslessGenericRegion => {
204 let had_unknown_length = seg.header.data_length.is_none();
205 let header = generic::parse(&mut reader, had_unknown_length)?;
206
207 if ctx.can_decode_directly(&page_bitmap, &header.region_info, false) {
208 generic::decode_into(&header, &mut page_bitmap)?;
209 } else {
210 let region = generic::decode(&header)?;
211 page_bitmap.combine(
212 ®ion.bitmap,
213 region.bitmap.x_location as i32,
214 region.bitmap.y_location as i32,
215 region.combination_operator,
216 );
217 }
218 ctx.page_pristine = false;
219 }
220 SegmentType::IntermediateGenericRegion => {
221 let header = generic::parse(&mut reader, false)?;
223 let region = generic::decode(&header)?;
224 ctx.store_region(seg.header.segment_number, region.bitmap);
225 }
226 SegmentType::PatternDictionary => {
227 let header = pattern::parse(&mut reader)?;
228 let dictionary = pattern::decode(&header)?;
229 ctx.store_pattern_dictionary(seg.header.segment_number, dictionary);
230 }
231 SegmentType::SymbolDictionary => {
232 let input_symbols: Vec<&Bitmap> = seg
236 .header
237 .referred_to_segments
238 .iter()
239 .filter_map(|&num| ctx.get_symbol_dictionary(num))
240 .flat_map(|dict| dict.exported_symbols.iter())
241 .collect();
242
243 let referred_tables: Vec<HuffmanTable> = seg
245 .header
246 .referred_to_segments
247 .iter()
248 .filter_map(|&num| ctx.get_huffman_table(num))
249 .cloned()
250 .collect();
251
252 let retained_contexts = seg
254 .header
255 .referred_to_segments
256 .last()
257 .and_then(|&num| ctx.get_symbol_dictionary(num))
258 .and_then(|dict| dict.retained_contexts.as_ref());
259
260 let header = symbol::parse(&mut reader)?;
261 let dictionary = symbol::decode(
262 &header,
263 &input_symbols,
264 &referred_tables,
265 &ctx.standard_tables,
266 retained_contexts,
267 )?;
268 ctx.store_symbol_dictionary(seg.header.segment_number, dictionary);
269 }
270 SegmentType::ImmediateTextRegion | SegmentType::ImmediateLosslessTextRegion => {
271 let symbols: Vec<&Bitmap> = seg
273 .header
274 .referred_to_segments
275 .iter()
276 .filter_map(|&num| ctx.get_symbol_dictionary(num))
277 .flat_map(|dict| dict.exported_symbols.iter())
278 .collect();
279
280 let referred_tables: Vec<HuffmanTable> = seg
284 .header
285 .referred_to_segments
286 .iter()
287 .filter_map(|&num| ctx.get_huffman_table(num))
288 .cloned()
289 .collect();
290
291 let header = text::parse(&mut reader, symbols.len() as u32)?;
292
293 if ctx.can_decode_directly(
294 &page_bitmap,
295 &header.region_info,
296 header.flags.default_pixel,
297 ) {
298 text::decode_into(
299 &header,
300 &symbols,
301 &referred_tables,
302 &ctx.standard_tables,
303 &mut page_bitmap,
304 )?;
305 } else {
306 let region =
307 text::decode(&header, &symbols, &referred_tables, &ctx.standard_tables)?;
308 page_bitmap.combine(
309 ®ion.bitmap,
310 region.bitmap.x_location as i32,
311 region.bitmap.y_location as i32,
312 region.combination_operator,
313 );
314 }
315 ctx.page_pristine = false;
316 }
317 SegmentType::IntermediateTextRegion => {
318 let symbols: Vec<&Bitmap> = seg
320 .header
321 .referred_to_segments
322 .iter()
323 .filter_map(|&num| ctx.get_symbol_dictionary(num))
324 .flat_map(|dict| dict.exported_symbols.iter())
325 .collect();
326
327 let referred_tables: Vec<HuffmanTable> = seg
329 .header
330 .referred_to_segments
331 .iter()
332 .filter_map(|&num| ctx.get_huffman_table(num))
333 .cloned()
334 .collect();
335
336 let header = text::parse(&mut reader, symbols.len() as u32)?;
337 let region =
338 text::decode(&header, &symbols, &referred_tables, &ctx.standard_tables)?;
339 ctx.store_region(seg.header.segment_number, region.bitmap);
340 }
341 SegmentType::ImmediateHalftoneRegion | SegmentType::ImmediateLosslessHalftoneRegion => {
342 let pattern_dict = seg
343 .header
344 .referred_to_segments
345 .first()
346 .and_then(|&num| ctx.get_pattern_dictionary(num))
347 .ok_or(SegmentError::MissingPatternDictionary)?;
348
349 let header = halftone::parse(&mut reader)?;
350
351 if ctx.can_decode_directly(
352 &page_bitmap,
353 &header.region_info,
354 header.flags.initial_pixel_color,
355 ) {
356 halftone::decode_into(&header, pattern_dict, &mut page_bitmap)?;
357 } else {
358 let region = halftone::decode(&header, pattern_dict)?;
359 page_bitmap.combine(
360 ®ion.bitmap,
361 region.bitmap.x_location as i32,
362 region.bitmap.y_location as i32,
363 region.combination_operator,
364 );
365 }
366 ctx.page_pristine = false;
367 }
368 SegmentType::IntermediateHalftoneRegion => {
369 let pattern_dict = seg
370 .header
371 .referred_to_segments
372 .first()
373 .and_then(|&num| ctx.get_pattern_dictionary(num))
374 .ok_or(SegmentError::MissingPatternDictionary)?;
375
376 let header = halftone::parse(&mut reader)?;
377 let region = halftone::decode(&header, pattern_dict)?;
378 ctx.store_region(seg.header.segment_number, region.bitmap);
379 }
380 SegmentType::IntermediateGenericRefinementRegion => {
381 let reference = seg
383 .header
384 .referred_to_segments
385 .first()
386 .and_then(|&num| ctx.get_referred_segment(num))
387 .unwrap_or(&page_bitmap);
388
389 let header = generic_refinement::parse(&mut reader)?;
390 let region = generic_refinement::decode(&header, reference)?;
391 ctx.store_region(seg.header.segment_number, region.bitmap);
392 }
393 SegmentType::ImmediateGenericRefinementRegion
394 | SegmentType::ImmediateLosslessGenericRefinementRegion => {
395 let referred_segment = seg
401 .header
402 .referred_to_segments
403 .first()
404 .and_then(|&num| ctx.get_referred_segment(num));
405
406 let header = generic_refinement::parse(&mut reader)?;
407
408 if let Some(referred_segment) = referred_segment
409 && ctx.can_decode_directly(&page_bitmap, &header.region_info, false)
410 {
411 generic_refinement::decode_into(&header, referred_segment, &mut page_bitmap)?;
412 } else {
413 let reference = referred_segment.unwrap_or(&page_bitmap);
414 let region = generic_refinement::decode(&header, reference)?;
415 page_bitmap.combine(
416 ®ion.bitmap,
417 region.bitmap.x_location as i32,
418 region.bitmap.y_location as i32,
419 region.combination_operator,
420 );
421 }
422 ctx.page_pristine = false;
423 }
424 SegmentType::Tables => {
425 let table = HuffmanTable::read_custom(&mut reader)?;
429 ctx.store_huffman_table(seg.header.segment_number, table);
430 }
431 SegmentType::EndOfPage | SegmentType::EndOfFile => {
432 break;
433 }
434 _ => {}
436 }
437 }
438
439 Ok(Image {
440 width: page_bitmap.width,
441 height: page_bitmap.height,
442 stride: page_bitmap.stride,
443 data: page_bitmap.data,
444 })
445}
446
447pub(crate) struct DecodeContext {
452 pub(crate) page_info: PageInformation,
454 pub(crate) page_pristine: bool,
456 pub(crate) referred_segments: Vec<(u32, Bitmap)>,
458 pub(crate) pattern_dictionaries: Vec<(u32, PatternDictionary)>,
460 pub(crate) symbol_dictionaries: Vec<(u32, SymbolDictionary)>,
462 pub(crate) huffman_tables: Vec<(u32, HuffmanTable)>,
465 pub(crate) standard_tables: StandardHuffmanTables,
467}
468
469impl DecodeContext {
470 fn can_decode_directly(
472 &self,
473 page_bitmap: &Bitmap,
474 region_info: &decode::RegionSegmentInfo,
475 region_default_pixel: bool,
476 ) -> bool {
477 if !self.page_pristine {
478 return false;
479 }
480
481 let covers_page = region_info.x_location == 0
482 && region_info.y_location == 0
483 && region_info.width == page_bitmap.width
484 && region_info.height == page_bitmap.height;
485
486 if !covers_page {
487 return false;
488 }
489
490 let page_default_is_zero = self.page_info.flags.default_pixel == 0;
491
492 if region_default_pixel == page_default_is_zero {
493 return false;
494 }
495
496 let op = region_info.combination_operator;
497 match op {
498 CombinationOperator::Replace => true,
499 CombinationOperator::Or | CombinationOperator::Xor => page_default_is_zero,
500 CombinationOperator::And | CombinationOperator::Xnor => !page_default_is_zero,
501 }
502 }
503
504 fn store_region(&mut self, segment_number: u32, region: Bitmap) {
506 self.referred_segments.push((segment_number, region));
507 }
508
509 fn get_referred_segment(&self, segment_number: u32) -> Option<&Bitmap> {
511 self.referred_segments
512 .binary_search_by_key(&segment_number, |(num, _)| *num)
513 .ok()
514 .map(|idx| &self.referred_segments[idx].1)
515 }
516
517 fn store_pattern_dictionary(&mut self, segment_number: u32, dictionary: PatternDictionary) {
519 self.pattern_dictionaries.push((segment_number, dictionary));
520 }
521
522 fn get_pattern_dictionary(&self, segment_number: u32) -> Option<&PatternDictionary> {
524 self.pattern_dictionaries
525 .binary_search_by_key(&segment_number, |(num, _)| *num)
526 .ok()
527 .map(|idx| &self.pattern_dictionaries[idx].1)
528 }
529
530 fn store_symbol_dictionary(&mut self, segment_number: u32, dictionary: SymbolDictionary) {
532 self.symbol_dictionaries.push((segment_number, dictionary));
533 }
534
535 fn get_symbol_dictionary(&self, segment_number: u32) -> Option<&SymbolDictionary> {
537 self.symbol_dictionaries
538 .binary_search_by_key(&segment_number, |(num, _)| *num)
539 .ok()
540 .map(|idx| &self.symbol_dictionaries[idx].1)
541 }
542
543 fn store_huffman_table(&mut self, segment_number: u32, table: HuffmanTable) {
545 self.huffman_tables.push((segment_number, table));
546 }
547
548 fn get_huffman_table(&self, segment_number: u32) -> Option<&HuffmanTable> {
550 self.huffman_tables
551 .binary_search_by_key(&segment_number, |(num, _)| *num)
552 .ok()
553 .map(|idx| &self.huffman_tables[idx].1)
554 }
555}
556
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::vec::Vec;

    /// Test decoder that records every pixel it is given, row by row.
    #[derive(Default)]
    struct PixelSink {
        /// Rows completed so far.
        rows: Vec<Vec<bool>>,
        /// Pixels of the row currently being filled.
        current: Vec<bool>,
    }

    impl PixelSink {
        fn new() -> Self {
            Self::default()
        }
    }

    impl Decoder for PixelSink {
        fn push_pixel(&mut self, black: bool) {
            self.current.push(black);
        }

        fn push_pixel_chunk(&mut self, black: bool, chunk_count: u32) {
            // Every chunk stands for eight pixels of the same colour.
            let pixels = (chunk_count * 8) as usize;
            let grown = self.current.len() + pixels;
            self.current.resize(grown, black);
        }

        fn next_line(&mut self) {
            let finished = core::mem::take(&mut self.current);
            self.rows.push(finished);
        }
    }

    // A small file that the tests below expect to decode to a 4x4 all-white
    // image.
    // NOTE(review): the line breaks presumably follow the file header and the
    // header/data of each segment; confirm against the JBIG2 specification.
    #[rustfmt::skip]
    const MINIMAL_JBIG2: &[u8] = &[
        0x97, 0x4A, 0x42, 0x32, 0x0D, 0x0A, 0x1A, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x01,
        0x00, 0x00, 0x00, 0x00, 0x30, 0x00, 0x01, 0x00, 0x00, 0x00, 0x13,
        0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x01, 0x26, 0x00, 0x01, 0x00, 0x00, 0x00, 0x13,
        0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x01, 0xF0,
        0x00, 0x00, 0x00, 0x02, 0x31, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x03, 0x33, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    ];

    #[test]
    fn decode_minimal_jbig2_succeeds() {
        let outcome = decode(MINIMAL_JBIG2);
        assert!(outcome.is_ok());
    }

    #[test]
    fn decode_minimal_jbig2_dimensions() {
        let image = decode(MINIMAL_JBIG2).expect("JBIG2 should decode");
        assert_eq!(image.width, 4);
        assert_eq!(image.height, 4);
    }

    #[test]
    fn decode_minimal_jbig2_all_white() {
        let image = decode(MINIMAL_JBIG2).expect("JBIG2 should decode");
        let mut sink = PixelSink::new();
        image.decode(&mut sink);

        assert_eq!(sink.rows.len(), 4);
        for row in &sink.rows {
            assert_eq!(row.len(), 4);
            assert!(
                row.iter().all(|&black| !black),
                "expected white (non-black) pixel"
            );
        }
    }

    #[test]
    fn decode_empty_data_returns_error() {
        let outcome = decode(&[]);
        assert!(outcome.is_err());
    }

    #[test]
    fn decode_embedded_no_globals() {
        // Empty data contains no page-information segment to decode from.
        let outcome = decode_embedded(&[], None);
        assert!(outcome.is_err());
    }
}
696
697pub(crate) fn get_ctx(
702 reader: &mut Reader<'_>,
703 height_from_stripes: Option<u32>,
704) -> Result<(DecodeContext, Bitmap)> {
705 let page_info = parse_page_information(reader)?;
706
707 let height = if page_info.height == 0xFFFF_FFFF {
711 height_from_stripes.ok_or(FormatError::UnknownPageHeight)?
712 } else {
713 page_info.height
714 };
715
716 let page_bitmap = Bitmap::new_with(
720 page_info.width,
721 height,
722 0,
723 0,
724 page_info.flags.default_pixel != 0,
725 );
726
727 let ctx = DecodeContext {
728 page_info,
729 page_pristine: true,
730 referred_segments: Vec::new(),
731 pattern_dictionaries: Vec::new(),
732 symbol_dictionaries: Vec::new(),
733 huffman_tables: Vec::new(),
734 standard_tables: StandardHuffmanTables::new(),
735 };
736
737 Ok((ctx, page_bitmap))
738}