1#![cfg_attr(not(feature = "std"), no_std)]
15#![forbid(unsafe_code)]
16#![allow(missing_docs)]
17
18extern crate alloc;
19
20use alloc::vec::Vec;
21
/// Sink that receives the pixels of a decoded [`Image`].
///
/// [`Image::decode`] walks the image row by row and reports every pixel of a
/// row, left to right, through [`push_pixel`](Decoder::push_pixel) and
/// [`push_pixel_chunk`](Decoder::push_pixel_chunk), then calls
/// [`next_line`](Decoder::next_line) once the row is complete.
pub trait Decoder {
    /// Receives a single pixel; `black` is `true` when the pixel's bit is set.
    fn push_pixel(&mut self, black: bool);
    /// Receives `chunk_count` consecutive groups of 8 pixels that all share
    /// the colour `black`, i.e. `chunk_count * 8` pixels in total.
    ///
    /// [`Image::decode`] only issues this call for whole bytes that are
    /// `0x00` or `0xFF`, so the number of pixels already pushed for the
    /// current row is a multiple of 8 at that point.
    fn push_pixel_chunk(&mut self, black: bool, chunk_count: u32);
    /// Signals that the current row is finished.
    fn next_line(&mut self);
}
38
39mod arithmetic_decoder;
40mod bitmap;
41mod decode;
42mod error;
43mod file;
44mod gray_scale;
45mod huffman_table;
46mod integer_decoder;
47mod lazy;
48mod page_info;
49mod reader;
50mod segment;
51mod symbol_id_decoder;
52
53use error::bail;
54pub use error::{
55 DecodeError, FormatError, HuffmanError, ParseError, RegionError, Result, SegmentError,
56 SymbolError, TemplateError,
57};
58
59use crate::file::parse_segments_sequential;
60use bitmap::Bitmap;
61use decode::CombinationOperator;
62use decode::generic;
63use decode::generic_refinement;
64use decode::halftone;
65use decode::pattern;
66use decode::pattern::PatternDictionary;
67use decode::symbol;
68use decode::symbol::SymbolDictionary;
69use decode::text;
70use file::parse_file;
71use huffman_table::{HuffmanTable, StandardHuffmanTables};
72use page_info::{PageInformation, parse_page_information};
73use reader::Reader;
74use segment::SegmentType;
75
76#[derive(Debug, Clone)]
78pub struct Image {
79 pub width: u32,
81 pub height: u32,
83 stride: u32,
85 data: Vec<u32>,
87}
88
89impl Image {
90 pub fn decode<D: Decoder>(&self, decoder: &mut D) {
92 let bytes_per_row = self.width.div_ceil(8) as usize;
93
94 for row in self.data.chunks_exact(self.stride as usize) {
95 let mut x = 0_u32;
96 let mut chunk_byte: Option<u8> = None;
97 let mut chunk_count = 0_u32;
98
99 let bytes = row.iter().flat_map(|w| w.to_be_bytes()).take(bytes_per_row);
100
101 for byte in bytes {
102 let remaining = self.width - x;
103
104 if remaining >= 8 && (byte == 0x00 || byte == 0xFF) {
105 if chunk_byte == Some(byte) {
107 chunk_count += 1;
108 x += 8;
109 continue;
110 }
111
112 if let Some(b) = chunk_byte {
114 decoder.push_pixel_chunk(b == 0xFF, chunk_count);
115 }
116
117 chunk_byte = Some(byte);
118 chunk_count = 1;
119 x += 8;
120
121 continue;
122 }
123
124 if let Some(b) = chunk_byte.take() {
126 decoder.push_pixel_chunk(b == 0xFF, chunk_count);
127 chunk_count = 0;
128 }
129
130 let count = remaining.min(8);
132 for i in 0..count {
133 decoder.push_pixel((byte >> (7 - i)) & 1 != 0);
134 }
135 x += count;
136 }
137
138 if let Some(b) = chunk_byte {
140 decoder.push_pixel_chunk(b == 0xFF, chunk_count);
141 }
142
143 decoder.next_line();
144 }
145 }
146}
147
148pub fn decode(data: &[u8]) -> Result<Image> {
153 let file = parse_file(data)?;
154 decode_with_segments(&file.segments)
155}
156
157pub fn decode_embedded(data: &[u8], globals: Option<&[u8]>) -> Result<Image> {
162 let mut segments = Vec::new();
163 if let Some(globals_data) = globals {
164 let mut reader = Reader::new(globals_data);
165 parse_segments_sequential(&mut reader, &mut segments)?;
166 };
167
168 let mut reader = Reader::new(data);
169 parse_segments_sequential(&mut reader, &mut segments)?;
170
171 segments.sort_by_key(|seg| seg.header.segment_number);
172
173 decode_with_segments(&segments)
174}
175
176fn decode_with_segments(segments: &[segment::Segment<'_>]) -> Result<Image> {
177 let height_from_stripes = segments
179 .iter()
180 .filter(|seg| seg.header.segment_type == SegmentType::EndOfStripe)
181 .filter_map(|seg| u32::from_be_bytes(seg.data.try_into().ok()?).checked_add(1))
182 .max();
183
184 let (mut ctx, mut page_bitmap) = if let Some(page_info) = segments
186 .iter()
187 .find(|s| s.header.segment_type == SegmentType::PageInformation)
188 {
189 let mut reader = Reader::new(page_info.data);
190 get_ctx(&mut reader, height_from_stripes)?
191 } else {
192 bail!(FormatError::MissingPageInfo);
193 };
194
195 for seg in segments {
197 let mut reader = Reader::new(seg.data);
198
199 match seg.header.segment_type {
200 SegmentType::PageInformation => {
201 }
203 SegmentType::ImmediateGenericRegion | SegmentType::ImmediateLosslessGenericRegion => {
204 let had_unknown_length = seg.header.data_length.is_none();
205 let header = generic::parse(&mut reader, had_unknown_length)?;
206
207 if ctx.can_decode_directly(&page_bitmap, &header.region_info, false) {
208 generic::decode_into(&header, &mut page_bitmap)?;
209 } else {
210 let region = generic::decode(&header)?;
211 page_bitmap.combine(
212 ®ion.bitmap,
213 region.bitmap.x_location as i32,
214 region.bitmap.y_location as i32,
215 region.combination_operator,
216 );
217 }
218 ctx.page_pristine = false;
219 }
220 SegmentType::IntermediateGenericRegion => {
221 let header = generic::parse(&mut reader, false)?;
223 let region = generic::decode(&header)?;
224 ctx.store_region(seg.header.segment_number, region.bitmap);
225 }
226 SegmentType::PatternDictionary => {
227 let header = pattern::parse(&mut reader)?;
228 let dictionary = pattern::decode(&header)?;
229 ctx.store_pattern_dictionary(seg.header.segment_number, dictionary);
230 }
231 SegmentType::SymbolDictionary => {
232 let input_symbols: Vec<&Bitmap> = seg
236 .header
237 .referred_to_segments
238 .iter()
239 .filter_map(|&num| ctx.get_symbol_dictionary(num))
240 .flat_map(|dict| dict.exported_symbols.iter())
241 .collect();
242
243 let referred_tables: Vec<HuffmanTable> = seg
245 .header
246 .referred_to_segments
247 .iter()
248 .filter_map(|&num| ctx.get_huffman_table(num))
249 .cloned()
250 .collect();
251
252 let retained_contexts = seg
254 .header
255 .referred_to_segments
256 .last()
257 .and_then(|&num| ctx.get_symbol_dictionary(num))
258 .and_then(|dict| dict.retained_contexts.as_ref());
259
260 let header = symbol::parse(&mut reader)?;
261 let dictionary = symbol::decode(
262 &header,
263 &input_symbols,
264 &referred_tables,
265 &ctx.standard_tables,
266 retained_contexts,
267 )?;
268 ctx.store_symbol_dictionary(seg.header.segment_number, dictionary);
269 }
270 SegmentType::ImmediateTextRegion | SegmentType::ImmediateLosslessTextRegion => {
271 let symbols: Vec<&Bitmap> = seg
273 .header
274 .referred_to_segments
275 .iter()
276 .filter_map(|&num| ctx.get_symbol_dictionary(num))
277 .flat_map(|dict| dict.exported_symbols.iter())
278 .collect();
279
280 let referred_tables: Vec<HuffmanTable> = seg
284 .header
285 .referred_to_segments
286 .iter()
287 .filter_map(|&num| ctx.get_huffman_table(num))
288 .cloned()
289 .collect();
290
291 let header = text::parse(&mut reader, symbols.len() as u32)?;
292
293 if ctx.can_decode_directly(
294 &page_bitmap,
295 &header.region_info,
296 header.flags.default_pixel,
297 ) {
298 text::decode_into(
299 &header,
300 &symbols,
301 &referred_tables,
302 &ctx.standard_tables,
303 &mut page_bitmap,
304 )?;
305 } else {
306 let region =
307 text::decode(&header, &symbols, &referred_tables, &ctx.standard_tables)?;
308 page_bitmap.combine(
309 ®ion.bitmap,
310 region.bitmap.x_location as i32,
311 region.bitmap.y_location as i32,
312 region.combination_operator,
313 );
314 }
315 ctx.page_pristine = false;
316 }
317 SegmentType::IntermediateTextRegion => {
318 let symbols: Vec<&Bitmap> = seg
320 .header
321 .referred_to_segments
322 .iter()
323 .filter_map(|&num| ctx.get_symbol_dictionary(num))
324 .flat_map(|dict| dict.exported_symbols.iter())
325 .collect();
326
327 let referred_tables: Vec<HuffmanTable> = seg
329 .header
330 .referred_to_segments
331 .iter()
332 .filter_map(|&num| ctx.get_huffman_table(num))
333 .cloned()
334 .collect();
335
336 let header = text::parse(&mut reader, symbols.len() as u32)?;
337 let region =
338 text::decode(&header, &symbols, &referred_tables, &ctx.standard_tables)?;
339 ctx.store_region(seg.header.segment_number, region.bitmap);
340 }
341 SegmentType::ImmediateHalftoneRegion | SegmentType::ImmediateLosslessHalftoneRegion => {
342 let pattern_dict = seg
343 .header
344 .referred_to_segments
345 .first()
346 .and_then(|&num| ctx.get_pattern_dictionary(num))
347 .ok_or(SegmentError::MissingPatternDictionary)?;
348
349 let header = halftone::parse(&mut reader)?;
350
351 if ctx.can_decode_directly(
352 &page_bitmap,
353 &header.region_info,
354 header.flags.initial_pixel_color,
355 ) {
356 halftone::decode_into(&header, pattern_dict, &mut page_bitmap)?;
357 } else {
358 let region = halftone::decode(&header, pattern_dict)?;
359 page_bitmap.combine(
360 ®ion.bitmap,
361 region.bitmap.x_location as i32,
362 region.bitmap.y_location as i32,
363 region.combination_operator,
364 );
365 }
366 ctx.page_pristine = false;
367 }
368 SegmentType::IntermediateHalftoneRegion => {
369 let pattern_dict = seg
370 .header
371 .referred_to_segments
372 .first()
373 .and_then(|&num| ctx.get_pattern_dictionary(num))
374 .ok_or(SegmentError::MissingPatternDictionary)?;
375
376 let header = halftone::parse(&mut reader)?;
377 let region = halftone::decode(&header, pattern_dict)?;
378 ctx.store_region(seg.header.segment_number, region.bitmap);
379 }
380 SegmentType::IntermediateGenericRefinementRegion => {
381 let reference = seg
383 .header
384 .referred_to_segments
385 .first()
386 .and_then(|&num| ctx.get_referred_segment(num))
387 .unwrap_or(&page_bitmap);
388
389 let header = generic_refinement::parse(&mut reader)?;
390 let region = generic_refinement::decode(&header, reference)?;
391 ctx.store_region(seg.header.segment_number, region.bitmap);
392 }
393 SegmentType::ImmediateGenericRefinementRegion
394 | SegmentType::ImmediateLosslessGenericRefinementRegion => {
395 let referred_segment = seg
401 .header
402 .referred_to_segments
403 .first()
404 .and_then(|&num| ctx.get_referred_segment(num));
405
406 let header = generic_refinement::parse(&mut reader)?;
407
408 if let Some(referred_segment) = referred_segment
409 && ctx.can_decode_directly(&page_bitmap, &header.region_info, false)
410 {
411 generic_refinement::decode_into(&header, referred_segment, &mut page_bitmap)?;
412 } else {
413 let reference = referred_segment.unwrap_or(&page_bitmap);
414 let region = generic_refinement::decode(&header, reference)?;
415 page_bitmap.combine(
416 ®ion.bitmap,
417 region.bitmap.x_location as i32,
418 region.bitmap.y_location as i32,
419 region.combination_operator,
420 );
421 }
422 ctx.page_pristine = false;
423 }
424 SegmentType::Tables => {
425 let table = HuffmanTable::read_custom(&mut reader)?;
429 ctx.store_huffman_table(seg.header.segment_number, table);
430 }
431 SegmentType::EndOfPage | SegmentType::EndOfFile => {
432 break;
433 }
434 _ => {}
436 }
437 }
438
439 Ok(Image {
440 width: page_bitmap.width,
441 height: page_bitmap.height,
442 stride: page_bitmap.stride,
443 data: page_bitmap.data,
444 })
445}
446
447pub(crate) struct DecodeContext {
452 pub(crate) page_info: PageInformation,
454 pub(crate) page_pristine: bool,
456 pub(crate) referred_segments: Vec<(u32, Bitmap)>,
458 pub(crate) pattern_dictionaries: Vec<(u32, PatternDictionary)>,
460 pub(crate) symbol_dictionaries: Vec<(u32, SymbolDictionary)>,
462 pub(crate) huffman_tables: Vec<(u32, HuffmanTable)>,
465 pub(crate) standard_tables: StandardHuffmanTables,
467}
468
469impl DecodeContext {
470 fn can_decode_directly(
472 &self,
473 page_bitmap: &Bitmap,
474 region_info: &decode::RegionSegmentInfo,
475 region_default_pixel: bool,
476 ) -> bool {
477 if !self.page_pristine {
478 return false;
479 }
480
481 let covers_page = region_info.x_location == 0
482 && region_info.y_location == 0
483 && region_info.width == page_bitmap.width
484 && region_info.height == page_bitmap.height;
485
486 if !covers_page {
487 return false;
488 }
489
490 let page_default_is_zero = self.page_info.flags.default_pixel == 0;
491
492 if region_default_pixel == page_default_is_zero {
493 return false;
494 }
495
496 let op = region_info.combination_operator;
497 match op {
498 CombinationOperator::Replace => true,
499 CombinationOperator::Or | CombinationOperator::Xor => page_default_is_zero,
500 CombinationOperator::And | CombinationOperator::Xnor => !page_default_is_zero,
501 }
502 }
503
504 fn store_region(&mut self, segment_number: u32, region: Bitmap) {
506 self.referred_segments.push((segment_number, region));
507 }
508
509 fn get_referred_segment(&self, segment_number: u32) -> Option<&Bitmap> {
511 self.referred_segments
512 .binary_search_by_key(&segment_number, |(num, _)| *num)
513 .ok()
514 .map(|idx| &self.referred_segments[idx].1)
515 }
516
517 fn store_pattern_dictionary(&mut self, segment_number: u32, dictionary: PatternDictionary) {
519 self.pattern_dictionaries.push((segment_number, dictionary));
520 }
521
522 fn get_pattern_dictionary(&self, segment_number: u32) -> Option<&PatternDictionary> {
524 self.pattern_dictionaries
525 .binary_search_by_key(&segment_number, |(num, _)| *num)
526 .ok()
527 .map(|idx| &self.pattern_dictionaries[idx].1)
528 }
529
530 fn store_symbol_dictionary(&mut self, segment_number: u32, dictionary: SymbolDictionary) {
532 self.symbol_dictionaries.push((segment_number, dictionary));
533 }
534
535 fn get_symbol_dictionary(&self, segment_number: u32) -> Option<&SymbolDictionary> {
537 self.symbol_dictionaries
538 .binary_search_by_key(&segment_number, |(num, _)| *num)
539 .ok()
540 .map(|idx| &self.symbol_dictionaries[idx].1)
541 }
542
543 fn store_huffman_table(&mut self, segment_number: u32, table: HuffmanTable) {
545 self.huffman_tables.push((segment_number, table));
546 }
547
548 fn get_huffman_table(&self, segment_number: u32) -> Option<&HuffmanTable> {
550 self.huffman_tables
551 .binary_search_by_key(&segment_number, |(num, _)| *num)
552 .ok()
553 .map(|idx| &self.huffman_tables[idx].1)
554 }
555}
556
557pub(crate) fn get_ctx(
562 reader: &mut Reader<'_>,
563 height_from_stripes: Option<u32>,
564) -> Result<(DecodeContext, Bitmap)> {
565 let page_info = parse_page_information(reader)?;
566
567 let height = if page_info.height == 0xFFFF_FFFF {
571 height_from_stripes.ok_or(FormatError::UnknownPageHeight)?
572 } else {
573 page_info.height
574 };
575
576 let page_bitmap = Bitmap::new_with(
580 page_info.width,
581 height,
582 0,
583 0,
584 page_info.flags.default_pixel != 0,
585 );
586
587 let ctx = DecodeContext {
588 page_info,
589 page_pristine: true,
590 referred_segments: Vec::new(),
591 pattern_dictionaries: Vec::new(),
592 symbol_dictionaries: Vec::new(),
593 huffman_tables: Vec::new(),
594 standard_tables: StandardHuffmanTables::new(),
595 };
596
597 Ok((ctx, page_bitmap))
598}
599
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::vec::Vec;

    /// Test decoder that records every pixel it receives, one `Vec<bool>` per
    /// finished row.
    struct PixelSink {
        /// Rows completed so far.
        rows: Vec<Vec<bool>>,
        /// Pixels of the row currently being filled.
        current: Vec<bool>,
    }

    impl PixelSink {
        fn new() -> Self {
            let (rows, current) = (Vec::new(), Vec::new());
            Self { rows, current }
        }
    }

    impl Decoder for PixelSink {
        fn push_pixel(&mut self, black: bool) {
            self.current.push(black);
        }

        fn push_pixel_chunk(&mut self, black: bool, chunk_count: u32) {
            // Every chunk stands for 8 pixels of the same colour.
            let pixel_count = (chunk_count * 8) as usize;
            let new_len = self.current.len() + pixel_count;
            self.current.resize(new_len, black);
        }

        fn next_line(&mut self) {
            let finished = core::mem::take(&mut self.current);
            self.rows.push(finished);
        }
    }

    /// Hand-written JBIG2 file that the tests below expect to decode to a
    /// 4x4 image whose pixels are all white.
    #[rustfmt::skip]
    const MINIMAL_JBIG2: &[u8] = &[
        0x97, 0x4A, 0x42, 0x32, 0x0D, 0x0A, 0x1A, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x30, 0x00, 0x01, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x26, 0x00, 0x01, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x01, 0xF0, 0x00, 0x00, 0x00, 0x02, 0x31, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x33, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ];

    #[test]
    fn decode_minimal_jbig2_succeeds() {
        let outcome = decode(MINIMAL_JBIG2);
        assert!(outcome.is_ok());
    }

    #[test]
    fn decode_minimal_jbig2_dimensions() {
        let image = decode(MINIMAL_JBIG2).expect("JBIG2 should decode");
        assert_eq!(image.width, 4);
        assert_eq!(image.height, 4);
    }

    #[test]
    fn decode_minimal_jbig2_all_white() {
        let mut sink = PixelSink::new();
        decode(MINIMAL_JBIG2)
            .expect("JBIG2 should decode")
            .decode(&mut sink);

        assert_eq!(sink.rows.len(), 4);
        for row in sink.rows.iter() {
            assert_eq!(row.len(), 4);
            row.iter()
                .for_each(|&black| assert!(!black, "expected white (non-black) pixel"));
        }
    }

    #[test]
    fn decode_empty_data_returns_error() {
        let outcome = decode(&[]);
        assert!(outcome.is_err());
    }

    #[test]
    fn decode_embedded_no_globals() {
        let outcome = decode_embedded(&[], None);
        assert!(outcome.is_err());
    }
}