1use crate::bitmap::Bitmap;
2use crate::error::Error;
3use crate::iff::{Chunk, DjvuFile};
4use crate::iw44::IW44Image;
5use crate::jb2::JB2Dict;
6use crate::pixmap::Pixmap;
7use std::collections::HashMap;
8use std::sync::{Arc, RwLock};
9
10#[cfg(test)]
11pub use crate::iw44::NormalizedPlanes;
12
/// One entry of the document outline decoded from the `NAVM` chunk.
///
/// Entries form a tree: every bookmark owns its nested entries in `children`.
#[derive(Debug, Clone)]
pub struct Bookmark {
    /// Display text of the entry.
    pub title: String,
    /// Link target of the entry, stored exactly as read from the file.
    pub url: String,
    /// Nested entries, in the order they appear in the file.
    pub children: Vec<Bookmark>,
}
20
/// Page rotation selected by the low three bits of the `INFO` flags byte.
///
/// NOTE(review): the variant names suggest clockwise quarter turns; confirm the
/// direction convention against the code that applies the rotation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Rotation {
    /// No rotation; also chosen for every flag value not listed below.
    None,
    /// Chosen for flag value 5.
    Cw90,
    /// Chosen for flag value 2.
    Cw180,
    /// Chosen for flag value 6.
    Cw270,
}
29
/// Page parameters read from a page's `INFO` chunk.
#[derive(Debug, Clone)]
pub struct PageInfo {
    /// Page width (first big-endian 16-bit field of the chunk).
    pub width: u16,
    /// Page height (second big-endian 16-bit field of the chunk).
    pub height: u16,
    /// Resolution; 300 when the field is missing or outside `25..=6000`.
    pub dpi: u16,
    /// Gamma: the stored byte divided by 10, or 2.2 when the byte is missing or zero.
    pub gamma: f32,
    /// Rotation decoded from the flags byte; `Rotation::None` when the byte is missing.
    pub rotation: Rotation,
}
40
/// Colour table decoded from an `FGbz` chunk.
#[derive(Debug, Clone)]
pub struct Palette {
    /// Palette entries as `(red, green, blue)`; the chunk stores each entry as blue, green, red.
    pub colors: Vec<(u8, u8, u8)>,
    /// Big-endian 16-bit values from the optional BZZ-compressed table that follows the
    /// colours; empty when bit 7 of the chunk's version byte is clear.
    /// Presumably one palette index per foreground shape — TODO confirm against the renderer.
    pub indices: Vec<i16>,
}
47
/// Granularity of a zone in the hidden text layer.
///
/// The discriminants equal the type byte stored in each encoded zone record.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TextZoneKind {
    /// Type byte 1.
    Page = 1,
    /// Type byte 2.
    Column = 2,
    /// Type byte 3.
    Region = 3,
    /// Type byte 4.
    Paragraph = 4,
    /// Type byte 5.
    Line = 5,
    /// Type byte 6.
    Word = 6,
    /// Type byte 7.
    Character = 7,
}
59
/// A rectangle of the page together with the range of layer text it covers.
///
/// Coordinates and `text_start` are the values obtained after the parser has added
/// the offsets of the previous sibling or of the parent zone.
#[derive(Debug, Clone)]
pub struct TextZone {
    /// Granularity of this zone.
    pub kind: TextZoneKind,
    /// Horizontal position of the zone.
    pub x: i32,
    /// Vertical position of the zone.
    pub y: i32,
    /// Zone width.
    pub width: i32,
    /// Zone height.
    pub height: i32,
    /// Byte offset of the zone's text inside the layer text (negative decoded values become 0).
    pub text_start: usize,
    /// Length in bytes of the zone's text.
    pub text_len: usize,
    /// Nested zones, in file order.
    pub children: Vec<TextZone>,
}
75
/// Hidden text of a page, decoded from a `TXTz` or `TXTa` chunk.
#[derive(Debug, Clone)]
pub struct TextLayer {
    /// The complete UTF-8 text of the page.
    pub text: String,
    /// Root of the zone tree; `None` when the chunk ends before any zone record.
    pub root: Option<TextZone>,
}
84
85impl TextLayer {
86 pub fn zone_text(&self, zone: &TextZone) -> &str {
88 let end = (zone.text_start + zone.text_len).min(self.text.len());
89 let start = zone.text_start.min(end);
90 if self.text.is_char_boundary(start) && self.text.is_char_boundary(end) {
92 &self.text[start..end]
93 } else {
94 ""
95 }
96 }
97}
98
/// Role of a component listed in the `DIRM` directory, taken from the low six bits
/// of its flag byte.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ComponentType {
    /// Any flag value other than 1 or 2.
    Shared,
    /// Flag value 1.
    Page,
    /// Flag value 2.
    Thumbnail,
}
106
/// One component record of the `DIRM` directory.
#[derive(Debug, Clone)]
struct DirmEntry {
    /// Role of the component.
    comp_type: ComponentType,
    /// Component identifier; `INCL` chunks are matched against it.
    id: String,
}
113
/// A parsed DjVu document: either a single `DJVU` page or a bundled `DJVM` container.
pub struct Document {
    /// The parsed IFF chunk tree.
    file: DjvuFile,
    /// Directory entries in file order; empty for a single-page document.
    dirm_entries: Vec<DirmEntry>,
    /// For each page, its index into `dirm_entries` (`[0]` for a single-page document).
    page_indices: Vec<usize>,
    /// `true` when the root form is `DJVU` rather than `DJVM`.
    is_single_page: bool,
    /// Decoded JB2 dictionaries, keyed by the address of the `Djbz` payload they came from.
    dict_cache: RwLock<HashMap<usize, Arc<JB2Dict>>>,
}
130
131impl Document {
132 pub fn parse(data: &[u8]) -> Result<Self, Error> {
134 let file = crate::iff::parse(data)?;
135 match &file.root {
136 Chunk::Form {
137 secondary_id: [b'D', b'J', b'V', b'U'],
138 ..
139 } => {
140 Ok(Document {
141 file,
142 dirm_entries: vec![],
143 page_indices: vec![0], is_single_page: true,
145 dict_cache: RwLock::new(HashMap::new()),
146 })
147 }
148 Chunk::Form {
149 secondary_id: [b'D', b'J', b'V', b'M'],
150 children,
151 ..
152 } => {
153 let dirm_chunk = children
155 .iter()
156 .find_map(|c| match c {
157 Chunk::Leaf {
158 id: [b'D', b'I', b'R', b'M'],
159 data,
160 } => Some(data.as_slice()),
161 _ => None,
162 })
163 .ok_or(Error::MissingChunk("DIRM"))?;
164
165 let (dirm_entries, is_bundled) = parse_dirm(dirm_chunk)?;
166 if !is_bundled {
167 return Err(Error::Unsupported("indirect DJVM not supported"));
168 }
169
170 let page_indices: Vec<usize> = dirm_entries
171 .iter()
172 .enumerate()
173 .filter(|(_, e)| e.comp_type == ComponentType::Page)
174 .map(|(i, _)| i)
175 .collect();
176
177 Ok(Document {
178 file,
179 dirm_entries,
180 page_indices,
181 is_single_page: false,
182 dict_cache: RwLock::new(HashMap::new()),
183 })
184 }
185 _ => Err(Error::Unsupported("not a DJVU or DJVM document")),
186 }
187 }
188
    /// Number of pages in the document (always 1 for a single-page file).
    pub fn page_count(&self) -> usize {
        self.page_indices.len()
    }
193
194 pub fn page(&self, index: usize) -> Result<Page<'_>, Error> {
196 if index >= self.page_count() {
197 return Err(Error::FormatError(format!(
198 "page index {} out of range ({})",
199 index,
200 self.page_count()
201 )));
202 }
203
204 if self.is_single_page {
205 return Page::from_form(&self.file.root, self);
206 }
207
208 let dirm_index = self.page_indices[index];
210 let form = self.get_component_form(dirm_index)?;
211 Page::from_form(form, self)
212 }
213
214 fn get_component_form(&self, dirm_index: usize) -> Result<&Chunk, Error> {
217 let forms: Vec<&Chunk> = self
218 .file
219 .root
220 .children()
221 .iter()
222 .filter(|c| matches!(c, Chunk::Form { .. }))
223 .collect();
224
225 forms
226 .get(dirm_index)
227 .copied()
228 .ok_or(Error::FormatError(format!(
229 "component {} not found",
230 dirm_index
231 )))
232 }
233
234 pub fn bookmarks(&self) -> Result<Vec<Bookmark>, Error> {
238 let navm_data = match self.file.root.find_first(b"NAVM") {
239 Some(c) => c.data(),
240 None => return Ok(vec![]),
241 };
242
243 let decoded = crate::bzz_new::bzz_decode(navm_data)
244 .map_err(|e| Error::FormatError(format!("NAVM BZZ decode: {}", e)))?;
245
246 if decoded.len() < 2 {
247 return Ok(vec![]);
248 }
249
250 let total_count = u16::from_be_bytes([decoded[0], decoded[1]]) as usize;
251 let mut pos = 2usize;
252 let mut bookmarks = Vec::new();
253 let mut decoded_count = 0usize;
254
255 while decoded_count < total_count {
256 let bm = parse_bookmark(&decoded, &mut pos, &mut decoded_count)?;
257 bookmarks.push(bm);
258 }
259
260 Ok(bookmarks)
261 }
262
263 pub fn thumbnail(&self, page_index: usize) -> Result<Option<Pixmap>, Error> {
268 if self.is_single_page {
269 return Ok(None);
270 }
271
272 let mut thumb_idx: usize = 0;
273 for (i, entry) in self.dirm_entries.iter().enumerate() {
274 if entry.comp_type != ComponentType::Thumbnail {
275 continue;
276 }
277 let form = self.get_component_form(i)?;
278 let th44_chunks: Vec<&[u8]> = form
279 .find_all(b"TH44")
280 .into_iter()
281 .map(|c| c.data())
282 .collect();
283
284 let mut img = IW44Image::new();
285 for chunk_data in &th44_chunks {
286 if chunk_data.is_empty() {
287 continue;
288 }
289 let serial = chunk_data[0];
290 if serial == 0 && img.width() > 0 {
291 if thumb_idx == page_index {
293 let pm = img
294 .to_pixmap()
295 .map_err(|e| Error::FormatError(e.to_string()))?;
296 return Ok(Some(pm));
297 }
298 thumb_idx += 1;
299 img = IW44Image::new();
300 }
301 img.decode_chunk(chunk_data)
302 .map_err(|e| Error::FormatError(e.to_string()))?;
303 }
304 if img.width() > 0 {
306 if thumb_idx == page_index {
307 let pm = img
308 .to_pixmap()
309 .map_err(|e| Error::FormatError(e.to_string()))?;
310 return Ok(Some(pm));
311 }
312 thumb_idx += 1;
313 }
314 }
315
316 Ok(None)
317 }
318
319 fn resolve_incl(&self, ref_id: &str) -> Result<&Chunk, Error> {
321 if self.is_single_page {
322 return Err(Error::FormatError("INCL in single-page document".into()));
323 }
324
325 for (i, entry) in self.dirm_entries.iter().enumerate() {
326 if entry.id == ref_id {
327 return self.get_component_form(i);
328 }
329 }
330
331 Err(Error::FormatError(format!(
332 "INCL target '{}' not found",
333 ref_id
334 )))
335 }
336
337 fn get_or_decode_dict(&self, djbz_data: &[u8]) -> Result<Arc<JB2Dict>, Error> {
344 let key = djbz_data.as_ptr() as usize;
345
346 {
348 let cache = self.dict_cache.read().unwrap();
349 if let Some(dict) = cache.get(&key) {
350 return Ok(Arc::clone(dict));
351 }
352 }
353
354 let dict = crate::jb2::decode_dict(djbz_data, None)
356 .map_err(|e| Error::FormatError(e.to_string()))?;
357 let arc = Arc::new(dict);
358 self.dict_cache
359 .write()
360 .unwrap()
361 .insert(key, Arc::clone(&arc));
362 Ok(arc)
363 }
364}
365
/// A borrowed view of one page of a [`Document`].
pub struct Page<'a> {
    /// Parameters read from the page's `INFO` chunk.
    pub info: PageInfo,
    /// The page's own form chunk.
    form: &'a Chunk,
    /// Owning document, used to resolve `INCL` references and cache dictionaries.
    doc: &'a Document,
}
372
373impl<'a> Page<'a> {
374 fn from_form(form: &'a Chunk, doc: &'a Document) -> Result<Self, Error> {
375 let info_chunk = form
376 .find_first(b"INFO")
377 .ok_or(Error::MissingChunk("INFO"))?;
378 let info = parse_info(info_chunk.data())?;
379 Ok(Page { info, form, doc })
380 }
381
    /// Whether the page form contains an `Sjbz` chunk (test builds only).
    #[cfg(test)]
    pub fn has_mask(&self) -> bool {
        self.form.find_first(b"Sjbz").is_some()
    }
386
    /// Whether the page form contains a `BG44` chunk (test builds only).
    #[cfg(test)]
    pub fn has_background(&self) -> bool {
        self.form.find_first(b"BG44").is_some()
    }
391
    /// Whether the page form contains an `FG44` chunk.
    pub fn has_foreground(&self) -> bool {
        self.form.find_first(b"FG44").is_some()
    }
403
    /// Whether the page form contains an `FGbz` chunk.
    pub fn has_palette(&self) -> bool {
        self.form.find_first(b"FGbz").is_some()
    }
407
408 pub fn decode_mask(&self) -> Result<Option<Bitmap>, Error> {
410 let sjbz = match self.form.find_first(b"Sjbz") {
411 Some(c) => c.data(),
412 None => return Ok(None),
413 };
414
415 let shared_dict = self.resolve_shared_dict()?;
416
417 let bitmap = crate::jb2::decode(sjbz, shared_dict.as_deref())
418 .map_err(|e| Error::FormatError(e.to_string()))?;
419 Ok(Some(bitmap))
420 }
421
422 pub fn decode_mask_indexed(&self) -> Result<Option<(Bitmap, Vec<i32>)>, Error> {
424 let sjbz = match self.form.find_first(b"Sjbz") {
425 Some(c) => c.data(),
426 None => return Ok(None),
427 };
428
429 let shared_dict = self.resolve_shared_dict()?;
430
431 let result = crate::jb2::decode_indexed(sjbz, shared_dict.as_deref())
432 .map_err(|e| Error::FormatError(e.to_string()))?;
433 Ok(Some(result))
434 }
435
436 pub fn decode_background(&self) -> Result<Option<Pixmap>, Error> {
438 let img = match self.decode_iw44_layer(b"BG44")? {
439 Some(img) => img,
440 None => return Ok(None),
441 };
442 let pm = img
443 .to_pixmap()
444 .map_err(|e| Error::FormatError(e.to_string()))?;
445 Ok(Some(pm))
446 }
447
    /// Number of `BG44` chunks found in the page form.
    pub fn bg44_chunk_count(&self) -> usize {
        self.form.find_all(b"BG44").len()
    }
452
453 pub fn decode_background_progressive(&self) -> Result<Option<Vec<Pixmap>>, Error> {
460 let chunks: Vec<&[u8]> = self
461 .form
462 .find_all(b"BG44")
463 .into_iter()
464 .map(|c| c.data())
465 .collect();
466
467 if chunks.is_empty() {
468 return Ok(None);
469 }
470
471 let mut img = IW44Image::new();
472 let mut frames = Vec::with_capacity(chunks.len());
473
474 for chunk_data in &chunks {
475 img.decode_chunk(chunk_data)
476 .map_err(|e| Error::FormatError(e.to_string()))?;
477 let pm = img
478 .to_pixmap()
479 .map_err(|e| Error::FormatError(e.to_string()))?;
480 frames.push(pm);
481 }
482
483 Ok(Some(frames))
484 }
485
486 pub fn decode_background_coarse(&self) -> Result<Option<Pixmap>, Error> {
493 let chunks: Vec<&[u8]> = self
494 .form
495 .find_all(b"BG44")
496 .into_iter()
497 .map(|c| c.data())
498 .collect();
499
500 if chunks.len() <= 1 {
502 return Ok(None);
503 }
504
505 let mut img = IW44Image::new();
506 img.decode_chunk(chunks[0])
507 .map_err(|e| Error::FormatError(e.to_string()))?;
508 let pm = img
509 .to_pixmap()
510 .map_err(|e| Error::FormatError(e.to_string()))?;
511 Ok(Some(pm))
512 }
513
514 pub fn decode_foreground(&self) -> Result<Option<Pixmap>, Error> {
516 let img = match self.decode_iw44_layer(b"FG44")? {
517 Some(img) => img,
518 None => return Ok(None),
519 };
520 let pm = img
521 .to_pixmap()
522 .map_err(|e| Error::FormatError(e.to_string()))?;
523 Ok(Some(pm))
524 }
525
526 #[cfg(test)]
527 pub fn decode_background_planes(&self) -> Result<Option<NormalizedPlanes>, Error> {
528 let img = match self.decode_iw44_layer(b"BG44")? {
529 Some(img) => img,
530 None => return Ok(None),
531 };
532 let planes = img
533 .to_normalized_planes_subsample(1)
534 .map_err(|e| Error::FormatError(e.to_string()))?;
535 Ok(Some(planes))
536 }
537
538 pub fn decode_palette(&self) -> Result<Option<Palette>, Error> {
540 let fgbz = match self.form.find_first(b"FGbz") {
541 Some(c) => c.data(),
542 None => return Ok(None),
543 };
544 let palette = parse_fgbz(fgbz)?;
545 Ok(Some(palette))
546 }
547
548 pub fn text_layer(&self) -> Result<Option<TextLayer>, Error> {
552 let data = if let Some(txtz) = self.form.find_first(b"TXTz") {
554 let compressed = txtz.data();
555 if compressed.is_empty() {
556 return Ok(None);
557 }
558 crate::bzz_new::bzz_decode(compressed)
559 .map_err(|e| Error::FormatError(format!("TXTz BZZ decode: {}", e)))?
560 } else if let Some(txta) = self.form.find_first(b"TXTa") {
561 txta.data().to_vec()
562 } else {
563 return Ok(None);
564 };
565
566 parse_text_layer(&data)
567 }
568
569 fn resolve_shared_dict(&self) -> Result<Option<Arc<JB2Dict>>, Error> {
575 for incl in self.form.find_all(b"INCL") {
577 let ref_id = std::str::from_utf8(incl.data())
578 .map_err(|_| Error::FormatError("invalid INCL UTF-8".into()))?
579 .trim_end_matches('\0')
580 .trim();
581
582 let shared_form = self.doc.resolve_incl(ref_id)?;
583 if let Some(djbz) = shared_form.find_first(b"Djbz") {
584 return Ok(Some(self.doc.get_or_decode_dict(djbz.data())?));
585 }
586 }
587
588 if let Some(djbz) = self.form.find_first(b"Djbz") {
590 return Ok(Some(self.doc.get_or_decode_dict(djbz.data())?));
591 }
592
593 Ok(None)
594 }
595
596 fn decode_iw44_layer(&self, chunk_id: &[u8; 4]) -> Result<Option<IW44Image>, Error> {
597 let chunks: Vec<&[u8]> = self
598 .form
599 .find_all(chunk_id)
600 .into_iter()
601 .map(|c| c.data())
602 .collect();
603
604 if chunks.is_empty() {
605 return Ok(None);
606 }
607
608 let mut img = IW44Image::new();
609 for chunk_data in &chunks {
610 img.decode_chunk(chunk_data)
611 .map_err(|e| Error::FormatError(e.to_string()))?;
612 }
613 Ok(Some(img))
614 }
615}
616
617fn parse_info(data: &[u8]) -> Result<PageInfo, Error> {
622 if data.len() < 5 {
623 return Err(Error::InvalidLength);
624 }
625
626 let width = u16::from_be_bytes([data[0], data[1]]);
627 let height = u16::from_be_bytes([data[2], data[3]]);
628 let _minver = data[4];
629 let _majver = if data.len() > 5 { data[5] } else { 0 };
630
631 let raw_dpi = if data.len() >= 8 {
633 u16::from_le_bytes([data[6], data[7]])
634 } else {
635 300
636 };
637 let dpi = if (25..=6000).contains(&raw_dpi) {
638 raw_dpi
639 } else {
640 300
641 };
642
643 let gamma_byte = if data.len() >= 9 { data[8] } else { 0 };
644 let gamma = if gamma_byte == 0 {
645 2.2_f32
646 } else {
647 gamma_byte as f32 / 10.0
648 };
649
650 let flags = if data.len() >= 10 { data[9] } else { 0 };
651 let rotation = match flags & 0x07 {
652 5 => Rotation::Cw90,
653 2 => Rotation::Cw180,
654 6 => Rotation::Cw270,
655 _ => Rotation::None,
656 };
657
658 Ok(PageInfo {
659 width,
660 height,
661 dpi,
662 gamma,
663 rotation,
664 })
665}
666
667fn parse_dirm(data: &[u8]) -> Result<(Vec<DirmEntry>, bool), Error> {
672 if data.len() < 3 {
673 return Err(Error::InvalidLength);
674 }
675
676 let dflags = data[0];
677 let is_bundled = (dflags >> 7) != 0;
678 let nfiles = u16::from_be_bytes([data[1], data[2]]) as usize;
679
680 let mut pos = 3;
681
682 if is_bundled {
684 let offsets_size = nfiles * 4;
685 if pos + offsets_size > data.len() {
686 return Err(Error::UnexpectedEof);
687 }
688 pos += offsets_size;
689 }
690
691 let bzz_data = &data[pos..];
693 let meta =
694 crate::bzz_new::bzz_decode(bzz_data).map_err(|e| Error::FormatError(e.to_string()))?;
695
696 let mut mpos = 0;
698 mpos += nfiles * 3;
700
701 if mpos + nfiles > meta.len() {
703 return Err(Error::UnexpectedEof);
704 }
705 let flags: Vec<u8> = meta[mpos..mpos + nfiles].to_vec();
706 mpos += nfiles;
707
708 let mut entries = Vec::with_capacity(nfiles);
710 for &flag in flags.iter().take(nfiles) {
711 let id = read_str_nt(&meta, &mut mpos)?;
712 let has_name = (flag & 0x80) != 0;
713 let has_title = (flag & 0x40) != 0;
714 if has_name {
715 let _ = read_str_nt(&meta, &mut mpos)?;
716 }
717 if has_title {
718 let _ = read_str_nt(&meta, &mut mpos)?;
719 }
720
721 let comp_type = match flag & 0x3f {
722 1 => ComponentType::Page,
723 2 => ComponentType::Thumbnail,
724 _ => ComponentType::Shared,
725 };
726
727 entries.push(DirmEntry { comp_type, id });
728 }
729
730 Ok((entries, is_bundled))
731}
732
733fn read_str_nt(data: &[u8], pos: &mut usize) -> Result<String, Error> {
734 let start = *pos;
735 while *pos < data.len() && data[*pos] != 0 {
736 *pos += 1;
737 }
738 if *pos >= data.len() {
739 return Err(Error::UnexpectedEof);
740 }
741 let s = std::str::from_utf8(&data[start..*pos])
742 .map_err(|_| Error::FormatError("invalid UTF-8 in DIRM".into()))?;
743 *pos += 1; Ok(s.to_string())
745}
746
747fn parse_fgbz(data: &[u8]) -> Result<Palette, Error> {
752 if data.len() < 3 {
753 return Err(Error::InvalidLength);
754 }
755
756 let version = data[0];
757 if (version & 0x7f) != 0 {
758 return Err(Error::Unsupported("unsupported FGbz version"));
759 }
760
761 let palette_size = u16::from_be_bytes([data[1], data[2]]) as usize;
762 let color_bytes = palette_size * 3;
763 if data.len() < 3 + color_bytes {
764 return Err(Error::UnexpectedEof);
765 }
766
767 let mut colors = Vec::with_capacity(palette_size);
769 for i in 0..palette_size {
770 let base = 3 + i * 3;
771 let b = data[base];
772 let g = data[base + 1];
773 let r = data[base + 2];
774 colors.push((r, g, b));
775 }
776
777 let mut indices = Vec::new();
778 if (version & 0x80) != 0 {
779 let idx_start = 3 + color_bytes;
780 if idx_start + 3 > data.len() {
781 return Err(Error::UnexpectedEof);
782 }
783 let data_size = ((data[idx_start] as u32) << 16)
784 | ((data[idx_start + 1] as u32) << 8)
785 | (data[idx_start + 2] as u32);
786
787 let bzz_data = &data[idx_start + 3..];
788 let decoded =
789 crate::bzz_new::bzz_decode(bzz_data).map_err(|e| Error::FormatError(e.to_string()))?;
790
791 let num_indices = data_size as usize;
793 if decoded.len() < num_indices * 2 {
794 return Err(Error::UnexpectedEof);
795 }
796 indices.reserve(num_indices);
797 for i in 0..num_indices {
798 let idx = i16::from_be_bytes([decoded[i * 2], decoded[i * 2 + 1]]);
799 indices.push(idx);
800 }
801 }
802
803 Ok(Palette { colors, indices })
804}
805
806fn parse_bookmark(data: &[u8], pos: &mut usize, counter: &mut usize) -> Result<Bookmark, Error> {
811 if *pos >= data.len() {
812 return Err(Error::UnexpectedEof);
813 }
814 let children_count = data[*pos] as usize;
815 *pos += 1;
816
817 let title = read_navm_string(data, pos)?;
818 let url = read_navm_string(data, pos)?;
819 *counter += 1;
820
821 let mut children = Vec::with_capacity(children_count);
822 for _ in 0..children_count {
823 children.push(parse_bookmark(data, pos, counter)?);
824 }
825
826 Ok(Bookmark {
827 title,
828 url,
829 children,
830 })
831}
832
833fn read_navm_string(data: &[u8], pos: &mut usize) -> Result<String, Error> {
834 if *pos + 3 > data.len() {
835 return Err(Error::UnexpectedEof);
836 }
837 let len = ((data[*pos] as usize) << 16)
838 | ((data[*pos + 1] as usize) << 8)
839 | (data[*pos + 2] as usize);
840 *pos += 3;
841
842 if *pos + len > data.len() {
843 return Err(Error::UnexpectedEof);
844 }
845 let s = std::str::from_utf8(&data[*pos..*pos + len])
846 .map_err(|_| Error::FormatError("invalid UTF-8 in NAVM bookmark".into()))?;
847 *pos += len;
848 Ok(s.to_string())
849}
850
851fn parse_text_layer(data: &[u8]) -> Result<Option<TextLayer>, Error> {
856 if data.len() < 3 {
857 return Ok(None);
858 }
859
860 let mut pos = 0;
861
862 let text_len = read_text_u24(data, &mut pos)?;
864
865 if pos + text_len > data.len() {
867 return Err(Error::UnexpectedEof);
868 }
869 let text = std::str::from_utf8(&data[pos..pos + text_len])
870 .map_err(|_| Error::FormatError("invalid UTF-8 in text layer".into()))?
871 .to_string();
872 pos += text_len;
873
874 if pos >= data.len() {
876 return Ok(Some(TextLayer { text, root: None }));
877 }
878 let _version = data[pos];
879 pos += 1;
880
881 if pos >= data.len() {
883 return Ok(Some(TextLayer { text, root: None }));
884 }
885
886 let root = parse_text_zone(data, &mut pos, None, None)?;
887 Ok(Some(TextLayer {
888 text,
889 root: Some(root),
890 }))
891}
892
/// Resolved geometry and text range of an already-decoded zone.
///
/// Zone records store their fields relative to the previous sibling or to the
/// parent; this is the reference those relative values are added to.
struct ZoneCtx {
    /// Resolved horizontal position.
    x: i32,
    /// Resolved vertical position.
    y: i32,
    /// Zone width.
    width: i32,
    /// Zone height.
    height: i32,
    /// Resolved start of the zone's text.
    text_start: i32,
    /// Length of the zone's text.
    text_len: i32,
}
902
903fn parse_text_zone(
904 data: &[u8],
905 pos: &mut usize,
906 parent: Option<&ZoneCtx>,
907 prev: Option<&ZoneCtx>,
908) -> Result<TextZone, Error> {
909 if *pos >= data.len() {
910 return Err(Error::UnexpectedEof);
911 }
912
913 let type_byte = data[*pos];
914 *pos += 1;
915
916 let kind = match type_byte {
917 1 => TextZoneKind::Page,
918 2 => TextZoneKind::Column,
919 3 => TextZoneKind::Region,
920 4 => TextZoneKind::Paragraph,
921 5 => TextZoneKind::Line,
922 6 => TextZoneKind::Word,
923 7 => TextZoneKind::Character,
924 _ => {
925 return Err(Error::FormatError(format!(
926 "unknown text zone type {}",
927 type_byte
928 )));
929 }
930 };
931
932 let mut x = read_text_i16_biased(data, pos)?;
934 let mut y = read_text_i16_biased(data, pos)?;
935 let width = read_text_i16_biased(data, pos)?;
936 let height = read_text_i16_biased(data, pos)?;
937 let mut text_start = read_text_i16_biased(data, pos)?;
938 let text_len = read_text_i24(data, pos)?;
939
940 if let Some(prev) = prev {
942 match type_byte {
943 1 | 4 | 5 => {
944 x += prev.x;
946 y = prev.y - (y + height);
947 }
948 _ => {
949 x += prev.x + prev.width;
951 y += prev.y;
952 }
953 }
954 text_start += prev.text_start + prev.text_len;
955 } else if let Some(parent) = parent {
956 x += parent.x;
957 y = parent.y + parent.height - (y + height);
958 text_start += parent.text_start;
959 }
960
961 let children_count = read_text_i24(data, pos)?.max(0) as usize;
963
964 let ctx = ZoneCtx {
965 x,
966 y,
967 width,
968 height,
969 text_start,
970 text_len,
971 };
972
973 let mut children = Vec::with_capacity(children_count);
974 let mut prev_child: Option<ZoneCtx> = None;
975
976 for _ in 0..children_count {
977 let child = parse_text_zone(data, pos, Some(&ctx), prev_child.as_ref())?;
978 prev_child = Some(ZoneCtx {
979 x: child.x,
980 y: child.y,
981 width: child.width,
982 height: child.height,
983 text_start: child.text_start as i32,
984 text_len: child.text_len as i32,
985 });
986 children.push(child);
987 }
988
989 Ok(TextZone {
990 kind,
991 x,
992 y,
993 width,
994 height,
995 text_start: text_start.max(0) as usize,
996 text_len: text_len.max(0) as usize,
997 children,
998 })
999}
1000
1001fn read_text_u24(data: &[u8], pos: &mut usize) -> Result<usize, Error> {
1002 if *pos + 3 > data.len() {
1003 return Err(Error::UnexpectedEof);
1004 }
1005 let val = ((data[*pos] as usize) << 16)
1006 | ((data[*pos + 1] as usize) << 8)
1007 | (data[*pos + 2] as usize);
1008 *pos += 3;
1009 Ok(val)
1010}
1011
1012fn read_text_i16_biased(data: &[u8], pos: &mut usize) -> Result<i32, Error> {
1013 if *pos + 2 > data.len() {
1014 return Err(Error::UnexpectedEof);
1015 }
1016 let raw = u16::from_be_bytes([data[*pos], data[*pos + 1]]);
1017 *pos += 2;
1018 Ok(raw as i32 - 0x8000)
1019}
1020
1021fn read_text_i24(data: &[u8], pos: &mut usize) -> Result<i32, Error> {
1022 if *pos + 3 > data.len() {
1023 return Err(Error::UnexpectedEof);
1024 }
1025 let val =
1026 ((data[*pos] as i32) << 16) | ((data[*pos + 1] as i32) << 8) | (data[*pos + 2] as i32);
1027 *pos += 3;
1028 Ok(val)
1029}
1030
1031#[cfg(test)]
1032mod tests {
1033 use super::*;
1034
1035 fn assets_path() -> std::path::PathBuf {
1036 std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
1037 .join("references/djvujs/library/assets")
1038 }
1039
1040 fn golden_path() -> std::path::PathBuf {
1041 std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/golden/document")
1042 }
1043
1044 #[test]
1045 fn page_counts() {
1046 let cases: &[(&str, usize)] = &[
1047 ("boy_jb2.djvu", 1),
1048 ("boy.djvu", 1),
1049 ("chicken.djvu", 1),
1050 ("navm_fgbz.djvu", 6),
1051 ("DjVu3Spec_bundled.djvu", 71),
1052 ("colorbook.djvu", 62),
1053 ];
1054 for (file, expected) in cases {
1055 let data = std::fs::read(assets_path().join(file)).unwrap();
1056 let doc = Document::parse(&data).unwrap();
1057 assert_eq!(
1058 doc.page_count(),
1059 *expected,
1060 "page count mismatch for {}",
1061 file
1062 );
1063 }
1064 }
1065
1066 #[test]
1067 fn page_dimensions_navm_fgbz() {
1068 let data = std::fs::read(assets_path().join("navm_fgbz.djvu")).unwrap();
1069 let doc = Document::parse(&data).unwrap();
1070
1071 let golden = std::fs::read_to_string(golden_path().join("navm_fgbz_sizes.txt")).unwrap();
1072 for (i, line) in golden.lines().enumerate() {
1073 let page = doc.page(i).unwrap();
1074 let expected = format!("width={} height={}", page.info.width, page.info.height);
1075 assert_eq!(
1076 expected,
1077 line.trim(),
1078 "size mismatch for navm_fgbz page {}",
1079 i + 1
1080 );
1081 }
1082 }
1083
1084 #[test]
1085 fn page_dimensions_djvu3spec() {
1086 let data = std::fs::read(assets_path().join("DjVu3Spec_bundled.djvu")).unwrap();
1087 let doc = Document::parse(&data).unwrap();
1088
1089 let golden =
1090 std::fs::read_to_string(golden_path().join("djvu3spec_bundled_sizes.txt")).unwrap();
1091 for (i, line) in golden.lines().enumerate() {
1092 if line.trim().is_empty() {
1093 continue;
1094 }
1095 let page = doc.page(i).unwrap();
1096 let expected = format!("width={} height={}", page.info.width, page.info.height);
1097 assert_eq!(
1098 expected,
1099 line.trim(),
1100 "size mismatch for djvu3spec page {}",
1101 i + 1
1102 );
1103 }
1104 }
1105
1106 #[test]
1107 fn layer_availability() {
1108 let data = std::fs::read(assets_path().join("boy_jb2.djvu")).unwrap();
1110 let doc = Document::parse(&data).unwrap();
1111 let p = doc.page(0).unwrap();
1112 assert!(p.has_mask());
1113 assert!(!p.has_background());
1114 assert!(!p.has_foreground());
1115 assert!(!p.has_palette());
1116
1117 let data = std::fs::read(assets_path().join("chicken.djvu")).unwrap();
1119 let doc = Document::parse(&data).unwrap();
1120 let p = doc.page(0).unwrap();
1121 assert!(!p.has_mask());
1122 assert!(p.has_background());
1123 assert!(!p.has_foreground());
1124 assert!(!p.has_palette());
1125
1126 let data = std::fs::read(assets_path().join("navm_fgbz.djvu")).unwrap();
1128 let doc = Document::parse(&data).unwrap();
1129 let p = doc.page(0).unwrap();
1130 assert!(p.has_mask());
1131 assert!(p.has_background());
1132 assert!(!p.has_foreground());
1133 assert!(p.has_palette());
1134 }
1135
1136 #[test]
1137 fn decode_mask_matches_direct_boy_jb2() {
1138 let data = std::fs::read(assets_path().join("boy_jb2.djvu")).unwrap();
1139
1140 let doc = Document::parse(&data).unwrap();
1142 let mask_via_doc = doc.page(0).unwrap().decode_mask().unwrap().unwrap();
1143
1144 let file = crate::iff::parse(&data).unwrap();
1146 let sjbz = file.root.find_first(b"Sjbz").unwrap();
1147 let mask_direct = crate::jb2::decode(sjbz.data(), None).unwrap();
1148
1149 assert_eq!(mask_via_doc.data, mask_direct.data, "mask data mismatch");
1150 }
1151
1152 #[test]
1153 fn decode_mask_with_shared_dict() {
1154 let data = std::fs::read(assets_path().join("navm_fgbz.djvu")).unwrap();
1156 let doc = Document::parse(&data).unwrap();
1157 let mask = doc.page(0).unwrap().decode_mask().unwrap();
1158 assert!(mask.is_some(), "expected mask for navm_fgbz p1");
1159 let bm = mask.unwrap();
1160 assert_eq!(bm.width, 2550);
1161 assert_eq!(bm.height, 3300);
1162 }
1163
1164 #[test]
1170 fn decode_mask_repeated_calls_identical() {
1171 let data = std::fs::read(assets_path().join("navm_fgbz.djvu")).unwrap();
1172 let doc = Document::parse(&data).unwrap();
1173 let bm1 = doc.page(0).unwrap().decode_mask().unwrap().unwrap();
1174 let bm2 = doc.page(0).unwrap().decode_mask().unwrap().unwrap();
1175 assert_eq!(
1176 bm1.data, bm2.data,
1177 "decode_mask repeated calls must be identical (cache must not corrupt dict state)"
1178 );
1179 }
1180
1181 #[test]
1182 fn decode_background_chicken() {
1183 let data = std::fs::read(assets_path().join("chicken.djvu")).unwrap();
1184 let doc = Document::parse(&data).unwrap();
1185 let bg = doc.page(0).unwrap().decode_background().unwrap();
1186 assert!(bg.is_some());
1187 let pm = bg.unwrap();
1188 assert_eq!(pm.width, 181);
1189 assert_eq!(pm.height, 240);
1190 }
1191
1192 #[test]
1193 fn decode_palette_navm_fgbz() {
1194 let data = std::fs::read(assets_path().join("navm_fgbz.djvu")).unwrap();
1195 let doc = Document::parse(&data).unwrap();
1196 let pal = doc.page(0).unwrap().decode_palette().unwrap();
1197 assert!(pal.is_some());
1198 let p = pal.unwrap();
1199 assert_eq!(p.colors.len(), 2); }
1201
1202 #[test]
1203 fn page_info_dpi() {
1204 let data = std::fs::read(assets_path().join("navm_fgbz.djvu")).unwrap();
1205 let doc = Document::parse(&data).unwrap();
1206 let p = doc.page(0).unwrap();
1207 assert_eq!(p.info.dpi, 300);
1208 }
1209
1210 #[test]
1211 #[ignore]
1212 fn debug_bg_lowres_vs_ddjvu() {
1213 let cases = [
1214 ("carte.djvu", 0usize, "/tmp/rdjvu_debug/carte_bg_sub3.ppm"),
1215 (
1216 "colorbook.djvu",
1217 0usize,
1218 "/tmp/rdjvu_debug/colorbook_p1_bg_sub3.ppm",
1219 ),
1220 (
1221 "navm_fgbz.djvu",
1222 3usize,
1223 "/tmp/rdjvu_debug/navm_p4_bg_sub3.ppm",
1224 ),
1225 ];
1226 for (file, page_idx, ref_file) in cases {
1227 let ref_path = std::path::Path::new(ref_file);
1228 if !ref_path.exists() {
1229 continue;
1230 }
1231 let data = std::fs::read(assets_path().join(file)).unwrap();
1232 let doc = Document::parse(&data).unwrap();
1233 let page = doc.page(page_idx).unwrap();
1234 let bg = page.decode_background().unwrap().unwrap();
1235 let actual = bg.to_ppm();
1236 let expected = std::fs::read(ref_path).unwrap();
1237 let header_end = actual.iter().position(|&b| b == b'\n').unwrap() + 1;
1238 let header_end = header_end
1239 + actual[header_end..]
1240 .iter()
1241 .position(|&b| b == b'\n')
1242 .unwrap()
1243 + 1;
1244 let header_end = header_end
1245 + actual[header_end..]
1246 .iter()
1247 .position(|&b| b == b'\n')
1248 .unwrap()
1249 + 1;
1250 let a = &actual[header_end..];
1251 let e = &expected[header_end..];
1252 let mut diff_px = 0usize;
1253 let mut abs = [0u64; 3];
1254 let px = (a.len().min(e.len())) / 3;
1255 for p in 0..px {
1256 let i = p * 3;
1257 if a[i] != e[i] || a[i + 1] != e[i + 1] || a[i + 2] != e[i + 2] {
1258 diff_px += 1;
1259 }
1260 abs[0] += (a[i] as i32 - e[i] as i32).unsigned_abs() as u64;
1261 abs[1] += (a[i + 1] as i32 - e[i + 1] as i32).unsigned_abs() as u64;
1262 abs[2] += (a[i + 2] as i32 - e[i + 2] as i32).unsigned_abs() as u64;
1263 }
1264 eprintln!(
1265 "{} p{} bg-lowres mismatch_px={} mean_abs=({:.3},{:.3},{:.3}) dims_a={} dims_e={}",
1266 file,
1267 page_idx + 1,
1268 diff_px,
1269 abs[0] as f64 / px as f64,
1270 abs[1] as f64 / px as f64,
1271 abs[2] as f64 / px as f64,
1272 a.len() / 3,
1273 e.len() / 3
1274 );
1275 }
1276 }
1277
1278 #[test]
1279 fn bookmarks_navm_fgbz() {
1280 let data = std::fs::read(assets_path().join("navm_fgbz.djvu")).unwrap();
1281 let doc = Document::parse(&data).unwrap();
1282 let bm = doc.bookmarks().unwrap();
1283
1284 assert_eq!(bm.len(), 4);
1286
1287 assert_eq!(bm[0].title, "Links");
1288 assert_eq!(bm[0].url, "#1");
1289 assert!(bm[0].children.is_empty());
1290
1291 assert_eq!(bm[1].title, "Ink, Rectangles, Ellipses, Lines");
1292 assert_eq!(bm[1].url, "#2");
1293 assert!(bm[1].children.is_empty());
1294
1295 assert_eq!(bm[2].title, "Stamps");
1296 assert_eq!(bm[2].url, "#3");
1297 assert_eq!(bm[2].children.len(), 2);
1298 assert_eq!(bm[2].children[0].title, "Stamps - Faces");
1299 assert_eq!(bm[2].children[0].url, "#4");
1300 assert_eq!(bm[2].children[1].title, "Stamps - Pointers");
1301 assert_eq!(bm[2].children[1].url, "#5");
1302
1303 assert_eq!(bm[3].title, "Last Page");
1304 assert_eq!(bm[3].url, "#6");
1305 assert!(bm[3].children.is_empty());
1306 }
1307
1308 #[test]
1309 fn bookmarks_empty_for_single_page() {
1310 let data = std::fs::read(assets_path().join("boy_jb2.djvu")).unwrap();
1311 let doc = Document::parse(&data).unwrap();
1312 let bm = doc.bookmarks().unwrap();
1313 assert!(bm.is_empty());
1314 }
1315
1316 #[test]
1317 fn bookmarks_empty_for_no_navm() {
1318 let data = std::fs::read(assets_path().join("colorbook.djvu")).unwrap();
1319 let doc = Document::parse(&data).unwrap();
1320 let bm = doc.bookmarks().unwrap();
1321 assert!(bm.is_empty());
1322 }
1323
    /// Parsing an empty byte slice must fail rather than panic.
    #[test]
    fn document_empty_input() {
        assert!(Document::parse(&[]).is_err());
    }
1330
    /// Input that stops right after the four bytes `AT&T` must be rejected.
    #[test]
    fn document_truncated_file() {
        assert!(Document::parse(b"AT&T").is_err());
    }
1336
/// Feeds the parser a hand-built `FORM:DJVU` that holds a single `Sjbz` chunk
/// and no `INFO` chunk.
///
/// Either outcome of `Document::parse` is tolerated: an error is accepted
/// as-is, and if parsing succeeds then requesting page 0 must fail instead.
#[test]
fn document_missing_info_chunk() {
    // FORM payload length: "DJVU" id + "Sjbz" id + chunk length field + 4 data bytes.
    let form_len: u32 = 4 + 4 + 4 + 4;

    let mut bytes: Vec<u8> = Vec::new();
    bytes.extend_from_slice(b"AT&TFORM");
    bytes.extend_from_slice(&form_len.to_be_bytes());
    bytes.extend_from_slice(b"DJVU");
    bytes.extend_from_slice(b"Sjbz");
    bytes.extend_from_slice(&4u32.to_be_bytes());
    bytes.extend_from_slice(&[0u8; 4]);

    // A parse error is acceptable; only a successful parse needs a further check.
    if let Ok(doc) = Document::parse(&bytes) {
        assert!(doc.page(0).is_err());
    }
}
1356
/// `boy_jb2.djvu` reports exactly one page, so page indices 1 and 100 must
/// both be rejected with an error.
#[test]
fn document_page_out_of_bounds() {
    let bytes = std::fs::read(assets_path().join("boy_jb2.djvu")).unwrap();
    let document = Document::parse(&bytes).unwrap();
    assert_eq!(document.page_count(), 1);
    for index in [1, 100] {
        assert!(document.page(index).is_err());
    }
}
1365
/// For page 0 of `boy_jb2.djvu`, the background and foreground decoders must
/// succeed with `None`, and the page must report that it has no palette.
#[test]
fn document_missing_optional_chunks() {
    let bytes = std::fs::read(assets_path().join("boy_jb2.djvu")).unwrap();
    let document = Document::parse(&bytes).unwrap();
    let page = document.page(0).unwrap();

    let background = page.decode_background().unwrap();
    assert!(background.is_none());

    let foreground = page.decode_foreground().unwrap();
    assert!(foreground.is_none());

    assert!(!page.has_palette());
}
1376
1377 fn text_golden_path() -> std::path::PathBuf {
1380 std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/golden/text")
1381 }
1382
1383 fn format_zone(layer: &TextLayer, zone: &TextZone, indent: usize) -> String {
1385 let mut out = String::new();
1386 let pad = " ".repeat(indent);
1387 let kind_str = match zone.kind {
1388 TextZoneKind::Page => "page",
1389 TextZoneKind::Column => "column",
1390 TextZoneKind::Region => "region",
1391 TextZoneKind::Paragraph => "para",
1392 TextZoneKind::Line => "line",
1393 TextZoneKind::Word => "word",
1394 TextZoneKind::Character => "char",
1395 };
1396 let x2 = zone.x + zone.width;
1397 let y2 = zone.y + zone.height;
1398
1399 if zone.children.is_empty() {
1400 let text = layer.zone_text(zone);
1402 let trimmed = text.trim_end();
1403 let escaped = djvused_escape(trimmed);
1404 out.push_str(&format!(
1405 "{}({} {} {} {} {} \"{}\")",
1406 pad, kind_str, zone.x, zone.y, x2, y2, escaped
1407 ));
1408 } else {
1409 out.push_str(&format!(
1410 "{}({} {} {} {} {}",
1411 pad, kind_str, zone.x, zone.y, x2, y2
1412 ));
1413 for child in &zone.children {
1414 out.push('\n');
1415 out.push_str(&format_zone(layer, child, indent + 1));
1416 }
1417 out.push(')');
1418 }
1419 out
1420 }
1421
/// Escapes `text` byte by byte for embedding in a double-quoted string.
///
/// * `\` and `"` are prefixed with a backslash.
/// * Other bytes in `0x20..=0x7e` are copied through unchanged.
/// * Every remaining byte (control characters, `0x7f`, and each byte of a
///   multi-byte UTF-8 sequence) becomes a backslash followed by its value as
///   three octal digits.
fn djvused_escape(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for &byte in text.as_bytes() {
        if byte == b'\\' || byte == b'"' {
            escaped.push('\\');
            escaped.push(char::from(byte));
        } else if (0x20..=0x7e).contains(&byte) {
            escaped.push(char::from(byte));
        } else {
            escaped += &format!("\\{:03o}", byte);
        }
    }
    escaped
}
1435
1436 fn format_text_layer(layer: &TextLayer) -> String {
1437 match &layer.root {
1438 Some(root) => format_zone(layer, root, 0),
1439 None => String::new(),
1440 }
1441 }
1442
/// Page 0 of `boy_jb2.djvu` must report `None` for its text layer, not an error.
#[test]
fn text_layer_none_for_no_text() {
    let bytes = std::fs::read(assets_path().join("boy_jb2.djvu")).unwrap();
    let document = Document::parse(&bytes).unwrap();
    let page = document.page(0).unwrap();
    assert!(page.text_layer().unwrap().is_none());
}
1451
/// Page 0 of `carte.djvu` must expose a non-empty text layer whose root zone
/// is a `Page`, and its formatted form must equal the `carte_p1.txt` golden
/// file (both sides compared after trimming surrounding whitespace).
#[test]
fn text_layer_carte_p1() {
    let bytes = std::fs::read(assets_path().join("carte.djvu")).unwrap();
    let document = Document::parse(&bytes).unwrap();
    let page = document.page(0).unwrap();
    let layer = page.text_layer().unwrap().unwrap();

    assert!(!layer.text.is_empty(), "carte text should not be empty");
    assert_eq!(layer.root.as_ref().unwrap().kind, TextZoneKind::Page);

    let golden_file = text_golden_path().join("carte_p1.txt");
    let expected = std::fs::read_to_string(golden_file).unwrap();
    let rendered = format_text_layer(&layer);
    assert_eq!(rendered.trim(), expected.trim(), "carte p1 text mismatch");
}
1470
/// Page 0 of `DjVu3Spec_bundled.djvu` must expose a non-empty text layer whose
/// root is a `Page` zone with at least one child, and its formatted form must
/// equal the `djvu3spec_p1.txt` golden file (compared after trimming).
#[test]
fn text_layer_djvu3spec_p1() {
    let bytes = std::fs::read(assets_path().join("DjVu3Spec_bundled.djvu")).unwrap();
    let document = Document::parse(&bytes).unwrap();
    let page = document.page(0).unwrap();
    let layer = page.text_layer().unwrap().unwrap();

    assert!(!layer.text.is_empty());

    let top = layer.root.as_ref().unwrap();
    assert_eq!(top.kind, TextZoneKind::Page);
    assert!(!top.children.is_empty());

    let golden_file = text_golden_path().join("djvu3spec_p1.txt");
    let expected = std::fs::read_to_string(golden_file).unwrap();
    let rendered = format_text_layer(&layer);
    assert_eq!(rendered.trim(), expected.trim(), "djvu3spec p1 text mismatch");
}
1488
/// Page 0 of `colorbook.djvu` must expose a non-empty text layer whose
/// formatted form equals the `colorbook_p1.txt` golden file (compared after
/// trimming).
#[test]
fn text_layer_colorbook_p1() {
    let bytes = std::fs::read(assets_path().join("colorbook.djvu")).unwrap();
    let document = Document::parse(&bytes).unwrap();
    let page = document.page(0).unwrap();
    let layer = page.text_layer().unwrap().unwrap();

    assert!(!layer.text.is_empty());

    let golden_file = text_golden_path().join("colorbook_p1.txt");
    let expected = std::fs::read_to_string(golden_file).unwrap();
    let rendered = format_text_layer(&layer);
    assert_eq!(rendered.trim(), expected.trim(), "colorbook p1 text mismatch");
}
1501
/// The page at index 5 of `czech.djvu` must expose a non-empty text layer
/// whose formatted form equals the `czech_p6.txt` golden file (compared after
/// trimming).
#[test]
fn text_layer_czech_p6_utf8() {
    let bytes = std::fs::read(assets_path().join("czech.djvu")).unwrap();
    let document = Document::parse(&bytes).unwrap();
    let page = document.page(5).unwrap();
    let layer = page.text_layer().unwrap().unwrap();

    assert!(!layer.text.is_empty());

    let golden_file = text_golden_path().join("czech_p6.txt");
    let expected = std::fs::read_to_string(golden_file).unwrap();
    let rendered = format_text_layer(&layer);
    assert_eq!(rendered.trim(), expected.trim(), "czech p6 text mismatch");
}
1515
/// Locates the first `Word` zone (depth-first, in child order) on page 0 of
/// `DjVu3Spec_bundled.djvu` and checks that `TextLayer::zone_text` returns a
/// non-empty string for it.
#[test]
fn text_layer_zone_text_access() {
    /// Depth-first search: returns `zone` itself if it is a `Word`, otherwise
    /// the first `Word` found under its children, visited in order.
    fn find_first_word(zone: &TextZone) -> Option<&TextZone> {
        if zone.kind == TextZoneKind::Word {
            Some(zone)
        } else {
            zone.children.iter().find_map(find_first_word)
        }
    }

    let bytes = std::fs::read(assets_path().join("DjVu3Spec_bundled.djvu")).unwrap();
    let document = Document::parse(&bytes).unwrap();
    let page = document.page(0).unwrap();
    let layer = page.text_layer().unwrap().unwrap();

    let top = layer.root.as_ref().unwrap();
    let first_word = find_first_word(top).expect("should have at least one word");
    assert!(
        !layer.zone_text(first_word).is_empty(),
        "first word text should not be empty"
    );
}
1541
/// `text_layer()` must return `Ok` for every page of `DjVu3Spec_bundled.djvu`;
/// whether a layer is actually present is not checked here.
#[test]
fn text_layer_all_pages_djvu3spec() {
    let bytes = std::fs::read(assets_path().join("DjVu3Spec_bundled.djvu")).unwrap();
    let document = Document::parse(&bytes).unwrap();
    for i in 0..document.page_count() {
        let page = document.page(i).unwrap();
        assert!(
            page.text_layer().is_ok(),
            "text_layer failed for djvu3spec page {}",
            i
        );
    }
}
1552
/// Page 0 of `carte.djvu` must provide a thumbnail whose width and height are
/// each strictly between 0 and 500 and whose data buffer holds exactly
/// `width * height * 4` bytes.
#[test]
fn thumbnail_carte() {
    let bytes = std::fs::read(assets_path().join("carte.djvu")).unwrap();
    let document = Document::parse(&bytes).unwrap();
    let thumb = document
        .thumbnail(0)
        .unwrap()
        .expect("carte should have a thumbnail");

    let (w, h) = (thumb.width, thumb.height);
    assert!((1..500).contains(&w), "thumb width: {}", w);
    assert!((1..500).contains(&h), "thumb height: {}", h);

    let expected_len = w as usize * h as usize * 4;
    assert_eq!(thumb.data.len(), expected_len);
}
1579
/// Walks every page of `DjVu3Spec_bundled.djvu`: each thumbnail that exists
/// must have non-zero dimensions and a data buffer of `width * height * 4`
/// bytes, and exactly 71 pages must have a thumbnail.
#[test]
fn thumbnail_djvu3spec_all_pages() {
    let bytes = std::fs::read(assets_path().join("DjVu3Spec_bundled.djvu")).unwrap();
    let document = Document::parse(&bytes).unwrap();

    let mut found = 0;
    for i in 0..document.page_count() {
        // Pages without a thumbnail are skipped; a lookup error still fails the test.
        let thumb = match document.thumbnail(i).unwrap() {
            Some(thumb) => thumb,
            None => continue,
        };
        assert!(
            thumb.width > 0 && thumb.height > 0,
            "page {} thumb empty",
            i
        );
        let expected_len = thumb.width as usize * thumb.height as usize * 4;
        assert_eq!(
            thumb.data.len(),
            expected_len,
            "page {} thumb data mismatch",
            i
        );
        found += 1;
    }
    assert_eq!(found, 71, "expected 71 thumbnails, got {}", found);
}
1603
/// Requesting thumbnail 0 of `boy_jb2.djvu` must succeed with `None`.
#[test]
fn thumbnail_none_for_single_page() {
    let bytes = std::fs::read(assets_path().join("boy_jb2.djvu")).unwrap();
    let document = Document::parse(&bytes).unwrap();
    let thumb = document.thumbnail(0).unwrap();
    assert!(thumb.is_none());
}
1610
/// Requesting thumbnail 0 of `colorbook.djvu` (expected, per the test name, to
/// carry no thumbnail chunk) must succeed with `None`.
#[test]
fn thumbnail_none_for_no_thum() {
    let bytes = std::fs::read(assets_path().join("colorbook.djvu")).unwrap();
    let document = Document::parse(&bytes).unwrap();
    let thumb = document.thumbnail(0).unwrap();
    assert!(thumb.is_none());
}
1617
/// Progressive background decoding of page 0 of `carte.djvu` must return as
/// many frames as `bg44_chunk_count()` reports, and every frame must have the
/// same dimensions as the first one. The fixture is required to have more than
/// one chunk.
#[test]
fn progressive_bg_returns_frames_per_chunk() {
    let bytes = std::fs::read(assets_path().join("carte.djvu")).unwrap();
    let document = Document::parse(&bytes).unwrap();
    let page = document.page(0).unwrap();

    let expected_frames = page.bg44_chunk_count();
    assert!(
        expected_frames > 1,
        "need multi-chunk file for progressive test"
    );

    let frames = page.decode_background_progressive().unwrap().unwrap();
    assert_eq!(frames.len(), expected_frames, "one frame per BG44 chunk");

    let first_size = (frames[0].width, frames[0].height);
    for (i, frame) in frames.iter().enumerate() {
        assert_eq!(
            (frame.width, frame.height),
            first_size,
            "frame {i} size mismatch"
        );
    }
}
1641
/// For page 0 of `carte.djvu`, the last frame produced by progressive
/// background decoding must match the result of `decode_background()` in
/// width, height and pixel data.
#[test]
fn progressive_last_frame_matches_full_decode() {
    let bytes = std::fs::read(assets_path().join("carte.djvu")).unwrap();
    let document = Document::parse(&bytes).unwrap();
    let page = document.page(0).unwrap();

    let full = page.decode_background().unwrap().unwrap();
    let frames = page.decode_background_progressive().unwrap().unwrap();
    let last = frames.last().unwrap();

    assert_eq!((full.width, full.height), (last.width, last.height));
    assert_eq!(
        full.data, last.data,
        "last progressive frame must match full decode"
    );
}
1659
/// When page 0 of `boy.djvu` has at most one BG44 chunk, progressive decoding
/// must produce exactly one frame. If the fixture has more chunks, nothing is
/// checked and the test passes.
#[test]
fn progressive_single_chunk_returns_one_frame() {
    let bytes = std::fs::read(assets_path().join("boy.djvu")).unwrap();
    let document = Document::parse(&bytes).unwrap();
    let page = document.page(0).unwrap();

    // The assertion only applies to the single-chunk (or chunk-less) case.
    if page.bg44_chunk_count() > 1 {
        return;
    }
    let frames = page.decode_background_progressive().unwrap().unwrap();
    assert_eq!(frames.len(), 1);
}
1671
/// For page 0 of `carte.djvu` (required to have more than one BG44 chunk), the
/// coarse background decode must have the same dimensions as the full decode
/// but different pixel data.
#[test]
fn coarse_decode_returns_blurry_frame() {
    let bytes = std::fs::read(assets_path().join("carte.djvu")).unwrap();
    let document = Document::parse(&bytes).unwrap();
    let page = document.page(0).unwrap();
    assert!(page.bg44_chunk_count() > 1);

    let coarse = page.decode_background_coarse().unwrap().unwrap();
    let full = page.decode_background().unwrap().unwrap();

    assert_eq!((coarse.width, coarse.height), (full.width, full.height));
    assert_ne!(coarse.data, full.data, "coarse should differ from full");
}
1687
/// When page 0 of `boy.djvu` has at most one BG44 chunk, the coarse background
/// decode must succeed with `None`. If the fixture has more chunks, nothing is
/// checked and the test passes.
#[test]
fn coarse_decode_single_chunk_returns_none() {
    let bytes = std::fs::read(assets_path().join("boy.djvu")).unwrap();
    let document = Document::parse(&bytes).unwrap();
    let page = document.page(0).unwrap();

    // The assertion only applies to the single-chunk (or chunk-less) case.
    if page.bg44_chunk_count() > 1 {
        return;
    }
    let coarse = page.decode_background_coarse().unwrap();
    assert!(coarse.is_none());
}
1697
/// Page 0 of `boy_jb2.djvu` must report zero BG44 chunks, and progressive
/// background decoding must then succeed with `None`.
#[test]
fn progressive_no_bg_returns_none() {
    let bytes = std::fs::read(assets_path().join("boy_jb2.djvu")).unwrap();
    let document = Document::parse(&bytes).unwrap();
    let page = document.page(0).unwrap();

    assert_eq!(page.bg44_chunk_count(), 0);
    let frames = page.decode_background_progressive().unwrap();
    assert!(frames.is_none());
}
1707}