1#[cfg(not(feature = "std"))]
27use alloc::{
28 format,
29 string::{String, ToString},
30 vec,
31 vec::Vec,
32};
33
34use crate::{
35 annotation::{Annotation, AnnotationError, MapArea},
36 bzz_new::bzz_decode,
37 error::{BzzError, IffError, Iw44Error, Jb2Error},
38 iff::{IffChunk, parse_form},
39 info::PageInfo,
40 iw44_new::Iw44Image,
41 jb2::Jb2Dict,
42 metadata::{DjVuMetadata, MetadataError},
43 pixmap::Pixmap,
44 text::{TextError, TextLayer},
45};
46
47#[cfg(feature = "std")]
48use std::sync::Arc;
49
/// Errors returned by the document-level DjVu API in this module.
///
/// Errors from the lower layers (IFF, BZZ, IW44, JB2, text, annotation and
/// metadata parsing, and I/O when `std` is enabled) are wrapped through
/// `#[from]`, so they propagate with `?`.
#[derive(Debug, thiserror::Error)]
pub enum DocError {
    /// Error reported by the IFF container parser.
    #[error("IFF error: {0}")]
    Iff(#[from] IffError),

    /// Error reported by the BZZ decoder.
    #[error("BZZ error: {0}")]
    Bzz(#[from] BzzError),

    /// Error reported by the IW44 image decoder.
    #[error("IW44 error: {0}")]
    Iw44(#[from] Iw44Error),

    /// Error reported by the JB2 decoder.
    #[error("JB2 error: {0}")]
    Jb2(#[from] Jb2Error),

    /// The top-level form type is neither `DJVU` nor `DJVM`; carries the
    /// four-byte tag that was found instead.
    #[error("not a DjVu file: found form type {0:?}")]
    NotDjVu([u8; 4]),

    /// A chunk the parser cannot work without (such as `INFO` or `DIRM`) is
    /// absent; carries the chunk id.
    #[error("missing required chunk: {0}")]
    MissingChunk(&'static str),

    /// The document structure is inconsistent or truncated; carries a short
    /// static description of what was wrong.
    #[error("malformed DjVu document: {0}")]
    Malformed(&'static str),

    /// The resolver could not supply the named component of an indirect
    /// document.
    #[error("failed to resolve indirect page '{0}'")]
    IndirectResolve(String),

    /// A page index at or beyond the number of pages was requested.
    #[error("page index {index} is out of range (document has {count} pages)")]
    PageOutOfRange { index: usize, count: usize },

    /// A string that must be UTF-8 was not. Despite the message wording, this
    /// module also raises it for DIRM component ids and NAVM bookmark strings.
    #[error("invalid UTF-8 in DjVu metadata")]
    InvalidUtf8,

    /// An indirect document was parsed without a resolver callback.
    #[error("indirect DjVu document requires a resolver callback")]
    NoResolver,

    /// I/O failure (only available with the `std` feature).
    #[cfg(feature = "std")]
    #[error("I/O error: {0}")]
    Io(#[from] std::io::Error),

    /// Failure decoding an `Smmr` mask; carries the decoder's error rendered
    /// as a string.
    #[error("Smmr decode error: {0}")]
    Smmr(String),

    /// Error reported by the text-layer parser.
    #[error("text layer error: {0}")]
    Text(#[from] TextError),

    /// Error reported by the annotation parser.
    #[error("annotation error: {0}")]
    Annotation(#[from] AnnotationError),

    /// Error reported by the metadata parser.
    #[error("metadata error: {0}")]
    Metadata(#[from] MetadataError),
}
120
/// One entry of the document outline, as parsed from the `NAVM` chunk.
///
/// Entries form a tree: each bookmark owns its nested child bookmarks.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct DjVuBookmark {
    /// Title string of the bookmark.
    pub title: String,
    /// URL string of the bookmark, stored exactly as found in the file.
    pub url: String,
    /// Bookmarks nested below this one, in file order.
    pub children: Vec<DjVuBookmark>,
}
134
/// Kind of a component listed in the `DIRM` directory.
///
/// Derived from the low six bits of the component's flag byte: `1` maps to
/// `Page`, `2` to `Thumbnail`, and every other value to `Shared`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ComponentType {
    /// Component that pages may reference (for example through `INCL`).
    Shared,
    /// A page of the document.
    Page,
    /// A thumbnail component; skipped when pages are collected.
    Thumbnail,
}
144
/// An owned copy of one IFF chunk: its four-byte id and its payload bytes.
#[derive(Debug, Clone)]
struct RawChunk {
    /// Four-byte chunk identifier (e.g. `b"INFO"`).
    id: [u8; 4],
    /// Chunk payload, copied out of the source buffer.
    data: Vec<u8>,
}
151
/// A single page of a DjVu document.
///
/// Holds the parsed `INFO` header plus owned copies of all of the page's
/// chunks. With the `std` feature, decoded layers are cached lazily in
/// `OnceLock` cells that the `decoded_*` accessors fill on first use.
pub struct DjVuPage {
    /// Parsed `INFO` chunk.
    info: PageInfo,
    /// All chunks of the page component, in file order.
    chunks: Vec<RawChunk>,
    /// Zero-based position of the page within the document.
    index: usize,
    /// Bytes of the `Djbz` chunk of the shared component referenced by this
    /// page's `INCL` chunk, if one was found (reference-counted with `std`).
    #[cfg(feature = "std")]
    shared_djbz: Option<Arc<Vec<u8>>>,
    /// Bytes of the `Djbz` chunk of the shared component referenced by this
    /// page's `INCL` chunk, if one was found.
    #[cfg(not(feature = "std"))]
    shared_djbz: Option<Vec<u8>>,
    /// Cache for `decoded_bg44`: background image built from all `BG44` chunks.
    #[cfg(feature = "std")]
    bg44_decoded: std::sync::OnceLock<Option<Iw44Image>>,
    /// Cache for `decoded_bg44_partial`: background built from the first
    /// `BG44` chunk only.
    #[cfg(feature = "std")]
    bg44_decoded_partial: std::sync::OnceLock<Option<Iw44Image>>,
    /// Cache for `decoded_mask`: the decoded mask bitmap.
    #[cfg(feature = "std")]
    mask_decoded: std::sync::OnceLock<Option<crate::bitmap::Bitmap>>,
    /// Cache for `decoded_mask_sub4`: the mask downsampled by a factor of 4.
    #[cfg(feature = "std")]
    mask_decoded_sub4: std::sync::OnceLock<Option<crate::bitmap::Bitmap>>,
    /// Cache for `decoded_fg44`: the decoded foreground pixmap.
    #[cfg(feature = "std")]
    fg44_decoded: std::sync::OnceLock<Option<Pixmap>>,
    /// Cache for `decoded_shared_dict`: the decoded shared JB2 dictionary.
    #[cfg(feature = "std")]
    jb2_dict_decoded: std::sync::OnceLock<Option<Jb2Dict>>,
}
207
208impl Clone for DjVuPage {
209 fn clone(&self) -> Self {
210 DjVuPage {
211 info: self.info.clone(),
212 chunks: self.chunks.clone(),
213 index: self.index,
214 shared_djbz: self.shared_djbz.clone(),
215 #[cfg(feature = "std")]
217 bg44_decoded: std::sync::OnceLock::new(),
218 #[cfg(feature = "std")]
219 bg44_decoded_partial: std::sync::OnceLock::new(),
220 #[cfg(feature = "std")]
221 mask_decoded: std::sync::OnceLock::new(),
222 #[cfg(feature = "std")]
223 mask_decoded_sub4: std::sync::OnceLock::new(),
224 #[cfg(feature = "std")]
225 fg44_decoded: std::sync::OnceLock::new(),
226 #[cfg(feature = "std")]
227 jb2_dict_decoded: std::sync::OnceLock::new(),
228 }
229 }
230}
231
232impl core::fmt::Debug for DjVuPage {
233 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
234 f.debug_struct("DjVuPage")
235 .field("info", &self.info)
236 .field("chunks", &self.chunks)
237 .field("index", &self.index)
238 .field("shared_djbz", &self.shared_djbz.as_ref().map(|v| v.len()))
239 .finish_non_exhaustive()
240 }
241}
242
243impl DjVuPage {
244 pub fn width(&self) -> u16 {
246 self.info.width
247 }
248
249 pub fn height(&self) -> u16 {
251 self.info.height
252 }
253
254 pub fn dpi(&self) -> u16 {
256 self.info.dpi
257 }
258
259 pub fn gamma(&self) -> f32 {
261 self.info.gamma
262 }
263
264 pub fn rotation(&self) -> crate::info::Rotation {
266 self.info.rotation
267 }
268
269 pub fn index(&self) -> usize {
271 self.index
272 }
273
274 pub fn dimensions(&self) -> (u16, u16) {
276 (self.info.width, self.info.height)
277 }
278
279 pub fn thumbnail(&self) -> Result<Option<Pixmap>, DocError> {
285 let th44_chunks: Vec<&[u8]> = self
286 .chunks
287 .iter()
288 .filter(|c| &c.id == b"TH44")
289 .map(|c| c.data.as_slice())
290 .collect();
291
292 if th44_chunks.is_empty() {
293 return Ok(None);
294 }
295
296 let mut img = Iw44Image::new();
297 for chunk_data in &th44_chunks {
298 img.decode_chunk(chunk_data)?;
299 }
300 let pixmap = img.to_rgb()?;
301 Ok(Some(pixmap))
302 }
303
304 pub fn raw_chunk(&self, id: &[u8; 4]) -> Option<&[u8]> {
315 self.chunks
316 .iter()
317 .find(|c| &c.id == id)
318 .map(|c| c.data.as_slice())
319 }
320
321 pub fn all_chunks(&self, id: &[u8; 4]) -> Vec<&[u8]> {
332 self.chunks
333 .iter()
334 .filter(|c| &c.id == id)
335 .map(|c| c.data.as_slice())
336 .collect()
337 }
338
339 pub fn chunk_ids(&self) -> Vec<[u8; 4]> {
343 self.chunks.iter().map(|c| c.id).collect()
344 }
345
346 pub fn find_chunk(&self, id: &[u8; 4]) -> Option<&[u8]> {
350 self.raw_chunk(id)
351 }
352
353 pub fn find_chunks(&self, id: &[u8; 4]) -> Vec<&[u8]> {
357 self.all_chunks(id)
358 }
359
360 pub fn bg44_chunks(&self) -> Vec<&[u8]> {
362 self.find_chunks(b"BG44")
363 }
364
365 #[cfg(feature = "std")]
377 pub fn decoded_bg44(&self) -> Option<&Iw44Image> {
378 self.bg44_decoded
379 .get_or_init(|| {
380 let chunks = self.bg44_chunks();
381 if chunks.is_empty() {
382 return None;
383 }
384 let mut img = Iw44Image::new();
385 for chunk_data in &chunks {
386 if img.decode_chunk(chunk_data).is_err() {
387 break;
388 }
389 }
390 if img.width == 0 { None } else { Some(img) }
391 })
392 .as_ref()
393 }
394
395 #[cfg(not(feature = "std"))]
396 pub fn decoded_bg44(&self) -> Option<&Iw44Image> {
397 None
398 }
399
400 #[cfg(feature = "std")]
408 pub fn decoded_bg44_partial(&self) -> Option<&Iw44Image> {
409 self.bg44_decoded_partial
410 .get_or_init(|| {
411 let chunks = self.bg44_chunks();
412 if chunks.is_empty() {
413 return None;
414 }
415 let mut img = Iw44Image::new();
416 if img.decode_chunk(chunks[0]).is_err() {
418 return None;
419 }
420 if img.width == 0 { None } else { Some(img) }
421 })
422 .as_ref()
423 }
424
425 #[cfg(not(feature = "std"))]
426 pub fn decoded_bg44_partial(&self) -> Option<&Iw44Image> {
427 None
428 }
429
430 #[cfg(feature = "std")]
436 pub fn decoded_shared_dict(&self) -> Option<&Jb2Dict> {
437 self.jb2_dict_decoded
438 .get_or_init(|| {
439 let djbz = self.shared_djbz.as_deref()?;
440 crate::jb2::decode_dict(djbz, None).ok()
441 })
442 .as_ref()
443 }
444
445 #[cfg(not(feature = "std"))]
446 pub fn decoded_shared_dict(&self) -> Option<&Jb2Dict> {
447 None
448 }
449
450 pub fn fg44_chunks(&self) -> Vec<&[u8]> {
452 self.find_chunks(b"FG44")
453 }
454
455 pub fn text_layer(&self) -> Result<Option<TextLayer>, DocError> {
459 let page_height = self.info.height as u32;
460
461 if let Some(txtz) = self.find_chunk(b"TXTz") {
462 if txtz.is_empty() {
463 return Ok(None);
464 }
465 let layer = crate::text::parse_text_layer_bzz(txtz, page_height)?;
466 return Ok(Some(layer));
467 }
468
469 if let Some(txta) = self.find_chunk(b"TXTa") {
470 if txta.is_empty() {
471 return Ok(None);
472 }
473 let layer = crate::text::parse_text_layer(txta, page_height)?;
474 return Ok(Some(layer));
475 }
476
477 Ok(None)
478 }
479
480 pub fn text_layer_at_size(
491 &self,
492 render_w: u32,
493 render_h: u32,
494 ) -> Result<Option<TextLayer>, DocError> {
495 let page_w = self.info.width as u32;
496 let page_h = self.info.height as u32;
497 let rotation = self.info.rotation;
498 Ok(self
499 .text_layer()?
500 .map(|tl| tl.transform(page_w, page_h, rotation, render_w, render_h)))
501 }
502
503 pub fn text(&self) -> Result<Option<String>, DocError> {
507 Ok(self.text_layer()?.map(|tl| tl.text))
508 }
509
510 pub fn annotations(&self) -> Result<Option<(Annotation, Vec<MapArea>)>, DocError> {
514 if let Some(antz) = self.find_chunk(b"ANTz") {
515 if antz.is_empty() {
516 return Ok(None);
517 }
518 let result = crate::annotation::parse_annotations_bzz(antz)?;
519 return Ok(Some(result));
520 }
521
522 if let Some(anta) = self.find_chunk(b"ANTa") {
523 if anta.is_empty() {
524 return Ok(None);
525 }
526 let result = crate::annotation::parse_annotations(anta)?;
527 return Ok(Some(result));
528 }
529
530 Ok(None)
531 }
532
533 pub fn hyperlinks(&self) -> Result<Vec<MapArea>, DocError> {
535 match self.annotations()? {
536 None => Ok(Vec::new()),
537 Some((_, mapareas)) => Ok(mapareas.into_iter().filter(|m| !m.url.is_empty()).collect()),
538 }
539 }
540
541 pub fn extract_mask(&self) -> Result<Option<crate::bitmap::Bitmap>, DocError> {
549 if let Some(sjbz) = self.find_chunk(b"Sjbz") {
550 let inline_dict;
554 let dict_ref = if let Some(djbz) = self.find_chunk(b"Djbz") {
555 inline_dict = crate::jb2::decode_dict(djbz, None)?;
556 Some(&inline_dict)
557 } else {
558 self.decoded_shared_dict()
559 };
560 let bm = crate::jb2::decode(sjbz, dict_ref)?;
561 return Ok(Some(bm));
562 }
563 if let Some(smmr) = self.find_chunk(b"Smmr") {
564 let bm = crate::smmr::decode_smmr(smmr).map_err(|e| DocError::Smmr(e.to_string()))?;
565 return Ok(Some(bm));
566 }
567 Ok(None)
568 }
569
570 pub fn extract_mask_indexed(
576 &self,
577 ) -> Result<Option<(crate::bitmap::Bitmap, Vec<i32>)>, DocError> {
578 if let Some(sjbz) = self.find_chunk(b"Sjbz") {
579 let inline_dict;
580 let dict_ref = if let Some(djbz) = self.find_chunk(b"Djbz") {
581 inline_dict = crate::jb2::decode_dict(djbz, None)?;
582 Some(&inline_dict)
583 } else {
584 self.decoded_shared_dict()
585 };
586 let (bm, blit_map) = crate::jb2::decode_indexed(sjbz, dict_ref)?;
587 return Ok(Some((bm, blit_map)));
588 }
589 if let Some(smmr) = self.find_chunk(b"Smmr") {
590 let bm = crate::smmr::decode_smmr(smmr).map_err(|e| DocError::Smmr(e.to_string()))?;
591 let len = (bm.width * bm.height) as usize;
592 return Ok(Some((bm, vec![0i32; len])));
593 }
594 Ok(None)
595 }
596
597 pub fn extract_foreground(&self) -> Result<Option<Pixmap>, DocError> {
601 let chunks = self.fg44_chunks();
602 if chunks.is_empty() {
603 return Ok(None);
604 }
605
606 let mut img = Iw44Image::new();
607 for chunk_data in &chunks {
608 img.decode_chunk(chunk_data)?;
609 }
610 let pixmap = img.to_rgb()?;
611 Ok(Some(pixmap))
612 }
613
614 #[cfg(feature = "std")]
622 pub fn decoded_mask(&self) -> Option<&crate::bitmap::Bitmap> {
623 self.mask_decoded
624 .get_or_init(|| self.extract_mask().ok().flatten())
625 .as_ref()
626 }
627
628 #[cfg(not(feature = "std"))]
629 pub fn decoded_mask(&self) -> Option<&crate::bitmap::Bitmap> {
630 None
631 }
632
633 #[cfg(feature = "std")]
642 pub fn decoded_mask_sub4(&self) -> Option<&crate::bitmap::Bitmap> {
643 self.mask_decoded_sub4
644 .get_or_init(|| {
645 let src = self.decoded_mask()?;
646 Some(downsample_mask_4x(src))
647 })
648 .as_ref()
649 }
650
651 #[cfg(not(feature = "std"))]
652 pub fn decoded_mask_sub4(&self) -> Option<&crate::bitmap::Bitmap> {
653 None
654 }
655
656 #[cfg(feature = "std")]
661 pub fn decoded_fg44(&self) -> Option<&Pixmap> {
662 self.fg44_decoded
663 .get_or_init(|| self.extract_foreground().ok().flatten())
664 .as_ref()
665 }
666
667 #[cfg(not(feature = "std"))]
668 pub fn decoded_fg44(&self) -> Option<&Pixmap> {
669 None
670 }
671
672 pub fn extract_background(&self) -> Result<Option<Pixmap>, DocError> {
676 let chunks = self.bg44_chunks();
677 if chunks.is_empty() {
678 return Ok(None);
679 }
680
681 let mut img = Iw44Image::new();
682 for chunk_data in &chunks {
683 img.decode_chunk(chunk_data)?;
684 }
685 let pixmap = img.to_rgb()?;
686 Ok(Some(pixmap))
687 }
688
689 pub fn render_into(
700 &self,
701 opts: &crate::djvu_render::RenderOptions,
702 buf: &mut [u8],
703 ) -> Result<(), crate::djvu_render::RenderError> {
704 crate::djvu_render::render_into(self, opts, buf)
705 }
706}
707
/// A parsed DjVu document: its pages, outline and document-level chunks.
#[derive(Debug)]
pub struct DjVuDocument {
    /// Pages in document order.
    pages: Vec<DjVuPage>,
    /// Outline parsed from the `NAVM` chunk; empty when there is none.
    bookmarks: Vec<DjVuBookmark>,
    /// Document-level chunks: every chunk of a single-page file, or the
    /// non-`FORM` chunks of a multi-page file.
    global_chunks: Vec<RawChunk>,
    /// Byte range of each page inside the source buffer, indexed like `pages`.
    /// Left empty for indirect documents and when a range could not be
    /// determined for every page.
    page_byte_ranges: Vec<core::ops::Range<u64>>,
}
733
734impl DjVuDocument {
735 pub fn parse(data: &[u8]) -> Result<Self, DocError> {
745 Self::parse_with_resolver(data, None::<fn(&str) -> Result<Vec<u8>, DocError>>)
746 }
747
748 pub fn parse_with_resolver<R>(data: &[u8], resolver: Option<R>) -> Result<Self, DocError>
753 where
754 R: Fn(&str) -> Result<Vec<u8>, DocError>,
755 {
756 let form = parse_form(data)?;
757
758 match &form.form_type {
759 b"DJVU" => {
760 let global_chunks: Vec<RawChunk> = form
762 .chunks
763 .iter()
764 .map(|c| RawChunk {
765 id: c.id,
766 data: c.data.to_vec(),
767 })
768 .collect();
769 let page = parse_page_from_chunks(&form.chunks, 0, None)?;
770 #[allow(clippy::single_range_in_vec_init)]
772 let page_byte_ranges = vec![0u64..(data.len() as u64)];
773 Ok(DjVuDocument {
774 pages: vec![page],
775 bookmarks: vec![],
776 global_chunks,
777 page_byte_ranges,
778 })
779 }
780 b"DJVM" => {
781 let dirm_chunk = form
783 .chunks
784 .iter()
785 .find(|c| &c.id == b"DIRM")
786 .ok_or(DocError::MissingChunk("DIRM"))?;
787
788 let (entries, is_bundled, comp_offsets) = parse_dirm(dirm_chunk.data)?;
789
790 let bookmarks = parse_navm_bookmarks(&form.chunks)?;
792
793 let global_chunks: Vec<RawChunk> = form
795 .chunks
796 .iter()
797 .filter(|c| &c.id != b"FORM")
798 .map(|c| RawChunk {
799 id: c.id,
800 data: c.data.to_vec(),
801 })
802 .collect();
803
804 if is_bundled {
805 let sub_forms: Vec<&IffChunk<'_>> =
807 form.chunks.iter().filter(|c| &c.id == b"FORM").collect();
808
809 #[cfg(not(feature = "std"))]
814 use alloc::collections::BTreeMap;
815 #[cfg(feature = "std")]
816 use std::collections::BTreeMap;
817 #[cfg(feature = "std")]
820 let djvi_djbz: BTreeMap<String, Arc<Vec<u8>>> = entries
821 .iter()
822 .enumerate()
823 .filter(|(_, e)| e.comp_type == ComponentType::Shared)
824 .filter_map(|(comp_idx, entry)| {
825 let sf = sub_forms.get(comp_idx)?;
826 let chunks = parse_sub_form(sf.data).ok()?;
827 let djbz = chunks.iter().find(|c| &c.id == b"Djbz")?;
828 Some((entry.id.clone(), Arc::new(djbz.data.to_vec())))
829 })
830 .collect();
831 #[cfg(not(feature = "std"))]
832 let djvi_djbz: BTreeMap<String, Vec<u8>> = entries
833 .iter()
834 .enumerate()
835 .filter(|(_, e)| e.comp_type == ComponentType::Shared)
836 .filter_map(|(comp_idx, entry)| {
837 let sf = sub_forms.get(comp_idx)?;
838 let chunks = parse_sub_form(sf.data).ok()?;
839 let djbz = chunks.iter().find(|c| &c.id == b"Djbz")?;
840 Some((entry.id.clone(), djbz.data.to_vec()))
841 })
842 .collect();
843
844 let mut pages = Vec::new();
845 let mut page_byte_ranges = Vec::new();
846 let mut page_idx = 0usize;
847 for (comp_idx, entry) in entries.iter().enumerate() {
848 if entry.comp_type != ComponentType::Page {
849 continue;
850 }
851 let sub_form = sub_forms.get(comp_idx).ok_or(DocError::Malformed(
852 "DIRM entry count exceeds FORM children",
853 ))?;
854 let sub_chunks = parse_sub_form(sub_form.data)?;
855
856 #[cfg(feature = "std")]
858 let shared_djbz = sub_chunks
859 .iter()
860 .find(|c| &c.id == b"INCL")
861 .and_then(|incl| core::str::from_utf8(incl.data.trim_ascii_end()).ok())
862 .and_then(|name| djvi_djbz.get(name))
863 .cloned();
864 #[cfg(not(feature = "std"))]
865 let shared_djbz = sub_chunks
866 .iter()
867 .find(|c| &c.id == b"INCL")
868 .and_then(|incl| core::str::from_utf8(incl.data.trim_ascii_end()).ok())
869 .and_then(|name| djvi_djbz.get(name))
870 .cloned();
871
872 let page = parse_page_from_chunks(&sub_chunks, page_idx, shared_djbz)?;
873 pages.push(page);
874
875 if let Some(off) = comp_offsets.get(comp_idx) {
880 let start = *off as usize;
881 if let Some(size_bytes) = data.get(start + 4..start + 8) {
882 let size = u32::from_be_bytes([
883 size_bytes[0],
884 size_bytes[1],
885 size_bytes[2],
886 size_bytes[3],
887 ]) as u64;
888 let begin = start as u64;
889 let end = begin.saturating_add(8).saturating_add(size);
890 page_byte_ranges.push(begin..end);
891 }
892 }
893 page_idx += 1;
894 }
895
896 if page_byte_ranges.len() != pages.len() {
899 page_byte_ranges.clear();
900 }
901
902 Ok(DjVuDocument {
903 pages,
904 bookmarks,
905 global_chunks,
906 page_byte_ranges,
907 })
908 } else {
909 let resolver = resolver.ok_or(DocError::NoResolver)?;
911
912 let mut pages = Vec::new();
913 let mut page_idx = 0usize;
914 for entry in &entries {
915 if entry.comp_type != ComponentType::Page {
916 continue;
917 }
918 let resolved_data = resolver(&entry.id)
919 .map_err(|_| DocError::IndirectResolve(entry.id.clone()))?;
920 let sub_form = parse_form(&resolved_data)?;
921 let page = parse_page_from_chunks(&sub_form.chunks, page_idx, None)?;
922 pages.push(page);
923 page_idx += 1;
924 }
925
926 Ok(DjVuDocument {
927 pages,
928 bookmarks,
929 global_chunks,
930 page_byte_ranges: Vec::new(),
933 })
934 }
935 }
936 other => Err(DocError::NotDjVu(*other)),
937 }
938 }
939
940 #[cfg(all(feature = "std", feature = "async"))]
941 pub(crate) fn parse_single_page_with_shared(
942 data: &[u8],
943 index: usize,
944 shared_djbz: Option<Arc<Vec<u8>>>,
945 ) -> Result<DjVuPage, DocError> {
946 let form = parse_form(data)?;
947 if form.form_type != *b"DJVU" {
948 return Err(DocError::NotDjVu(form.form_type));
949 }
950 parse_page_from_chunks(&form.chunks, index, shared_djbz)
951 }
952
953 pub fn page_count(&self) -> usize {
955 self.pages.len()
956 }
957
958 pub fn page_byte_range(&self, index: usize) -> Option<core::ops::Range<u64>> {
976 self.page_byte_ranges.get(index).cloned()
977 }
978
979 pub fn page(&self, index: usize) -> Result<&DjVuPage, DocError> {
985 self.pages.get(index).ok_or(DocError::PageOutOfRange {
986 index,
987 count: self.pages.len(),
988 })
989 }
990
991 pub fn bookmarks(&self) -> &[DjVuBookmark] {
993 &self.bookmarks
994 }
995
996 pub fn metadata(&self) -> Result<Option<DjVuMetadata>, DocError> {
1001 if let Some(metz) = self.raw_chunk(b"METz") {
1002 if metz.is_empty() {
1003 return Ok(None);
1004 }
1005 return Ok(Some(crate::metadata::parse_metadata_bzz(metz)?));
1006 }
1007 if let Some(meta) = self.raw_chunk(b"METa") {
1008 if meta.is_empty() {
1009 return Ok(None);
1010 }
1011 return Ok(Some(crate::metadata::parse_metadata(meta)?));
1012 }
1013 Ok(None)
1014 }
1015
1016 pub fn raw_chunk(&self, id: &[u8; 4]) -> Option<&[u8]> {
1026 self.global_chunks
1027 .iter()
1028 .find(|c| &c.id == id)
1029 .map(|c| c.data.as_slice())
1030 }
1031
1032 pub fn all_chunks(&self, id: &[u8; 4]) -> Vec<&[u8]> {
1036 self.global_chunks
1037 .iter()
1038 .filter(|c| &c.id == id)
1039 .map(|c| c.data.as_slice())
1040 .collect()
1041 }
1042
1043 pub fn chunk_ids(&self) -> Vec<[u8; 4]> {
1048 self.global_chunks.iter().map(|c| c.id).collect()
1049 }
1050
1051 #[cfg(feature = "std")]
1063 pub fn parse_from_dir(
1064 data: &[u8],
1065 base_dir: impl AsRef<std::path::Path>,
1066 ) -> Result<Self, DocError> {
1067 let base = base_dir.as_ref().to_path_buf();
1068 let resolver = move |name: &str| -> Result<Vec<u8>, DocError> {
1069 let name = name.strip_prefix("file://").unwrap_or(name);
1071 let path = if std::path::Path::new(name).is_absolute() {
1072 std::path::PathBuf::from(name)
1073 } else {
1074 base.join(name)
1075 };
1076 std::fs::read(&path).map_err(|_| DocError::IndirectResolve(name.to_string()))
1077 };
1078 Self::parse_with_resolver(data, Some(resolver))
1079 }
1080}
1081
/// A [`DjVuDocument`] parsed from a memory-mapped file (requires the `mmap`
/// feature).
#[cfg(feature = "mmap")]
pub struct MmapDocument {
    /// The file mapping the document was parsed from; kept for as long as
    /// the document lives.
    _mmap: memmap2::Mmap,
    /// The document parsed from the mapping.
    doc: DjVuDocument,
}
1108
#[cfg(feature = "mmap")]
impl MmapDocument {
    /// Memory-maps the file at `path` and parses it with
    /// [`DjVuDocument::parse`].
    ///
    /// # Errors
    /// [`DocError::Io`] if the file cannot be opened or mapped, otherwise any
    /// parse error.
    pub fn open(path: impl AsRef<std::path::Path>) -> Result<Self, DocError> {
        let source = std::fs::File::open(path.as_ref())?;

        // NOTE(review): memmap2 marks `Mmap::map` unsafe because the mapped
        // bytes can change if the file is modified or truncated while mapped;
        // nothing in this function guards against that.
        #[allow(unsafe_code)]
        let mmap = unsafe { memmap2::Mmap::map(&source) }?;

        let parsed = DjVuDocument::parse(&mmap)?;
        Ok(Self {
            _mmap: mmap,
            doc: parsed,
        })
    }

    /// Memory-maps the index file at `path` and parses it with
    /// [`DjVuDocument::parse_from_dir`], resolving indirect components
    /// relative to the file's parent directory (`"."` if it has none).
    ///
    /// # Errors
    /// [`DocError::Io`] if the file cannot be opened or mapped, otherwise any
    /// parse or resolve error.
    pub fn open_indirect(path: impl AsRef<std::path::Path>) -> Result<Self, DocError> {
        let index_path = path.as_ref();
        let source = std::fs::File::open(index_path)?;

        // NOTE(review): same caveat as in `open` about concurrent changes to
        // the mapped file.
        #[allow(unsafe_code)]
        let mmap = unsafe { memmap2::Mmap::map(&source) }?;

        let base_dir = match index_path.parent() {
            Some(dir) => dir.to_path_buf(),
            None => std::path::PathBuf::from("."),
        };
        let parsed = DjVuDocument::parse_from_dir(&mmap, &base_dir)?;
        Ok(Self {
            _mmap: mmap,
            doc: parsed,
        })
    }

    /// Borrows the parsed document.
    pub fn document(&self) -> &DjVuDocument {
        &self.doc
    }

    /// Number of pages; forwards to [`DjVuDocument::page_count`].
    pub fn page_count(&self) -> usize {
        self.document().page_count()
    }

    /// Returns the page at `index`; forwards to [`DjVuDocument::page`].
    pub fn page(&self, index: usize) -> Result<&DjVuPage, DocError> {
        self.document().page(index)
    }
}
1174
/// Lets an [`MmapDocument`] be used wherever a `&DjVuDocument` is expected.
#[cfg(feature = "mmap")]
impl core::ops::Deref for MmapDocument {
    type Target = DjVuDocument;

    fn deref(&self) -> &Self::Target {
        let MmapDocument { doc, .. } = self;
        doc
    }
}
1182
1183#[cfg(feature = "std")]
1191fn parse_page_from_chunks(
1192 chunks: &[IffChunk<'_>],
1193 index: usize,
1194 shared_djbz: Option<Arc<Vec<u8>>>,
1195) -> Result<DjVuPage, DocError> {
1196 let info_chunk = chunks
1197 .iter()
1198 .find(|c| &c.id == b"INFO")
1199 .ok_or(DocError::MissingChunk("INFO"))?;
1200
1201 let info = PageInfo::parse(info_chunk.data)?;
1202
1203 let raw_chunks: Vec<RawChunk> = chunks
1205 .iter()
1206 .map(|c| RawChunk {
1207 id: c.id,
1208 data: c.data.to_vec(),
1209 })
1210 .collect();
1211
1212 Ok(DjVuPage {
1213 info,
1214 chunks: raw_chunks,
1215 index,
1216 shared_djbz,
1217 bg44_decoded: std::sync::OnceLock::new(),
1218 bg44_decoded_partial: std::sync::OnceLock::new(),
1219 mask_decoded: std::sync::OnceLock::new(),
1220 mask_decoded_sub4: std::sync::OnceLock::new(),
1221 fg44_decoded: std::sync::OnceLock::new(),
1222 jb2_dict_decoded: std::sync::OnceLock::new(),
1223 })
1224}
1225
1226#[cfg(not(feature = "std"))]
1227fn parse_page_from_chunks(
1228 chunks: &[IffChunk<'_>],
1229 index: usize,
1230 shared_djbz: Option<Vec<u8>>,
1231) -> Result<DjVuPage, DocError> {
1232 let info_chunk = chunks
1233 .iter()
1234 .find(|c| &c.id == b"INFO")
1235 .ok_or(DocError::MissingChunk("INFO"))?;
1236
1237 let info = PageInfo::parse(info_chunk.data)?;
1238
1239 let raw_chunks: Vec<RawChunk> = chunks
1240 .iter()
1241 .map(|c| RawChunk {
1242 id: c.id,
1243 data: c.data.to_vec(),
1244 })
1245 .collect();
1246
1247 Ok(DjVuPage {
1248 info,
1249 chunks: raw_chunks,
1250 index,
1251 shared_djbz,
1252 })
1253}
1254
1255fn parse_sub_form(data: &[u8]) -> Result<Vec<IffChunk<'_>>, DocError> {
1260 if data.len() < 4 {
1261 return Err(DocError::Malformed("sub-form data too short"));
1262 }
1263 let body = data
1266 .get(4..)
1267 .ok_or(DocError::Malformed("sub-form body missing"))?;
1268 let chunks = parse_iff_body_chunks(body)?;
1269 Ok(chunks)
1270}
1271
1272fn parse_iff_body_chunks(mut buf: &[u8]) -> Result<Vec<IffChunk<'_>>, DocError> {
1274 let mut chunks = Vec::new();
1275
1276 while buf.len() >= 8 {
1277 let id: [u8; 4] = buf
1278 .get(0..4)
1279 .and_then(|s| s.try_into().ok())
1280 .ok_or(IffError::Truncated)?;
1281 let data_len = buf
1282 .get(4..8)
1283 .and_then(|b| b.try_into().ok())
1284 .map(u32::from_be_bytes)
1285 .map(|n| n as usize)
1286 .ok_or(IffError::Truncated)?;
1287
1288 let data_start = 8usize;
1289 let data_end = data_start
1290 .checked_add(data_len)
1291 .ok_or(IffError::Truncated)?;
1292
1293 if data_end > buf.len() {
1294 return Err(DocError::Iff(IffError::ChunkTooLong {
1295 id,
1296 claimed: data_len as u32,
1297 available: buf.len().saturating_sub(data_start),
1298 }));
1299 }
1300
1301 let chunk_data = buf.get(data_start..data_end).ok_or(IffError::Truncated)?;
1302
1303 chunks.push(IffChunk {
1306 id,
1307 data: chunk_data,
1308 });
1309
1310 let padded_len = data_len + (data_len & 1);
1311 let next = data_start
1312 .checked_add(padded_len)
1313 .ok_or(IffError::Truncated)?;
1314 buf = buf.get(next.min(buf.len())..).ok_or(IffError::Truncated)?;
1315 }
1316
1317 Ok(chunks)
1318}
1319
/// One component listed in the `DIRM` directory.
#[derive(Debug, Clone)]
struct DirmEntry {
    /// Kind of the component, derived from its flag byte.
    comp_type: ComponentType,
    /// Component id string; used to match `INCL` references and passed to the
    /// resolver for indirect documents.
    id: String,
}
1326
1327fn parse_dirm(data: &[u8]) -> Result<(Vec<DirmEntry>, bool, Vec<u32>), DocError> {
1334 if data.len() < 3 {
1335 return Err(DocError::Malformed("DIRM chunk too short"));
1336 }
1337
1338 let dflags = *data.first().ok_or(DocError::Malformed("DIRM empty"))?;
1339 let is_bundled = (dflags >> 7) != 0;
1340 let nfiles = u16::from_be_bytes([
1341 *data.get(1).ok_or(DocError::Malformed("DIRM too short"))?,
1342 *data.get(2).ok_or(DocError::Malformed("DIRM too short"))?,
1343 ]) as usize;
1344
1345 let mut pos = 3usize;
1346
1347 let mut offsets: Vec<u32> = Vec::new();
1349 if is_bundled {
1350 let offsets_size = nfiles * 4;
1351 let end = pos
1352 .checked_add(offsets_size)
1353 .ok_or(DocError::Malformed("DIRM offset arithmetic overflow"))?;
1354 if end > data.len() {
1355 return Err(DocError::Malformed("DIRM offset table truncated"));
1356 }
1357 offsets.reserve(nfiles);
1358 for i in 0..nfiles {
1359 let base = pos + i * 4;
1360 let bytes = data
1361 .get(base..base + 4)
1362 .ok_or(DocError::Malformed("DIRM offset slice OOB"))?;
1363 offsets.push(u32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]));
1364 }
1365 pos = end;
1366 }
1367
1368 let bzz_data = data
1370 .get(pos..)
1371 .ok_or(DocError::Malformed("DIRM bzz data missing"))?;
1372 let meta = bzz_decode(bzz_data).unwrap_or_default();
1373
1374 let mut mpos = nfiles * 3; if mpos + nfiles > meta.len() {
1380 let entries: Vec<DirmEntry> = (0..nfiles)
1383 .map(|i| DirmEntry {
1384 comp_type: ComponentType::Page,
1385 id: format!("p{:04}", i),
1386 })
1387 .collect();
1388 return Ok((entries, is_bundled, offsets));
1389 }
1390 let flags: Vec<u8> = meta
1391 .get(mpos..mpos + nfiles)
1392 .ok_or(DocError::Malformed("DIRM flags truncated"))?
1393 .to_vec();
1394 mpos += nfiles;
1395
1396 let mut entries = Vec::with_capacity(nfiles);
1397 for &flag in flags.iter().take(nfiles) {
1398 let id = read_str_nt(&meta, &mut mpos)?;
1399
1400 if (flag & 0x80) != 0 {
1402 let _ = read_str_nt(&meta, &mut mpos)?;
1403 }
1404 if (flag & 0x40) != 0 {
1405 let _ = read_str_nt(&meta, &mut mpos)?;
1406 }
1407
1408 let comp_type = match flag & 0x3f {
1409 1 => ComponentType::Page,
1410 2 => ComponentType::Thumbnail,
1411 _ => ComponentType::Shared,
1412 };
1413
1414 entries.push(DirmEntry { comp_type, id });
1415 }
1416
1417 Ok((entries, is_bundled, offsets))
1418}
1419
1420fn read_str_nt(data: &[u8], pos: &mut usize) -> Result<String, DocError> {
1422 let start = *pos;
1423 while *pos < data.len() && *data.get(*pos).ok_or(DocError::Malformed("str read OOB"))? != 0 {
1424 *pos += 1;
1425 }
1426 if *pos >= data.len() {
1427 return Err(DocError::Malformed(
1428 "null terminator missing in DIRM string",
1429 ));
1430 }
1431 let s = core::str::from_utf8(
1432 data.get(start..*pos)
1433 .ok_or(DocError::Malformed("str slice OOB"))?,
1434 )
1435 .map_err(|_| DocError::InvalidUtf8)?
1436 .to_string();
1437 *pos += 1; Ok(s)
1439}
1440
1441fn parse_navm_bookmarks(chunks: &[IffChunk<'_>]) -> Result<Vec<DjVuBookmark>, DocError> {
1445 let navm_data = match chunks.iter().find(|c| &c.id == b"NAVM") {
1446 Some(c) => c.data,
1447 None => return Ok(vec![]),
1448 };
1449
1450 let decoded = bzz_decode(navm_data)?;
1451
1452 if decoded.len() < 2 {
1453 return Ok(vec![]);
1454 }
1455
1456 let b0 = *decoded
1457 .first()
1458 .ok_or(DocError::Malformed("NAVM total count byte 0"))?;
1459 let b1 = *decoded
1460 .get(1)
1461 .ok_or(DocError::Malformed("NAVM total count byte 1"))?;
1462 let total_count = u16::from_be_bytes([b0, b1]) as usize;
1463
1464 let mut pos = 2usize;
1465 let mut bookmarks = Vec::new();
1466 let mut decoded_count = 0usize;
1467
1468 while decoded_count < total_count {
1469 let bm = parse_bookmark_entry(&decoded, &mut pos, &mut decoded_count)?;
1470 bookmarks.push(bm);
1471 }
1472
1473 Ok(bookmarks)
1474}
1475
1476fn parse_bookmark_entry(
1481 data: &[u8],
1482 pos: &mut usize,
1483 total_counter: &mut usize,
1484) -> Result<DjVuBookmark, DocError> {
1485 if *pos >= data.len() {
1486 return Err(DocError::Malformed("NAVM bookmark entry truncated"));
1487 }
1488
1489 let n_children = *data
1491 .get(*pos)
1492 .ok_or(DocError::Malformed("NAVM children count"))? as usize;
1493 *pos += 1;
1494
1495 let title = read_navm_str(data, pos)?;
1496 let url = read_navm_str(data, pos)?;
1497 *total_counter += 1;
1498
1499 let mut children = Vec::with_capacity(n_children);
1501 for _ in 0..n_children {
1502 let child = parse_bookmark_entry(data, pos, total_counter)?;
1503 children.push(child);
1504 }
1505
1506 Ok(DjVuBookmark {
1507 title,
1508 url,
1509 children,
1510 })
1511}
1512
1513fn read_navm_str(data: &[u8], pos: &mut usize) -> Result<String, DocError> {
1517 if *pos + 3 > data.len() {
1518 return Err(DocError::Malformed("NAVM string length truncated"));
1519 }
1520 let len = ((*data.get(*pos).ok_or(DocError::Malformed("NAVM str"))? as usize) << 16)
1521 | ((*data.get(*pos + 1).ok_or(DocError::Malformed("NAVM str"))? as usize) << 8)
1522 | (*data.get(*pos + 2).ok_or(DocError::Malformed("NAVM str"))? as usize);
1523 *pos += 3;
1524
1525 let bytes = data
1526 .get(*pos..*pos + len)
1527 .ok_or(DocError::Malformed("NAVM string bytes truncated"))?;
1528 *pos += len;
1529
1530 core::str::from_utf8(bytes)
1531 .map(|s| s.to_string())
1532 .map_err(|_| DocError::InvalidUtf8)
1533}
1534
/// Downsamples a mask by a factor of 4 in each direction.
///
/// The output is `ceil(width / 4)` x `ceil(height / 4)`. An output pixel is
/// set when any source pixel of the corresponding 4x4 block is set; block
/// positions that fall outside the source are ignored.
#[cfg(feature = "std")]
fn downsample_mask_4x(src: &crate::bitmap::Bitmap) -> crate::bitmap::Bitmap {
    let (out_w, out_h) = (src.width.div_ceil(4), src.height.div_ceil(4));
    let mut out = crate::bitmap::Bitmap::new(out_w, out_h);

    for oy in 0..out_h {
        for ox in 0..out_w {
            // Scan the block row by row and stop at the first set pixel.
            let block_has_ink = (0..4u32).any(|dy| {
                (0..4u32).any(|dx| {
                    let sx = ox * 4 + dx;
                    let sy = oy * 4 + dy;
                    sx < src.width && sy < src.height && src.get(sx, sy)
                })
            });
            if block_has_ink {
                out.set(ox, oy, true);
            }
        }
    }

    out
}
1561
1562#[cfg(test)]
1565mod tests {
1566 use super::*;
1567
1568 fn assets_path() -> std::path::PathBuf {
1569 std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
1570 .join("references/djvujs/library/assets")
1571 }
1572
1573 #[test]
1577 fn single_page_parse_and_metadata() {
1578 let data =
1579 std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
1580 let doc = DjVuDocument::parse(&data).expect("parse should succeed");
1581
1582 assert_eq!(doc.page_count(), 1);
1583 let page = doc.page(0).expect("page 0 must exist");
1584 assert_eq!(page.width(), 181);
1585 assert_eq!(page.height(), 240);
1586 assert_eq!(page.dpi(), 100);
1587 assert!((page.gamma() - 2.2).abs() < 0.01, "gamma should be ~2.2");
1588 }
1589
1590 #[test]
1592 fn single_page_out_of_range() {
1593 let data =
1594 std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
1595 let doc = DjVuDocument::parse(&data).expect("parse should succeed");
1596 let err = doc.page(1).expect_err("page 1 should be out of range");
1597 assert!(
1598 matches!(err, DocError::PageOutOfRange { index: 1, count: 1 }),
1599 "unexpected error: {err:?}"
1600 );
1601 }
1602
1603 #[test]
1605 fn single_page_no_thumbnail() {
1606 let data =
1607 std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
1608 let doc = DjVuDocument::parse(&data).expect("parse should succeed");
1609 let page = doc.page(0).expect("page 0 must exist");
1610 let thumb = page.thumbnail().expect("thumbnail() should not error");
1612 assert!(
1613 thumb.is_none(),
1614 "single-page chicken.djvu has no TH44 chunks"
1615 );
1616 }
1617
1618 #[test]
1620 fn single_page_dimensions() {
1621 let data =
1622 std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
1623 let doc = DjVuDocument::parse(&data).expect("parse should succeed");
1624 let page = doc.page(0).unwrap();
1625 assert_eq!(page.dimensions(), (181, 240));
1626 }
1627
1628 #[test]
1630 fn multipage_bundled_page_count() {
1631 let data = std::fs::read(assets_path().join("DjVu3Spec_bundled.djvu"))
1632 .expect("DjVu3Spec_bundled.djvu must exist");
1633 let doc = DjVuDocument::parse(&data).expect("bundled parse should succeed");
1634 assert!(
1636 doc.page_count() > 1,
1637 "bundled document should have more than 1 page, got {}",
1638 doc.page_count()
1639 );
1640 }
1641
1642 #[test]
1644 fn multipage_bundled_page_metadata() {
1645 let data = std::fs::read(assets_path().join("DjVu3Spec_bundled.djvu"))
1646 .expect("DjVu3Spec_bundled.djvu must exist");
1647 let doc = DjVuDocument::parse(&data).expect("bundled parse should succeed");
1648
1649 let page0 = doc.page(0).expect("page 0 must exist");
1650 assert!(page0.width() > 0, "page width must be non-zero");
1651 assert!(page0.height() > 0, "page height must be non-zero");
1652 assert!(page0.dpi() > 0, "page dpi must be non-zero");
1653 }
1654
1655 #[test]
1657 fn navm_bookmarks_present() {
1658 let data =
1659 std::fs::read(assets_path().join("navm_fgbz.djvu")).expect("navm_fgbz.djvu must exist");
1660 let doc = DjVuDocument::parse(&data).expect("parse should succeed");
1661 let bm = doc.bookmarks();
1663 assert!(
1664 !bm.is_empty(),
1665 "navm_fgbz.djvu should have at least one bookmark"
1666 );
1667 }
1668
1669 #[test]
1671 fn no_navm_returns_empty_bookmarks() {
1672 let data =
1673 std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
1674 let doc = DjVuDocument::parse(&data).expect("parse should succeed");
1675 assert!(
1676 doc.bookmarks().is_empty(),
1677 "chicken.djvu has no NAVM — bookmarks should be empty"
1678 );
1679 }
1680
1681 #[test]
1687 fn indirect_document_with_resolver() {
1688 let chicken_data =
1690 std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
1691 let djvm_data = build_indirect_djvm_bytes("chicken.djvu");
1693
1694 let resolver = |name: &str| -> Result<Vec<u8>, DocError> {
1695 if name == "chicken.djvu" {
1696 Ok(chicken_data.clone())
1697 } else {
1698 Err(DocError::IndirectResolve(name.to_string()))
1699 }
1700 };
1701
1702 let doc = DjVuDocument::parse_with_resolver(&djvm_data, Some(resolver))
1703 .expect("indirect parse should succeed");
1704
1705 assert_eq!(doc.page_count(), 1);
1706 let page = doc.page(0).unwrap();
1707 assert_eq!(page.width(), 181);
1708 assert_eq!(page.height(), 240);
1709 }
1710
1711 #[test]
1713 fn indirect_document_no_resolver_returns_error() {
1714 let djvm_data = build_indirect_djvm_bytes("chicken.djvu");
1715 let err = DjVuDocument::parse(&djvm_data).expect_err("should fail without resolver");
1716 assert!(
1717 matches!(err, DocError::NoResolver),
1718 "expected NoResolver, got {err:?}"
1719 );
1720 }
1721
1722 #[test]
1729 fn page_is_lazy_no_decode_before_thumbnail() {
1730 let data =
1731 std::fs::read(assets_path().join("boy_jb2.djvu")).expect("boy_jb2.djvu must exist");
1732 let doc = DjVuDocument::parse(&data).expect("parse should succeed");
1733 let page = doc.page(0).expect("page 0 must exist");
1734
1735 assert!(!page.chunks.is_empty(), "chunks must be stored (lazy)");
1737
1738 let thumb = page.thumbnail().expect("thumbnail() should not error");
1740 assert!(thumb.is_none());
1741 }
1742
1743 #[test]
1745 fn not_djvu_returns_error() {
1746 let mut data = Vec::new();
1748 data.extend_from_slice(b"AT&T");
1749 data.extend_from_slice(b"FORM");
1750 data.extend_from_slice(&8u32.to_be_bytes());
1751 data.extend_from_slice(b"XXXXXXXX"); let err = DjVuDocument::parse(&data).expect_err("should fail");
1753 assert!(
1754 matches!(err, DocError::NotDjVu(_) | DocError::Iff(_)),
1755 "expected NotDjVu or Iff error, got {err:?}"
1756 );
1757 }
1758
1759 fn build_indirect_djvm_bytes(_page_name: &str) -> Vec<u8> {
1768 let bzz_meta: &[u8] = &[
1772 0xff, 0xff, 0xed, 0xbf, 0x8a, 0x1f, 0xbe, 0xad, 0x14, 0x57, 0x10, 0xc9, 0x63, 0x19,
1773 0x11, 0xf0, 0x85, 0x28, 0x12, 0x8a, 0xbf,
1774 ];
1775
1776 let mut dirm_data = Vec::new();
1777 dirm_data.push(0x00); dirm_data.push(0x00); dirm_data.push(0x01); dirm_data.extend_from_slice(bzz_meta);
1781
1782 build_djvm_with_dirm(&dirm_data)
1783 }
1784
1785 fn build_djvm_with_dirm(dirm_data: &[u8]) -> Vec<u8> {
1786 let mut dirm_chunk = Vec::new();
1788 dirm_chunk.extend_from_slice(b"DIRM");
1789 dirm_chunk.extend_from_slice(&(dirm_data.len() as u32).to_be_bytes());
1790 dirm_chunk.extend_from_slice(dirm_data);
1791 if !dirm_data.len().is_multiple_of(2) {
1792 dirm_chunk.push(0); }
1794
1795 let mut form_body = Vec::new();
1797 form_body.extend_from_slice(b"DJVM");
1798 form_body.extend_from_slice(&dirm_chunk);
1799
1800 let mut file = Vec::new();
1802 file.extend_from_slice(b"AT&T");
1803 file.extend_from_slice(b"FORM");
1804 file.extend_from_slice(&(form_body.len() as u32).to_be_bytes());
1805 file.extend_from_slice(&form_body);
1806 file
1807 }
1808
1809 #[test]
1813 fn page_raw_chunk_info_present() {
1814 let data =
1815 std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
1816 let doc = DjVuDocument::parse(&data).expect("parse must succeed");
1817 let page = doc.page(0).expect("page 0 must exist");
1818
1819 let info = page.raw_chunk(b"INFO").expect("INFO chunk must be present");
1821 assert_eq!(info.len(), 10, "INFO chunk is always 10 bytes");
1822 }
1823
1824 #[test]
1826 fn page_raw_chunk_absent() {
1827 let data =
1828 std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
1829 let doc = DjVuDocument::parse(&data).expect("parse must succeed");
1830 let page = doc.page(0).expect("page 0 must exist");
1831
1832 assert!(
1833 page.raw_chunk(b"XXXX").is_none(),
1834 "unknown chunk type must return None"
1835 );
1836 }
1837
1838 #[test]
1840 fn page_all_chunks_bg44_multiple() {
1841 let data = std::fs::read(
1843 std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
1844 .join("tests/fixtures/big-scanned-page.djvu"),
1845 )
1846 .expect("big-scanned-page.djvu must exist");
1847 let doc = DjVuDocument::parse(&data).expect("parse must succeed");
1848 let page = doc.page(0).expect("page 0 must exist");
1849
1850 let bg44 = page.all_chunks(b"BG44");
1851 assert!(
1852 bg44.len() >= 2,
1853 "colour page must have ≥2 BG44 chunks, got {}",
1854 bg44.len()
1855 );
1856
1857 for (i, chunk) in bg44.iter().enumerate() {
1859 assert!(!chunk.is_empty(), "BG44 chunk {i} must not be empty");
1860 }
1861 }
1862
1863 #[test]
1865 fn page_chunk_ids_includes_info() {
1866 let data =
1867 std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
1868 let doc = DjVuDocument::parse(&data).expect("parse must succeed");
1869 let page = doc.page(0).expect("page 0 must exist");
1870
1871 let ids = page.chunk_ids();
1872 assert!(!ids.is_empty(), "chunk_ids must not be empty");
1873 assert!(
1874 ids.contains(b"INFO"),
1875 "chunk_ids must include INFO, got: {:?}",
1876 ids.iter()
1877 .map(|id| std::str::from_utf8(id).unwrap_or("????"))
1878 .collect::<Vec<_>>()
1879 );
1880 }
1881
1882 #[test]
1884 fn document_raw_chunk_single_page() {
1885 let data =
1886 std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
1887 let doc = DjVuDocument::parse(&data).expect("parse must succeed");
1888
1889 let info = doc
1891 .raw_chunk(b"INFO")
1892 .expect("document must expose INFO chunk");
1893 assert_eq!(info.len(), 10);
1894 }
1895
1896 #[test]
1901 fn djvi_shared_dict_parsed_from_bundled_djvm() {
1902 let path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
1903 .join("tests/fixtures/DjVu3Spec_bundled.djvu");
1904 let data = std::fs::read(&path).expect("DjVu3Spec_bundled.djvu must exist");
1905 let doc = DjVuDocument::parse(&data).expect("parse must succeed");
1906
1907 assert!(doc.page_count() > 0, "document must have pages");
1908
1909 let pages_with_dict = doc.pages.iter().filter(|p| p.shared_djbz.is_some()).count();
1911 assert!(
1912 pages_with_dict > 0,
1913 "at least one page must have a resolved shared DJVI dict"
1914 );
1915 }
1916
1917 #[test]
1919 fn djvi_incl_page_mask_renders_ok() {
1920 let path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
1921 .join("tests/fixtures/DjVu3Spec_bundled.djvu");
1922 let data = std::fs::read(&path).expect("DjVu3Spec_bundled.djvu must exist");
1923 let doc = DjVuDocument::parse(&data).expect("parse must succeed");
1924
1925 let page = doc
1927 .pages
1928 .iter()
1929 .find(|p| p.shared_djbz.is_some())
1930 .expect("at least one page must have a shared dict");
1931
1932 let mask = page
1933 .extract_mask()
1934 .expect("extract_mask must succeed for INCL page");
1935 assert!(mask.is_some(), "INCL page must have a JB2 mask");
1936 let bm = mask.unwrap();
1937 assert!(
1938 bm.width > 0 && bm.height > 0,
1939 "mask must have non-zero dimensions"
1940 );
1941 }
1942
1943 #[test]
1945 fn no_regression_non_incl_pages() {
1946 let data = std::fs::read(
1948 std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
1949 .join("tests/fixtures/boy_jb2.djvu"),
1950 )
1951 .expect("boy_jb2.djvu must exist");
1952 let doc = DjVuDocument::parse(&data).expect("parse must succeed");
1953 let page = doc.page(0).expect("page 0 must exist");
1954 assert!(
1955 page.shared_djbz.is_none(),
1956 "single-page DJVU has no shared dict"
1957 );
1958 let mask = page.extract_mask().expect("extract_mask must succeed");
1959 assert!(mask.is_some(), "boy_jb2.djvu page must have a JB2 mask");
1960 }
1961
1962 #[test]
1964 fn page_raw_chunk_info_roundtrip() {
1965 let data =
1966 std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
1967 let doc = DjVuDocument::parse(&data).expect("parse must succeed");
1968 let page = doc.page(0).expect("page 0 must exist");
1969
1970 let raw_info = page.raw_chunk(b"INFO").expect("INFO chunk must be present");
1971 let reparsed = crate::info::PageInfo::parse(raw_info).expect("re-parse must succeed");
1972 assert_eq!(reparsed.width, page.width() as u16);
1973 assert_eq!(reparsed.height, page.height() as u16);
1974 assert_eq!(reparsed.dpi, page.dpi());
1975 }
1976
1977 #[test]
1981 fn page_byte_range_single_page_covers_full_buffer() {
1982 let data =
1983 std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
1984 let doc = DjVuDocument::parse(&data).expect("parse must succeed");
1985
1986 let r = doc.page_byte_range(0).expect("page 0 must have a range");
1987 assert_eq!(r.start, 0);
1988 assert_eq!(r.end, data.len() as u64);
1989
1990 assert!(
1991 doc.page_byte_range(1).is_none(),
1992 "out-of-range index returns None"
1993 );
1994 }
1995
1996 #[test]
1999 fn page_byte_range_bundled_djvm_round_trips() {
2000 let path = assets_path().join("DjVu3Spec_bundled.djvu");
2001 let Ok(data) = std::fs::read(&path) else {
2002 eprintln!("skip: {} missing", path.display());
2003 return;
2004 };
2005 let doc = DjVuDocument::parse(&data).expect("bundled DJVM parse must succeed");
2006
2007 let mut prev_end = 0u64;
2008 for i in 0..doc.page_count() {
2009 let r = doc
2010 .page_byte_range(i)
2011 .unwrap_or_else(|| panic!("page {i} must have a range"));
2012 assert!(r.end <= data.len() as u64, "page {i} range OOB");
2013 assert!(r.start < r.end, "page {i} range empty");
2014 assert!(r.start >= prev_end, "page {i} overlaps previous");
2015 prev_end = r.end;
2016
2017 let slice = &data[r.start as usize..r.end as usize];
2019 assert_eq!(&slice[..4], b"FORM", "page {i} range must start with FORM");
2020 }
2021 }
2022
2023 #[test]
2025 fn page_byte_range_out_of_range() {
2026 let data =
2027 std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
2028 let doc = DjVuDocument::parse(&data).expect("parse must succeed");
2029 assert!(doc.page_byte_range(99).is_none());
2030 }
2031
2032 #[test]
2034 #[cfg(feature = "mmap")]
2035 fn mmap_document_matches_parse() {
2036 let path = assets_path().join("chicken.djvu");
2037 let mmap_doc = MmapDocument::open(&path).expect("mmap open should succeed");
2038 let data = std::fs::read(&path).expect("read should succeed");
2039 let mem_doc = DjVuDocument::parse(&data).expect("parse should succeed");
2040
2041 assert_eq!(mmap_doc.page_count(), mem_doc.page_count());
2042 for i in 0..mmap_doc.page_count() {
2043 let mp = mmap_doc.page(i).unwrap();
2044 let pp = mem_doc.page(i).unwrap();
2045 assert_eq!(mp.width(), pp.width());
2046 assert_eq!(mp.height(), pp.height());
2047 assert_eq!(mp.dpi(), pp.dpi());
2048 }
2049 }
2050}