1#[cfg(not(feature = "std"))]
27use alloc::{
28 string::{String, ToString},
29 vec,
30 vec::Vec,
31};
32
33use crate::{
34 annotation::{Annotation, AnnotationError, MapArea},
35 bzz_new::bzz_decode,
36 error::{BzzError, IffError, Iw44Error, Jb2Error},
37 iff::{IffChunk, parse_form},
38 info::PageInfo,
39 iw44_new::Iw44Image,
40 metadata::{DjVuMetadata, MetadataError},
41 pixmap::Pixmap,
42 text::{TextError, TextLayer},
43};
44
45#[derive(Debug, thiserror::Error)]
49pub enum DocError {
50 #[error("IFF error: {0}")]
52 Iff(#[from] IffError),
53
54 #[error("BZZ error: {0}")]
56 Bzz(#[from] BzzError),
57
58 #[error("IW44 error: {0}")]
60 Iw44(#[from] Iw44Error),
61
62 #[error("JB2 error: {0}")]
64 Jb2(#[from] Jb2Error),
65
66 #[error("not a DjVu file: found form type {0:?}")]
68 NotDjVu([u8; 4]),
69
70 #[error("missing required chunk: {0}")]
72 MissingChunk(&'static str),
73
74 #[error("malformed DjVu document: {0}")]
76 Malformed(&'static str),
77
78 #[error("failed to resolve indirect page '{0}'")]
80 IndirectResolve(String),
81
82 #[error("page index {index} is out of range (document has {count} pages)")]
84 PageOutOfRange { index: usize, count: usize },
85
86 #[error("invalid UTF-8 in DjVu metadata")]
88 InvalidUtf8,
89
90 #[error("indirect DjVu document requires a resolver callback")]
92 NoResolver,
93
94 #[cfg(feature = "std")]
96 #[error("I/O error: {0}")]
97 Io(#[from] std::io::Error),
98
99 #[error("text layer error: {0}")]
101 Text(#[from] TextError),
102
103 #[error("annotation error: {0}")]
105 Annotation(#[from] AnnotationError),
106
107 #[error("metadata error: {0}")]
109 Metadata(#[from] MetadataError),
110}
111
/// One node of the document outline, as read from the `NAVM` chunk.
///
/// Nodes form a tree: each bookmark owns its nested bookmarks in `children`.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct DjVuBookmark {
    /// Text of the bookmark.
    pub title: String,
    /// Target string stored with the bookmark (may be empty).
    pub url: String,
    /// Nested bookmarks, in document order.
    pub children: Vec<DjVuBookmark>,
}
125
/// Kind of a component listed in the `DIRM` directory of a multi-page
/// document, derived from the low six bits of the component's flag byte.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum ComponentType {
    /// Anything that is neither a page nor a thumbnail component; such
    /// components are searched for a shared `Djbz` dictionary.
    Shared,
    /// A page component.
    Page,
    /// A thumbnail component.
    Thumbnail,
}
135
/// An owned copy of one IFF chunk: its four-byte identifier and its payload.
#[derive(Clone, Debug)]
struct RawChunk {
    /// Four-byte chunk identifier (e.g. `INFO`, `BG44`).
    id: [u8; 4],
    /// Chunk payload bytes, copied out of the parsed input.
    data: Vec<u8>,
}
142
143#[derive(Debug, Clone)]
152pub struct DjVuPage {
153 info: PageInfo,
155 chunks: Vec<RawChunk>,
157 index: usize,
159 shared_djbz: Option<Vec<u8>>,
163 #[cfg(feature = "std")]
167 bg44_decoded: std::sync::OnceLock<Option<Iw44Image>>,
168}
169
170impl DjVuPage {
171 pub fn width(&self) -> u16 {
173 self.info.width
174 }
175
176 pub fn height(&self) -> u16 {
178 self.info.height
179 }
180
181 pub fn dpi(&self) -> u16 {
183 self.info.dpi
184 }
185
186 pub fn gamma(&self) -> f32 {
188 self.info.gamma
189 }
190
191 pub fn rotation(&self) -> crate::info::Rotation {
193 self.info.rotation
194 }
195
196 pub fn index(&self) -> usize {
198 self.index
199 }
200
201 pub fn dimensions(&self) -> (u16, u16) {
203 (self.info.width, self.info.height)
204 }
205
206 pub fn thumbnail(&self) -> Result<Option<Pixmap>, DocError> {
212 let th44_chunks: Vec<&[u8]> = self
213 .chunks
214 .iter()
215 .filter(|c| &c.id == b"TH44")
216 .map(|c| c.data.as_slice())
217 .collect();
218
219 if th44_chunks.is_empty() {
220 return Ok(None);
221 }
222
223 let mut img = Iw44Image::new();
224 for chunk_data in &th44_chunks {
225 img.decode_chunk(chunk_data)?;
226 }
227 let pixmap = img.to_rgb()?;
228 Ok(Some(pixmap))
229 }
230
231 pub fn raw_chunk(&self, id: &[u8; 4]) -> Option<&[u8]> {
242 self.chunks
243 .iter()
244 .find(|c| &c.id == id)
245 .map(|c| c.data.as_slice())
246 }
247
248 pub fn all_chunks(&self, id: &[u8; 4]) -> Vec<&[u8]> {
259 self.chunks
260 .iter()
261 .filter(|c| &c.id == id)
262 .map(|c| c.data.as_slice())
263 .collect()
264 }
265
266 pub fn chunk_ids(&self) -> Vec<[u8; 4]> {
270 self.chunks.iter().map(|c| c.id).collect()
271 }
272
273 pub fn find_chunk(&self, id: &[u8; 4]) -> Option<&[u8]> {
277 self.raw_chunk(id)
278 }
279
280 pub fn find_chunks(&self, id: &[u8; 4]) -> Vec<&[u8]> {
284 self.all_chunks(id)
285 }
286
287 pub fn bg44_chunks(&self) -> Vec<&[u8]> {
289 self.find_chunks(b"BG44")
290 }
291
292 #[cfg(feature = "std")]
304 pub fn decoded_bg44(&self) -> Option<&Iw44Image> {
305 self.bg44_decoded
306 .get_or_init(|| {
307 let chunks = self.bg44_chunks();
308 if chunks.is_empty() {
309 return None;
310 }
311 let mut img = Iw44Image::new();
312 for chunk_data in &chunks {
313 if img.decode_chunk(chunk_data).is_err() {
314 break;
315 }
316 }
317 if img.width == 0 { None } else { Some(img) }
318 })
319 .as_ref()
320 }
321
322 #[cfg(not(feature = "std"))]
323 pub fn decoded_bg44(&self) -> Option<&Iw44Image> {
324 None
325 }
326
327 pub fn fg44_chunks(&self) -> Vec<&[u8]> {
329 self.find_chunks(b"FG44")
330 }
331
332 pub fn text_layer(&self) -> Result<Option<TextLayer>, DocError> {
336 let page_height = self.info.height as u32;
337
338 if let Some(txtz) = self.find_chunk(b"TXTz") {
339 if txtz.is_empty() {
340 return Ok(None);
341 }
342 let layer = crate::text::parse_text_layer_bzz(txtz, page_height)?;
343 return Ok(Some(layer));
344 }
345
346 if let Some(txta) = self.find_chunk(b"TXTa") {
347 if txta.is_empty() {
348 return Ok(None);
349 }
350 let layer = crate::text::parse_text_layer(txta, page_height)?;
351 return Ok(Some(layer));
352 }
353
354 Ok(None)
355 }
356
357 pub fn text_layer_at_size(
368 &self,
369 render_w: u32,
370 render_h: u32,
371 ) -> Result<Option<TextLayer>, DocError> {
372 let page_w = self.info.width as u32;
373 let page_h = self.info.height as u32;
374 let rotation = self.info.rotation;
375 Ok(self
376 .text_layer()?
377 .map(|tl| tl.transform(page_w, page_h, rotation, render_w, render_h)))
378 }
379
380 pub fn text(&self) -> Result<Option<String>, DocError> {
384 Ok(self.text_layer()?.map(|tl| tl.text))
385 }
386
387 pub fn annotations(&self) -> Result<Option<(Annotation, Vec<MapArea>)>, DocError> {
391 if let Some(antz) = self.find_chunk(b"ANTz") {
392 if antz.is_empty() {
393 return Ok(None);
394 }
395 let result = crate::annotation::parse_annotations_bzz(antz)?;
396 return Ok(Some(result));
397 }
398
399 if let Some(anta) = self.find_chunk(b"ANTa") {
400 if anta.is_empty() {
401 return Ok(None);
402 }
403 let result = crate::annotation::parse_annotations(anta)?;
404 return Ok(Some(result));
405 }
406
407 Ok(None)
408 }
409
410 pub fn hyperlinks(&self) -> Result<Vec<MapArea>, DocError> {
412 match self.annotations()? {
413 None => Ok(Vec::new()),
414 Some((_, mapareas)) => Ok(mapareas.into_iter().filter(|m| !m.url.is_empty()).collect()),
415 }
416 }
417
418 pub fn extract_mask(&self) -> Result<Option<crate::bitmap::Bitmap>, DocError> {
422 let sjbz = match self.find_chunk(b"Sjbz") {
423 Some(data) => data,
424 None => return Ok(None),
425 };
426
427 let dict = if let Some(djbz) = self.find_chunk(b"Djbz") {
430 Some(crate::jb2_new::decode_dict(djbz, None)?)
431 } else if let Some(djbz) = self.shared_djbz.as_deref() {
432 Some(crate::jb2_new::decode_dict(djbz, None)?)
433 } else {
434 None
435 };
436
437 let bm = crate::jb2_new::decode(sjbz, dict.as_ref())?;
438 Ok(Some(bm))
439 }
440
441 pub fn extract_foreground(&self) -> Result<Option<Pixmap>, DocError> {
445 let chunks = self.fg44_chunks();
446 if chunks.is_empty() {
447 return Ok(None);
448 }
449
450 let mut img = Iw44Image::new();
451 for chunk_data in &chunks {
452 img.decode_chunk(chunk_data)?;
453 }
454 let pixmap = img.to_rgb()?;
455 Ok(Some(pixmap))
456 }
457
458 pub fn extract_background(&self) -> Result<Option<Pixmap>, DocError> {
462 let chunks = self.bg44_chunks();
463 if chunks.is_empty() {
464 return Ok(None);
465 }
466
467 let mut img = Iw44Image::new();
468 for chunk_data in &chunks {
469 img.decode_chunk(chunk_data)?;
470 }
471 let pixmap = img.to_rgb()?;
472 Ok(Some(pixmap))
473 }
474
475 pub fn render_into(
486 &self,
487 opts: &crate::djvu_render::RenderOptions,
488 buf: &mut [u8],
489 ) -> Result<(), crate::djvu_render::RenderError> {
490 crate::djvu_render::render_into(self, opts, buf)
491 }
492}
493
494#[derive(Debug)]
501pub struct DjVuDocument {
502 pages: Vec<DjVuPage>,
504 bookmarks: Vec<DjVuBookmark>,
506 global_chunks: Vec<RawChunk>,
509}
510
511impl DjVuDocument {
512 pub fn parse(data: &[u8]) -> Result<Self, DocError> {
522 Self::parse_with_resolver(data, None::<fn(&str) -> Result<Vec<u8>, DocError>>)
523 }
524
525 pub fn parse_with_resolver<R>(data: &[u8], resolver: Option<R>) -> Result<Self, DocError>
530 where
531 R: Fn(&str) -> Result<Vec<u8>, DocError>,
532 {
533 let form = parse_form(data)?;
534
535 match &form.form_type {
536 b"DJVU" => {
537 let global_chunks: Vec<RawChunk> = form
539 .chunks
540 .iter()
541 .map(|c| RawChunk {
542 id: c.id,
543 data: c.data.to_vec(),
544 })
545 .collect();
546 let page = parse_page_from_chunks(&form.chunks, 0, None)?;
547 Ok(DjVuDocument {
548 pages: vec![page],
549 bookmarks: vec![],
550 global_chunks,
551 })
552 }
553 b"DJVM" => {
554 let dirm_chunk = form
556 .chunks
557 .iter()
558 .find(|c| &c.id == b"DIRM")
559 .ok_or(DocError::MissingChunk("DIRM"))?;
560
561 let (entries, is_bundled) = parse_dirm(dirm_chunk.data)?;
562
563 let bookmarks = parse_navm_bookmarks(&form.chunks)?;
565
566 let global_chunks: Vec<RawChunk> = form
568 .chunks
569 .iter()
570 .filter(|c| &c.id != b"FORM")
571 .map(|c| RawChunk {
572 id: c.id,
573 data: c.data.to_vec(),
574 })
575 .collect();
576
577 if is_bundled {
578 let sub_forms: Vec<&IffChunk<'_>> =
580 form.chunks.iter().filter(|c| &c.id == b"FORM").collect();
581
582 #[cfg(not(feature = "std"))]
587 use alloc::collections::BTreeMap;
588 #[cfg(feature = "std")]
589 use std::collections::BTreeMap;
590 let djvi_djbz: BTreeMap<String, Vec<u8>> = entries
591 .iter()
592 .enumerate()
593 .filter(|(_, e)| e.comp_type == ComponentType::Shared)
594 .filter_map(|(comp_idx, entry)| {
595 let sf = sub_forms.get(comp_idx)?;
596 let chunks = parse_sub_form(sf.data).ok()?;
597 let djbz = chunks.iter().find(|c| &c.id == b"Djbz")?;
598 Some((entry.id.clone(), djbz.data.to_vec()))
599 })
600 .collect();
601
602 let mut pages = Vec::new();
603 let mut page_idx = 0usize;
604 for (comp_idx, entry) in entries.iter().enumerate() {
605 if entry.comp_type != ComponentType::Page {
606 continue;
607 }
608 let sub_form = sub_forms.get(comp_idx).ok_or(DocError::Malformed(
609 "DIRM entry count exceeds FORM children",
610 ))?;
611 let sub_chunks = parse_sub_form(sub_form.data)?;
612
613 let shared_djbz = sub_chunks
615 .iter()
616 .find(|c| &c.id == b"INCL")
617 .and_then(|incl| core::str::from_utf8(incl.data.trim_ascii_end()).ok())
618 .and_then(|name| djvi_djbz.get(name))
619 .cloned();
620
621 let page = parse_page_from_chunks(&sub_chunks, page_idx, shared_djbz)?;
622 pages.push(page);
623 page_idx += 1;
624 }
625
626 Ok(DjVuDocument {
627 pages,
628 bookmarks,
629 global_chunks,
630 })
631 } else {
632 let resolver = resolver.ok_or(DocError::NoResolver)?;
634
635 let mut pages = Vec::new();
636 let mut page_idx = 0usize;
637 for entry in &entries {
638 if entry.comp_type != ComponentType::Page {
639 continue;
640 }
641 let resolved_data = resolver(&entry.id)
642 .map_err(|_| DocError::IndirectResolve(entry.id.clone()))?;
643 let sub_form = parse_form(&resolved_data)?;
644 let page = parse_page_from_chunks(&sub_form.chunks, page_idx, None)?;
645 pages.push(page);
646 page_idx += 1;
647 }
648
649 Ok(DjVuDocument {
650 pages,
651 bookmarks,
652 global_chunks,
653 })
654 }
655 }
656 other => Err(DocError::NotDjVu(*other)),
657 }
658 }
659
660 pub fn page_count(&self) -> usize {
662 self.pages.len()
663 }
664
665 pub fn page(&self, index: usize) -> Result<&DjVuPage, DocError> {
671 self.pages.get(index).ok_or(DocError::PageOutOfRange {
672 index,
673 count: self.pages.len(),
674 })
675 }
676
677 pub fn bookmarks(&self) -> &[DjVuBookmark] {
679 &self.bookmarks
680 }
681
682 pub fn metadata(&self) -> Result<Option<DjVuMetadata>, DocError> {
687 if let Some(metz) = self.raw_chunk(b"METz") {
688 if metz.is_empty() {
689 return Ok(None);
690 }
691 return Ok(Some(crate::metadata::parse_metadata_bzz(metz)?));
692 }
693 if let Some(meta) = self.raw_chunk(b"METa") {
694 if meta.is_empty() {
695 return Ok(None);
696 }
697 return Ok(Some(crate::metadata::parse_metadata(meta)?));
698 }
699 Ok(None)
700 }
701
702 pub fn raw_chunk(&self, id: &[u8; 4]) -> Option<&[u8]> {
712 self.global_chunks
713 .iter()
714 .find(|c| &c.id == id)
715 .map(|c| c.data.as_slice())
716 }
717
718 pub fn all_chunks(&self, id: &[u8; 4]) -> Vec<&[u8]> {
722 self.global_chunks
723 .iter()
724 .filter(|c| &c.id == id)
725 .map(|c| c.data.as_slice())
726 .collect()
727 }
728
729 pub fn chunk_ids(&self) -> Vec<[u8; 4]> {
734 self.global_chunks.iter().map(|c| c.id).collect()
735 }
736}
737
/// A [`DjVuDocument`] opened from a file through a memory map (requires the
/// `mmap` feature).
#[cfg(feature = "mmap")]
pub struct MmapDocument {
    /// The mapping the document was parsed from. It is not read after
    /// parsing, but is kept for as long as this value lives.
    _mmap: memmap2::Mmap,
    /// The parsed document.
    doc: DjVuDocument,
}
764
#[cfg(feature = "mmap")]
impl MmapDocument {
    /// Memory-maps the file at `path` and parses it as a DjVu document.
    ///
    /// # Errors
    /// [`DocError::Io`] if the file cannot be opened or mapped; otherwise any
    /// error of [`DjVuDocument::parse`].
    pub fn open(path: impl AsRef<std::path::Path>) -> Result<Self, DocError> {
        let path = path.as_ref();
        let file = std::fs::File::open(path)?;

        // SAFETY: NOTE(review) — `memmap2` requires that the underlying file
        // is not modified or truncated while mapped; nothing here enforces
        // that, so it is assumed of the caller's environment.
        #[allow(unsafe_code)]
        let mmap = unsafe { memmap2::Mmap::map(&file) }?;

        let doc = DjVuDocument::parse(&mmap)?;
        Ok(Self { _mmap: mmap, doc })
    }

    /// Borrows the parsed document.
    pub fn document(&self) -> &DjVuDocument {
        &self.doc
    }

    /// Number of pages; forwards to [`DjVuDocument::page_count`].
    pub fn page_count(&self) -> usize {
        self.document().page_count()
    }

    /// Returns the page at `index`; forwards to [`DjVuDocument::page`].
    pub fn page(&self, index: usize) -> Result<&DjVuPage, DocError> {
        self.document().page(index)
    }
}
806
/// Lets a `MmapDocument` be used wherever a `&DjVuDocument` is expected.
#[cfg(feature = "mmap")]
impl core::ops::Deref for MmapDocument {
    type Target = DjVuDocument;

    /// Borrows the wrapped document.
    fn deref(&self) -> &Self::Target {
        &self.doc
    }
}
814
815fn parse_page_from_chunks(
823 chunks: &[IffChunk<'_>],
824 index: usize,
825 shared_djbz: Option<Vec<u8>>,
826) -> Result<DjVuPage, DocError> {
827 let info_chunk = chunks
828 .iter()
829 .find(|c| &c.id == b"INFO")
830 .ok_or(DocError::MissingChunk("INFO"))?;
831
832 let info = PageInfo::parse(info_chunk.data)?;
833
834 let raw_chunks: Vec<RawChunk> = chunks
836 .iter()
837 .map(|c| RawChunk {
838 id: c.id,
839 data: c.data.to_vec(),
840 })
841 .collect();
842
843 Ok(DjVuPage {
844 info,
845 chunks: raw_chunks,
846 index,
847 shared_djbz,
848 #[cfg(feature = "std")]
849 bg44_decoded: std::sync::OnceLock::new(),
850 })
851}
852
853fn parse_sub_form(data: &[u8]) -> Result<Vec<IffChunk<'_>>, DocError> {
858 if data.len() < 4 {
859 return Err(DocError::Malformed("sub-form data too short"));
860 }
861 let body = data
864 .get(4..)
865 .ok_or(DocError::Malformed("sub-form body missing"))?;
866 let chunks = parse_iff_body_chunks(body)?;
867 Ok(chunks)
868}
869
870fn parse_iff_body_chunks(mut buf: &[u8]) -> Result<Vec<IffChunk<'_>>, DocError> {
872 let mut chunks = Vec::new();
873
874 while buf.len() >= 8 {
875 let id: [u8; 4] = buf
876 .get(0..4)
877 .and_then(|s| s.try_into().ok())
878 .ok_or(IffError::Truncated)?;
879 let data_len = buf
880 .get(4..8)
881 .and_then(|b| b.try_into().ok())
882 .map(u32::from_be_bytes)
883 .map(|n| n as usize)
884 .ok_or(IffError::Truncated)?;
885
886 let data_start = 8usize;
887 let data_end = data_start
888 .checked_add(data_len)
889 .ok_or(IffError::Truncated)?;
890
891 if data_end > buf.len() {
892 return Err(DocError::Iff(IffError::ChunkTooLong {
893 id,
894 claimed: data_len as u32,
895 available: buf.len().saturating_sub(data_start),
896 }));
897 }
898
899 let chunk_data = buf.get(data_start..data_end).ok_or(IffError::Truncated)?;
900
901 chunks.push(IffChunk {
904 id,
905 data: chunk_data,
906 });
907
908 let padded_len = data_len + (data_len & 1);
909 let next = data_start
910 .checked_add(padded_len)
911 .ok_or(IffError::Truncated)?;
912 buf = buf.get(next.min(buf.len())..).ok_or(IffError::Truncated)?;
913 }
914
915 Ok(chunks)
916}
917
918#[derive(Debug, Clone)]
920struct DirmEntry {
921 comp_type: ComponentType,
922 id: String,
923}
924
925fn parse_dirm(data: &[u8]) -> Result<(Vec<DirmEntry>, bool), DocError> {
929 if data.len() < 3 {
930 return Err(DocError::Malformed("DIRM chunk too short"));
931 }
932
933 let dflags = *data.first().ok_or(DocError::Malformed("DIRM empty"))?;
934 let is_bundled = (dflags >> 7) != 0;
935 let nfiles = u16::from_be_bytes([
936 *data.get(1).ok_or(DocError::Malformed("DIRM too short"))?,
937 *data.get(2).ok_or(DocError::Malformed("DIRM too short"))?,
938 ]) as usize;
939
940 let mut pos = 3usize;
941
942 if is_bundled {
944 let offsets_size = nfiles * 4;
945 pos = pos
946 .checked_add(offsets_size)
947 .ok_or(DocError::Malformed("DIRM offset arithmetic overflow"))?;
948 if pos > data.len() {
949 return Err(DocError::Malformed("DIRM offset table truncated"));
950 }
951 }
952
953 let bzz_data = data
955 .get(pos..)
956 .ok_or(DocError::Malformed("DIRM bzz data missing"))?;
957 let meta = bzz_decode(bzz_data)?;
958
959 let mut mpos = nfiles * 3; if mpos + nfiles > meta.len() {
963 return Err(DocError::Malformed("DIRM meta too short for flags"));
964 }
965 let flags: Vec<u8> = meta
966 .get(mpos..mpos + nfiles)
967 .ok_or(DocError::Malformed("DIRM flags truncated"))?
968 .to_vec();
969 mpos += nfiles;
970
971 let mut entries = Vec::with_capacity(nfiles);
972 for &flag in flags.iter().take(nfiles) {
973 let id = read_str_nt(&meta, &mut mpos)?;
974
975 if (flag & 0x80) != 0 {
977 let _ = read_str_nt(&meta, &mut mpos)?;
978 }
979 if (flag & 0x40) != 0 {
980 let _ = read_str_nt(&meta, &mut mpos)?;
981 }
982
983 let comp_type = match flag & 0x3f {
984 1 => ComponentType::Page,
985 2 => ComponentType::Thumbnail,
986 _ => ComponentType::Shared,
987 };
988
989 entries.push(DirmEntry { comp_type, id });
990 }
991
992 Ok((entries, is_bundled))
993}
994
995fn read_str_nt(data: &[u8], pos: &mut usize) -> Result<String, DocError> {
997 let start = *pos;
998 while *pos < data.len() && *data.get(*pos).ok_or(DocError::Malformed("str read OOB"))? != 0 {
999 *pos += 1;
1000 }
1001 if *pos >= data.len() {
1002 return Err(DocError::Malformed(
1003 "null terminator missing in DIRM string",
1004 ));
1005 }
1006 let s = core::str::from_utf8(
1007 data.get(start..*pos)
1008 .ok_or(DocError::Malformed("str slice OOB"))?,
1009 )
1010 .map_err(|_| DocError::InvalidUtf8)?
1011 .to_string();
1012 *pos += 1; Ok(s)
1014}
1015
1016fn parse_navm_bookmarks(chunks: &[IffChunk<'_>]) -> Result<Vec<DjVuBookmark>, DocError> {
1020 let navm_data = match chunks.iter().find(|c| &c.id == b"NAVM") {
1021 Some(c) => c.data,
1022 None => return Ok(vec![]),
1023 };
1024
1025 let decoded = bzz_decode(navm_data)?;
1026
1027 if decoded.len() < 2 {
1028 return Ok(vec![]);
1029 }
1030
1031 let b0 = *decoded
1032 .first()
1033 .ok_or(DocError::Malformed("NAVM total count byte 0"))?;
1034 let b1 = *decoded
1035 .get(1)
1036 .ok_or(DocError::Malformed("NAVM total count byte 1"))?;
1037 let total_count = u16::from_be_bytes([b0, b1]) as usize;
1038
1039 let mut pos = 2usize;
1040 let mut bookmarks = Vec::new();
1041 let mut decoded_count = 0usize;
1042
1043 while decoded_count < total_count {
1044 let bm = parse_bookmark_entry(&decoded, &mut pos, &mut decoded_count)?;
1045 bookmarks.push(bm);
1046 }
1047
1048 Ok(bookmarks)
1049}
1050
1051fn parse_bookmark_entry(
1056 data: &[u8],
1057 pos: &mut usize,
1058 total_counter: &mut usize,
1059) -> Result<DjVuBookmark, DocError> {
1060 if *pos >= data.len() {
1061 return Err(DocError::Malformed("NAVM bookmark entry truncated"));
1062 }
1063
1064 let n_children = *data
1066 .get(*pos)
1067 .ok_or(DocError::Malformed("NAVM children count"))? as usize;
1068 *pos += 1;
1069
1070 let title = read_navm_str(data, pos)?;
1071 let url = read_navm_str(data, pos)?;
1072 *total_counter += 1;
1073
1074 let mut children = Vec::with_capacity(n_children);
1076 for _ in 0..n_children {
1077 let child = parse_bookmark_entry(data, pos, total_counter)?;
1078 children.push(child);
1079 }
1080
1081 Ok(DjVuBookmark {
1082 title,
1083 url,
1084 children,
1085 })
1086}
1087
1088fn read_navm_str(data: &[u8], pos: &mut usize) -> Result<String, DocError> {
1092 if *pos + 3 > data.len() {
1093 return Err(DocError::Malformed("NAVM string length truncated"));
1094 }
1095 let len = ((*data.get(*pos).ok_or(DocError::Malformed("NAVM str"))? as usize) << 16)
1096 | ((*data.get(*pos + 1).ok_or(DocError::Malformed("NAVM str"))? as usize) << 8)
1097 | (*data.get(*pos + 2).ok_or(DocError::Malformed("NAVM str"))? as usize);
1098 *pos += 3;
1099
1100 let bytes = data
1101 .get(*pos..*pos + len)
1102 .ok_or(DocError::Malformed("NAVM string bytes truncated"))?;
1103 *pos += len;
1104
1105 core::str::from_utf8(bytes)
1106 .map(|s| s.to_string())
1107 .map_err(|_| DocError::InvalidUtf8)
1108}
1109
/// Unit tests for document parsing. Most of them read fixture files from the
/// repository and therefore need those files to be present.
#[cfg(test)]
mod tests {
    use super::*;

    /// Directory holding the reference DjVu assets used by most tests.
    fn assets_path() -> std::path::PathBuf {
        std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .join("references/djvujs/library/assets")
    }

    /// A single-page file parses to one page with the expected INFO values.
    #[test]
    fn single_page_parse_and_metadata() {
        let data =
            std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
        let doc = DjVuDocument::parse(&data).expect("parse should succeed");

        assert_eq!(doc.page_count(), 1);
        let page = doc.page(0).expect("page 0 must exist");
        assert_eq!(page.width(), 181);
        assert_eq!(page.height(), 240);
        assert_eq!(page.dpi(), 100);
        assert!((page.gamma() - 2.2).abs() < 0.01, "gamma should be ~2.2");
    }

    /// Asking for a page past the end yields `PageOutOfRange`.
    #[test]
    fn single_page_out_of_range() {
        let data =
            std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
        let doc = DjVuDocument::parse(&data).expect("parse should succeed");
        let err = doc.page(1).expect_err("page 1 should be out of range");
        assert!(
            matches!(err, DocError::PageOutOfRange { index: 1, count: 1 }),
            "unexpected error: {err:?}"
        );
    }

    /// A page without TH44 chunks has no thumbnail.
    #[test]
    fn single_page_no_thumbnail() {
        let data =
            std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
        let doc = DjVuDocument::parse(&data).expect("parse should succeed");
        let page = doc.page(0).expect("page 0 must exist");
        let thumb = page.thumbnail().expect("thumbnail() should not error");
        assert!(
            thumb.is_none(),
            "single-page chicken.djvu has no TH44 chunks"
        );
    }

    /// `dimensions()` reports `(width, height)`.
    #[test]
    fn single_page_dimensions() {
        let data =
            std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
        let doc = DjVuDocument::parse(&data).expect("parse should succeed");
        let page = doc.page(0).unwrap();
        assert_eq!(page.dimensions(), (181, 240));
    }

    /// A bundled multi-page file yields more than one page.
    #[test]
    fn multipage_bundled_page_count() {
        let data = std::fs::read(assets_path().join("DjVu3Spec_bundled.djvu"))
            .expect("DjVu3Spec_bundled.djvu must exist");
        let doc = DjVuDocument::parse(&data).expect("bundled parse should succeed");
        assert!(
            doc.page_count() > 1,
            "bundled document should have more than 1 page, got {}",
            doc.page_count()
        );
    }

    /// Pages of a bundled file carry sensible INFO values.
    #[test]
    fn multipage_bundled_page_metadata() {
        let data = std::fs::read(assets_path().join("DjVu3Spec_bundled.djvu"))
            .expect("DjVu3Spec_bundled.djvu must exist");
        let doc = DjVuDocument::parse(&data).expect("bundled parse should succeed");

        let page0 = doc.page(0).expect("page 0 must exist");
        assert!(page0.width() > 0, "page width must be non-zero");
        assert!(page0.height() > 0, "page height must be non-zero");
        assert!(page0.dpi() > 0, "page dpi must be non-zero");
    }

    /// A document with a NAVM chunk exposes at least one bookmark.
    #[test]
    fn navm_bookmarks_present() {
        let data =
            std::fs::read(assets_path().join("navm_fgbz.djvu")).expect("navm_fgbz.djvu must exist");
        let doc = DjVuDocument::parse(&data).expect("parse should succeed");
        let bm = doc.bookmarks();
        assert!(
            !bm.is_empty(),
            "navm_fgbz.djvu should have at least one bookmark"
        );
    }

    /// A document without NAVM has an empty bookmark list.
    #[test]
    fn no_navm_returns_empty_bookmarks() {
        let data =
            std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
        let doc = DjVuDocument::parse(&data).expect("parse should succeed");
        assert!(
            doc.bookmarks().is_empty(),
            "chicken.djvu has no NAVM — bookmarks should be empty"
        );
    }

    /// An indirect DJVM document is parsed by fetching its page through the
    /// resolver callback.
    #[test]
    fn indirect_document_with_resolver() {
        let chicken_data =
            std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
        let djvm_data = build_indirect_djvm_bytes("chicken.djvu");

        let resolver = |name: &str| -> Result<Vec<u8>, DocError> {
            if name == "chicken.djvu" {
                Ok(chicken_data.clone())
            } else {
                Err(DocError::IndirectResolve(name.to_string()))
            }
        };

        let doc = DjVuDocument::parse_with_resolver(&djvm_data, Some(resolver))
            .expect("indirect parse should succeed");

        assert_eq!(doc.page_count(), 1);
        let page = doc.page(0).unwrap();
        assert_eq!(page.width(), 181);
        assert_eq!(page.height(), 240);
    }

    /// Parsing an indirect document without a resolver fails with
    /// `NoResolver`.
    #[test]
    fn indirect_document_no_resolver_returns_error() {
        let djvm_data = build_indirect_djvm_bytes("chicken.djvu");
        let err = DjVuDocument::parse(&djvm_data).expect_err("should fail without resolver");
        assert!(
            matches!(err, DocError::NoResolver),
            "expected NoResolver, got {err:?}"
        );
    }

    /// Parsing stores the raw chunks of a page; asking for a thumbnail on a
    /// page without TH44 chunks returns `None`.
    #[test]
    fn page_is_lazy_no_decode_before_thumbnail() {
        let data =
            std::fs::read(assets_path().join("boy_jb2.djvu")).expect("boy_jb2.djvu must exist");
        let doc = DjVuDocument::parse(&data).expect("parse should succeed");
        let page = doc.page(0).expect("page 0 must exist");

        assert!(!page.chunks.is_empty(), "chunks must be stored (lazy)");

        let thumb = page.thumbnail().expect("thumbnail() should not error");
        assert!(thumb.is_none());
    }

    /// A well-formed IFF file with an unknown form type is rejected.
    #[test]
    fn not_djvu_returns_error() {
        let mut data = Vec::new();
        data.extend_from_slice(b"AT&T");
        data.extend_from_slice(b"FORM");
        data.extend_from_slice(&8u32.to_be_bytes());
        // Eight payload bytes; the first four act as the (bogus) form type.
        data.extend_from_slice(b"XXXXXXXX");
        let err = DjVuDocument::parse(&data).expect_err("should fail");
        assert!(
            matches!(err, DocError::NotDjVu(_) | DocError::Iff(_)),
            "expected NotDjVu or Iff error, got {err:?}"
        );
    }

    /// Builds the bytes of a minimal indirect (non-bundled) DJVM file with a
    /// one-entry directory.
    ///
    /// The directory table is a fixed, pre-compressed BZZ blob, so the
    /// `_page_name` argument is not used. NOTE(review): judging by the tests
    /// that call this, the blob describes a single page component named
    /// "chicken.djvu" — confirm before reusing it for other names.
    fn build_indirect_djvm_bytes(_page_name: &str) -> Vec<u8> {
        let bzz_meta: &[u8] = &[
            0xff, 0xff, 0xed, 0xbf, 0x8a, 0x1f, 0xbe, 0xad, 0x14, 0x57, 0x10, 0xc9, 0x63, 0x19,
            0x11, 0xf0, 0x85, 0x28, 0x12, 0x8a, 0xbf,
        ];

        let mut dirm_data = Vec::new();
        // Flags byte: bit 7 clear, i.e. not bundled.
        dirm_data.push(0x00);
        // Component count, big-endian: 1.
        dirm_data.push(0x00);
        dirm_data.push(0x01);
        dirm_data.extend_from_slice(bzz_meta);

        build_djvm_with_dirm(&dirm_data)
    }

    /// Wraps `dirm_data` in a `DIRM` chunk inside an `AT&T` / `FORM:DJVM`
    /// container and returns the complete file bytes.
    fn build_djvm_with_dirm(dirm_data: &[u8]) -> Vec<u8> {
        let mut dirm_chunk = Vec::new();
        dirm_chunk.extend_from_slice(b"DIRM");
        dirm_chunk.extend_from_slice(&(dirm_data.len() as u32).to_be_bytes());
        dirm_chunk.extend_from_slice(dirm_data);
        if !dirm_data.len().is_multiple_of(2) {
            // IFF chunks are padded to an even length.
            dirm_chunk.push(0);
        }

        let mut form_body = Vec::new();
        form_body.extend_from_slice(b"DJVM");
        form_body.extend_from_slice(&dirm_chunk);

        let mut file = Vec::new();
        file.extend_from_slice(b"AT&T");
        file.extend_from_slice(b"FORM");
        file.extend_from_slice(&(form_body.len() as u32).to_be_bytes());
        file.extend_from_slice(&form_body);
        file
    }

    /// `raw_chunk` exposes the page's INFO chunk.
    #[test]
    fn page_raw_chunk_info_present() {
        let data =
            std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
        let doc = DjVuDocument::parse(&data).expect("parse must succeed");
        let page = doc.page(0).expect("page 0 must exist");

        let info = page.raw_chunk(b"INFO").expect("INFO chunk must be present");
        assert_eq!(info.len(), 10, "INFO chunk is always 10 bytes");
    }

    /// `raw_chunk` returns `None` for an id the page does not contain.
    #[test]
    fn page_raw_chunk_absent() {
        let data =
            std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
        let doc = DjVuDocument::parse(&data).expect("parse must succeed");
        let page = doc.page(0).expect("page 0 must exist");

        assert!(
            page.raw_chunk(b"XXXX").is_none(),
            "unknown chunk type must return None"
        );
    }

    /// `all_chunks` returns every BG44 chunk of a page that has several.
    #[test]
    fn page_all_chunks_bg44_multiple() {
        let data = std::fs::read(
            std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
                .join("tests/fixtures/big-scanned-page.djvu"),
        )
        .expect("big-scanned-page.djvu must exist");
        let doc = DjVuDocument::parse(&data).expect("parse must succeed");
        let page = doc.page(0).expect("page 0 must exist");

        let bg44 = page.all_chunks(b"BG44");
        assert!(
            bg44.len() >= 2,
            "colour page must have ≥2 BG44 chunks, got {}",
            bg44.len()
        );

        for (i, chunk) in bg44.iter().enumerate() {
            assert!(!chunk.is_empty(), "BG44 chunk {i} must not be empty");
        }
    }

    /// `chunk_ids` lists the INFO chunk.
    #[test]
    fn page_chunk_ids_includes_info() {
        let data =
            std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
        let doc = DjVuDocument::parse(&data).expect("parse must succeed");
        let page = doc.page(0).expect("page 0 must exist");

        let ids = page.chunk_ids();
        assert!(!ids.is_empty(), "chunk_ids must not be empty");
        assert!(
            ids.contains(b"INFO"),
            "chunk_ids must include INFO, got: {:?}",
            ids.iter()
                .map(|id| std::str::from_utf8(id).unwrap_or("????"))
                .collect::<Vec<_>>()
        );
    }

    /// For a single-page file the document-level chunk list includes the
    /// page's INFO chunk.
    #[test]
    fn document_raw_chunk_single_page() {
        let data =
            std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
        let doc = DjVuDocument::parse(&data).expect("parse must succeed");

        let info = doc
            .raw_chunk(b"INFO")
            .expect("document must expose INFO chunk");
        assert_eq!(info.len(), 10);
    }

    /// Pages of a bundled document that include a shared component get its
    /// dictionary attached.
    #[test]
    fn djvi_shared_dict_parsed_from_bundled_djvm() {
        let path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .join("tests/fixtures/DjVu3Spec_bundled.djvu");
        let data = std::fs::read(&path).expect("DjVu3Spec_bundled.djvu must exist");
        let doc = DjVuDocument::parse(&data).expect("parse must succeed");

        assert!(doc.page_count() > 0, "document must have pages");

        let pages_with_dict = doc.pages.iter().filter(|p| p.shared_djbz.is_some()).count();
        assert!(
            pages_with_dict > 0,
            "at least one page must have a resolved shared DJVI dict"
        );
    }

    /// The mask of a page that uses a shared dictionary decodes successfully.
    #[test]
    fn djvi_incl_page_mask_renders_ok() {
        let path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .join("tests/fixtures/DjVu3Spec_bundled.djvu");
        let data = std::fs::read(&path).expect("DjVu3Spec_bundled.djvu must exist");
        let doc = DjVuDocument::parse(&data).expect("parse must succeed");

        let page = doc
            .pages
            .iter()
            .find(|p| p.shared_djbz.is_some())
            .expect("at least one page must have a shared dict");

        let mask = page
            .extract_mask()
            .expect("extract_mask must succeed for INCL page");
        assert!(mask.is_some(), "INCL page must have a JB2 mask");
        let bm = mask.unwrap();
        assert!(
            bm.width > 0 && bm.height > 0,
            "mask must have non-zero dimensions"
        );
    }

    /// Pages without a shared dictionary still decode their mask.
    #[test]
    fn no_regression_non_incl_pages() {
        let data = std::fs::read(
            std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
                .join("tests/fixtures/boy_jb2.djvu"),
        )
        .expect("boy_jb2.djvu must exist");
        let doc = DjVuDocument::parse(&data).expect("parse must succeed");
        let page = doc.page(0).expect("page 0 must exist");
        assert!(
            page.shared_djbz.is_none(),
            "single-page DJVU has no shared dict"
        );
        let mask = page.extract_mask().expect("extract_mask must succeed");
        assert!(mask.is_some(), "boy_jb2.djvu page must have a JB2 mask");
    }

    /// Re-parsing the raw INFO chunk reproduces the page's accessor values.
    #[test]
    fn page_raw_chunk_info_roundtrip() {
        let data =
            std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
        let doc = DjVuDocument::parse(&data).expect("parse must succeed");
        let page = doc.page(0).expect("page 0 must exist");

        let raw_info = page.raw_chunk(b"INFO").expect("INFO chunk must be present");
        let reparsed = crate::info::PageInfo::parse(raw_info).expect("re-parse must succeed");
        // `width()` / `height()` already return `u16`; no cast is needed.
        assert_eq!(reparsed.width, page.width());
        assert_eq!(reparsed.height, page.height());
        assert_eq!(reparsed.dpi, page.dpi());
    }

    /// Opening through a memory map gives the same pages as parsing the
    /// bytes read from the file.
    #[test]
    #[cfg(feature = "mmap")]
    fn mmap_document_matches_parse() {
        let path = assets_path().join("chicken.djvu");
        let mmap_doc = MmapDocument::open(&path).expect("mmap open should succeed");
        let data = std::fs::read(&path).expect("read should succeed");
        let mem_doc = DjVuDocument::parse(&data).expect("parse should succeed");

        assert_eq!(mmap_doc.page_count(), mem_doc.page_count());
        for i in 0..mmap_doc.page_count() {
            let mp = mmap_doc.page(i).unwrap();
            let pp = mem_doc.page(i).unwrap();
            assert_eq!(mp.width(), pp.width());
            assert_eq!(mp.height(), pp.height());
            assert_eq!(mp.dpi(), pp.dpi());
        }
    }
}