#[cfg(not(feature = "std"))]
use alloc::{
string::{String, ToString},
vec,
vec::Vec,
};
use crate::{
annotation::{Annotation, AnnotationError, MapArea},
bzz_new::bzz_decode,
error::{BzzError, IffError, Iw44Error, Jb2Error},
iff::{IffChunk, parse_form},
info::PageInfo,
iw44_new::Iw44Image,
metadata::{DjVuMetadata, MetadataError},
pixmap::Pixmap,
text::{TextError, TextLayer},
};
/// Unified error type for DjVu document parsing and decoding.
///
/// Wraps the per-subsystem errors (IFF container, BZZ, IW44, JB2, text,
/// annotation, metadata) plus document-level structural failures.
#[derive(Debug, thiserror::Error)]
pub enum DocError {
/// Error from the IFF container parser.
#[error("IFF error: {0}")]
Iff(#[from] IffError),
/// Error from the BZZ general-purpose decompressor.
#[error("BZZ error: {0}")]
Bzz(#[from] BzzError),
/// Error from the IW44 wavelet image decoder.
#[error("IW44 error: {0}")]
Iw44(#[from] Iw44Error),
/// Error from the JB2 bilevel image decoder.
#[error("JB2 error: {0}")]
Jb2(#[from] Jb2Error),
/// Outer FORM had a type other than DJVU/DJVM; the raw type is carried.
#[error("not a DjVu file: found form type {0:?}")]
NotDjVu([u8; 4]),
/// A chunk required by the format (e.g. INFO, DIRM) was absent.
#[error("missing required chunk: {0}")]
MissingChunk(&'static str),
/// Structurally invalid data with a static description of the problem.
#[error("malformed DjVu document: {0}")]
Malformed(&'static str),
/// The resolver callback failed for the named indirect component.
#[error("failed to resolve indirect page '{0}'")]
IndirectResolve(String),
/// Page lookup past the end of the document.
#[error("page index {index} is out of range (document has {count} pages)")]
PageOutOfRange { index: usize, count: usize },
/// A string field (DIRM id, NAVM title/url, …) was not valid UTF-8.
#[error("invalid UTF-8 in DjVu metadata")]
InvalidUtf8,
/// An indirect (multi-file) document was parsed without a resolver.
#[error("indirect DjVu document requires a resolver callback")]
NoResolver,
/// I/O failure (std builds only; used e.g. by `MmapDocument::open`).
#[cfg(feature = "std")]
#[error("I/O error: {0}")]
Io(#[from] std::io::Error),
/// Error from the hidden-text layer parser.
#[error("text layer error: {0}")]
Text(#[from] TextError),
/// Error from the annotation parser.
#[error("annotation error: {0}")]
Annotation(#[from] AnnotationError),
/// Error from the metadata parser.
#[error("metadata error: {0}")]
Metadata(#[from] MetadataError),
}
/// A node in the document outline (NAVM) tree.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct DjVuBookmark {
/// Display title of the outline entry.
pub title: String,
/// Link target as stored in the NAVM chunk (often a page reference).
pub url: String,
/// Nested child bookmarks, in file order.
pub children: Vec<DjVuBookmark>,
}
/// Kind of a DJVM directory (DIRM) component, decoded from the low six
/// bits of each entry's flag byte in `parse_dirm`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ComponentType {
/// Shared data component (e.g. a DJVI form holding a Djbz dictionary).
Shared,
/// A page of the document.
Page,
/// A thumbnails component.
Thumbnail,
}
/// An owned copy of one IFF chunk: its four-byte id plus payload bytes.
/// Pages keep these so decoding can stay lazy after parsing.
#[derive(Debug, Clone)]
struct RawChunk {
id: [u8; 4],
data: Vec<u8>,
}
/// A single page of a DjVu document.
///
/// Stores the page's raw chunks so that image/text decoding is lazy:
/// nothing is decoded until an accessor such as `extract_mask` or
/// `text_layer` is called.
#[derive(Debug, Clone)]
pub struct DjVuPage {
// Parsed INFO chunk (dimensions, dpi, gamma, rotation).
info: PageInfo,
// Raw chunks of the page form, in file order.
chunks: Vec<RawChunk>,
// Zero-based page index within the document.
index: usize,
// Payload of a shared Djbz dictionary resolved via an INCL chunk,
// if this page references one.
shared_djbz: Option<Vec<u8>>,
}
impl DjVuPage {
    /// Page width in pixels, from the INFO chunk.
    pub fn width(&self) -> u16 {
        self.info.width
    }
    /// Page height in pixels, from the INFO chunk.
    pub fn height(&self) -> u16 {
        self.info.height
    }
    /// Scan resolution in dots per inch, from the INFO chunk.
    pub fn dpi(&self) -> u16 {
        self.info.dpi
    }
    /// Gamma value recorded in the INFO chunk.
    pub fn gamma(&self) -> f32 {
        self.info.gamma
    }
    /// Initial page rotation recorded in the INFO chunk.
    pub fn rotation(&self) -> crate::info::Rotation {
        self.info.rotation
    }
    /// Zero-based index of this page within its document.
    pub fn index(&self) -> usize {
        self.index
    }
    /// `(width, height)` in pixels.
    pub fn dimensions(&self) -> (u16, u16) {
        (self.info.width, self.info.height)
    }
    /// Decode the embedded thumbnail from the page's TH44 chunks.
    ///
    /// Returns `Ok(None)` when the page carries no TH44 chunks.
    pub fn thumbnail(&self) -> Result<Option<Pixmap>, DocError> {
        Self::decode_iw44(&self.all_chunks(b"TH44"))
    }
    /// First chunk with the given four-byte id, if present.
    pub fn raw_chunk(&self, id: &[u8; 4]) -> Option<&[u8]> {
        self.chunks
            .iter()
            .find(|c| &c.id == id)
            .map(|c| c.data.as_slice())
    }
    /// All chunks with the given four-byte id, in file order.
    pub fn all_chunks(&self, id: &[u8; 4]) -> Vec<&[u8]> {
        self.chunks
            .iter()
            .filter(|c| &c.id == id)
            .map(|c| c.data.as_slice())
            .collect()
    }
    /// Ids of every stored chunk, in file order (duplicates included).
    pub fn chunk_ids(&self) -> Vec<[u8; 4]> {
        self.chunks.iter().map(|c| c.id).collect()
    }
    /// Alias for [`Self::raw_chunk`], kept for API symmetry.
    pub fn find_chunk(&self, id: &[u8; 4]) -> Option<&[u8]> {
        self.raw_chunk(id)
    }
    /// Alias for [`Self::all_chunks`], kept for API symmetry.
    pub fn find_chunks(&self, id: &[u8; 4]) -> Vec<&[u8]> {
        self.all_chunks(id)
    }
    /// Background IW44 chunks (BG44), in file order.
    pub fn bg44_chunks(&self) -> Vec<&[u8]> {
        self.find_chunks(b"BG44")
    }
    /// Foreground IW44 chunks (FG44), in file order.
    pub fn fg44_chunks(&self) -> Vec<&[u8]> {
        self.find_chunks(b"FG44")
    }
    /// Parse the hidden-text layer, preferring the BZZ-compressed TXTz
    /// chunk over the plain TXTa chunk.
    ///
    /// Returns `Ok(None)` when neither chunk is present or the chunk is
    /// empty.
    pub fn text_layer(&self) -> Result<Option<TextLayer>, DocError> {
        let page_height = self.info.height as u32;
        if let Some(txtz) = self.find_chunk(b"TXTz") {
            if txtz.is_empty() {
                return Ok(None);
            }
            let layer = crate::text::parse_text_layer_bzz(txtz, page_height)?;
            return Ok(Some(layer));
        }
        if let Some(txta) = self.find_chunk(b"TXTa") {
            if txta.is_empty() {
                return Ok(None);
            }
            let layer = crate::text::parse_text_layer(txta, page_height)?;
            return Ok(Some(layer));
        }
        Ok(None)
    }
    /// Text layer with zone coordinates transformed from page space to a
    /// `render_w` x `render_h` raster, honoring the page rotation.
    pub fn text_layer_at_size(
        &self,
        render_w: u32,
        render_h: u32,
    ) -> Result<Option<TextLayer>, DocError> {
        let page_w = self.info.width as u32;
        let page_h = self.info.height as u32;
        let rotation = self.info.rotation;
        Ok(self
            .text_layer()?
            .map(|tl| tl.transform(page_w, page_h, rotation, render_w, render_h)))
    }
    /// Plain extracted text of the page, if a text layer exists.
    pub fn text(&self) -> Result<Option<String>, DocError> {
        Ok(self.text_layer()?.map(|tl| tl.text))
    }
    /// Parse the annotation layer, preferring the BZZ-compressed ANTz
    /// chunk over the plain ANTa chunk.
    ///
    /// Returns `Ok(None)` when neither chunk is present or the chunk is
    /// empty.
    pub fn annotations(&self) -> Result<Option<(Annotation, Vec<MapArea>)>, DocError> {
        if let Some(antz) = self.find_chunk(b"ANTz") {
            if antz.is_empty() {
                return Ok(None);
            }
            let result = crate::annotation::parse_annotations_bzz(antz)?;
            return Ok(Some(result));
        }
        if let Some(anta) = self.find_chunk(b"ANTa") {
            if anta.is_empty() {
                return Ok(None);
            }
            let result = crate::annotation::parse_annotations(anta)?;
            return Ok(Some(result));
        }
        Ok(None)
    }
    /// Map areas that carry a non-empty URL, i.e. hyperlinks.
    pub fn hyperlinks(&self) -> Result<Vec<MapArea>, DocError> {
        match self.annotations()? {
            None => Ok(Vec::new()),
            Some((_, mapareas)) => {
                Ok(mapareas.into_iter().filter(|m| !m.url.is_empty()).collect())
            }
        }
    }
    /// Decode the JB2 bilevel mask (Sjbz chunk).
    ///
    /// The shape dictionary is taken from an inline Djbz chunk when
    /// present, otherwise from the shared dictionary resolved at parse
    /// time (INCL). Returns `Ok(None)` when the page has no Sjbz chunk.
    pub fn extract_mask(&self) -> Result<Option<crate::bitmap::Bitmap>, DocError> {
        let sjbz = match self.find_chunk(b"Sjbz") {
            Some(data) => data,
            None => return Ok(None),
        };
        // An inline dictionary takes precedence over a shared one.
        let dict = if let Some(djbz) = self.find_chunk(b"Djbz") {
            Some(crate::jb2_new::decode_dict(djbz, None)?)
        } else if let Some(djbz) = self.shared_djbz.as_deref() {
            Some(crate::jb2_new::decode_dict(djbz, None)?)
        } else {
            None
        };
        let bm = crate::jb2_new::decode(sjbz, dict.as_ref())?;
        Ok(Some(bm))
    }
    /// Decode the foreground colour image from the FG44 chunks.
    /// Returns `Ok(None)` when the page has none.
    pub fn extract_foreground(&self) -> Result<Option<Pixmap>, DocError> {
        Self::decode_iw44(&self.fg44_chunks())
    }
    /// Decode the background colour image from the BG44 chunks.
    /// Returns `Ok(None)` when the page has none.
    pub fn extract_background(&self) -> Result<Option<Pixmap>, DocError> {
        Self::decode_iw44(&self.bg44_chunks())
    }
    /// Render the page into a caller-provided pixel buffer.
    pub fn render_into(
        &self,
        opts: &crate::djvu_render::RenderOptions,
        buf: &mut [u8],
    ) -> Result<(), crate::djvu_render::RenderError> {
        crate::djvu_render::render_into(self, opts, buf)
    }
    /// Feed a sequence of progressive IW44 slices into one decoder and
    /// produce the RGB result; `Ok(None)` when `chunks` is empty.
    ///
    /// Shared by `thumbnail`, `extract_foreground` and
    /// `extract_background`, which previously each duplicated this loop.
    fn decode_iw44(chunks: &[&[u8]]) -> Result<Option<Pixmap>, DocError> {
        if chunks.is_empty() {
            return Ok(None);
        }
        let mut img = Iw44Image::new();
        for chunk_data in chunks {
            img.decode_chunk(chunk_data)?;
        }
        Ok(Some(img.to_rgb()?))
    }
}
/// A parsed DjVu document: its pages, outline bookmarks, and the
/// top-level chunks of the outer form.
#[derive(Debug)]
pub struct DjVuDocument {
// Pages in document order.
pages: Vec<DjVuPage>,
// NAVM outline tree; empty when the document has none.
bookmarks: Vec<DjVuBookmark>,
// Top-level chunks (e.g. DIRM, NAVM, METa/METz for multi-page files;
// the page's own chunks for a single-page DJVU form).
global_chunks: Vec<RawChunk>,
}
impl DjVuDocument {
/// Parse a DjVu document held fully in memory.
///
/// Indirect (non-bundled DJVM) documents need `parse_with_resolver`;
/// through this wrapper they fail with `DocError::NoResolver`.
pub fn parse(data: &[u8]) -> Result<Self, DocError> {
Self::parse_with_resolver(data, None::<fn(&str) -> Result<Vec<u8>, DocError>>)
}
/// Parse a DjVu document, using `resolver` to load the component files
/// of an indirect (non-bundled) DJVM document by their DIRM id.
///
/// Accepts single-page `DJVU` forms and multi-page `DJVM` forms; any
/// other form type yields `DocError::NotDjVu`.
pub fn parse_with_resolver<R>(data: &[u8], resolver: Option<R>) -> Result<Self, DocError>
where
R: Fn(&str) -> Result<Vec<u8>, DocError>,
{
let form = parse_form(data)?;
match &form.form_type {
// Single-page document: the form's chunks are the page itself, and
// they double as the document-level chunks (so `metadata()` works).
b"DJVU" => {
let global_chunks: Vec<RawChunk> = form
.chunks
.iter()
.map(|c| RawChunk {
id: c.id,
data: c.data.to_vec(),
})
.collect();
let page = parse_page_from_chunks(&form.chunks, 0, None)?;
Ok(DjVuDocument {
pages: vec![page],
bookmarks: vec![],
global_chunks,
})
}
// Multi-page document: a DIRM directory describes the components,
// which are either embedded FORM children (bundled) or separate
// files fetched through `resolver` (indirect).
b"DJVM" => {
let dirm_chunk = form
.chunks
.iter()
.find(|c| &c.id == b"DIRM")
.ok_or(DocError::MissingChunk("DIRM"))?;
let (entries, is_bundled) = parse_dirm(dirm_chunk.data)?;
let bookmarks = parse_navm_bookmarks(&form.chunks)?;
// Keep non-FORM top-level chunks (DIRM, NAVM, METa/METz, ...).
let global_chunks: Vec<RawChunk> = form
.chunks
.iter()
.filter(|c| &c.id != b"FORM")
.map(|c| RawChunk {
id: c.id,
data: c.data.to_vec(),
})
.collect();
if is_bundled {
let sub_forms: Vec<&IffChunk<'_>> =
form.chunks.iter().filter(|c| &c.id == b"FORM").collect();
#[cfg(not(feature = "std"))]
use alloc::collections::BTreeMap;
#[cfg(feature = "std")]
use std::collections::BTreeMap;
// Map shared-component ids to their Djbz dictionary payloads so
// pages can resolve INCL references.
// NOTE(review): pairs DIRM entries with FORM children by index,
// i.e. assumes both lists are in the same order and one FORM per
// entry — confirm against files with thumbnail components.
let djvi_djbz: BTreeMap<String, Vec<u8>> = entries
.iter()
.enumerate()
.filter(|(_, e)| e.comp_type == ComponentType::Shared)
.filter_map(|(comp_idx, entry)| {
let sf = sub_forms.get(comp_idx)?;
let chunks = parse_sub_form(sf.data).ok()?;
let djbz = chunks.iter().find(|c| &c.id == b"Djbz")?;
Some((entry.id.clone(), djbz.data.to_vec()))
})
.collect();
let mut pages = Vec::new();
let mut page_idx = 0usize;
for (comp_idx, entry) in entries.iter().enumerate() {
// Only Page components become pages; Shared/Thumbnail are skipped.
if entry.comp_type != ComponentType::Page {
continue;
}
let sub_form = sub_forms.get(comp_idx).ok_or(DocError::Malformed(
"DIRM entry count exceeds FORM children",
))?;
let sub_chunks = parse_sub_form(sub_form.data)?;
// Resolve an INCL reference (trailing whitespace-tolerant) to a
// shared Djbz dictionary, if this page has one.
let shared_djbz = sub_chunks
.iter()
.find(|c| &c.id == b"INCL")
.and_then(|incl| core::str::from_utf8(incl.data.trim_ascii_end()).ok())
.and_then(|name| djvi_djbz.get(name))
.cloned();
let page = parse_page_from_chunks(&sub_chunks, page_idx, shared_djbz)?;
pages.push(page);
page_idx += 1;
}
Ok(DjVuDocument {
pages,
bookmarks,
global_chunks,
})
} else {
// Indirect document: every page component lives in its own file,
// loaded via the caller-supplied resolver.
let resolver = resolver.ok_or(DocError::NoResolver)?;
let mut pages = Vec::new();
let mut page_idx = 0usize;
for entry in &entries {
if entry.comp_type != ComponentType::Page {
continue;
}
let resolved_data = resolver(&entry.id)
.map_err(|_| DocError::IndirectResolve(entry.id.clone()))?;
let sub_form = parse_form(&resolved_data)?;
let page = parse_page_from_chunks(&sub_form.chunks, page_idx, None)?;
pages.push(page);
page_idx += 1;
}
Ok(DjVuDocument {
pages,
bookmarks,
global_chunks,
})
}
}
other => Err(DocError::NotDjVu(*other)),
}
}
/// Number of pages in the document.
pub fn page_count(&self) -> usize {
self.pages.len()
}
/// Page at `index`, or `PageOutOfRange` when past the end.
pub fn page(&self, index: usize) -> Result<&DjVuPage, DocError> {
self.pages.get(index).ok_or(DocError::PageOutOfRange {
index,
count: self.pages.len(),
})
}
/// The document outline; empty when there is no NAVM chunk.
pub fn bookmarks(&self) -> &[DjVuBookmark] {
&self.bookmarks
}
/// Parse document metadata, preferring the BZZ-compressed METz chunk
/// over the plain METa chunk. `Ok(None)` when absent or empty.
pub fn metadata(&self) -> Result<Option<DjVuMetadata>, DocError> {
if let Some(metz) = self.raw_chunk(b"METz") {
if metz.is_empty() {
return Ok(None);
}
return Ok(Some(crate::metadata::parse_metadata_bzz(metz)?));
}
if let Some(meta) = self.raw_chunk(b"METa") {
if meta.is_empty() {
return Ok(None);
}
return Ok(Some(crate::metadata::parse_metadata(meta)?));
}
Ok(None)
}
/// First document-level chunk with the given id, if present.
pub fn raw_chunk(&self, id: &[u8; 4]) -> Option<&[u8]> {
self.global_chunks
.iter()
.find(|c| &c.id == id)
.map(|c| c.data.as_slice())
}
/// All document-level chunks with the given id, in file order.
pub fn all_chunks(&self, id: &[u8; 4]) -> Vec<&[u8]> {
self.global_chunks
.iter()
.filter(|c| &c.id == id)
.map(|c| c.data.as_slice())
.collect()
}
/// Ids of every document-level chunk, in file order.
pub fn chunk_ids(&self) -> Vec<[u8; 4]> {
self.global_chunks.iter().map(|c| c.id).collect()
}
}
#[cfg(feature = "mmap")]
/// A DjVu document parsed from a memory-mapped file (`mmap` feature).
pub struct MmapDocument {
// Keeps the mapping alive alongside the parsed document. Note that
// parsing copies chunk data into owned buffers, so `doc` does not
// borrow from the mapping.
_mmap: memmap2::Mmap,
// The parsed document.
doc: DjVuDocument,
}
#[cfg(feature = "mmap")]
impl MmapDocument {
    /// Memory-map the file at `path` and parse it as a DjVu document.
    pub fn open(path: impl AsRef<std::path::Path>) -> Result<Self, DocError> {
        let file = std::fs::File::open(path.as_ref())?;
        // Mapping a file is unsafe because another process may truncate or
        // mutate it while mapped; callers accept that risk by using mmap.
        #[allow(unsafe_code)]
        let mapping = unsafe { memmap2::Mmap::map(&file) }?;
        let parsed = DjVuDocument::parse(&mapping)?;
        Ok(Self {
            _mmap: mapping,
            doc: parsed,
        })
    }
    /// Borrow the parsed document.
    pub fn document(&self) -> &DjVuDocument {
        &self.doc
    }
    /// Number of pages — delegates to the parsed document.
    pub fn page_count(&self) -> usize {
        self.document().page_count()
    }
    /// Page accessor — delegates to the parsed document.
    pub fn page(&self, index: usize) -> Result<&DjVuPage, DocError> {
        self.document().page(index)
    }
}
#[cfg(feature = "mmap")]
// Deref to the parsed document so an `MmapDocument` can be used anywhere
// a `&DjVuDocument` is expected.
impl core::ops::Deref for MmapDocument {
type Target = DjVuDocument;
fn deref(&self) -> &DjVuDocument {
&self.doc
}
}
/// Build a lazily-decoded [`DjVuPage`] from the chunks of one DJVU form.
///
/// Requires an INFO chunk (else `DocError::MissingChunk("INFO")`); all
/// chunks are copied into owned storage so the page outlives the input.
fn parse_page_from_chunks(
    chunks: &[IffChunk<'_>],
    index: usize,
    shared_djbz: Option<Vec<u8>>,
) -> Result<DjVuPage, DocError> {
    let info_data = chunks
        .iter()
        .find(|c| &c.id == b"INFO")
        .ok_or(DocError::MissingChunk("INFO"))?
        .data;
    let info = PageInfo::parse(info_data)?;
    let owned: Vec<RawChunk> = chunks
        .iter()
        .map(|c| RawChunk {
            id: c.id,
            data: c.data.to_vec(),
        })
        .collect();
    Ok(DjVuPage {
        info,
        chunks: owned,
        index,
        shared_djbz,
    })
}
/// Parse the chunk sequence inside a nested FORM's payload.
///
/// The first four bytes are the form-type tag (e.g. `DJVU`, `DJVI`);
/// they are skipped and the remainder is walked as raw IFF chunks.
fn parse_sub_form(data: &[u8]) -> Result<Vec<IffChunk<'_>>, DocError> {
    if data.len() < 4 {
        return Err(DocError::Malformed("sub-form data too short"));
    }
    let body = data
        .get(4..)
        .ok_or(DocError::Malformed("sub-form body missing"))?;
    parse_iff_body_chunks(body)
}
/// Walk a raw IFF chunk sequence — four-byte id, 32-bit big-endian
/// length, payload, optional pad byte — collecting each chunk as a
/// borrowed slice of the input.
fn parse_iff_body_chunks(mut rest: &[u8]) -> Result<Vec<IffChunk<'_>>, DocError> {
    // Every chunk starts with an 8-byte header (id + length).
    const HEADER: usize = 8;
    let mut out = Vec::new();
    loop {
        if rest.len() < HEADER {
            // Fewer than 8 bytes left: trailing padding; stop cleanly.
            break;
        }
        let id: [u8; 4] = rest
            .get(0..4)
            .and_then(|s| s.try_into().ok())
            .ok_or(IffError::Truncated)?;
        let payload_len = rest
            .get(4..8)
            .and_then(|b| b.try_into().ok())
            .map(u32::from_be_bytes)
            .map(|n| n as usize)
            .ok_or(IffError::Truncated)?;
        let payload_end = HEADER
            .checked_add(payload_len)
            .ok_or(IffError::Truncated)?;
        if payload_end > rest.len() {
            return Err(DocError::Iff(IffError::ChunkTooLong {
                id,
                claimed: payload_len as u32,
                available: rest.len().saturating_sub(HEADER),
            }));
        }
        let payload = rest.get(HEADER..payload_end).ok_or(IffError::Truncated)?;
        out.push(IffChunk { id, data: payload });
        // Payloads are padded to even length; the pad byte is not counted
        // in the length field, so add it back before advancing.
        let step = HEADER
            .checked_add(payload_len + (payload_len & 1))
            .ok_or(IffError::Truncated)?;
        // Clamp in case the final pad byte is absent at end of input.
        rest = rest.get(step.min(rest.len())..).ok_or(IffError::Truncated)?;
    }
    Ok(out)
}
/// One component entry decoded from the DIRM directory.
#[derive(Debug, Clone)]
struct DirmEntry {
// What the component holds (page, thumbnail, or shared data).
comp_type: ComponentType,
// Component id string (the file name for indirect documents).
id: String,
}
/// Parse the DIRM (document directory) chunk of a DJVM document.
///
/// Returns the component entries in directory order plus whether the
/// document is bundled (`true`) or indirect (`false`).
///
/// Wire layout: one flags byte (bit 7 = bundled), a big-endian u16
/// component count, a 32-bit offset table (bundled only), then a
/// BZZ-compressed block holding per-component sizes (3 bytes each),
/// one flag byte per component, and NUL-terminated id strings.
fn parse_dirm(data: &[u8]) -> Result<(Vec<DirmEntry>, bool), DocError> {
    if data.len() < 3 {
        return Err(DocError::Malformed("DIRM chunk too short"));
    }
    let dflags = *data.first().ok_or(DocError::Malformed("DIRM empty"))?;
    // Bit 7 of the flags byte marks a bundled document.
    let is_bundled = (dflags >> 7) != 0;
    let nfiles = u16::from_be_bytes([
        *data.get(1).ok_or(DocError::Malformed("DIRM too short"))?,
        *data.get(2).ok_or(DocError::Malformed("DIRM too short"))?,
    ]) as usize;
    let mut pos = 3usize;
    if is_bundled {
        // Skip the 32-bit offset table, present only in bundled documents.
        let offsets_size = nfiles * 4;
        pos = pos
            .checked_add(offsets_size)
            .ok_or(DocError::Malformed("DIRM offset arithmetic overflow"))?;
        if pos > data.len() {
            return Err(DocError::Malformed("DIRM offset table truncated"));
        }
    }
    let bzz_data = data
        .get(pos..)
        .ok_or(DocError::Malformed("DIRM bzz data missing"))?;
    let meta = bzz_decode(bzz_data)?;
    // Skip the per-component size entries (3 bytes each) to reach the flags.
    let mut mpos = nfiles * 3;
    if mpos + nfiles > meta.len() {
        return Err(DocError::Malformed("DIRM meta too short for flags"));
    }
    // Borrow the flag bytes instead of copying them (the previous `.to_vec()`
    // was an unnecessary allocation: reading the strings below only needs a
    // shared borrow of `meta`).
    let flags = meta
        .get(mpos..mpos + nfiles)
        .ok_or(DocError::Malformed("DIRM flags truncated"))?;
    mpos += nfiles;
    let mut entries = Vec::with_capacity(nfiles);
    for &flag in flags {
        let id = read_str_nt(&meta, &mut mpos)?;
        // Bit 7: an extra name string follows; bit 6: an extra title string.
        // Both are skipped — only the id is used for resolution.
        if (flag & 0x80) != 0 {
            let _ = read_str_nt(&meta, &mut mpos)?;
        }
        if (flag & 0x40) != 0 {
            let _ = read_str_nt(&meta, &mut mpos)?;
        }
        // The low six bits encode the component type.
        let comp_type = match flag & 0x3f {
            1 => ComponentType::Page,
            2 => ComponentType::Thumbnail,
            _ => ComponentType::Shared,
        };
        entries.push(DirmEntry { comp_type, id });
    }
    Ok((entries, is_bundled))
}
/// Read a NUL-terminated UTF-8 string from `data` starting at `*pos`,
/// leaving `*pos` just past the terminator.
fn read_str_nt(data: &[u8], pos: &mut usize) -> Result<String, DocError> {
    let start = *pos;
    let tail = match data.get(start..) {
        Some(t) => t,
        // Starting past the end is the same failure mode as a missing NUL.
        None => {
            return Err(DocError::Malformed(
                "null terminator missing in DIRM string",
            ));
        }
    };
    let nul = tail.iter().position(|&b| b == 0).ok_or(DocError::Malformed(
        "null terminator missing in DIRM string",
    ))?;
    let bytes = tail
        .get(..nul)
        .ok_or(DocError::Malformed("str slice OOB"))?;
    let s = core::str::from_utf8(bytes)
        .map_err(|_| DocError::InvalidUtf8)?
        .to_string();
    // Advance past the string and its terminator.
    *pos = start + nul + 1;
    Ok(s)
}
/// Decode the NAVM chunk (BZZ-compressed outline) into a bookmark tree.
///
/// Returns an empty list when the chunk is absent or too short to hold
/// the entry count.
fn parse_navm_bookmarks(chunks: &[IffChunk<'_>]) -> Result<Vec<DjVuBookmark>, DocError> {
    let Some(navm) = chunks.iter().find(|c| &c.id == b"NAVM") else {
        return Ok(vec![]);
    };
    let decoded = bzz_decode(navm.data)?;
    if decoded.len() < 2 {
        return Ok(vec![]);
    }
    // Big-endian u16: total number of bookmark records (tree flattened).
    let hi = *decoded
        .first()
        .ok_or(DocError::Malformed("NAVM total count byte 0"))?;
    let lo = *decoded
        .get(1)
        .ok_or(DocError::Malformed("NAVM total count byte 1"))?;
    let expected = u16::from_be_bytes([hi, lo]) as usize;
    let mut cursor = 2usize;
    let mut consumed = 0usize;
    let mut roots = Vec::new();
    // Each call consumes a whole subtree and bumps `consumed` per record.
    while consumed < expected {
        roots.push(parse_bookmark_entry(&decoded, &mut cursor, &mut consumed)?);
    }
    Ok(roots)
}
/// Parse one bookmark record — child count byte, title, url — followed
/// recursively by its children; bumps `total_counter` once per record.
fn parse_bookmark_entry(
    data: &[u8],
    pos: &mut usize,
    total_counter: &mut usize,
) -> Result<DjVuBookmark, DocError> {
    if *pos >= data.len() {
        return Err(DocError::Malformed("NAVM bookmark entry truncated"));
    }
    let n_children = *data
        .get(*pos)
        .ok_or(DocError::Malformed("NAVM children count"))? as usize;
    *pos += 1;
    let title = read_navm_str(data, pos)?;
    let url = read_navm_str(data, pos)?;
    *total_counter += 1;
    // Children are stored depth-first right after their parent; stop at
    // the first malformed child.
    let children = (0..n_children)
        .map(|_| parse_bookmark_entry(data, pos, total_counter))
        .collect::<Result<Vec<_>, _>>()?;
    Ok(DjVuBookmark {
        title,
        url,
        children,
    })
}
/// Read a NAVM string: a 24-bit big-endian length prefix followed by
/// that many UTF-8 bytes; advances `*pos` past both.
fn read_navm_str(data: &[u8], pos: &mut usize) -> Result<String, DocError> {
    if *pos + 3 > data.len() {
        return Err(DocError::Malformed("NAVM string length truncated"));
    }
    // Assemble the 3-byte big-endian length.
    let mut len = 0usize;
    for i in 0..3 {
        let byte = *data
            .get(*pos + i)
            .ok_or(DocError::Malformed("NAVM str"))?;
        len = (len << 8) | byte as usize;
    }
    *pos += 3;
    let bytes = data
        .get(*pos..*pos + len)
        .ok_or(DocError::Malformed("NAVM string bytes truncated"))?;
    *pos += len;
    core::str::from_utf8(bytes)
        .map(|s| s.to_string())
        .map_err(|_| DocError::InvalidUtf8)
}
#[cfg(test)]
mod tests {
// Integration-style tests that exercise parsing against fixture files
// checked into the repository; see `assets_path` and the
// `tests/fixtures` paths below.
use super::*;
// Directory holding the shared djvujs test assets.
fn assets_path() -> std::path::PathBuf {
std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("references/djvujs/library/assets")
}
#[test]
fn single_page_parse_and_metadata() {
let data =
std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
let doc = DjVuDocument::parse(&data).expect("parse should succeed");
assert_eq!(doc.page_count(), 1);
let page = doc.page(0).expect("page 0 must exist");
assert_eq!(page.width(), 181);
assert_eq!(page.height(), 240);
assert_eq!(page.dpi(), 100);
assert!((page.gamma() - 2.2).abs() < 0.01, "gamma should be ~2.2");
}
#[test]
fn single_page_out_of_range() {
let data =
std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
let doc = DjVuDocument::parse(&data).expect("parse should succeed");
let err = doc.page(1).expect_err("page 1 should be out of range");
assert!(
matches!(err, DocError::PageOutOfRange { index: 1, count: 1 }),
"unexpected error: {err:?}"
);
}
#[test]
fn single_page_no_thumbnail() {
let data =
std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
let doc = DjVuDocument::parse(&data).expect("parse should succeed");
let page = doc.page(0).expect("page 0 must exist");
let thumb = page.thumbnail().expect("thumbnail() should not error");
assert!(
thumb.is_none(),
"single-page chicken.djvu has no TH44 chunks"
);
}
#[test]
fn single_page_dimensions() {
let data =
std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
let doc = DjVuDocument::parse(&data).expect("parse should succeed");
let page = doc.page(0).unwrap();
assert_eq!(page.dimensions(), (181, 240));
}
#[test]
fn multipage_bundled_page_count() {
let data = std::fs::read(assets_path().join("DjVu3Spec_bundled.djvu"))
.expect("DjVu3Spec_bundled.djvu must exist");
let doc = DjVuDocument::parse(&data).expect("bundled parse should succeed");
assert!(
doc.page_count() > 1,
"bundled document should have more than 1 page, got {}",
doc.page_count()
);
}
#[test]
fn multipage_bundled_page_metadata() {
let data = std::fs::read(assets_path().join("DjVu3Spec_bundled.djvu"))
.expect("DjVu3Spec_bundled.djvu must exist");
let doc = DjVuDocument::parse(&data).expect("bundled parse should succeed");
let page0 = doc.page(0).expect("page 0 must exist");
assert!(page0.width() > 0, "page width must be non-zero");
assert!(page0.height() > 0, "page height must be non-zero");
assert!(page0.dpi() > 0, "page dpi must be non-zero");
}
#[test]
fn navm_bookmarks_present() {
let data =
std::fs::read(assets_path().join("navm_fgbz.djvu")).expect("navm_fgbz.djvu must exist");
let doc = DjVuDocument::parse(&data).expect("parse should succeed");
let bm = doc.bookmarks();
assert!(
!bm.is_empty(),
"navm_fgbz.djvu should have at least one bookmark"
);
}
#[test]
fn no_navm_returns_empty_bookmarks() {
let data =
std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
let doc = DjVuDocument::parse(&data).expect("parse should succeed");
assert!(
doc.bookmarks().is_empty(),
"chicken.djvu has no NAVM — bookmarks should be empty"
);
}
#[test]
fn indirect_document_with_resolver() {
let chicken_data =
std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
let djvm_data = build_indirect_djvm_bytes("chicken.djvu");
// The resolver maps the component id to the real file's bytes.
let resolver = |name: &str| -> Result<Vec<u8>, DocError> {
if name == "chicken.djvu" {
Ok(chicken_data.clone())
} else {
Err(DocError::IndirectResolve(name.to_string()))
}
};
let doc = DjVuDocument::parse_with_resolver(&djvm_data, Some(resolver))
.expect("indirect parse should succeed");
assert_eq!(doc.page_count(), 1);
let page = doc.page(0).unwrap();
assert_eq!(page.width(), 181);
assert_eq!(page.height(), 240);
}
#[test]
fn indirect_document_no_resolver_returns_error() {
let djvm_data = build_indirect_djvm_bytes("chicken.djvu");
let err = DjVuDocument::parse(&djvm_data).expect_err("should fail without resolver");
assert!(
matches!(err, DocError::NoResolver),
"expected NoResolver, got {err:?}"
);
}
#[test]
fn page_is_lazy_no_decode_before_thumbnail() {
let data =
std::fs::read(assets_path().join("boy_jb2.djvu")).expect("boy_jb2.djvu must exist");
let doc = DjVuDocument::parse(&data).expect("parse should succeed");
let page = doc.page(0).expect("page 0 must exist");
assert!(!page.chunks.is_empty(), "chunks must be stored (lazy)");
let thumb = page.thumbnail().expect("thumbnail() should not error");
assert!(thumb.is_none());
}
#[test]
fn not_djvu_returns_error() {
// A syntactically valid AT&T/FORM header with a non-DjVu form type.
let mut data = Vec::new();
data.extend_from_slice(b"AT&T");
data.extend_from_slice(b"FORM");
data.extend_from_slice(&8u32.to_be_bytes());
data.extend_from_slice(b"XXXXXXXX"); let err = DjVuDocument::parse(&data).expect_err("should fail");
assert!(
matches!(err, DocError::NotDjVu(_) | DocError::Iff(_)),
"expected NotDjVu or Iff error, got {err:?}"
);
}
// Build a minimal indirect DJVM file whose DIRM describes one page
// component. The BZZ blob is pre-encoded; it presumably encodes a single
// entry with id "chicken.djvu" (the resolver test relies on that name).
fn build_indirect_djvm_bytes(_page_name: &str) -> Vec<u8> {
let bzz_meta: &[u8] = &[
0xff, 0xff, 0xed, 0xbf, 0x8a, 0x1f, 0xbe, 0xad, 0x14, 0x57, 0x10, 0xc9, 0x63, 0x19,
0x11, 0xf0, 0x85, 0x28, 0x12, 0x8a, 0xbf,
];
let mut dirm_data = Vec::new();
// Flags byte 0x00 (bit 7 clear => indirect), then u16 count = 1.
dirm_data.push(0x00); dirm_data.push(0x00); dirm_data.push(0x01); dirm_data.extend_from_slice(bzz_meta);
build_djvm_with_dirm(&dirm_data)
}
// Wrap a DIRM payload in a complete AT&T/FORM:DJVM file, padding the
// DIRM chunk to even length per IFF rules.
fn build_djvm_with_dirm(dirm_data: &[u8]) -> Vec<u8> {
let mut dirm_chunk = Vec::new();
dirm_chunk.extend_from_slice(b"DIRM");
dirm_chunk.extend_from_slice(&(dirm_data.len() as u32).to_be_bytes());
dirm_chunk.extend_from_slice(dirm_data);
if !dirm_data.len().is_multiple_of(2) {
dirm_chunk.push(0); }
let mut form_body = Vec::new();
form_body.extend_from_slice(b"DJVM");
form_body.extend_from_slice(&dirm_chunk);
let mut file = Vec::new();
file.extend_from_slice(b"AT&T");
file.extend_from_slice(b"FORM");
file.extend_from_slice(&(form_body.len() as u32).to_be_bytes());
file.extend_from_slice(&form_body);
file
}
#[test]
fn page_raw_chunk_info_present() {
let data =
std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
let doc = DjVuDocument::parse(&data).expect("parse must succeed");
let page = doc.page(0).expect("page 0 must exist");
let info = page.raw_chunk(b"INFO").expect("INFO chunk must be present");
assert_eq!(info.len(), 10, "INFO chunk is always 10 bytes");
}
#[test]
fn page_raw_chunk_absent() {
let data =
std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
let doc = DjVuDocument::parse(&data).expect("parse must succeed");
let page = doc.page(0).expect("page 0 must exist");
assert!(
page.raw_chunk(b"XXXX").is_none(),
"unknown chunk type must return None"
);
}
#[test]
fn page_all_chunks_bg44_multiple() {
let data = std::fs::read(
std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("tests/fixtures/big-scanned-page.djvu"),
)
.expect("big-scanned-page.djvu must exist");
let doc = DjVuDocument::parse(&data).expect("parse must succeed");
let page = doc.page(0).expect("page 0 must exist");
let bg44 = page.all_chunks(b"BG44");
assert!(
bg44.len() >= 2,
"colour page must have ≥2 BG44 chunks, got {}",
bg44.len()
);
for (i, chunk) in bg44.iter().enumerate() {
assert!(!chunk.is_empty(), "BG44 chunk {i} must not be empty");
}
}
#[test]
fn page_chunk_ids_includes_info() {
let data =
std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
let doc = DjVuDocument::parse(&data).expect("parse must succeed");
let page = doc.page(0).expect("page 0 must exist");
let ids = page.chunk_ids();
assert!(!ids.is_empty(), "chunk_ids must not be empty");
assert!(
ids.contains(b"INFO"),
"chunk_ids must include INFO, got: {:?}",
ids.iter()
.map(|id| std::str::from_utf8(id).unwrap_or("????"))
.collect::<Vec<_>>()
);
}
#[test]
fn document_raw_chunk_single_page() {
let data =
std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
let doc = DjVuDocument::parse(&data).expect("parse must succeed");
let info = doc
.raw_chunk(b"INFO")
.expect("document must expose INFO chunk");
assert_eq!(info.len(), 10);
}
#[test]
fn djvi_shared_dict_parsed_from_bundled_djvm() {
let path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("tests/fixtures/DjVu3Spec_bundled.djvu");
let data = std::fs::read(&path).expect("DjVu3Spec_bundled.djvu must exist");
let doc = DjVuDocument::parse(&data).expect("parse must succeed");
assert!(doc.page_count() > 0, "document must have pages");
let pages_with_dict = doc.pages.iter().filter(|p| p.shared_djbz.is_some()).count();
assert!(
pages_with_dict > 0,
"at least one page must have a resolved shared DJVI dict"
);
}
#[test]
fn djvi_incl_page_mask_renders_ok() {
let path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("tests/fixtures/DjVu3Spec_bundled.djvu");
let data = std::fs::read(&path).expect("DjVu3Spec_bundled.djvu must exist");
let doc = DjVuDocument::parse(&data).expect("parse must succeed");
let page = doc
.pages
.iter()
.find(|p| p.shared_djbz.is_some())
.expect("at least one page must have a shared dict");
let mask = page
.extract_mask()
.expect("extract_mask must succeed for INCL page");
assert!(mask.is_some(), "INCL page must have a JB2 mask");
let bm = mask.unwrap();
assert!(
bm.width > 0 && bm.height > 0,
"mask must have non-zero dimensions"
);
}
#[test]
fn no_regression_non_incl_pages() {
let data = std::fs::read(
std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("tests/fixtures/boy_jb2.djvu"),
)
.expect("boy_jb2.djvu must exist");
let doc = DjVuDocument::parse(&data).expect("parse must succeed");
let page = doc.page(0).expect("page 0 must exist");
assert!(
page.shared_djbz.is_none(),
"single-page DJVU has no shared dict"
);
let mask = page.extract_mask().expect("extract_mask must succeed");
assert!(mask.is_some(), "boy_jb2.djvu page must have a JB2 mask");
}
#[test]
fn page_raw_chunk_info_roundtrip() {
let data =
std::fs::read(assets_path().join("chicken.djvu")).expect("chicken.djvu must exist");
let doc = DjVuDocument::parse(&data).expect("parse must succeed");
let page = doc.page(0).expect("page 0 must exist");
let raw_info = page.raw_chunk(b"INFO").expect("INFO chunk must be present");
let reparsed = crate::info::PageInfo::parse(raw_info).expect("re-parse must succeed");
assert_eq!(reparsed.width, page.width() as u16);
assert_eq!(reparsed.height, page.height() as u16);
assert_eq!(reparsed.dpi, page.dpi());
}
#[test]
#[cfg(feature = "mmap")]
fn mmap_document_matches_parse() {
let path = assets_path().join("chicken.djvu");
let mmap_doc = MmapDocument::open(&path).expect("mmap open should succeed");
let data = std::fs::read(&path).expect("read should succeed");
let mem_doc = DjVuDocument::parse(&data).expect("parse should succeed");
assert_eq!(mmap_doc.page_count(), mem_doc.page_count());
for i in 0..mmap_doc.page_count() {
let mp = mmap_doc.page(i).unwrap();
let pp = mem_doc.page(i).unwrap();
assert_eq!(mp.width(), pp.width());
assert_eq!(mp.height(), pp.height());
assert_eq!(mp.dpi(), pp.dpi());
}
}
}