use crate::bitmap::Bitmap;
use crate::error::Error;
use crate::iff::{Chunk, DjvuFile};
use crate::iw44::IW44Image;
use crate::jb2::JB2Dict;
use crate::pixmap::Pixmap;
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
#[cfg(test)]
pub use crate::iw44::NormalizedPlanes;
/// One entry of the document outline, as decoded from the `NAVM` chunk.
///
/// Entries form a tree: every bookmark owns the entries nested below it.
#[derive(Debug, Clone)]
pub struct Bookmark {
/// Title string stored for this entry.
pub title: String,
/// Target string stored for this entry (the bundled tests show values such as `"#1"`).
pub url: String,
/// Entries nested directly under this one, in stream order.
pub children: Vec<Bookmark>,
}
/// Page rotation decoded from the low three bits of the `INFO` flags byte.
///
/// `parse_info` maps flag value 5 to `Cw90`, 2 to `Cw180`, 6 to `Cw270`,
/// and every other value to `None`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Rotation {
/// No rotation (also used for unrecognised flag values).
None,
/// Presumably 90 degrees clockwise, going by the variant name.
Cw90,
/// Presumably 180 degrees, going by the variant name.
Cw180,
/// Presumably 270 degrees clockwise, going by the variant name.
Cw270,
}
/// Page metadata decoded from a page's `INFO` chunk by `parse_info`.
#[derive(Debug, Clone)]
pub struct PageInfo {
/// Page width, read as a big-endian `u16` from bytes 0..2 of the chunk.
pub width: u16,
/// Page height, read as a big-endian `u16` from bytes 2..4 of the chunk.
pub height: u16,
/// Resolution; falls back to 300 when the field is absent or outside `25..=6000`.
pub dpi: u16,
/// Gamma, stored in the chunk as a byte holding ten times the value;
/// falls back to 2.2 when the byte is absent or zero.
pub gamma: f32,
/// Rotation decoded from the flags byte (`Rotation::None` when absent).
pub rotation: Rotation,
}
/// Foreground colour palette decoded from an `FGbz` chunk by `parse_fgbz`.
#[derive(Debug, Clone)]
pub struct Palette {
/// Palette entries as `(r, g, b)`; the chunk stores each entry in b, g, r byte order.
pub colors: Vec<(u8, u8, u8)>,
/// Optional index table (big-endian `i16` values); empty when the chunk's
/// version byte does not have bit 7 set.
pub indices: Vec<i16>,
}
/// Kind of a text-layer zone.
///
/// The discriminants equal the type byte that `parse_text_zone` reads from
/// the encoded text layer.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TextZoneKind {
Page = 1,
Column = 2,
Region = 3,
Paragraph = 4,
Line = 5,
Word = 6,
Character = 7,
}
/// A rectangle of the text layer together with the slice of text it covers.
///
/// Coordinates are the absolute values computed by `parse_text_zone` from the
/// parent- or sibling-relative values stored in the file.
#[derive(Debug, Clone)]
pub struct TextZone {
/// What this zone represents (page, line, word, ...).
pub kind: TextZoneKind,
/// Resolved x coordinate of the zone.
pub x: i32,
/// Resolved y coordinate of the zone.
pub y: i32,
/// Zone width as stored in the file.
pub width: i32,
/// Zone height as stored in the file.
pub height: i32,
/// Byte offset of the zone's text inside `TextLayer::text`.
pub text_start: usize,
/// Byte length of the zone's text.
pub text_len: usize,
/// Nested zones, in stream order.
pub children: Vec<TextZone>,
}
/// Hidden text of a page: the full text plus an optional zone hierarchy.
#[derive(Debug, Clone)]
pub struct TextLayer {
/// Entire page text as UTF-8.
pub text: String,
/// Root of the zone tree; `None` when the chunk carries text but no zones.
pub root: Option<TextZone>,
}
impl TextLayer {
    /// Returns the slice of [`TextLayer::text`] covered by `zone`.
    ///
    /// The zone's byte range is clamped to the text length. An empty string
    /// is returned when the clamped range does not fall on UTF-8 character
    /// boundaries.
    pub fn zone_text(&self, zone: &TextZone) -> &str {
        // `text_start` and `text_len` are public fields, so their sum may not
        // fit in `usize`; saturate instead of overflowing.
        let end = zone
            .text_start
            .saturating_add(zone.text_len)
            .min(self.text.len());
        let start = zone.text_start.min(end);
        // `str::get` yields `None` for ranges that split a character.
        self.text.get(start..end).unwrap_or("")
    }
}
/// Role of a component listed in the `DIRM` directory.
///
/// `parse_dirm` derives it from the low six bits of the component's flag
/// byte: 1 is `Page`, 2 is `Thumbnail`, anything else is `Shared`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ComponentType {
Shared, Page, Thumbnail, }
/// One component record of the `DIRM` directory.
#[derive(Debug, Clone)]
struct DirmEntry {
/// Role of the component inside the document.
comp_type: ComponentType,
/// Component identifier; `INCL` references are matched against it.
id: String,
}
/// A parsed DjVu document (single-page `DJVU` or bundled multi-page `DJVM`).
pub struct Document {
/// Parsed IFF chunk tree that all pages borrow from.
file: DjvuFile,
/// Directory entries from `DIRM`; empty for single-page documents.
dirm_entries: Vec<DirmEntry>,
/// For each page number, the index of its entry in `dirm_entries`
/// (a single `0` for single-page documents).
page_indices: Vec<usize>,
/// `true` when the root form is `DJVU` rather than `DJVM`.
is_single_page: bool,
/// Decoded JB2 dictionaries, keyed by the address of the `Djbz` payload
/// they were decoded from.
dict_cache: RwLock<HashMap<usize, Arc<JB2Dict>>>,
}
impl Document {
pub fn parse(data: &[u8]) -> Result<Self, Error> {
let file = crate::iff::parse(data)?;
match &file.root {
Chunk::Form {
secondary_id: [b'D', b'J', b'V', b'U'],
..
} => {
Ok(Document {
file,
dirm_entries: vec![],
page_indices: vec![0], is_single_page: true,
dict_cache: RwLock::new(HashMap::new()),
})
}
Chunk::Form {
secondary_id: [b'D', b'J', b'V', b'M'],
children,
..
} => {
let dirm_chunk = children
.iter()
.find_map(|c| match c {
Chunk::Leaf {
id: [b'D', b'I', b'R', b'M'],
data,
} => Some(data.as_slice()),
_ => None,
})
.ok_or(Error::MissingChunk("DIRM"))?;
let (dirm_entries, is_bundled) = parse_dirm(dirm_chunk)?;
if !is_bundled {
return Err(Error::Unsupported("indirect DJVM not supported"));
}
let page_indices: Vec<usize> = dirm_entries
.iter()
.enumerate()
.filter(|(_, e)| e.comp_type == ComponentType::Page)
.map(|(i, _)| i)
.collect();
Ok(Document {
file,
dirm_entries,
page_indices,
is_single_page: false,
dict_cache: RwLock::new(HashMap::new()),
})
}
_ => Err(Error::Unsupported("not a DJVU or DJVM document")),
}
}
pub fn page_count(&self) -> usize {
self.page_indices.len()
}
pub fn page(&self, index: usize) -> Result<Page<'_>, Error> {
if index >= self.page_count() {
return Err(Error::FormatError(format!(
"page index {} out of range ({})",
index,
self.page_count()
)));
}
if self.is_single_page {
return Page::from_form(&self.file.root, self);
}
let dirm_index = self.page_indices[index];
let form = self.get_component_form(dirm_index)?;
Page::from_form(form, self)
}
fn get_component_form(&self, dirm_index: usize) -> Result<&Chunk, Error> {
let forms: Vec<&Chunk> = self
.file
.root
.children()
.iter()
.filter(|c| matches!(c, Chunk::Form { .. }))
.collect();
forms
.get(dirm_index)
.copied()
.ok_or(Error::FormatError(format!(
"component {} not found",
dirm_index
)))
}
pub fn bookmarks(&self) -> Result<Vec<Bookmark>, Error> {
let navm_data = match self.file.root.find_first(b"NAVM") {
Some(c) => c.data(),
None => return Ok(vec![]),
};
let decoded = crate::bzz_new::bzz_decode(navm_data)
.map_err(|e| Error::FormatError(format!("NAVM BZZ decode: {}", e)))?;
if decoded.len() < 2 {
return Ok(vec![]);
}
let total_count = u16::from_be_bytes([decoded[0], decoded[1]]) as usize;
let mut pos = 2usize;
let mut bookmarks = Vec::new();
let mut decoded_count = 0usize;
while decoded_count < total_count {
let bm = parse_bookmark(&decoded, &mut pos, &mut decoded_count)?;
bookmarks.push(bm);
}
Ok(bookmarks)
}
pub fn thumbnail(&self, page_index: usize) -> Result<Option<Pixmap>, Error> {
if self.is_single_page {
return Ok(None);
}
let mut thumb_idx: usize = 0;
for (i, entry) in self.dirm_entries.iter().enumerate() {
if entry.comp_type != ComponentType::Thumbnail {
continue;
}
let form = self.get_component_form(i)?;
let th44_chunks: Vec<&[u8]> = form
.find_all(b"TH44")
.into_iter()
.map(|c| c.data())
.collect();
let mut img = IW44Image::new();
for chunk_data in &th44_chunks {
if chunk_data.is_empty() {
continue;
}
let serial = chunk_data[0];
if serial == 0 && img.width() > 0 {
if thumb_idx == page_index {
let pm = img
.to_pixmap()
.map_err(|e| Error::FormatError(e.to_string()))?;
return Ok(Some(pm));
}
thumb_idx += 1;
img = IW44Image::new();
}
img.decode_chunk(chunk_data)
.map_err(|e| Error::FormatError(e.to_string()))?;
}
if img.width() > 0 {
if thumb_idx == page_index {
let pm = img
.to_pixmap()
.map_err(|e| Error::FormatError(e.to_string()))?;
return Ok(Some(pm));
}
thumb_idx += 1;
}
}
Ok(None)
}
fn resolve_incl(&self, ref_id: &str) -> Result<&Chunk, Error> {
if self.is_single_page {
return Err(Error::FormatError("INCL in single-page document".into()));
}
for (i, entry) in self.dirm_entries.iter().enumerate() {
if entry.id == ref_id {
return self.get_component_form(i);
}
}
Err(Error::FormatError(format!(
"INCL target '{}' not found",
ref_id
)))
}
fn get_or_decode_dict(&self, djbz_data: &[u8]) -> Result<Arc<JB2Dict>, Error> {
let key = djbz_data.as_ptr() as usize;
{
let cache = self.dict_cache.read().unwrap();
if let Some(dict) = cache.get(&key) {
return Ok(Arc::clone(dict));
}
}
let dict = crate::jb2::decode_dict(djbz_data, None)
.map_err(|e| Error::FormatError(e.to_string()))?;
let arc = Arc::new(dict);
self.dict_cache
.write()
.unwrap()
.insert(key, Arc::clone(&arc));
Ok(arc)
}
}
/// A borrowed view of one page of a [`Document`].
pub struct Page<'a> {
/// Metadata parsed from the page's `INFO` chunk when the view was created.
pub info: PageInfo,
/// The page's form chunk, in which the layer chunks are looked up.
form: &'a Chunk,
/// Owning document, used to resolve `INCL` references and cache dictionaries.
doc: &'a Document,
}
impl<'a> Page<'a> {
fn from_form(form: &'a Chunk, doc: &'a Document) -> Result<Self, Error> {
let info_chunk = form
.find_first(b"INFO")
.ok_or(Error::MissingChunk("INFO"))?;
let info = parse_info(info_chunk.data())?;
Ok(Page { info, form, doc })
}
#[cfg(test)]
pub fn has_mask(&self) -> bool {
self.form.find_first(b"Sjbz").is_some()
}
#[cfg(test)]
pub fn has_background(&self) -> bool {
self.form.find_first(b"BG44").is_some()
}
pub fn has_foreground(&self) -> bool {
self.form.find_first(b"FG44").is_some()
}
pub fn has_palette(&self) -> bool {
self.form.find_first(b"FGbz").is_some()
}
pub fn decode_mask(&self) -> Result<Option<Bitmap>, Error> {
let sjbz = match self.form.find_first(b"Sjbz") {
Some(c) => c.data(),
None => return Ok(None),
};
let shared_dict = self.resolve_shared_dict()?;
let bitmap = crate::jb2::decode(sjbz, shared_dict.as_deref())
.map_err(|e| Error::FormatError(e.to_string()))?;
Ok(Some(bitmap))
}
pub fn decode_mask_indexed(&self) -> Result<Option<(Bitmap, Vec<i32>)>, Error> {
let sjbz = match self.form.find_first(b"Sjbz") {
Some(c) => c.data(),
None => return Ok(None),
};
let shared_dict = self.resolve_shared_dict()?;
let result = crate::jb2::decode_indexed(sjbz, shared_dict.as_deref())
.map_err(|e| Error::FormatError(e.to_string()))?;
Ok(Some(result))
}
pub fn decode_background(&self) -> Result<Option<Pixmap>, Error> {
let img = match self.decode_iw44_layer(b"BG44")? {
Some(img) => img,
None => return Ok(None),
};
let pm = img
.to_pixmap()
.map_err(|e| Error::FormatError(e.to_string()))?;
Ok(Some(pm))
}
pub fn bg44_chunk_count(&self) -> usize {
self.form.find_all(b"BG44").len()
}
pub fn decode_background_progressive(&self) -> Result<Option<Vec<Pixmap>>, Error> {
let chunks: Vec<&[u8]> = self
.form
.find_all(b"BG44")
.into_iter()
.map(|c| c.data())
.collect();
if chunks.is_empty() {
return Ok(None);
}
let mut img = IW44Image::new();
let mut frames = Vec::with_capacity(chunks.len());
for chunk_data in &chunks {
img.decode_chunk(chunk_data)
.map_err(|e| Error::FormatError(e.to_string()))?;
let pm = img
.to_pixmap()
.map_err(|e| Error::FormatError(e.to_string()))?;
frames.push(pm);
}
Ok(Some(frames))
}
pub fn decode_background_coarse(&self) -> Result<Option<Pixmap>, Error> {
let chunks: Vec<&[u8]> = self
.form
.find_all(b"BG44")
.into_iter()
.map(|c| c.data())
.collect();
if chunks.len() <= 1 {
return Ok(None);
}
let mut img = IW44Image::new();
img.decode_chunk(chunks[0])
.map_err(|e| Error::FormatError(e.to_string()))?;
let pm = img
.to_pixmap()
.map_err(|e| Error::FormatError(e.to_string()))?;
Ok(Some(pm))
}
pub fn decode_foreground(&self) -> Result<Option<Pixmap>, Error> {
let img = match self.decode_iw44_layer(b"FG44")? {
Some(img) => img,
None => return Ok(None),
};
let pm = img
.to_pixmap()
.map_err(|e| Error::FormatError(e.to_string()))?;
Ok(Some(pm))
}
#[cfg(test)]
pub fn decode_background_planes(&self) -> Result<Option<NormalizedPlanes>, Error> {
let img = match self.decode_iw44_layer(b"BG44")? {
Some(img) => img,
None => return Ok(None),
};
let planes = img
.to_normalized_planes_subsample(1)
.map_err(|e| Error::FormatError(e.to_string()))?;
Ok(Some(planes))
}
pub fn decode_palette(&self) -> Result<Option<Palette>, Error> {
let fgbz = match self.form.find_first(b"FGbz") {
Some(c) => c.data(),
None => return Ok(None),
};
let palette = parse_fgbz(fgbz)?;
Ok(Some(palette))
}
pub fn text_layer(&self) -> Result<Option<TextLayer>, Error> {
let data = if let Some(txtz) = self.form.find_first(b"TXTz") {
let compressed = txtz.data();
if compressed.is_empty() {
return Ok(None);
}
crate::bzz_new::bzz_decode(compressed)
.map_err(|e| Error::FormatError(format!("TXTz BZZ decode: {}", e)))?
} else if let Some(txta) = self.form.find_first(b"TXTa") {
txta.data().to_vec()
} else {
return Ok(None);
};
parse_text_layer(&data)
}
fn resolve_shared_dict(&self) -> Result<Option<Arc<JB2Dict>>, Error> {
for incl in self.form.find_all(b"INCL") {
let ref_id = std::str::from_utf8(incl.data())
.map_err(|_| Error::FormatError("invalid INCL UTF-8".into()))?
.trim_end_matches('\0')
.trim();
let shared_form = self.doc.resolve_incl(ref_id)?;
if let Some(djbz) = shared_form.find_first(b"Djbz") {
return Ok(Some(self.doc.get_or_decode_dict(djbz.data())?));
}
}
if let Some(djbz) = self.form.find_first(b"Djbz") {
return Ok(Some(self.doc.get_or_decode_dict(djbz.data())?));
}
Ok(None)
}
fn decode_iw44_layer(&self, chunk_id: &[u8; 4]) -> Result<Option<IW44Image>, Error> {
let chunks: Vec<&[u8]> = self
.form
.find_all(chunk_id)
.into_iter()
.map(|c| c.data())
.collect();
if chunks.is_empty() {
return Ok(None);
}
let mut img = IW44Image::new();
for chunk_data in &chunks {
img.decode_chunk(chunk_data)
.map_err(|e| Error::FormatError(e.to_string()))?;
}
Ok(Some(img))
}
}
/// Decodes the payload of an `INFO` chunk.
///
/// Layout as read here: bytes 0..2 width (big-endian), 2..4 height
/// (big-endian), 4 and 5 version bytes (ignored), 6..8 dpi (little-endian),
/// 8 gamma times ten, 9 flags. Everything after byte 4 is optional.
///
/// Defaults: dpi is 300 when missing or outside `25..=6000`; gamma is 2.2
/// when missing or zero; rotation is `Rotation::None` when the flags byte is
/// missing or holds an unrecognised value.
///
/// Returns [`Error::InvalidLength`] when fewer than five bytes are supplied.
fn parse_info(data: &[u8]) -> Result<PageInfo, Error> {
    const DEFAULT_DPI: u16 = 300;

    if data.len() < 5 {
        return Err(Error::InvalidLength);
    }
    let width = u16::from_be_bytes([data[0], data[1]]);
    let height = u16::from_be_bytes([data[2], data[3]]);
    // Bytes 4 and 5 carry the minor/major version; they are not used.

    let dpi = match (data.get(6), data.get(7)) {
        (Some(&lo), Some(&hi)) => {
            let raw = u16::from_le_bytes([lo, hi]);
            if (25..=6000).contains(&raw) {
                raw
            } else {
                DEFAULT_DPI
            }
        }
        _ => DEFAULT_DPI,
    };

    let gamma = match data.get(8).copied().unwrap_or(0) {
        0 => 2.2_f32,
        tenths => tenths as f32 / 10.0,
    };

    let flags = data.get(9).copied().unwrap_or(0);
    let rotation = match flags & 0x07 {
        5 => Rotation::Cw90,
        2 => Rotation::Cw180,
        6 => Rotation::Cw270,
        _ => Rotation::None,
    };

    Ok(PageInfo {
        width,
        height,
        dpi,
        gamma,
        rotation,
    })
}
fn parse_dirm(data: &[u8]) -> Result<(Vec<DirmEntry>, bool), Error> {
if data.len() < 3 {
return Err(Error::InvalidLength);
}
let dflags = data[0];
let is_bundled = (dflags >> 7) != 0;
let nfiles = u16::from_be_bytes([data[1], data[2]]) as usize;
let mut pos = 3;
if is_bundled {
let offsets_size = nfiles * 4;
if pos + offsets_size > data.len() {
return Err(Error::UnexpectedEof);
}
pos += offsets_size;
}
let bzz_data = &data[pos..];
let meta =
crate::bzz_new::bzz_decode(bzz_data).map_err(|e| Error::FormatError(e.to_string()))?;
let mut mpos = 0;
mpos += nfiles * 3;
if mpos + nfiles > meta.len() {
return Err(Error::UnexpectedEof);
}
let flags: Vec<u8> = meta[mpos..mpos + nfiles].to_vec();
mpos += nfiles;
let mut entries = Vec::with_capacity(nfiles);
for &flag in flags.iter().take(nfiles) {
let id = read_str_nt(&meta, &mut mpos)?;
let has_name = (flag & 0x80) != 0;
let has_title = (flag & 0x40) != 0;
if has_name {
let _ = read_str_nt(&meta, &mut mpos)?;
}
if has_title {
let _ = read_str_nt(&meta, &mut mpos)?;
}
let comp_type = match flag & 0x3f {
1 => ComponentType::Page,
2 => ComponentType::Thumbnail,
_ => ComponentType::Shared,
};
entries.push(DirmEntry { comp_type, id });
}
Ok((entries, is_bundled))
}
/// Reads a NUL-terminated UTF-8 string starting at `*pos`.
///
/// On success `*pos` is left just past the terminator. When no terminator
/// is found the function returns [`Error::UnexpectedEof`], and `*pos` is
/// left at the end of `data` (or unchanged if it was already past the end).
/// Invalid UTF-8 yields [`Error::FormatError`] with `*pos` left on the
/// terminator.
fn read_str_nt(data: &[u8], pos: &mut usize) -> Result<String, Error> {
    let start = *pos;
    let nul_offset = data
        .get(start..)
        .and_then(|tail| tail.iter().position(|&byte| byte == 0));
    let end = match nul_offset {
        Some(offset) => start + offset,
        None => {
            *pos = start.max(data.len());
            return Err(Error::UnexpectedEof);
        }
    };
    *pos = end;
    let text = std::str::from_utf8(&data[start..end])
        .map_err(|_| Error::FormatError("invalid UTF-8 in DIRM".into()))?;
    *pos = end + 1;
    Ok(text.to_owned())
}
fn parse_fgbz(data: &[u8]) -> Result<Palette, Error> {
if data.len() < 3 {
return Err(Error::InvalidLength);
}
let version = data[0];
if (version & 0x7f) != 0 {
return Err(Error::Unsupported("unsupported FGbz version"));
}
let palette_size = u16::from_be_bytes([data[1], data[2]]) as usize;
let color_bytes = palette_size * 3;
if data.len() < 3 + color_bytes {
return Err(Error::UnexpectedEof);
}
let mut colors = Vec::with_capacity(palette_size);
for i in 0..palette_size {
let base = 3 + i * 3;
let b = data[base];
let g = data[base + 1];
let r = data[base + 2];
colors.push((r, g, b));
}
let mut indices = Vec::new();
if (version & 0x80) != 0 {
let idx_start = 3 + color_bytes;
if idx_start + 3 > data.len() {
return Err(Error::UnexpectedEof);
}
let data_size = ((data[idx_start] as u32) << 16)
| ((data[idx_start + 1] as u32) << 8)
| (data[idx_start + 2] as u32);
let bzz_data = &data[idx_start + 3..];
let decoded =
crate::bzz_new::bzz_decode(bzz_data).map_err(|e| Error::FormatError(e.to_string()))?;
let num_indices = data_size as usize;
if decoded.len() < num_indices * 2 {
return Err(Error::UnexpectedEof);
}
indices.reserve(num_indices);
for i in 0..num_indices {
let idx = i16::from_be_bytes([decoded[i * 2], decoded[i * 2 + 1]]);
indices.push(idx);
}
}
Ok(Palette { colors, indices })
}
fn parse_bookmark(data: &[u8], pos: &mut usize, counter: &mut usize) -> Result<Bookmark, Error> {
if *pos >= data.len() {
return Err(Error::UnexpectedEof);
}
let children_count = data[*pos] as usize;
*pos += 1;
let title = read_navm_string(data, pos)?;
let url = read_navm_string(data, pos)?;
*counter += 1;
let mut children = Vec::with_capacity(children_count);
for _ in 0..children_count {
children.push(parse_bookmark(data, pos, counter)?);
}
Ok(Bookmark {
title,
url,
children,
})
}
/// Reads a string prefixed by a 24-bit big-endian byte length.
///
/// On success `*pos` is left just past the string. If the length prefix
/// itself is truncated, `*pos` is unchanged. If the body is truncated or is
/// not valid UTF-8, `*pos` is left just past the prefix.
fn read_navm_string(data: &[u8], pos: &mut usize) -> Result<String, Error> {
    let body_start = *pos + 3;
    if body_start > data.len() {
        return Err(Error::UnexpectedEof);
    }
    let len = data[*pos..body_start]
        .iter()
        .fold(0usize, |acc, &byte| (acc << 8) | usize::from(byte));
    *pos = body_start;

    let body_end = body_start + len;
    if body_end > data.len() {
        return Err(Error::UnexpectedEof);
    }
    let text = std::str::from_utf8(&data[body_start..body_end])
        .map_err(|_| Error::FormatError("invalid UTF-8 in NAVM bookmark".into()))?;
    *pos = body_end;
    Ok(text.to_owned())
}
fn parse_text_layer(data: &[u8]) -> Result<Option<TextLayer>, Error> {
if data.len() < 3 {
return Ok(None);
}
let mut pos = 0;
let text_len = read_text_u24(data, &mut pos)?;
if pos + text_len > data.len() {
return Err(Error::UnexpectedEof);
}
let text = std::str::from_utf8(&data[pos..pos + text_len])
.map_err(|_| Error::FormatError("invalid UTF-8 in text layer".into()))?
.to_string();
pos += text_len;
if pos >= data.len() {
return Ok(Some(TextLayer { text, root: None }));
}
let _version = data[pos];
pos += 1;
if pos >= data.len() {
return Ok(Some(TextLayer { text, root: None }));
}
let root = parse_text_zone(data, &mut pos, None, None)?;
Ok(Some(TextLayer {
text,
root: Some(root),
}))
}
/// Resolved geometry and text range of an already-parsed zone.
///
/// `parse_text_zone` passes it down so that a zone's stored values, which are
/// relative to its parent or previous sibling, can be made absolute.
struct ZoneCtx {
// Resolved x coordinate of the reference zone.
x: i32,
// Resolved y coordinate of the reference zone.
y: i32,
// Width of the reference zone.
width: i32,
// Height of the reference zone.
height: i32,
// Resolved text offset of the reference zone.
text_start: i32,
// Text length of the reference zone.
text_len: i32,
}
/// Reads one text zone, and recursively its children, starting at `*pos`.
///
/// Record layout as read here: one type byte (1..=7), five biased 16-bit
/// fields (x, y, width, height, text start), a 24-bit text length and a
/// 24-bit child count, followed by the child records.
///
/// Stored coordinates and text offsets are relative:
/// * with a previous sibling (`prev`), page/paragraph/line zones are placed
///   relative to the sibling's origin with y flipped below it, other kinds
///   continue from the sibling's right edge; the text offset continues from
///   the end of the sibling's text;
/// * otherwise, with a `parent`, the zone is placed relative to the parent's
///   origin with y measured down from the parent's top edge.
///
/// All accumulation saturates, so hostile input cannot trigger an
/// arithmetic-overflow panic. The child vector is pre-sized by what the
/// remaining bytes could hold rather than by the unchecked count in the file.
///
/// # Errors
///
/// [`Error::UnexpectedEof`] when the data ends early and
/// [`Error::FormatError`] for an unknown zone type.
fn parse_text_zone(
    data: &[u8],
    pos: &mut usize,
    parent: Option<&ZoneCtx>,
    prev: Option<&ZoneCtx>,
) -> Result<TextZone, Error> {
    // Smallest encoded zone: type byte, five 16-bit fields, two 24-bit fields.
    const MIN_ZONE_BYTES: usize = 1 + 5 * 2 + 3 + 3;

    if *pos >= data.len() {
        return Err(Error::UnexpectedEof);
    }
    let type_byte = data[*pos];
    *pos += 1;
    let kind = match type_byte {
        1 => TextZoneKind::Page,
        2 => TextZoneKind::Column,
        3 => TextZoneKind::Region,
        4 => TextZoneKind::Paragraph,
        5 => TextZoneKind::Line,
        6 => TextZoneKind::Word,
        7 => TextZoneKind::Character,
        _ => {
            return Err(Error::FormatError(format!(
                "unknown text zone type {}",
                type_byte
            )));
        }
    };

    let mut x = read_text_i16_biased(data, pos)?;
    let mut y = read_text_i16_biased(data, pos)?;
    let width = read_text_i16_biased(data, pos)?;
    let height = read_text_i16_biased(data, pos)?;
    let mut text_start = read_text_i16_biased(data, pos)?;
    let text_len = read_text_i24(data, pos)?;

    if let Some(prev) = prev {
        match kind {
            TextZoneKind::Page | TextZoneKind::Paragraph | TextZoneKind::Line => {
                x = x.saturating_add(prev.x);
                y = prev.y.saturating_sub(y.saturating_add(height));
            }
            _ => {
                x = x.saturating_add(prev.x.saturating_add(prev.width));
                y = y.saturating_add(prev.y);
            }
        }
        text_start = text_start.saturating_add(prev.text_start.saturating_add(prev.text_len));
    } else if let Some(parent) = parent {
        x = x.saturating_add(parent.x);
        y = parent
            .y
            .saturating_add(parent.height)
            .saturating_sub(y.saturating_add(height));
        text_start = text_start.saturating_add(parent.text_start);
    }

    let children_count = read_text_i24(data, pos)?.max(0) as usize;
    let ctx = ZoneCtx {
        x,
        y,
        width,
        height,
        text_start,
        text_len,
    };

    // The count is untrusted: never reserve more slots than the remaining
    // bytes could possibly encode. A bogus count still fails in the loop.
    let remaining = data.len().saturating_sub(*pos);
    let mut children = Vec::with_capacity(children_count.min(remaining / MIN_ZONE_BYTES));
    let mut prev_child: Option<ZoneCtx> = None;
    for _ in 0..children_count {
        let child = parse_text_zone(data, pos, Some(&ctx), prev_child.as_ref())?;
        prev_child = Some(ZoneCtx {
            x: child.x,
            y: child.y,
            width: child.width,
            height: child.height,
            // Both values originate from non-negative `i32`s, so they fit.
            text_start: i32::try_from(child.text_start).unwrap_or(i32::MAX),
            text_len: i32::try_from(child.text_len).unwrap_or(i32::MAX),
        });
        children.push(child);
    }

    Ok(TextZone {
        kind,
        x,
        y,
        width,
        height,
        text_start: text_start.max(0) as usize,
        text_len: text_len.max(0) as usize,
        children,
    })
}
fn read_text_u24(data: &[u8], pos: &mut usize) -> Result<usize, Error> {
if *pos + 3 > data.len() {
return Err(Error::UnexpectedEof);
}
let val = ((data[*pos] as usize) << 16)
| ((data[*pos + 1] as usize) << 8)
| (data[*pos + 2] as usize);
*pos += 3;
Ok(val)
}
fn read_text_i16_biased(data: &[u8], pos: &mut usize) -> Result<i32, Error> {
if *pos + 2 > data.len() {
return Err(Error::UnexpectedEof);
}
let raw = u16::from_be_bytes([data[*pos], data[*pos + 1]]);
*pos += 2;
Ok(raw as i32 - 0x8000)
}
fn read_text_i24(data: &[u8], pos: &mut usize) -> Result<i32, Error> {
if *pos + 3 > data.len() {
return Err(Error::UnexpectedEof);
}
let val =
((data[*pos] as i32) << 16) | ((data[*pos + 1] as i32) << 8) | (data[*pos + 2] as i32);
*pos += 3;
Ok(val)
}
#[cfg(test)]
mod tests {
use super::*;
fn assets_path() -> std::path::PathBuf {
std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("references/djvujs/library/assets")
}
fn golden_path() -> std::path::PathBuf {
std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/golden/document")
}
#[test]
fn page_counts() {
let cases: &[(&str, usize)] = &[
("boy_jb2.djvu", 1),
("boy.djvu", 1),
("chicken.djvu", 1),
("navm_fgbz.djvu", 6),
("DjVu3Spec_bundled.djvu", 71),
("colorbook.djvu", 62),
];
for (file, expected) in cases {
let data = std::fs::read(assets_path().join(file)).unwrap();
let doc = Document::parse(&data).unwrap();
assert_eq!(
doc.page_count(),
*expected,
"page count mismatch for {}",
file
);
}
}
#[test]
fn page_dimensions_navm_fgbz() {
let data = std::fs::read(assets_path().join("navm_fgbz.djvu")).unwrap();
let doc = Document::parse(&data).unwrap();
let golden = std::fs::read_to_string(golden_path().join("navm_fgbz_sizes.txt")).unwrap();
for (i, line) in golden.lines().enumerate() {
let page = doc.page(i).unwrap();
let expected = format!("width={} height={}", page.info.width, page.info.height);
assert_eq!(
expected,
line.trim(),
"size mismatch for navm_fgbz page {}",
i + 1
);
}
}
#[test]
fn page_dimensions_djvu3spec() {
let data = std::fs::read(assets_path().join("DjVu3Spec_bundled.djvu")).unwrap();
let doc = Document::parse(&data).unwrap();
let golden =
std::fs::read_to_string(golden_path().join("djvu3spec_bundled_sizes.txt")).unwrap();
for (i, line) in golden.lines().enumerate() {
if line.trim().is_empty() {
continue;
}
let page = doc.page(i).unwrap();
let expected = format!("width={} height={}", page.info.width, page.info.height);
assert_eq!(
expected,
line.trim(),
"size mismatch for djvu3spec page {}",
i + 1
);
}
}
#[test]
fn layer_availability() {
let data = std::fs::read(assets_path().join("boy_jb2.djvu")).unwrap();
let doc = Document::parse(&data).unwrap();
let p = doc.page(0).unwrap();
assert!(p.has_mask());
assert!(!p.has_background());
assert!(!p.has_foreground());
assert!(!p.has_palette());
let data = std::fs::read(assets_path().join("chicken.djvu")).unwrap();
let doc = Document::parse(&data).unwrap();
let p = doc.page(0).unwrap();
assert!(!p.has_mask());
assert!(p.has_background());
assert!(!p.has_foreground());
assert!(!p.has_palette());
let data = std::fs::read(assets_path().join("navm_fgbz.djvu")).unwrap();
let doc = Document::parse(&data).unwrap();
let p = doc.page(0).unwrap();
assert!(p.has_mask());
assert!(p.has_background());
assert!(!p.has_foreground());
assert!(p.has_palette());
}
#[test]
fn decode_mask_matches_direct_boy_jb2() {
let data = std::fs::read(assets_path().join("boy_jb2.djvu")).unwrap();
let doc = Document::parse(&data).unwrap();
let mask_via_doc = doc.page(0).unwrap().decode_mask().unwrap().unwrap();
let file = crate::iff::parse(&data).unwrap();
let sjbz = file.root.find_first(b"Sjbz").unwrap();
let mask_direct = crate::jb2::decode(sjbz.data(), None).unwrap();
assert_eq!(mask_via_doc.data, mask_direct.data, "mask data mismatch");
}
#[test]
fn decode_mask_with_shared_dict() {
let data = std::fs::read(assets_path().join("navm_fgbz.djvu")).unwrap();
let doc = Document::parse(&data).unwrap();
let mask = doc.page(0).unwrap().decode_mask().unwrap();
assert!(mask.is_some(), "expected mask for navm_fgbz p1");
let bm = mask.unwrap();
assert_eq!(bm.width, 2550);
assert_eq!(bm.height, 3300);
}
#[test]
fn decode_mask_repeated_calls_identical() {
let data = std::fs::read(assets_path().join("navm_fgbz.djvu")).unwrap();
let doc = Document::parse(&data).unwrap();
let bm1 = doc.page(0).unwrap().decode_mask().unwrap().unwrap();
let bm2 = doc.page(0).unwrap().decode_mask().unwrap().unwrap();
assert_eq!(
bm1.data, bm2.data,
"decode_mask repeated calls must be identical (cache must not corrupt dict state)"
);
}
#[test]
fn decode_background_chicken() {
let data = std::fs::read(assets_path().join("chicken.djvu")).unwrap();
let doc = Document::parse(&data).unwrap();
let bg = doc.page(0).unwrap().decode_background().unwrap();
assert!(bg.is_some());
let pm = bg.unwrap();
assert_eq!(pm.width, 181);
assert_eq!(pm.height, 240);
}
#[test]
fn decode_palette_navm_fgbz() {
let data = std::fs::read(assets_path().join("navm_fgbz.djvu")).unwrap();
let doc = Document::parse(&data).unwrap();
let pal = doc.page(0).unwrap().decode_palette().unwrap();
assert!(pal.is_some());
let p = pal.unwrap();
assert_eq!(p.colors.len(), 2); }
#[test]
fn page_info_dpi() {
let data = std::fs::read(assets_path().join("navm_fgbz.djvu")).unwrap();
let doc = Document::parse(&data).unwrap();
let p = doc.page(0).unwrap();
assert_eq!(p.info.dpi, 300);
}
#[test]
#[ignore]
fn debug_bg_lowres_vs_ddjvu() {
let cases = [
("carte.djvu", 0usize, "/tmp/rdjvu_debug/carte_bg_sub3.ppm"),
(
"colorbook.djvu",
0usize,
"/tmp/rdjvu_debug/colorbook_p1_bg_sub3.ppm",
),
(
"navm_fgbz.djvu",
3usize,
"/tmp/rdjvu_debug/navm_p4_bg_sub3.ppm",
),
];
for (file, page_idx, ref_file) in cases {
let ref_path = std::path::Path::new(ref_file);
if !ref_path.exists() {
continue;
}
let data = std::fs::read(assets_path().join(file)).unwrap();
let doc = Document::parse(&data).unwrap();
let page = doc.page(page_idx).unwrap();
let bg = page.decode_background().unwrap().unwrap();
let actual = bg.to_ppm();
let expected = std::fs::read(ref_path).unwrap();
let header_end = actual.iter().position(|&b| b == b'\n').unwrap() + 1;
let header_end = header_end
+ actual[header_end..]
.iter()
.position(|&b| b == b'\n')
.unwrap()
+ 1;
let header_end = header_end
+ actual[header_end..]
.iter()
.position(|&b| b == b'\n')
.unwrap()
+ 1;
let a = &actual[header_end..];
let e = &expected[header_end..];
let mut diff_px = 0usize;
let mut abs = [0u64; 3];
let px = (a.len().min(e.len())) / 3;
for p in 0..px {
let i = p * 3;
if a[i] != e[i] || a[i + 1] != e[i + 1] || a[i + 2] != e[i + 2] {
diff_px += 1;
}
abs[0] += (a[i] as i32 - e[i] as i32).unsigned_abs() as u64;
abs[1] += (a[i + 1] as i32 - e[i + 1] as i32).unsigned_abs() as u64;
abs[2] += (a[i + 2] as i32 - e[i + 2] as i32).unsigned_abs() as u64;
}
eprintln!(
"{} p{} bg-lowres mismatch_px={} mean_abs=({:.3},{:.3},{:.3}) dims_a={} dims_e={}",
file,
page_idx + 1,
diff_px,
abs[0] as f64 / px as f64,
abs[1] as f64 / px as f64,
abs[2] as f64 / px as f64,
a.len() / 3,
e.len() / 3
);
}
}
#[test]
fn bookmarks_navm_fgbz() {
let data = std::fs::read(assets_path().join("navm_fgbz.djvu")).unwrap();
let doc = Document::parse(&data).unwrap();
let bm = doc.bookmarks().unwrap();
assert_eq!(bm.len(), 4);
assert_eq!(bm[0].title, "Links");
assert_eq!(bm[0].url, "#1");
assert!(bm[0].children.is_empty());
assert_eq!(bm[1].title, "Ink, Rectangles, Ellipses, Lines");
assert_eq!(bm[1].url, "#2");
assert!(bm[1].children.is_empty());
assert_eq!(bm[2].title, "Stamps");
assert_eq!(bm[2].url, "#3");
assert_eq!(bm[2].children.len(), 2);
assert_eq!(bm[2].children[0].title, "Stamps - Faces");
assert_eq!(bm[2].children[0].url, "#4");
assert_eq!(bm[2].children[1].title, "Stamps - Pointers");
assert_eq!(bm[2].children[1].url, "#5");
assert_eq!(bm[3].title, "Last Page");
assert_eq!(bm[3].url, "#6");
assert!(bm[3].children.is_empty());
}
#[test]
fn bookmarks_empty_for_single_page() {
let data = std::fs::read(assets_path().join("boy_jb2.djvu")).unwrap();
let doc = Document::parse(&data).unwrap();
let bm = doc.bookmarks().unwrap();
assert!(bm.is_empty());
}
#[test]
fn bookmarks_empty_for_no_navm() {
let data = std::fs::read(assets_path().join("colorbook.djvu")).unwrap();
let doc = Document::parse(&data).unwrap();
let bm = doc.bookmarks().unwrap();
assert!(bm.is_empty());
}
#[test]
fn document_empty_input() {
assert!(Document::parse(&[]).is_err());
}
#[test]
fn document_truncated_file() {
assert!(Document::parse(b"AT&T").is_err());
}
#[test]
fn document_missing_info_chunk() {
let mut data = b"AT&TFORM".to_vec();
let form_size = 4 + 4 + 4 + 4; data.extend_from_slice(&(form_size as u32).to_be_bytes());
data.extend_from_slice(b"DJVU");
data.extend_from_slice(b"Sjbz");
data.extend_from_slice(&4u32.to_be_bytes());
data.extend_from_slice(&[0u8; 4]);
let result = Document::parse(&data);
match result {
Err(_) => {} Ok(doc) => {
assert!(doc.page(0).is_err());
}
}
}
#[test]
fn document_page_out_of_bounds() {
let data = std::fs::read(assets_path().join("boy_jb2.djvu")).unwrap();
let doc = Document::parse(&data).unwrap();
assert_eq!(doc.page_count(), 1);
assert!(doc.page(1).is_err());
assert!(doc.page(100).is_err());
}
#[test]
fn document_missing_optional_chunks() {
let data = std::fs::read(assets_path().join("boy_jb2.djvu")).unwrap();
let doc = Document::parse(&data).unwrap();
let page = doc.page(0).unwrap();
assert!(page.decode_background().unwrap().is_none());
assert!(page.decode_foreground().unwrap().is_none());
assert!(!page.has_palette());
}
fn text_golden_path() -> std::path::PathBuf {
std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/golden/text")
}
fn format_zone(layer: &TextLayer, zone: &TextZone, indent: usize) -> String {
let mut out = String::new();
let pad = " ".repeat(indent);
let kind_str = match zone.kind {
TextZoneKind::Page => "page",
TextZoneKind::Column => "column",
TextZoneKind::Region => "region",
TextZoneKind::Paragraph => "para",
TextZoneKind::Line => "line",
TextZoneKind::Word => "word",
TextZoneKind::Character => "char",
};
let x2 = zone.x + zone.width;
let y2 = zone.y + zone.height;
if zone.children.is_empty() {
let text = layer.zone_text(zone);
let trimmed = text.trim_end();
let escaped = djvused_escape(trimmed);
out.push_str(&format!(
"{}({} {} {} {} {} \"{}\")",
pad, kind_str, zone.x, zone.y, x2, y2, escaped
));
} else {
out.push_str(&format!(
"{}({} {} {} {} {}",
pad, kind_str, zone.x, zone.y, x2, y2
));
for child in &zone.children {
out.push('\n');
out.push_str(&format_zone(layer, child, indent + 1));
}
out.push(')');
}
out
}
fn djvused_escape(text: &str) -> String {
let mut out = String::new();
for b in text.bytes() {
match b {
b'\\' => out.push_str("\\\\"),
b'"' => out.push_str("\\\""),
0x20..=0x7e => out.push(b as char),
_ => out.push_str(&format!("\\{:03o}", b)),
}
}
out
}
fn format_text_layer(layer: &TextLayer) -> String {
match &layer.root {
Some(root) => format_zone(layer, root, 0),
None => String::new(),
}
}
#[test]
fn text_layer_none_for_no_text() {
let data = std::fs::read(assets_path().join("boy_jb2.djvu")).unwrap();
let doc = Document::parse(&data).unwrap();
let tl = doc.page(0).unwrap().text_layer().unwrap();
assert!(tl.is_none());
}
#[test]
fn text_layer_carte_p1() {
let data = std::fs::read(assets_path().join("carte.djvu")).unwrap();
let doc = Document::parse(&data).unwrap();
let tl = doc.page(0).unwrap().text_layer().unwrap().unwrap();
assert!(!tl.text.is_empty(), "carte text should not be empty");
let root = tl.root.as_ref().unwrap();
assert_eq!(root.kind, TextZoneKind::Page);
let golden = std::fs::read_to_string(text_golden_path().join("carte_p1.txt")).unwrap();
let actual = format_text_layer(&tl);
assert_eq!(actual.trim(), golden.trim(), "carte p1 text mismatch");
}
#[test]
fn text_layer_djvu3spec_p1() {
let data = std::fs::read(assets_path().join("DjVu3Spec_bundled.djvu")).unwrap();
let doc = Document::parse(&data).unwrap();
let tl = doc.page(0).unwrap().text_layer().unwrap().unwrap();
assert!(!tl.text.is_empty());
let root = tl.root.as_ref().unwrap();
assert_eq!(root.kind, TextZoneKind::Page);
assert!(!root.children.is_empty());
let golden = std::fs::read_to_string(text_golden_path().join("djvu3spec_p1.txt")).unwrap();
let actual = format_text_layer(&tl);
assert_eq!(actual.trim(), golden.trim(), "djvu3spec p1 text mismatch");
}
#[test]
fn text_layer_colorbook_p1() {
let data = std::fs::read(assets_path().join("colorbook.djvu")).unwrap();
let doc = Document::parse(&data).unwrap();
let tl = doc.page(0).unwrap().text_layer().unwrap().unwrap();
assert!(!tl.text.is_empty());
let golden = std::fs::read_to_string(text_golden_path().join("colorbook_p1.txt")).unwrap();
let actual = format_text_layer(&tl);
assert_eq!(actual.trim(), golden.trim(), "colorbook p1 text mismatch");
}
#[test]
fn text_layer_czech_p6_utf8() {
let data = std::fs::read(assets_path().join("czech.djvu")).unwrap();
let doc = Document::parse(&data).unwrap();
let tl = doc.page(5).unwrap().text_layer().unwrap().unwrap();
assert!(!tl.text.is_empty());
let golden = std::fs::read_to_string(text_golden_path().join("czech_p6.txt")).unwrap();
let actual = format_text_layer(&tl);
assert_eq!(actual.trim(), golden.trim(), "czech p6 text mismatch");
}
#[test]
fn text_layer_zone_text_access() {
let data = std::fs::read(assets_path().join("DjVu3Spec_bundled.djvu")).unwrap();
let doc = Document::parse(&data).unwrap();
let tl = doc.page(0).unwrap().text_layer().unwrap().unwrap();
fn find_first_word(zone: &TextZone) -> Option<&TextZone> {
if zone.kind == TextZoneKind::Word {
return Some(zone);
}
for child in &zone.children {
if let Some(w) = find_first_word(child) {
return Some(w);
}
}
None
}
let root = tl.root.as_ref().unwrap();
let word = find_first_word(root).expect("should have at least one word");
let text = tl.zone_text(word);
assert!(!text.is_empty(), "first word text should not be empty");
}
/// Requests the text layer of every page in `DjVu3Spec_bundled.djvu` and
/// fails on the first page whose page lookup or text-layer call returns an
/// error.
///
/// The failure message includes the error value itself (not just the page
/// index) so a regression can be diagnosed from the test output alone.
#[test]
fn text_layer_all_pages_djvu3spec() {
    let data = std::fs::read(assets_path().join("DjVu3Spec_bundled.djvu")).unwrap();
    let doc = Document::parse(&data).unwrap();
    for i in 0..doc.page_count() {
        let page = doc
            .page(i)
            .unwrap_or_else(|err| panic!("page lookup failed for djvu3spec page {}: {:?}", i, err));
        // Only success/failure matters here; the layer contents are covered
        // by the golden-file tests.
        if let Err(err) = page.text_layer() {
            panic!("text_layer failed for djvu3spec page {}: {:?}", i, err);
        }
    }
}
/// Checks that `carte.djvu` yields a thumbnail for page index 0 whose
/// dimensions are each in `1..500` and whose data buffer holds four bytes per
/// pixel.
#[test]
fn thumbnail_carte() {
    let bytes = std::fs::read(assets_path().join("carte.djvu")).unwrap();
    let document = Document::parse(&bytes).unwrap();
    let lookup = document.thumbnail(0).unwrap();
    let thumb = lookup.expect("carte should have a thumbnail");

    let (w, h) = (thumb.width, thumb.height);
    assert!((1..500).contains(&w), "thumb width: {}", thumb.width);
    assert!((1..500).contains(&h), "thumb height: {}", thumb.height);

    let expected_len = w as usize * h as usize * 4;
    assert_eq!(thumb.data.len(), expected_len);
}
/// Walks every page of `DjVu3Spec_bundled.djvu`, validates each thumbnail
/// that is present (non-zero size, four bytes per pixel) and expects exactly
/// 71 thumbnails in total.
#[test]
fn thumbnail_djvu3spec_all_pages() {
    let bytes = std::fs::read(assets_path().join("DjVu3Spec_bundled.djvu")).unwrap();
    let doc = Document::parse(&bytes).unwrap();

    let mut count = 0;
    for i in 0..doc.page_count() {
        // Pages without a thumbnail are skipped and not counted.
        let thumb = match doc.thumbnail(i).unwrap() {
            Some(found) => found,
            None => continue,
        };
        assert!(thumb.width > 0 && thumb.height > 0, "page {} thumb empty", i);
        let expected_len = thumb.width as usize * thumb.height as usize * 4;
        assert_eq!(
            thumb.data.len(),
            expected_len,
            "page {} thumb data mismatch",
            i
        );
        count += 1;
    }
    assert_eq!(count, 71, "expected 71 thumbnails, got {}", count);
}
/// Expects `thumbnail(0)` to succeed with `None` for `boy_jb2.djvu`.
#[test]
fn thumbnail_none_for_single_page() {
    let bytes = std::fs::read(assets_path().join("boy_jb2.djvu")).unwrap();
    let document = Document::parse(&bytes).unwrap();
    let thumb = document.thumbnail(0).unwrap();
    assert!(thumb.is_none());
}
/// Expects `thumbnail(0)` to succeed with `None` for `colorbook.djvu`
/// (presumably a document that carries no thumbnail data; confirm against
/// the fixture).
#[test]
fn thumbnail_none_for_no_thum() {
    let bytes = std::fs::read(assets_path().join("colorbook.djvu")).unwrap();
    let document = Document::parse(&bytes).unwrap();
    let thumb = document.thumbnail(0).unwrap();
    assert!(thumb.is_none());
}
/// Progressive background decoding of `carte.djvu` page 0 must yield one
/// frame per BG44 chunk, and every frame must share the first frame's size.
#[test]
fn progressive_bg_returns_frames_per_chunk() {
    let bytes = std::fs::read(assets_path().join("carte.djvu")).unwrap();
    let document = Document::parse(&bytes).unwrap();
    let page = document.page(0).unwrap();

    let chunk_count = page.bg44_chunk_count();
    assert!(
        chunk_count > 1,
        "need multi-chunk file for progressive test"
    );

    let frames = page.decode_background_progressive().unwrap().unwrap();
    assert_eq!(frames.len(), chunk_count, "one frame per BG44 chunk");

    // The first frame's dimensions are the reference for all the others.
    let reference = &frames[0];
    let expected_dims = (reference.width, reference.height);
    for (i, frame) in frames.iter().enumerate() {
        let dims = (frame.width, frame.height);
        assert_eq!(dims, expected_dims, "frame {i} size mismatch");
    }
}
/// The final frame from progressive decoding of `carte.djvu` page 0 must be
/// identical (size and pixel data) to the result of `decode_background`.
#[test]
fn progressive_last_frame_matches_full_decode() {
    let bytes = std::fs::read(assets_path().join("carte.djvu")).unwrap();
    let document = Document::parse(&bytes).unwrap();
    let page = document.page(0).unwrap();

    let reference = page.decode_background().unwrap().unwrap();
    let progressive = page.decode_background_progressive().unwrap().unwrap();
    let final_frame = progressive.last().unwrap();

    assert_eq!(reference.width, final_frame.width);
    assert_eq!(reference.height, final_frame.height);
    assert_eq!(
        reference.data, final_frame.data,
        "last progressive frame must match full decode"
    );
}
/// When `boy.djvu` page 0 has at most one BG44 chunk, progressive decoding
/// must produce exactly one frame.
///
/// NOTE(review): the check is conditional; if the fixture has more than one
/// chunk this test asserts nothing.
#[test]
fn progressive_single_chunk_returns_one_frame() {
    let bytes = std::fs::read(assets_path().join("boy.djvu")).unwrap();
    let document = Document::parse(&bytes).unwrap();
    let page = document.page(0).unwrap();

    if page.bg44_chunk_count() > 1 {
        return;
    }
    let frames = page.decode_background_progressive().unwrap().unwrap();
    assert_eq!(frames.len(), 1);
}
/// For the multi-chunk `carte.djvu` page 0, the coarse background must have
/// the same dimensions as the full decode but different pixel data.
#[test]
fn coarse_decode_returns_blurry_frame() {
    let bytes = std::fs::read(assets_path().join("carte.djvu")).unwrap();
    let document = Document::parse(&bytes).unwrap();
    let page = document.page(0).unwrap();
    assert!(page.bg44_chunk_count() > 1);

    let preview = page.decode_background_coarse().unwrap().unwrap();
    let reference = page.decode_background().unwrap().unwrap();

    assert_eq!(preview.width, reference.width);
    assert_eq!(preview.height, reference.height);
    assert_ne!(preview.data, reference.data, "coarse should differ from full");
}
/// When `boy.djvu` page 0 has at most one BG44 chunk, the coarse decode must
/// succeed with `None`.
///
/// NOTE(review): the check is conditional; if the fixture has more than one
/// chunk this test asserts nothing.
#[test]
fn coarse_decode_single_chunk_returns_none() {
    let bytes = std::fs::read(assets_path().join("boy.djvu")).unwrap();
    let document = Document::parse(&bytes).unwrap();
    let page = document.page(0).unwrap();

    if page.bg44_chunk_count() > 1 {
        return;
    }
    let coarse = page.decode_background_coarse().unwrap();
    assert!(coarse.is_none());
}
/// `boy_jb2.djvu` page 0 must report zero BG44 chunks, and progressive
/// background decoding must then succeed with `None`.
#[test]
fn progressive_no_bg_returns_none() {
    let bytes = std::fs::read(assets_path().join("boy_jb2.djvu")).unwrap();
    let document = Document::parse(&bytes).unwrap();
    let page = document.page(0).unwrap();

    assert_eq!(page.bg44_chunk_count(), 0);
    let frames = page.decode_background_progressive().unwrap();
    assert!(frames.is_none());
}
}