djvu_rs/lib.rs
1//! Pure-Rust DjVu decoder written from the DjVu v3 public specification.
2//!
3//! This crate implements the full DjVu v3 document format in safe Rust,
4//! including IFF container parsing, JB2 bilevel decoding, IW44 wavelet
5//! decoding, BZZ decompression, text layer extraction, and annotation parsing.
6//! All algorithms are written from the public DjVu spec with no GPL code.
7//!
8//! # Key public types
9//!
10//! - [`DjVuError`] — top-level error enum (wraps [`IffError`], etc.)
11//! - [`IffError`] — errors from the IFF container parser
12//! - [`PageInfo`] — page metadata parsed from the INFO chunk
13//! - [`Rotation`] — page rotation enum (None, Ccw90, Rot180, Cw90)
14//! - [`DjVuDocument`] — high-level document model (IFF/BZZ/IW44 based)
15//! - [`DjVuPage`] — lazy page handle
16//! - [`DjVuBookmark`] — NAVM bookmark (table of contents)
17//! - [`DocError`] — error type for the document model
18//! - [`djvu_render::RenderOptions`] — render parameters
19//! - [`djvu_render::RenderError`] — render pipeline error type
20//! - [`text::TextLayer`] — text layer from TXTz/TXTa chunks
21//! - [`text::TextZone`] — a zone node in the text layer hierarchy
22//! - [`annotation::Annotation`] — page-level annotation
23//! - [`annotation::MapArea`] — clickable area with URL and shape
24//! - [`Pixmap`] — RGBA pixel buffer returned by render methods
25//! - [`Bitmap`] — 1-bit bitmap for JB2 mask layers
26//! - [`Document`] — owned DjVu document (high-level std API, requires std feature)
27//! - [`Page`] — a page within a [`Document`]
28//!
29//! # Quick start
30//!
31//! ```no_run
32//! use djvu_rs::Document;
33//!
34//! let doc = Document::open("file.djvu").unwrap();
35//! println!("{} pages", doc.page_count());
36//!
37//! let page = doc.page(0).unwrap();
38//! println!("{}x{} @ {} dpi", page.width(), page.height(), page.dpi());
39//!
40//! let pixmap = page.render().unwrap();
41//! // pixmap.data: RGBA bytes
42//! ```
43//!
44//! # IFF parser
45//!
46//! ```no_run
47//! use djvu_rs::iff::parse_form;
48//!
49//! let data = std::fs::read("file.djvu").unwrap();
50//! let form = parse_form(&data).unwrap();
51//! println!("form type: {:?}", std::str::from_utf8(&form.form_type));
52//! ```
53
54#![cfg_attr(not(feature = "std"), no_std)]
55#![deny(unsafe_code)]
56#[cfg(not(feature = "std"))]
57extern crate alloc;
58
59// ---- New phase-1 modules ---------------------------------------------------
60//
61// These are the new clean-room implementations written from the DjVu spec.
62// They are exposed under their natural names. The legacy modules that conflict
63// are kept under different names below.
64
65/// IFF container parser (phase 1, written from spec).
66pub mod iff;
67
68/// Typed error hierarchy for the new implementation (phase 1).
69///
70/// Key types: `DjVuError`, `IffError`, `BzzError`, `Jb2Error`, `Iw44Error`,
71/// `LegacyError`. See also `text::TextError` and `annotation::AnnotationError`.
72pub mod error;
73
74/// INFO chunk parser (phase 1).
75pub(crate) mod info;
76
77/// ZP arithmetic coder — clean-room implementation (phase 2a).
78///
79/// Provides `ZpDecoder` for use by the new BZZ decompressor and future
80/// phase decoders (JB2, IW44). Not yet wired into the legacy rendering path.
81#[path = "zp/mod.rs"]
82#[allow(dead_code)]
83pub(crate) mod zp_impl;
84
85/// BZZ decompressor — clean-room implementation.
86///
87/// Provides `bzz_new::bzz_decode` for decompressing DjVu BZZ streams
88/// (DIRM, NAVM, ANTz chunks).
89#[allow(dead_code)]
90pub mod bzz_new;
91
92/// JB2 bilevel image decoder — clean-room implementation (phase 2b).
93///
94/// Decodes JB2-encoded bitonal images from DjVu Sjbz and Djbz chunks using
95/// ZP adaptive arithmetic coding with a symbol dictionary.
96///
97/// Key public types: `jb2_new::Jb2Dict`, `jb2_new::decode`, `jb2_new::decode_dict`.
98#[path = "jb2_new.rs"]
99pub mod jb2_new;
100
101/// IW44 wavelet image decoder — clean-room implementation (phase 2c).
102///
103/// Provides `iw44_new::Iw44Image` for decoding BG44/FG44/TH44 chunks.
104/// Uses planar YCbCr storage and a ZP arithmetic coder.
105/// RGB conversion happens only in `iw44_new::Iw44Image::to_rgb`.
106#[path = "iw44_new.rs"]
107pub mod iw44_new;
108
109/// New document model — phase 3.
110///
111/// Provides [`DjVuDocument`] (high-level document API built on the new IFF/BZZ/IW44
112/// clean-room implementations), [`DjVuPage`] (lazy page handle), and
113/// [`DjVuBookmark`] (NAVM table-of-contents entry).
114pub mod djvu_document;
115
116/// Rendering pipeline for [`DjVuPage`] — phase 5.
117///
118/// Provides `djvu_render::RenderOptions`, `djvu_render::RenderRect`,
119/// `djvu_render::render_into`, `djvu_render::render_pixmap`,
120/// `djvu_render::render_region`, `djvu_render::render_coarse`, and
121/// `djvu_render::render_progressive`.
122pub mod djvu_render;
123
124/// Text layer parser for DjVu TXTz/TXTa chunks — phase 4.
125///
126/// Provides [`text::parse_text_layer`] and [`text::parse_text_layer_bzz`]
127/// plus typed structs [`text::TextLayer`], [`text::TextZone`],
128/// [`text::TextZoneKind`], and [`text::Rect`].
129pub mod text;
130
131/// Annotation parser for DjVu ANTz/ANTa chunks — phase 4.
132///
133/// Provides [`annotation::parse_annotations`] and [`annotation::parse_annotations_bzz`]
134/// plus typed structs [`annotation::Annotation`], [`annotation::MapArea`],
135/// [`annotation::Shape`], and [`annotation::Color`].
136pub mod annotation;
137
138/// Document metadata parser for METa/METz chunks — phase 4 extension.
139///
140/// Provides [`metadata::parse_metadata`] and [`metadata::parse_metadata_bzz`]
141/// plus [`metadata::DjVuMetadata`] and [`metadata::MetadataError`].
142pub mod metadata;
143
144/// DjVu to PDF converter — phase 6.
145///
146/// Converts DjVu documents to PDF preserving structure: rasterized page images,
147/// invisible text layer (searchable), bookmarks (PDF outline), and hyperlinks
148/// (PDF link annotations).
149///
150/// Key function: [`pdf::djvu_to_pdf`].
151#[cfg(feature = "std")]
152pub mod pdf;
153
154/// DjVu to TIFF exporter — phase 4 format extension.
155///
156/// Converts DjVu documents to multi-page TIFF files in color (RGB8) or
157/// bilevel (Gray8) modes.
158///
159/// Key function: [`tiff_export::djvu_to_tiff`].
160#[cfg(feature = "tiff")]
161pub mod tiff_export;
162
163/// Async render surface for [`DjVuPage`] — phase 5 extension.
164///
165/// Wraps the synchronous render pipeline in [`tokio::task::spawn_blocking`]
166/// so CPU-bound IW44/JB2 work runs on the blocking thread pool without
167/// blocking the async runtime.
168///
169/// Key functions: [`djvu_async::render_pixmap_async`], [`djvu_async::render_gray8_async`], [`djvu_async::render_progressive_stream`].
170#[cfg(feature = "async")]
171pub mod djvu_async;
172
173/// `image::ImageDecoder` integration — allows DjVu pages to be used as
174/// first-class image sources in the `image` crate ecosystem.
175///
176/// Key types: [`image_compat::DjVuDecoder`], [`image_compat::ImageCompatError`].
177#[cfg(feature = "image")]
178pub mod image_compat;
179
180/// hOCR and ALTO XML export for the text layer.
181///
182/// Key functions: [`ocr_export::to_hocr`], [`ocr_export::to_alto`].
183/// Key types: [`ocr_export::HocrOptions`], [`ocr_export::AltoOptions`],
184/// [`ocr_export::OcrExportError`].
185#[cfg(feature = "std")]
186pub mod ocr_export;
187
188#[cfg(feature = "wasm")]
189pub mod wasm;
190
191// Re-export new phase-1 error types
192pub use error::{BzzError, DjVuError, IffError, Iw44Error, Jb2Error};
193
194// Re-export new phase-3 document model
195pub use djvu_document::{DjVuBookmark, DjVuDocument, DjVuPage, DocError};
196
197// Re-export new phase-1 page info types
198pub use info::{PageInfo, Rotation};
199
// ---- Rendering / document modules ------------------------------------------
//
// These modules implement the rendering pipeline. They depend on bitmap,
// pixmap, iw44, jb2, bzz. The modules that require std (std::io, std::path,
// etc.) — document, iw44, jb2, render, zp — are gated behind
// #[cfg(feature = "std")]. bitmap and pixmap carry no feature gate because
// their types are re-exported unconditionally below.
205
206#[doc(hidden)]
207pub(crate) mod bitmap;
208
209#[cfg(feature = "std")]
210#[doc(hidden)]
211pub mod document;
212
213#[cfg(feature = "std")]
214#[doc(hidden)]
215pub mod iw44;
216
217#[cfg(feature = "std")]
218#[doc(hidden)]
219pub mod jb2;
220
221#[doc(hidden)]
222pub(crate) mod pixmap;
223
224#[cfg(feature = "std")]
225#[doc(hidden)]
226pub mod render;
227
228#[cfg(feature = "std")]
229#[doc(hidden)]
230#[path = "zp_legacy/mod.rs"]
231pub mod zp;
232
233// Re-export types needed by both legacy and new phase modules
234pub use bitmap::Bitmap;
235pub use pixmap::{GrayPixmap, Pixmap};
236
237// Re-export legacy types (only with std feature)
238#[cfg(feature = "std")]
239pub use document::{Bookmark, TextLayer, TextZone, TextZoneKind};
240
241// Legacy error type (re-exported from legacy_error module included via error.rs)
242#[cfg(feature = "std")]
243pub use error::LegacyError as Error;
244
/// An owned, fully parsed DjVu document.
///
/// The input is parsed exactly once, when the value is constructed. Later
/// page lookups and renders work from the already-parsed chunk tree, so they
/// incur no re-parsing overhead.
#[cfg(feature = "std")]
pub struct Document {
    /// Parsed document from the `document` module; every public method
    /// forwards to it.
    doc: document::Document,
}
253
#[cfg(feature = "std")]
impl Document {
    /// Read the file at `path` completely into memory and parse it.
    ///
    /// # Errors
    ///
    /// A failed read is reported as `Error::FormatError` carrying the I/O
    /// error text. Parse failures are returned as produced by
    /// [`Document::from_bytes`].
    pub fn open(path: impl AsRef<std::path::Path>) -> Result<Self, Error> {
        match std::fs::read(path.as_ref()) {
            Ok(bytes) => Self::from_bytes(bytes),
            Err(io_err) => Err(Error::FormatError(format!(
                "failed to read file: {}",
                io_err
            ))),
        }
    }

    /// Drain `reader` to its end into an in-memory buffer and parse the result.
    ///
    /// # Errors
    ///
    /// A failed read is reported as `Error::FormatError` carrying the I/O
    /// error text. Parse failures are returned as produced by
    /// [`Document::from_bytes`].
    pub fn from_reader(mut reader: impl std::io::Read) -> Result<Self, Error> {
        let mut bytes = Vec::new();
        if let Err(io_err) = reader.read_to_end(&mut bytes) {
            return Err(Error::FormatError(format!("failed to read: {}", io_err)));
        }
        Self::from_bytes(bytes)
    }

    /// Parse a DjVu document from a byte buffer the caller hands over.
    ///
    /// # Errors
    ///
    /// Returns whatever error `document::Document::parse` reports.
    pub fn from_bytes(data: Vec<u8>) -> Result<Self, Error> {
        Ok(Self {
            doc: document::Document::parse(&data)?,
        })
    }

    /// Table of contents parsed from the NAVM bookmarks.
    pub fn bookmarks(&self) -> Result<Vec<Bookmark>, Error> {
        self.doc.bookmarks()
    }

    /// How many pages the document contains.
    pub fn page_count(&self) -> usize {
        self.doc.page_count()
    }

    /// Look up the page at the 0-based `index`.
    ///
    /// The returned handle stores a copy of the page's width, height, dpi and
    /// rotation, and borrows this document for later decoding and rendering.
    ///
    /// # Errors
    ///
    /// Propagates the error from the underlying page lookup.
    pub fn page(&self, index: usize) -> Result<Page<'_>, Error> {
        let inner = self.doc.page(index)?;
        let info = &inner.info;
        Ok(Page {
            width: info.width,
            height: info.height,
            dpi: info.dpi,
            rotation: info.rotation,
            index,
            doc: self,
        })
    }
}
302
/// Handle to a single page of a [`Document`].
///
/// Created by [`Document::page`]. It stores a copy of the page geometry and
/// borrows the owning document, which is consulted again whenever the page is
/// decoded or rendered.
#[cfg(feature = "std")]
pub struct Page<'a> {
    /// Stored page width in pixels, before rotation is applied.
    width: u16,
    /// Stored page height in pixels, before rotation is applied.
    height: u16,
    /// Page resolution in dots per inch.
    dpi: u16,
    /// Page rotation from the INFO chunk.
    rotation: document::Rotation,
    /// 0-based position of this page within the document.
    index: usize,
    /// Document this page belongs to.
    doc: &'a Document,
}
313
#[cfg(feature = "std")]
impl Page<'_> {
    /// Page width in pixels (before rotation).
    pub fn width(&self) -> u32 {
        u32::from(self.width)
    }

    /// Page height in pixels (before rotation).
    pub fn height(&self) -> u32 {
        u32::from(self.height)
    }

    /// Effective page width after rotation.
    ///
    /// For pages rotated `Cw90` or `Cw270` this is the stored height;
    /// otherwise it is the stored width.
    pub fn display_width(&self) -> u32 {
        if self.is_quarter_turned() {
            self.height()
        } else {
            self.width()
        }
    }

    /// Effective page height after rotation.
    ///
    /// For pages rotated `Cw90` or `Cw270` this is the stored width;
    /// otherwise it is the stored height.
    pub fn display_height(&self) -> u32 {
        if self.is_quarter_turned() {
            self.width()
        } else {
            self.height()
        }
    }

    /// Page resolution in dots per inch.
    pub fn dpi(&self) -> u16 {
        self.dpi
    }

    /// The 0-based index of this page within the document.
    pub fn index(&self) -> usize {
        self.index
    }

    /// Page rotation from the INFO chunk.
    pub fn rotation(&self) -> document::Rotation {
        self.rotation
    }

    /// Decode the JB2 mask layer only (no compositing).
    ///
    /// Returns `None` when the page has no Sjbz chunk (pure IW44 background page).
    /// Useful for benchmarking the decode phase in isolation.
    ///
    /// # Errors
    ///
    /// Propagates errors from the page lookup and from the mask decoder.
    pub fn decode_mask(&self) -> Result<Option<Bitmap>, Error> {
        let page = self.doc.doc.page(self.index)?;
        page.decode_mask()
    }

    /// Render the page to an RGBA pixmap at native resolution.
    ///
    /// # Errors
    ///
    /// Propagates errors from the page lookup and from the renderer.
    pub fn render(&self) -> Result<Pixmap, Error> {
        let page = self.doc.doc.page(self.index)?;
        render::render(&page)
    }

    /// Render the page to an RGBA pixmap at a target size.
    ///
    /// `width` and `height` are forwarded unchanged to `render::render_to_size`.
    ///
    /// # Errors
    ///
    /// Propagates errors from the page lookup and from the renderer.
    pub fn render_to_size(&self, width: u32, height: u32) -> Result<Pixmap, Error> {
        let page = self.doc.doc.page(self.index)?;
        render::render_to_size(&page, width, height)
    }

    /// Render the page at native resolution with mask dilation for bolder text.
    ///
    /// The target size is the width and height stored in the page info;
    /// `dilate_passes` is forwarded unchanged to the renderer.
    ///
    /// # Errors
    ///
    /// Propagates errors from the page lookup and from the renderer.
    pub fn render_bold(&self, dilate_passes: u32) -> Result<Pixmap, Error> {
        let page = self.doc.doc.page(self.index)?;
        let native_width = u32::from(page.info.width);
        let native_height = u32::from(page.info.height);
        render::render_to_size_bold(&page, native_width, native_height, dilate_passes)
    }

    /// Render the page to a target size with mask dilation for bolder text.
    ///
    /// # Errors
    ///
    /// Propagates errors from the page lookup and from the renderer.
    pub fn render_to_size_bold(
        &self,
        width: u32,
        height: u32,
        dilate_passes: u32,
    ) -> Result<Pixmap, Error> {
        let page = self.doc.doc.page(self.index)?;
        render::render_to_size_bold(&page, width, height, dilate_passes)
    }

    /// Render the page at a target size with anti-aliased downscaling.
    ///
    /// `boldness` is forwarded unchanged to `render::render_aa`.
    ///
    /// # Errors
    ///
    /// Propagates errors from the page lookup and from the renderer.
    pub fn render_aa(&self, width: u32, height: u32, boldness: f32) -> Result<Pixmap, Error> {
        let page = self.doc.doc.page(self.index)?;
        render::render_aa(&page, width, height, boldness)
    }

    /// Decode the page thumbnail, if available.
    pub fn thumbnail(&self) -> Result<Option<Pixmap>, Error> {
        self.doc.doc.thumbnail(self.index)
    }

    /// Extract the text layer (TXTz/TXTa) with zone hierarchy.
    ///
    /// # Errors
    ///
    /// Propagates errors from the page lookup and from the text-layer parser.
    pub fn text_layer(&self) -> Result<Option<TextLayer>, Error> {
        let page = self.doc.doc.page(self.index)?;
        page.text_layer()
    }

    /// Extract the plain text content of the page.
    ///
    /// This is the `text` field of [`Page::text_layer`]'s result, or `None`
    /// when that method yields no text layer.
    pub fn text(&self) -> Result<Option<String>, Error> {
        Ok(self.text_layer()?.map(|layer| layer.text))
    }

    /// Fast coarse render: decode only the first BG44 chunk (blurry preview).
    ///
    /// The target size is the stored page size multiplied by `scale`, each
    /// side rounded and clamped to at least one pixel. The `Option` is
    /// returned exactly as produced by `render::render_to_size_coarse`.
    ///
    /// # Errors
    ///
    /// Propagates errors from the page lookup and from the renderer.
    pub fn render_scaled_coarse(&self, scale: f32) -> Result<Option<Pixmap>, Error> {
        let (target_width, target_height) = self.scaled_target_size(scale);
        let page = self.doc.doc.page(self.index)?;
        render::render_to_size_coarse(&page, target_width, target_height)
    }

    /// Progressive rendering: returns increasingly refined pixmaps.
    ///
    /// The target size is the stored page size multiplied by `scale`, each
    /// side rounded and clamped to at least one pixel.
    ///
    /// # Errors
    ///
    /// Propagates errors from the page lookup and from the renderer.
    pub fn render_scaled_progressive(&self, scale: f32) -> Result<Vec<Pixmap>, Error> {
        let (target_width, target_height) = self.scaled_target_size(scale);
        let page = self.doc.doc.page(self.index)?;
        render::render_to_size_progressive(&page, target_width, target_height)
    }

    /// Render the page scaled by a factor (e.g. 0.5 = half size, 2.0 = double).
    ///
    /// The target size is the stored page size multiplied by `scale`, each
    /// side rounded and clamped to at least one pixel.
    ///
    /// # Errors
    ///
    /// Propagates errors from the page lookup and from the renderer.
    pub fn render_scaled(&self, scale: f32) -> Result<Pixmap, Error> {
        let (target_width, target_height) = self.scaled_target_size(scale);
        let page = self.doc.doc.page(self.index)?;
        render::render_to_size(&page, target_width, target_height)
    }

    /// Whether the page rotation swaps width and height (`Cw90` or `Cw270`).
    fn is_quarter_turned(&self) -> bool {
        matches!(
            self.rotation,
            document::Rotation::Cw90 | document::Rotation::Cw270
        )
    }

    /// Target size handed to the renderer by the `render_scaled*` methods.
    ///
    /// Those methods used to scale the display size and then swap the two
    /// values back for quarter-turned pages. That always yields the stored
    /// (pre-rotation) width and height multiplied by `scale`, so the scale is
    /// applied to the stored dimensions directly.
    fn scaled_target_size(&self, scale: f32) -> (u32, u32) {
        // `as u32` saturates: NaN and negative products become 0 and are then
        // clamped up to 1, so the renderer never receives a zero-sized target.
        let scale_side = |side: u16| ((f32::from(side) * scale).round() as u32).max(1);
        (scale_side(self.width), scale_side(self.height))
    }
}
464
// Compile-time check that `Document` is both `Send` and `Sync`.
//
// Nothing here is ever called: the build fails if `Document` stops
// satisfying either bound, and succeeds without emitting runtime code paths
// otherwise.
#[cfg(feature = "std")]
#[allow(dead_code)] // the helpers exist only to be type-checked
const _: () = {
    fn require_send_sync<T: Send + Sync>() {}
    fn check_document() {
        require_send_sync::<Document>();
    }
};
475};