djvu_rs/lib.rs
1//! Pure-Rust DjVu decoder written from the DjVu v3 public specification.
2//!
3//! This crate implements the full DjVu v3 document format in safe Rust,
4//! including IFF container parsing, JB2 bilevel decoding, IW44 wavelet
5//! decoding, BZZ decompression, text layer extraction, and annotation parsing.
6//! All algorithms are written from the public DjVu spec with no GPL code.
7//!
8//! # Key public types
9//!
10//! - [`DjVuError`] — top-level error enum (wraps [`IffError`], etc.)
11//! - [`IffError`] — errors from the IFF container parser
12//! - [`PageInfo`] — page metadata parsed from the INFO chunk
13//! - [`Rotation`] — page rotation enum (None, Ccw90, Rot180, Cw90)
14//! - [`DjVuDocument`] — high-level document model (IFF/BZZ/IW44 based)
15//! - [`DjVuPage`] — lazy page handle
16//! - [`DjVuBookmark`] — NAVM bookmark (table of contents)
17//! - [`DocError`] — error type for the document model
18//! - [`djvu_render::RenderOptions`] — render parameters
19//! - [`djvu_render::RenderError`] — render pipeline error type
20//! - [`text::TextLayer`] — text layer from TXTz/TXTa chunks
21//! - [`text::TextZone`] — a zone node in the text layer hierarchy
22//! - [`annotation::Annotation`] — page-level annotation
23//! - [`annotation::MapArea`] — clickable area with URL and shape
24//! - [`Pixmap`] — RGBA pixel buffer returned by render methods
25//! - [`Bitmap`] — 1-bit bitmap for JB2 mask layers
26//! - [`Document`] — owned DjVu document (high-level std API, requires std feature)
27//! - [`Page`] — a page within a [`Document`]
28//!
29//! # Quick start
30//!
31//! ```no_run
32//! use djvu_rs::Document;
33//!
34//! let doc = Document::open("file.djvu").unwrap();
35//! println!("{} pages", doc.page_count());
36//!
37//! let page = doc.page(0).unwrap();
38//! println!("{}x{} @ {} dpi", page.width(), page.height(), page.dpi());
39//!
40//! let pixmap = page.render().unwrap();
41//! // pixmap.data: RGBA bytes
42//! ```
43//!
44//! # IFF parser
45//!
46//! ```no_run
47//! use djvu_rs::iff::parse_form;
48//!
49//! let data = std::fs::read("file.djvu").unwrap();
50//! let form = parse_form(&data).unwrap();
51//! println!("form type: {:?}", std::str::from_utf8(&form.form_type));
52//! ```
53
54#![cfg_attr(not(feature = "std"), no_std)]
55#![deny(unsafe_code)]
56#[cfg(not(feature = "std"))]
57extern crate alloc;
58
// ---- Clean-room modules (phases 1-6) ----------------------------------------
//
// These are the new clean-room implementations written from the DjVu spec.
// Where a legacy module still owns the short name (`zp`, `jb2`, `iw44`), the
// new implementation is exposed under a distinct name (`zp_impl`, `jb2_new`,
// `iw44_new`). The legacy modules themselves are declared further below.
64
65/// IFF container parser (phase 1, written from spec).
66pub mod iff;
67
68/// Typed error hierarchy for the new implementation (phase 1).
69///
70/// Key types: `DjVuError`, `IffError`, `BzzError`, `Jb2Error`, `Iw44Error`,
71/// `LegacyError`. See also `text::TextError` and `annotation::AnnotationError`.
72pub mod error;
73
74/// INFO chunk parser (phase 1).
75pub(crate) mod info;
76
77/// ZP arithmetic coder — clean-room implementation (phase 2a).
78///
79/// Provides `ZpDecoder` for use by the new BZZ decompressor and future
80/// phase decoders (JB2, IW44). Not yet wired into the legacy rendering path.
81#[path = "zp/mod.rs"]
82#[allow(dead_code)]
83pub(crate) mod zp_impl;
84
85/// BZZ decompressor — clean-room implementation.
86///
87/// Provides `bzz_new::bzz_decode` for decompressing DjVu BZZ streams
88/// (DIRM, NAVM, ANTz chunks).
89#[allow(dead_code)]
90pub mod bzz_new;
91
92/// JB2 bilevel image decoder — clean-room implementation (phase 2b).
93///
94/// Decodes JB2-encoded bitonal images from DjVu Sjbz and Djbz chunks using
95/// ZP adaptive arithmetic coding with a symbol dictionary.
96///
97/// Key public types: `jb2_new::Jb2Dict`, `jb2_new::decode`, `jb2_new::decode_dict`.
98#[path = "jb2_new.rs"]
99pub mod jb2_new;
100
101/// IW44 wavelet image decoder — clean-room implementation (phase 2c).
102///
103/// Provides `iw44_new::Iw44Image` for decoding BG44/FG44/TH44 chunks.
104/// Uses planar YCbCr storage and a ZP arithmetic coder.
105/// RGB conversion happens only in `iw44_new::Iw44Image::to_rgb`.
106#[path = "iw44_new.rs"]
107pub mod iw44_new;
108
109/// New document model — phase 3.
110///
111/// Provides [`DjVuDocument`] (high-level document API built on the new IFF/BZZ/IW44
112/// clean-room implementations), [`DjVuPage`] (lazy page handle), and
113/// [`DjVuBookmark`] (NAVM table-of-contents entry).
114pub mod djvu_document;
115
116/// Rendering pipeline for [`DjVuPage`] — phase 5.
117///
118/// Provides `djvu_render::RenderOptions`, `djvu_render::RenderRect`,
119/// `djvu_render::render_into`, `djvu_render::render_pixmap`,
120/// `djvu_render::render_region`, `djvu_render::render_coarse`, and
121/// `djvu_render::render_progressive`.
122pub mod djvu_render;
123
124/// Text layer parser for DjVu TXTz/TXTa chunks — phase 4.
125///
126/// Provides [`text::parse_text_layer`] and [`text::parse_text_layer_bzz`]
127/// plus typed structs [`text::TextLayer`], [`text::TextZone`],
128/// [`text::TextZoneKind`], and [`text::Rect`].
129pub mod text;
130
131/// Annotation parser for DjVu ANTz/ANTa chunks — phase 4.
132///
133/// Provides [`annotation::parse_annotations`] and [`annotation::parse_annotations_bzz`]
134/// plus typed structs [`annotation::Annotation`], [`annotation::MapArea`],
135/// [`annotation::Shape`], and [`annotation::Color`].
136pub mod annotation;
137
138/// Document metadata parser for METa/METz chunks — phase 4 extension.
139///
140/// Provides [`metadata::parse_metadata`] and [`metadata::parse_metadata_bzz`]
141/// plus [`metadata::DjVuMetadata`] and [`metadata::MetadataError`].
142pub mod metadata;
143
144/// DjVu to PDF converter — phase 6.
145///
146/// Converts DjVu documents to PDF preserving structure: rasterized page images,
147/// invisible text layer (searchable), bookmarks (PDF outline), and hyperlinks
148/// (PDF link annotations).
149///
150/// Key function: [`pdf::djvu_to_pdf`].
151#[cfg(feature = "std")]
152pub mod pdf;
153
154/// DjVu to EPUB 3 exporter.
155///
156/// Converts DjVu documents to EPUB 3 while preserving page images,
157/// invisible text overlay for search/copy, and NAVM bookmarks as navigation.
158///
159/// Key function: [`epub::djvu_to_epub`].
160#[cfg(feature = "epub")]
161pub mod epub;
162
163/// DjVu to TIFF exporter — phase 4 format extension.
164///
165/// Converts DjVu documents to multi-page TIFF files in color (RGB8) or
166/// bilevel (Gray8) modes.
167///
168/// Key function: [`tiff_export::djvu_to_tiff`].
169#[cfg(feature = "tiff")]
170pub mod tiff_export;
171
172/// Async render surface for [`DjVuPage`] — phase 5 extension.
173///
174/// Wraps the synchronous render pipeline in [`tokio::task::spawn_blocking`]
175/// so CPU-bound IW44/JB2 work runs on the blocking thread pool without
176/// blocking the async runtime.
177///
178/// Key functions: [`djvu_async::render_pixmap_async`], [`djvu_async::render_gray8_async`], [`djvu_async::render_progressive_stream`].
179#[cfg(feature = "async")]
180pub mod djvu_async;
181
182/// `image::ImageDecoder` integration — allows DjVu pages to be used as
183/// first-class image sources in the `image` crate ecosystem.
184///
185/// Key types: [`image_compat::DjVuDecoder`], [`image_compat::ImageCompatError`].
186#[cfg(feature = "image")]
187pub mod image_compat;
188
189/// hOCR and ALTO XML export for the text layer.
190///
191/// Key functions: [`ocr_export::to_hocr`], [`ocr_export::to_alto`].
192/// Key types: [`ocr_export::HocrOptions`], [`ocr_export::AltoOptions`],
193/// [`ocr_export::OcrExportError`].
194#[cfg(feature = "std")]
195pub mod ocr_export;
196
197#[cfg(feature = "wasm")]
198pub mod wasm;
199
200// Re-export new phase-1 error types
201pub use error::{BzzError, DjVuError, IffError, Iw44Error, Jb2Error};
202
203// Re-export new phase-3 document model
204pub use djvu_document::{DjVuBookmark, DjVuDocument, DjVuPage, DocError};
205
206// Re-export new phase-1 page info types
207pub use info::{PageInfo, Rotation};
208
// ---- Rendering / document modules ------------------------------------------
//
// These modules implement the rendering pipeline. They depend on bitmap,
// pixmap, iw44, jb2, bzz. `document`, `iw44`, `jb2`, `render` and `zp` require
// std (std::io, std::path, etc.), so they are gated behind
// #[cfg(feature = "std")]. `bitmap` and `pixmap` are not gated: their types
// are re-exported below for both the legacy and the new phase modules.
214
215#[doc(hidden)]
216pub(crate) mod bitmap;
217
218#[cfg(feature = "std")]
219#[doc(hidden)]
220pub mod document;
221
222#[cfg(feature = "std")]
223#[doc(hidden)]
224pub mod iw44;
225
226#[cfg(feature = "std")]
227#[doc(hidden)]
228pub mod jb2;
229
230#[doc(hidden)]
231pub(crate) mod pixmap;
232
233#[cfg(feature = "std")]
234#[doc(hidden)]
235pub mod render;
236
237#[cfg(feature = "std")]
238#[doc(hidden)]
239#[path = "zp_legacy/mod.rs"]
240pub mod zp;
241
242// Re-export types needed by both legacy and new phase modules
243pub use bitmap::Bitmap;
244pub use pixmap::{GrayPixmap, Pixmap};
245
246// Re-export legacy types (only with std feature)
247#[cfg(feature = "std")]
248pub use document::{Bookmark, TextLayer, TextZone, TextZoneKind};
249
250// Legacy error type (re-exported from legacy_error module included via error.rs)
251#[cfg(feature = "std")]
252pub use error::LegacyError as Error;
253
/// Owned, fully parsed DjVu document (high-level `std` API).
///
/// The input is parsed exactly once, when the value is constructed. Later
/// `page()` and `render()` calls reuse the parsed chunk tree instead of
/// parsing the input again.
#[cfg(feature = "std")]
pub struct Document {
    /// Parsed document model that every method on this type delegates to.
    doc: document::Document,
}
262
#[cfg(feature = "std")]
impl Document {
    /// Read the file at `path` fully into memory and parse it.
    ///
    /// # Errors
    ///
    /// Returns `Error::FormatError` when the file cannot be read; otherwise
    /// forwards the result of [`Document::from_bytes`].
    pub fn open(path: impl AsRef<std::path::Path>) -> Result<Self, Error> {
        match std::fs::read(path.as_ref()) {
            Ok(bytes) => Self::from_bytes(bytes),
            Err(err) => Err(Error::FormatError(format!("failed to read file: {}", err))),
        }
    }

    /// Drain `reader` into an in-memory buffer and parse the collected bytes.
    ///
    /// # Errors
    ///
    /// Returns `Error::FormatError` when reading fails; otherwise forwards
    /// the result of [`Document::from_bytes`].
    pub fn from_reader(reader: impl std::io::Read) -> Result<Self, Error> {
        let mut source = reader;
        let mut bytes = Vec::new();
        if let Err(err) = source.read_to_end(&mut bytes) {
            return Err(Error::FormatError(format!("failed to read: {}", err)));
        }
        Self::from_bytes(bytes)
    }

    /// Parse a DjVu document from an owned byte buffer.
    ///
    /// `data` is only borrowed by the parser and is dropped when this
    /// function returns.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by `document::Document::parse`.
    pub fn from_bytes(data: Vec<u8>) -> Result<Self, Error> {
        Ok(Self {
            doc: document::Document::parse(&data)?,
        })
    }

    /// Parse the NAVM bookmarks (table of contents).
    ///
    /// The result of the underlying document's `bookmarks()` is returned
    /// unchanged.
    pub fn bookmarks(&self) -> Result<Vec<Bookmark>, Error> {
        self.doc.bookmarks()
    }

    /// Number of pages in the document.
    pub fn page_count(&self) -> usize {
        self.doc.page_count()
    }

    /// Build a [`Page`] handle for the page at the 0-based `index`.
    ///
    /// The handle copies width, height, dpi and rotation out of the page's
    /// info record and keeps a borrow of this document for later calls.
    ///
    /// # Errors
    ///
    /// Propagates the error of the underlying page lookup.
    pub fn page(&self, index: usize) -> Result<Page<'_>, Error> {
        let parsed = self.doc.page(index)?;
        let handle = Page {
            doc: self,
            index,
            width: parsed.info.width,
            height: parsed.info.height,
            dpi: parsed.info.dpi,
            rotation: parsed.info.rotation,
        };
        Ok(handle)
    }
}
311
/// Handle to a single page of a [`Document`].
///
/// Caches the page's basic metadata and borrows the owning document, which
/// the decode and render methods go back to on every call.
#[cfg(feature = "std")]
pub struct Page<'a> {
    /// Document this page belongs to.
    doc: &'a Document,
    /// 0-based position of the page within the document.
    index: usize,
    /// Stored page width in pixels (before rotation).
    width: u16,
    /// Stored page height in pixels (before rotation).
    height: u16,
    /// Resolution in dots per inch.
    dpi: u16,
    /// Rotation copied from the page's info record.
    rotation: document::Rotation,
}
322
#[cfg(feature = "std")]
impl Page<'_> {
    /// `true` when the page is rotated by a quarter turn (`Cw90` or `Cw270`),
    /// i.e. when the displayed width and height are swapped relative to the
    /// stored ones.
    fn is_quarter_turn(&self) -> bool {
        matches!(
            self.rotation,
            document::Rotation::Cw90 | document::Rotation::Cw270
        )
    }

    /// Scale the stored (pre-rotation) page size by `scale`.
    ///
    /// Each dimension is rounded to the nearest pixel and clamped to at
    /// least 1. Float-to-integer `as` casts saturate and map NaN to 0, so a
    /// NaN, zero or negative `scale` yields `(1, 1)`; there is no upper clamp.
    ///
    /// The result is in stored orientation `(width, height)`, not display
    /// orientation: that is what the `render::render_to_size*` calls below
    /// are given, regardless of the page rotation.
    fn scaled_size(&self, scale: f32) -> (u32, u32) {
        let scale_dim = |dim: u16| ((f32::from(dim) * scale).round() as u32).max(1);
        (scale_dim(self.width), scale_dim(self.height))
    }

    /// Page width in pixels (before rotation).
    pub fn width(&self) -> u32 {
        u32::from(self.width)
    }

    /// Page height in pixels (before rotation).
    pub fn height(&self) -> u32 {
        u32::from(self.height)
    }

    /// Effective page width after rotation.
    ///
    /// Equals [`Page::height`] for quarter-turn rotations and
    /// [`Page::width`] otherwise.
    pub fn display_width(&self) -> u32 {
        if self.is_quarter_turn() {
            self.height()
        } else {
            self.width()
        }
    }

    /// Effective page height after rotation.
    ///
    /// Equals [`Page::width`] for quarter-turn rotations and
    /// [`Page::height`] otherwise.
    pub fn display_height(&self) -> u32 {
        if self.is_quarter_turn() {
            self.width()
        } else {
            self.height()
        }
    }

    /// Page resolution in dots per inch.
    pub fn dpi(&self) -> u16 {
        self.dpi
    }

    /// The 0-based index of this page within the document.
    pub fn index(&self) -> usize {
        self.index
    }

    /// Page rotation from the INFO chunk.
    pub fn rotation(&self) -> document::Rotation {
        self.rotation
    }

    /// Decode the JB2 mask layer only (no compositing).
    ///
    /// Returns `None` when the page has no Sjbz chunk (pure IW44 background page).
    /// Useful for benchmarking the decode phase in isolation.
    ///
    /// # Errors
    ///
    /// Propagates errors from the page lookup and from the mask decoder.
    pub fn decode_mask(&self) -> Result<Option<Bitmap>, Error> {
        let page = self.doc.doc.page(self.index)?;
        page.decode_mask()
    }

    /// Render the page to an RGBA pixmap at native resolution.
    ///
    /// # Errors
    ///
    /// Propagates errors from the page lookup and from the renderer.
    pub fn render(&self) -> Result<Pixmap, Error> {
        let page = self.doc.doc.page(self.index)?;
        render::render(&page)
    }

    /// Render the page to an RGBA pixmap at a target size.
    ///
    /// `width` and `height` are forwarded unchanged to
    /// `render::render_to_size`.
    ///
    /// # Errors
    ///
    /// Propagates errors from the page lookup and from the renderer.
    pub fn render_to_size(&self, width: u32, height: u32) -> Result<Pixmap, Error> {
        let page = self.doc.doc.page(self.index)?;
        render::render_to_size(&page, width, height)
    }

    /// Render the page at native resolution with mask dilation for bolder text.
    ///
    /// The target size is the width and height recorded in the page's info.
    ///
    /// # Errors
    ///
    /// Propagates errors from the page lookup and from the renderer.
    pub fn render_bold(&self, dilate_passes: u32) -> Result<Pixmap, Error> {
        let page = self.doc.doc.page(self.index)?;
        let native_width = u32::from(page.info.width);
        let native_height = u32::from(page.info.height);
        render::render_to_size_bold(&page, native_width, native_height, dilate_passes)
    }

    /// Render the page to a target size with mask dilation for bolder text.
    ///
    /// # Errors
    ///
    /// Propagates errors from the page lookup and from the renderer.
    pub fn render_to_size_bold(
        &self,
        width: u32,
        height: u32,
        dilate_passes: u32,
    ) -> Result<Pixmap, Error> {
        let page = self.doc.doc.page(self.index)?;
        render::render_to_size_bold(&page, width, height, dilate_passes)
    }

    /// Render the page at a target size with anti-aliased downscaling.
    ///
    /// `boldness` is forwarded unchanged to `render::render_aa`.
    ///
    /// # Errors
    ///
    /// Propagates errors from the page lookup and from the renderer.
    pub fn render_aa(&self, width: u32, height: u32, boldness: f32) -> Result<Pixmap, Error> {
        let page = self.doc.doc.page(self.index)?;
        render::render_aa(&page, width, height, boldness)
    }

    /// Decode the page thumbnail, if available.
    ///
    /// The result of the underlying document's `thumbnail()` for this page
    /// index is returned unchanged.
    pub fn thumbnail(&self) -> Result<Option<Pixmap>, Error> {
        self.doc.doc.thumbnail(self.index)
    }

    /// Extract the text layer (TXTz/TXTa) with zone hierarchy.
    ///
    /// # Errors
    ///
    /// Propagates errors from the page lookup and from the text layer parser.
    pub fn text_layer(&self) -> Result<Option<TextLayer>, Error> {
        let page = self.doc.doc.page(self.index)?;
        page.text_layer()
    }

    /// Extract the plain text content of the page.
    ///
    /// This is the `text` field of [`Page::text_layer`]; `None` when that
    /// method returns no layer.
    pub fn text(&self) -> Result<Option<String>, Error> {
        Ok(self.text_layer()?.map(|layer| layer.text))
    }

    /// Fast coarse render: decode only the first BG44 chunk (blurry preview).
    ///
    /// The target size is the stored page size multiplied by `scale`, each
    /// dimension rounded and clamped to at least 1 pixel. The `Option` is
    /// passed through from `render::render_to_size_coarse`.
    ///
    /// # Errors
    ///
    /// Propagates errors from the page lookup and from the renderer.
    pub fn render_scaled_coarse(&self, scale: f32) -> Result<Option<Pixmap>, Error> {
        let (target_w, target_h) = self.scaled_size(scale);
        let page = self.doc.doc.page(self.index)?;
        render::render_to_size_coarse(&page, target_w, target_h)
    }

    /// Progressive rendering: returns increasingly refined pixmaps.
    ///
    /// The target size is the stored page size multiplied by `scale`, each
    /// dimension rounded and clamped to at least 1 pixel.
    ///
    /// # Errors
    ///
    /// Propagates errors from the page lookup and from the renderer.
    pub fn render_scaled_progressive(&self, scale: f32) -> Result<Vec<Pixmap>, Error> {
        let (target_w, target_h) = self.scaled_size(scale);
        let page = self.doc.doc.page(self.index)?;
        render::render_to_size_progressive(&page, target_w, target_h)
    }

    /// Render the page scaled by a factor (e.g. 0.5 = half size, 2.0 = double).
    ///
    /// The target size is the stored page size multiplied by `scale`, each
    /// dimension rounded and clamped to at least 1 pixel, so a NaN, zero or
    /// negative `scale` requests a 1x1 render.
    ///
    /// # Errors
    ///
    /// Propagates errors from the page lookup and from the renderer.
    pub fn render_scaled(&self, scale: f32) -> Result<Pixmap, Error> {
        let (target_w, target_h) = self.scaled_size(scale);
        let page = self.doc.doc.page(self.index)?;
        render::render_to_size(&page, target_w, target_h)
    }
}
473
// Compile-time proof that `Document` is both `Send` and `Sync`.
// The closure is never invoked; type-checking its body is the whole assertion,
// so the build fails if `Document` ever loses either auto trait.
#[cfg(feature = "std")]
#[allow(dead_code)]
const _: fn() = || {
    fn require_send<T: Send>() {}
    fn require_sync<T: Sync>() {}
    require_send::<Document>();
    require_sync::<Document>();
};