Skip to main content

djvu_rs/
lib.rs

1//! Pure-Rust DjVu decoder written from the DjVu v3 public specification.
2//!
3//! This crate implements the full DjVu v3 document format in safe Rust,
4//! including IFF container parsing, JB2 bilevel decoding, IW44 wavelet
5//! decoding, BZZ decompression, text layer extraction, and annotation parsing.
6//! All algorithms are written from the public DjVu spec with no GPL code.
7//!
8//! # Key public types
9//!
10//! - [`DjVuError`] — top-level error enum (wraps [`IffError`], etc.)
11//! - [`IffError`] — errors from the IFF container parser
12//! - [`PageInfo`] — page metadata parsed from the INFO chunk
13//! - [`Rotation`] — page rotation enum (None, Ccw90, Rot180, Cw90)
14//! - [`DjVuDocument`] — high-level document model (IFF/BZZ/IW44 based)
15//! - [`DjVuPage`] — lazy page handle
16//! - [`DjVuBookmark`] — NAVM bookmark (table of contents)
17//! - [`DocError`] — error type for the document model
18//! - [`djvu_render::RenderOptions`] — render parameters
19//! - [`djvu_render::RenderError`] — render pipeline error type
20//! - [`text::TextLayer`] — text layer from TXTz/TXTa chunks
21//! - [`text::TextZone`] — a zone node in the text layer hierarchy
22//! - [`annotation::Annotation`] — page-level annotation
23//! - [`annotation::MapArea`] — clickable area with URL and shape
24//! - [`Pixmap`] — RGBA pixel buffer returned by render methods
25//! - [`Bitmap`] — 1-bit bitmap for JB2 mask layers
//! - [`Document`] — owned DjVu document (high-level API; requires the `std` feature)
27//! - [`Page`] — a page within a [`Document`]
28//!
29//! # Quick start
30//!
31//! ```no_run
32//! use djvu_rs::Document;
33//!
34//! let doc = Document::open("file.djvu").unwrap();
35//! println!("{} pages", doc.page_count());
36//!
37//! let page = doc.page(0).unwrap();
38//! println!("{}x{} @ {} dpi", page.width(), page.height(), page.dpi());
39//!
40//! let pixmap = page.render().unwrap();
41//! // pixmap.data: RGBA bytes
42//! ```
43//!
44//! # IFF parser
45//!
46//! ```no_run
47//! use djvu_rs::iff::parse_form;
48//!
49//! let data = std::fs::read("file.djvu").unwrap();
50//! let form = parse_form(&data).unwrap();
51//! println!("form type: {:?}", std::str::from_utf8(&form.form_type));
52//! ```
53
54#![cfg_attr(not(feature = "std"), no_std)]
55#![deny(unsafe_code)]
56#[cfg(not(feature = "std"))]
57extern crate alloc;
58
59// ---- New phase-1 modules ---------------------------------------------------
60//
61// These are the new clean-room implementations written from the DjVu spec.
62// They are exposed under their natural names. The legacy modules that conflict
63// are kept under different names below.
64
65/// IFF container parser (phase 1, written from spec).
66pub mod iff;
67
68/// Typed error hierarchy for the new implementation (phase 1).
69///
70/// Key types: `DjVuError`, `IffError`, `BzzError`, `Jb2Error`, `Iw44Error`,
71/// `LegacyError`. See also `text::TextError` and `annotation::AnnotationError`.
72pub mod error;
73
74/// INFO chunk parser (phase 1).
75pub(crate) mod info;
76
77/// ZP arithmetic coder — clean-room implementation (phase 2a).
78///
79/// Provides `ZpDecoder` for use by the new BZZ decompressor and future
80/// phase decoders (JB2, IW44). Not yet wired into the legacy rendering path.
81#[path = "zp/mod.rs"]
82#[allow(dead_code)]
83pub(crate) mod zp_impl;
84
85/// BZZ decompressor — clean-room implementation.
86///
87/// Provides `bzz_new::bzz_decode` for decompressing DjVu BZZ streams
88/// (DIRM, NAVM, ANTz chunks).
89#[allow(dead_code)]
90pub mod bzz_new;
91
92/// JB2 bilevel image decoder — clean-room implementation (phase 2b).
93///
94/// Decodes JB2-encoded bitonal images from DjVu Sjbz and Djbz chunks using
95/// ZP adaptive arithmetic coding with a symbol dictionary.
96///
97/// Key public types: `jb2_new::Jb2Dict`, `jb2_new::decode`, `jb2_new::decode_dict`.
98#[path = "jb2_new.rs"]
99pub mod jb2_new;
100
101/// IW44 wavelet image decoder — clean-room implementation (phase 2c).
102///
103/// Provides `iw44_new::Iw44Image` for decoding BG44/FG44/TH44 chunks.
104/// Uses planar YCbCr storage and a ZP arithmetic coder.
105/// RGB conversion happens only in `iw44_new::Iw44Image::to_rgb`.
106#[path = "iw44_new.rs"]
107pub mod iw44_new;
108
109/// New document model — phase 3.
110///
111/// Provides [`DjVuDocument`] (high-level document API built on the new IFF/BZZ/IW44
112/// clean-room implementations), [`DjVuPage`] (lazy page handle), and
113/// [`DjVuBookmark`] (NAVM table-of-contents entry).
114pub mod djvu_document;
115
116/// Rendering pipeline for [`DjVuPage`] — phase 5.
117///
118/// Provides `djvu_render::RenderOptions`, `djvu_render::RenderRect`,
119/// `djvu_render::render_into`, `djvu_render::render_pixmap`,
120/// `djvu_render::render_region`, `djvu_render::render_coarse`, and
121/// `djvu_render::render_progressive`.
122pub mod djvu_render;
123
124/// Text layer parser for DjVu TXTz/TXTa chunks — phase 4.
125///
126/// Provides [`text::parse_text_layer`] and [`text::parse_text_layer_bzz`]
127/// plus typed structs [`text::TextLayer`], [`text::TextZone`],
128/// [`text::TextZoneKind`], and [`text::Rect`].
129pub mod text;
130
131/// Annotation parser for DjVu ANTz/ANTa chunks — phase 4.
132///
133/// Provides [`annotation::parse_annotations`] and [`annotation::parse_annotations_bzz`]
134/// plus typed structs [`annotation::Annotation`], [`annotation::MapArea`],
135/// [`annotation::Shape`], and [`annotation::Color`].
136pub mod annotation;
137
138/// Document metadata parser for METa/METz chunks — phase 4 extension.
139///
140/// Provides [`metadata::parse_metadata`] and [`metadata::parse_metadata_bzz`]
141/// plus [`metadata::DjVuMetadata`] and [`metadata::MetadataError`].
142pub mod metadata;
143
144/// DjVu to PDF converter — phase 6.
145///
146/// Converts DjVu documents to PDF preserving structure: rasterized page images,
147/// invisible text layer (searchable), bookmarks (PDF outline), and hyperlinks
148/// (PDF link annotations).
149///
150/// Key function: [`pdf::djvu_to_pdf`].
151#[cfg(feature = "std")]
152pub mod pdf;
153
154/// DjVu to EPUB 3 exporter.
155///
156/// Converts DjVu documents to EPUB 3 while preserving page images,
157/// invisible text overlay for search/copy, and NAVM bookmarks as navigation.
158///
159/// Key function: [`epub::djvu_to_epub`].
160#[cfg(feature = "epub")]
161pub mod epub;
162
163/// DjVu to TIFF exporter — phase 4 format extension.
164///
165/// Converts DjVu documents to multi-page TIFF files in color (RGB8) or
166/// bilevel (Gray8) modes.
167///
168/// Key function: [`tiff_export::djvu_to_tiff`].
169#[cfg(feature = "tiff")]
170pub mod tiff_export;
171
172/// Async render surface for [`DjVuPage`] — phase 5 extension.
173///
174/// Wraps the synchronous render pipeline in [`tokio::task::spawn_blocking`]
175/// so CPU-bound IW44/JB2 work runs on the blocking thread pool without
176/// blocking the async runtime.
177///
178/// Key functions: [`djvu_async::render_pixmap_async`], [`djvu_async::render_gray8_async`], [`djvu_async::render_progressive_stream`].
179#[cfg(feature = "async")]
180pub mod djvu_async;
181
182/// `image::ImageDecoder` integration — allows DjVu pages to be used as
183/// first-class image sources in the `image` crate ecosystem.
184///
185/// Key types: [`image_compat::DjVuDecoder`], [`image_compat::ImageCompatError`].
186#[cfg(feature = "image")]
187pub mod image_compat;
188
189/// hOCR and ALTO XML export for the text layer.
190///
191/// Key functions: [`ocr_export::to_hocr`], [`ocr_export::to_alto`].
192/// Key types: [`ocr_export::HocrOptions`], [`ocr_export::AltoOptions`],
193/// [`ocr_export::OcrExportError`].
194#[cfg(feature = "std")]
195pub mod ocr_export;
196
197#[cfg(feature = "wasm")]
198pub mod wasm;
199
200// Re-export new phase-1 error types
201pub use error::{BzzError, DjVuError, IffError, Iw44Error, Jb2Error};
202
203// Re-export new phase-3 document model
204pub use djvu_document::{DjVuBookmark, DjVuDocument, DjVuPage, DocError};
205
206// Re-export new phase-1 page info types
207pub use info::{PageInfo, Rotation};
208
209// ---- Rendering / document modules ------------------------------------------
210//
// These modules implement the rendering pipeline. The ones that need std
// (std::io, std::path, etc.) are gated behind #[cfg(feature = "std")];
// bitmap and pixmap carry no such gate and are always compiled.
214
215#[doc(hidden)]
216pub(crate) mod bitmap;
217
218#[cfg(feature = "std")]
219#[doc(hidden)]
220pub mod document;
221
222#[cfg(feature = "std")]
223#[doc(hidden)]
224pub mod iw44;
225
226#[cfg(feature = "std")]
227#[doc(hidden)]
228pub mod jb2;
229
230#[doc(hidden)]
231pub(crate) mod pixmap;
232
233#[cfg(feature = "std")]
234#[doc(hidden)]
235pub mod render;
236
237#[cfg(feature = "std")]
238#[doc(hidden)]
239#[path = "zp_legacy/mod.rs"]
240pub mod zp;
241
242// Re-export types needed by both legacy and new phase modules
243pub use bitmap::Bitmap;
244pub use pixmap::{GrayPixmap, Pixmap};
245
246// Re-export legacy types (only with std feature)
247#[cfg(feature = "std")]
248pub use document::{Bookmark, TextLayer, TextZone, TextZoneKind};
249
250// Legacy error type (re-exported from legacy_error module included via error.rs)
251#[cfg(feature = "std")]
252pub use error::LegacyError as Error;
253
/// Owned, fully parsed DjVu document.
///
/// The input is parsed exactly once, when the value is constructed. Later
/// `page()` and `render()` calls reuse the parsed chunk tree with zero
/// re-parsing overhead.
#[cfg(feature = "std")]
pub struct Document {
    /// The underlying parsed document from the `document` module.
    doc: document::Document,
}
262
#[cfg(feature = "std")]
impl Document {
    /// Read the file at `path` fully into memory and parse it as a DjVu document.
    ///
    /// # Errors
    ///
    /// Returns `Error::FormatError` when the file cannot be read, or the
    /// error produced by [`Document::from_bytes`] when parsing fails.
    pub fn open(path: impl AsRef<std::path::Path>) -> Result<Self, Error> {
        match std::fs::read(path.as_ref()) {
            Ok(bytes) => Self::from_bytes(bytes),
            Err(io_err) => Err(Error::FormatError(format!(
                "failed to read file: {}",
                io_err
            ))),
        }
    }

    /// Drain `reader` into an in-memory buffer and parse the bytes.
    ///
    /// # Errors
    ///
    /// Returns `Error::FormatError` when reading fails, or the error
    /// produced by [`Document::from_bytes`] when parsing fails.
    pub fn from_reader(mut reader: impl std::io::Read) -> Result<Self, Error> {
        let mut bytes = Vec::new();
        if let Err(io_err) = reader.read_to_end(&mut bytes) {
            return Err(Error::FormatError(format!("failed to read: {}", io_err)));
        }
        Self::from_bytes(bytes)
    }

    /// Parse a DjVu document from an owned byte buffer.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the underlying document parser.
    pub fn from_bytes(data: Vec<u8>) -> Result<Self, Error> {
        let parsed = document::Document::parse(&data)?;
        Ok(Self { doc: parsed })
    }

    /// Parse the NAVM bookmarks (the table of contents).
    pub fn bookmarks(&self) -> Result<Vec<Bookmark>, Error> {
        self.doc.bookmarks()
    }

    /// Total number of pages in the document.
    pub fn page_count(&self) -> usize {
        self.doc.page_count()
    }

    /// Look up the page at the 0-based `index`.
    ///
    /// The returned [`Page`] copies the page's width, height, dpi and
    /// rotation from its info record and borrows this document.
    ///
    /// # Errors
    ///
    /// Propagates the error from the underlying page lookup.
    pub fn page(&self, index: usize) -> Result<Page<'_>, Error> {
        let parsed_page = self.doc.page(index)?;
        let info = &parsed_page.info;
        Ok(Page {
            width: info.width,
            height: info.height,
            dpi: info.dpi,
            rotation: info.rotation,
            index,
            doc: self,
        })
    }
}
311
/// Handle to a single page of a [`Document`].
///
/// Holds the page's basic metadata by value and borrows the owning document.
#[cfg(feature = "std")]
pub struct Page<'a> {
    /// Page width in pixels, before rotation.
    width: u16,
    /// Page height in pixels, before rotation.
    height: u16,
    /// Page resolution in dots per inch.
    dpi: u16,
    /// Page rotation.
    rotation: document::Rotation,
    /// 0-based index of this page within the document.
    index: usize,
    /// The document this page belongs to.
    doc: &'a Document,
}
322
#[cfg(feature = "std")]
impl<'a> Page<'a> {
    /// Whether the page is rotated by a quarter turn (`Cw90` or `Cw270`),
    /// i.e. whether its displayed width and height are swapped relative to
    /// the stored dimensions.
    fn is_quarter_turn(&self) -> bool {
        matches!(
            self.rotation,
            document::Rotation::Cw90 | document::Rotation::Cw270
        )
    }

    /// Compute the `(width, height)` handed to the `render::render_to_size*`
    /// functions for a given `scale` factor.
    ///
    /// The display dimensions are scaled and rounded, each clamped to at
    /// least 1 pixel, and then swapped back for quarter-turn pages so the
    /// result is expressed in the stored (pre-rotation) orientation.
    fn scaled_target_size(&self, scale: f32) -> (u32, u32) {
        let scaled_w = ((self.display_width() as f32 * scale).round() as u32).max(1);
        let scaled_h = ((self.display_height() as f32 * scale).round() as u32).max(1);
        if self.is_quarter_turn() {
            (scaled_h, scaled_w)
        } else {
            (scaled_w, scaled_h)
        }
    }

    /// Page width in pixels (before rotation).
    pub fn width(&self) -> u32 {
        u32::from(self.width)
    }

    /// Page height in pixels (before rotation).
    pub fn height(&self) -> u32 {
        u32::from(self.height)
    }

    /// Effective page width after rotation.
    ///
    /// Equals the stored height for quarter-turn rotations, the stored
    /// width otherwise.
    pub fn display_width(&self) -> u32 {
        if self.is_quarter_turn() {
            u32::from(self.height)
        } else {
            u32::from(self.width)
        }
    }

    /// Effective page height after rotation.
    ///
    /// Equals the stored width for quarter-turn rotations, the stored
    /// height otherwise.
    pub fn display_height(&self) -> u32 {
        if self.is_quarter_turn() {
            u32::from(self.width)
        } else {
            u32::from(self.height)
        }
    }

    /// Page resolution in dots per inch.
    pub fn dpi(&self) -> u16 {
        self.dpi
    }

    /// The 0-based index of this page within the document.
    pub fn index(&self) -> usize {
        self.index
    }

    /// Page rotation from the INFO chunk.
    pub fn rotation(&self) -> document::Rotation {
        self.rotation
    }

    /// Decode the JB2 mask layer only (no compositing).
    ///
    /// Returns `None` when the page has no Sjbz chunk (pure IW44 background page).
    /// Useful for benchmarking the decode phase in isolation.
    ///
    /// # Errors
    ///
    /// Propagates errors from the page lookup or from mask decoding.
    pub fn decode_mask(&self) -> Result<Option<Bitmap>, Error> {
        let page = self.doc.doc.page(self.index)?;
        page.decode_mask()
    }

    /// Render the page to an RGBA pixmap at native resolution.
    ///
    /// # Errors
    ///
    /// Propagates errors from the page lookup or from rendering.
    pub fn render(&self) -> Result<Pixmap, Error> {
        let page = self.doc.doc.page(self.index)?;
        render::render(&page)
    }

    /// Render the page to an RGBA pixmap at a target size.
    ///
    /// # Errors
    ///
    /// Propagates errors from the page lookup or from rendering.
    pub fn render_to_size(&self, width: u32, height: u32) -> Result<Pixmap, Error> {
        let page = self.doc.doc.page(self.index)?;
        render::render_to_size(&page, width, height)
    }

    /// Render the page at native resolution with mask dilation for bolder text.
    ///
    /// The target size is taken from the page's own info record.
    ///
    /// # Errors
    ///
    /// Propagates errors from the page lookup or from rendering.
    pub fn render_bold(&self, dilate_passes: u32) -> Result<Pixmap, Error> {
        let page = self.doc.doc.page(self.index)?;
        render::render_to_size_bold(
            &page,
            u32::from(page.info.width),
            u32::from(page.info.height),
            dilate_passes,
        )
    }

    /// Render the page to a target size with mask dilation for bolder text.
    ///
    /// # Errors
    ///
    /// Propagates errors from the page lookup or from rendering.
    pub fn render_to_size_bold(
        &self,
        width: u32,
        height: u32,
        dilate_passes: u32,
    ) -> Result<Pixmap, Error> {
        let page = self.doc.doc.page(self.index)?;
        render::render_to_size_bold(&page, width, height, dilate_passes)
    }

    /// Render the page at a target size with anti-aliased downscaling.
    ///
    /// # Errors
    ///
    /// Propagates errors from the page lookup or from rendering.
    pub fn render_aa(&self, width: u32, height: u32, boldness: f32) -> Result<Pixmap, Error> {
        let page = self.doc.doc.page(self.index)?;
        render::render_aa(&page, width, height, boldness)
    }

    /// Decode the page thumbnail, if available.
    pub fn thumbnail(&self) -> Result<Option<Pixmap>, Error> {
        self.doc.doc.thumbnail(self.index)
    }

    /// Extract the text layer (TXTz/TXTa) with zone hierarchy.
    ///
    /// # Errors
    ///
    /// Propagates errors from the page lookup or from text-layer parsing.
    pub fn text_layer(&self) -> Result<Option<TextLayer>, Error> {
        let page = self.doc.doc.page(self.index)?;
        page.text_layer()
    }

    /// Extract the plain text content of the page.
    ///
    /// Returns `None` when [`Page::text_layer`] yields no text layer.
    pub fn text(&self) -> Result<Option<String>, Error> {
        Ok(self.text_layer()?.map(|layer| layer.text))
    }

    /// Fast coarse render: decode only the first BG44 chunk (blurry preview).
    ///
    /// `scale` is applied to the page dimensions; each target dimension is
    /// at least 1 pixel.
    ///
    /// # Errors
    ///
    /// Propagates errors from the page lookup or from rendering.
    pub fn render_scaled_coarse(&self, scale: f32) -> Result<Option<Pixmap>, Error> {
        let (target_w, target_h) = self.scaled_target_size(scale);
        let page = self.doc.doc.page(self.index)?;
        render::render_to_size_coarse(&page, target_w, target_h)
    }

    /// Progressive rendering: returns increasingly refined pixmaps.
    ///
    /// `scale` is applied to the page dimensions; each target dimension is
    /// at least 1 pixel.
    ///
    /// # Errors
    ///
    /// Propagates errors from the page lookup or from rendering.
    pub fn render_scaled_progressive(&self, scale: f32) -> Result<Vec<Pixmap>, Error> {
        let (target_w, target_h) = self.scaled_target_size(scale);
        let page = self.doc.doc.page(self.index)?;
        render::render_to_size_progressive(&page, target_w, target_h)
    }

    /// Render the page scaled by a factor (e.g. 0.5 = half size, 2.0 = double).
    ///
    /// Each target dimension is at least 1 pixel.
    ///
    /// # Errors
    ///
    /// Propagates errors from the page lookup or from rendering.
    pub fn render_scaled(&self, scale: f32) -> Result<Pixmap, Error> {
        let (target_w, target_h) = self.scaled_target_size(scale);
        let page = self.doc.doc.page(self.index)?;
        render::render_to_size(&page, target_w, target_h)
    }
}
473
// Compile-time check that `Document` is both `Send` and `Sync`.
//
// Nothing here runs: instantiating the bounded generic helper with
// `Document` makes the build fail if either auto trait is ever lost.
#[cfg(feature = "std")]
#[allow(dead_code)]
const _: () = {
    fn require_send_and_sync<T: Send + Sync>() {}
    fn check_document() {
        require_send_and_sync::<Document>();
    }
};