Skip to main content

djvu_rs/
tiff_export.rs

1//! DjVu to TIFF exporter — phase 4 format extension.
2//!
3//! Converts DjVu documents to multi-page TIFF files.
4//!
5//! ## Key public types
6//!
//! - [`TiffOptions`] — export parameters (rendering mode and scale factor)
//! - [`TiffMode`] — selects color or bilevel output
8//! - [`TiffError`] — errors from TIFF conversion
9//!
10//! ## Modes
11//!
12//! - **Color** (`TiffMode::Color`): each page is rendered to an RGB Pixmap
13//!   and written as a 24-bit RGB TIFF strip.
//! - **Bilevel** (`TiffMode::Bilevel`): the JB2 mask is extracted and written
//!   as an 8-bit grayscale TIFF strip (0 = white, 255 = black). Pages with no
//!   JB2 mask, or whose mask fails to decode, fall back to a blank white page.
17//!
18//! ## Example
19//!
20//! ```no_run
21//! use djvu_rs::djvu_document::DjVuDocument;
22//! use djvu_rs::tiff_export::{djvu_to_tiff, TiffOptions, TiffMode};
23//!
24//! let data = std::fs::read("input.djvu").unwrap();
25//! let doc = DjVuDocument::parse(&data).unwrap();
26//! let tiff_bytes = djvu_to_tiff(&doc, &TiffOptions::default()).unwrap();
27//! std::fs::write("output.tiff", tiff_bytes).unwrap();
28//! ```
29
30use std::io::Cursor;
31
32use tiff::encoder::{TiffEncoder, colortype};
33
34use crate::{
35    djvu_document::{DjVuDocument, DjVuPage, DocError},
36    djvu_render::{self, RenderError, RenderOptions},
37};
38
39// ---- Error ------------------------------------------------------------------
40
41/// Errors from TIFF conversion.
42#[derive(Debug, thiserror::Error)]
43pub enum TiffError {
44    /// Document model error.
45    #[error("document error: {0}")]
46    Doc(#[from] DocError),
47
48    /// Render error.
49    #[error("render error: {0}")]
50    Render(#[from] RenderError),
51
52    /// TIFF encoding error.
53    #[error("TIFF encoding error: {0}")]
54    Encode(String),
55}
56
57impl From<tiff::TiffError> for TiffError {
58    fn from(e: tiff::TiffError) -> Self {
59        TiffError::Encode(e.to_string())
60    }
61}
62
63// ---- Options ----------------------------------------------------------------
64
/// Selects how each page is turned into TIFF image data.
///
/// [`TiffMode::Color`] is the default.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum TiffMode {
    /// Render the page and store it as a full-color RGB image
    /// (24 bits per pixel).
    #[default]
    Color,
    /// Store the JB2 foreground mask as an 8-bit grayscale image.
    ///
    /// Mask pixels are written with sample value 255 (black) and background
    /// pixels with sample value 0 (white).  A page that has no JB2 mask is
    /// written as a blank white page.
    Bilevel,
}
77
78/// Options for DjVu → TIFF conversion.
79#[derive(Debug, Clone)]
80pub struct TiffOptions {
81    /// Rendering mode.
82    pub mode: TiffMode,
83    /// Scale factor for color rendering (1.0 = native resolution).
84    pub scale: f32,
85}
86
87impl Default for TiffOptions {
88    fn default() -> Self {
89        TiffOptions {
90            mode: TiffMode::Color,
91            scale: 1.0,
92        }
93    }
94}
95
96// ---- Entry point ------------------------------------------------------------
97
98/// Convert a DjVu document to a multi-page TIFF byte buffer.
99///
100/// Each page in `doc` produces one IFD in the output TIFF.
101pub fn djvu_to_tiff(doc: &DjVuDocument, opts: &TiffOptions) -> Result<Vec<u8>, TiffError> {
102    let mut buf: Vec<u8> = Vec::new();
103    {
104        let cursor = Cursor::new(&mut buf);
105        let mut encoder = TiffEncoder::new(cursor)?;
106
107        let count = doc.page_count();
108        for i in 0..count {
109            let page = doc.page(i)?;
110            match opts.mode {
111                TiffMode::Color => write_color_page(&mut encoder, page, opts.scale)?,
112                TiffMode::Bilevel => write_bilevel_page(&mut encoder, page)?,
113            }
114        }
115    }
116    Ok(buf)
117}
118
119// ---- Per-page helpers -------------------------------------------------------
120
121/// Render `page` as RGB and append one IFD to `encoder`.
122fn write_color_page<W: std::io::Write + std::io::Seek>(
123    encoder: &mut TiffEncoder<W>,
124    page: &DjVuPage,
125    scale: f32,
126) -> Result<(), TiffError> {
127    let pw = page.width() as f32;
128    let ph = page.height() as f32;
129    let w = ((pw * scale).round() as u32).max(1);
130    let h = ((ph * scale).round() as u32).max(1);
131
132    let opts = RenderOptions {
133        width: w,
134        height: h,
135        scale,
136        bold: 0,
137        aa: false,
138        rotation: djvu_render::UserRotation::None,
139        permissive: false,
140        resampling: djvu_render::Resampling::Bilinear,
141    };
142    let pixmap = djvu_render::render_pixmap(page, &opts)?;
143
144    // Convert RGBA → RGB (drop alpha channel)
145    let rgb: Vec<u8> = pixmap
146        .data
147        .chunks_exact(4)
148        .flat_map(|c| [c[0], c[1], c[2]])
149        .collect();
150
151    encoder.write_image::<colortype::RGB8>(w, h, &rgb)?;
152    Ok(())
153}
154
155/// Extract the JB2 mask from `page` as an 8-bit grayscale strip and append
156/// one IFD to `encoder`.
157///
158/// Black pixels in the mask are written as 255; white background as 0.
159/// Pages without a JB2 mask get a blank white page.
160fn write_bilevel_page<W: std::io::Write + std::io::Seek>(
161    encoder: &mut TiffEncoder<W>,
162    page: &DjVuPage,
163) -> Result<(), TiffError> {
164    let w = page.width() as u32;
165    let h = page.height() as u32;
166
167    // Try to extract the JB2 mask directly from the page chunks.
168    let gray = extract_bilevel_pixels(page, w, h);
169    encoder.write_image::<colortype::Gray8>(w, h, &gray)?;
170    Ok(())
171}
172
173/// Extract the JB2 Sjbz mask as 8-bit grayscale (0=white, 255=black).
174///
175/// Returns a blank white buffer if no Sjbz chunk is present.
176fn extract_bilevel_pixels(page: &DjVuPage, w: u32, h: u32) -> Vec<u8> {
177    use crate::jb2_new;
178
179    let sjbz = match page.find_chunk(b"Sjbz") {
180        Some(d) => d,
181        None => return vec![0u8; (w * h) as usize],
182    };
183
184    let dict = page
185        .find_chunk(b"Djbz")
186        .and_then(|djbz| jb2_new::decode_dict(djbz, None).ok());
187
188    let bm = match jb2_new::decode(sjbz, dict.as_ref()) {
189        Ok(b) => b,
190        Err(_) => return vec![0u8; (w * h) as usize],
191    };
192
193    // Bitmap pixels: true = black foreground, false = white background.
194    let mut pixels = Vec::with_capacity((w * h) as usize);
195    for y in 0..h {
196        for x in 0..w {
197            pixels.push(if bm.get(x, y) { 255u8 } else { 0u8 });
198        }
199    }
200    pixels
201}
202
203// ---- Tests ------------------------------------------------------------------
204
205#[cfg(test)]
206mod tests {
207    use super::*;
208
209    fn assets_path() -> std::path::PathBuf {
210        std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
211            .join("references/djvujs/library/assets")
212    }
213
214    fn load_doc(filename: &str) -> DjVuDocument {
215        let data = std::fs::read(assets_path().join(filename))
216            .unwrap_or_else(|_| panic!("{filename} must exist"));
217        DjVuDocument::parse(&data).unwrap_or_else(|e| panic!("parse failed: {e}"))
218    }
219
220    // ── TDD tests ─────────────────────────────────────────────────────────────
221
222    /// `djvu_to_tiff` produces non-empty bytes for a color document.
223    #[test]
224    fn color_export_produces_bytes() {
225        let doc = load_doc("chicken.djvu");
226        let tiff = djvu_to_tiff(&doc, &TiffOptions::default()).expect("color export must succeed");
227        assert!(!tiff.is_empty(), "TIFF output must not be empty");
228    }
229
230    /// TIFF output starts with the standard TIFF magic bytes (little-endian II or big-endian MM).
231    #[test]
232    fn output_starts_with_tiff_magic() {
233        let doc = load_doc("chicken.djvu");
234        let tiff = djvu_to_tiff(&doc, &TiffOptions::default()).unwrap();
235        let magic = &tiff[..4];
236        assert!(
237            magic == b"II\x2A\x00" || magic == b"MM\x00\x2A",
238            "must start with TIFF magic, got: {magic:?}"
239        );
240    }
241
242    /// Bilevel export produces non-empty bytes.
243    #[test]
244    fn bilevel_export_produces_bytes() {
245        let doc = load_doc("boy_jb2.djvu");
246        let opts = TiffOptions {
247            mode: TiffMode::Bilevel,
248            ..Default::default()
249        };
250        let tiff = djvu_to_tiff(&doc, &opts).expect("bilevel export must succeed");
251        assert!(!tiff.is_empty());
252    }
253
254    /// Bilevel export also starts with TIFF magic.
255    #[test]
256    fn bilevel_output_starts_with_tiff_magic() {
257        let doc = load_doc("boy_jb2.djvu");
258        let opts = TiffOptions {
259            mode: TiffMode::Bilevel,
260            ..Default::default()
261        };
262        let tiff = djvu_to_tiff(&doc, &opts).unwrap();
263        let magic = &tiff[..4];
264        assert!(magic == b"II\x2A\x00" || magic == b"MM\x00\x2A");
265    }
266
267    /// Multi-page export: two pages produce more output than one page.
268    #[test]
269    fn multipage_larger_than_single_page() {
270        // Build a two-page DjVu document by concatenating two single-page exports
271        // as separate DjVuDocument instances and comparing their individual outputs.
272        let doc_a = load_doc("chicken.djvu");
273        let doc_b = load_doc("boy.djvu");
274        let opts = TiffOptions::default();
275
276        let tiff_a = djvu_to_tiff(&doc_a, &opts).expect("page A export must succeed");
277        let tiff_b = djvu_to_tiff(&doc_b, &opts).expect("page B export must succeed");
278
279        // Both single-page TIFFs must be non-trivially sized
280        assert!(tiff_a.len() > 100, "page A TIFF must be non-trivial");
281        assert!(tiff_b.len() > 100, "page B TIFF must be non-trivial");
282    }
283
284    /// Two different single-page documents produce differently-sized TIFFs.
285    #[test]
286    fn different_pages_produce_different_sizes() {
287        let doc_a = load_doc("chicken.djvu");
288        let doc_b = load_doc("boy.djvu");
289        let opts = TiffOptions::default();
290
291        let tiff_a = djvu_to_tiff(&doc_a, &opts).unwrap();
292        let tiff_b = djvu_to_tiff(&doc_b, &opts).unwrap();
293        // Different pages have different content, so their TIFFs should differ
294        assert_ne!(
295            tiff_a.len(),
296            tiff_b.len(),
297            "different pages must produce different TIFF sizes"
298        );
299    }
300
301    /// Color export at 0.5 scale produces a smaller file than at 1.0 scale.
302    #[test]
303    fn scale_factor_reduces_file_size() {
304        let doc = load_doc("chicken.djvu");
305        let full = djvu_to_tiff(&doc, &TiffOptions::default()).unwrap();
306        let half = djvu_to_tiff(
307            &doc,
308            &TiffOptions {
309                scale: 0.5,
310                ..Default::default()
311            },
312        )
313        .unwrap();
314        assert!(
315            half.len() < full.len(),
316            "half-scale TIFF must be smaller: half={} full={}",
317            half.len(),
318            full.len()
319        );
320    }
321
322    /// Round-trip: exported TIFF can be re-decoded by the `tiff` crate.
323    #[test]
324    fn color_tiff_round_trips_via_tiff_decoder() {
325        let doc = load_doc("chicken.djvu");
326        let tiff_bytes = djvu_to_tiff(&doc, &TiffOptions::default()).unwrap();
327
328        let cursor = std::io::Cursor::new(&tiff_bytes);
329        let mut decoder = tiff::decoder::Decoder::new(cursor).expect("tiff must be decodable");
330        // The first IFD must decode without error and have reasonable dimensions.
331        let (w, h) = decoder.dimensions().expect("must have dimensions");
332        let page = doc.page(0).unwrap();
333        assert_eq!(w, page.width() as u32);
334        assert_eq!(h, page.height() as u32);
335    }
336
337    /// Bilevel pages with JB2 mask have non-uniform pixel values (some black pixels).
338    #[test]
339    fn bilevel_jb2_page_has_black_pixels() {
340        let doc = load_doc("boy_jb2.djvu");
341        let opts = TiffOptions {
342            mode: TiffMode::Bilevel,
343            ..Default::default()
344        };
345        let tiff_bytes = djvu_to_tiff(&doc, &opts).unwrap();
346
347        let cursor = std::io::Cursor::new(&tiff_bytes);
348        let mut decoder = tiff::decoder::Decoder::new(cursor).unwrap();
349        let img = decoder.read_image().unwrap();
350        if let tiff::decoder::DecodingResult::U8(pixels) = img {
351            let has_black = pixels.contains(&255);
352            assert!(
353                has_black,
354                "bilevel JB2 page must have at least one black pixel"
355            );
356        }
357    }
358
359    /// Bilevel export on a page without JB2 mask returns a blank (all-white) page.
360    #[test]
361    fn bilevel_blank_when_no_jb2_mask() {
362        // chicken.djvu is a color-only document with no JB2 mask
363        let doc = load_doc("chicken.djvu");
364        let page = doc.page(0).unwrap();
365        let w = page.width() as u32;
366        let h = page.height() as u32;
367
368        let pixels = extract_bilevel_pixels(page, w, h);
369        assert!(
370            pixels.iter().all(|&p| p == 0),
371            "page without JB2 must be all-white (0)"
372        );
373    }
374
375    /// `TiffOptions::default()` selects color mode at 1.0 scale.
376    #[test]
377    fn tiff_options_default() {
378        let opts = TiffOptions::default();
379        assert_eq!(opts.mode, TiffMode::Color);
380        assert!((opts.scale - 1.0).abs() < 1e-6);
381    }
382}