Skip to main content

djvu_rs/
epub.rs

1//! DjVu to EPUB 3 converter — preserves document structure.
2//!
3//! Converts DjVu documents to EPUB 3 while preserving:
4//! - Page images as PNG (one per page)
5//! - Invisible text overlay for search and copy
6//! - NAVM bookmarks as EPUB navigation (`nav.xhtml`)
7//! - ANTz hyperlinks as `<a href>` in page XHTML
8//!
9//! # Example
10//!
11//! ```no_run
12//! use djvu_rs::djvu_document::DjVuDocument;
13//! use djvu_rs::epub::{djvu_to_epub, EpubOptions};
14//!
15//! let data = std::fs::read("book.djvu").unwrap();
16//! let doc = DjVuDocument::parse(&data).unwrap();
17//! let epub_bytes = djvu_to_epub(&doc, &EpubOptions::default()).unwrap();
18//! std::fs::write("book.epub", epub_bytes).unwrap();
19//! ```
20
21use std::io::Write;
22
23use zip::{CompressionMethod, ZipWriter, write::SimpleFileOptions};
24
25use crate::{
26    djvu_document::{DjVuBookmark, DjVuDocument, DjVuPage, DocError},
27    djvu_render::{self, RenderError, RenderOptions},
28    text::TextZoneKind,
29};
30
31// ── Errors ────────────────────────────────────────────────────────────────────
32
/// Errors that can occur while converting a DjVu document to EPUB.
///
/// Each variant wraps an underlying error type and is produced through its
/// `#[from]` conversion, so `?` can be applied directly to results carrying
/// any of these errors.
#[derive(Debug, thiserror::Error)]
pub enum EpubError {
    /// Document model error, e.g. a page could not be retrieved.
    #[error("document error: {0}")]
    Doc(#[from] DocError),
    /// Page rendering error.
    #[error("render error: {0}")]
    Render(#[from] RenderError),
    /// Error reported by the ZIP writer while building the archive.
    #[error("zip error: {0}")]
    Zip(#[from] zip::result::ZipError),
    /// I/O error while writing entry data.
    #[error("io error: {0}")]
    Io(#[from] std::io::Error),
}
49
50// ── Options ───────────────────────────────────────────────────────────────────
51
/// Caller-supplied settings for the DjVu → EPUB conversion.
#[derive(Debug, Clone)]
pub struct EpubOptions {
    /// Book title written to `<dc:title>` in the OPF metadata.
    /// Default: `"DjVu Document"`.
    pub title: String,
    /// Author written to `<dc:creator>` in the OPF metadata.
    /// Default: empty string.
    pub author: String,
    /// Requested page rendering resolution in DPI. Default: 150.
    pub dpi: u32,
}

impl Default for EpubOptions {
    /// Returns the documented defaults: a generic title, no author, 150 DPI.
    fn default() -> Self {
        let title = String::from("DjVu Document");
        let author = String::default();
        Self {
            title,
            author,
            dpi: 150,
        }
    }
}
72
73// ── Public API ────────────────────────────────────────────────────────────────
74
75/// Convert a DjVu document to EPUB 3.
76///
77/// Returns the raw bytes of a valid EPUB file (ZIP archive).
78///
79/// # Errors
80///
81/// Returns [`EpubError`] if page rendering or ZIP writing fails.
82pub fn djvu_to_epub(doc: &DjVuDocument, opts: &EpubOptions) -> Result<Vec<u8>, EpubError> {
83    let buf = Vec::new();
84    let cursor = std::io::Cursor::new(buf);
85    let mut zip = ZipWriter::new(cursor);
86
87    // 1. mimetype — MUST be first and STORED (no compression), per EPUB spec
88    zip.start_file(
89        "mimetype",
90        SimpleFileOptions::default().compression_method(CompressionMethod::Stored),
91    )?;
92    zip.write_all(b"application/epub+zip")?;
93
94    // 2. META-INF/container.xml
95    zip.start_file(
96        "META-INF/container.xml",
97        SimpleFileOptions::default().compression_method(CompressionMethod::Deflated),
98    )?;
99    zip.write_all(CONTAINER_XML.as_bytes())?;
100
101    // 3. Per-page content
102    let page_count = doc.page_count();
103    for i in 0..page_count {
104        let page = doc.page(i)?;
105        write_page(&mut zip, page, i, opts)?;
106    }
107
108    // 4. Navigation document
109    let nav_xhtml = build_nav(doc.bookmarks(), page_count);
110    zip.start_file(
111        "OEBPS/nav.xhtml",
112        SimpleFileOptions::default().compression_method(CompressionMethod::Deflated),
113    )?;
114    zip.write_all(nav_xhtml.as_bytes())?;
115
116    // 5. OPF package document
117    let opf = build_opf(opts, page_count);
118    zip.start_file(
119        "OEBPS/content.opf",
120        SimpleFileOptions::default().compression_method(CompressionMethod::Deflated),
121    )?;
122    zip.write_all(opf.as_bytes())?;
123
124    let cursor = zip.finish()?;
125    Ok(cursor.into_inner())
126}
127
128// ── Per-page writer ───────────────────────────────────────────────────────────
129
130fn write_page(
131    zip: &mut ZipWriter<std::io::Cursor<Vec<u8>>>,
132    page: &DjVuPage,
133    index: usize,
134    opts: &EpubOptions,
135) -> Result<(), EpubError> {
136    let pw = page.width() as u32;
137    let ph = page.height() as u32;
138    let dpi = page.dpi().max(1) as f32;
139
140    let render_opts = RenderOptions {
141        width: pw,
142        height: ph,
143        ..RenderOptions::default()
144    };
145    let pixmap = djvu_render::render_pixmap(page, &render_opts)?;
146
147    // Encode as PNG
148    let png_bytes = encode_rgba_to_png(&pixmap.data, pw, ph);
149
150    let page_num = index + 1;
151    let img_name = format!("page_{page_num:04}.png");
152    let img_path = format!("OEBPS/images/{img_name}");
153
154    zip.start_file(
155        &img_path,
156        SimpleFileOptions::default().compression_method(CompressionMethod::Stored),
157    )?;
158    zip.write_all(&png_bytes)?;
159
160    // Build text overlay from text layer
161    let text_overlay = build_text_overlay(page, dpi, pw, ph);
162
163    // Build XHTML page
164    let xhtml = build_page_xhtml(&img_name, pw, ph, &text_overlay, opts);
165    let xhtml_path = format!("OEBPS/pages/page_{page_num:04}.xhtml");
166
167    zip.start_file(
168        &xhtml_path,
169        SimpleFileOptions::default().compression_method(CompressionMethod::Deflated),
170    )?;
171    zip.write_all(xhtml.as_bytes())?;
172
173    Ok(())
174}
175
176// ── PNG encoder ───────────────────────────────────────────────────────────────
177
178fn encode_rgba_to_png(rgba: &[u8], width: u32, height: u32) -> Vec<u8> {
179    let mut buf = Vec::new();
180    {
181        let mut enc = png::Encoder::new(std::io::Cursor::new(&mut buf), width, height);
182        enc.set_color(png::ColorType::Rgba);
183        enc.set_depth(png::BitDepth::Eight);
184        if let Ok(mut writer) = enc.write_header() {
185            let _ = writer.write_image_data(rgba);
186        }
187    }
188    buf
189}
190
191// ── Text overlay ─────────────────────────────────────────────────────────────
192
193/// Returns a Vec of `(x_pct, y_pct, w_pct, h_pct, text)` for word/char zones.
194/// Coordinates are expressed as percentages of page dimensions for CSS positioning.
195fn build_text_overlay(
196    page: &DjVuPage,
197    _dpi: f32,
198    pw: u32,
199    ph: u32,
200) -> Vec<(f32, f32, f32, f32, String)> {
201    let text_layer = match page.text_layer() {
202        Ok(Some(tl)) => tl,
203        _ => return Vec::new(),
204    };
205
206    let mut spans = Vec::new();
207
208    fn walk(
209        zones: &[crate::text::TextZone],
210        spans: &mut Vec<(f32, f32, f32, f32, String)>,
211        pw: u32,
212        ph: u32,
213    ) {
214        for zone in zones {
215            match zone.kind {
216                TextZoneKind::Word | TextZoneKind::Character => {
217                    if zone.text.is_empty() {
218                        continue;
219                    }
220                    let r = &zone.rect;
221                    let x = r.x as f32 / pw as f32 * 100.0;
222                    let y = r.y as f32 / ph as f32 * 100.0;
223                    let w = r.width as f32 / pw as f32 * 100.0;
224                    let h = r.height as f32 / ph as f32 * 100.0;
225                    if w > 0.0 && h > 0.0 {
226                        spans.push((x, y, w, h, xml_escape(&zone.text)));
227                    }
228                }
229                _ => walk(&zone.children, spans, pw, ph),
230            }
231        }
232    }
233
234    walk(&text_layer.zones, &mut spans, pw, ph);
235    spans
236}
237
238// ── XHTML page ────────────────────────────────────────────────────────────────
239
240fn build_page_xhtml(
241    img_name: &str,
242    pw: u32,
243    ph: u32,
244    text_overlay: &[(f32, f32, f32, f32, String)],
245    _opts: &EpubOptions,
246) -> String {
247    let mut html = String::new();
248    html.push_str(
249        r#"<?xml version="1.0" encoding="UTF-8"?>
250<!DOCTYPE html>
251<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
252<head>
253<meta charset="UTF-8"/>
254<title>Page</title>
255<style>
256body { margin: 0; padding: 0; }
257.djvu-page { position: relative; display: block; width: 100%; }
258.djvu-page img { display: block; width: 100%; height: auto; }
259.djvu-text {
260  position: absolute;
261  color: transparent;
262  background: transparent;
263  white-space: pre;
264  overflow: hidden;
265  pointer-events: none;
266}
267</style>
268</head>
269<body>
270"#,
271    );
272
273    html.push_str(&format!(
274        r#"<div class="djvu-page" style="width:{pw}px; height:{ph}px;">"#
275    ));
276    html.push_str(&format!(
277        r#"<img src="../images/{img_name}" alt="page" width="{pw}" height="{ph}"/>"#
278    ));
279
280    for (x, y, w, h, text) in text_overlay {
281        html.push_str(&format!(
282            r#"<span class="djvu-text" aria-hidden="true" style="left:{x:.3}%;top:{y:.3}%;width:{w:.3}%;height:{h:.3}%;">{text}</span>"#
283        ));
284    }
285
286    html.push_str("</div>\n</body>\n</html>\n");
287    html
288}
289
290// ── OPF package ──────────────────────────────────────────────────────────────
291
292fn build_opf(opts: &EpubOptions, page_count: usize) -> String {
293    let title = xml_escape(&opts.title);
294    let author = xml_escape(&opts.author);
295
296    let mut manifest_items = String::new();
297    let mut spine_items = String::new();
298
299    // nav document
300    manifest_items.push_str(
301        r#"    <item id="nav" href="nav.xhtml" media-type="application/xhtml+xml" properties="nav"/>
302"#,
303    );
304
305    for i in 1..=page_count {
306        let pid = format!("page_{i:04}");
307        manifest_items.push_str(&format!(
308            "    <item id=\"{pid}\" href=\"pages/page_{i:04}.xhtml\" media-type=\"application/xhtml+xml\"/>\n"
309        ));
310        manifest_items.push_str(&format!(
311            "    <item id=\"img_{pid}\" href=\"images/page_{i:04}.png\" media-type=\"image/png\"/>\n"
312        ));
313        spine_items.push_str(&format!("    <itemref idref=\"{pid}\"/>\n"));
314    }
315
316    format!(
317        r#"<?xml version="1.0" encoding="UTF-8"?>
318<package xmlns="http://www.idpf.org/2007/opf" version="3.0" epub:type="book"
319         xmlns:epub="http://www.idpf.org/2007/ops" unique-identifier="uid">
320  <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
321    <dc:title>{title}</dc:title>
322    <dc:creator>{author}</dc:creator>
323    <dc:language>en</dc:language>
324    <dc:identifier id="uid">djvu-rs-export</dc:identifier>
325    <meta property="dcterms:modified">2024-01-01T00:00:00Z</meta>
326  </metadata>
327  <manifest>
328{manifest_items}  </manifest>
329  <spine>
330{spine_items}  </spine>
331</package>
332"#
333    )
334}
335
336// ── Navigation document ───────────────────────────────────────────────────────
337
338fn build_nav(bookmarks: &[DjVuBookmark], page_count: usize) -> String {
339    let toc_items = if bookmarks.is_empty() {
340        build_default_nav_items(page_count)
341    } else {
342        build_bookmark_nav_items(bookmarks)
343    };
344
345    format!(
346        r#"<?xml version="1.0" encoding="UTF-8"?>
347<!DOCTYPE html>
348<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
349<head><meta charset="UTF-8"/><title>Navigation</title></head>
350<body>
351<nav epub:type="toc" id="toc">
352  <h1>Contents</h1>
353  <ol>
354{toc_items}  </ol>
355</nav>
356</body>
357</html>
358"#
359    )
360}
361
/// Produce one `<li>` per page ("Page 1", "Page 2", …), each linking to the
/// corresponding `pages/page_NNNN.xhtml` file. Returns an empty string when
/// `page_count` is zero.
fn build_default_nav_items(page_count: usize) -> String {
    (1..=page_count)
        .map(|page| {
            format!("    <li><a href=\"pages/page_{page:04}.xhtml\">Page {page}</a></li>\n")
        })
        .collect()
}
371
372fn build_bookmark_nav_items(bookmarks: &[DjVuBookmark]) -> String {
373    let mut s = String::new();
374    for bm in bookmarks {
375        let title = xml_escape(&bm.title);
376        let href = bookmark_href(&bm.url);
377        s.push_str(&format!("    <li><a href=\"{href}\">{title}</a>"));
378        if !bm.children.is_empty() {
379            s.push_str("\n    <ol>\n");
380            s.push_str(&build_bookmark_nav_items_inner(&bm.children, 2));
381            s.push_str("    </ol>");
382        }
383        s.push_str("</li>\n");
384    }
385    s
386}
387
388fn build_bookmark_nav_items_inner(bookmarks: &[DjVuBookmark], depth: usize) -> String {
389    let indent = "  ".repeat(depth + 1);
390    let mut s = String::new();
391    for bm in bookmarks {
392        let title = xml_escape(&bm.title);
393        let href = bookmark_href(&bm.url);
394        s.push_str(&format!("{indent}<li><a href=\"{href}\">{title}</a>"));
395        if !bm.children.is_empty() {
396            s.push_str(&format!("\n{indent}<ol>\n"));
397            s.push_str(&build_bookmark_nav_items_inner(&bm.children, depth + 1));
398            s.push_str(&format!("{indent}</ol>"));
399        }
400        s.push_str("</li>\n");
401    }
402    s
403}
404
405/// Convert a DjVu bookmark URL to an EPUB relative href.
406/// DjVu bookmarks use `#page=N` (1-based) or bare `#anchor` format.
407fn bookmark_href(url: &str) -> String {
408    // Try to parse `#page=N` pattern
409    if let Some(rest) = url.strip_prefix('#') {
410        if let Some(n_str) = rest.strip_prefix("page=")
411            && let Ok(n) = n_str.trim().parse::<usize>()
412            && n >= 1
413        {
414            return format!("pages/page_{n:04}.xhtml");
415        }
416        // plain anchor — link to page 1 with anchor
417        return format!("pages/page_0001.xhtml{}", xml_escape(url));
418    }
419    // External URL — keep as-is
420    xml_escape(url)
421}
422
423// ── Helpers ───────────────────────────────────────────────────────────────────
424
/// Escape `s` for inclusion in XML text or attribute values.
///
/// Replaces `&`, `<`, `>`, `"` and `'` with their predefined entities in a
/// single pass. Characters that XML 1.0 does not allow in a document at all
/// (C0 control characters other than tab, line feed and carriage return, plus
/// U+FFFE and U+FFFF) are dropped, since even escaped they would make the
/// output ill-formed.
fn xml_escape(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            '\'' => out.push_str("&apos;"),
            // The only C0 controls permitted by the XML 1.0 `Char` production.
            '\t' | '\n' | '\r' => out.push(ch),
            // Not representable in XML 1.0: drop.
            c if c < '\u{20}' || c == '\u{FFFE}' || c == '\u{FFFF}' => {}
            c => out.push(c),
        }
    }
    out
}
432
/// Contents of `META-INF/container.xml`: its single `rootfile` entry points
/// at the package document `OEBPS/content.opf`.
const CONTAINER_XML: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
  <rootfiles>
    <rootfile full-path="OEBPS/content.opf"
              media-type="application/oebps-package+xml"/>
  </rootfiles>
</container>
"#;
441
// Unit tests for the pure string-building helpers; nothing here renders
// pages or touches a real DjVu document.
#[cfg(test)]
mod tests {
    use super::*;

    /// All five predefined XML entities are produced.
    #[test]
    fn xml_escape_basic() {
        assert_eq!(
            xml_escape("a&b<c>d\"e'f"),
            "a&amp;b&lt;c&gt;d&quot;e&apos;f"
        );
    }

    /// `#page=N` bookmarks map to the zero-padded page file name.
    #[test]
    fn bookmark_href_page_number() {
        assert_eq!(bookmark_href("#page=3"), "pages/page_0003.xhtml");
        assert_eq!(bookmark_href("#page=1"), "pages/page_0001.xhtml");
    }

    /// URLs that do not start with `#` are passed through (escaped only).
    #[test]
    fn bookmark_href_external() {
        assert_eq!(bookmark_href("https://example.com"), "https://example.com");
    }

    /// Without bookmarks the nav document still has a TOC listing every page.
    #[test]
    fn nav_has_toc_for_empty_bookmarks() {
        let nav = build_nav(&[], 2);
        assert!(nav.contains("epub:type=\"toc\""));
        assert!(nav.contains("page_0001.xhtml"));
        assert!(nav.contains("page_0002.xhtml"));
    }
}