1use std::io::Write;
22
23use zip::{CompressionMethod, ZipWriter, write::SimpleFileOptions};
24
25use crate::{
26 djvu_document::{DjVuBookmark, DjVuDocument, DjVuPage, DocError},
27 djvu_render::{self, RenderError, RenderOptions},
28 text::TextZoneKind,
29};
30
31#[derive(Debug, thiserror::Error)]
35pub enum EpubError {
36 #[error("document error: {0}")]
38 Doc(#[from] DocError),
39 #[error("render error: {0}")]
41 Render(#[from] RenderError),
42 #[error("zip error: {0}")]
44 Zip(#[from] zip::result::ZipError),
45 #[error("io error: {0}")]
47 Io(#[from] std::io::Error),
48}
49
/// Settings for an EPUB export.
#[derive(Clone, Debug)]
pub struct EpubOptions {
    /// Book title.
    pub title: String,
    /// Book author; may be empty.
    pub author: String,
    /// Resolution setting in dots per inch.
    // NOTE(review): nothing visible in this file reads `dpi`; confirm whether
    // page rendering is expected to honour it.
    pub dpi: u32,
}

impl Default for EpubOptions {
    /// Returns the title `"DjVu Document"`, an empty author and a `dpi` of 150.
    fn default() -> Self {
        EpubOptions {
            title: String::from("DjVu Document"),
            author: String::default(),
            dpi: 150,
        }
    }
}
72
73pub fn djvu_to_epub(doc: &DjVuDocument, opts: &EpubOptions) -> Result<Vec<u8>, EpubError> {
83 let buf = Vec::new();
84 let cursor = std::io::Cursor::new(buf);
85 let mut zip = ZipWriter::new(cursor);
86
87 zip.start_file(
89 "mimetype",
90 SimpleFileOptions::default().compression_method(CompressionMethod::Stored),
91 )?;
92 zip.write_all(b"application/epub+zip")?;
93
94 zip.start_file(
96 "META-INF/container.xml",
97 SimpleFileOptions::default().compression_method(CompressionMethod::Deflated),
98 )?;
99 zip.write_all(CONTAINER_XML.as_bytes())?;
100
101 let page_count = doc.page_count();
103 for i in 0..page_count {
104 let page = doc.page(i)?;
105 write_page(&mut zip, page, i, opts)?;
106 }
107
108 let nav_xhtml = build_nav(doc.bookmarks(), page_count);
110 zip.start_file(
111 "OEBPS/nav.xhtml",
112 SimpleFileOptions::default().compression_method(CompressionMethod::Deflated),
113 )?;
114 zip.write_all(nav_xhtml.as_bytes())?;
115
116 let opf = build_opf(opts, page_count);
118 zip.start_file(
119 "OEBPS/content.opf",
120 SimpleFileOptions::default().compression_method(CompressionMethod::Deflated),
121 )?;
122 zip.write_all(opf.as_bytes())?;
123
124 let cursor = zip.finish()?;
125 Ok(cursor.into_inner())
126}
127
128fn write_page(
131 zip: &mut ZipWriter<std::io::Cursor<Vec<u8>>>,
132 page: &DjVuPage,
133 index: usize,
134 opts: &EpubOptions,
135) -> Result<(), EpubError> {
136 let pw = page.width() as u32;
137 let ph = page.height() as u32;
138 let dpi = page.dpi().max(1) as f32;
139
140 let render_opts = RenderOptions {
141 width: pw,
142 height: ph,
143 ..RenderOptions::default()
144 };
145 let pixmap = djvu_render::render_pixmap(page, &render_opts)?;
146
147 let png_bytes = encode_rgba_to_png(&pixmap.data, pw, ph);
149
150 let page_num = index + 1;
151 let img_name = format!("page_{page_num:04}.png");
152 let img_path = format!("OEBPS/images/{img_name}");
153
154 zip.start_file(
155 &img_path,
156 SimpleFileOptions::default().compression_method(CompressionMethod::Stored),
157 )?;
158 zip.write_all(&png_bytes)?;
159
160 let text_overlay = build_text_overlay(page, dpi, pw, ph);
162
163 let xhtml = build_page_xhtml(&img_name, pw, ph, &text_overlay, opts);
165 let xhtml_path = format!("OEBPS/pages/page_{page_num:04}.xhtml");
166
167 zip.start_file(
168 &xhtml_path,
169 SimpleFileOptions::default().compression_method(CompressionMethod::Deflated),
170 )?;
171 zip.write_all(xhtml.as_bytes())?;
172
173 Ok(())
174}
175
176fn encode_rgba_to_png(rgba: &[u8], width: u32, height: u32) -> Vec<u8> {
179 let mut buf = Vec::new();
180 {
181 let mut enc = png::Encoder::new(std::io::Cursor::new(&mut buf), width, height);
182 enc.set_color(png::ColorType::Rgba);
183 enc.set_depth(png::BitDepth::Eight);
184 if let Ok(mut writer) = enc.write_header() {
185 let _ = writer.write_image_data(rgba);
186 }
187 }
188 buf
189}
190
191fn build_text_overlay(
196 page: &DjVuPage,
197 _dpi: f32,
198 pw: u32,
199 ph: u32,
200) -> Vec<(f32, f32, f32, f32, String)> {
201 let text_layer = match page.text_layer() {
202 Ok(Some(tl)) => tl,
203 _ => return Vec::new(),
204 };
205
206 let mut spans = Vec::new();
207
208 fn walk(
209 zones: &[crate::text::TextZone],
210 spans: &mut Vec<(f32, f32, f32, f32, String)>,
211 pw: u32,
212 ph: u32,
213 ) {
214 for zone in zones {
215 match zone.kind {
216 TextZoneKind::Word | TextZoneKind::Character => {
217 if zone.text.is_empty() {
218 continue;
219 }
220 let r = &zone.rect;
221 let x = r.x as f32 / pw as f32 * 100.0;
222 let y = r.y as f32 / ph as f32 * 100.0;
223 let w = r.width as f32 / pw as f32 * 100.0;
224 let h = r.height as f32 / ph as f32 * 100.0;
225 if w > 0.0 && h > 0.0 {
226 spans.push((x, y, w, h, xml_escape(&zone.text)));
227 }
228 }
229 _ => walk(&zone.children, spans, pw, ph),
230 }
231 }
232 }
233
234 walk(&text_layer.zones, &mut spans, pw, ph);
235 spans
236}
237
238fn build_page_xhtml(
241 img_name: &str,
242 pw: u32,
243 ph: u32,
244 text_overlay: &[(f32, f32, f32, f32, String)],
245 _opts: &EpubOptions,
246) -> String {
247 let mut html = String::new();
248 html.push_str(
249 r#"<?xml version="1.0" encoding="UTF-8"?>
250<!DOCTYPE html>
251<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
252<head>
253<meta charset="UTF-8"/>
254<title>Page</title>
255<style>
256body { margin: 0; padding: 0; }
257.djvu-page { position: relative; display: block; width: 100%; }
258.djvu-page img { display: block; width: 100%; height: auto; }
259.djvu-text {
260 position: absolute;
261 color: transparent;
262 background: transparent;
263 white-space: pre;
264 overflow: hidden;
265 pointer-events: none;
266}
267</style>
268</head>
269<body>
270"#,
271 );
272
273 html.push_str(&format!(
274 r#"<div class="djvu-page" style="width:{pw}px; height:{ph}px;">"#
275 ));
276 html.push_str(&format!(
277 r#"<img src="../images/{img_name}" alt="page" width="{pw}" height="{ph}"/>"#
278 ));
279
280 for (x, y, w, h, text) in text_overlay {
281 html.push_str(&format!(
282 r#"<span class="djvu-text" aria-hidden="true" style="left:{x:.3}%;top:{y:.3}%;width:{w:.3}%;height:{h:.3}%;">{text}</span>"#
283 ));
284 }
285
286 html.push_str("</div>\n</body>\n</html>\n");
287 html
288}
289
290fn build_opf(opts: &EpubOptions, page_count: usize) -> String {
293 let title = xml_escape(&opts.title);
294 let author = xml_escape(&opts.author);
295
296 let mut manifest_items = String::new();
297 let mut spine_items = String::new();
298
299 manifest_items.push_str(
301 r#" <item id="nav" href="nav.xhtml" media-type="application/xhtml+xml" properties="nav"/>
302"#,
303 );
304
305 for i in 1..=page_count {
306 let pid = format!("page_{i:04}");
307 manifest_items.push_str(&format!(
308 " <item id=\"{pid}\" href=\"pages/page_{i:04}.xhtml\" media-type=\"application/xhtml+xml\"/>\n"
309 ));
310 manifest_items.push_str(&format!(
311 " <item id=\"img_{pid}\" href=\"images/page_{i:04}.png\" media-type=\"image/png\"/>\n"
312 ));
313 spine_items.push_str(&format!(" <itemref idref=\"{pid}\"/>\n"));
314 }
315
316 format!(
317 r#"<?xml version="1.0" encoding="UTF-8"?>
318<package xmlns="http://www.idpf.org/2007/opf" version="3.0" epub:type="book"
319 xmlns:epub="http://www.idpf.org/2007/ops" unique-identifier="uid">
320 <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
321 <dc:title>{title}</dc:title>
322 <dc:creator>{author}</dc:creator>
323 <dc:language>en</dc:language>
324 <dc:identifier id="uid">djvu-rs-export</dc:identifier>
325 <meta property="dcterms:modified">2024-01-01T00:00:00Z</meta>
326 </metadata>
327 <manifest>
328{manifest_items} </manifest>
329 <spine>
330{spine_items} </spine>
331</package>
332"#
333 )
334}
335
336fn build_nav(bookmarks: &[DjVuBookmark], page_count: usize) -> String {
339 let toc_items = if bookmarks.is_empty() {
340 build_default_nav_items(page_count)
341 } else {
342 build_bookmark_nav_items(bookmarks)
343 };
344
345 format!(
346 r#"<?xml version="1.0" encoding="UTF-8"?>
347<!DOCTYPE html>
348<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
349<head><meta charset="UTF-8"/><title>Navigation</title></head>
350<body>
351<nav epub:type="toc" id="toc">
352 <h1>Contents</h1>
353 <ol>
354{toc_items} </ol>
355</nav>
356</body>
357</html>
358"#
359 )
360}
361
/// Produces one table-of-contents entry per page, labelled `Page N` and
/// linking to `pages/page_NNNN.xhtml`, for pages 1 through `page_count`.
///
/// Returns an empty string when `page_count` is zero.
fn build_default_nav_items(page_count: usize) -> String {
    (1..=page_count)
        .map(|page| {
            format!(" <li><a href=\"pages/page_{page:04}.xhtml\">Page {page}</a></li>\n")
        })
        .collect()
}
371
372fn build_bookmark_nav_items(bookmarks: &[DjVuBookmark]) -> String {
373 let mut s = String::new();
374 for bm in bookmarks {
375 let title = xml_escape(&bm.title);
376 let href = bookmark_href(&bm.url);
377 s.push_str(&format!(" <li><a href=\"{href}\">{title}</a>"));
378 if !bm.children.is_empty() {
379 s.push_str("\n <ol>\n");
380 s.push_str(&build_bookmark_nav_items_inner(&bm.children, 2));
381 s.push_str(" </ol>");
382 }
383 s.push_str("</li>\n");
384 }
385 s
386}
387
388fn build_bookmark_nav_items_inner(bookmarks: &[DjVuBookmark], depth: usize) -> String {
389 let indent = " ".repeat(depth + 1);
390 let mut s = String::new();
391 for bm in bookmarks {
392 let title = xml_escape(&bm.title);
393 let href = bookmark_href(&bm.url);
394 s.push_str(&format!("{indent}<li><a href=\"{href}\">{title}</a>"));
395 if !bm.children.is_empty() {
396 s.push_str(&format!("\n{indent}<ol>\n"));
397 s.push_str(&build_bookmark_nav_items_inner(&bm.children, depth + 1));
398 s.push_str(&format!("{indent}</ol>"));
399 }
400 s.push_str("</li>\n");
401 }
402 s
403}
404
405fn bookmark_href(url: &str) -> String {
408 if let Some(rest) = url.strip_prefix('#') {
410 if let Some(n_str) = rest.strip_prefix("page=")
411 && let Ok(n) = n_str.trim().parse::<usize>()
412 && n >= 1
413 {
414 return format!("pages/page_{n:04}.xhtml");
415 }
416 return format!("pages/page_0001.xhtml{}", xml_escape(url));
418 }
419 xml_escape(url)
421}
422
/// Escapes `s` for use in XML text and attribute values.
///
/// The five characters with predefined XML entities are replaced
/// (`&` -> `&amp;`, `<` -> `&lt;`, `>` -> `&gt;`, `"` -> `&quot;`,
/// `'` -> `&apos;`). Characters that are not permitted in XML 1.0 documents
/// (control characters other than tab, line feed and carriage return, plus
/// U+FFFE and U+FFFF) are dropped, since they cannot be represented at all
/// and would make the output ill-formed.
fn xml_escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            // XML 1.0 `Char` production: everything else is not allowed.
            '\t' | '\n' | '\r' | '\u{20}'..='\u{D7FF}' | '\u{E000}'..='\u{FFFD}'
            | '\u{10000}'..='\u{10FFFF}' => escaped.push(ch),
            _ => {}
        }
    }
    escaped
}
432
/// Contents of `META-INF/container.xml`, which names `OEBPS/content.opf` as
/// the package document of the archive.
const CONTAINER_XML: &str = concat!(
    "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
    "<container version=\"1.0\" xmlns=\"urn:oasis:names:tc:opendocument:xmlns:container\">\n",
    " <rootfiles>\n",
    " <rootfile full-path=\"OEBPS/content.opf\"\n",
    " media-type=\"application/oebps-package+xml\"/>\n",
    " </rootfiles>\n",
    "</container>\n",
);
441
#[cfg(test)]
mod tests {
    use super::*;

    /// All five XML special characters are replaced by their entities.
    #[test]
    fn xml_escape_basic() {
        assert_eq!(
            xml_escape("a&b<c>d\"e'f"),
            "a&amp;b&lt;c&gt;d&quot;e&apos;f"
        );
    }

    /// `#page=N` targets map to the matching page file.
    #[test]
    fn bookmark_href_page_number() {
        assert_eq!(bookmark_href("#page=3"), "pages/page_0003.xhtml");
        assert_eq!(bookmark_href("#page=1"), "pages/page_0001.xhtml");
    }

    /// URLs without a leading `#` are passed through (escaped).
    #[test]
    fn bookmark_href_external() {
        assert_eq!(bookmark_href("https://example.com"), "https://example.com");
    }

    /// Unresolvable `#...` targets fall back to the first page plus fragment.
    #[test]
    fn bookmark_href_fragment_fallback() {
        assert_eq!(bookmark_href("#chapter"), "pages/page_0001.xhtml#chapter");
        assert_eq!(bookmark_href("#page=0"), "pages/page_0001.xhtml#page=0");
    }

    /// Without bookmarks the navigation lists every page.
    #[test]
    fn nav_has_toc_for_empty_bookmarks() {
        let nav = build_nav(&[], 2);
        assert!(nav.contains("epub:type=\"toc\""));
        assert!(nav.contains("page_0001.xhtml"));
        assert!(nav.contains("page_0002.xhtml"));
    }
}