Skip to main content

djvu_rs/
pdf.rs

1//! DjVu to PDF converter — preserves document structure.
2//!
3//! Converts DjVu documents to PDF while preserving:
4//! - IW44 background as compressed RGB image (#2)
5//! - JB2 foreground mask as 1-bit image (#3)
6//! - Text layer as invisible selectable text (#4)
7//! - NAVM bookmarks as PDF outline / table of contents (#5)
8//! - ANTz hyperlinks as PDF link annotations (#6)
9//!
10//! # Example
11//!
12//! ```no_run
13//! use djvu_rs::djvu_document::DjVuDocument;
14//! use djvu_rs::pdf::djvu_to_pdf;
15//!
16//! let data = std::fs::read("input.djvu").unwrap();
17//! let doc = DjVuDocument::parse(&data).unwrap();
18//! let pdf_bytes = djvu_to_pdf(&doc).unwrap();
19//! std::fs::write("output.pdf", pdf_bytes).unwrap();
20//! ```
21
22#[cfg(not(feature = "std"))]
23use alloc::{format, string::String, vec, vec::Vec};
24
25use crate::{
26    annotation::Shape,
27    djvu_document::{DjVuBookmark, DjVuDocument, DjVuPage, DocError},
28    djvu_render::{self, RenderOptions},
29    text::{TextZone, TextZoneKind},
30};
31
32// ---- Error ------------------------------------------------------------------
33
/// Errors from PDF conversion.
///
/// Each variant wraps an error type from elsewhere in this crate. The
/// `#[from]` attributes generate the `From` impls that let `?` convert those
/// errors into `PdfError` automatically.
#[derive(Debug, thiserror::Error)]
pub enum PdfError {
    /// Document model error.
    #[error("document error: {0}")]
    Doc(#[from] DocError),
    /// Render error.
    #[error("render error: {0}")]
    Render(#[from] djvu_render::RenderError),
}
44
45// ---- Low-level PDF object writer --------------------------------------------
46
/// One indirect PDF object: its object number and the raw bytes that are
/// written between `N 0 obj\n` and `\nendobj\n`.
struct PdfObj {
    id: usize,
    body: Vec<u8>,
}

/// Collects PDF objects and writes them out as a complete PDF 1.4 file.
///
/// Object numbers are handed out sequentially starting at 1. An ID may be
/// reserved first and filled in later, so objects can reference each other
/// before their bodies are known.
struct PdfWriter {
    objects: Vec<PdfObj>,
    next_id: usize,
}

impl PdfWriter {
    /// Create an empty writer whose first allocated object number is 1.
    fn new() -> Self {
        Self {
            objects: Vec::new(),
            next_id: 1,
        }
    }

    /// Reserve and return the next unused object number.
    fn alloc_id(&mut self) -> usize {
        let reserved = self.next_id;
        self.next_id = reserved + 1;
        reserved
    }

    /// Store `body` under an object number obtained earlier from `alloc_id`.
    fn add_obj(&mut self, id: usize, body: Vec<u8>) {
        self.objects.push(PdfObj { id, body });
    }

    /// Reserve a fresh object number, store `body` under it and return the number.
    fn add(&mut self, body: Vec<u8>) -> usize {
        let id = self.alloc_id();
        self.objects.push(PdfObj { id, body });
        id
    }

    /// Consume the writer and produce the bytes of the PDF file.
    ///
    /// Objects are written in insertion order, followed by a cross-reference
    /// table covering object numbers `0..=max_id` and a trailer whose `/Root`
    /// is always object 1. Numbers that never received a body are listed as
    /// free entries.
    fn serialize(self) -> Vec<u8> {
        const FREE_ENTRY: &[u8] = b"0000000000 65535 f \n";

        // The table is indexed by object number; slot 0 stays unused because
        // the mandatory free-list head is written separately below.
        let max_id = self.objects.iter().map(|obj| obj.id).max().unwrap_or(0);
        let size = max_id + 1;
        let mut table: Vec<Option<usize>> = vec![None; size];

        let mut out: Vec<u8> = Vec::new();
        // Header plus a binary comment line.
        out.extend_from_slice(b"%PDF-1.4\n%\xe2\xe3\xcf\xd3\n");

        for PdfObj { id, body } in &self.objects {
            table[*id] = Some(out.len());
            out.extend_from_slice(format!("{} 0 obj\n", id).as_bytes());
            out.extend_from_slice(body);
            out.extend_from_slice(b"\nendobj\n");
        }

        // Cross-reference table
        let xref_offset = out.len();
        out.extend_from_slice(format!("xref\n0 {}\n", size).as_bytes());
        out.extend_from_slice(FREE_ENTRY);
        for slot in &table[1..] {
            match slot {
                Some(off) => {
                    out.extend_from_slice(format!("{off:010} 00000 n \n").as_bytes());
                }
                None => out.extend_from_slice(FREE_ENTRY),
            }
        }

        let trailer = format!(
            "trailer\n<< /Size {} /Root 1 0 R >>\nstartxref\n{}\n%%EOF\n",
            size, xref_offset
        );
        out.extend_from_slice(trailer.as_bytes());

        out
    }
}
132
/// Build the body of a PDF stream object:
/// `<< /Length N{dict_extra} >>\nstream\n{data}\nendstream`.
///
/// `dict_extra` is spliced verbatim after the `/Length` entry, so callers
/// pass it with a leading space (or empty). `/Length` is the byte length of
/// `data` exactly as given.
fn make_stream(dict_extra: &str, data: &[u8]) -> Vec<u8> {
    const TRAILER: &[u8] = b"\nendstream";

    let header = format!("<< /Length {}{dict_extra} >>\nstream\n", data.len());
    let mut out = Vec::with_capacity(header.len() + data.len() + TRAILER.len());
    out.extend_from_slice(header.as_bytes());
    out.extend_from_slice(data);
    out.extend_from_slice(TRAILER);
    out
}
141
/// Compress bytes using zlib/deflate.
///
/// Delegates to `miniz_oxide`'s zlib-wrapped compressor at compression
/// level 6 and returns the compressed bytes.
fn deflate(data: &[u8]) -> Vec<u8> {
    miniz_oxide::deflate::compress_to_vec_zlib(data, 6)
}
146
147/// Helper: make a compressed stream object.
148fn make_deflate_stream(dict_extra: &str, data: &[u8]) -> Vec<u8> {
149    let compressed = deflate(data);
150    let extra = format!(" /Filter /FlateDecode{dict_extra}");
151    make_stream(&extra, &compressed)
152}
153
154/// Encode RGB bytes as JPEG and return the compressed bytes.
155///
156/// `quality` is in range 1–100. Values around 75–85 give excellent
157/// perceptual quality for typical DjVu backgrounds at a fraction of the
158/// FlateDecode+RGB size.
159fn encode_rgb_to_jpeg(rgb: &[u8], width: u32, height: u32, quality: u8) -> Vec<u8> {
160    use jpeg_encoder::{ColorType, Encoder};
161    let mut out = Vec::new();
162    let enc = Encoder::new(&mut out, quality);
163    // Ignore encoding errors — fallback to empty, which will be caught at
164    // the caller and downgraded to FlateDecode.
165    let _ = enc.encode(rgb, width as u16, height as u16, ColorType::Rgb);
166    out
167}
168
169/// Helper: make a DCTDecode (JPEG) stream object.
170fn make_dct_stream(dict_extra: &str, jpeg_bytes: &[u8]) -> Vec<u8> {
171    let extra = format!(" /Filter /DCTDecode{dict_extra}");
172    make_stream(&extra, jpeg_bytes)
173}
174
175// ---- PDF font for invisible text --------------------------------------------
176
/// Return the object body of a Type1 font dictionary for Helvetica with
/// WinAnsi encoding.
///
/// Helvetica is one of the standard 14 fonts, so no font program is embedded.
fn font_dict() -> Vec<u8> {
    const HELVETICA: &[u8] =
        b"<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica /Encoding /WinAnsiEncoding >>";
    HELVETICA.to_vec()
}
182
183// ---- Coordinate helpers -----------------------------------------------------
184
/// Convert a length in pixels at resolution `dpi` to PDF points
/// (1 point = 1/72 inch).
///
/// This is a pure unit conversion: it scales but never flips an axis, so any
/// change of coordinate origin is the caller's responsibility.
fn px_to_pt(px: f32, dpi: f32) -> f32 {
    const POINTS_PER_INCH: f32 = 72.0;
    (px * POINTS_PER_INCH) / dpi
}
191
192// ---- Page rendering ---------------------------------------------------------
193
194/// Build PDF objects for one page. Returns (page_obj_id, list of annotation obj ids).
195fn build_page_objects(
196    w: &mut PdfWriter,
197    page: &DjVuPage,
198    pages_id: usize,
199    font_id: usize,
200    pdf_opts: &PdfOptions,
201) -> Result<usize, PdfError> {
202    let pw = page.width() as u32;
203    let ph = page.height() as u32;
204    let dpi = page.dpi().max(1) as f32;
205    let pt_w = px_to_pt(pw as f32, dpi);
206    let pt_h = px_to_pt(ph as f32, dpi);
207
208    // Render page to RGB
209    let render_opts = RenderOptions {
210        width: pw,
211        height: ph,
212        ..RenderOptions::default()
213    };
214    let pixmap = djvu_render::render_pixmap(page, &render_opts)?;
215    let rgb = pixmap.to_rgb();
216
217    // Background image XObject — DCTDecode (JPEG) when quality is set,
218    // FlateDecode (deflate+RGB) otherwise.
219    let img_dict = format!(
220        " /Type /XObject /Subtype /Image /Width {pw} /Height {ph}\
221         /ColorSpace /DeviceRGB /BitsPerComponent 8"
222    );
223    let img_body = match pdf_opts.jpeg_quality {
224        Some(quality) => {
225            let jpeg = encode_rgb_to_jpeg(&rgb, pw, ph, quality);
226            if jpeg.is_empty() {
227                // JPEG encoding failed — fall back to deflate
228                make_deflate_stream(&img_dict, &rgb)
229            } else {
230                make_dct_stream(&img_dict, &jpeg)
231            }
232        }
233        None => make_deflate_stream(&img_dict, &rgb),
234    };
235    let img_id = w.add(img_body);
236
237    // JB2 mask as 1-bit image (if present)
238    let mask_img_id = build_mask_image(w, page, pw, ph);
239
240    // Content stream: draw background image, then mask overlay, then invisible text
241    let mut content = String::new();
242
243    // Draw background image filling the page
244    content.push_str(&format!("q {pt_w:.4} 0 0 {pt_h:.4} 0 0 cm /Im0 Do Q\n"));
245
246    // Draw mask overlay (black foreground on transparent)
247    if let Some(mask_id) = mask_img_id {
248        // Use the mask as a stencil: set fill color to black, then draw mask as image mask
249        content.push_str(&format!(
250            "q 0 0 0 rg {pt_w:.4} 0 0 {pt_h:.4} 0 0 cm /Mask0 Do Q\n"
251        ));
252        let _ = mask_id; // used in resources below
253    }
254
255    // Invisible text layer
256    let text_ops = build_text_content(page, dpi, pt_h);
257    if !text_ops.is_empty() {
258        content.push_str(&text_ops);
259    }
260
261    // Content stream object
262    let content_bytes = content.as_bytes();
263    let content_body = make_deflate_stream("", content_bytes);
264    let content_id = w.add(content_body);
265
266    // Resources dictionary
267    let mut resources = format!("/XObject << /Im0 {img_id} 0 R");
268    if let Some(mid) = mask_img_id {
269        resources.push_str(&format!(" /Mask0 {mid} 0 R"));
270    }
271    resources.push_str(" >>");
272    if !text_ops.is_empty() {
273        resources.push_str(&format!(" /Font << /F1 {font_id} 0 R >>"));
274    }
275
276    // Annotations (hyperlinks)
277    let annot_ids = build_link_annotations(w, page, dpi, pt_h);
278    let mut annots_str = String::new();
279    if !annot_ids.is_empty() {
280        annots_str.push_str(" /Annots [");
281        for aid in &annot_ids {
282            annots_str.push_str(&format!(" {aid} 0 R"));
283        }
284        annots_str.push_str(" ]");
285    }
286
287    // Page object
288    let page_id = w.add(
289        format!(
290            "<< /Type /Page /Parent {pages_id} 0 R\n\
291               /MediaBox [0 0 {pt_w:.4} {pt_h:.4}]\n\
292               /Contents {content_id} 0 R\n\
293               /Resources << {resources} >>{annots_str} >>"
294        )
295        .into_bytes(),
296    );
297
298    Ok(page_id)
299}
300
301/// Build a 1-bit image mask from the JB2 foreground mask.
302fn build_mask_image(w: &mut PdfWriter, page: &DjVuPage, _pw: u32, _ph: u32) -> Option<usize> {
303    // Decode JB2 mask
304    let sjbz = page.find_chunk(b"Sjbz")?;
305    let dict = page
306        .find_chunk(b"Djbz")
307        .and_then(|djbz| crate::jb2_new::decode_dict(djbz, None).ok());
308    let bitmap = crate::jb2_new::decode(sjbz, dict.as_ref()).ok()?;
309
310    let bw = bitmap.width;
311    let bh = bitmap.height;
312
313    // Bitmap data is already packed 1-bit MSB-first, which is what PDF expects
314    // for an ImageMask with /Decode [1 0] (1=black=marked).
315    // PDF ImageMask: painted where sample = 1 in the image data.
316    let dict_extra = format!(
317        " /Type /XObject /Subtype /Image /Width {bw} /Height {bh}\
318         /ImageMask true /BitsPerComponent 1 /Decode [1 0]"
319    );
320    let body = make_deflate_stream(&dict_extra, &bitmap.data);
321    let id = w.add(body);
322    Some(id)
323}
324
325/// Build invisible text operators for the text layer.
326fn build_text_content(page: &DjVuPage, dpi: f32, pt_h: f32) -> String {
327    let text_layer = match page.text_layer() {
328        Ok(Some(tl)) => tl,
329        _ => return String::new(),
330    };
331
332    let mut ops = String::new();
333    // Begin text object
334    ops.push_str("BT\n");
335    // Set text rendering mode to invisible (mode 3)
336    ops.push_str("3 Tr\n");
337    // Set font — use a small size, we scale per-word
338    ops.push_str("/F1 1 Tf\n");
339
340    // Walk the zone tree and emit text for word/character zones
341    for zone in &text_layer.zones {
342        emit_text_zones(&mut ops, zone, dpi, pt_h);
343    }
344
345    ops.push_str("ET\n");
346
347    if ops == "BT\n3 Tr\n/F1 1 Tf\nET\n" {
348        // No actual text was emitted
349        return String::new();
350    }
351
352    ops
353}
354
355/// Recursively emit text positioning operators for word-level zones.
356fn emit_text_zones(ops: &mut String, zone: &TextZone, dpi: f32, pt_h: f32) {
357    match zone.kind {
358        TextZoneKind::Word | TextZoneKind::Character => {
359            if zone.text.is_empty() {
360                return;
361            }
362            let r = &zone.rect;
363            // zone.rect is in top-left-origin pixel coords
364            // PDF uses bottom-left origin, so: pdf_y = pt_h - (r.y + r.height) * 72/dpi
365            let x = px_to_pt(r.x as f32, dpi);
366            let y = pt_h - px_to_pt((r.y + r.height) as f32, dpi);
367            let w = px_to_pt(r.width as f32, dpi);
368            let h = px_to_pt(r.height as f32, dpi);
369
370            if w <= 0.0 || h <= 0.0 {
371                return;
372            }
373
374            // Font size = zone height in points
375            let font_size = h;
376            if font_size < 0.5 {
377                return;
378            }
379
380            // Horizontal scale to fit text width
381            let text_escaped = pdf_escape_string(&zone.text);
382            let char_count = zone.text.chars().count().max(1) as f32;
383            // Approximate: each glyph in Helvetica is ~0.5 * font_size wide
384            let natural_width = char_count * 0.5 * font_size;
385            let h_scale = if natural_width > 0.01 {
386                (w / natural_width) * 100.0
387            } else {
388                100.0
389            };
390
391            ops.push_str(&format!(
392                "{font_size:.2} 0 0 {font_size:.2} {x:.4} {y:.4} Tm\n"
393            ));
394            if (h_scale - 100.0).abs() > 1.0 {
395                ops.push_str(&format!("{h_scale:.2} Tz\n"));
396            }
397            ops.push_str(&format!("({text_escaped}) Tj\n"));
398        }
399        _ => {
400            // Recurse into children
401            for child in &zone.children {
402                emit_text_zones(ops, child, dpi, pt_h);
403            }
404        }
405    }
406}
407
/// Escape a string for PDF literal string syntax.
///
/// `(`, `)` and `\` are prefixed with a backslash. Other ASCII characters are
/// copied unchanged. Every non-ASCII character is replaced by a single `?`,
/// so the output is always pure ASCII.
fn pdf_escape_string(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        if !ch.is_ascii() {
            // Not representable here; the glyph is still visible in the page image.
            escaped.push('?');
            continue;
        }
        if matches!(ch, '(' | ')' | '\\') {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
427
428/// Build PDF link annotation objects for hyperlinks from the ANTz layer.
429fn build_link_annotations(w: &mut PdfWriter, page: &DjVuPage, dpi: f32, pt_h: f32) -> Vec<usize> {
430    let hyperlinks = match page.hyperlinks() {
431        Ok(links) => links,
432        Err(_) => return Vec::new(),
433    };
434
435    let mut ids = Vec::new();
436    for link in &hyperlinks {
437        if let Some(rect) = shape_to_pdf_rect(&link.shape, dpi, pt_h) {
438            let url_escaped = pdf_escape_string(&link.url);
439            let body = format!(
440                "<< /Type /Annot /Subtype /Link\n\
441                   /Rect [{:.4} {:.4} {:.4} {:.4}]\n\
442                   /Border [0 0 0]\n\
443                   /A << /S /URI /URI ({url_escaped}) >> >>",
444                rect.0, rect.1, rect.2, rect.3
445            );
446            let id = w.add(body.into_bytes());
447            ids.push(id);
448        }
449    }
450    ids
451}
452
453/// Convert a DjVu shape to a PDF rectangle [x1, y1, x2, y2] in points.
454/// DjVu annotation coordinates use bottom-left origin (same as PDF).
455fn shape_to_pdf_rect(shape: &Shape, dpi: f32, _pt_h: f32) -> Option<(f32, f32, f32, f32)> {
456    match shape {
457        Shape::Rect(r) | Shape::Oval(r) | Shape::Text(r) => {
458            let x1 = px_to_pt(r.x as f32, dpi);
459            let y1 = px_to_pt(r.y as f32, dpi);
460            let x2 = px_to_pt((r.x + r.width) as f32, dpi);
461            let y2 = px_to_pt((r.y + r.height) as f32, dpi);
462            Some((x1, y1, x2, y2))
463        }
464        Shape::Poly(points) => {
465            if points.is_empty() {
466                return None;
467            }
468            let mut min_x = f32::MAX;
469            let mut min_y = f32::MAX;
470            let mut max_x = f32::MIN;
471            let mut max_y = f32::MIN;
472            for (px, py) in points {
473                let x = px_to_pt(*px as f32, dpi);
474                let y = px_to_pt(*py as f32, dpi);
475                min_x = min_x.min(x);
476                min_y = min_y.min(y);
477                max_x = max_x.max(x);
478                max_y = max_y.max(y);
479            }
480            Some((min_x, min_y, max_x, max_y))
481        }
482        Shape::Line(x1, y1, x2, y2) => {
483            let px1 = px_to_pt(*x1 as f32, dpi);
484            let py1 = px_to_pt(*y1 as f32, dpi);
485            let px2 = px_to_pt(*x2 as f32, dpi);
486            let py2 = px_to_pt(*y2 as f32, dpi);
487            Some((px1.min(px2), py1.min(py2), px1.max(px2), py1.max(py2)))
488        }
489    }
490}
491
492// ---- Bookmarks (PDF outline) ------------------------------------------------
493
494/// Build PDF outline objects from NAVM bookmarks.
495/// Returns the outline root object ID, or None if no bookmarks.
496fn build_outline(
497    w: &mut PdfWriter,
498    bookmarks: &[DjVuBookmark],
499    page_ids: &[usize],
500) -> Option<usize> {
501    if bookmarks.is_empty() {
502        return None;
503    }
504
505    let outline_id = w.alloc_id();
506
507    // Flatten the bookmark tree into outline item objects
508    let item_ids = build_outline_items(w, bookmarks, outline_id, page_ids);
509
510    if item_ids.is_empty() {
511        return None;
512    }
513
514    let first = item_ids[0];
515    let last = *item_ids.last().unwrap();
516    let count = count_outline_items(bookmarks);
517
518    w.add_obj(
519        outline_id,
520        format!("<< /Type /Outlines /First {first} 0 R /Last {last} 0 R /Count {count} >>")
521            .into_bytes(),
522    );
523
524    Some(outline_id)
525}
526
527/// Recursively build outline items. Returns IDs of top-level items at this level.
528fn build_outline_items(
529    w: &mut PdfWriter,
530    bookmarks: &[DjVuBookmark],
531    parent_id: usize,
532    page_ids: &[usize],
533) -> Vec<usize> {
534    let mut ids = Vec::new();
535
536    for _bm in bookmarks {
537        let item_id = w.alloc_id();
538        ids.push(item_id);
539    }
540
541    for (i, bm) in bookmarks.iter().enumerate() {
542        let item_id = ids[i];
543        let prev = if i > 0 {
544            format!(" /Prev {} 0 R", ids[i - 1])
545        } else {
546            String::new()
547        };
548        let next = if i + 1 < ids.len() {
549            format!(" /Next {} 0 R", ids[i + 1])
550        } else {
551            String::new()
552        };
553
554        // Resolve bookmark URL to page index
555        let dest = resolve_bookmark_dest(&bm.url, page_ids);
556
557        // Build children
558        let child_ids = build_outline_items(w, &bm.children, item_id, page_ids);
559        let children_str = if !child_ids.is_empty() {
560            let first = child_ids[0];
561            let last = *child_ids.last().unwrap();
562            let count = count_outline_items(&bm.children);
563            format!(" /First {first} 0 R /Last {last} 0 R /Count {count}")
564        } else {
565            String::new()
566        };
567
568        let title = pdf_escape_string(&bm.title);
569        w.add_obj(
570            item_id,
571            format!(
572                "<< /Title ({title}) /Parent {parent_id} 0 R{prev}{next}{dest}{children_str} >>"
573            )
574            .into_bytes(),
575        );
576    }
577
578    ids
579}
580
581/// Count total outline items (including nested children).
582fn count_outline_items(bookmarks: &[DjVuBookmark]) -> usize {
583    let mut n = bookmarks.len();
584    for bm in bookmarks {
585        n += count_outline_items(&bm.children);
586    }
587    n
588}
589
590/// Resolve a DjVu bookmark URL to a PDF destination string.
591/// DjVu internal URLs look like `#page_N` or `#+N` or `#-N`.
592fn resolve_bookmark_dest(url: &str, page_ids: &[usize]) -> String {
593    if let Some(stripped) = url.strip_prefix('#') {
594        // Try to parse as page number
595        if let Some(page_str) = stripped.strip_prefix("page")
596            && let Ok(page_num) = page_str.trim_start_matches('_').parse::<usize>()
597        {
598            let idx = page_num.saturating_sub(1);
599            if let Some(&pid) = page_ids.get(idx) {
600                return format!(" /Dest [{pid} 0 R /Fit]");
601            }
602        }
603        // Try +N / -N (relative, but treat as absolute from 1)
604        if let Ok(n) = stripped.parse::<i64>() {
605            let idx = (n.max(1) - 1) as usize;
606            if let Some(&pid) = page_ids.get(idx) {
607                return format!(" /Dest [{pid} 0 R /Fit]");
608            }
609        }
610        // Try bare number
611        if let Ok(n) = stripped.parse::<usize>() {
612            let idx = n.saturating_sub(1);
613            if let Some(&pid) = page_ids.get(idx) {
614                return format!(" /Dest [{pid} 0 R /Fit]");
615            }
616        }
617    }
618
619    // External URL or unparseable — use URI action
620    if !url.is_empty() {
621        let escaped = pdf_escape_string(url);
622        return format!(" /A << /S /URI /URI ({escaped}) >>");
623    }
624
625    String::new()
626}
627
628// ---- Public API -------------------------------------------------------------
629
/// Options for DjVu → PDF conversion.
///
/// Use `PdfOptions::default()` for sensible defaults (DCTDecode background
/// at quality 80, which produces much smaller files than the FlateDecode path
/// with comparable visual quality).
#[derive(Debug, Clone)]
pub struct PdfOptions {
    /// JPEG quality for background image encoding (1–100).
    ///
    /// Higher values produce better quality at larger file sizes.
    /// Set to `None` to use lossless FlateDecode (PNG-like, larger output).
    pub jpeg_quality: Option<u8>,
}

impl Default for PdfOptions {
    /// JPEG background encoding at quality 80.
    fn default() -> Self {
        let jpeg_quality = Some(80);
        Self { jpeg_quality }
    }
}
653
/// Convert a DjVu document to PDF bytes using custom options.
///
/// See [`PdfOptions`] for available settings.
///
/// # Errors
///
/// Returns whatever error the shared conversion routine reports; this
/// function adds no error conditions of its own.
pub fn djvu_to_pdf_with_options(
    doc: &DjVuDocument,
    opts: &PdfOptions,
) -> Result<Vec<u8>, PdfError> {
    djvu_to_pdf_impl(doc, opts)
}
663
/// Convert a DjVu document to PDF bytes.
///
/// This produces a PDF 1.4 file with:
/// - Rasterized page images (IW44 background + JB2 mask composite)
/// - Invisible text layer for search and selection
/// - Bookmarks (PDF outline) from NAVM
/// - Hyperlink annotations from ANTz
///
/// Background images are encoded as DCTDecode (JPEG at quality 80) by default,
/// producing significantly smaller files than the legacy FlateDecode path.
/// Use [`djvu_to_pdf_with_options`] with `jpeg_quality: None` for lossless output.
///
/// # Errors
///
/// Returns `PdfError` if a page cannot be obtained from the document.
/// A page whose objects fail to build (for example because rendering fails)
/// does not abort the conversion: it is emitted as a blank page of the same
/// size. An unreadable text layer or hyperlink list is silently omitted.
pub fn djvu_to_pdf(doc: &DjVuDocument) -> Result<Vec<u8>, PdfError> {
    djvu_to_pdf_impl(doc, &PdfOptions::default())
}
680
681fn djvu_to_pdf_impl(doc: &DjVuDocument, opts: &PdfOptions) -> Result<Vec<u8>, PdfError> {
682    let mut w = PdfWriter::new();
683
684    // Reserve IDs for catalog and pages
685    let catalog_id = w.alloc_id(); // 1
686    let pages_id = w.alloc_id(); // 2
687
688    // Reserve a font object ID
689    let font_id = w.alloc_id(); // 3
690    w.add_obj(font_id, font_dict());
691
692    // Build page objects (tolerate per-page errors with blank fallback)
693    let mut page_obj_ids = Vec::new();
694    for i in 0..doc.page_count() {
695        let page = doc.page(i)?;
696        let page_id = match build_page_objects(&mut w, page, pages_id, font_id, opts) {
697            Ok(id) => id,
698            Err(_) => {
699                // Fallback: blank page at native dimensions
700                let dpi = page.dpi().max(1) as f32;
701                let pt_w = px_to_pt(page.width() as f32, dpi);
702                let pt_h = px_to_pt(page.height() as f32, dpi);
703                w.add(
704                    format!(
705                        "<< /Type /Page /Parent {pages_id} 0 R\n\
706                           /MediaBox [0 0 {pt_w:.4} {pt_h:.4}]\n\
707                           /Resources << >> >>"
708                    )
709                    .into_bytes(),
710                )
711            }
712        };
713        page_obj_ids.push(page_id);
714    }
715
716    // Build outline from bookmarks
717    let outline_id = build_outline(&mut w, doc.bookmarks(), &page_obj_ids);
718
719    // Pages object
720    let kids = page_obj_ids
721        .iter()
722        .map(|id| format!("{id} 0 R"))
723        .collect::<Vec<_>>()
724        .join(" ");
725    let n = page_obj_ids.len();
726    w.add_obj(
727        pages_id,
728        format!("<< /Type /Pages /Kids [{kids}] /Count {n} >>").into_bytes(),
729    );
730
731    // Catalog
732    let outline_ref = match outline_id {
733        Some(oid) => format!(" /Outlines {oid} 0 R /PageMode /UseOutlines"),
734        None => String::new(),
735    };
736    w.add_obj(
737        catalog_id,
738        format!("<< /Type /Catalog /Pages {pages_id} 0 R{outline_ref} >>").into_bytes(),
739    );
740
741    Ok(w.serialize())
742}
743
744#[cfg(test)]
745mod tests {
746    use super::*;
747
748    #[test]
749    fn test_pdf_escape_string() {
750        assert_eq!(pdf_escape_string("hello"), "hello");
751        assert_eq!(pdf_escape_string("a(b)c"), "a\\(b\\)c");
752        assert_eq!(pdf_escape_string("a\\b"), "a\\\\b");
753    }
754
755    #[test]
756    fn test_px_to_pt() {
757        // At 72 dpi, 72 pixels = 72 points
758        assert!((px_to_pt(72.0, 72.0) - 72.0).abs() < 0.01);
759        // At 300 dpi, 300 pixels = 72 points
760        assert!((px_to_pt(300.0, 300.0) - 72.0).abs() < 0.01);
761    }
762
763    #[test]
764    fn test_resolve_bookmark_dest_page_number() {
765        let page_ids = vec![10, 20, 30];
766        let dest = resolve_bookmark_dest("#1", &page_ids);
767        assert!(dest.contains("10 0 R"));
768    }
769
770    #[test]
771    fn test_pdf_writer_serialize() {
772        let mut w = PdfWriter::new();
773        let id = w.add(b"<< /Type /Catalog >>".to_vec());
774        assert_eq!(id, 1);
775        let pdf = w.serialize();
776        assert!(pdf.starts_with(b"%PDF-1.4"));
777        assert!(pdf.windows(5).any(|w| w == b"%%EOF"));
778    }
779
780    #[test]
781    fn test_make_stream() {
782        let stream = make_stream(" /Filter /FlateDecode", b"hello");
783        let s = String::from_utf8_lossy(&stream);
784        assert!(s.contains("/Length 5"));
785        assert!(s.contains("stream\nhello\nendstream"));
786    }
787
788    #[test]
789    fn test_deflate_roundtrip() {
790        let data = b"hello world, this is a test of deflate compression";
791        let compressed = deflate(data);
792        // Compressed data should be non-empty
793        assert!(!compressed.is_empty());
794        // Decompress and verify
795        let decompressed = miniz_oxide::inflate::decompress_to_vec_zlib(&compressed).unwrap();
796        assert_eq!(&decompressed, data);
797    }
798
799    #[test]
800    fn test_make_deflate_stream() {
801        let body = make_deflate_stream(" /Type /XObject", b"test data");
802        let s = String::from_utf8_lossy(&body);
803        assert!(s.contains("/Filter /FlateDecode"));
804        assert!(s.contains("/Type /XObject"));
805        assert!(s.contains("stream\n"));
806        assert!(s.contains("\nendstream"));
807    }
808
809    #[test]
810    fn test_font_dict() {
811        let d = font_dict();
812        let s = String::from_utf8_lossy(&d);
813        assert!(s.contains("/Type /Font"));
814        assert!(s.contains("/BaseFont /Helvetica"));
815    }
816
817    #[test]
818    fn test_pdf_writer_alloc_ids() {
819        let mut w = PdfWriter::new();
820        let id1 = w.alloc_id();
821        let id2 = w.alloc_id();
822        let id3 = w.alloc_id();
823        assert_eq!(id1, 1);
824        assert_eq!(id2, 2);
825        assert_eq!(id3, 3);
826    }
827
828    #[test]
829    fn test_pdf_writer_multiple_objects() {
830        let mut w = PdfWriter::new();
831        w.add(b"<< /Type /Catalog >>".to_vec());
832        w.add(b"<< /Type /Pages >>".to_vec());
833        let pdf = w.serialize();
834        let s = String::from_utf8_lossy(&pdf);
835        assert!(s.contains("1 0 obj"));
836        assert!(s.contains("2 0 obj"));
837        assert!(s.contains("/Size 3")); // 0, 1, 2
838    }
839
840    #[test]
841    fn test_resolve_bookmark_dest_page_prefix() {
842        let page_ids = vec![10, 20, 30];
843        let dest = resolve_bookmark_dest("#page2", &page_ids);
844        assert!(dest.contains("20 0 R"));
845        assert!(dest.contains("/Fit"));
846    }
847
848    #[test]
849    fn test_resolve_bookmark_dest_page_underscore() {
850        let page_ids = vec![10, 20, 30];
851        let dest = resolve_bookmark_dest("#page_3", &page_ids);
852        assert!(dest.contains("30 0 R"));
853    }
854
855    #[test]
856    fn test_resolve_bookmark_dest_out_of_range() {
857        let page_ids = vec![10];
858        let dest = resolve_bookmark_dest("#page99", &page_ids);
859        // Should fall through to bare number parse or be empty
860        assert!(!dest.contains("10 0 R"));
861    }
862
863    #[test]
864    fn test_resolve_bookmark_dest_external_url() {
865        let page_ids = vec![10];
866        let dest = resolve_bookmark_dest("http://example.com", &page_ids);
867        assert!(dest.contains("/S /URI"));
868        assert!(dest.contains("http://example.com"));
869    }
870
871    #[test]
872    fn test_resolve_bookmark_dest_empty_url() {
873        let page_ids = vec![10];
874        let dest = resolve_bookmark_dest("", &page_ids);
875        assert!(dest.is_empty());
876    }
877
878    #[test]
879    fn test_pdf_escape_special_chars() {
880        assert_eq!(pdf_escape_string("a(b)c\\d"), "a\\(b\\)c\\\\d");
881    }
882
883    #[test]
884    fn test_pdf_escape_non_ascii() {
885        // Non-ASCII chars should be replaced with ?
886        let result = pdf_escape_string("caf\u{00e9}");
887        assert_eq!(result, "caf?");
888    }
889
890    #[test]
891    fn test_shape_to_pdf_rect_rect() {
892        use crate::annotation;
893        let shape = annotation::Shape::Rect(annotation::Rect {
894            x: 0,
895            y: 0,
896            width: 300,
897            height: 300,
898        });
899        let rect = shape_to_pdf_rect(&shape, 300.0, 72.0).unwrap();
900        assert!((rect.0 - 0.0).abs() < 0.01); // x1
901        assert!((rect.2 - 72.0).abs() < 0.01); // x2 = 300 * 72/300
902    }
903
904    #[test]
905    fn test_shape_to_pdf_rect_poly() {
906        use crate::annotation;
907        let shape = annotation::Shape::Poly(vec![(0, 0), (300, 0), (300, 300), (0, 300)]);
908        let rect = shape_to_pdf_rect(&shape, 300.0, 72.0).unwrap();
909        assert!((rect.0 - 0.0).abs() < 0.01);
910        assert!((rect.2 - 72.0).abs() < 0.01);
911    }
912
913    #[test]
914    fn test_shape_to_pdf_rect_empty_poly() {
915        use crate::annotation;
916        let shape = annotation::Shape::Poly(vec![]);
917        assert!(shape_to_pdf_rect(&shape, 300.0, 72.0).is_none());
918    }
919
920    #[test]
921    fn test_shape_to_pdf_rect_line() {
922        use crate::annotation;
923        let shape = annotation::Shape::Line(0, 0, 150, 150);
924        let rect = shape_to_pdf_rect(&shape, 150.0, 72.0).unwrap();
925        assert!((rect.0 - 0.0).abs() < 0.01);
926        assert!((rect.2 - 72.0).abs() < 0.01);
927    }
928
929    #[test]
930    fn test_count_outline_items_empty() {
931        let bookmarks: Vec<crate::djvu_document::DjVuBookmark> = vec![];
932        assert_eq!(count_outline_items(&bookmarks), 0);
933    }
934
935    #[test]
936    fn test_count_outline_items_nested() {
937        use crate::djvu_document::DjVuBookmark;
938        let bookmarks = vec![DjVuBookmark {
939            title: "Chapter 1".into(),
940            url: "#1".into(),
941            children: vec![
942                DjVuBookmark {
943                    title: "Section 1.1".into(),
944                    url: "#2".into(),
945                    children: vec![],
946                },
947                DjVuBookmark {
948                    title: "Section 1.2".into(),
949                    url: "#3".into(),
950                    children: vec![],
951                },
952            ],
953        }];
954        assert_eq!(count_outline_items(&bookmarks), 3);
955    }
956
957    // ── DCTDecode / PdfOptions tests ──────────────────────────────────────────
958
959    fn assets_path() -> std::path::PathBuf {
960        std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
961            .join("references/djvujs/library/assets")
962    }
963
964    fn load_doc(name: &str) -> crate::djvu_document::DjVuDocument {
965        let data =
966            std::fs::read(assets_path().join(name)).unwrap_or_else(|_| panic!("{name} must exist"));
967        crate::djvu_document::DjVuDocument::parse(&data)
968            .unwrap_or_else(|e| panic!("parse failed: {e}"))
969    }
970
971    /// `PdfOptions::default()` uses jpeg_quality = Some(80).
972    #[test]
973    fn pdf_options_default_is_jpeg80() {
974        let opts = PdfOptions::default();
975        assert_eq!(opts.jpeg_quality, Some(80));
976    }
977
978    /// JPEG encoding roundtrip: `encode_rgb_to_jpeg` returns a non-empty JPEG.
979    #[test]
980    fn encode_rgb_to_jpeg_returns_jpeg() {
981        // 4×4 solid red image
982        let rgb = [255u8, 0, 0].repeat(16); // 16 pixels * 3 channels
983        let jpeg = encode_rgb_to_jpeg(&rgb, 4, 4, 80);
984        assert!(!jpeg.is_empty(), "JPEG output must not be empty");
985        // JPEG starts with FF D8
986        assert_eq!(jpeg[0], 0xFF);
987        assert_eq!(jpeg[1], 0xD8);
988    }
989
990    /// `make_dct_stream` embeds /Filter /DCTDecode in the PDF stream dict.
991    #[test]
992    fn make_dct_stream_has_dctdecode_filter() {
993        let fake_jpeg = b"\xFF\xD8\xFF\xD9"; // minimal JPEG markers
994        let stream = make_dct_stream(" /Type /XObject", fake_jpeg);
995        let s = String::from_utf8_lossy(&stream);
996        assert!(
997            s.contains("/Filter /DCTDecode"),
998            "must contain DCTDecode filter"
999        );
1000        assert!(s.contains("/Type /XObject"));
1001    }
1002
1003    /// DCT PDF is smaller than deflate PDF for the same page.
1004    #[test]
1005    fn dct_pdf_is_smaller_than_deflate_pdf() {
1006        let doc = load_doc("chicken.djvu");
1007        let dct_pdf = djvu_to_pdf_with_options(
1008            &doc,
1009            &PdfOptions {
1010                jpeg_quality: Some(75),
1011            },
1012        )
1013        .expect("DCT conversion must succeed");
1014        let flat_pdf = djvu_to_pdf_with_options(&doc, &PdfOptions { jpeg_quality: None })
1015            .expect("FlateDecode conversion must succeed");
1016        assert!(
1017            dct_pdf.len() < flat_pdf.len(),
1018            "DCT PDF ({} bytes) must be smaller than FlateDecode PDF ({} bytes)",
1019            dct_pdf.len(),
1020            flat_pdf.len()
1021        );
1022    }
1023
1024    /// Output PDF contains /DCTDecode when jpeg_quality is set.
1025    #[test]
1026    fn pdf_with_dct_contains_dctdecode_marker() {
1027        let doc = load_doc("chicken.djvu");
1028        let pdf = djvu_to_pdf_with_options(
1029            &doc,
1030            &PdfOptions {
1031                jpeg_quality: Some(80),
1032            },
1033        )
1034        .unwrap();
1035        let has_dct = pdf.windows(9).any(|w| w == b"DCTDecode");
1036        assert!(has_dct, "PDF must contain DCTDecode");
1037    }
1038
1039    /// Output PDF does NOT contain /DCTDecode when jpeg_quality is None.
1040    #[test]
1041    fn pdf_without_dct_has_no_dctdecode() {
1042        let doc = load_doc("chicken.djvu");
1043        let pdf = djvu_to_pdf_with_options(&doc, &PdfOptions { jpeg_quality: None }).unwrap();
1044        let has_dct = pdf.windows(9).any(|w| w == b"DCTDecode");
1045        assert!(!has_dct, "FlateDecode PDF must not contain DCTDecode");
1046    }
1047
1048    /// `djvu_to_pdf` (default, DCT at 80) is smaller than FlateDecode.
1049    #[test]
1050    fn default_djvu_to_pdf_is_dct() {
1051        let doc = load_doc("chicken.djvu");
1052        let default_pdf = djvu_to_pdf(&doc).unwrap();
1053        let flat_pdf = djvu_to_pdf_with_options(&doc, &PdfOptions { jpeg_quality: None }).unwrap();
1054        assert!(
1055            default_pdf.len() < flat_pdf.len(),
1056            "default PDF must use DCT and be smaller than FlateDecode"
1057        );
1058    }
1059}