Skip to main content

dais_document/
pdf_hayro.rs

//! Hayro-backed PDF document source.
//!
//! Uses the pure-Rust hayro crate for CPU-based PDF rendering.
//! No system library dependencies required.
5
6use crate::page::{PageDimensions, RenderSize, RenderedPage};
7use crate::source::{DocumentError, DocumentSource, EmbeddedMetadata, OutlineEntry};
8
9use hayro::hayro_interpret::InterpreterSettings;
10use hayro::hayro_syntax::Pdf;
11use hayro::{RenderCache, RenderSettings, render};
12
13/// A PDF document backed by the hayro renderer.
14pub struct HayroDocument {
15    pdf: Pdf,
16}
17
18impl HayroDocument {
19    /// Open a PDF file from a byte buffer.
20    pub fn from_bytes(data: Vec<u8>) -> Result<Self, DocumentError> {
21        let pdf = Pdf::new(data)
22            .map_err(|e| DocumentError::Open(format!("Failed to parse PDF: {e:?}")))?;
23        Ok(Self { pdf })
24    }
25
26    /// Open a PDF file from a path.
27    pub fn open(path: &std::path::Path) -> Result<Self, DocumentError> {
28        let data = std::fs::read(path)?;
29        Self::from_bytes(data)
30    }
31}
32
33impl DocumentSource for HayroDocument {
34    fn page_count(&self) -> usize {
35        self.pdf.pages().len()
36    }
37
38    fn page_dimensions(&self, page_index: usize) -> PageDimensions {
39        let pages = self.pdf.pages();
40        let page = &pages[page_index];
41        let (w, h) = page.render_dimensions();
42        PageDimensions { width_pts: w, height_pts: h }
43    }
44
45    #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
46    fn render_page(
47        &self,
48        page_index: usize,
49        target_size: RenderSize,
50    ) -> Result<RenderedPage, DocumentError> {
51        let pages = self.pdf.pages();
52        if page_index >= pages.len() {
53            return Err(DocumentError::PageOutOfRange(page_index));
54        }
55
56        let page = &pages[page_index];
57        let (page_w, page_h) = page.render_dimensions();
58
59        // Target dimensions clamped to u16 range for hayro's RenderSettings
60        let target_w = u16::try_from(target_size.width).unwrap_or(u16::MAX);
61        let target_h = u16::try_from(target_size.height).unwrap_or(u16::MAX);
62
63        // Preserve aspect ratio by fitting the page within the target box.
64        // Independent x/y scaling distorts slides and makes previews look wrong.
65        let x_scale = f32::from(target_w) / page_w;
66        let y_scale = f32::from(target_h) / page_h;
67        let scale = x_scale.min(y_scale);
68        let render_w = (page_w * scale).round().clamp(1.0, f32::from(target_w)) as u16;
69        let render_h = (page_h * scale).round().clamp(1.0, f32::from(target_h)) as u16;
70
71        let cache = RenderCache::new();
72        let interpreter_settings = InterpreterSettings::default();
73
74        let render_settings = RenderSettings {
75            x_scale: scale,
76            y_scale: scale,
77            width: Some(render_w),
78            height: Some(render_h),
79            ..Default::default()
80        };
81
82        let pixmap = render(page, &cache, &interpreter_settings, &render_settings);
83
84        let width = pixmap.width();
85        let height = pixmap.height();
86        // data_as_u8_slice() returns premultiplied RGBA8 bytes. For opaque PDF content
87        // (white background, no transparency), premultiplied == unpremultiplied.
88        let data = pixmap.data_as_u8_slice().to_vec();
89
90        Ok(RenderedPage { data, width: u32::from(width), height: u32::from(height) })
91    }
92
93    fn embedded_metadata(&self) -> Option<EmbeddedMetadata> {
94        // hayro-syntax's Metadata struct exposes standard PDF info dict fields
95        // (title, author, subject, keywords, creator, producer) but not custom
96        // properties like pdfpc embeds. For now, we check if the subject or
97        // keywords field contains pdfpc-formatted data as a heuristic.
98        // Full XMP metadata extraction is a future enhancement.
99        let metadata = self.pdf.metadata();
100        let subject = metadata.subject.as_ref().and_then(|b| String::from_utf8(b.clone()).ok());
101
102        // Check if subject contains pdfpc-style metadata
103        if let Some(ref s) = subject
104            && (s.contains("[notes]") || s.contains("[overlay]"))
105        {
106            return Some(EmbeddedMetadata { pdfpc_data: Some(s.clone()) });
107        }
108
109        None
110    }
111
112    fn outline(&self) -> Option<Vec<OutlineEntry>> {
113        // Outline/bookmark extraction is not available in hayro-syntax's
114        // public API in v0.6. Return None — non-critical for v1.
115        None
116    }
117}
118
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Instant;

    /// Path of the shared fixture: two directories above this crate's
    /// manifest directory, then `tests/fixtures/test.pdf`.
    fn test_pdf_path() -> std::path::PathBuf {
        let mut root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        root.pop(); // crates/
        root.pop(); // repo root
        root.join("tests").join("fixtures").join("test.pdf")
    }

    /// Open the fixture PDF, failing the calling test if it cannot be loaded.
    fn open_fixture() -> HayroDocument {
        HayroDocument::open(&test_pdf_path()).expect("should load test PDF")
    }

    #[test]
    fn load_pdf_and_query_page_count() {
        assert_eq!(open_fixture().page_count(), 1);
    }

    #[test]
    fn query_page_dimensions() {
        let dims = open_fixture().page_dimensions(0);
        // US Letter: 612 x 792 points
        assert!((dims.width_pts - 612.0).abs() < 1.0);
        assert!((dims.height_pts - 792.0).abs() < 1.0);
        assert!(dims.aspect_ratio() > 0.0);
    }

    #[test]
    fn render_page_to_rgba() {
        let rendered = open_fixture()
            .render_page(0, RenderSize { width: 800, height: 600 })
            .expect("should render page");
        assert_eq!(rendered.width, 464);
        assert_eq!(rendered.height, 600);
        // RGBA: 4 bytes per pixel
        assert_eq!(rendered.data.len(), 464 * 600 * 4);
    }

    #[test]
    fn render_at_1080p_under_500ms() {
        let doc = open_fixture();
        let size = RenderSize { width: 1920, height: 1080 };

        // Only the render call itself is timed; loading happens before.
        let started = Instant::now();
        let rendered = doc.render_page(0, size).expect("should render page");
        let millis = started.elapsed().as_millis();

        assert_eq!(rendered.width, 835);
        assert_eq!(rendered.height, 1080);
        assert!(millis < 500, "Render took {millis}ms, expected <500ms");
    }

    #[test]
    fn render_page_preserves_aspect_ratio() {
        let rendered = open_fixture()
            .render_page(0, RenderSize { width: 1280, height: 720 })
            .expect("should render page");

        let expected = 612.0_f64 / 792.0_f64;
        let aspect = f64::from(rendered.width) / f64::from(rendered.height);
        assert!((aspect - expected).abs() < 0.01, "got aspect {aspect}, expected {expected}");
    }

    #[test]
    fn page_out_of_range_returns_error() {
        let outcome = open_fixture().render_page(99, RenderSize { width: 100, height: 100 });
        assert!(outcome.is_err());
    }

    #[test]
    fn from_bytes_works() {
        let bytes = std::fs::read(test_pdf_path()).expect("should read file");
        let doc = HayroDocument::from_bytes(bytes).expect("should load from bytes");
        assert_eq!(doc.page_count(), 1);
    }
}