Skip to main content

fop_render/pdf/
streaming.rs

1//! Streaming PDF renderer for incremental document generation
2//!
3//! This module provides a streaming PDF renderer that can build PDF documents
4//! incrementally, adding pages one at a time without keeping all pages in memory.
5
6use super::document::{PdfDocument, PdfPage};
7use super::image::ImageXObject;
8use crate::image::ImageInfo;
9use fop_layout::{AreaId, AreaTree, AreaType};
10use fop_types::{Length, Result};
11use std::collections::HashMap;
12use std::io::Write;
13
/// Streaming PDF renderer that builds documents incrementally
///
/// Unlike the standard PdfRenderer which requires the full area tree,
/// this renderer accepts pages one at a time (see `add_page`), so the area
/// tree for a page only has to stay alive for the duration of that call.
///
/// Rendered pages are accumulated in the wrapped `PdfDocument`; the PDF
/// bytes themselves are produced only when `to_bytes` or `write_to` is
/// called.
pub struct StreamingPdfRenderer {
    /// The PDF document being built
    document: PdfDocument,

    /// Map from area IDs to image indices (within current page).
    ///
    /// Cleared at the start of every `add_page` call. The stored indices are
    /// the values returned by `PdfDocument::add_image_xobject`.
    image_map: HashMap<AreaId, usize>,

    /// Total number of pages added
    page_count: usize,
}
29
30impl StreamingPdfRenderer {
31    /// Create a new streaming PDF renderer
32    pub fn new() -> Self {
33        let mut document = PdfDocument::new();
34        document.info.title = Some("FOP Streaming PDF".to_string());
35
36        Self {
37            document,
38            image_map: HashMap::new(),
39            page_count: 0,
40        }
41    }
42
43    /// Add a single page from an area tree to the PDF
44    ///
45    /// This method processes one page at a time. After this call returns,
46    /// the area_tree can be dropped to free memory.
47    ///
48    /// # Arguments
49    ///
50    /// * `area_tree` - Area tree containing exactly one page
51    ///
52    /// # Returns
53    ///
54    /// Returns Ok(()) if the page was successfully added, or an error otherwise.
55    pub fn add_page(&mut self, area_tree: &AreaTree) -> Result<()> {
56        // Clear image map for this page
57        self.image_map.clear();
58
59        // First pass: collect all images and add them to the document
60        self.collect_images(area_tree)?;
61
62        // Second pass: find the page area and render it
63        for (id, node) in area_tree.iter() {
64            if matches!(node.area.area_type, AreaType::Page) {
65                let page = self.render_page(area_tree, id)?;
66                self.document.add_page(page);
67                self.page_count += 1;
68                return Ok(());
69            }
70        }
71
72        Err(fop_types::FopError::Generic(
73            "No page area found in area tree".to_string(),
74        ))
75    }
76
77    /// Write the PDF to an output stream
78    ///
79    /// This finalizes the PDF document and writes all data to the output stream.
80    /// After this call, no more pages can be added.
81    pub fn write_to<W: Write>(self, writer: &mut W) -> Result<()> {
82        let bytes = self.document.to_bytes()?;
83        writer
84            .write_all(&bytes)
85            .map_err(|e| fop_types::FopError::Generic(format!("Failed to write PDF: {}", e)))?;
86        Ok(())
87    }
88
89    /// Get the bytes of the PDF document
90    ///
91    /// This finalizes the PDF document and returns all bytes.
92    /// After this call, no more pages can be added.
93    pub fn to_bytes(self) -> Result<Vec<u8>> {
94        self.document.to_bytes()
95    }
96
97    /// Get the number of pages added so far
98    pub fn page_count(&self) -> usize {
99        self.page_count
100    }
101
102    /// Set the PDF document title
103    pub fn set_title(&mut self, title: String) {
104        self.document.info.title = Some(title);
105    }
106
107    /// Set the PDF document author
108    pub fn set_author(&mut self, author: String) {
109        self.document.info.author = Some(author);
110    }
111
112    /// Set the PDF document subject
113    pub fn set_subject(&mut self, subject: String) {
114        self.document.info.subject = Some(subject);
115    }
116
117    /// Collect all images from the area tree and add them to the document
118    fn collect_images(&mut self, area_tree: &AreaTree) -> Result<()> {
119        for (id, node) in area_tree.iter() {
120            if matches!(node.area.area_type, AreaType::Viewport) {
121                if let Some(image_data) = node.area.image_data() {
122                    let image_index = self.add_image_from_data(image_data)?;
123                    self.image_map.insert(id, image_index);
124                }
125            }
126        }
127        Ok(())
128    }
129
130    /// Add an image to the document from raw image data
131    fn add_image_from_data(&mut self, image_data: &[u8]) -> Result<usize> {
132        let image_info = ImageInfo::from_bytes(image_data)?;
133        let xobject = ImageXObject::from_image_info(&image_info)?;
134        Ok(self.document.add_image_xobject(xobject))
135    }
136
137    /// Render a single page from the area tree
138    fn render_page(&self, area_tree: &AreaTree, page_id: AreaId) -> Result<PdfPage> {
139        let page_node = area_tree
140            .get(page_id)
141            .ok_or_else(|| fop_types::FopError::Generic("Page not found".to_string()))?;
142
143        let mut pdf_page = PdfPage::new(page_node.area.width(), page_node.area.height());
144
145        // Render all child areas recursively
146        let page_height = pdf_page.height;
147        // Streaming renderer does not embed custom fonts; font_cache is empty.
148        let font_cache: HashMap<String, usize> = HashMap::new();
149        render_children(
150            area_tree,
151            page_id,
152            &mut pdf_page,
153            Length::ZERO,
154            Length::ZERO,
155            page_height,
156            &self.image_map,
157            &font_cache,
158        )?;
159
160        Ok(pdf_page)
161    }
162}
163
164impl Default for StreamingPdfRenderer {
165    fn default() -> Self {
166        Self::new()
167    }
168}
169
170/// Render child areas recursively with absolute positioning
171#[allow(clippy::too_many_arguments)]
172fn render_children(
173    area_tree: &AreaTree,
174    parent_id: AreaId,
175    pdf_page: &mut PdfPage,
176    offset_x: Length,
177    offset_y: Length,
178    page_height: Length,
179    image_map: &HashMap<AreaId, usize>,
180    font_cache: &HashMap<String, usize>,
181) -> Result<()> {
182    let children = area_tree.children(parent_id);
183
184    for child_id in children {
185        if let Some(child_node) = area_tree.get(child_id) {
186            let abs_x = offset_x + child_node.area.geometry.x;
187            let abs_y = offset_y + child_node.area.geometry.y;
188
189            // Render background color
190            if let Some(bg_color) = child_node.area.traits.background_color {
191                let pdf_y = page_height - abs_y - child_node.area.height();
192                let border_radius = child_node.area.traits.border_radius;
193                pdf_page.add_background_with_radius(
194                    abs_x,
195                    pdf_y,
196                    child_node.area.width(),
197                    child_node.area.height(),
198                    bg_color,
199                    border_radius,
200                );
201            }
202
203            // Render borders
204            if let (Some(border_widths), Some(border_colors), Some(border_styles)) = (
205                child_node.area.traits.border_width,
206                child_node.area.traits.border_color,
207                child_node.area.traits.border_style,
208            ) {
209                let pdf_y = page_height - abs_y - child_node.area.height();
210                let border_radius = child_node.area.traits.border_radius;
211                pdf_page.add_borders_with_radius(
212                    abs_x,
213                    pdf_y,
214                    child_node.area.width(),
215                    child_node.area.height(),
216                    border_widths,
217                    border_colors,
218                    border_styles,
219                    border_radius,
220                );
221            }
222
223            match child_node.area.area_type {
224                AreaType::Text => {
225                    if let Some(text_content) = child_node.area.text_content() {
226                        let font_size = child_node
227                            .area
228                            .traits
229                            .font_size
230                            .unwrap_or(Length::from_pt(12.0));
231                        let pdf_y = page_height - abs_y - font_size;
232
233                        // Use embedded font when font-family is set and an entry
234                        // exists in the font cache.
235                        if let Some(family) = child_node.area.traits.font_family.as_deref() {
236                            if let Some(&font_idx) = font_cache.get(&family.to_lowercase()) {
237                                pdf_page.add_text_with_font(
238                                    text_content,
239                                    abs_x,
240                                    pdf_y,
241                                    font_size,
242                                    font_idx,
243                                );
244                            } else {
245                                pdf_page.add_text(text_content, abs_x, pdf_y, font_size);
246                            }
247                        } else {
248                            pdf_page.add_text(text_content, abs_x, pdf_y, font_size);
249                        }
250                    }
251                }
252                AreaType::FootnoteSeparator => {
253                    // Render as a thin horizontal line (rule)
254                    let pdf_y = page_height - abs_y;
255                    let thickness = child_node
256                        .area
257                        .traits
258                        .border_width
259                        .map(|w| w[0])
260                        .unwrap_or(Length::from_pt(1.0));
261                    let color = child_node
262                        .area
263                        .traits
264                        .border_color
265                        .map(|c| c[0])
266                        .unwrap_or(fop_types::Color::BLACK);
267                    pdf_page.add_rule(
268                        abs_x,
269                        pdf_y,
270                        child_node.area.width(),
271                        thickness,
272                        color,
273                        "solid",
274                    );
275                }
276                AreaType::Footnote => {
277                    // Render footnote content (recursively render children)
278                    render_children(
279                        area_tree,
280                        child_id,
281                        pdf_page,
282                        abs_x,
283                        abs_y,
284                        page_height,
285                        image_map,
286                        font_cache,
287                    )?;
288                }
289                AreaType::Viewport => {
290                    if let Some(&image_index) = image_map.get(&child_id) {
291                        let pdf_y = page_height - abs_y - child_node.area.height();
292                        pdf_page.add_image(
293                            image_index,
294                            abs_x,
295                            pdf_y,
296                            child_node.area.width(),
297                            child_node.area.height(),
298                        );
299                    }
300                    render_children(
301                        area_tree,
302                        child_id,
303                        pdf_page,
304                        abs_x,
305                        abs_y,
306                        page_height,
307                        image_map,
308                        font_cache,
309                    )?;
310                }
311                _ => {
312                    render_children(
313                        area_tree,
314                        child_id,
315                        pdf_page,
316                        abs_x,
317                        abs_y,
318                        page_height,
319                        image_map,
320                        font_cache,
321                    )?;
322                }
323            }
324        }
325    }
326
327    Ok(())
328}
329
#[cfg(test)]
mod tests {
    use super::*;
    use fop_layout::{Area, AreaTree};
    use fop_types::{Point, Rect, Size};

    /// Build an area tree holding one empty A4 portrait page (210 × 297 mm).
    fn a4_page_tree() -> AreaTree {
        let dimensions = Size::new(Length::from_mm(210.0), Length::from_mm(297.0));
        let bounds = Rect::from_point_size(Point::ZERO, dimensions);

        let mut tree = AreaTree::new();
        tree.add_area(Area::new(AreaType::Page, bounds));
        tree
    }

    /// Feed a freshly built A4 page into `renderer`, panicking on failure.
    fn push_a4_page(renderer: &mut StreamingPdfRenderer) {
        let tree = a4_page_tree();
        renderer.add_page(&tree).expect("test: should succeed");
        // `tree` is dropped here, before the next page is built.
    }

    /// A new renderer has no pages.
    #[test]
    fn test_streaming_renderer_creation() {
        assert_eq!(StreamingPdfRenderer::new().page_count(), 0);
    }

    /// Adding one page bumps the counter to one.
    #[test]
    fn test_add_single_page() {
        let mut renderer = StreamingPdfRenderer::new();
        push_a4_page(&mut renderer);
        assert_eq!(renderer.page_count(), 1);
    }

    /// The counter follows every successive page.
    #[test]
    fn test_add_multiple_pages() {
        let mut renderer = StreamingPdfRenderer::new();

        for expected in 1..=5usize {
            push_a4_page(&mut renderer);
            assert_eq!(renderer.page_count(), expected);
        }
    }

    /// Title, author and subject all end up in the serialized document.
    #[test]
    fn test_set_document_metadata() {
        let mut renderer = StreamingPdfRenderer::new();
        renderer.set_title("Test Document".to_string());
        renderer.set_author("Test Author".to_string());
        renderer.set_subject("Test Subject".to_string());

        let bytes = renderer.to_bytes().expect("test: should succeed");
        let pdf_str = String::from_utf8_lossy(&bytes);

        for expected in ["Test Document", "Test Author", "Test Subject"].iter() {
            assert!(pdf_str.contains(*expected));
        }
    }

    /// A three-page document serializes with PDF header and EOF marker.
    #[test]
    fn test_streaming_pdf_output() {
        let mut renderer = StreamingPdfRenderer::new();
        renderer.set_title("Streaming Test".to_string());

        (0..3).for_each(|_| push_a4_page(&mut renderer));

        // The count has to be read before `to_bytes` consumes the renderer.
        assert_eq!(renderer.page_count(), 3);

        let bytes = renderer.to_bytes().expect("test: should succeed");
        let pdf_str = String::from_utf8_lossy(&bytes);

        assert!(pdf_str.starts_with("%PDF-"));
        assert!(pdf_str.contains("%%EOF"));
    }

    /// `write_to` emits a non-empty PDF into an in-memory writer.
    #[test]
    fn test_write_to_stream() {
        let mut renderer = StreamingPdfRenderer::new();
        push_a4_page(&mut renderer);

        let mut output = Vec::new();
        renderer
            .write_to(&mut output)
            .expect("test: should succeed");

        assert!(!output.is_empty());
        assert!(String::from_utf8_lossy(&output).starts_with("%PDF-"));
    }

    /// One hundred pages can be fed in, each from a short-lived tree.
    #[test]
    fn test_memory_efficient_processing() {
        let mut renderer = StreamingPdfRenderer::new();

        for index in 0..100usize {
            push_a4_page(&mut renderer);

            // Spot-check progress on every tenth page.
            if index % 10 == 0 {
                assert_eq!(renderer.page_count(), index + 1);
            }
        }

        assert_eq!(renderer.page_count(), 100);
    }

    /// A tree without any page area is rejected.
    #[test]
    fn test_no_page_error() {
        let mut renderer = StreamingPdfRenderer::new();
        let empty = AreaTree::new();

        assert!(renderer.add_page(&empty).is_err());
    }
}
479
#[cfg(test)]
mod tests_extended {
    use super::*;
    use fop_layout::{Area, AreaTree};
    use fop_types::{Point, Rect, Size};

    /// Build an area tree containing one empty page of the given size (mm).
    fn make_page_tree(width_mm: f64, height_mm: f64) -> AreaTree {
        let page_size = Size::new(Length::from_mm(width_mm), Length::from_mm(height_mm));

        let mut tree = AreaTree::new();
        tree.add_area(Area::new(
            AreaType::Page,
            Rect::from_point_size(Point::ZERO, page_size),
        ));
        tree
    }

    /// Add one page of the given size to `renderer`, panicking on failure.
    fn push_page(renderer: &mut StreamingPdfRenderer, width_mm: f64, height_mm: f64) {
        renderer
            .add_page(&make_page_tree(width_mm, height_mm))
            .expect("test: should succeed");
    }

    /// Add one A4 portrait page to `renderer` and serialize the document.
    fn finish_with_a4_page(mut renderer: StreamingPdfRenderer) -> Vec<u8> {
        push_page(&mut renderer, 210.0, 297.0);
        renderer.to_bytes().expect("test: should succeed")
    }

    // ---- Construction ----

    #[test]
    fn test_default_creates_zero_pages() {
        assert_eq!(StreamingPdfRenderer::default().page_count(), 0);
    }

    #[test]
    fn test_new_starts_with_zero_pages() {
        assert_eq!(StreamingPdfRenderer::new().page_count(), 0);
    }

    // ---- Page addition ----

    #[test]
    fn test_add_one_page_increments_count() {
        let mut renderer = StreamingPdfRenderer::new();
        push_page(&mut renderer, 210.0, 297.0);
        assert_eq!(renderer.page_count(), 1);
    }

    #[test]
    fn test_add_two_pages_increments_count() {
        let mut renderer = StreamingPdfRenderer::new();
        push_page(&mut renderer, 210.0, 297.0);
        push_page(&mut renderer, 210.0, 297.0);
        assert_eq!(renderer.page_count(), 2);
    }

    #[test]
    fn test_add_ten_pages() {
        let mut renderer = StreamingPdfRenderer::new();
        for expected in 1..=10usize {
            push_page(&mut renderer, 210.0, 297.0);
            assert_eq!(renderer.page_count(), expected);
        }
    }

    #[test]
    fn test_add_landscape_page() {
        // A4 landscape: 297 × 210 mm
        let mut renderer = StreamingPdfRenderer::new();
        push_page(&mut renderer, 297.0, 210.0);
        assert_eq!(renderer.page_count(), 1);
    }

    #[test]
    fn test_letter_size_page() {
        // US Letter ≈ 215.9 × 279.4 mm
        let mut renderer = StreamingPdfRenderer::new();
        push_page(&mut renderer, 215.9, 279.4);
        assert_eq!(renderer.page_count(), 1);
    }

    // ---- Empty tree error ----

    #[test]
    fn test_empty_tree_returns_error() {
        let mut renderer = StreamingPdfRenderer::new();
        let outcome = renderer.add_page(&AreaTree::new());
        assert!(outcome.is_err());
    }

    // ---- Metadata ----

    #[test]
    fn test_title_appears_in_output() {
        let mut renderer = StreamingPdfRenderer::new();
        renderer.set_title("My Title".to_string());
        let pdf = finish_with_a4_page(renderer);
        assert!(String::from_utf8_lossy(&pdf).contains("My Title"));
    }

    #[test]
    fn test_author_appears_in_output() {
        let mut renderer = StreamingPdfRenderer::new();
        renderer.set_author("Jane Doe".to_string());
        let pdf = finish_with_a4_page(renderer);
        assert!(String::from_utf8_lossy(&pdf).contains("Jane Doe"));
    }

    #[test]
    fn test_subject_appears_in_output() {
        let mut renderer = StreamingPdfRenderer::new();
        renderer.set_subject("Test Subject".to_string());
        let pdf = finish_with_a4_page(renderer);
        assert!(String::from_utf8_lossy(&pdf).contains("Test Subject"));
    }

    // ---- PDF structure ----

    #[test]
    fn test_output_starts_with_pdf_header() {
        let pdf = finish_with_a4_page(StreamingPdfRenderer::new());
        assert!(pdf.starts_with(b"%PDF-"), "PDF header missing");
    }

    #[test]
    fn test_output_ends_with_eof_marker() {
        let pdf = finish_with_a4_page(StreamingPdfRenderer::new());
        let text = String::from_utf8_lossy(&pdf);
        assert!(text.contains("%%EOF"), "%%EOF missing");
    }

    #[test]
    fn test_output_is_non_empty() {
        let pdf = finish_with_a4_page(StreamingPdfRenderer::new());
        assert!(!pdf.is_empty());
    }

    // ---- Buffer accumulation ----

    #[test]
    fn test_multi_page_output_larger_than_single() {
        let single = finish_with_a4_page(StreamingPdfRenderer::new());

        let mut five_pages = StreamingPdfRenderer::new();
        (0..5).for_each(|_| push_page(&mut five_pages, 210.0, 297.0));
        let multi = five_pages.to_bytes().expect("test: should succeed");

        assert!(
            multi.len() > single.len(),
            "5-page PDF should be larger than 1-page PDF"
        );
    }

    // ---- write_to stream ----

    #[test]
    fn test_write_to_vec_matches_to_bytes() {
        let expected = finish_with_a4_page(StreamingPdfRenderer::new());

        let mut streamed = StreamingPdfRenderer::new();
        push_page(&mut streamed, 210.0, 297.0);
        let mut buf = Vec::new();
        streamed.write_to(&mut buf).expect("test: should succeed");

        // Contents may differ (e.g. by generation time), so only the
        // overall shape is compared: non-empty, PDF header, equal length.
        assert!(!buf.is_empty());
        assert!(buf.starts_with(b"%PDF-"));
        assert_eq!(buf.len(), expected.len());
    }

    #[test]
    fn test_write_to_cursor() {
        use std::io::Cursor;

        let mut renderer = StreamingPdfRenderer::new();
        push_page(&mut renderer, 210.0, 297.0);

        let mut cursor = Cursor::new(Vec::new());
        renderer.write_to(&mut cursor).expect("test: should succeed");
        assert!(!cursor.into_inner().is_empty());
    }

    // ---- Page count across a mix of sizes ----

    #[test]
    fn test_mixed_page_sizes_all_added() {
        let mut renderer = StreamingPdfRenderer::new();
        push_page(&mut renderer, 210.0, 297.0); // A4 portrait
        push_page(&mut renderer, 297.0, 210.0); // A4 landscape
        push_page(&mut renderer, 215.9, 279.4); // US Letter
        assert_eq!(renderer.page_count(), 3);
    }
}