Skip to main content

fop_render/text/
mod.rs

1//! Plain text rendering backend
2//!
3//! Generates plain text output from area trees for accessibility and text extraction.
4
5use fop_layout::area::{AreaNode, AreaTree, AreaType};
6use fop_layout::AreaId;
7use fop_types::Result;
8
/// Text renderer - converts area trees to plain text.
///
/// The only configuration is how consecutive pages are separated in the
/// rendered output (see `use_form_feed`).
#[derive(Debug, Clone)]
pub struct TextRenderer {
    /// When `true`, pages are separated by a form feed (`\x0C`); when
    /// `false`, by two newlines. A `--- Page N ---` marker follows the
    /// separator in both cases. `TextRenderer::new` sets this to `true`.
    use_form_feed: bool,
}
14
15impl TextRenderer {
16    /// Create a new text renderer
17    pub fn new() -> Self {
18        Self {
19            use_form_feed: true,
20        }
21    }
22
23    /// Create a text renderer with custom page separator
24    pub fn with_page_separator(use_form_feed: bool) -> Self {
25        Self { use_form_feed }
26    }
27
28    /// Render an area tree to plain text
29    pub fn render_to_text(&self, area_tree: &AreaTree) -> Result<String> {
30        let mut output = String::new();
31
32        // Collect all page IDs in tree iteration order
33        let page_ids: Vec<AreaId> = area_tree
34            .iter()
35            .filter_map(|(id, node)| {
36                if matches!(node.area.area_type, AreaType::Page) {
37                    Some(id)
38                } else {
39                    None
40                }
41            })
42            .collect();
43
44        for (page_num, id) in page_ids.into_iter().enumerate() {
45            if page_num > 0 {
46                // Add page separator between pages
47                if self.use_form_feed {
48                    output.push('\x0C'); // Form feed character
49                } else {
50                    output.push_str("\n\n");
51                }
52                output.push_str(&format!("--- Page {} ---\n\n", page_num + 1));
53            }
54
55            // Render the page content
56            self.render_area(area_tree, id, &mut output);
57        }
58
59        // Trim trailing whitespace/newlines then add a final newline
60        let trimmed = output.trim_end().to_string();
61        if trimmed.is_empty() {
62            Ok(String::new())
63        } else {
64            Ok(format!("{}\n", trimmed))
65        }
66    }
67
68    /// Render a single area and its children recursively
69    fn render_area(&self, area_tree: &AreaTree, area_id: AreaId, output: &mut String) {
70        let node = match area_tree.get(area_id) {
71            Some(n) => n,
72            None => return,
73        };
74
75        match node.area.area_type {
76            AreaType::Page | AreaType::Region | AreaType::Column => {
77                // Container areas - render children
78                self.render_children(area_tree, area_id, output);
79            }
80            AreaType::Block => {
81                // Block area - render children then add newline
82                self.render_children(area_tree, area_id, output);
83                output.push('\n');
84            }
85            AreaType::Line => {
86                // Line area - render children and add newline
87                self.render_children(area_tree, area_id, output);
88                output.push('\n');
89            }
90            AreaType::Text => {
91                // Text area - extract text content
92                if let Some(text) = node.area.text_content() {
93                    output.push_str(text);
94                }
95            }
96            AreaType::Space => {
97                // Space area - add a space
98                output.push(' ');
99            }
100            AreaType::Inline => {
101                // Inline area - render children
102                self.render_children(area_tree, area_id, output);
103            }
104            AreaType::Viewport => {
105                // Image placeholder
106                if node.area.has_image_data() {
107                    output.push_str("[IMAGE]");
108                } else {
109                    self.render_children(area_tree, area_id, output);
110                }
111            }
112            AreaType::Header => {
113                // Header - render content then add a divider line
114                let start = output.len();
115                self.render_children(area_tree, area_id, output);
116                let header_text = output[start..].trim().to_string();
117                output.truncate(start);
118                if !header_text.is_empty() {
119                    output.push_str(&header_text);
120                    output.push('\n');
121                    output.push_str(&"-".repeat(40));
122                    output.push('\n');
123                }
124            }
125            AreaType::Footer => {
126                // Footer - add a divider line then render content
127                let start = output.len();
128                self.render_children(area_tree, area_id, output);
129                let footer_text = output[start..].trim().to_string();
130                output.truncate(start);
131                if !footer_text.is_empty() {
132                    output.push_str(&"-".repeat(40));
133                    output.push('\n');
134                    output.push_str(&footer_text);
135                    output.push('\n');
136                }
137            }
138            AreaType::Footnote => {
139                // Footnote - render with marker
140                output.push_str("\n[Footnote] ");
141                self.render_children(area_tree, area_id, output);
142                output.push('\n');
143            }
144            AreaType::FootnoteSeparator => {
145                // Footnote separator - use a line
146                output.push_str("\n---\n");
147            }
148            AreaType::FloatArea => {
149                // Float area - render children inline
150                self.render_children(area_tree, area_id, output);
151            }
152            AreaType::SidebarStart | AreaType::SidebarEnd => {
153                // Sidebar markers - render children
154                self.render_children(area_tree, area_id, output);
155            }
156        }
157    }
158
159    /// Render all children of an area
160    fn render_children(&self, area_tree: &AreaTree, parent_id: AreaId, output: &mut String) {
161        let children = area_tree.children(parent_id);
162        for child_id in children {
163            self.render_area(area_tree, child_id, output);
164        }
165    }
166
167    /// Extract text from an area tree without formatting
168    pub fn extract_text(&self, area_tree: &AreaTree) -> Result<String> {
169        let mut output = String::new();
170
171        for (id, _node) in area_tree.iter() {
172            self.extract_text_from_area(area_tree, id, &mut output);
173        }
174
175        Ok(output)
176    }
177
178    /// Extract raw text from a single area (no formatting)
179    fn extract_text_from_area(&self, area_tree: &AreaTree, area_id: AreaId, output: &mut String) {
180        if let Some(node) = area_tree.get(area_id) {
181            if let Some(text) = node.area.text_content() {
182                output.push_str(text);
183                output.push(' ');
184            }
185        }
186    }
187}
188
189impl Default for TextRenderer {
190    fn default() -> Self {
191        Self::new()
192    }
193}
194
195/// Helper to check if an area should add a line break after
196#[allow(dead_code)]
197fn should_add_line_break(node: &AreaNode) -> bool {
198    matches!(
199        node.area.area_type,
200        AreaType::Block | AreaType::Line | AreaType::Header | AreaType::Footer
201    )
202}
203
#[cfg(test)]
mod tests {
    use super::*;
    use fop_layout::area::{Area, AreaTree, AreaType};
    use fop_types::{Length, Point, Rect, Size};

    /// Attach `$child` beneath `$parent` in `$tree`, failing the test if the
    /// tree rejects the link.
    macro_rules! link {
        ($tree:expr, $parent:expr => $child:expr) => {
            $tree
                .append_child($parent, $child)
                .expect("test: should succeed")
        };
    }

    /// Rectangle anchored at the origin, `w` by `h` points.
    fn make_rect(w: f64, h: f64) -> Rect {
        let size = Size::new(Length::from_pt(w), Length::from_pt(h));
        Rect::from_point_size(Point::ZERO, size)
    }

    /// Page area, 210 x 297.
    fn page_area() -> Area {
        Area::new(AreaType::Page, make_rect(210.0, 297.0))
    }

    /// Block area, 100 wide and `height` tall.
    fn block_area(height: f64) -> Area {
        Area::new(AreaType::Block, make_rect(100.0, height))
    }

    /// Line area, 100 x 12.
    fn line_area() -> Area {
        Area::new(AreaType::Line, make_rect(100.0, 12.0))
    }

    /// Text area, `width` x 12, holding `content`.
    fn text_area(width: f64, content: &str) -> Area {
        Area::text(make_rect(width, 12.0), content.to_string())
    }

    /// Run `render_to_text`, failing the test on error.
    fn render(renderer: &TextRenderer, tree: &AreaTree) -> String {
        renderer
            .render_to_text(tree)
            .expect("test: should succeed")
    }

    // -----------------------------------------------------------------------
    // Unit tests for TextRenderer internals (direct area tree construction)
    // -----------------------------------------------------------------------

    #[test]
    fn test_text_renderer_creation() {
        assert!(TextRenderer::new().use_form_feed);
    }

    #[test]
    fn test_text_renderer_no_form_feed() {
        assert!(!TextRenderer::with_page_separator(false).use_form_feed);
    }

    #[test]
    fn test_text_renderer_default() {
        assert!(TextRenderer::default().use_form_feed);
    }

    #[test]
    fn test_simple_text_extraction() {
        let mut tree = AreaTree::new();
        let page_id = tree.add_area(page_area());
        let block_id = tree.add_area(block_area(20.0));
        let text_id = tree.add_area(text_area(50.0, "Hello World"));

        link!(tree, page_id => block_id);
        link!(tree, block_id => text_id);

        let result = render(&TextRenderer::new(), &tree);

        assert!(result.contains("Hello World"), "got: {:?}", result);
    }

    #[test]
    fn test_multiple_lines() {
        let mut tree = AreaTree::new();
        let page_id = tree.add_area(page_area());
        let block_id = tree.add_area(block_area(40.0));
        let first_line = tree.add_area(line_area());
        let first_text = tree.add_area(text_area(50.0, "First line"));
        let second_line = tree.add_area(line_area());
        let second_text = tree.add_area(text_area(50.0, "Second line"));

        link!(tree, page_id => block_id);
        link!(tree, block_id => first_line);
        link!(tree, first_line => first_text);
        link!(tree, block_id => second_line);
        link!(tree, second_line => second_text);

        let result = render(&TextRenderer::new(), &tree);

        assert!(result.contains("First line"), "got: {:?}", result);
        assert!(result.contains("Second line"), "got: {:?}", result);
    }

    #[test]
    fn test_multipage_form_feed() {
        let mut tree = AreaTree::new();
        let first_page = tree.add_area(page_area());
        let first_block = tree.add_area(block_area(20.0));
        let first_text = tree.add_area(text_area(50.0, "Page one"));
        let second_page = tree.add_area(page_area());
        let second_block = tree.add_area(block_area(20.0));
        let second_text = tree.add_area(text_area(50.0, "Page two"));

        link!(tree, first_page => first_block);
        link!(tree, first_block => first_text);
        link!(tree, second_page => second_block);
        link!(tree, second_block => second_text);

        let result = render(&TextRenderer::new(), &tree);

        assert!(result.contains("Page one"), "got: {:?}", result);
        assert!(result.contains("Page two"), "got: {:?}", result);
        // The default renderer puts a form feed between pages.
        assert!(
            result.contains('\x0C'),
            "expected form feed, got: {:?}",
            result
        );
    }

    #[test]
    fn test_multipage_no_form_feed_uses_separator() {
        let mut tree = AreaTree::new();
        let first_page = tree.add_area(page_area());
        let first_block = tree.add_area(block_area(20.0));
        let first_text = tree.add_area(text_area(50.0, "Alpha"));
        let second_page = tree.add_area(page_area());
        let second_block = tree.add_area(block_area(20.0));
        let second_text = tree.add_area(text_area(50.0, "Beta"));

        link!(tree, first_page => first_block);
        link!(tree, first_block => first_text);
        link!(tree, second_page => second_block);
        link!(tree, second_block => second_text);

        let result = render(&TextRenderer::with_page_separator(false), &tree);

        assert!(result.contains("Alpha"), "got: {:?}", result);
        assert!(result.contains("Beta"), "got: {:?}", result);
        assert!(
            result.contains("--- Page 2 ---"),
            "expected page separator, got: {:?}",
            result
        );
    }

    #[test]
    fn test_empty_tree_produces_empty_output() {
        let tree = AreaTree::new();
        let result = render(&TextRenderer::new(), &tree);
        assert!(
            result.is_empty(),
            "empty tree should produce empty output, got: {:?}",
            result
        );
    }

    #[test]
    fn test_space_area() {
        let mut tree = AreaTree::new();
        let page_id = tree.add_area(page_area());
        let block_id = tree.add_area(block_area(20.0));
        let line_id = tree.add_area(line_area());
        let left_word = tree.add_area(text_area(30.0, "foo"));
        let gap = tree.add_area(Area::new(AreaType::Space, make_rect(5.0, 12.0)));
        let right_word = tree.add_area(text_area(30.0, "bar"));

        link!(tree, page_id => block_id);
        link!(tree, block_id => line_id);
        link!(tree, line_id => left_word);
        link!(tree, line_id => gap);
        link!(tree, line_id => right_word);

        let result = render(&TextRenderer::new(), &tree);

        assert!(
            result.contains("foo bar"),
            "expected 'foo bar', got: {:?}",
            result
        );
    }

    #[test]
    fn test_footnote_area() {
        let mut tree = AreaTree::new();
        let page_id = tree.add_area(page_area());
        let block_id = tree.add_area(block_area(20.0));
        let body_id = tree.add_area(text_area(50.0, "Body text"));
        let separator_id = tree.add_area(Area::new(
            AreaType::FootnoteSeparator,
            make_rect(100.0, 2.0),
        ));
        let footnote_id = tree.add_area(Area::new(AreaType::Footnote, make_rect(100.0, 20.0)));
        let note_text_id = tree.add_area(text_area(50.0, "Footnote content"));

        link!(tree, page_id => block_id);
        link!(tree, block_id => body_id);
        link!(tree, page_id => separator_id);
        link!(tree, page_id => footnote_id);
        link!(tree, footnote_id => note_text_id);

        let result = render(&TextRenderer::new(), &tree);

        assert!(result.contains("Body text"), "got: {:?}", result);
        assert!(result.contains("[Footnote]"), "got: {:?}", result);
        assert!(result.contains("Footnote content"), "got: {:?}", result);
        assert!(
            result.contains("---"),
            "expected separator, got: {:?}",
            result
        );
    }

    #[test]
    fn test_viewport_image_placeholder() {
        let mut tree = AreaTree::new();
        let page_id = tree.add_area(page_area());
        let image_id = tree.add_area(Area::viewport_with_image(
            make_rect(50.0, 50.0),
            vec![0u8; 10],
        ));

        link!(tree, page_id => image_id);

        let result = render(&TextRenderer::new(), &tree);

        assert!(
            result.contains("[IMAGE]"),
            "expected [IMAGE] placeholder, got: {:?}",
            result
        );
    }

    #[test]
    fn test_extract_text() {
        let mut tree = AreaTree::new();
        let page_id = tree.add_area(page_area());
        let block_id = tree.add_area(block_area(20.0));
        let text_id = tree.add_area(text_area(50.0, "ExtractMe"));

        link!(tree, page_id => block_id);
        link!(tree, block_id => text_id);

        let result = TextRenderer::new()
            .extract_text(&tree)
            .expect("test: should succeed");

        assert!(result.contains("ExtractMe"), "got: {:?}", result);
    }

    #[test]
    fn test_output_ends_with_newline() {
        let mut tree = AreaTree::new();
        let page_id = tree.add_area(page_area());
        let block_id = tree.add_area(block_area(20.0));
        let text_id = tree.add_area(text_area(50.0, "Content"));

        link!(tree, page_id => block_id);
        link!(tree, block_id => text_id);

        let result = render(&TextRenderer::new(), &tree);

        assert!(
            result.ends_with('\n'),
            "output should end with newline, got: {:?}",
            result
        );
    }

    #[test]
    fn test_header_with_divider() {
        let mut tree = AreaTree::new();
        let page_id = tree.add_area(page_area());
        let header_id = tree.add_area(Area::new(AreaType::Header, make_rect(210.0, 15.0)));
        let header_text = tree.add_area(text_area(100.0, "My Header"));

        link!(tree, page_id => header_id);
        link!(tree, header_id => header_text);

        let result = render(&TextRenderer::new(), &tree);

        assert!(result.contains("My Header"), "got: {:?}", result);
        // A divider line is expected below the header text.
        assert!(
            result.contains("---"),
            "expected divider after header, got: {:?}",
            result
        );
    }

    #[test]
    fn test_footer_with_divider() {
        let mut tree = AreaTree::new();
        let page_id = tree.add_area(page_area());
        let footer_id = tree.add_area(Area::new(AreaType::Footer, make_rect(210.0, 15.0)));
        let footer_text = tree.add_area(text_area(100.0, "My Footer"));

        link!(tree, page_id => footer_id);
        link!(tree, footer_id => footer_text);

        let result = render(&TextRenderer::new(), &tree);

        assert!(result.contains("My Footer"), "got: {:?}", result);
        assert!(
            result.contains("---"),
            "expected divider before footer, got: {:?}",
            result
        );
    }

    #[test]
    fn test_three_pages_separators() {
        let mut tree = AreaTree::new();
        for i in 1..=3 {
            let page_id = tree.add_area(page_area());
            let block_id = tree.add_area(block_area(20.0));
            let text_id = tree.add_area(text_area(50.0, &format!("Content {}", i)));
            link!(tree, page_id => block_id);
            link!(tree, block_id => text_id);
        }

        let result = render(&TextRenderer::with_page_separator(false), &tree);

        assert!(result.contains("Content 1"), "got: {:?}", result);
        assert!(result.contains("Content 2"), "got: {:?}", result);
        assert!(result.contains("Content 3"), "got: {:?}", result);
        assert!(
            result.contains("--- Page 2 ---"),
            "expected page 2 separator, got: {:?}",
            result
        );
        assert!(
            result.contains("--- Page 3 ---"),
            "expected page 3 separator, got: {:?}",
            result
        );
    }
}