1use fop_layout::area::{AreaNode, AreaTree, AreaType};
6use fop_layout::AreaId;
7use fop_types::Result;
8
/// Renders an area tree as plain text.
///
/// The only configuration is how consecutive pages are separated in the
/// rendered output.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TextRenderer {
    /// When `true`, every page after the first is preceded by a form feed
    /// character (`\x0C`); when `false`, by two newlines instead.
    use_form_feed: bool,
}
14
15impl TextRenderer {
16 pub fn new() -> Self {
18 Self {
19 use_form_feed: true,
20 }
21 }
22
23 pub fn with_page_separator(use_form_feed: bool) -> Self {
25 Self { use_form_feed }
26 }
27
28 pub fn render_to_text(&self, area_tree: &AreaTree) -> Result<String> {
30 let mut output = String::new();
31
32 let page_ids: Vec<AreaId> = area_tree
34 .iter()
35 .filter_map(|(id, node)| {
36 if matches!(node.area.area_type, AreaType::Page) {
37 Some(id)
38 } else {
39 None
40 }
41 })
42 .collect();
43
44 for (page_num, id) in page_ids.into_iter().enumerate() {
45 if page_num > 0 {
46 if self.use_form_feed {
48 output.push('\x0C'); } else {
50 output.push_str("\n\n");
51 }
52 output.push_str(&format!("--- Page {} ---\n\n", page_num + 1));
53 }
54
55 self.render_area(area_tree, id, &mut output);
57 }
58
59 let trimmed = output.trim_end().to_string();
61 if trimmed.is_empty() {
62 Ok(String::new())
63 } else {
64 Ok(format!("{}\n", trimmed))
65 }
66 }
67
68 fn render_area(&self, area_tree: &AreaTree, area_id: AreaId, output: &mut String) {
70 let node = match area_tree.get(area_id) {
71 Some(n) => n,
72 None => return,
73 };
74
75 match node.area.area_type {
76 AreaType::Page | AreaType::Region | AreaType::Column => {
77 self.render_children(area_tree, area_id, output);
79 }
80 AreaType::Block => {
81 self.render_children(area_tree, area_id, output);
83 output.push('\n');
84 }
85 AreaType::Line => {
86 self.render_children(area_tree, area_id, output);
88 output.push('\n');
89 }
90 AreaType::Text => {
91 if let Some(text) = node.area.text_content() {
93 output.push_str(text);
94 }
95 }
96 AreaType::Space => {
97 output.push(' ');
99 }
100 AreaType::Inline => {
101 self.render_children(area_tree, area_id, output);
103 }
104 AreaType::Viewport => {
105 if node.area.has_image_data() {
107 output.push_str("[IMAGE]");
108 } else {
109 self.render_children(area_tree, area_id, output);
110 }
111 }
112 AreaType::Header => {
113 let start = output.len();
115 self.render_children(area_tree, area_id, output);
116 let header_text = output[start..].trim().to_string();
117 output.truncate(start);
118 if !header_text.is_empty() {
119 output.push_str(&header_text);
120 output.push('\n');
121 output.push_str(&"-".repeat(40));
122 output.push('\n');
123 }
124 }
125 AreaType::Footer => {
126 let start = output.len();
128 self.render_children(area_tree, area_id, output);
129 let footer_text = output[start..].trim().to_string();
130 output.truncate(start);
131 if !footer_text.is_empty() {
132 output.push_str(&"-".repeat(40));
133 output.push('\n');
134 output.push_str(&footer_text);
135 output.push('\n');
136 }
137 }
138 AreaType::Footnote => {
139 output.push_str("\n[Footnote] ");
141 self.render_children(area_tree, area_id, output);
142 output.push('\n');
143 }
144 AreaType::FootnoteSeparator => {
145 output.push_str("\n---\n");
147 }
148 AreaType::FloatArea => {
149 self.render_children(area_tree, area_id, output);
151 }
152 AreaType::SidebarStart | AreaType::SidebarEnd => {
153 self.render_children(area_tree, area_id, output);
155 }
156 }
157 }
158
159 fn render_children(&self, area_tree: &AreaTree, parent_id: AreaId, output: &mut String) {
161 let children = area_tree.children(parent_id);
162 for child_id in children {
163 self.render_area(area_tree, child_id, output);
164 }
165 }
166
167 pub fn extract_text(&self, area_tree: &AreaTree) -> Result<String> {
169 let mut output = String::new();
170
171 for (id, _node) in area_tree.iter() {
172 self.extract_text_from_area(area_tree, id, &mut output);
173 }
174
175 Ok(output)
176 }
177
178 fn extract_text_from_area(&self, area_tree: &AreaTree, area_id: AreaId, output: &mut String) {
180 if let Some(node) = area_tree.get(area_id) {
181 if let Some(text) = node.area.text_content() {
182 output.push_str(text);
183 output.push(' ');
184 }
185 }
186 }
187}
188
189impl Default for TextRenderer {
190 fn default() -> Self {
191 Self::new()
192 }
193}
194
195#[allow(dead_code)]
197fn should_add_line_break(node: &AreaNode) -> bool {
198 matches!(
199 node.area.area_type,
200 AreaType::Block | AreaType::Line | AreaType::Header | AreaType::Footer
201 )
202}
203
#[cfg(test)]
mod tests {
    use super::*;
    use fop_layout::area::{Area, AreaTree, AreaType};
    use fop_types::{Length, Point, Rect, Size};

    /// Builds a rectangle at `Point::ZERO` whose width and height are passed
    /// through `Length::from_pt`.
    fn make_rect(w: f64, h: f64) -> Rect {
        let size = Size::new(Length::from_pt(w), Length::from_pt(h));
        Rect::from_point_size(Point::ZERO, size)
    }

    /// A page-typed area with the dimensions used throughout these tests.
    fn page_area() -> Area {
        Area::new(AreaType::Page, make_rect(210.0, 297.0))
    }

    /// A text area of the given width (height 12) holding `content`.
    fn text_area(width: f64, content: &str) -> Area {
        Area::text(make_rect(width, 12.0), content.to_string())
    }

    /// Adds a page containing one 100x20 block containing one 50x12 text area.
    fn add_simple_page(tree: &mut AreaTree, content: &str) {
        let page = tree.add_area(page_area());
        let block = tree.add_area(Area::new(AreaType::Block, make_rect(100.0, 20.0)));
        let text = tree.add_area(text_area(50.0, content));

        tree.append_child(page, block).expect("test: should succeed");
        tree.append_child(block, text).expect("test: should succeed");
    }

    /// Runs `render_to_text` and unwraps the result.
    fn render(renderer: &TextRenderer, tree: &AreaTree) -> String {
        renderer.render_to_text(tree).expect("test: should succeed")
    }

    #[test]
    fn test_text_renderer_creation() {
        assert!(TextRenderer::new().use_form_feed);
    }

    #[test]
    fn test_text_renderer_no_form_feed() {
        assert!(!TextRenderer::with_page_separator(false).use_form_feed);
    }

    #[test]
    fn test_text_renderer_default() {
        assert!(TextRenderer::default().use_form_feed);
    }

    #[test]
    fn test_simple_text_extraction() {
        let mut tree = AreaTree::new();
        add_simple_page(&mut tree, "Hello World");

        let result = render(&TextRenderer::new(), &tree);

        assert!(result.contains("Hello World"), "got: {:?}", result);
    }

    #[test]
    fn test_multiple_lines() {
        let mut tree = AreaTree::new();
        let page = tree.add_area(page_area());
        let block = tree.add_area(Area::new(AreaType::Block, make_rect(100.0, 40.0)));
        let line1 = tree.add_area(Area::new(AreaType::Line, make_rect(100.0, 12.0)));
        let text1 = tree.add_area(text_area(50.0, "First line"));
        let line2 = tree.add_area(Area::new(AreaType::Line, make_rect(100.0, 12.0)));
        let text2 = tree.add_area(text_area(50.0, "Second line"));

        tree.append_child(page, block).expect("test: should succeed");
        tree.append_child(block, line1).expect("test: should succeed");
        tree.append_child(line1, text1).expect("test: should succeed");
        tree.append_child(block, line2).expect("test: should succeed");
        tree.append_child(line2, text2).expect("test: should succeed");

        let result = render(&TextRenderer::new(), &tree);

        assert!(result.contains("First line"), "got: {:?}", result);
        assert!(result.contains("Second line"), "got: {:?}", result);
    }

    #[test]
    fn test_multipage_form_feed() {
        let mut tree = AreaTree::new();
        add_simple_page(&mut tree, "Page one");
        add_simple_page(&mut tree, "Page two");

        let result = render(&TextRenderer::new(), &tree);

        assert!(result.contains("Page one"), "got: {:?}", result);
        assert!(result.contains("Page two"), "got: {:?}", result);
        assert!(
            result.contains('\x0C'),
            "expected form feed, got: {:?}",
            result
        );
    }

    #[test]
    fn test_multipage_no_form_feed_uses_separator() {
        let mut tree = AreaTree::new();
        add_simple_page(&mut tree, "Alpha");
        add_simple_page(&mut tree, "Beta");

        let result = render(&TextRenderer::with_page_separator(false), &tree);

        assert!(result.contains("Alpha"), "got: {:?}", result);
        assert!(result.contains("Beta"), "got: {:?}", result);
        assert!(
            result.contains("--- Page 2 ---"),
            "expected page separator, got: {:?}",
            result
        );
    }

    #[test]
    fn test_empty_tree_produces_empty_output() {
        let tree = AreaTree::new();

        let result = render(&TextRenderer::new(), &tree);

        assert!(
            result.is_empty(),
            "empty tree should produce empty output, got: {:?}",
            result
        );
    }

    #[test]
    fn test_space_area() {
        let mut tree = AreaTree::new();
        let page = tree.add_area(page_area());
        let block = tree.add_area(Area::new(AreaType::Block, make_rect(100.0, 20.0)));
        let line = tree.add_area(Area::new(AreaType::Line, make_rect(100.0, 12.0)));
        let text1 = tree.add_area(text_area(30.0, "foo"));
        let space = tree.add_area(Area::new(AreaType::Space, make_rect(5.0, 12.0)));
        let text2 = tree.add_area(text_area(30.0, "bar"));

        tree.append_child(page, block).expect("test: should succeed");
        tree.append_child(block, line).expect("test: should succeed");
        tree.append_child(line, text1).expect("test: should succeed");
        tree.append_child(line, space).expect("test: should succeed");
        tree.append_child(line, text2).expect("test: should succeed");

        let result = render(&TextRenderer::new(), &tree);

        assert!(
            result.contains("foo bar"),
            "expected 'foo bar', got: {:?}",
            result
        );
    }

    #[test]
    fn test_footnote_area() {
        let mut tree = AreaTree::new();
        let page = tree.add_area(page_area());
        let block = tree.add_area(Area::new(AreaType::Block, make_rect(100.0, 20.0)));
        let body_text = tree.add_area(text_area(50.0, "Body text"));
        let sep = tree.add_area(Area::new(
            AreaType::FootnoteSeparator,
            make_rect(100.0, 2.0),
        ));
        let footnote = tree.add_area(Area::new(AreaType::Footnote, make_rect(100.0, 20.0)));
        let fn_text = tree.add_area(text_area(50.0, "Footnote content"));

        tree.append_child(page, block).expect("test: should succeed");
        tree.append_child(block, body_text).expect("test: should succeed");
        tree.append_child(page, sep).expect("test: should succeed");
        tree.append_child(page, footnote).expect("test: should succeed");
        tree.append_child(footnote, fn_text).expect("test: should succeed");

        let result = render(&TextRenderer::new(), &tree);

        assert!(result.contains("Body text"), "got: {:?}", result);
        assert!(result.contains("[Footnote]"), "got: {:?}", result);
        assert!(result.contains("Footnote content"), "got: {:?}", result);
        assert!(
            result.contains("---"),
            "expected separator, got: {:?}",
            result
        );
    }

    #[test]
    fn test_viewport_image_placeholder() {
        let mut tree = AreaTree::new();
        let page = tree.add_area(page_area());
        let viewport = tree.add_area(Area::viewport_with_image(
            make_rect(50.0, 50.0),
            vec![0u8; 10],
        ));

        tree.append_child(page, viewport).expect("test: should succeed");

        let result = render(&TextRenderer::new(), &tree);

        assert!(
            result.contains("[IMAGE]"),
            "expected [IMAGE] placeholder, got: {:?}",
            result
        );
    }

    #[test]
    fn test_extract_text() {
        let mut tree = AreaTree::new();
        add_simple_page(&mut tree, "ExtractMe");

        let result = TextRenderer::new()
            .extract_text(&tree)
            .expect("test: should succeed");

        assert!(result.contains("ExtractMe"), "got: {:?}", result);
    }

    #[test]
    fn test_output_ends_with_newline() {
        let mut tree = AreaTree::new();
        add_simple_page(&mut tree, "Content");

        let result = render(&TextRenderer::new(), &tree);

        assert!(
            result.ends_with('\n'),
            "output should end with newline, got: {:?}",
            result
        );
    }

    #[test]
    fn test_header_with_divider() {
        let mut tree = AreaTree::new();
        let page = tree.add_area(page_area());
        let header = tree.add_area(Area::new(AreaType::Header, make_rect(210.0, 15.0)));
        let htext = tree.add_area(text_area(100.0, "My Header"));

        tree.append_child(page, header).expect("test: should succeed");
        tree.append_child(header, htext).expect("test: should succeed");

        let result = render(&TextRenderer::new(), &tree);

        assert!(result.contains("My Header"), "got: {:?}", result);
        assert!(
            result.contains("---"),
            "expected divider after header, got: {:?}",
            result
        );
    }

    #[test]
    fn test_footer_with_divider() {
        let mut tree = AreaTree::new();
        let page = tree.add_area(page_area());
        let footer = tree.add_area(Area::new(AreaType::Footer, make_rect(210.0, 15.0)));
        let ftext = tree.add_area(text_area(100.0, "My Footer"));

        tree.append_child(page, footer).expect("test: should succeed");
        tree.append_child(footer, ftext).expect("test: should succeed");

        let result = render(&TextRenderer::new(), &tree);

        assert!(result.contains("My Footer"), "got: {:?}", result);
        assert!(
            result.contains("---"),
            "expected divider before footer, got: {:?}",
            result
        );
    }

    #[test]
    fn test_three_pages_separators() {
        let mut tree = AreaTree::new();
        for i in 1..=3 {
            add_simple_page(&mut tree, &format!("Content {}", i));
        }

        let result = render(&TextRenderer::with_page_separator(false), &tree);

        assert!(result.contains("Content 1"), "got: {:?}", result);
        assert!(result.contains("Content 2"), "got: {:?}", result);
        assert!(result.contains("Content 3"), "got: {:?}", result);
        assert!(
            result.contains("--- Page 2 ---"),
            "expected page 2 separator, got: {:?}",
            result
        );
        assert!(
            result.contains("--- Page 3 ---"),
            "expected page 3 separator, got: {:?}",
            result
        );
    }
}