1use crate::geometry::BBox;
2use crate::words::Word;
3
/// A single line of text: a group of words together with one bounding box.
///
/// The clustering functions in this file build `bbox` as the union of the
/// bounding boxes of the words they put into the line.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct TextLine {
    /// Words that make up the line.
    pub words: Vec<Word>,
    /// Bounding box associated with the line.
    pub bbox: BBox,
}
13
/// A block of text: a group of lines together with one bounding box.
///
/// `cluster_lines_into_blocks` builds `bbox` as the union of the bounding
/// boxes of the lines it puts into the block.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct TextBlock {
    /// Lines that make up the block.
    pub lines: Vec<TextLine>,
    /// Bounding box associated with the block.
    pub bbox: BBox,
}
23
/// Tunable parameters for text extraction.
///
/// Derives `PartialEq` (like the other public types in this file) so that
/// configurations can be compared directly.
#[derive(Debug, Clone, PartialEq)]
pub struct TextOptions {
    /// Layout flag. NOTE(review): nothing in this file reads it; presumably it
    /// selects a layout-preserving extraction mode elsewhere — confirm.
    pub layout: bool,
    /// Vertical tolerance; presumably forwarded to `cluster_words_into_lines`
    /// as its `y_tolerance` argument — confirm against callers.
    pub y_tolerance: f64,
    /// Vertical density; presumably forwarded to `cluster_lines_into_blocks`
    /// as its `y_density` argument — confirm against callers.
    pub y_density: f64,
    /// Horizontal density; presumably forwarded to `split_lines_at_columns`
    /// as its `x_density` argument — confirm against callers.
    pub x_density: f64,
}
37
38impl Default for TextOptions {
39 fn default() -> Self {
40 Self {
41 layout: false,
42 y_tolerance: 3.0,
43 y_density: 10.0,
44 x_density: 10.0,
45 }
46 }
47}
48
49pub fn cluster_words_into_lines(words: &[Word], y_tolerance: f64) -> Vec<TextLine> {
55 if words.is_empty() {
56 return Vec::new();
57 }
58
59 let mut sorted: Vec<&Word> = words.iter().collect();
60 sorted.sort_by(|a, b| {
61 a.bbox
62 .top
63 .partial_cmp(&b.bbox.top)
64 .unwrap()
65 .then(a.bbox.x0.partial_cmp(&b.bbox.x0).unwrap())
66 });
67
68 let mut lines: Vec<TextLine> = Vec::new();
69
70 for word in sorted {
71 let word_mid_y = (word.bbox.top + word.bbox.bottom) / 2.0;
72
73 let mut found = false;
75 for line in &mut lines {
76 let line_mid_y = (line.bbox.top + line.bbox.bottom) / 2.0;
77 if (word_mid_y - line_mid_y).abs() <= y_tolerance {
78 line.bbox = line.bbox.union(&word.bbox);
79 line.words.push(word.clone());
80 found = true;
81 break;
82 }
83 }
84
85 if !found {
86 lines.push(TextLine {
87 words: vec![word.clone()],
88 bbox: word.bbox,
89 });
90 }
91 }
92
93 for line in &mut lines {
95 line.words
96 .sort_by(|a, b| a.bbox.x0.partial_cmp(&b.bbox.x0).unwrap());
97 }
98
99 lines.sort_by(|a, b| a.bbox.top.partial_cmp(&b.bbox.top).unwrap());
101
102 lines
103}
104
105pub fn split_lines_at_columns(lines: Vec<TextLine>, x_density: f64) -> Vec<TextLine> {
110 let mut result = Vec::new();
111 for line in lines {
112 if line.words.len() <= 1 {
113 result.push(line);
114 continue;
115 }
116
117 let mut current_words = vec![line.words[0].clone()];
118 let mut current_bbox = line.words[0].bbox;
119
120 for word in line.words.iter().skip(1) {
121 let gap = word.bbox.x0 - current_bbox.x1;
122 if gap > x_density {
123 result.push(TextLine {
124 words: current_words,
125 bbox: current_bbox,
126 });
127 current_words = vec![word.clone()];
128 current_bbox = word.bbox;
129 } else {
130 current_bbox = current_bbox.union(&word.bbox);
131 current_words.push(word.clone());
132 }
133 }
134
135 result.push(TextLine {
136 words: current_words,
137 bbox: current_bbox,
138 });
139 }
140
141 result.sort_by(|a, b| {
143 a.bbox
144 .top
145 .partial_cmp(&b.bbox.top)
146 .unwrap()
147 .then(a.bbox.x0.partial_cmp(&b.bbox.x0).unwrap())
148 });
149
150 result
151}
152
153pub fn cluster_lines_into_blocks(lines: Vec<TextLine>, y_density: f64) -> Vec<TextBlock> {
158 if lines.is_empty() {
159 return Vec::new();
160 }
161
162 let mut blocks: Vec<TextBlock> = Vec::new();
163
164 for line in lines {
165 let mut best_block: Option<usize> = None;
167 let mut best_gap = f64::INFINITY;
168
169 for (i, block) in blocks.iter().enumerate() {
170 let gap = line.bbox.top - block.bbox.bottom;
171 if gap >= 0.0
172 && gap <= y_density
173 && has_x_overlap(&line.bbox, &block.bbox)
174 && gap < best_gap
175 {
176 best_gap = gap;
177 best_block = Some(i);
178 }
179 }
180
181 if let Some(idx) = best_block {
182 blocks[idx].bbox = blocks[idx].bbox.union(&line.bbox);
183 blocks[idx].lines.push(line);
184 } else {
185 blocks.push(TextBlock {
186 bbox: line.bbox,
187 lines: vec![line],
188 });
189 }
190 }
191
192 for block in &mut blocks {
194 block
195 .lines
196 .sort_by(|a, b| a.bbox.top.partial_cmp(&b.bbox.top).unwrap());
197 }
198
199 blocks
200}
201
202fn has_x_overlap(a: &BBox, b: &BBox) -> bool {
204 a.x0 < b.x1 && b.x0 < a.x1
205}
206
207pub fn sort_blocks_reading_order(blocks: &mut [TextBlock], _x_density: f64) {
212 blocks.sort_by(|a, b| {
213 a.bbox
214 .top
215 .partial_cmp(&b.bbox.top)
216 .unwrap()
217 .then(a.bbox.x0.partial_cmp(&b.bbox.x0).unwrap())
218 });
219}
220
221pub fn blocks_to_text(blocks: &[TextBlock]) -> String {
227 blocks
228 .iter()
229 .map(|block| {
230 block
231 .lines
232 .iter()
233 .map(|line| {
234 line.words
235 .iter()
236 .map(|w| w.text.as_str())
237 .collect::<Vec<_>>()
238 .join(" ")
239 })
240 .collect::<Vec<_>>()
241 .join("\n")
242 })
243 .collect::<Vec<_>>()
244 .join("\n\n")
245}
246
247pub fn words_to_text(words: &[Word], y_tolerance: f64) -> String {
251 let lines = cluster_words_into_lines(words, y_tolerance);
252 lines
253 .iter()
254 .map(|line| {
255 line.words
256 .iter()
257 .map(|w| w.text.as_str())
258 .collect::<Vec<_>>()
259 .join(" ")
260 })
261 .collect::<Vec<_>>()
262 .join("\n")
263}
264
// Unit tests for the clustering, splitting, ordering, and rendering functions
// in this file. Fixtures use 12-unit-tall word boxes built with `make_word`.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::text::Char;

    // Builds a `Word` whose bounding box is (x0, top, x1, bottom) and which
    // contains a single `Char` carrying the same text and box.
    fn make_word(text: &str, x0: f64, top: f64, x1: f64, bottom: f64) -> Word {
        Word {
            text: text.to_string(),
            bbox: BBox::new(x0, top, x1, bottom),
            doctop: top,
            direction: crate::text::TextDirection::Ltr,
            chars: vec![Char {
                text: text.to_string(),
                bbox: BBox::new(x0, top, x1, bottom),
                fontname: "TestFont".to_string(),
                size: 12.0,
                doctop: top,
                upright: true,
                direction: crate::text::TextDirection::Ltr,
                stroking_color: None,
                non_stroking_color: None,
                ctm: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
                char_code: 0,
            }],
        }
    }

    // --- TextOptions ---

    // The defaults are layout off, y_tolerance 3.0, and both densities 10.0.
    #[test]
    fn test_text_options_default() {
        let opts = TextOptions::default();
        assert!(!opts.layout);
        assert_eq!(opts.y_tolerance, 3.0);
        assert_eq!(opts.y_density, 10.0);
        assert_eq!(opts.x_density, 10.0);
    }

    // Struct-update syntax can override a single field of the defaults.
    #[test]
    fn test_text_options_layout_true() {
        let opts = TextOptions {
            layout: true,
            ..TextOptions::default()
        };
        assert!(opts.layout);
    }

    // --- cluster_words_into_lines ---

    #[test]
    fn test_cluster_empty_words() {
        let lines = cluster_words_into_lines(&[], 3.0);
        assert!(lines.is_empty());
    }

    // A lone word becomes one line whose bbox equals the word's bbox.
    #[test]
    fn test_cluster_single_word() {
        let words = vec![make_word("Hello", 10.0, 100.0, 50.0, 112.0)];
        let lines = cluster_words_into_lines(&words, 3.0);
        assert_eq!(lines.len(), 1);
        assert_eq!(lines[0].words.len(), 1);
        assert_eq!(lines[0].words[0].text, "Hello");
        assert_eq!(lines[0].bbox, BBox::new(10.0, 100.0, 50.0, 112.0));
    }

    #[test]
    fn test_cluster_words_same_line() {
        let words = vec![
            make_word("Hello", 10.0, 100.0, 50.0, 112.0),
            make_word("World", 55.0, 100.0, 95.0, 112.0),
        ];
        let lines = cluster_words_into_lines(&words, 3.0);
        assert_eq!(lines.len(), 1);
        assert_eq!(lines[0].words.len(), 2);
        assert_eq!(lines[0].words[0].text, "Hello");
        assert_eq!(lines[0].words[1].text, "World");
    }

    // Midpoints 20 units apart exceed the tolerance of 3, giving two lines.
    #[test]
    fn test_cluster_words_different_lines() {
        let words = vec![
            make_word("Line1", 10.0, 100.0, 50.0, 112.0),
            make_word("Line2", 10.0, 120.0, 50.0, 132.0),
        ];
        let lines = cluster_words_into_lines(&words, 3.0);
        assert_eq!(lines.len(), 2);
        assert_eq!(lines[0].words[0].text, "Line1");
        assert_eq!(lines[1].words[0].text, "Line2");
    }

    // A 1-unit vertical offset is within the tolerance of 3, so both words
    // share a line.
    #[test]
    fn test_cluster_words_slight_y_variation() {
        let words = vec![
            make_word("Hello", 10.0, 100.0, 50.0, 112.0),
            make_word("World", 55.0, 101.0, 95.0, 113.0),
        ];
        let lines = cluster_words_into_lines(&words, 3.0);
        assert_eq!(lines.len(), 1);
        assert_eq!(lines[0].words.len(), 2);
    }

    // Input order is right-to-left; output must be left-to-right.
    #[test]
    fn test_cluster_words_sorted_left_to_right_within_line() {
        let words = vec![
            make_word("World", 55.0, 100.0, 95.0, 112.0),
            make_word("Hello", 10.0, 100.0, 50.0, 112.0),
        ];
        let lines = cluster_words_into_lines(&words, 3.0);
        assert_eq!(lines[0].words[0].text, "Hello");
        assert_eq!(lines[0].words[1].text, "World");
    }

    #[test]
    fn test_cluster_three_lines() {
        let words = vec![
            make_word("First", 10.0, 100.0, 50.0, 112.0),
            make_word("line", 55.0, 100.0, 85.0, 112.0),
            make_word("Second", 10.0, 120.0, 60.0, 132.0),
            make_word("line", 65.0, 120.0, 95.0, 132.0),
            make_word("Third", 10.0, 140.0, 50.0, 152.0),
            make_word("line", 55.0, 140.0, 85.0, 152.0),
        ];
        let lines = cluster_words_into_lines(&words, 3.0);
        assert_eq!(lines.len(), 3);
        assert_eq!(lines[0].words.len(), 2);
        assert_eq!(lines[1].words.len(), 2);
        assert_eq!(lines[2].words.len(), 2);
    }

    // The line bbox spans both words even though their heights differ.
    #[test]
    fn test_cluster_line_bbox_is_union() {
        let words = vec![
            make_word("A", 10.0, 98.0, 20.0, 112.0),
            make_word("B", 25.0, 100.0, 35.0, 110.0),
        ];
        let lines = cluster_words_into_lines(&words, 3.0);
        assert_eq!(lines[0].bbox, BBox::new(10.0, 98.0, 35.0, 112.0));
    }

    // --- cluster_lines_into_blocks ---

    #[test]
    fn test_cluster_lines_empty() {
        let blocks = cluster_lines_into_blocks(vec![], 10.0);
        assert!(blocks.is_empty());
    }

    // A 3-unit gap between the lines is within y_density 10, so they form one
    // block.
    #[test]
    fn test_cluster_lines_single_block() {
        let lines = vec![
            TextLine {
                words: vec![make_word("Line1", 10.0, 100.0, 50.0, 112.0)],
                bbox: BBox::new(10.0, 100.0, 50.0, 112.0),
            },
            TextLine {
                words: vec![make_word("Line2", 10.0, 115.0, 50.0, 127.0)],
                bbox: BBox::new(10.0, 115.0, 50.0, 127.0),
            },
        ];
        let blocks = cluster_lines_into_blocks(lines, 10.0);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].lines.len(), 2);
        assert_eq!(blocks[0].bbox, BBox::new(10.0, 100.0, 50.0, 127.0));
    }

    // The third line sits 73 units below the second, far beyond y_density, so
    // it starts a second block.
    #[test]
    fn test_cluster_lines_two_blocks() {
        let lines = vec![
            TextLine {
                words: vec![make_word("Block1", 10.0, 100.0, 60.0, 112.0)],
                bbox: BBox::new(10.0, 100.0, 60.0, 112.0),
            },
            TextLine {
                words: vec![make_word("Still1", 10.0, 115.0, 60.0, 127.0)],
                bbox: BBox::new(10.0, 115.0, 60.0, 127.0),
            },
            TextLine {
                words: vec![make_word("Block2", 10.0, 200.0, 60.0, 212.0)],
                bbox: BBox::new(10.0, 200.0, 60.0, 212.0),
            },
        ];
        let blocks = cluster_lines_into_blocks(lines, 10.0);
        assert_eq!(blocks.len(), 2);
        assert_eq!(blocks[0].lines.len(), 2);
        assert_eq!(blocks[1].lines.len(), 1);
    }

    // The block bbox is the union of lines with different horizontal extents.
    #[test]
    fn test_cluster_lines_block_bbox() {
        let lines = vec![
            TextLine {
                words: vec![make_word("Line1", 10.0, 100.0, 80.0, 112.0)],
                bbox: BBox::new(10.0, 100.0, 80.0, 112.0),
            },
            TextLine {
                words: vec![make_word("Line2", 5.0, 115.0, 90.0, 127.0)],
                bbox: BBox::new(5.0, 115.0, 90.0, 127.0),
            },
        ];
        let blocks = cluster_lines_into_blocks(lines, 10.0);
        assert_eq!(blocks[0].bbox, BBox::new(5.0, 100.0, 90.0, 127.0));
    }

    // --- sort_blocks_reading_order ---

    #[test]
    fn test_sort_single_column_top_to_bottom() {
        let mut blocks = vec![
            TextBlock {
                lines: vec![TextLine {
                    words: vec![make_word("Second", 10.0, 200.0, 60.0, 212.0)],
                    bbox: BBox::new(10.0, 200.0, 60.0, 212.0),
                }],
                bbox: BBox::new(10.0, 200.0, 60.0, 212.0),
            },
            TextBlock {
                lines: vec![TextLine {
                    words: vec![make_word("First", 10.0, 100.0, 60.0, 112.0)],
                    bbox: BBox::new(10.0, 100.0, 60.0, 112.0),
                }],
                bbox: BBox::new(10.0, 100.0, 60.0, 112.0),
            },
        ];
        sort_blocks_reading_order(&mut blocks, 10.0);
        assert_eq!(blocks[0].lines[0].words[0].text, "First");
        assert_eq!(blocks[1].lines[0].words[0].text, "Second");
    }

    // Blocks sharing a top are ordered left-to-right, so the two columns are
    // interleaved row by row.
    #[test]
    fn test_sort_two_columns() {
        let mut blocks = vec![
            TextBlock {
                lines: vec![TextLine {
                    words: vec![make_word("Right1", 200.0, 100.0, 300.0, 112.0)],
                    bbox: BBox::new(200.0, 100.0, 300.0, 112.0),
                }],
                bbox: BBox::new(200.0, 100.0, 300.0, 112.0),
            },
            TextBlock {
                lines: vec![TextLine {
                    words: vec![make_word("Left1", 10.0, 100.0, 100.0, 112.0)],
                    bbox: BBox::new(10.0, 100.0, 100.0, 112.0),
                }],
                bbox: BBox::new(10.0, 100.0, 100.0, 112.0),
            },
            TextBlock {
                lines: vec![TextLine {
                    words: vec![make_word("Right2", 200.0, 200.0, 300.0, 212.0)],
                    bbox: BBox::new(200.0, 200.0, 300.0, 212.0),
                }],
                bbox: BBox::new(200.0, 200.0, 300.0, 212.0),
            },
            TextBlock {
                lines: vec![TextLine {
                    words: vec![make_word("Left2", 10.0, 200.0, 100.0, 212.0)],
                    bbox: BBox::new(10.0, 200.0, 100.0, 212.0),
                }],
                bbox: BBox::new(10.0, 200.0, 100.0, 212.0),
            },
        ];
        sort_blocks_reading_order(&mut blocks, 10.0);
        assert_eq!(blocks[0].lines[0].words[0].text, "Left1");
        assert_eq!(blocks[1].lines[0].words[0].text, "Right1");
        assert_eq!(blocks[2].lines[0].words[0].text, "Left2");
        assert_eq!(blocks[3].lines[0].words[0].text, "Right2");
    }

    #[test]
    fn test_sort_single_block_unchanged() {
        let mut blocks = vec![TextBlock {
            lines: vec![TextLine {
                words: vec![make_word("Only", 10.0, 100.0, 50.0, 112.0)],
                bbox: BBox::new(10.0, 100.0, 50.0, 112.0),
            }],
            bbox: BBox::new(10.0, 100.0, 50.0, 112.0),
        }];
        sort_blocks_reading_order(&mut blocks, 10.0);
        assert_eq!(blocks[0].lines[0].words[0].text, "Only");
    }

    // --- blocks_to_text ---

    // Words within a line are joined with a single space.
    #[test]
    fn test_blocks_to_text_single_block_single_line() {
        let blocks = vec![TextBlock {
            lines: vec![TextLine {
                words: vec![
                    make_word("Hello", 10.0, 100.0, 50.0, 112.0),
                    make_word("World", 55.0, 100.0, 95.0, 112.0),
                ],
                bbox: BBox::new(10.0, 100.0, 95.0, 112.0),
            }],
            bbox: BBox::new(10.0, 100.0, 95.0, 112.0),
        }];
        assert_eq!(blocks_to_text(&blocks), "Hello World");
    }

    // Lines within a block are joined with a newline.
    #[test]
    fn test_blocks_to_text_single_block_multi_line() {
        let blocks = vec![TextBlock {
            lines: vec![
                TextLine {
                    words: vec![make_word("Line1", 10.0, 100.0, 50.0, 112.0)],
                    bbox: BBox::new(10.0, 100.0, 50.0, 112.0),
                },
                TextLine {
                    words: vec![make_word("Line2", 10.0, 115.0, 50.0, 127.0)],
                    bbox: BBox::new(10.0, 115.0, 50.0, 127.0),
                },
            ],
            bbox: BBox::new(10.0, 100.0, 50.0, 127.0),
        }];
        assert_eq!(blocks_to_text(&blocks), "Line1\nLine2");
    }

    // Blocks are separated by a blank line.
    #[test]
    fn test_blocks_to_text_two_blocks() {
        let blocks = vec![
            TextBlock {
                lines: vec![TextLine {
                    words: vec![make_word("Block1", 10.0, 100.0, 60.0, 112.0)],
                    bbox: BBox::new(10.0, 100.0, 60.0, 112.0),
                }],
                bbox: BBox::new(10.0, 100.0, 60.0, 112.0),
            },
            TextBlock {
                lines: vec![TextLine {
                    words: vec![make_word("Block2", 10.0, 200.0, 60.0, 212.0)],
                    bbox: BBox::new(10.0, 200.0, 60.0, 212.0),
                }],
                bbox: BBox::new(10.0, 200.0, 60.0, 212.0),
            },
        ];
        assert_eq!(blocks_to_text(&blocks), "Block1\n\nBlock2");
    }

    #[test]
    fn test_blocks_to_text_empty() {
        assert_eq!(blocks_to_text(&[]), "");
    }

    // --- words_to_text ---

    #[test]
    fn test_words_to_text_single_line() {
        let words = vec![
            make_word("Hello", 10.0, 100.0, 50.0, 112.0),
            make_word("World", 55.0, 100.0, 95.0, 112.0),
        ];
        assert_eq!(words_to_text(&words, 3.0), "Hello World");
    }

    #[test]
    fn test_words_to_text_multi_line() {
        let words = vec![
            make_word("Line1", 10.0, 100.0, 50.0, 112.0),
            make_word("Line2", 10.0, 120.0, 50.0, 132.0),
        ];
        assert_eq!(words_to_text(&words, 3.0), "Line1\nLine2");
    }

    #[test]
    fn test_words_to_text_empty() {
        assert_eq!(words_to_text(&[], 3.0), "");
    }

    // --- split_lines_at_columns ---

    // The 5-unit gap between the words is below x_density 50, so no split.
    #[test]
    fn test_split_lines_no_columns() {
        let lines = vec![TextLine {
            words: vec![
                make_word("Hello", 10.0, 100.0, 50.0, 112.0),
                make_word("World", 55.0, 100.0, 95.0, 112.0),
            ],
            bbox: BBox::new(10.0, 100.0, 95.0, 112.0),
        }];
        let result = split_lines_at_columns(lines, 50.0);
        assert_eq!(result.len(), 1);
    }

    // The 150-unit gap exceeds x_density 10, so the line splits in two.
    #[test]
    fn test_split_lines_with_column_gap() {
        let lines = vec![TextLine {
            words: vec![
                make_word("Left", 10.0, 100.0, 50.0, 112.0),
                make_word("Right", 200.0, 100.0, 250.0, 112.0),
            ],
            bbox: BBox::new(10.0, 100.0, 250.0, 112.0),
        }];
        let result = split_lines_at_columns(lines, 10.0);
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].words[0].text, "Left");
        assert_eq!(result[1].words[0].text, "Right");
    }

    #[test]
    fn test_split_lines_single_word_line() {
        let lines = vec![TextLine {
            words: vec![make_word("Only", 10.0, 100.0, 50.0, 112.0)],
            bbox: BBox::new(10.0, 100.0, 50.0, 112.0),
        }];
        let result = split_lines_at_columns(lines, 10.0);
        assert_eq!(result.len(), 1);
    }

    // --- end-to-end pipeline: cluster -> split -> block -> sort -> render ---

    // Two closely spaced lines form the first paragraph; a line 73 units
    // further down forms the second.
    #[test]
    fn test_end_to_end_single_column() {
        let words = vec![
            make_word("Para1", 10.0, 100.0, 50.0, 112.0),
            make_word("line1", 55.0, 100.0, 90.0, 112.0),
            make_word("Para1", 10.0, 115.0, 50.0, 127.0),
            make_word("line2", 55.0, 115.0, 90.0, 127.0),
            make_word("Para2", 10.0, 200.0, 50.0, 212.0),
            make_word("line1", 55.0, 200.0, 90.0, 212.0),
        ];
        let lines = cluster_words_into_lines(&words, 3.0);
        let split = split_lines_at_columns(lines, 10.0);
        let mut blocks = cluster_lines_into_blocks(split, 10.0);
        sort_blocks_reading_order(&mut blocks, 10.0);
        let text = blocks_to_text(&blocks);

        assert_eq!(text, "Para1 line1\nPara1 line2\n\nPara2 line1");
    }

    // Left and right columns share the same tops; each column becomes its own
    // block, and the left block is rendered first.
    #[test]
    fn test_end_to_end_two_column_layout() {
        let words = vec![
            make_word("Left", 10.0, 100.0, 40.0, 112.0),
            make_word("L1", 45.0, 100.0, 60.0, 112.0),
            make_word("Left", 10.0, 115.0, 40.0, 127.0),
            make_word("L2", 45.0, 115.0, 60.0, 127.0),
            make_word("Right", 200.0, 100.0, 240.0, 112.0),
            make_word("R1", 245.0, 100.0, 260.0, 112.0),
            make_word("Right", 200.0, 115.0, 240.0, 127.0),
            make_word("R2", 245.0, 115.0, 260.0, 127.0),
        ];
        let lines = cluster_words_into_lines(&words, 3.0);
        let split = split_lines_at_columns(lines, 10.0);
        let mut blocks = cluster_lines_into_blocks(split, 10.0);
        sort_blocks_reading_order(&mut blocks, 10.0);
        let text = blocks_to_text(&blocks);

        assert_eq!(text, "Left L1\nLeft L2\n\nRight R1\nRight R2");
    }

    // A header, two side-by-side words, and a footer all end up as separate
    // blocks.
    #[test]
    fn test_end_to_end_mixed_blocks() {
        let words = vec![
            make_word("Header", 10.0, 50.0, 100.0, 62.0),
            make_word("Left", 10.0, 100.0, 50.0, 112.0),
            make_word("Right", 200.0, 100.0, 250.0, 112.0),
            make_word("Footer", 10.0, 250.0, 100.0, 262.0),
        ];
        let lines = cluster_words_into_lines(&words, 3.0);
        let split = split_lines_at_columns(lines, 10.0);
        let mut blocks = cluster_lines_into_blocks(split, 10.0);
        sort_blocks_reading_order(&mut blocks, 10.0);
        let text = blocks_to_text(&blocks);

        assert_eq!(text, "Header\n\nLeft\n\nRight\n\nFooter");
    }

    // Words supplied out of order are rendered top-to-bottom.
    #[test]
    fn test_reading_order_top_to_bottom_left_to_right() {
        let words = vec![
            make_word("C", 10.0, 300.0, 50.0, 312.0),
            make_word("A", 10.0, 100.0, 50.0, 112.0),
            make_word("B", 10.0, 200.0, 50.0, 212.0),
        ];
        let lines = cluster_words_into_lines(&words, 3.0);
        let split = split_lines_at_columns(lines, 10.0);
        let mut blocks = cluster_lines_into_blocks(split, 10.0);
        sort_blocks_reading_order(&mut blocks, 10.0);
        let text = blocks_to_text(&blocks);

        assert_eq!(text, "A\n\nB\n\nC");
    }
}