1use crate::geometry::BBox;
2use crate::words::Word;
3
/// A horizontal run of words together with the box that encloses them.
///
/// Lines produced by [`cluster_words_into_lines`] keep `words` ordered by
/// ascending `x0` and keep `bbox` equal to the union of the word boxes.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct TextLine {
    /// Words that make up this line.
    pub words: Vec<Word>,
    /// Bounding box of the line.
    pub bbox: BBox,
}
13
/// A group of text lines together with the box that encloses them.
///
/// Blocks produced by [`cluster_lines_into_blocks`] keep `lines` ordered by
/// ascending `top` and keep `bbox` equal to the union of the line boxes.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct TextBlock {
    /// Lines that make up this block.
    pub lines: Vec<TextLine>,
    /// Bounding box of the block.
    pub bbox: BBox,
}
23
/// Tunable parameters for text extraction.
///
/// NOTE(review): nothing in this file reads this struct directly; the field
/// names mirror the parameters of the clustering functions below, so they are
/// presumably forwarded to them by a caller elsewhere — confirm.
#[derive(Debug, Clone)]
pub struct TextOptions {
    /// Layout-mode switch; off by default.
    /// NOTE(review): its effect is not visible in this file — confirm against the caller.
    pub layout: bool,
    /// Presumably the `y_tolerance` handed to [`cluster_words_into_lines`] — confirm.
    pub y_tolerance: f64,
    /// Presumably the `y_density` handed to [`cluster_lines_into_blocks`] — confirm.
    pub y_density: f64,
    /// Presumably the `x_density` handed to [`split_lines_at_columns`] — confirm.
    pub x_density: f64,
}
37
38impl Default for TextOptions {
39 fn default() -> Self {
40 Self {
41 layout: false,
42 y_tolerance: 3.0,
43 y_density: 10.0,
44 x_density: 10.0,
45 }
46 }
47}
48
49pub fn cluster_words_into_lines(words: &[Word], y_tolerance: f64) -> Vec<TextLine> {
55 if words.is_empty() {
56 return Vec::new();
57 }
58
59 let mut sorted: Vec<&Word> = words.iter().collect();
60 sorted.sort_by(|a, b| {
61 a.bbox
62 .top
63 .partial_cmp(&b.bbox.top)
64 .unwrap()
65 .then(a.bbox.x0.partial_cmp(&b.bbox.x0).unwrap())
66 });
67
68 let mut lines: Vec<TextLine> = Vec::new();
69
70 for word in sorted {
71 let word_mid_y = (word.bbox.top + word.bbox.bottom) / 2.0;
72
73 let mut found = false;
75 for line in &mut lines {
76 let line_mid_y = (line.bbox.top + line.bbox.bottom) / 2.0;
77 if (word_mid_y - line_mid_y).abs() <= y_tolerance {
78 line.bbox = line.bbox.union(&word.bbox);
79 line.words.push(word.clone());
80 found = true;
81 break;
82 }
83 }
84
85 if !found {
86 lines.push(TextLine {
87 words: vec![word.clone()],
88 bbox: word.bbox,
89 });
90 }
91 }
92
93 for line in &mut lines {
95 line.words
96 .sort_by(|a, b| a.bbox.x0.partial_cmp(&b.bbox.x0).unwrap());
97 }
98
99 lines.sort_by(|a, b| a.bbox.top.partial_cmp(&b.bbox.top).unwrap());
101
102 lines
103}
104
105pub fn split_lines_at_columns(lines: Vec<TextLine>, x_density: f64) -> Vec<TextLine> {
110 let mut result = Vec::new();
111 for line in lines {
112 if line.words.len() <= 1 {
113 result.push(line);
114 continue;
115 }
116
117 let mut current_words = vec![line.words[0].clone()];
118 let mut current_bbox = line.words[0].bbox;
119
120 for word in line.words.iter().skip(1) {
121 let gap = word.bbox.x0 - current_bbox.x1;
122 if gap > x_density {
123 result.push(TextLine {
124 words: current_words,
125 bbox: current_bbox,
126 });
127 current_words = vec![word.clone()];
128 current_bbox = word.bbox;
129 } else {
130 current_bbox = current_bbox.union(&word.bbox);
131 current_words.push(word.clone());
132 }
133 }
134
135 result.push(TextLine {
136 words: current_words,
137 bbox: current_bbox,
138 });
139 }
140
141 result.sort_by(|a, b| {
143 a.bbox
144 .top
145 .partial_cmp(&b.bbox.top)
146 .unwrap()
147 .then(a.bbox.x0.partial_cmp(&b.bbox.x0).unwrap())
148 });
149
150 result
151}
152
153pub fn cluster_lines_into_blocks(lines: Vec<TextLine>, y_density: f64) -> Vec<TextBlock> {
158 if lines.is_empty() {
159 return Vec::new();
160 }
161
162 let mut blocks: Vec<TextBlock> = Vec::new();
163
164 for line in lines {
165 let mut best_block: Option<usize> = None;
167 let mut best_gap = f64::INFINITY;
168
169 for (i, block) in blocks.iter().enumerate() {
170 let gap = line.bbox.top - block.bbox.bottom;
171 if gap >= 0.0
172 && gap <= y_density
173 && has_x_overlap(&line.bbox, &block.bbox)
174 && gap < best_gap
175 {
176 best_gap = gap;
177 best_block = Some(i);
178 }
179 }
180
181 if let Some(idx) = best_block {
182 blocks[idx].bbox = blocks[idx].bbox.union(&line.bbox);
183 blocks[idx].lines.push(line);
184 } else {
185 blocks.push(TextBlock {
186 bbox: line.bbox,
187 lines: vec![line],
188 });
189 }
190 }
191
192 for block in &mut blocks {
194 block
195 .lines
196 .sort_by(|a, b| a.bbox.top.partial_cmp(&b.bbox.top).unwrap());
197 }
198
199 blocks
200}
201
202fn has_x_overlap(a: &BBox, b: &BBox) -> bool {
204 a.x0 < b.x1 && b.x0 < a.x1
205}
206
207pub fn sort_blocks_reading_order(blocks: &mut [TextBlock], _x_density: f64) {
212 blocks.sort_by(|a, b| {
213 a.bbox
214 .top
215 .partial_cmp(&b.bbox.top)
216 .unwrap()
217 .then(a.bbox.x0.partial_cmp(&b.bbox.x0).unwrap())
218 });
219}
220
221pub fn blocks_to_text(blocks: &[TextBlock]) -> String {
227 blocks
228 .iter()
229 .map(|block| {
230 block
231 .lines
232 .iter()
233 .map(|line| {
234 line.words
235 .iter()
236 .map(|w| w.text.as_str())
237 .collect::<Vec<_>>()
238 .join(" ")
239 })
240 .collect::<Vec<_>>()
241 .join("\n")
242 })
243 .collect::<Vec<_>>()
244 .join("\n\n")
245}
246
247pub fn words_to_text(words: &[Word], y_tolerance: f64) -> String {
251 let lines = cluster_words_into_lines(words, y_tolerance);
252 lines
253 .iter()
254 .map(|line| {
255 line.words
256 .iter()
257 .map(|w| w.text.as_str())
258 .collect::<Vec<_>>()
259 .join(" ")
260 })
261 .collect::<Vec<_>>()
262 .join("\n")
263}
264
#[cfg(test)]
mod tests {
    use super::*;
    use crate::text::Char;

    /// Builds a left-to-right `Word` with the given text and box, backed by a
    /// single `Char` that carries the same text and box.
    fn make_word(text: &str, x0: f64, top: f64, x1: f64, bottom: f64) -> Word {
        Word {
            text: text.to_string(),
            bbox: BBox::new(x0, top, x1, bottom),
            doctop: top,
            direction: crate::text::TextDirection::Ltr,
            chars: vec![Char {
                text: text.to_string(),
                bbox: BBox::new(x0, top, x1, bottom),
                fontname: "TestFont".to_string(),
                size: 12.0,
                doctop: top,
                upright: true,
                direction: crate::text::TextDirection::Ltr,
                stroking_color: None,
                non_stroking_color: None,
                ctm: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
                char_code: 0,
                mcid: None,
                tag: None,
            }],
        }
    }

    // --- TextOptions ---

    #[test]
    fn test_text_options_default() {
        let opts = TextOptions::default();
        assert!(!opts.layout);
        assert_eq!(opts.y_tolerance, 3.0);
        assert_eq!(opts.y_density, 10.0);
        assert_eq!(opts.x_density, 10.0);
    }

    #[test]
    fn test_text_options_layout_true() {
        let opts = TextOptions {
            layout: true,
            ..TextOptions::default()
        };
        assert!(opts.layout);
    }

    // --- cluster_words_into_lines ---

    #[test]
    fn test_cluster_empty_words() {
        let lines = cluster_words_into_lines(&[], 3.0);
        assert!(lines.is_empty());
    }

    #[test]
    fn test_cluster_single_word() {
        let words = vec![make_word("Hello", 10.0, 100.0, 50.0, 112.0)];
        let lines = cluster_words_into_lines(&words, 3.0);
        assert_eq!(lines.len(), 1);
        assert_eq!(lines[0].words.len(), 1);
        assert_eq!(lines[0].words[0].text, "Hello");
        assert_eq!(lines[0].bbox, BBox::new(10.0, 100.0, 50.0, 112.0));
    }

    #[test]
    fn test_cluster_words_same_line() {
        let words = vec![
            make_word("Hello", 10.0, 100.0, 50.0, 112.0),
            make_word("World", 55.0, 100.0, 95.0, 112.0),
        ];
        let lines = cluster_words_into_lines(&words, 3.0);
        assert_eq!(lines.len(), 1);
        assert_eq!(lines[0].words.len(), 2);
        assert_eq!(lines[0].words[0].text, "Hello");
        assert_eq!(lines[0].words[1].text, "World");
    }

    #[test]
    fn test_cluster_words_different_lines() {
        let words = vec![
            make_word("Line1", 10.0, 100.0, 50.0, 112.0),
            make_word("Line2", 10.0, 120.0, 50.0, 132.0),
        ];
        let lines = cluster_words_into_lines(&words, 3.0);
        assert_eq!(lines.len(), 2);
        assert_eq!(lines[0].words[0].text, "Line1");
        assert_eq!(lines[1].words[0].text, "Line2");
    }

    #[test]
    fn test_cluster_words_slight_y_variation() {
        let words = vec![
            // Midpoints are 106.0 and 107.0: a difference of 1.0, inside the 3.0 tolerance.
            make_word("Hello", 10.0, 100.0, 50.0, 112.0),
            make_word("World", 55.0, 101.0, 95.0, 113.0),
        ];
        let lines = cluster_words_into_lines(&words, 3.0);
        assert_eq!(lines.len(), 1);
        assert_eq!(lines[0].words.len(), 2);
    }

    #[test]
    fn test_cluster_words_sorted_left_to_right_within_line() {
        let words = vec![
            // Supplied right-to-left on purpose; the result must be ordered by x0.
            make_word("World", 55.0, 100.0, 95.0, 112.0),
            make_word("Hello", 10.0, 100.0, 50.0, 112.0),
        ];
        let lines = cluster_words_into_lines(&words, 3.0);
        assert_eq!(lines[0].words[0].text, "Hello");
        assert_eq!(lines[0].words[1].text, "World");
    }

    #[test]
    fn test_cluster_three_lines() {
        let words = vec![
            make_word("First", 10.0, 100.0, 50.0, 112.0),
            make_word("line", 55.0, 100.0, 85.0, 112.0),
            make_word("Second", 10.0, 120.0, 60.0, 132.0),
            make_word("line", 65.0, 120.0, 95.0, 132.0),
            make_word("Third", 10.0, 140.0, 50.0, 152.0),
            make_word("line", 55.0, 140.0, 85.0, 152.0),
        ];
        let lines = cluster_words_into_lines(&words, 3.0);
        assert_eq!(lines.len(), 3);
        assert_eq!(lines[0].words.len(), 2);
        assert_eq!(lines[1].words.len(), 2);
        assert_eq!(lines[2].words.len(), 2);
    }

    #[test]
    fn test_cluster_line_bbox_is_union() {
        // "A" is taller than "B"; the line box must cover both.
        let words = vec![
            make_word("A", 10.0, 98.0, 20.0, 112.0),
            make_word("B", 25.0, 100.0, 35.0, 110.0),
        ];
        let lines = cluster_words_into_lines(&words, 3.0);
        assert_eq!(lines[0].bbox, BBox::new(10.0, 98.0, 35.0, 112.0));
    }

    // --- cluster_lines_into_blocks ---

    #[test]
    fn test_cluster_lines_empty() {
        let blocks = cluster_lines_into_blocks(vec![], 10.0);
        assert!(blocks.is_empty());
    }

    #[test]
    fn test_cluster_lines_single_block() {
        // Vertical gap between the lines is 3.0, within the 10.0 density.
        let lines = vec![
            TextLine {
                words: vec![make_word("Line1", 10.0, 100.0, 50.0, 112.0)],
                bbox: BBox::new(10.0, 100.0, 50.0, 112.0),
            },
            TextLine {
                words: vec![make_word("Line2", 10.0, 115.0, 50.0, 127.0)],
                bbox: BBox::new(10.0, 115.0, 50.0, 127.0),
            },
        ];
        let blocks = cluster_lines_into_blocks(lines, 10.0);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].lines.len(), 2);
        assert_eq!(blocks[0].bbox, BBox::new(10.0, 100.0, 50.0, 127.0));
    }

    #[test]
    fn test_cluster_lines_two_blocks() {
        let lines = vec![
            TextLine {
                words: vec![make_word("Block1", 10.0, 100.0, 60.0, 112.0)],
                bbox: BBox::new(10.0, 100.0, 60.0, 112.0),
            },
            TextLine {
                words: vec![make_word("Still1", 10.0, 115.0, 60.0, 127.0)],
                bbox: BBox::new(10.0, 115.0, 60.0, 127.0),
            },
            TextLine {
                // Gap of 73.0 from the previous line exceeds the 10.0 density: new block.
                words: vec![make_word("Block2", 10.0, 200.0, 60.0, 212.0)],
                bbox: BBox::new(10.0, 200.0, 60.0, 212.0),
            },
        ];
        let blocks = cluster_lines_into_blocks(lines, 10.0);
        assert_eq!(blocks.len(), 2);
        assert_eq!(blocks[0].lines.len(), 2);
        assert_eq!(blocks[1].lines.len(), 1);
    }

    #[test]
    fn test_cluster_lines_block_bbox() {
        // The second line is wider on both sides; the block box must grow to cover it.
        let lines = vec![
            TextLine {
                words: vec![make_word("Line1", 10.0, 100.0, 80.0, 112.0)],
                bbox: BBox::new(10.0, 100.0, 80.0, 112.0),
            },
            TextLine {
                words: vec![make_word("Line2", 5.0, 115.0, 90.0, 127.0)],
                bbox: BBox::new(5.0, 115.0, 90.0, 127.0),
            },
        ];
        let blocks = cluster_lines_into_blocks(lines, 10.0);
        assert_eq!(blocks[0].bbox, BBox::new(5.0, 100.0, 90.0, 127.0));
    }

    // --- sort_blocks_reading_order ---

    #[test]
    fn test_sort_single_column_top_to_bottom() {
        // Blocks are supplied bottom-first and must come out top-first.
        let mut blocks = vec![
            TextBlock {
                lines: vec![TextLine {
                    words: vec![make_word("Second", 10.0, 200.0, 60.0, 212.0)],
                    bbox: BBox::new(10.0, 200.0, 60.0, 212.0),
                }],
                bbox: BBox::new(10.0, 200.0, 60.0, 212.0),
            },
            TextBlock {
                lines: vec![TextLine {
                    words: vec![make_word("First", 10.0, 100.0, 60.0, 112.0)],
                    bbox: BBox::new(10.0, 100.0, 60.0, 112.0),
                }],
                bbox: BBox::new(10.0, 100.0, 60.0, 112.0),
            },
        ];
        sort_blocks_reading_order(&mut blocks, 10.0);
        assert_eq!(blocks[0].lines[0].words[0].text, "First");
        assert_eq!(blocks[1].lines[0].words[0].text, "Second");
    }

    #[test]
    fn test_sort_two_columns() {
        let mut blocks = vec![
            // Two rows of two blocks each, supplied right-before-left within each row.
            TextBlock {
                lines: vec![TextLine {
                    words: vec![make_word("Right1", 200.0, 100.0, 300.0, 112.0)],
                    bbox: BBox::new(200.0, 100.0, 300.0, 112.0),
                }],
                bbox: BBox::new(200.0, 100.0, 300.0, 112.0),
            },
            TextBlock {
                lines: vec![TextLine {
                    words: vec![make_word("Left1", 10.0, 100.0, 100.0, 112.0)],
                    bbox: BBox::new(10.0, 100.0, 100.0, 112.0),
                }],
                bbox: BBox::new(10.0, 100.0, 100.0, 112.0),
            },
            TextBlock {
                lines: vec![TextLine {
                    words: vec![make_word("Right2", 200.0, 200.0, 300.0, 212.0)],
                    bbox: BBox::new(200.0, 200.0, 300.0, 212.0),
                }],
                bbox: BBox::new(200.0, 200.0, 300.0, 212.0),
            },
            TextBlock {
                lines: vec![TextLine {
                    words: vec![make_word("Left2", 10.0, 200.0, 100.0, 212.0)],
                    bbox: BBox::new(10.0, 200.0, 100.0, 212.0),
                }],
                bbox: BBox::new(10.0, 200.0, 100.0, 212.0),
            },
        ];
        sort_blocks_reading_order(&mut blocks, 10.0);
        // Expected order is row by row: top first, then x0 within equal tops.
        assert_eq!(blocks[0].lines[0].words[0].text, "Left1");
        assert_eq!(blocks[1].lines[0].words[0].text, "Right1");
        assert_eq!(blocks[2].lines[0].words[0].text, "Left2");
        assert_eq!(blocks[3].lines[0].words[0].text, "Right2");
    }

    #[test]
    fn test_sort_single_block_unchanged() {
        let mut blocks = vec![TextBlock {
            lines: vec![TextLine {
                words: vec![make_word("Only", 10.0, 100.0, 50.0, 112.0)],
                bbox: BBox::new(10.0, 100.0, 50.0, 112.0),
            }],
            bbox: BBox::new(10.0, 100.0, 50.0, 112.0),
        }];
        sort_blocks_reading_order(&mut blocks, 10.0);
        assert_eq!(blocks[0].lines[0].words[0].text, "Only");
    }

    // --- blocks_to_text ---

    #[test]
    fn test_blocks_to_text_single_block_single_line() {
        let blocks = vec![TextBlock {
            lines: vec![TextLine {
                words: vec![
                    make_word("Hello", 10.0, 100.0, 50.0, 112.0),
                    make_word("World", 55.0, 100.0, 95.0, 112.0),
                ],
                bbox: BBox::new(10.0, 100.0, 95.0, 112.0),
            }],
            bbox: BBox::new(10.0, 100.0, 95.0, 112.0),
        }];
        assert_eq!(blocks_to_text(&blocks), "Hello World");
    }

    #[test]
    fn test_blocks_to_text_single_block_multi_line() {
        let blocks = vec![TextBlock {
            lines: vec![
                TextLine {
                    words: vec![make_word("Line1", 10.0, 100.0, 50.0, 112.0)],
                    bbox: BBox::new(10.0, 100.0, 50.0, 112.0),
                },
                TextLine {
                    words: vec![make_word("Line2", 10.0, 115.0, 50.0, 127.0)],
                    bbox: BBox::new(10.0, 115.0, 50.0, 127.0),
                },
            ],
            bbox: BBox::new(10.0, 100.0, 50.0, 127.0),
        }];
        assert_eq!(blocks_to_text(&blocks), "Line1\nLine2");
    }

    #[test]
    fn test_blocks_to_text_two_blocks() {
        let blocks = vec![
            TextBlock {
                lines: vec![TextLine {
                    words: vec![make_word("Block1", 10.0, 100.0, 60.0, 112.0)],
                    bbox: BBox::new(10.0, 100.0, 60.0, 112.0),
                }],
                bbox: BBox::new(10.0, 100.0, 60.0, 112.0),
            },
            TextBlock {
                lines: vec![TextLine {
                    words: vec![make_word("Block2", 10.0, 200.0, 60.0, 212.0)],
                    bbox: BBox::new(10.0, 200.0, 60.0, 212.0),
                }],
                bbox: BBox::new(10.0, 200.0, 60.0, 212.0),
            },
        ];
        // Blocks are separated by a blank line.
        assert_eq!(blocks_to_text(&blocks), "Block1\n\nBlock2");
    }

    #[test]
    fn test_blocks_to_text_empty() {
        assert_eq!(blocks_to_text(&[]), "");
    }

    // --- words_to_text ---

    #[test]
    fn test_words_to_text_single_line() {
        let words = vec![
            make_word("Hello", 10.0, 100.0, 50.0, 112.0),
            make_word("World", 55.0, 100.0, 95.0, 112.0),
        ];
        assert_eq!(words_to_text(&words, 3.0), "Hello World");
    }

    #[test]
    fn test_words_to_text_multi_line() {
        let words = vec![
            make_word("Line1", 10.0, 100.0, 50.0, 112.0),
            make_word("Line2", 10.0, 120.0, 50.0, 132.0),
        ];
        assert_eq!(words_to_text(&words, 3.0), "Line1\nLine2");
    }

    #[test]
    fn test_words_to_text_empty() {
        assert_eq!(words_to_text(&[], 3.0), "");
    }

    // --- split_lines_at_columns ---

    #[test]
    fn test_split_lines_no_columns() {
        let lines = vec![TextLine {
            words: vec![
                make_word("Hello", 10.0, 100.0, 50.0, 112.0),
                make_word("World", 55.0, 100.0, 95.0, 112.0),
            ],
            bbox: BBox::new(10.0, 100.0, 95.0, 112.0),
        }];
        let result = split_lines_at_columns(lines, 50.0);
        // The 5.0 gap is below the 50.0 threshold, so the line stays whole.
        assert_eq!(result.len(), 1);
    }

    #[test]
    fn test_split_lines_with_column_gap() {
        // The 150.0 gap between the words exceeds the 10.0 threshold.
        let lines = vec![TextLine {
            words: vec![
                make_word("Left", 10.0, 100.0, 50.0, 112.0),
                make_word("Right", 200.0, 100.0, 250.0, 112.0),
            ],
            bbox: BBox::new(10.0, 100.0, 250.0, 112.0),
        }];
        let result = split_lines_at_columns(lines, 10.0);
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].words[0].text, "Left");
        assert_eq!(result[1].words[0].text, "Right");
    }

    #[test]
    fn test_split_lines_single_word_line() {
        let lines = vec![TextLine {
            words: vec![make_word("Only", 10.0, 100.0, 50.0, 112.0)],
            bbox: BBox::new(10.0, 100.0, 50.0, 112.0),
        }];
        let result = split_lines_at_columns(lines, 10.0);
        assert_eq!(result.len(), 1);
    }

    // --- end-to-end pipeline: cluster -> split -> block -> sort -> render ---

    #[test]
    fn test_end_to_end_single_column() {
        let words = vec![
            // First paragraph: two lines 3.0 apart.
            make_word("Para1", 10.0, 100.0, 50.0, 112.0),
            make_word("line1", 55.0, 100.0, 90.0, 112.0),
            make_word("Para1", 10.0, 115.0, 50.0, 127.0),
            make_word("line2", 55.0, 115.0, 90.0, 127.0),
            // Second paragraph: far enough below to form its own block.
            make_word("Para2", 10.0, 200.0, 50.0, 212.0),
            make_word("line1", 55.0, 200.0, 90.0, 212.0),
        ];
        let lines = cluster_words_into_lines(&words, 3.0);
        let split = split_lines_at_columns(lines, 10.0);
        let mut blocks = cluster_lines_into_blocks(split, 10.0);
        sort_blocks_reading_order(&mut blocks, 10.0);
        let text = blocks_to_text(&blocks);

        assert_eq!(text, "Para1 line1\nPara1 line2\n\nPara2 line1");
    }

    #[test]
    fn test_end_to_end_two_column_layout() {
        let words = vec![
            // Left column, two lines.
            make_word("Left", 10.0, 100.0, 40.0, 112.0),
            make_word("L1", 45.0, 100.0, 60.0, 112.0),
            make_word("Left", 10.0, 115.0, 40.0, 127.0),
            make_word("L2", 45.0, 115.0, 60.0, 127.0),
            // Right column, two lines at the same heights as the left column.
            make_word("Right", 200.0, 100.0, 240.0, 112.0),
            make_word("R1", 245.0, 100.0, 260.0, 112.0),
            make_word("Right", 200.0, 115.0, 240.0, 127.0),
            make_word("R2", 245.0, 115.0, 260.0, 127.0),
        ];
        let lines = cluster_words_into_lines(&words, 3.0);
        let split = split_lines_at_columns(lines, 10.0);
        let mut blocks = cluster_lines_into_blocks(split, 10.0);
        sort_blocks_reading_order(&mut blocks, 10.0);
        let text = blocks_to_text(&blocks);

        // Each column becomes its own block; both start at top 100.0, so the
        // left block (smaller x0) is rendered first.
        assert_eq!(text, "Left L1\nLeft L2\n\nRight R1\nRight R2");
    }

    #[test]
    fn test_end_to_end_mixed_blocks() {
        let words = vec![
            // Header on its own row.
            make_word("Header", 10.0, 50.0, 100.0, 62.0),
            // Two columns sharing one row.
            make_word("Left", 10.0, 100.0, 50.0, 112.0),
            make_word("Right", 200.0, 100.0, 250.0, 112.0),
            // Footer on its own row.
            make_word("Footer", 10.0, 250.0, 100.0, 262.0),
        ];
        let lines = cluster_words_into_lines(&words, 3.0);
        let split = split_lines_at_columns(lines, 10.0);
        let mut blocks = cluster_lines_into_blocks(split, 10.0);
        sort_blocks_reading_order(&mut blocks, 10.0);
        let text = blocks_to_text(&blocks);

        // Every piece is too far from the others to merge, giving four blocks.
        assert_eq!(text, "Header\n\nLeft\n\nRight\n\nFooter");
    }

    #[test]
    fn test_reading_order_top_to_bottom_left_to_right() {
        let words = vec![
            // Supplied out of vertical order on purpose.
            make_word("C", 10.0, 300.0, 50.0, 312.0),
            make_word("A", 10.0, 100.0, 50.0, 112.0),
            make_word("B", 10.0, 200.0, 50.0, 212.0),
        ];
        let lines = cluster_words_into_lines(&words, 3.0);
        let split = split_lines_at_columns(lines, 10.0);
        let mut blocks = cluster_lines_into_blocks(split, 10.0);
        sort_blocks_reading_order(&mut blocks, 10.0);
        let text = blocks_to_text(&blocks);

        assert_eq!(text, "A\n\nB\n\nC");
    }
}