use crate::ir::{BBox, Block, Paragraph, TextLine};
const PARA_GAP: f32 = 0.6;
pub(super) fn group_paragraphs(lines: &[TextLine]) -> Vec<Block> {
if lines.is_empty() {
return Vec::new();
}
let mut blocks = Vec::new();
let mut start = 0;
for i in 1..lines.len() {
let prev = &lines[i - 1];
let height = (prev.bbox.y1 - prev.bbox.y0).max(1.0);
let gap = lines[i].bbox.y0 - prev.bbox.y1;
if gap > PARA_GAP * height {
blocks.push(make_paragraph(&lines[start..i]));
start = i;
}
}
blocks.push(make_paragraph(&lines[start..]));
blocks
}
fn make_paragraph(lines: &[TextLine]) -> Block {
let text = lines.iter().map(|l| l.text.as_str()).collect::<Vec<_>>().join(" ");
let mut bbox: Option<BBox> = None;
for l in lines {
bbox = Some(match bbox {
None => l.bbox,
Some(b) => BBox {
x0: b.x0.min(l.bbox.x0),
y0: b.y0.min(l.bbox.y0),
x1: b.x1.max(l.bbox.x1),
y1: b.y1.max(l.bbox.y1),
},
});
}
Block::Paragraph(Paragraph { bbox: bbox.unwrap_or_default(), text, heading_level: None })
}
#[cfg(test)]
mod tests {
use super::*;
fn line(text: &str, y0: f32, y1: f32) -> TextLine {
TextLine { bbox: BBox { x0: 0.0, y0, x1: 100.0, y1 }, text: text.into(), chars: vec![] }
}
#[test]
fn tight_lines_form_one_paragraph() {
let lines = vec![line("first", 0.0, 10.0), line("second", 12.0, 22.0)];
let blocks = group_paragraphs(&lines);
assert_eq!(blocks.len(), 1);
match &blocks[0] {
Block::Paragraph(p) => assert_eq!(p.text, "first second"),
_ => panic!("expected paragraph"),
}
}
#[test]
fn wide_gap_splits_paragraphs() {
let lines = vec![line("para one", 0.0, 10.0), line("para two", 30.0, 40.0)];
let blocks = group_paragraphs(&lines);
assert_eq!(blocks.len(), 2);
}
#[test]
fn empty_lines_yield_no_blocks() {
assert!(group_paragraphs(&[]).is_empty());
}
}