use super::config::{ItemGrouping, SegmentParams, assign_items_to_regions, build_partitioned_zone};
use super::gaps::{
bbox_of, candidate_col_widths, find_x_gaps, find_y_gaps, max_gap_size, partition_by_x_center,
partition_by_x_center_refs, partition_by_y_center,
};
use super::rows::{
col_gaps_from_xs, is_row_aligned, median_band_char_count, median_item_height, try_header_band,
};
use super::table::build_table_zone;
use super::types::{Bounded, SplitDir, Zone};
/// Fraction of the parent bbox width below which a candidate column counts as
/// narrow. `segment` promotes a zone to a table when it has at least three
/// candidate columns and any one of them is narrower than this fraction.
const NARROW_COL_FRACTION: f32 = 0.20;
/// Multiplier applied to the median item height to obtain the row tolerance
/// handed to `is_row_aligned` and `median_band_char_count` in the two-column
/// table check.
const ROW_ALIGNED_TABLE_TOLERANCE_SCALE: f32 = 0.6;
/// Upper bound on the larger of the two per-column values returned by
/// `median_band_char_count`; above it a row-aligned two-column zone is not
/// promoted to a table.
const ROW_ALIGNED_MAX_MEDIAN_CHARS: usize = 60;
/// The two columns count as asymmetric when the larger median is at least this
/// many times the smaller one.
const ROW_ALIGNED_ASYMMETRY_RATIO: usize = 3;
/// A two-column zone has a "narrow side" when the smaller median is at or
/// below this value.
const ROW_ALIGNED_NARROW_MEDIAN_CHARS: usize = 10;
pub fn segment<T: Bounded>(items: Vec<T>, p: &SegmentParams) -> Zone<T> {
let bbox = bbox_of(&items);
if items.len() < p.min_zone_items {
return Zone::Leaf { bbox, items };
}
if !p.table_regions.is_empty() {
match assign_items_to_regions(&items, &p.table_regions) {
ItemGrouping::SingleRegion(r_idx) => {
let region = &p.table_regions[r_idx];
let drawn_gaps = col_gaps_from_xs(®ion.col_xs);
let v_gaps = if !drawn_gaps.is_empty() {
drawn_gaps
} else {
find_x_gaps(&items, p.min_v_gap)
};
tracing::debug!(
r_idx,
col_xs_count = region.col_xs.len(),
row_ys_count = region.row_ys.len(),
v_gaps_count = v_gaps.len(),
"SingleRegion force-promote"
);
if !v_gaps.is_empty() {
return build_table_zone(items, &v_gaps, bbox, p, ®ion.row_ys);
}
}
ItemGrouping::Partitioned(assignment) => {
return build_partitioned_zone(items, assignment, p, bbox);
}
ItemGrouping::None => {}
}
}
let v_gaps = find_x_gaps(&items, p.min_v_gap);
let h_gaps = find_y_gaps(&items, p.min_h_gap);
let v_max = max_gap_size(&v_gaps);
let h_max = max_gap_size(&h_gaps);
if let Some(header_indices) = try_header_band(&items, &bbox, v_max, p) {
let mut header_items = Vec::new();
let mut rest_items = Vec::new();
for (i, item) in items.into_iter().enumerate() {
if header_indices.contains(&i) {
header_items.push(item);
} else {
rest_items.push(item);
}
}
return Zone::Split {
dir: SplitDir::Horizontal,
bbox,
children: vec![segment(header_items, p), segment(rest_items, p)],
};
}
if v_max <= 0.0 && h_max <= 0.0 {
return Zone::Leaf { bbox, items };
}
if v_max >= h_max {
let parent_w = bbox.width();
let col_widths = candidate_col_widths(&bbox, &v_gaps);
let want_table = col_widths.len() >= 3
&& parent_w > 0.0
&& col_widths
.iter()
.any(|&w| w < parent_w * NARROW_COL_FRACTION);
if want_table {
return build_table_zone(items, &v_gaps, bbox, p, &[]);
}
if col_widths.len() == 2 {
let groups_probe = partition_by_x_center_refs(&items, &v_gaps);
if groups_probe.len() == 2 && groups_probe[0].len() >= 3 && groups_probe[1].len() >= 3 {
let row_tol = median_item_height(&items)
.map(|h| h * ROW_ALIGNED_TABLE_TOLERANCE_SCALE)
.unwrap_or(0.0);
if row_tol > 0.0 && is_row_aligned(&groups_probe[0], &groups_probe[1], row_tol) {
let left_median = median_band_char_count(&groups_probe[0], row_tol);
let right_median = median_band_char_count(&groups_probe[1], row_tol);
let min_m = left_median.min(right_median);
let max_m = left_median.max(right_median);
let narrow_side = min_m <= ROW_ALIGNED_NARROW_MEDIAN_CHARS;
let asymmetric =
min_m == 0 || max_m >= min_m.saturating_mul(ROW_ALIGNED_ASYMMETRY_RATIO);
if max_m <= ROW_ALIGNED_MAX_MEDIAN_CHARS && (narrow_side || asymmetric) {
return build_table_zone(items, &v_gaps, bbox, p, &[]);
}
}
}
}
let groups = partition_by_x_center(items, &v_gaps);
let children: Vec<Zone<T>> = groups
.into_iter()
.filter(|g| !g.is_empty())
.map(|g| segment(g, p))
.collect();
if children.len() < 2 {
let items: Vec<T> = children
.into_iter()
.flat_map(|c| match c {
Zone::Leaf { items, .. } => items,
_ => Vec::new(),
})
.collect();
return Zone::Leaf { bbox, items };
}
Zone::Split {
dir: SplitDir::Vertical,
bbox,
children,
}
} else {
let groups = partition_by_y_center(items, &h_gaps);
let children: Vec<Zone<T>> = groups
.into_iter()
.filter(|g| !g.is_empty())
.map(|g| segment(g, p))
.collect();
if children.len() < 2 {
let items: Vec<T> = children
.into_iter()
.flat_map(|c| match c {
Zone::Leaf { items, .. } => items,
_ => Vec::new(),
})
.collect();
return Zone::Leaf { bbox, items };
}
Zone::Split {
dir: SplitDir::Horizontal,
bbox,
children,
}
}
}
// Unit tests for this module are kept out of line in `segment_tests.rs` and
// are only compiled for test builds.
#[cfg(test)]
#[path = "segment_tests.rs"]
mod tests;