use crate::pdf::hierarchy::SegmentData;
use super::super::geometry::Rect;
use super::LayoutRegion;
use crate::pdf::markdown::types::{LayoutHint, LayoutHintClass};
/// Minimum `intersection_over_self` score a segment must reach against a hint's
/// box before it can be assigned to that hint's region.
const MIN_IOS_THRESHOLD: f32 = 0.2;
/// Alphanumeric character count at or above which the text found inside a
/// picture hint is kept (returned as unassigned) instead of being suppressed.
const PICTURE_SUBSTANTIVE_CHAR_THRESHOLD: usize = 50;
/// Margin added around the tight bounding box of a region's segments when a
/// hint is refined; same coordinate units as the segments
/// (presumably PDF points — TODO confirm).
const BBOX_REFINEMENT_PADDING: f32 = 2.0;
/// Bound on refinement work: the refinement loop iterates over
/// `1..MAX_REFINEMENT_ITERATIONS`, i.e. at most `MAX_REFINEMENT_ITERATIONS - 1`
/// re-assignment rounds after the initial pass.
const MAX_REFINEMENT_ITERATIONS: usize = 3;
/// Distributes text segments over the layout regions described by `hints`.
///
/// Only hints with `confidence >= min_confidence` take part:
/// * `Table` hints never form regions;
/// * `Picture` hints never form regions either, but decide whether the text
///   lying inside them is kept or dropped;
/// * every other hint becomes one [`LayoutRegion`], in input order.
///
/// Overlap is measured with `Rect::intersection_over_self` called on the
/// segment's box (presumably the share of the segment's own area that is
/// covered — confirm against `Rect`).
///
/// # Parameters
/// * `segments` – candidate text segments; indices into this slice are what
///   the function hands back.
/// * `hints` – layout hints; returned regions borrow from this slice.
/// * `min_confidence` – hints below this confidence are ignored.
/// * `extracted_table_bboxes` – segments scoring `>= 0.5` against any of these
///   boxes are suppressed.
/// * `hint_validations` – looked up by hint index; a picture hint whose entry
///   equals `RegionValidation::Empty` never suppresses text. Missing entries
///   count as "not empty".
///
/// # Returns
/// `(regions, unassigned)`. `unassigned` holds the indices of segments that no
/// region claimed plus those preserved from picture hints. Whitespace-only
/// segments appear in neither collection, except on the early-return path
/// (no usable hints and no table boxes) where every index is returned as
/// unassigned.
pub(in crate::pdf::markdown) fn assign_segments_to_regions<'a>(
    segments: &[SegmentData],
    hints: &'a [LayoutHint],
    min_confidence: f32,
    extracted_table_bboxes: &[crate::types::BoundingBox],
    hint_validations: &[super::layout_validation::RegionValidation],
) -> (Vec<LayoutRegion<'a>>, Vec<usize>) {
    use std::collections::HashSet;

    // One walk over the hints: sort the confident ones into region candidates
    // and picture hints; tables and low-confidence hints fall away.
    let mut region_hints: Vec<&'a LayoutHint> = Vec::new();
    let mut pictures: Vec<(&'a LayoutHint, bool)> = Vec::new();
    for (hint_idx, hint) in hints.iter().enumerate() {
        if hint.confidence >= min_confidence {
            match hint.class {
                LayoutHintClass::Table => {}
                LayoutHintClass::Picture => {
                    let validated_empty = hint_validations
                        .get(hint_idx)
                        .is_some_and(|v| *v == super::layout_validation::RegionValidation::Empty);
                    pictures.push((hint, validated_empty));
                }
                _ => region_hints.push(hint),
            }
        }
    }

    // Nothing to classify against: hand every segment back untouched.
    if region_hints.is_empty() && extracted_table_bboxes.is_empty() && pictures.is_empty() {
        return (Vec::new(), (0..segments.len()).collect());
    }

    // Geometry is loop-invariant, so build every rectangle exactly once.
    let region_boxes: Vec<_> = region_hints
        .iter()
        .map(|h| Rect::from_lbrt(h.left, h.bottom, h.right, h.top))
        .collect();
    let region_areas: Vec<f32> = region_hints
        .iter()
        .map(|h| (h.right - h.left) * (h.top - h.bottom))
        .collect();
    let picture_boxes: Vec<_> = pictures
        .iter()
        .map(|(h, _)| Rect::from_lbrt(h.left, h.bottom, h.right, h.top))
        .collect();
    let table_boxes: Vec<_> = extracted_table_bboxes
        .iter()
        .map(|bb| Rect::from_lbrt(bb.x0 as f32, bb.y0 as f32, bb.x1 as f32, bb.y1 as f32))
        .collect();
    // `None` marks whitespace-only segments, which every pass below skips.
    let segment_boxes: Vec<Option<_>> = segments
        .iter()
        .map(|seg| {
            if seg.text.trim().is_empty() {
                None
            } else {
                Some(Rect::from_lbrt(seg.x, seg.y, seg.x + seg.width, seg.y + seg.height))
            }
        })
        .collect();

    // Pass 1: attach each segment to the first picture hint it scores >= 0.5 against.
    let mut picture_members: Vec<Vec<usize>> = vec![Vec::new(); pictures.len()];
    for (seg_idx, seg_box) in segment_boxes.iter().enumerate() {
        let Some(seg_box) = seg_box else { continue };
        let owner = picture_boxes
            .iter()
            .position(|pic_box| seg_box.intersection_over_self(pic_box) >= 0.5);
        if let Some(pi) = owner {
            picture_members[pi].push(seg_idx);
        }
    }

    // Pass 2: per picture, decide whether its text is kept or dropped.
    let mut kept_from_picture: HashSet<usize> = HashSet::new();
    let mut dropped_by_picture: HashSet<usize> = HashSet::new();
    for ((_, validated_empty), members) in pictures.iter().zip(&picture_members) {
        if *validated_empty {
            // A picture validated as empty never hides text.
            kept_from_picture.extend(members.iter().copied());
            continue;
        }
        let alphanum_count: usize = members
            .iter()
            .map(|&idx| segments[idx].text.chars().filter(|c| c.is_alphanumeric()).count())
            .sum();
        if alphanum_count < PICTURE_SUBSTANTIVE_CHAR_THRESHOLD {
            dropped_by_picture.extend(members.iter().copied());
            continue;
        }
        tracing::trace!(
            alphanum_count,
            segment_count = members.len(),
            "picture region contains substantive text — preserving"
        );
        kept_from_picture.extend(members.iter().copied());
    }

    // Pass 3: suppress, preserve, or assign each remaining segment.
    let mut regions: Vec<LayoutRegion> = region_hints
        .iter()
        .map(|&hint| LayoutRegion {
            hint,
            segment_indices: Vec::new(),
            merged_bbox: None,
        })
        .collect();
    let mut unassigned: Vec<usize> = Vec::new();
    let mut suppressed_count = 0_usize;

    for (seg_idx, seg_box) in segment_boxes.iter().enumerate() {
        let Some(seg_box) = seg_box else { continue };

        let inside_table = table_boxes
            .iter()
            .any(|table_box| seg_box.intersection_over_self(table_box) >= 0.5);
        if inside_table || dropped_by_picture.contains(&seg_idx) {
            suppressed_count += 1;
            continue;
        }
        if kept_from_picture.contains(&seg_idx) {
            unassigned.push(seg_idx);
            continue;
        }

        // Highest score wins; on an exact tie the hint with the smaller area wins.
        let mut winner: Option<(usize, f32, f32)> = None;
        for (hi, (hint_box, &area)) in region_boxes.iter().zip(&region_areas).enumerate() {
            let ios = seg_box.intersection_over_self(hint_box);
            if ios >= MIN_IOS_THRESHOLD {
                let improves = match winner {
                    None => true,
                    Some((_, top_ios, top_area)) => ios > top_ios || (ios == top_ios && area < top_area),
                };
                if improves {
                    winner = Some((hi, ios, area));
                }
            }
        }

        if let Some((hi, _, _)) = winner {
            regions[hi].segment_indices.push(seg_idx);
        } else {
            unassigned.push(seg_idx);
        }
    }

    tracing::trace!(
        confident_hints = region_hints.len(),
        segments = segments.len(),
        suppressed = suppressed_count,
        assigned_to_regions = regions.iter().map(|r| r.segment_indices.len()).sum::<usize>(),
        unassigned = unassigned.len(),
        "segment-to-region assignment complete"
    );

    (regions, unassigned)
}
/// Runs [`assign_segments_to_regions`] and then iteratively tightens each
/// region hint around the segments it captured, re-assigning until the
/// assignment stops changing or the iteration bound is reached.
///
/// The working hint list is kept position-parallel to `hints`: refined boxes
/// replace the confident non-table, non-picture hints in place, and every
/// other hint (pictures, tables, low-confidence) is carried over unchanged.
/// Because indices line up, `hint_validations` applies to every pass, so a
/// picture validated as `Empty` keeps preserving its text during refinement
/// exactly as it does in the first pass.
///
/// # Parameters
/// Same as [`assign_segments_to_regions`].
///
/// # Returns
/// `(regions, unassigned)`. The returned regions reference the *original*
/// hints from `hints` (not the tightened copies) and carry the segment
/// indices from the final pass. If the first pass yields no regions, its
/// result is returned as is.
pub(in crate::pdf::markdown) fn assign_segments_to_regions_refined<'a>(
    segments: &[SegmentData],
    hints: &'a [LayoutHint],
    min_confidence: f32,
    extracted_table_bboxes: &[crate::types::BoundingBox],
    hint_validations: &[super::layout_validation::RegionValidation],
) -> (Vec<LayoutRegion<'a>>, Vec<usize>) {
    /// True for hints that `assign_segments_to_regions` turns into regions.
    fn forms_region(hint: &LayoutHint, min_confidence: f32) -> bool {
        hint.confidence >= min_confidence
            && !matches!(hint.class, LayoutHintClass::Table | LayoutHintClass::Picture)
    }

    /// Builds a hint list parallel to `hints`, substituting the refined boxes
    /// (given in region order) for the region-forming hints.
    fn splice_refined(hints: &[LayoutHint], refined: Vec<LayoutHint>, min_confidence: f32) -> Vec<LayoutHint> {
        let mut refined = refined.into_iter();
        hints
            .iter()
            .map(|hint| {
                if forms_region(hint, min_confidence) {
                    // One refined hint exists per region; fall back to the
                    // original if the counts ever disagree.
                    refined.next().unwrap_or_else(|| hint.clone())
                } else {
                    hint.clone()
                }
            })
            .collect()
    }

    let (regions, unassigned) = assign_segments_to_regions(
        segments,
        hints,
        min_confidence,
        extracted_table_bboxes,
        hint_validations,
    );
    if regions.is_empty() {
        return (regions, unassigned);
    }

    let refined_hints: Vec<LayoutHint> = compute_refined_hints(&regions, segments, hints);
    if refined_hints.is_empty() {
        return (regions, unassigned);
    }

    let mut current_hints = splice_refined(hints, refined_hints, min_confidence);
    let mut prev_assignments: Vec<Vec<usize>> = regions.iter().map(|r| r.segment_indices.clone()).collect();

    for _ in 1..MAX_REFINEMENT_ITERATIONS {
        let (new_regions, _) = assign_segments_to_regions(
            segments,
            &current_hints,
            min_confidence,
            extracted_table_bboxes,
            hint_validations,
        );
        let new_assignments: Vec<Vec<usize>> = new_regions.iter().map(|r| r.segment_indices.clone()).collect();
        if new_assignments == prev_assignments {
            // Converged: tightening the boxes no longer moves any segment.
            break;
        }
        prev_assignments = new_assignments;
        let new_refined = compute_refined_hints(&new_regions, segments, &current_hints);
        current_hints = splice_refined(hints, new_refined, min_confidence);
    }

    let (final_regions, final_unassigned) = assign_segments_to_regions(
        segments,
        &current_hints,
        min_confidence,
        extracted_table_bboxes,
        hint_validations,
    );

    // Regions come back in the order of the region-forming hints, so pairing
    // them with the original hints restores the `'a` borrows callers expect.
    let result_regions: Vec<LayoutRegion<'a>> = hints
        .iter()
        .filter(|hint| forms_region(hint, min_confidence))
        .zip(final_regions)
        .map(|(hint, refined)| LayoutRegion {
            hint,
            segment_indices: refined.segment_indices,
            merged_bbox: None,
        })
        .collect();

    (result_regions, final_unassigned)
}
/// Produces one tightened hint per region, in region order.
///
/// For each region the tight bounding box of its segments is computed, padded
/// by `BBOX_REFINEMENT_PADDING`, and clamped so it never grows beyond the
/// region's current hint box. Class and confidence are copied from the
/// region's hint.
///
/// A region's hint is returned unchanged (cloned) when:
/// * the region holds no segments, or
/// * the segments cover less than 15% of their tight bounding box
///   (sparse layout), in which case shrinking is skipped.
///
/// `_source_hints` is accepted for signature compatibility and is not read.
fn compute_refined_hints(
    regions: &[LayoutRegion],
    segments: &[SegmentData],
    _source_hints: &[LayoutHint],
) -> Vec<LayoutHint> {
    regions
        .iter()
        .map(|region| {
            let base_hint = region.hint;
            if region.segment_indices.is_empty() {
                return base_hint.clone();
            }

            // Tight bounding box around every segment captured by the region.
            let mut tight_left = f32::MAX;
            let mut tight_bottom = f32::MAX;
            let mut tight_right = f32::MIN;
            let mut tight_top = f32::MIN;
            for &idx in &region.segment_indices {
                let seg = &segments[idx];
                tight_left = tight_left.min(seg.x);
                tight_bottom = tight_bottom.min(seg.y);
                tight_right = tight_right.max(seg.x + seg.width);
                tight_top = tight_top.max(seg.y + seg.height);
            }

            let tight_area = (tight_right - tight_left) * (tight_top - tight_bottom);
            if tight_area > 0.0 {
                let seg_area_sum: f32 = region
                    .segment_indices
                    .iter()
                    .map(|&idx| segments[idx].width * segments[idx].height)
                    .sum();
                let fill_ratio = seg_area_sum / tight_area;
                if fill_ratio < 0.15 {
                    tracing::trace!(
                        class = ?base_hint.class,
                        fill_ratio,
                        "refinement skipped: sparse layout"
                    );
                    return base_hint.clone();
                }
            }

            // Pad the tight box, but never let it extend past the current hint.
            LayoutHint {
                class: base_hint.class,
                confidence: base_hint.confidence,
                left: (tight_left - BBOX_REFINEMENT_PADDING).max(base_hint.left),
                bottom: (tight_bottom - BBOX_REFINEMENT_PADDING).max(base_hint.bottom),
                right: (tight_right + BBOX_REFINEMENT_PADDING).min(base_hint.right),
                top: (tight_top + BBOX_REFINEMENT_PADDING).min(base_hint.top),
            }
        })
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::pdf::hierarchy::SegmentData;
    use crate::pdf::markdown::types::{LayoutHint, LayoutHintClass};

    /// Confidence threshold handed to every call under test.
    const MIN_CONFIDENCE: f32 = 0.5;

    /// Builds a plain 12pt segment with its lower-left corner at `(x, y)`.
    fn segment_at(text: &str, x: f32, y: f32, width: f32, height: f32) -> SegmentData {
        SegmentData {
            text: text.to_string(),
            x,
            y,
            width,
            height,
            font_size: 12.0,
            is_bold: false,
            is_italic: false,
            is_monospace: false,
            baseline_y: y,
        }
    }

    /// Builds a hint of the given class with confidence 0.9.
    fn confident_hint(class: LayoutHintClass, left: f32, bottom: f32, right: f32, top: f32) -> LayoutHint {
        LayoutHint {
            class,
            confidence: 0.9,
            left,
            bottom,
            right,
            top,
        }
    }

    /// Counts the alphanumeric characters of `text`.
    fn alphanumeric_len(text: &str) -> usize {
        text.chars().filter(|c| c.is_alphanumeric()).count()
    }

    /// Short labels inside a picture hint are dropped entirely.
    #[test]
    fn picture_suppresses_short_label_text() {
        let segments = [
            segment_at("Fig 1", 10.0, 10.0, 30.0, 10.0),
            segment_at("x-axis", 10.0, 5.0, 30.0, 10.0),
        ];
        let hints = [confident_hint(LayoutHintClass::Picture, 0.0, 0.0, 100.0, 100.0)];

        let (regions, unassigned) = assign_segments_to_regions(&segments, &hints, MIN_CONFIDENCE, &[], &[]);

        assert!(regions.is_empty());
        assert!(
            unassigned.is_empty(),
            "short label text should be suppressed, got {:?}",
            unassigned
        );
    }

    /// Enough readable text inside a picture hint is kept as unassigned.
    #[test]
    fn picture_preserves_substantive_text() {
        let first_paragraph =
            "This is a substantial amount of readable text that should not be suppressed by the layout model";
        let second_paragraph = "Additional paragraph of text in the screenshot region";
        let segments = [
            segment_at(first_paragraph, 10.0, 50.0, 200.0, 12.0),
            segment_at(second_paragraph, 10.0, 30.0, 200.0, 12.0),
        ];
        let hints = [confident_hint(LayoutHintClass::Picture, 0.0, 0.0, 300.0, 100.0)];

        let (regions, unassigned) = assign_segments_to_regions(&segments, &hints, MIN_CONFIDENCE, &[], &[]);

        assert!(regions.is_empty());
        assert_eq!(
            unassigned.len(),
            2,
            "substantive text should be preserved as unassigned"
        );
    }

    /// A picture hint validated as empty never hides text, however short.
    #[test]
    fn picture_empty_validated_never_suppresses() {
        let segments = [segment_at("Fig 1", 10.0, 10.0, 30.0, 10.0)];
        let hints = [confident_hint(LayoutHintClass::Picture, 0.0, 0.0, 100.0, 100.0)];
        let validations = [super::super::layout_validation::RegionValidation::Empty];

        let (regions, unassigned) =
            assign_segments_to_regions(&segments, &hints, MIN_CONFIDENCE, &[], &validations);

        assert!(regions.is_empty());
        assert_eq!(unassigned.len(), 1, "empty-validated picture should not suppress text");
    }

    /// Text outside the picture box is unaffected by the picture hint.
    #[test]
    fn picture_does_not_affect_non_overlapping_segments() {
        let segments = [
            segment_at("Outside text", 200.0, 200.0, 100.0, 12.0),
            segment_at("Label inside", 10.0, 10.0, 30.0, 10.0),
        ];
        let hints = [confident_hint(LayoutHintClass::Picture, 0.0, 0.0, 100.0, 100.0)];

        let (regions, unassigned) = assign_segments_to_regions(&segments, &hints, MIN_CONFIDENCE, &[], &[]);

        assert!(regions.is_empty());
        assert_eq!(unassigned.len(), 1);
        assert_eq!(unassigned[0], 0, "only the outside segment should be unassigned");
    }

    /// Exactly 50 alphanumeric characters are preserved; 49 are suppressed.
    #[test]
    fn picture_threshold_boundary() {
        let hints = [confident_hint(LayoutHintClass::Picture, 0.0, 0.0, 300.0, 100.0)];

        let at_threshold = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWX";
        assert_eq!(alphanumeric_len(at_threshold), 50);
        let segments_at = [segment_at(at_threshold, 10.0, 10.0, 200.0, 12.0)];
        let (_, unassigned) = assign_segments_to_regions(&segments_at, &hints, MIN_CONFIDENCE, &[], &[]);
        assert_eq!(unassigned.len(), 1, "text at threshold should be preserved");

        let below_threshold = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVW";
        assert_eq!(alphanumeric_len(below_threshold), 49);
        let segments_below = [segment_at(below_threshold, 10.0, 10.0, 200.0, 12.0)];
        let (_, unassigned_below) =
            assign_segments_to_regions(&segments_below, &hints, MIN_CONFIDENCE, &[], &[]);
        assert!(unassigned_below.is_empty(), "text below threshold should be suppressed");
    }

    /// A text hint claims its paragraph while a picture hint drops its label.
    #[test]
    fn mixed_picture_and_text_regions() {
        let segments = [
            segment_at("Body paragraph text", 10.0, 200.0, 150.0, 12.0),
            segment_at("Fig 1", 10.0, 50.0, 30.0, 10.0),
        ];
        let hints = [
            confident_hint(LayoutHintClass::Text, 0.0, 180.0, 200.0, 230.0),
            confident_hint(LayoutHintClass::Picture, 0.0, 0.0, 100.0, 100.0),
        ];

        let (regions, unassigned) = assign_segments_to_regions(&segments, &hints, MIN_CONFIDENCE, &[], &[]);

        assert_eq!(regions.len(), 1);
        assert_eq!(regions[0].hint.class, LayoutHintClass::Text);
        assert_eq!(regions[0].segment_indices, vec![0]);
        assert!(unassigned.is_empty());
    }
}