use crate::processors::BoundingBox;
pub fn sort_quad_boxes(boxes: &[BoundingBox]) -> Vec<BoundingBox> {
if boxes.is_empty() {
return Vec::new();
}
let mut sorted: Vec<BoundingBox> = boxes.to_vec();
sorted.sort_by(|a, b| {
let a_y = a.y_min();
let a_x = a.x_min();
let b_y = b.y_min();
let b_x = b.x_min();
match a_y.partial_cmp(&b_y) {
Some(std::cmp::Ordering::Equal) => {
a_x.partial_cmp(&b_x).unwrap_or(std::cmp::Ordering::Equal)
}
other => other.unwrap_or(std::cmp::Ordering::Equal),
}
});
let num_boxes = sorted.len();
for i in 0..num_boxes.saturating_sub(1) {
for j in (0..=i).rev() {
if j + 1 >= sorted.len() {
break;
}
let curr_y = sorted[j].y_min();
let next_y = sorted[j + 1].y_min();
let curr_x = sorted[j].x_min();
let next_x = sorted[j + 1].x_min();
if (next_y - curr_y).abs() < 10.0 && next_x < curr_x {
sorted.swap(j, j + 1);
} else {
break;
}
}
}
sorted
}
pub fn sort_poly_boxes(boxes: &[BoundingBox]) -> Vec<BoundingBox> {
if boxes.is_empty() {
return Vec::new();
}
let mut indexed_boxes: Vec<(usize, f32, &BoundingBox)> = boxes
.iter()
.enumerate()
.map(|(idx, bbox)| (idx, bbox.y_min(), bbox))
.collect();
indexed_boxes.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
indexed_boxes
.into_iter()
.map(|(_, _, bbox)| bbox.clone())
.collect()
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SortDirection {
Horizontal,
Vertical,
}
pub fn sort_by_xycut(boxes: &[BoundingBox], direction: SortDirection, min_gap: i32) -> Vec<usize> {
if boxes.is_empty() {
return Vec::new();
}
let int_boxes: Vec<[i32; 4]> = boxes
.iter()
.map(|b| {
[
b.x_min() as i32,
b.y_min() as i32,
b.x_max() as i32,
b.y_max() as i32,
]
})
.collect();
let indices: Vec<usize> = (0..boxes.len()).collect();
let mut result = Vec::new();
match direction {
SortDirection::Vertical => {
recursive_yx_cut(&int_boxes, &indices, &mut result, min_gap);
}
SortDirection::Horizontal => {
recursive_xy_cut(&int_boxes, &indices, &mut result, min_gap);
}
}
result
}
pub fn sort_boxes_xycut(boxes: &[BoundingBox], direction: SortDirection) -> Vec<BoundingBox> {
let indices = sort_by_xycut(boxes, direction, 1);
indices.into_iter().map(|i| boxes[i].clone()).collect()
}
pub(crate) fn projection_by_bboxes(boxes: &[[i32; 4]], axis: usize) -> Vec<i32> {
assert!(axis <= 1, "axis must be 0 or 1");
if boxes.is_empty() {
return Vec::new();
}
let max_length = boxes
.iter()
.map(|b| b[axis + 2].unsigned_abs() as usize)
.max()
.unwrap_or(0);
if max_length == 0 {
return Vec::new();
}
let mut projection = vec![0i32; max_length + 1];
for b in boxes {
let start = b[axis].unsigned_abs() as usize;
let end = b[axis + 2].unsigned_abs() as usize;
let (start, end) = if start <= end {
(start, end)
} else {
(end, start)
};
for i in start..end.min(projection.len()) {
projection[i] += 1;
}
}
projection
}
pub(crate) fn split_projection_profile(
arr_values: &[i32],
min_value: i32,
min_gap: i32,
) -> Option<(Vec<usize>, Vec<usize>)> {
let significant_indices: Vec<usize> = arr_values
.iter()
.enumerate()
.filter(|(_, val)| **val > min_value)
.map(|(idx, _)| idx)
.collect();
if significant_indices.is_empty() {
return None;
}
let mut segment_starts = Vec::new();
let mut segment_ends = Vec::new();
segment_starts.push(significant_indices[0]);
for i in 1..significant_indices.len() {
let gap = (significant_indices[i] - significant_indices[i - 1]) as i32;
if gap > min_gap {
segment_ends.push(significant_indices[i - 1] + 1);
segment_starts.push(significant_indices[i]);
}
}
segment_ends.push(significant_indices[significant_indices.len() - 1] + 1);
Some((segment_starts, segment_ends))
}
fn recursive_yx_cut(boxes: &[[i32; 4]], indices: &[usize], result: &mut Vec<usize>, min_gap: i32) {
if boxes.is_empty() || indices.is_empty() {
return;
}
let mut y_sorted: Vec<(usize, &[i32; 4], usize)> = boxes
.iter()
.enumerate()
.map(|(i, b)| (i, b, indices[i]))
.collect();
y_sorted.sort_by_key(|(_, b, _)| b[1]);
let y_sorted_boxes: Vec<[i32; 4]> = y_sorted.iter().map(|(_, b, _)| **b).collect();
let y_sorted_indices: Vec<usize> = y_sorted.iter().map(|(_, _, idx)| *idx).collect();
let y_projection = projection_by_bboxes(&y_sorted_boxes, 1);
let y_intervals = split_projection_profile(&y_projection, 0, 1);
let Some((y_starts, y_ends)) = y_intervals else {
return;
};
for (y_start, y_end) in y_starts.iter().zip(y_ends.iter()) {
let y_chunk: Vec<(usize, [i32; 4])> = y_sorted_boxes
.iter()
.enumerate()
.filter(|(_, b)| {
let y_min = b[1] as usize;
y_min >= *y_start && y_min < *y_end
})
.map(|(i, b)| (y_sorted_indices[i], *b))
.collect();
if y_chunk.is_empty() {
continue;
}
let mut x_sorted = y_chunk.clone();
x_sorted.sort_by_key(|(_, b)| b[0]);
let x_sorted_boxes: Vec<[i32; 4]> = x_sorted.iter().map(|(_, b)| *b).collect();
let x_sorted_indices: Vec<usize> = x_sorted.iter().map(|(idx, _)| *idx).collect();
let x_projection = projection_by_bboxes(&x_sorted_boxes, 0);
let x_intervals = split_projection_profile(&x_projection, 0, min_gap);
let Some((x_starts, x_ends)) = x_intervals else {
continue;
};
if x_starts.len() == 1 {
result.extend(x_sorted_indices);
continue;
}
for (x_start, x_end) in x_starts.iter().zip(x_ends.iter()) {
let x_chunk_boxes: Vec<[i32; 4]> = x_sorted_boxes
.iter()
.enumerate()
.filter(|(_, b)| {
let x_min = b[0].unsigned_abs() as usize;
x_min >= *x_start && x_min < *x_end
})
.map(|(_, b)| *b)
.collect();
let x_chunk_indices: Vec<usize> = x_sorted_boxes
.iter()
.enumerate()
.filter(|(_, b)| {
let x_min = b[0].unsigned_abs() as usize;
x_min >= *x_start && x_min < *x_end
})
.map(|(i, _)| x_sorted_indices[i])
.collect();
recursive_yx_cut(&x_chunk_boxes, &x_chunk_indices, result, min_gap);
}
}
}
fn recursive_xy_cut(boxes: &[[i32; 4]], indices: &[usize], result: &mut Vec<usize>, min_gap: i32) {
if boxes.is_empty() || indices.is_empty() {
return;
}
let mut x_sorted: Vec<(usize, &[i32; 4], usize)> = boxes
.iter()
.enumerate()
.map(|(i, b)| (i, b, indices[i]))
.collect();
x_sorted.sort_by_key(|(_, b, _)| b[0]);
let x_sorted_boxes: Vec<[i32; 4]> = x_sorted.iter().map(|(_, b, _)| **b).collect();
let x_sorted_indices: Vec<usize> = x_sorted.iter().map(|(_, _, idx)| *idx).collect();
let x_projection = projection_by_bboxes(&x_sorted_boxes, 0);
let x_intervals = split_projection_profile(&x_projection, 0, 1);
let Some((x_starts, x_ends)) = x_intervals else {
return;
};
for (x_start, x_end) in x_starts.iter().zip(x_ends.iter()) {
let x_chunk: Vec<(usize, [i32; 4])> = x_sorted_boxes
.iter()
.enumerate()
.filter(|(_, b)| {
let x_min = b[0].unsigned_abs() as usize;
x_min >= *x_start && x_min < *x_end
})
.map(|(i, b)| (x_sorted_indices[i], *b))
.collect();
if x_chunk.is_empty() {
continue;
}
let mut y_sorted = x_chunk.clone();
y_sorted.sort_by_key(|(_, b)| b[1]);
let y_sorted_boxes: Vec<[i32; 4]> = y_sorted.iter().map(|(_, b)| *b).collect();
let y_sorted_indices: Vec<usize> = y_sorted.iter().map(|(idx, _)| *idx).collect();
let y_projection = projection_by_bboxes(&y_sorted_boxes, 1);
let y_intervals = split_projection_profile(&y_projection, 0, min_gap);
let Some((y_starts, y_ends)) = y_intervals else {
continue;
};
if y_starts.len() == 1 {
result.extend(y_sorted_indices);
continue;
}
for (y_start, y_end) in y_starts.iter().zip(y_ends.iter()) {
let y_chunk_boxes: Vec<[i32; 4]> = y_sorted_boxes
.iter()
.enumerate()
.filter(|(_, b)| {
let y_min = b[1] as usize;
y_min >= *y_start && y_min < *y_end
})
.map(|(_, b)| *b)
.collect();
let y_chunk_indices: Vec<usize> = y_sorted_boxes
.iter()
.enumerate()
.filter(|(_, b)| {
let y_min = b[1] as usize;
y_min >= *y_start && y_min < *y_end
})
.map(|(i, _)| y_sorted_indices[i])
.collect();
recursive_xy_cut(&y_chunk_boxes, &y_chunk_indices, result, min_gap);
}
}
}
#[derive(Debug, Clone)]
pub struct SortableRegion {
pub bbox: BoundingBox,
pub original_index: usize,
}
impl SortableRegion {
pub fn new(bbox: BoundingBox, original_index: usize) -> Self {
Self {
bbox,
original_index,
}
}
pub fn area(&self) -> f32 {
let width = self.bbox.x_max() - self.bbox.x_min();
let height = self.bbox.y_max() - self.bbox.y_min();
width * height
}
pub fn center(&self) -> (f32, f32) {
(
(self.bbox.x_min() + self.bbox.x_max()) / 2.0,
(self.bbox.y_min() + self.bbox.y_max()) / 2.0,
)
}
}
pub fn calculate_iou(a: &BoundingBox, b: &BoundingBox) -> f32 {
let x1 = a.x_min().max(b.x_min());
let y1 = a.y_min().max(b.y_min());
let x2 = a.x_max().min(b.x_max());
let y2 = a.y_max().min(b.y_max());
let intersection_width = (x2 - x1).max(0.0);
let intersection_height = (y2 - y1).max(0.0);
let intersection_area = intersection_width * intersection_height;
let area_a = (a.x_max() - a.x_min()) * (a.y_max() - a.y_min());
let area_b = (b.x_max() - b.x_min()) * (b.y_max() - b.y_min());
let union_area = area_a + area_b - intersection_area;
if union_area > 0.0 {
intersection_area / union_area
} else {
0.0
}
}
pub fn calculate_overlap_ratio(a: &BoundingBox, b: &BoundingBox) -> f32 {
let x1 = a.x_min().max(b.x_min());
let y1 = a.y_min().max(b.y_min());
let x2 = a.x_max().min(b.x_max());
let y2 = a.y_max().min(b.y_max());
let intersection_width = (x2 - x1).max(0.0);
let intersection_height = (y2 - y1).max(0.0);
let intersection_area = intersection_width * intersection_height;
let area_a = (a.x_max() - a.x_min()) * (a.y_max() - a.y_min());
if area_a > 0.0 {
intersection_area / area_a
} else {
0.0
}
}
pub fn assign_elements_to_regions(
elements: &[BoundingBox],
regions: &[SortableRegion],
threshold: f32,
) -> Vec<Option<usize>> {
elements
.iter()
.map(|element| {
let mut best_region: Option<usize> = None;
let mut best_overlap = threshold;
for (region_idx, region) in regions.iter().enumerate() {
let overlap = calculate_overlap_ratio(element, ®ion.bbox);
if overlap > best_overlap {
best_overlap = overlap;
best_region = Some(region_idx);
}
}
best_region
})
.collect()
}
pub fn sort_regions(regions: &[SortableRegion]) -> Vec<usize> {
if regions.is_empty() {
return Vec::new();
}
let bboxes: Vec<BoundingBox> = regions.iter().map(|r| r.bbox.clone()).collect();
sort_by_xycut(&bboxes, SortDirection::Vertical, 1)
}
pub fn sort_elements_with_regions(
elements: &[BoundingBox],
regions: &[SortableRegion],
assignments: &[Option<usize>],
) -> Vec<usize> {
if elements.is_empty() {
return Vec::new();
}
if regions.is_empty() {
return sort_by_xycut(elements, SortDirection::Vertical, 1);
}
let sorted_region_indices = sort_regions(regions);
let mut region_elements: Vec<Vec<usize>> = vec![Vec::new(); regions.len()];
let mut unassigned_elements: Vec<usize> = Vec::new();
for (elem_idx, assignment) in assignments.iter().enumerate() {
match assignment {
Some(region_idx) => region_elements[*region_idx].push(elem_idx),
None => unassigned_elements.push(elem_idx),
}
}
let mut result: Vec<usize> = Vec::new();
for region_idx in sorted_region_indices {
let region_elem_indices = ®ion_elements[region_idx];
if region_elem_indices.is_empty() {
continue;
}
let region_boxes: Vec<BoundingBox> = region_elem_indices
.iter()
.map(|&idx| elements[idx].clone())
.collect();
let sorted_within = sort_by_xycut(®ion_boxes, SortDirection::Vertical, 1);
for sorted_idx in sorted_within {
result.push(region_elem_indices[sorted_idx]);
}
}
if !unassigned_elements.is_empty() {
let unassigned_boxes: Vec<BoundingBox> = unassigned_elements
.iter()
.map(|&idx| elements[idx].clone())
.collect();
let sorted_unassigned = sort_by_xycut(&unassigned_boxes, SortDirection::Vertical, 1);
for sorted_idx in sorted_unassigned {
result.push(unassigned_elements[sorted_idx]);
}
}
result
}
pub fn sort_with_region_hierarchy(
elements: &[BoundingBox],
region_bboxes: &[BoundingBox],
overlap_threshold: f32,
) -> Vec<usize> {
if elements.is_empty() {
return Vec::new();
}
let regions: Vec<SortableRegion> = region_bboxes
.iter()
.enumerate()
.map(|(idx, bbox)| SortableRegion::new(bbox.clone(), idx))
.collect();
let assignments = assign_elements_to_regions(elements, ®ions, overlap_threshold);
sort_elements_with_regions(elements, ®ions, &assignments)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_sort_quad_boxes_vertical() {
let box1 = BoundingBox::from_coords(10.0, 50.0, 50.0, 70.0); let box2 = BoundingBox::from_coords(10.0, 10.0, 50.0, 30.0); let box3 = BoundingBox::from_coords(10.0, 30.0, 50.0, 50.0);
let boxes = vec![box1, box2, box3];
let sorted = sort_quad_boxes(&boxes);
assert_eq!(sorted[0].y_min(), 10.0); assert_eq!(sorted[1].y_min(), 30.0); assert_eq!(sorted[2].y_min(), 50.0); }
#[test]
fn test_sort_quad_boxes_same_line() {
let box1 = BoundingBox::from_coords(60.0, 10.0, 100.0, 30.0); let box2 = BoundingBox::from_coords(10.0, 12.0, 50.0, 32.0);
let boxes = vec![box1, box2];
let sorted = sort_quad_boxes(&boxes);
assert!(sorted[0].x_min() < sorted[1].x_min());
}
#[test]
fn test_sort_quad_boxes_mixed() {
let box1 = BoundingBox::from_coords(60.0, 10.0, 100.0, 30.0); let box2 = BoundingBox::from_coords(10.0, 11.0, 50.0, 31.0); let box3 = BoundingBox::from_coords(10.0, 50.0, 50.0, 70.0); let box4 = BoundingBox::from_coords(60.0, 52.0, 100.0, 72.0);
let boxes = vec![box1.clone(), box2.clone(), box3.clone(), box4.clone()];
let sorted = sort_quad_boxes(&boxes);
assert!(sorted[0].x_min() < sorted[1].x_min()); assert!(sorted[0].y_min() < sorted[2].y_min()); assert!(sorted[2].x_min() < sorted[3].x_min()); }
#[test]
fn test_sort_poly_boxes() {
let box1 = BoundingBox::from_coords(10.0, 50.0, 50.0, 70.0); let box2 = BoundingBox::from_coords(10.0, 10.0, 50.0, 30.0); let box3 = BoundingBox::from_coords(10.0, 30.0, 50.0, 50.0);
let boxes = vec![box1, box2, box3];
let sorted = sort_poly_boxes(&boxes);
assert_eq!(sorted[0].y_min(), 10.0); assert_eq!(sorted[1].y_min(), 30.0); assert_eq!(sorted[2].y_min(), 50.0); }
#[test]
fn test_sort_empty_boxes() {
let boxes: Vec<BoundingBox> = Vec::new();
let sorted = sort_quad_boxes(&boxes);
assert!(sorted.is_empty());
let sorted = sort_poly_boxes(&boxes);
assert!(sorted.is_empty());
}
#[test]
fn test_xycut_single_column() {
let boxes = vec![
BoundingBox::from_coords(10.0, 10.0, 100.0, 30.0), BoundingBox::from_coords(10.0, 40.0, 100.0, 60.0), BoundingBox::from_coords(10.0, 70.0, 100.0, 90.0), ];
let sorted_indices = sort_by_xycut(&boxes, SortDirection::Vertical, 1);
assert_eq!(sorted_indices.len(), 3);
assert_eq!(sorted_indices[0], 0);
assert_eq!(sorted_indices[1], 1);
assert_eq!(sorted_indices[2], 2);
}
#[test]
fn test_xycut_two_columns() {
let boxes = vec![
BoundingBox::from_coords(10.0, 10.0, 45.0, 30.0), BoundingBox::from_coords(55.0, 10.0, 90.0, 30.0), BoundingBox::from_coords(10.0, 40.0, 45.0, 60.0), BoundingBox::from_coords(55.0, 40.0, 90.0, 60.0), ];
let sorted_indices = sort_by_xycut(&boxes, SortDirection::Vertical, 1);
assert_eq!(sorted_indices.len(), 4);
}
#[test]
fn test_xycut_empty_boxes() {
let boxes: Vec<BoundingBox> = Vec::new();
let sorted_indices = sort_by_xycut(&boxes, SortDirection::Vertical, 1);
assert!(sorted_indices.is_empty());
}
#[test]
fn test_sort_boxes_xycut_wrapper() {
let boxes = vec![
BoundingBox::from_coords(10.0, 50.0, 50.0, 70.0), BoundingBox::from_coords(10.0, 10.0, 50.0, 30.0), ];
let sorted = sort_boxes_xycut(&boxes, SortDirection::Vertical);
assert_eq!(sorted.len(), 2);
assert!(sorted[0].y_min() < sorted[1].y_min());
}
#[test]
fn test_projection_by_bboxes() {
let boxes = vec![[10i32, 0, 20, 10], [15, 0, 25, 10]];
let x_proj = projection_by_bboxes(&boxes, 0);
assert!(!x_proj.is_empty());
assert_eq!(x_proj[15], 2);
assert_eq!(x_proj[10], 1);
}
#[test]
fn test_split_projection_profile() {
let profile = vec![1, 1, 0, 0, 0, 1, 1];
let result = split_projection_profile(&profile, 0, 1);
assert!(result.is_some());
let Some((starts, ends)) = result else {
panic!("expected split_projection_profile to return Some");
};
assert_eq!(starts.len(), 2);
assert_eq!(ends.len(), 2);
}
}