use super::super::geometry::Rect;
#[cfg(feature = "layout-detection")]
use crate::pdf::markdown::render::escape_html_entities;
#[cfg(feature = "layout-detection")]
use crate::pdf::markdown::types::{LayoutHint, LayoutHintClass};
#[cfg(feature = "layout-detection")]
use crate::types::Table;
pub(in crate::pdf::markdown) fn word_hint_iow(
w: &crate::pdf::table_reconstruct::HocrWord,
region_left: f32,
region_top: f32,
region_right: f32,
region_bottom: f32,
) -> f32 {
let word_rect = Rect::from_xywh(w.left as f32, w.top as f32, w.width as f32, w.height as f32);
let region_rect = Rect::from_ltrb(region_left, region_top, region_right, region_bottom);
if word_rect.area() <= 0.0 {
return if region_rect.contains_point(word_rect.center_x(), word_rect.center_y()) {
1.0
} else {
0.0
};
}
word_rect.intersection_over_self(®ion_rect)
}
/// Runs TATR table-structure recognition on every table region hinted for a page and
/// returns the tables that could be rendered as markdown.
///
/// For each hint of class `Table` whose confidence is not below `0.5`:
///
/// 1. The hint rectangle is converted to pixel coordinates of `page_image` (the vertical
///    axis is flipped through `page_height - y`) and clamped to the image.
/// 2. Crops smaller than 10 px on either side, or larger than 4,000,000 px in area, are
///    skipped.
/// 3. `tatr_model` is run on the crop and a cell grid is built from its rows/columns.
/// 4. Non-blank words overlapping the hint by at least `0.2` (see [`word_hint_iow`]) are
///    placed into the grid and rendered as a markdown table.
/// 5. Grids with more than 4 cells where fewer than a quarter are filled are rejected.
///
/// `page_index` is zero-based; the emitted `Table::page_number` is `page_index + 1`.
/// Inference failures are logged and only skip the affected hint.
#[cfg(feature = "layout-detection")]
pub(in crate::pdf::markdown) fn recognize_tables_for_native_page(
    page_image: &image::DynamicImage,
    hints: &[LayoutHint],
    words: &[crate::pdf::table_reconstruct::HocrWord],
    page_result: &crate::pdf::layout_runner::PageLayoutResult,
    page_height: f32,
    page_index: usize,
    tatr_model: &mut crate::layout::models::tatr::TatrModel,
) -> Vec<Table> {
    let rgb_image = page_image.to_rgb8();
    let img_w = rgb_image.width();
    let img_h = rgb_image.height();

    // Scale factors from page points to image pixels.
    let sx = img_w as f32 / page_result.page_width_pts;
    let sy = img_h as f32 / page_result.page_height_pts;

    // `!(c < 0.5)` rather than `c >= 0.5` on purpose: a NaN confidence is kept.
    let candidates = hints
        .iter()
        .filter(|h| h.class == LayoutHintClass::Table && !(h.confidence < 0.5));

    let mut tables = Vec::new();
    for hint in candidates {
        // Hint edges measured from the top of the page, still in points.
        let top_from_page_top = page_height - hint.top;
        let bottom_from_page_top = page_height - hint.bottom;

        let px_left = (hint.left * sx).round().max(0.0) as u32;
        let px_top = (top_from_page_top * sy).round().max(0.0) as u32;
        let px_right = (hint.right * sx).round().min(img_w as f32) as u32;
        let px_bottom = (bottom_from_page_top * sy).round().min(img_h as f32) as u32;

        let crop_w = px_right.saturating_sub(px_left);
        let crop_h = px_bottom.saturating_sub(px_top);
        if crop_w.min(crop_h) < 10 {
            continue;
        }
        if u64::from(crop_w) * u64::from(crop_h) > 4_000_000 {
            tracing::debug!(
                page = page_index,
                crop_w,
                crop_h,
                "Skipping TATR for oversized table crop"
            );
            continue;
        }

        let cropped = image::imageops::crop_imm(&rgb_image, px_left, px_top, crop_w, crop_h).to_image();
        let tatr_result = match tatr_model.recognize(&cropped) {
            Err(e) => {
                tracing::warn!("TATR inference failed for table on page {}: {e}", page_index);
                continue;
            }
            Ok(recognized) => recognized,
        };

        let detected_rows = tatr_result.rows.len();
        let detected_columns = tatr_result.columns.len();
        if detected_rows == 0 || detected_columns == 0 {
            tracing::debug!(
                page = page_index,
                rows = detected_rows,
                columns = detected_columns,
                "TATR: no rows or columns detected"
            );
            continue;
        }

        let cell_grid = crate::layout::models::tatr::build_cell_grid(&tatr_result, None);
        let num_rows = cell_grid.len();
        let num_cols = cell_grid.first().map_or(0, |first_row| first_row.len());
        tracing::debug!(
            page = page_index,
            detected_rows = detected_rows,
            detected_columns = detected_columns,
            grid_rows = num_rows,
            grid_cols = num_cols,
            crop = format!("{crop_w}x{crop_h}"),
            "TATR inference result"
        );
        if num_rows == 0 || num_cols == 0 {
            continue;
        }

        // Words are matched against the hint in point space with a top-down y axis.
        let region_top = top_from_page_top.max(0.0);
        let region_bottom = bottom_from_page_top.max(0.0);
        let table_words: Vec<&crate::pdf::table_reconstruct::HocrWord> = words
            .iter()
            .filter(|w| {
                !w.text.trim().is_empty()
                    && word_hint_iow(w, hint.left, region_top, hint.right, region_bottom) >= 0.2
            })
            .collect();

        let markdown = build_tatr_grid_table(&cell_grid, &table_words, px_left as f32, px_top as f32, sx, sy);
        tracing::debug!(
            page = page_index,
            table_words = table_words.len(),
            markdown_len = markdown.len(),
            "TATR: word matching and markdown generation"
        );
        if markdown.is_empty() {
            tracing::debug!(page = page_index, "TATR: empty markdown output");
            continue;
        }

        // Reject grids that are mostly empty: count the non-blank, non-separator pieces
        // between pipes and compare with the grid size.
        let total_cells = num_rows * num_cols;
        let filled_cells = markdown
            .split('|')
            .map(str::trim)
            .filter(|piece| !piece.is_empty() && *piece != "---")
            .count();
        if total_cells > 4 && filled_cells < total_cells / 4 {
            tracing::debug!(
                page = page_index,
                total_cells,
                filled_cells,
                "TATR table rejected: too few filled cells"
            );
            continue;
        }

        tables.push(Table {
            cells: Vec::new(),
            markdown,
            page_number: page_index + 1,
            bounding_box: Some(crate::types::BoundingBox {
                x0: hint.left as f64,
                y0: hint.bottom as f64,
                x1: hint.right as f64,
                y1: hint.top as f64,
            }),
        });
    }
    tables
}
/// Fills a TATR cell grid with words and renders it as a markdown table.
///
/// # Parameters
///
/// * `cell_grid` – rows of cell boxes in pixel coordinates relative to the table crop.
///   The column count is taken from the first row; any cells beyond that count in later
///   rows are ignored so that a ragged grid cannot cause an out-of-bounds index.
/// * `words` – candidate words, in the coordinate space the cell boxes are converted to.
/// * `crop_offset_px_x`, `crop_offset_px_y` – pixel offset of the crop inside the page
///   image; added to each cell coordinate before scaling.
/// * `sx`, `sy` – pixel-per-unit scale factors; cell pixel coordinates are divided by
///   these to reach word coordinates.
///
/// # Returns
///
/// The markdown produced by `render_grid_as_markdown`, or an empty string when the grid
/// has no rows or its first row has no cells.
///
/// Each word goes to the single cell with the highest [`word_hint_iow`] score (first cell
/// wins on ties) and is dropped if that score is below `0.2`. Words inside a cell are
/// ordered by center y, then center x, and joined with single spaces.
#[cfg(feature = "layout-detection")]
fn build_tatr_grid_table(
    cell_grid: &[Vec<crate::layout::models::tatr::CellBBox>],
    words: &[&crate::pdf::table_reconstruct::HocrWord],
    crop_offset_px_x: f32,
    crop_offset_px_y: f32,
    sx: f32,
    sy: f32,
) -> String {
    let num_cols = match cell_grid.first() {
        Some(first_row) if !first_row.is_empty() => first_row.len(),
        _ => return String::new(),
    };
    let num_rows = cell_grid.len();

    // Cell boxes as (left, top, right, bottom), moved from crop pixels into word space.
    // `take(num_cols)` keeps every row within the bucket grid allocated below.
    let converted_cells: Vec<Vec<(f32, f32, f32, f32)>> = cell_grid
        .iter()
        .map(|row| {
            row.iter()
                .take(num_cols)
                .map(|cell| {
                    (
                        (cell.x1 + crop_offset_px_x) / sx,
                        (cell.y1 + crop_offset_px_y) / sy,
                        (cell.x2 + crop_offset_px_x) / sx,
                        (cell.y2 + crop_offset_px_y) / sy,
                    )
                })
                .collect()
        })
        .collect();

    // cell_words[row][col] holds (word index, center x, center y) for each assigned word.
    let mut cell_words: Vec<Vec<Vec<(usize, f32, f32)>>> = vec![vec![Vec::new(); num_cols]; num_rows];

    for (wi, &word) in words.iter().enumerate() {
        let mut best_iow: f32 = 0.0;
        let mut best_row: usize = 0;
        let mut best_col: usize = 0;
        for (ri, conv_row) in converted_cells.iter().enumerate() {
            for (ci, &(cl, ct, cr, cb)) in conv_row.iter().enumerate() {
                let iow = word_hint_iow(word, cl, ct, cr, cb);
                // Strict comparison: the first cell reaching the maximum keeps the word.
                if iow > best_iow {
                    best_iow = iow;
                    best_row = ri;
                    best_col = ci;
                }
            }
        }
        if best_iow >= 0.2 {
            let cx = word.left as f32 + word.width as f32 / 2.0;
            let cy = word.top as f32 + word.height as f32 / 2.0;
            cell_words[best_row][best_col].push((wi, cx, cy));
        }
    }

    // Consume the buckets so each one can be sorted in place without cloning.
    let grid: Vec<Vec<String>> = cell_words
        .into_iter()
        .map(|row_cells| {
            row_cells
                .into_iter()
                .map(|mut bucket| {
                    // Reading order: top-to-bottom by center y, then left-to-right by center x.
                    bucket.sort_by(|a, b| a.2.total_cmp(&b.2).then_with(|| a.1.total_cmp(&b.1)));
                    bucket
                        .iter()
                        .map(|&(wi, _, _)| words[wi].text.trim())
                        .filter(|t| !t.is_empty())
                        .collect::<Vec<_>>()
                        .join(" ")
                })
                .collect()
        })
        .collect();

    render_grid_as_markdown(&grid)
}
/// Renders a grid of cell strings as a pipe-delimited markdown table.
///
/// The first grid row becomes the header and is followed by a `| --- |` separator row.
/// Rows shorter than the widest row are padded with empty cells. In every cell, `|` is
/// escaped as `\|`, the result is passed through `escape_html_entities`, and surrounding
/// whitespace is trimmed. Lines are separated by `\n` with no trailing newline.
///
/// Returns an empty string when the grid has no rows or every row is empty.
#[cfg(feature = "layout-detection")]
fn render_grid_as_markdown(grid: &[Vec<String>]) -> String {
    let max_cols = grid.iter().map(Vec::len).max().unwrap_or(0);
    if max_cols == 0 {
        return String::new();
    }

    // One entry per output line: every grid row plus the header separator.
    let mut lines: Vec<String> = Vec::with_capacity(grid.len() + 1);
    for (row_idx, row) in grid.iter().enumerate() {
        let mut line = String::from("|");
        for col in 0..max_cols {
            let raw = row.get(col).map_or("", String::as_str);
            let pipe_escaped = raw.replace('|', "\\|");
            let escaped = escape_html_entities(&pipe_escaped);
            line.push(' ');
            line.push_str(escaped.trim());
            line.push_str(" |");
        }
        lines.push(line);

        if row_idx == 0 {
            let mut separator = String::from("|");
            separator.push_str(&" --- |".repeat(max_cols));
            lines.push(separator);
        }
    }

    lines.join("\n")
}
/// Recognizes tables on a page with SLANeXT and returns them rendered as markdown.
///
/// The model is run once on the whole page image. For each hint of class `Table` with
/// confidence `>= 0.5`, the detected cells whose centers fall inside the hint rectangle
/// (compared in image pixels) are grouped into one table, filled with the words that
/// overlap the hint by at least `0.2` (see [`word_hint_iow`]), and rendered.
///
/// # Parameters
///
/// * `page_image` – rendered page; converted to RGB before inference.
/// * `hints` – layout hints; the vertical axis is flipped through `page_height - y` when
///   mapping to image space.
/// * `words` – page words used to fill the cells.
/// * `page_result` – supplies the page size in points used to derive the pixel scale.
/// * `page_index` – zero-based; the emitted `Table::page_number` is `page_index + 1`.
/// * `slanet_model` – default model (logged as the "wired" variant).
/// * `classifier` – optional `(classifier, alternate model)` pair. When present, the crop
///   of the *first* table hint is classified and the alternate model is used for the whole
///   page if the result is `Wireless`; `Wired` or a classifier error keeps `slanet_model`.
///
/// # Returns
///
/// One `Table` per accepted hint. An empty vector is returned when there are no table
/// hints, inference fails, or no cells are detected. Tables with more than 4 grid cells of
/// which fewer than a quarter are filled are rejected.
#[cfg(feature = "layout-detection")]
#[allow(clippy::too_many_arguments)]
pub(in crate::pdf::markdown) fn recognize_tables_slanet(
    page_image: &image::DynamicImage,
    hints: &[LayoutHint],
    words: &[crate::pdf::table_reconstruct::HocrWord],
    page_result: &crate::pdf::layout_runner::PageLayoutResult,
    page_height: f32,
    page_index: usize,
    slanet_model: &mut crate::layout::models::slanet::SlanetModel,
    classifier: Option<(
        &mut crate::layout::models::table_classifier::TableClassifier,
        &mut crate::layout::models::slanet::SlanetModel,
    )>,
) -> Vec<Table> {
    let rgb_image = page_image.to_rgb8();
    let img_w = rgb_image.width();
    let img_h = rgb_image.height();
    // Scale factors from page points to image pixels.
    let sx = img_w as f32 / page_result.page_width_pts;
    let sy = img_h as f32 / page_result.page_height_pts;

    let table_hints: Vec<&LayoutHint> = hints
        .iter()
        .filter(|h| h.class == LayoutHintClass::Table && h.confidence >= 0.5)
        .collect();
    if table_hints.is_empty() {
        return Vec::new();
    }

    // Pick the model for the whole page from the classification of the first table crop.
    let active_model: &mut crate::layout::models::slanet::SlanetModel = if let Some((cls, alt_model)) = classifier {
        let first_hint = table_hints[0];
        let px_left = (first_hint.left * sx).round().max(0.0) as u32;
        let px_top = ((page_height - first_hint.top) * sy).round().max(0.0) as u32;
        let px_right = (first_hint.right * sx).round().min(img_w as f32) as u32;
        let px_bottom = ((page_height - first_hint.bottom) * sy).round().min(img_h as f32) as u32;
        // Request at least a 10x10 crop even for degenerate hints.
        let crop_w = px_right.saturating_sub(px_left).max(10);
        let crop_h = px_bottom.saturating_sub(px_top).max(10);
        let crop = image::imageops::crop_imm(&rgb_image, px_left, px_top, crop_w, crop_h).to_image();
        match cls.classify(&crop) {
            Ok(crate::layout::models::table_classifier::TableType::Wireless) => {
                tracing::debug!(
                    page = page_index,
                    "TableClassifier: page classified as wireless, using wireless SLANeXT"
                );
                alt_model
            }
            Ok(crate::layout::models::table_classifier::TableType::Wired) => {
                tracing::debug!(
                    page = page_index,
                    "TableClassifier: page classified as wired, using wired SLANeXT"
                );
                slanet_model
            }
            Err(e) => {
                tracing::warn!(page = page_index, "TableClassifier failed: {e}, defaulting to wired");
                slanet_model
            }
        }
    } else {
        slanet_model
    };

    tracing::trace!(
        page = page_index,
        page_image_w = img_w,
        page_image_h = img_h,
        table_hints = table_hints.len(),
        "SLANeXT: running full-page inference"
    );
    let slanet_result = match active_model.recognize(&rgb_image) {
        Ok(r) => r,
        Err(e) => {
            tracing::warn!("SLANeXT inference failed on page {}: {e}", page_index);
            return Vec::new();
        }
    };
    if slanet_result.cells.is_empty() {
        tracing::debug!(
            page = page_index,
            tokens = slanet_result.structure_tokens.len(),
            confidence = format!("{:.3}", slanet_result.confidence),
            "SLANeXT: no cells detected on full page"
        );
        return Vec::new();
    }
    tracing::debug!(
        page = page_index,
        cells = slanet_result.cells.len(),
        rows = slanet_result.num_rows,
        cols = slanet_result.num_cols,
        confidence = format!("{:.3}", slanet_result.confidence),
        "SLANeXT: full-page inference result"
    );

    let mut tables = Vec::new();
    for hint in &table_hints {
        // Hint rectangle in image pixels, used to select the cells belonging to it.
        let hint_px_left = hint.left * sx;
        let hint_px_top = (page_height - hint.top) * sy;
        let hint_px_right = hint.right * sx;
        let hint_px_bottom = (page_height - hint.bottom) * sy;

        let matching_cells: Vec<&crate::layout::models::slanet::SlanetCell> = slanet_result
            .cells
            .iter()
            .filter(|cell| {
                let cx = (cell.bbox[0] + cell.bbox[2]) / 2.0;
                let cy = (cell.bbox[1] + cell.bbox[3]) / 2.0;
                cx >= hint_px_left && cx <= hint_px_right && cy >= hint_px_top && cy <= hint_px_bottom
            })
            .collect();
        if matching_cells.is_empty() {
            tracing::trace!(
                page = page_index,
                hint_left = format!("{:.0}", hint.left),
                hint_top = format!("{:.0}", hint.top),
                "SLANeXT: no cells overlap this table hint"
            );
            continue;
        }

        // Row/column indices come from the full-page structure, so the cells of one hint
        // need not start at 0. The rendered grid is normalised to the minimum row/column,
        // so its dimensions are the index *extent*, not `max + 1`; using `max + 1` would
        // inflate the denominator of the sparsity check below.
        let min_row = matching_cells.iter().map(|c| c.row).min().unwrap_or(0);
        let max_row = matching_cells.iter().map(|c| c.row).max().unwrap_or(0);
        let min_col = matching_cells.iter().map(|c| c.col).min().unwrap_or(0);
        let max_col = matching_cells.iter().map(|c| c.col).max().unwrap_or(0);
        let num_rows = max_row - min_row + 1;
        let num_cols = max_col - min_col + 1;
        tracing::trace!(
            page = page_index,
            matching_cells = matching_cells.len(),
            num_rows,
            num_cols,
            "SLANeXT: cells matched to table hint"
        );

        // Hint rectangle in points with a top-down y axis, used to select words.
        let hint_pts_top = (page_height - hint.top).max(0.0);
        let hint_pts_bottom = (page_height - hint.bottom).max(0.0);
        let table_words: Vec<&crate::pdf::table_reconstruct::HocrWord> = words
            .iter()
            .filter(|w| {
                !w.text.trim().is_empty()
                    && word_hint_iow(w, hint.left, hint_pts_top, hint.right, hint_pts_bottom) >= 0.2
            })
            .collect();

        let markdown = build_slanet_cells_table(&matching_cells, num_rows, num_cols, &table_words, sx, sy);
        if markdown.is_empty() {
            tracing::debug!(page = page_index, "SLANeXT: empty markdown output for table hint");
            continue;
        }

        // Reject grids that are mostly empty: count the non-blank, non-separator pieces
        // between pipes and compare with the grid size.
        let total_cells = num_rows * num_cols;
        let filled_cells = markdown
            .split('|')
            .filter(|s| !s.trim().is_empty() && s.trim() != "---")
            .count();
        if total_cells > 4 && filled_cells < total_cells / 4 {
            tracing::debug!(
                page = page_index,
                total_cells,
                filled_cells,
                "SLANeXT table rejected: too few filled cells"
            );
            continue;
        }

        let bounding_box = Some(crate::types::BoundingBox {
            x0: hint.left as f64,
            y0: hint.bottom as f64,
            x1: hint.right as f64,
            y1: hint.top as f64,
        });
        tables.push(Table {
            cells: Vec::new(),
            markdown,
            page_number: page_index + 1,
            bounding_box,
        });
    }
    tables
}
/// Places words into SLANeXT cells and renders the resulting grid as markdown.
///
/// Cell row/column indices are shifted so the smallest row and column among `cells`
/// become `0`. The grid has `min(num_rows, row extent)` rows and `min(num_cols, column
/// extent)` columns; cells falling outside it are ignored. Cell boxes are divided by
/// `sx`/`sy` to bring them into the words' coordinate space.
///
/// Each word goes to the single cell with the highest [`word_hint_iow`] score (first cell
/// wins on ties) and is dropped if that score is below `0.2`. Words in a cell are ordered
/// by center y, then center x, and joined with single spaces. If several cells share a
/// grid position, the one latest in `cells` determines the text.
///
/// Returns an empty string when `cells` is empty or either dimension is `0`.
#[cfg(feature = "layout-detection")]
fn build_slanet_cells_table(
    cells: &[&crate::layout::models::slanet::SlanetCell],
    num_rows: usize,
    num_cols: usize,
    words: &[&crate::pdf::table_reconstruct::HocrWord],
    sx: f32,
    sy: f32,
) -> String {
    if cells.is_empty() || num_rows == 0 || num_cols == 0 {
        return String::new();
    }

    let row_origin = cells.iter().map(|c| c.row).min().unwrap_or(0);
    let col_origin = cells.iter().map(|c| c.col).min().unwrap_or(0);
    let row_extent = cells.iter().map(|c| c.row - row_origin + 1).max().unwrap_or(1);
    let col_extent = cells.iter().map(|c| c.col - col_origin + 1).max().unwrap_or(1);
    let grid_rows = num_rows.min(row_extent);
    let grid_cols = num_cols.min(col_extent);

    // Per cell: its (row, col) relative to the origin and its box as
    // [left, top, right, bottom] in word coordinates.
    let placed: Vec<((usize, usize), [f32; 4])> = cells
        .iter()
        .map(|cell| {
            (
                (cell.row - row_origin, cell.col - col_origin),
                [
                    cell.bbox[0] / sx,
                    cell.bbox[1] / sy,
                    cell.bbox[2] / sx,
                    cell.bbox[3] / sy,
                ],
            )
        })
        .collect();

    // buckets[i] collects (word index, center x, center y) for the i-th cell.
    let mut buckets: Vec<Vec<(usize, f32, f32)>> = vec![Vec::new(); cells.len()];
    let mut assigned_total: usize = 0;
    for (wi, &word) in words.iter().enumerate() {
        let mut top_score: f32 = 0.0;
        let mut top_cell: usize = 0;
        for (idx, &(_, [left, top, right, bottom])) in placed.iter().enumerate() {
            let score = word_hint_iow(word, left, top, right, bottom);
            if score > top_score {
                top_score = score;
                top_cell = idx;
            }
        }
        if top_score < 0.2 {
            continue;
        }
        let center_x = word.left as f32 + word.width as f32 / 2.0;
        let center_y = word.top as f32 + word.height as f32 / 2.0;
        buckets[top_cell].push((wi, center_x, center_y));
        assigned_total += 1;
    }

    tracing::trace!(
        total_words = words.len(),
        assigned_words = assigned_total,
        cells_with_words = buckets.iter().filter(|b| !b.is_empty()).count(),
        total_cells = cells.len(),
        "SLANeXT: word-to-cell assignment complete"
    );

    let mut grid: Vec<Vec<String>> = vec![vec![String::new(); grid_cols]; grid_rows];
    for (bucket, &((row, col), _)) in buckets.iter_mut().zip(&placed) {
        // Reading order: top-to-bottom by center y, then left-to-right by center x.
        bucket.sort_by(|a, b| a.2.total_cmp(&b.2).then_with(|| a.1.total_cmp(&b.1)));
        let text = bucket
            .iter()
            .map(|&(wi, _, _)| words[wi].text.trim())
            .filter(|t| !t.is_empty())
            .collect::<Vec<_>>()
            .join(" ");
        if let Some(slot) = grid.get_mut(row).and_then(|cols| cols.get_mut(col)) {
            *slot = text;
        }
    }

    render_grid_as_markdown(&grid)
}