use crate::converter::utility::content::normalized_tag_name;
use std::borrow::Cow;
/// Facts gathered about a table subtree in a single traversal.
///
/// A value starts out as `Default` (everything empty / `false` / `0`) and is
/// filled in by [`scan_table`].
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct TableScan {
    /// One entry per `tr`/`row` element visited. Each entry is the sum of
    /// `get_colspan` over that row's direct `td`/`th`/`cell` children.
    /// Rows of nested tables are recorded as well, and a nested row is pushed
    /// before the row that contains it.
    pub row_counts: Vec<usize>,
    /// `true` once a direct cell child of some row carries a `colspan` or
    /// `rowspan` attribute (presence only; the value is not inspected).
    pub has_span: bool,
    /// `true` once a `th` element, or a `cell` element whose `role` attribute
    /// equals `head`, has been seen.
    pub has_header: bool,
    /// `true` once a `caption` element has been seen.
    pub has_caption: bool,
    /// Number of `table` elements encountered other than the scan root.
    pub nested_table_count: usize,
    /// Number of `a` elements encountered.
    pub link_count: usize,
    /// `true` once a raw text node is non-blank after entity decoding and
    /// trimming, or an `img`/`graphic` element has a `src` or `alt` attribute.
    pub has_text: bool,
}
/// Walks the subtree rooted at `node_handle` and returns the collected
/// [`TableScan`].
///
/// The node passed in is treated as the scan root: if it is itself a `table`
/// element it is not counted in `nested_table_count`.
///
/// * `node_handle` - handle of the node to start from.
/// * `parser` - parser used to resolve handles into nodes.
/// * `dom_ctx` - context consulted first when resolving tag names.
// NOTE(review): the handle is taken by reference, which is what this lint
// flags; presumably kept for call-site uniformity — confirm.
#[allow(clippy::trivially_copy_pass_by_ref)]
pub fn scan_table(
    node_handle: &tl::NodeHandle,
    parser: &tl::Parser,
    dom_ctx: &super::super::super::DomContext,
) -> TableScan {
    let mut summary = TableScan::default();
    // `true` marks this first call as the root of the scan.
    scan_table_node(node_handle, parser, dom_ctx, true, &mut summary);
    summary
}
/// Recursive worker behind [`scan_table`]: inspects one node, records what it
/// finds in `scan`, then descends into the node's children.
///
/// * Raw text nodes can only flip `scan.has_text`.
/// * Tag nodes update the flag or counter matching their (resolved) tag name.
/// * Any other node kind, or a handle the parser cannot resolve, is ignored.
///
/// `is_root` is `true` only for the node the scan started from; every
/// recursive call passes `false`.
// NOTE(review): the handle is taken by reference, which is what this lint
// flags; presumably kept for call-site uniformity — confirm.
#[allow(clippy::trivially_copy_pass_by_ref)]
fn scan_table_node(
    node_handle: &tl::NodeHandle,
    parser: &tl::Parser,
    dom_ctx: &super::super::super::DomContext,
    is_root: bool,
    scan: &mut TableScan,
) {
    // Only tag nodes continue past this point; text is handled on the spot.
    let tag = match node_handle.get(parser) {
        Some(tl::Node::Tag(element)) => element,
        Some(tl::Node::Raw(bytes)) => {
            // Once text has been found there is no need to decode again.
            if !scan.has_text {
                let text = bytes.as_utf8_str();
                let decoded = crate::text::decode_html_entities_cow(text.as_ref());
                scan.has_text = !decoded.trim().is_empty();
            }
            return;
        }
        _ => return,
    };

    // Prefer the name stored in the DOM context; otherwise normalise the
    // tag's own name.
    let tag_name: Cow<'_, str> = match dom_ctx.tag_info(node_handle.get_inner(), parser) {
        Some(info) => Cow::Borrowed(info.name.as_str()),
        None => normalized_tag_name(tag.name().as_utf8_str()).into_owned().into(),
    };

    // Rows are handled separately because they walk their children themselves
    // (to count cells) and must not be walked a second time below.
    if matches!(tag_name.as_ref(), "tr" | "row") {
        let mut columns = 0;
        for child in tag.children().top().iter() {
            if let Some(tl::Node::Tag(cell_tag)) = child.get(parser) {
                let cell_name: Cow<'_, str> = match dom_ctx.tag_info(child.get_inner(), parser) {
                    Some(info) => Cow::Borrowed(info.name.as_str()),
                    None => normalized_tag_name(cell_tag.name().as_utf8_str())
                        .into_owned()
                        .into(),
                };
                let is_cell = matches!(cell_name.as_ref(), "td" | "th" | "cell");
                if is_cell {
                    columns += super::cell::get_colspan(child, parser);
                    let cell_attrs = cell_tag.attributes();
                    let spans = cell_attrs.get("colspan").is_some()
                        || cell_attrs.get("rowspan").is_some();
                    if spans {
                        scan.has_span = true;
                    }
                }
            }
            // Every child is scanned, whether or not it counted as a cell.
            scan_table_node(child, parser, dom_ctx, false, scan);
        }
        scan.row_counts.push(columns);
        return;
    }

    match tag_name.as_ref() {
        "a" => {
            scan.link_count += 1;
        }
        "caption" => {
            scan.has_caption = true;
        }
        "th" => {
            scan.has_header = true;
        }
        "img" | "graphic" => {
            // An image with a source or alternative text counts as content.
            let attrs = tag.attributes();
            if attrs.get("src").is_some() || attrs.get("alt").is_some() {
                scan.has_text = true;
            }
        }
        "cell" => {
            // Only a `role` attribute that has a value equal to "head" marks
            // a header cell.
            if let Some(Some(role_val)) = tag.attributes().get("role") {
                if role_val.as_utf8_str() == "head" {
                    scan.has_header = true;
                }
            }
        }
        "table" => {
            // The table the scan started from is not counted as nested.
            if !is_root {
                scan.nested_table_count += 1;
            }
        }
        _ => {}
    }

    for child in tag.children().top().iter() {
        scan_table_node(child, parser, dom_ctx, false, scan);
    }
}