use crate::element_handler::Element;
use crate::node_util::{get_node_children, get_node_content, get_node_tag_name};
use crate::text_util::concat_strings;
use markup5ever_rcdom::NodeData;
use std::rc::Rc;
/// Converts a `<table>` element into a pipe-delimited Markdown table.
///
/// Returns `None` when the element's rendered content is blank. When no header
/// or body cell is found, the trimmed content is returned wrapped in blank
/// lines instead of a table.
///
/// Cells are gathered from the table's direct children:
/// - `<caption>`: its trimmed content is written on its own line above the table.
/// - `<thead>`: the first `<tr>` child (or the `<thead>` itself when it has no
///   `<tr>` child) supplies the header cells.
/// - `<tbody>` / `<tfoot>`: every `<tr>` child with at least one cell becomes a
///   body row, except that a row made only of `<th>` cells becomes the header
///   while no header exists yet.
/// - `<tr>`: becomes the header while no header exists yet, otherwise a body row.
///
/// Both `<th>` and `<td>` cells of a row are kept, in document order, so that
/// row-header cells stay in their column instead of being dropped.
///
/// When no header is found the body rows are written without a header or
/// delimiter row.
pub(crate) fn table_handler(element: Element) -> Option<String> {
    let content = element.content.trim();
    if content.is_empty() {
        return None;
    }

    let mut captions: Vec<String> = Vec::new();
    let mut headers: Vec<String> = Vec::new();
    let mut rows: Vec<Vec<String>> = Vec::new();
    // Set once a header row has been chosen (or a `<thead>` was seen, even an
    // empty one); later rows are then always treated as body rows.
    let mut has_thead = false;

    if let NodeData::Element { .. } = &element.node.data {
        for child in get_node_children(element.node) {
            if let NodeData::Element { name, .. } = &child.data {
                let tag_name = name.local.as_ref();
                match tag_name {
                    "caption" => {
                        captions.push(get_node_content(&child).trim().to_string());
                    }
                    "thead" => {
                        let tr = child
                            .children
                            .borrow()
                            .iter()
                            .find(|it| get_node_tag_name(it).is_some_and(|tag| tag == "tr"))
                            .cloned();
                        // Fall back to the `<thead>` itself so cells placed
                        // directly inside it are still picked up.
                        let row_node = match tr {
                            Some(tr) => tr,
                            None => child,
                        };
                        has_thead = true;
                        headers = collect_row_cells(&row_node).0;
                    }
                    "tbody" | "tfoot" => {
                        for row_node in get_node_children(&child) {
                            if let NodeData::Element { name, .. } = &row_node.data {
                                if name.local.as_ref() == "tr" {
                                    let (cells, header_only) = collect_row_cells(&row_node);
                                    if cells.is_empty() {
                                        // Rows without cells are skipped.
                                    } else if header_only && !has_thead && headers.is_empty() {
                                        // A leading all-`<th>` row is the header of a
                                        // table that has no `<thead>`.
                                        headers = cells;
                                        has_thead = true;
                                    } else {
                                        rows.push(cells);
                                    }
                                }
                            }
                        }
                    }
                    "tr" => {
                        let (cells, _) = collect_row_cells(&child);
                        if cells.is_empty() {
                            // Rows without cells are skipped.
                        } else if !has_thead && headers.is_empty() {
                            // No header yet: the first row stands in for it.
                            headers = cells;
                            has_thead = true;
                        } else {
                            rows.push(cells);
                        }
                    }
                    _ => {}
                }
            }
        }
    }

    // Only non-empty rows are stored, so this is 0 exactly when neither a
    // header cell nor a body cell was found.
    let num_columns = if headers.is_empty() {
        rows.iter().map(|row| row.len()).max().unwrap_or(0)
    } else {
        headers.len()
    };
    if num_columns == 0 {
        return Some(concat_strings!("\n\n", content, "\n\n"));
    }

    let col_widths = compute_column_widths(&headers, &rows, num_columns);

    let mut table_md = String::from("\n\n");
    for caption in captions {
        table_md.push_str(&caption);
        table_md.push('\n');
    }
    if !headers.is_empty() {
        table_md.push_str(&format_row_padded(&headers, num_columns, &col_widths));
        table_md.push_str(&format_separator_padded(num_columns, &col_widths));
    }
    for row in rows {
        table_md.push_str(&format_row_padded(&row, num_columns, &col_widths));
    }
    table_md.push('\n');
    Some(table_md)
}

/// Collects the trimmed content of every `<th>` and `<td>` child of `row_node`,
/// in document order.
///
/// The second tuple field is `true` when none of the collected cells was a
/// `<td>`. It is also `true` for a row without any cell, so callers check the
/// vector for emptiness first.
fn collect_row_cells(row_node: &Rc<markup5ever_rcdom::Node>) -> (Vec<String>, bool) {
    let mut cells = Vec::new();
    let mut header_only = true;
    for cell_node in get_node_children(row_node) {
        if let NodeData::Element { name, .. } = &cell_node.data {
            match name.local.as_ref() {
                "th" => {}
                "td" => header_only = false,
                // Anything else inside a row is not a table cell.
                _ => continue,
            }
            cells.push(get_node_content(&cell_node).trim().to_string());
        }
    }
    (cells, header_only)
}
/// Flattens cell text so that it fits inside a single Markdown table cell.
///
/// - `\n` becomes a space and `\r` is removed, keeping the cell on one line.
/// - Every `|` that is not already backslash-escaped is written as `\|`, so the
///   cell content cannot be mistaken for a column boundary.
/// - Leading and trailing whitespace is trimmed from the result.
fn normalize_cell_content(content: &str) -> String {
    let mut out = String::with_capacity(content.len());
    // True when the previously emitted character is a backslash that escapes
    // whatever comes next (i.e. it is not itself escaped).
    let mut escaped = false;
    for ch in content.chars() {
        match ch {
            // Dropped characters must not disturb the escape state.
            '\r' => continue,
            '\n' => out.push(' '),
            '|' if !escaped => out.push_str("\\|"),
            _ => out.push(ch),
        }
        escaped = ch == '\\' && !escaped;
    }
    out.trim().to_string()
}
fn format_row_padded(row: &[String], num_columns: usize, col_widths: &[usize]) -> String {
let mut line = String::from("|");
for (i, col_width) in col_widths.iter().enumerate().take(num_columns) {
let cell = row
.get(i)
.map(|s| normalize_cell_content(s))
.unwrap_or_default();
let pad = col_width.saturating_sub(cell.chars().count());
line.push_str(&concat_strings!(" ", cell, " ".repeat(pad), " |"));
}
line.push('\n');
line
}
/// Renders the delimiter row that separates the header from the body,
/// terminated by `\n`.
///
/// One cell of hyphens is written for each of the first `num_columns` entries
/// of `col_widths`. Every cell contains at least one `-`, even for a
/// zero-width column: a delimiter cell without a hyphen is not a valid
/// delimiter row, and the whole table would stop being recognised as one.
fn format_separator_padded(num_columns: usize, col_widths: &[usize]) -> String {
    let mut line = String::from("|");
    for &col_width in col_widths.iter().take(num_columns) {
        line.push(' ');
        line.push_str(&"-".repeat(col_width.max(1)));
        line.push_str(" |");
    }
    line.push('\n');
    line
}
/// Computes the display width of each of the first `num_columns` columns.
///
/// A column's width is the largest `char` count among the header cell and all
/// body cells at that index, and never less than 1 so that a delimiter row
/// built from these widths has a hyphen in every cell. Header or row cells
/// beyond `num_columns` are ignored, and rows shorter than `num_columns`
/// simply do not contribute to the remaining columns.
fn compute_column_widths(
    headers: &[String],
    rows: &[Vec<String>],
    num_columns: usize,
) -> Vec<usize> {
    let mut widths = vec![1; num_columns];
    let all_rows = std::iter::once(headers).chain(rows.iter().map(Vec::as_slice));
    for cells in all_rows {
        // `zip` stops at the shorter side, so no index can go out of bounds.
        for (width, cell) in widths.iter_mut().zip(cells) {
            *width = (*width).max(cell.chars().count());
        }
    }
    widths
}