use std::borrow::Cow;
const MAX_TABLE_COLS: usize = 1000;
#[allow(clippy::trivially_copy_pass_by_ref)]
pub fn get_colspan(node_handle: &tl::NodeHandle, parser: &tl::Parser) -> usize {
if let Some(tl::Node::Tag(tag)) = node_handle.get(parser) {
if let Some(Some(bytes)) = tag.attributes().get("colspan") {
if let Ok(colspan) = bytes.as_utf8_str().parse::<usize>() {
return clamp_table_span(colspan);
}
}
}
1
}
#[allow(clippy::trivially_copy_pass_by_ref)]
pub fn get_colspan_rowspan(node_handle: &tl::NodeHandle, parser: &tl::Parser) -> (usize, usize) {
if let Some(tl::Node::Tag(tag)) = node_handle.get(parser) {
let attrs = tag.attributes();
let colspan = attrs
.get("colspan")
.flatten()
.and_then(|v| v.as_utf8_str().parse::<usize>().ok())
.map_or(1, clamp_table_span);
let rowspan = attrs
.get("rowspan")
.flatten()
.and_then(|v| v.as_utf8_str().parse::<usize>().ok())
.map_or(1, clamp_table_span);
(colspan, rowspan)
} else {
(1, 1)
}
}
fn clamp_table_span(value: usize) -> usize {
if value == 0 { 1 } else { value.min(MAX_TABLE_COLS) }
}
#[allow(clippy::trivially_copy_pass_by_ref)]
pub fn collect_table_cells(
node_handle: &tl::NodeHandle,
parser: &tl::Parser,
dom_ctx: &super::super::super::DomContext,
cells: &mut Vec<tl::NodeHandle>,
) {
cells.clear();
if let Some(tl::Node::Tag(tag)) = node_handle.get(parser) {
let children = tag.children();
for child_handle in children.top().iter() {
if let Some(cell_name) = dom_ctx.tag_name_for(*child_handle, parser) {
if matches!(cell_name.as_ref(), "th" | "td" | "cell") {
cells.push(*child_handle);
}
}
}
}
}
#[allow(clippy::trivially_copy_pass_by_ref)]
pub fn convert_table_cell(
node_handle: &tl::NodeHandle,
parser: &tl::Parser,
output: &mut String,
options: &crate::options::ConversionOptions,
ctx: &super::super::super::Context,
_tag_name: &str,
dom_ctx: &super::super::super::DomContext,
) {
let mut text = String::with_capacity(128);
let cell_ctx = super::super::super::Context {
in_table_cell: true,
..ctx.clone()
};
if let Some(tl::Node::Tag(tag)) = node_handle.get(parser) {
let children = tag.children();
let has_tag_child = children
.top()
.iter()
.any(|child_handle| matches!(child_handle.get(parser), Some(tl::Node::Tag(_))));
if has_tag_child {
for child_handle in children.top().iter() {
super::super::super::walk_node(child_handle, parser, &mut text, options, &cell_ctx, 0, dom_ctx);
}
} else {
let raw = dom_ctx.text_content(*node_handle, parser);
let normalized = if options.whitespace_mode == crate::options::WhitespaceMode::Normalized {
crate::text::normalize_whitespace_cow(raw.as_str())
} else {
Cow::Borrowed(raw.as_str())
};
let escaped = crate::text::escape(
normalized.as_ref(),
options.escape_misc,
options.escape_asterisks,
options.escape_underscores,
options.escape_ascii,
);
if options.escape_misc {
text = escaped.into_owned();
} else {
text = escaped.replace('|', r"\|");
}
}
}
let text = text.trim();
let text = if options.br_in_tables {
text.to_string()
} else if text.contains('\n') {
text.replace('\n', " ")
} else {
text.to_string()
};
let colspan = get_colspan(node_handle, parser);
output.push(' ');
output.push_str(&text);
for _ in 0..colspan {
output.push_str(" |");
}
}
#[cfg(test)]
mod tests {
#[test]
fn rich_formatting_preserved_in_cells() {
let html = "<table><tr><th>H</th></tr><tr><td><strong>Bold</strong> and <em>italic</em></td></tr></table>";
let result = crate::convert(html, None, None).unwrap();
let content = result.content.unwrap_or_default();
assert!(
content.contains("**Bold**") || content.contains("__Bold__"),
"bold should be preserved: {}",
content
);
assert!(
content.contains("*italic*") || content.contains("_italic_"),
"italic should be preserved: {}",
content
);
}
}