use crate::converter::dom_context::DomContext;
use crate::converter::main_helpers::is_inline_element;
use crate::converter::utility::attributes::{attribute_matches_any, element_has_navigation_hint};
use crate::converter::utility::content::normalized_tag_name;
use crate::options::ConversionOptions;
/// Reports whether `tag_name` is one of the inline tags exempted from the
/// inline/block misnesting check.
///
/// Only the exact, lowercase names `a`, `ins` and `del` are accepted; every
/// other string (including the empty string) yields `false`.
pub fn inline_ancestor_allows_block(tag_name: &str) -> bool {
    ["a", "ins", "del"].contains(&tag_name)
}
/// Detects block-level tags that are nested inside an inline element.
///
/// Every populated entry of `dom_ctx.node_map` that resolves to a tag and is
/// flagged `is_block` by `dom_ctx.tag_info` is examined:
///
/// * The tag is skipped when it, or any of its ancestors, is a `pre` or
///   `code` element according to `tag_info`.
/// * Otherwise its ancestors are walked upwards. The function returns `true`
///   as soon as one of them is an inline element (per `is_inline_element`)
///   that is not exempted by `inline_ancestor_allows_block`.
///
/// Returns `false` when no such nesting is found.
pub fn has_inline_block_misnest(dom_ctx: &DomContext, parser: &tl::Parser) -> bool {
    // Walks from `start` (inclusive) towards the root looking for `pre`/`code`.
    let within_preformatted = |start: u32| -> bool {
        let mut cursor = Some(start);
        while let Some(id) = cursor {
            if matches!(
                dom_ctx.tag_info(id, parser),
                Some(info) if matches!(info.name.as_str(), "pre" | "code")
            ) {
                return true;
            }
            cursor = dom_ctx.parent_of(id);
        }
        false
    };

    // Walks the strict ancestors of `start` looking for an inline element that
    // is not permitted to wrap block content.
    let has_offending_inline_ancestor = |start: u32| -> bool {
        let mut cursor = dom_ctx.parent_of(start);
        while let Some(ancestor_id) = cursor {
            let offending = match dom_ctx.tag_info(ancestor_id, parser) {
                Some(ancestor_info) => {
                    is_inline_element(&ancestor_info.name)
                        && !inline_ancestor_allows_block(&ancestor_info.name)
                }
                // No tag info for this ancestor: derive the tag name from the
                // parsed node itself, if the node can be resolved.
                None => match dom_ctx.node_handle(ancestor_id) {
                    Some(ancestor_handle) => match ancestor_handle.get(parser) {
                        Some(tl::Node::Tag(ancestor_tag)) => {
                            let ancestor_name =
                                normalized_tag_name(ancestor_tag.name().as_utf8_str());
                            is_inline_element(&ancestor_name)
                                && !inline_ancestor_allows_block(&ancestor_name)
                        }
                        _ => false,
                    },
                    None => false,
                },
            };
            if offending {
                return true;
            }
            cursor = dom_ctx.parent_of(ancestor_id);
        }
        false
    };

    for handle in dom_ctx.node_map.iter().flatten() {
        // Only tag nodes are of interest.
        if !matches!(handle.get(parser), Some(tl::Node::Tag(_))) {
            continue;
        }

        let node_id = handle.get_inner();
        let is_block = matches!(dom_ctx.tag_info(node_id, parser), Some(info) if info.is_block);
        if !is_block {
            continue;
        }

        // Block tags at or below `pre`/`code` are ignored by this check.
        if within_preformatted(node_id) {
            continue;
        }

        if has_offending_inline_ancestor(node_id) {
            return true;
        }
    }

    false
}
/// Decides whether the element `tag` (whose name is `tag_name`) should be
/// dropped during preprocessing, based on `options.preprocessing`.
///
/// Rules, evaluated in order:
///
/// 1. Nothing is dropped when preprocessing is disabled or the preset is
///    `PreprocessingPreset::Minimal`.
/// 2. `form` is dropped when `remove_forms` is set.
/// 3. `noscript` is dropped under the `Aggressive` preset.
/// 4. Nothing further is dropped unless `remove_navigation` is set.
/// 5. With `remove_navigation` set:
///    * `nav` is always dropped;
///    * `header` is dropped when it carries a navigation hint;
///    * `footer` / `aside` are dropped under `Aggressive`, or when they carry
///      a navigation hint;
///    * any other element is dropped only under `Aggressive`, when it carries
///      either a navigation hint or a noise hint.
pub fn should_drop_for_preprocessing(tag_name: &str, tag: &tl::HTMLTag, options: &ConversionOptions) -> bool {
    use crate::options::PreprocessingPreset;

    let settings = &options.preprocessing;
    if !settings.enabled || settings.preset == PreprocessingPreset::Minimal {
        return false;
    }

    let aggressive = settings.preset == PreprocessingPreset::Aggressive;
    let is_removable_form = settings.remove_forms && tag_name == "form";
    let is_removable_noscript = aggressive && tag_name == "noscript";
    if is_removable_form || is_removable_noscript {
        return true;
    }

    if !settings.remove_navigation {
        return false;
    }

    // Evaluated up front, before dispatching on the tag name.
    let nav_hint = element_has_navigation_hint(tag);
    match tag_name {
        "nav" => true,
        "header" => nav_hint,
        "footer" | "aside" => aggressive || nav_hint,
        // The noise check only runs for aggressive mode without a nav hint.
        _ => aggressive && (nav_hint || element_has_noise_hint(tag)),
    }
}
/// Returns `true` when the `class` or the `id` attribute of `tag` matches any
/// of the noise keywords listed below, as decided by `attribute_matches_any`.
///
/// `class` is checked first; `id` is only consulted when `class` does not
/// match.
fn element_has_noise_hint(tag: &tl::HTMLTag) -> bool {
    // Keywords handed to `attribute_matches_any` for both attributes.
    const NOISE_MARKERS: &[&str] = &[
        "cookie",
        "consent",
        "gdpr",
        "banner",
        "advertisement",
        "ad-container",
        "advert",
        "social-share",
        "share-buttons",
        "popup",
        "modal-overlay",
        "newsletter-signup",
    ];

    ["class", "id"]
        .iter()
        .any(|&attribute| attribute_matches_any(tag, attribute, NOISE_MARKERS))
}