html-to-markdown-rs 3.5.1

High-performance HTML to Markdown converter using the astral-tl parser. Part of the Kreuzberg ecosystem.
Documentation
//! Handlers for HTML5 interactive elements.
//!
//! Processes interactive disclosure and dialog semantic elements:
//! - `<details>` - Expandable/collapsible disclosure widget
//! - `<summary>` - Summary or caption for a details element
//! - `<dialog>` - Dialog box overlay widget
//!
//! These elements are treated as block-level content containers
//! with special formatting for the summary element.

// Note: Context and DomContext are defined in converter.rs
// walk_node is also defined there and must be called via the parent module
use super::walk_node;

/// Handles the `<details>` element.
///
/// A details element represents a disclosure widget that can be toggled
/// to show/hide additional content. In Markdown, it's rendered as a block
/// with all content visible.
///
/// # Behavior
///
/// - **Inline mode**: Children are processed inline without block spacing
/// - **Block mode**: Content is collected and wrapped with proper blank-line spacing
/// - **Empty content**: Skipped entirely
#[cfg_attr(not(feature = "visitor"), allow(unused_variables))]
pub fn handle_details(
    _tag_name: &str,
    node_handle: &tl::NodeHandle,
    parser: &tl::Parser,
    output: &mut String,
    options: &crate::options::ConversionOptions,
    ctx: &super::Context,
    depth: usize,
    dom_ctx: &super::DomContext,
) {
    if let Some(tl::Node::Tag(tag)) = node_handle.get(parser) {
        #[cfg(feature = "visitor")]
        if let Some(ref visitor_handle) = ctx.visitor {
            use crate::converter::utility::content::collect_tag_attributes;
            use crate::visitor::{NodeContext, NodeType, VisitResult};

            let attributes = collect_tag_attributes(tag);
            let node_id = node_handle.get_inner();
            let parent_tag = dom_ctx.parent_tag_name(node_id, parser);
            let index_in_parent = dom_ctx.get_sibling_index(node_id).unwrap_or(0);
            let open = tag.attributes().get("open").is_some();
            let node_ctx = NodeContext {
                node_type: NodeType::Details,
                tag_name: "details".to_string(),
                attributes,
                depth,
                index_in_parent,
                parent_tag,
                is_inline: false,
            };
            let visit_result = {
                let mut visitor = visitor_handle.lock().expect("visitor mutex poisoned");
                visitor.visit_details(&node_ctx, open)
            };
            match visit_result {
                VisitResult::Continue => {}
                VisitResult::Skip => return,
                VisitResult::Custom(custom) => {
                    if !output.is_empty() && !output.ends_with("\n\n") {
                        output.push_str("\n\n");
                    }
                    output.push_str(&custom);
                    output.push_str("\n\n");
                    return;
                }
                VisitResult::PreserveHtml => {
                    use crate::converter::utility::serialization::serialize_node;
                    output.push_str(&serialize_node(node_handle, parser));
                    return;
                }
                VisitResult::Error(err) => {
                    if ctx.visitor_error.borrow().is_none() {
                        *ctx.visitor_error.borrow_mut() = Some(err);
                    }
                    return;
                }
            }
        }

        // In inline context, just process children inline
        if ctx.convert_as_inline {
            let children = tag.children();
            {
                for child_handle in children.top().iter() {
                    super::walk_node(child_handle, parser, output, options, ctx, depth, dom_ctx);
                }
            }
            return;
        }

        // Collect content
        let mut content = String::with_capacity(256);
        let children = tag.children();
        {
            for child_handle in children.top().iter() {
                walk_node(child_handle, parser, &mut content, options, ctx, depth, dom_ctx);
            }
        }

        let trimmed = content.trim();
        if !trimmed.is_empty() {
            // Add spacing before if needed
            if !output.is_empty() && !output.ends_with("\n\n") {
                output.push_str("\n\n");
            }

            // Output content
            output.push_str(trimmed);
            output.push_str("\n\n");
        }
    }
}

/// Handles the `<summary>` element.
///
/// A summary element contains a caption for a details element.
/// It is rendered as strong (bold) text to distinguish it from regular content.
///
/// # Behavior
///
/// - **Inline mode**: Content is rendered inline without emphasis
/// - **Block mode**: Content is wrapped in strong markers (e.g., `**text**`)
/// - Uses the configured strong/emphasis symbol from ConversionOptions
///
/// # Implementation Details
///
/// The handler:
/// 1. Creates a context with `in_strong: true` for nested formatting
/// 2. Collects content from all children
/// 3. Wraps non-empty content in strong markers (repeated twice per Markdown spec)
#[cfg_attr(not(feature = "visitor"), allow(unused_variables))]
pub fn handle_summary(
    _tag_name: &str,
    node_handle: &tl::NodeHandle,
    parser: &tl::Parser,
    output: &mut String,
    options: &crate::options::ConversionOptions,
    ctx: &super::Context,
    depth: usize,
    dom_ctx: &super::DomContext,
) {
    if let Some(tl::Node::Tag(tag)) = node_handle.get(parser) {
        let mut content = String::with_capacity(64);

        // Set strong context for nested content
        let mut summary_ctx = ctx.clone();
        if !ctx.convert_as_inline {
            summary_ctx.in_strong = true;
        }

        let children = tag.children();
        {
            for child_handle in children.top().iter() {
                super::walk_node(
                    child_handle,
                    parser,
                    &mut content,
                    options,
                    &summary_ctx,
                    depth + 1,
                    dom_ctx,
                );
            }
        }

        let trimmed = content.trim().to_owned();
        if trimmed.is_empty() {
            return;
        }

        #[cfg(feature = "visitor")]
        if let Some(ref visitor_handle) = ctx.visitor {
            use crate::converter::utility::content::collect_tag_attributes;
            use crate::visitor::{NodeContext, NodeType, VisitResult};

            let attributes = collect_tag_attributes(tag);
            let node_id = node_handle.get_inner();
            let parent_tag = dom_ctx.parent_tag_name(node_id, parser);
            let index_in_parent = dom_ctx.get_sibling_index(node_id).unwrap_or(0);
            let node_ctx = NodeContext {
                node_type: NodeType::Summary,
                tag_name: "summary".to_string(),
                attributes,
                depth,
                index_in_parent,
                parent_tag,
                is_inline: false,
            };
            let visit_result = {
                let mut visitor = visitor_handle.lock().expect("visitor mutex poisoned");
                visitor.visit_summary(&node_ctx, &trimmed)
            };
            match visit_result {
                VisitResult::Continue => {}
                VisitResult::Skip => return,
                VisitResult::Custom(custom) => {
                    if ctx.convert_as_inline {
                        output.push_str(&custom);
                    } else {
                        output.push_str(&custom);
                        if !custom.ends_with('\n') {
                            output.push_str("\n\n");
                        }
                    }
                    return;
                }
                VisitResult::PreserveHtml => {
                    use crate::converter::utility::serialization::serialize_node;
                    output.push_str(&serialize_node(node_handle, parser));
                    return;
                }
                VisitResult::Error(err) => {
                    if ctx.visitor_error.borrow().is_none() {
                        *ctx.visitor_error.borrow_mut() = Some(err);
                    }
                    return;
                }
            }
        }

        if ctx.convert_as_inline {
            // Inline mode: output without formatting
            output.push_str(&trimmed);
        } else {
            // Block mode: output with strong markers
            let mut symbol = String::with_capacity(2);
            symbol.push(options.strong_em_symbol);
            symbol.push(options.strong_em_symbol);
            output.push_str(&symbol);
            output.push_str(&trimmed);
            output.push_str(&symbol);
            output.push_str("\n\n");
        }
    }
}

/// Handles the `<dialog>` element.
///
/// A dialog element represents a modal dialog box. In Markdown, it's rendered
/// as a block container with content visible.
///
/// # Behavior
///
/// - **Inline mode**: Children are processed inline without block spacing
/// - **Block mode**: Content is processed and wrapped with proper blank lines
/// - Trailing whitespace is removed from collected content
///
/// # Implementation Details
///
/// The handler:
/// 1. Marks the position in output before processing children
/// 2. Processes all children in the normal context
/// 3. Removes trailing spaces and tabs from the output
/// 4. Ensures proper blank-line spacing after the dialog
pub fn handle_dialog(
    _tag_name: &str,
    node_handle: &tl::NodeHandle,
    parser: &tl::Parser,
    output: &mut String,
    options: &crate::options::ConversionOptions,
    ctx: &super::Context,
    depth: usize,
    dom_ctx: &super::DomContext,
) {
    if let Some(tl::Node::Tag(tag)) = node_handle.get(parser) {
        // In inline context, just process children inline
        if ctx.convert_as_inline {
            let children = tag.children();
            {
                for child_handle in children.top().iter() {
                    super::walk_node(child_handle, parser, output, options, ctx, depth, dom_ctx);
                }
            }
            return;
        }

        // Mark position before processing children
        let content_start = output.len();

        let children = tag.children();
        {
            for child_handle in children.top().iter() {
                super::walk_node(child_handle, parser, output, options, ctx, depth, dom_ctx);
            }
        }

        // Remove trailing whitespace from dialog content
        while output.len() > content_start && (output.ends_with(' ') || output.ends_with('\t')) {
            output.pop();
        }

        // Ensure proper spacing after dialog
        if output.len() > content_start && !output.ends_with("\n\n") {
            output.push_str("\n\n");
        }
    }
}

/// Dispatcher for interactive elements.
///
/// Routes `<details>`, `<summary>`, and `<dialog>` elements to their respective handlers.
pub fn handle(
    tag_name: &str,
    node_handle: &tl::NodeHandle,
    parser: &tl::Parser,
    output: &mut String,
    options: &crate::options::ConversionOptions,
    ctx: &super::Context,
    depth: usize,
    dom_ctx: &super::DomContext,
) {
    match tag_name {
        "details" => handle_details(tag_name, node_handle, parser, output, options, ctx, depth, dom_ctx),
        "summary" => handle_summary(tag_name, node_handle, parser, output, options, ctx, depth, dom_ctx),
        "dialog" => handle_dialog(tag_name, node_handle, parser, output, options, ctx, depth, dom_ctx),
        _ => {}
    }
}