fast_h2m 0.4.2

High-performance HTML to Markdown converter
Documentation
//! Performance caching utilities.
//!
//! Caching mechanisms for expensive operations during conversion, including
//! DOM context building and cache capacity management.

use crate::converter::{
    DomContext,
    dom_context::{ChildRange, TextCache},
};
use std::num::NonZeroUsize;

/// Build a DOM context with hierarchical node information.
///
/// Pre-computes parent-child relationships, sibling indices, and caches
/// tag information for efficient DOM navigation during conversion.
pub fn build_dom_context(
    dom: &crate::tl_types::Dom,
    parser: &crate::tl_types::Parser,
    input_len: usize,
) -> DomContext {
    let cache_capacity = text_cache_capacity_for_input(input_len);
    let mut ctx = DomContext {
        parent_map: Vec::new(),
        children_map: Vec::new(),
        child_handles: Vec::with_capacity(input_len / 32),
        sibling_index_map: Vec::new(),
        root_children: dom.children().to_vec(),
        node_map: Vec::new(),
        tag_info_map: Vec::new(),
        prev_inline_like_map: Vec::new(),
        next_inline_like_map: Vec::new(),
        next_tag_map: Vec::new(),
        next_whitespace_map: Vec::new(),
        text_cache: std::cell::RefCell::new(TextCache::new(cache_capacity)),
    };

    for (index, child_handle) in dom.children().iter().enumerate() {
        let id = child_handle.get_inner();
        ctx.ensure_capacity(id);
        ctx.sibling_index_map[id as usize] = Some(index);
        record_node_hierarchy(*child_handle, None, parser, &mut ctx);
    }

    ctx
}

/// Calculate appropriate cache capacity based on input size.
///
/// Returns a cache capacity between 32 and TEXT_CACHE_CAPACITY,
/// scaled proportionally to input size (1KB = 1 slot).
pub fn text_cache_capacity_for_input(input_len: usize) -> NonZeroUsize {
    const TEXT_CACHE_CAPACITY: usize = 256;
    // `clamp(32, TEXT_CACHE_CAPACITY)` guarantees `target >= 32 > 0`, so `new` always returns Some.
    let target = (input_len / 1024).clamp(32, TEXT_CACHE_CAPACITY);
    NonZeroUsize::new(target).unwrap_or(NonZeroUsize::MIN)
}

/// Recursively record node hierarchy into DOM context.
///
/// Builds the complete parent-child relationship map for efficient tree traversal.
pub fn record_node_hierarchy(
    node_handle: tl::NodeHandle,
    parent: Option<u32>,
    parser: &crate::tl_types::Parser,
    ctx: &mut DomContext,
) {
    let id = node_handle.get_inner();
    ctx.ensure_capacity(id);
    ctx.parent_map[id as usize] = parent;
    ctx.node_map[id as usize] = Some(node_handle);

    if let Some(tl::Node::Tag(tag)) = node_handle.get(parser) {
        let start = ctx.child_handles.len();
        let children = tag.children();
        let children = children.top();
        for child in children.iter().copied() {
            ctx.child_handles.push(child);
        }
        let len = ctx.child_handles.len() - start;

        for index in 0..len {
            let child = ctx.child_handles[start + index];
            let child_id = child.get_inner();
            ctx.ensure_capacity(child_id);
            ctx.sibling_index_map[child_id as usize] = Some(index);
            record_node_hierarchy(child, Some(id), parser, ctx);
        }
        ctx.children_map[id as usize] = Some(ChildRange {
            start: start as u32,
            len: len as u32,
        });
    }
}