rulemorph 0.3.4

use std::collections::HashSet;

use comrak::nodes::{Node, NodeValue};

use crate::error::{TransformError, TransformErrorKind};

use super::super::NormalizationOptions;

/// Scans `input` line by line and keeps running estimates of how many AST
/// nodes and table cells the document would produce, checking both estimates
/// against `options` after every line.
///
/// The scan is a small state machine over block-level context (open fence,
/// open HTML block, indented code run, pending/active table, active list,
/// current paragraph and link-reference-definition quote depth). Each branch
/// that fully handles a line resets the state that the line invalidates,
/// re-checks the limits, and `continue`s.
///
/// `estimate_gfm_extensions` enables table accounting here and is forwarded to
/// `estimate_inline_nodes`.
///
/// # Errors
///
/// Propagates the error returned by `enforce_markdown_node_count` or
/// `enforce_markdown_table_cell_count` as soon as either rejects the current
/// estimate.
pub(super) fn enforce_markdown_structural_preflight(
    input: &str,
    estimate_gfm_extensions: bool,
    options: &NormalizationOptions,
) -> Result<(), TransformError> {
    // Starts at 1 — presumably to account for the document root node (confirm).
    let mut estimated_nodes = 1usize;
    let mut estimated_table_cells = 0usize;
    let mut active_fence: Option<ActiveFence> = None;
    let mut active_html_block: Option<ActiveHtmlBlock> = None;
    let mut pending_table_header_cells: Option<TableHeaderCandidate> = None;
    let mut active_table: Option<TableState> = None;
    let mut in_indented_code = false;
    let mut paragraph_quote_depth: Option<usize> = None;
    let mut reference_definition_quote_depth: Option<usize> = None;
    let mut reference_title_state: Option<ReferenceTitleState> = None;
    let mut active_list: Option<ListState> = None;
    let reference_labels = collect_link_reference_labels(input);
    // Collected up front so later lines can be handed to the reference-title
    // continuation check as a lookahead slice.
    let lines = input.lines().collect::<Vec<_>>();
    for (line_index, line) in lines.iter().enumerate() {
        let line = *line;
        // Inside a fenced code block: lines add no nodes; only look for the
        // closing fence at the same blockquote depth.
        if let Some(active) = active_fence {
            if let Some(fence_line) = fence_line_content(line, active.quote_depth)
                && is_closing_fence(fence_line, active.fence)
            {
                active_fence = None;
            }
            enforce_markdown_node_count(estimated_nodes, options)?;
            enforce_markdown_table_cell_count(estimated_table_cells, options)?;
            continue;
        }

        // Inside an HTML block: a line that still belongs to the block adds no
        // nodes. A line that does not belong to it drops the block and falls
        // through to normal processing below.
        if let Some(active) = active_html_block {
            if let Some(html_line) = html_block_content_line(line, active.quote_depth) {
                if html_block_ends_on_line(html_line, active.end) {
                    active_html_block = None;
                }
                pending_table_header_cells = None;
                active_table = None;
                enforce_markdown_node_count(estimated_nodes, options)?;
                enforce_markdown_table_cell_count(estimated_table_cells, options)?;
                continue;
            }
            active_html_block = None;
        }

        // `None` means the line is indented far enough to be treated as
        // indented code; a run of such lines is counted as a single node.
        let Some(trimmed) = active_markdown_line(line) else {
            if !in_indented_code {
                estimated_nodes = estimated_nodes.saturating_add(1);
                in_indented_code = true;
            }
            pending_table_header_cells = None;
            active_table = None;
            active_list = None;
            paragraph_quote_depth = None;
            reference_definition_quote_depth = None;
            reference_title_state = None;
            enforce_markdown_node_count(estimated_nodes, options)?;
            enforce_markdown_table_cell_count(estimated_table_cells, options)?;
            continue;
        };
        in_indented_code = false;

        // Line continues the title of a link reference definition: no nodes
        // are added and the title state is advanced.
        if let Some(next_reference_title_state) = effective_link_reference_title_continuation(
            trimmed,
            &lines[line_index + 1..],
            paragraph_quote_depth,
            reference_title_state,
        ) {
            pending_table_header_cells = None;
            active_table = None;
            active_list = None;
            paragraph_quote_depth = None;
            reference_definition_quote_depth = None;
            reference_title_state = next_reference_title_state;
            enforce_markdown_node_count(estimated_nodes, options)?;
            enforce_markdown_table_cell_count(estimated_table_cells, options)?;
            continue;
        }

        // Opening code fence: one node for the whole block.
        if let Some(active) = opening_fence_line(trimmed) {
            estimated_nodes = estimated_nodes.saturating_add(1);
            active_fence = Some(active);
            pending_table_header_cells = None;
            active_table = None;
            active_list = None;
            paragraph_quote_depth = None;
            reference_definition_quote_depth = None;
            reference_title_state = None;
            enforce_markdown_node_count(estimated_nodes, options)?;
            enforce_markdown_table_cell_count(estimated_table_cells, options)?;
            continue;
        }
        // Opening HTML block: counted via the structural estimate of its first
        // line; the block stays active only if it does not end on that line.
        if let Some(active) = opening_html_block_line(trimmed)
            && can_start_html_block(active, paragraph_quote_depth)
        {
            estimated_nodes =
                estimated_nodes.saturating_add(estimate_structural_nodes(trimmed, None));
            if let Some(html_line) = html_block_content_line(trimmed, active.quote_depth)
                && !html_block_ends_on_line(html_line, active.end)
            {
                active_html_block = Some(active);
            }
            pending_table_header_cells = None;
            active_table = None;
            active_list = None;
            paragraph_quote_depth = None;
            reference_definition_quote_depth = None;
            reference_title_state = None;
            enforce_markdown_node_count(estimated_nodes, options)?;
            enforce_markdown_table_cell_count(estimated_table_cells, options)?;
            continue;
        }
        // Setext underline for the preceding paragraph (but not directly after
        // a reference definition): adds no nodes of its own.
        if reference_definition_quote_depth.is_none()
            && is_setext_underline_for_paragraph(trimmed, paragraph_quote_depth)
        {
            pending_table_header_cells = None;
            active_table = None;
            active_list = None;
            paragraph_quote_depth = None;
            reference_definition_quote_depth = None;
            reference_title_state = None;
            enforce_markdown_node_count(estimated_nodes, options)?;
            enforce_markdown_table_cell_count(estimated_table_cells, options)?;
            continue;
        }
        let reference_definition = effective_link_reference_definition(
            trimmed,
            paragraph_quote_depth,
            reference_definition_quote_depth,
        );
        let reference_definition_quote = reference_definition
            .as_ref()
            .map(|definition| definition.quote_depth);
        // Remember a pending title only when the definition reports one.
        reference_title_state = reference_definition.as_ref().and_then(|definition| {
            definition.title_pending.then_some(ReferenceTitleState {
                quote_depth: definition.quote_depth,
                closer: None,
            })
        });
        let line_is_reference_definition = reference_definition_quote.is_some();
        // Block-level estimate for this line. It is kept separately so the
        // table logic below can subtract it again if the line turns out to be
        // part of a table.
        let fallback_nodes = if line_is_reference_definition {
            active_list = None;
            0
        } else if let Some(list) = list_state_for_preflight_line(trimmed, paragraph_quote_depth) {
            // An item continuing the same list costs one node less than an
            // item that opens a new list.
            let nodes = if active_list == Some(list) { 3 } else { 4 };
            active_list = Some(list);
            nodes
        } else {
            active_list = None;
            estimate_structural_nodes(trimmed, paragraph_quote_depth)
        };
        estimated_nodes = estimated_nodes.saturating_add(fallback_nodes);
        estimated_nodes = estimated_nodes.saturating_add(estimate_inline_nodes(
            trimmed,
            &reference_labels,
            line_is_reference_definition,
            estimate_gfm_extensions,
        ));
        paragraph_quote_depth = reference_definition_quote
            .or_else(|| paragraph_quote_depth_for_line(trimmed, paragraph_quote_depth));
        reference_definition_quote_depth = reference_definition_quote;
        if estimate_gfm_extensions && !line_is_reference_definition {
            if let Some((quote_depth, table_line)) = table_preflight_line(trimmed) {
                let pipe_cells = pipe_table_cell_count(table_line);
                if let Some(table) = active_table {
                    match pipe_cells {
                        // Body row of the active table: replace the fallback
                        // estimate with a row sized by the table's column count.
                        Some(cells) if quote_depth == table.quote_depth => {
                            estimated_nodes = estimated_nodes.saturating_sub(fallback_nodes);
                            estimated_nodes = estimated_nodes
                                .saturating_add(estimate_table_row_nodes(table.columns));
                            estimated_table_cells =
                                estimated_table_cells.saturating_add(table.columns);
                            pending_table_header_cells = None;
                        }
                        // Pipe row at a different quote depth: the table ends
                        // and this row becomes a new header candidate.
                        Some(cells) => {
                            active_table = None;
                            pending_table_header_cells = Some(TableHeaderCandidate {
                                cells,
                                quote_depth,
                                fallback_nodes,
                            });
                        }
                        None => {
                            active_table = None;
                            pending_table_header_cells = None;
                        }
                    }
                } else if let Some(separator_cells) = table_separator_cell_count(table_line) {
                    // Separator row confirms the pending header only when the
                    // cell count and quote depth both match.
                    if let Some(header) = pending_table_header_cells.take()
                        && header.cells == separator_cells
                        && header.quote_depth == quote_depth
                    {
                        // Undo the fallback estimates of the header and
                        // separator lines, then add the table estimate instead.
                        estimated_nodes = estimated_nodes
                            .saturating_sub(header.fallback_nodes)
                            .saturating_sub(fallback_nodes);
                        estimated_nodes = estimated_nodes
                            .saturating_add(header.quote_depth)
                            .saturating_add(1)
                            .saturating_add(estimate_table_row_nodes(header.cells));
                        estimated_table_cells = estimated_table_cells.saturating_add(header.cells);
                        active_table = Some(TableState {
                            columns: header.cells,
                            quote_depth,
                        });
                    } else {
                        active_table = None;
                    }
                } else if let Some(cells) = pipe_cells {
                    // Possible header row; confirmed only if a matching
                    // separator follows.
                    pending_table_header_cells = Some(TableHeaderCandidate {
                        cells,
                        quote_depth,
                        fallback_nodes,
                    });
                } else {
                    pending_table_header_cells = None;
                    active_table = None;
                }
            } else {
                pending_table_header_cells = None;
                active_table = None;
            }
        } else {
            pending_table_header_cells = None;
            active_table = None;
        }
        enforce_markdown_node_count(estimated_nodes, options)?;
        enforce_markdown_table_cell_count(estimated_table_cells, options)?;
    }
    Ok(())
}

/// A code fence as recognised by `opening_fence`.
#[derive(Clone, Copy)]
struct Fence {
    /// Fence character: `` b'`' `` or `b'~'`.
    marker: u8,
    /// Number of consecutive `marker` bytes at the start of the line (at least 3).
    len: usize,
}

/// A fence together with the blockquote depth it was opened at.
#[derive(Clone, Copy)]
struct ActiveFence {
    /// The fence found after stripping blockquote markers.
    fence: Fence,
    /// Number of `>` markers stripped in front of the fence.
    quote_depth: usize,
}

/// State of an HTML block that has been opened but not yet ended.
#[derive(Clone, Copy)]
struct ActiveHtmlBlock {
    /// End condition, passed to `html_block_ends_on_line` for each line.
    end: HtmlBlockEnd,
    /// Blockquote depth used to decide whether a line still belongs to the block.
    quote_depth: usize,
    /// NOTE(review): not read in this part of the file; presumably consulted by
    /// `can_start_html_block` — confirm.
    can_interrupt_paragraph: bool,
}

/// How an open HTML block ends.
///
/// NOTE(review): the matching logic lives in `html_block_ends_on_line`, which
/// is not visible here; the variant descriptions below are inferred from the
/// names and should be confirmed against it.
#[derive(Clone, Copy)]
enum HtmlBlockEnd {
    /// Presumably ends on a line containing the given closing tag.
    ClosingTag(&'static str),
    /// Presumably ends on a line containing the given literal text.
    Contains(&'static str),
    /// Presumably ends at the next blank line.
    BlankLine,
}

/// A pipe row that becomes a table header if a matching separator row follows.
#[derive(Clone, Copy)]
struct TableHeaderCandidate {
    /// Cell count reported by `pipe_table_cell_count` for the row.
    cells: usize,
    /// Blockquote depth of the row; the separator must be at the same depth.
    quote_depth: usize,
    /// Block-level nodes already added for this row, subtracted again once the
    /// row is confirmed as a table header.
    fallback_nodes: usize,
}

/// A table whose header and separator rows have been seen.
#[derive(Clone, Copy)]
struct TableState {
    /// Column count taken from the header row; body rows are counted with this width.
    columns: usize,
    /// Blockquote depth of the table; body rows must be at the same depth.
    quote_depth: usize,
}

/// Identity of the list the previous line belonged to, compared by equality to
/// decide whether the current item continues that list.
#[derive(Clone, Copy, PartialEq, Eq)]
struct ListState {
    /// Blockquote depth of the list item (`list_state_for_preflight_line` only
    /// produces depth 0).
    quote_depth: usize,
    /// Whether the list is bulleted or numbered.
    kind: ListKind,
}

/// Kind of list marker that introduced a list item.
#[derive(Clone, Copy, PartialEq, Eq)]
enum ListKind {
    /// Item introduced by `-`, `*` or `+`.
    Unordered,
    /// Item introduced by digits followed by `.` or `)`.
    Ordered,
}

/// Parses a code-fence opener at the start of `trimmed`.
///
/// Returns the fence marker and run length when the line starts with at least
/// three backticks or tildes. A backtick fence whose remainder contains another
/// backtick is rejected.
fn opening_fence(trimmed: &str) -> Option<Fence> {
    let bytes = trimmed.as_bytes();
    let marker = *bytes
        .first()
        .filter(|byte| matches!(**byte, b'`' | b'~'))?;
    // Length of the leading run of marker bytes.
    let len = bytes
        .iter()
        .position(|byte| *byte != marker)
        .unwrap_or(bytes.len());
    let backtick_in_info = marker == b'`' && trimmed[len..].contains('`');
    if len < 3 || backtick_in_info {
        None
    } else {
        Some(Fence { marker, len })
    }
}

/// Looks for a code-fence opener behind any leading blockquote markers and
/// records the quote depth it was found at.
fn opening_fence_line(trimmed: &str) -> Option<ActiveFence> {
    let (quote_depth, after_quotes) = strip_blockquote_markers(trimmed);
    let fence = opening_fence(active_markdown_line(after_quotes)?)?;
    Some(ActiveFence { fence, quote_depth })
}

/// Strips up to three leading spaces from `line`.
///
/// Whitespace-only lines yield `Some("")`. Lines indented by four or more
/// spaces, or whose first non-space byte is a tab, yield `None`.
fn active_markdown_line(line: &str) -> Option<&str> {
    if line.trim().is_empty() {
        return Some("");
    }
    let rest = line.trim_start_matches(' ');
    let leading_spaces = line.len() - rest.len();
    let indented = leading_spaces >= 4 || rest.starts_with('\t');
    if indented { None } else { Some(rest) }
}

/// Returns the content of `line` for fence matching at `quote_depth`.
///
/// Delegates to `block_content_line`, so it returns `None` when the line is
/// not at exactly `quote_depth` or is indented too far.
fn fence_line_content(line: &str, quote_depth: usize) -> Option<&str> {
    block_content_line(line, quote_depth)
}

/// Returns the content of `line` behind exactly `quote_depth` blockquote
/// markers, with leading indentation stripped both before and after them.
///
/// Returns `None` when the quote depth differs or either indentation check in
/// `active_markdown_line` rejects the line.
fn block_content_line(line: &str, quote_depth: usize) -> Option<&str> {
    let (depth, content) = strip_blockquote_markers(active_markdown_line(line)?);
    if depth != quote_depth {
        return None;
    }
    active_markdown_line(content)
}

/// Returns the part of `line` that belongs to an HTML block at `quote_depth`.
///
/// At depth 0 every line belongs to the block and is returned unchanged. At
/// deeper levels the line must carry exactly `quote_depth` blockquote markers.
fn html_block_content_line(line: &str, quote_depth: usize) -> Option<&str> {
    match quote_depth {
        0 => Some(line),
        expected_depth => {
            let (depth, content) = strip_blockquote_markers(active_markdown_line(line)?);
            if depth == expected_depth {
                Some(content)
            } else {
                None
            }
        }
    }
}

/// Reports whether `trimmed` closes `fence`: same marker, a run at least as
/// long as the opener, and only whitespace after the run.
fn is_closing_fence(trimmed: &str, fence: Fence) -> bool {
    opening_fence(trimmed).is_some_and(|candidate| {
        let same_marker = candidate.marker == fence.marker;
        let long_enough = candidate.len >= fence.len;
        same_marker && long_enough && trimmed[candidate.len..].trim().is_empty()
    })
}

/// Walks every descendant of `root`, checking the running node count and the
/// running table-cell count against `options` as it goes.
///
/// # Errors
///
/// Propagates the first error returned by `enforce_markdown_node_count` or
/// `enforce_markdown_table_cell_count`.
pub(super) fn count_parsed_markdown_nodes(
    root: Node<'_>,
    options: &NormalizationOptions,
) -> Result<(), TransformError> {
    let mut cells_seen = 0usize;
    for (position, node) in root.descendants().enumerate() {
        // `position` is zero-based; the count includes the current node.
        enforce_markdown_node_count(position.saturating_add(1), options)?;
        let is_table_cell = matches!(&node.data.borrow().value, NodeValue::TableCell);
        if is_table_cell {
            cells_seen = cells_seen.saturating_add(1);
            enforce_markdown_table_cell_count(cells_seen, options)?;
        }
    }
    Ok(())
}

/// Reports whether `trimmed` looks like the start of a block-level construct:
/// heading, thematic break, blockquote, list item, fence, HTML, or table
/// separator.
fn is_structural_line(trimmed: &str, paragraph_quote_depth: Option<usize>) -> bool {
    if is_atx_heading_line(trimmed) || is_thematic_break_line(trimmed) {
        return true;
    }
    if trimmed.starts_with('>') {
        return true;
    }
    if is_list_item_line_for_context(trimmed, paragraph_quote_depth) {
        return true;
    }
    let has_block_prefix = ["```", "~~~", "<"]
        .iter()
        .any(|prefix| trimmed.starts_with(*prefix));
    has_block_prefix || is_table_separator_line(trimmed)
}

/// Reports whether `trimmed` starts with one to six `#` characters that are
/// either the whole line or followed by a space or tab.
fn is_atx_heading_line(trimmed: &str) -> bool {
    let after_hashes = trimmed.trim_start_matches('#');
    let hash_count = trimmed.len() - after_hashes.len();
    if hash_count == 0 || hash_count > 6 {
        return false;
    }
    matches!(
        after_hashes.as_bytes().first().copied(),
        None | Some(b' ' | b'\t')
    )
}

/// Estimates how many block-level nodes `trimmed` contributes.
///
/// Checks are ordered: thematic break, list item, blockquote, ATX heading,
/// any other structural line, plain text, blank line.
fn estimate_structural_nodes(trimmed: &str, paragraph_quote_depth: Option<usize>) -> usize {
    if is_thematic_break_line(trimmed) {
        return 1;
    }
    if is_list_item_line_for_context(trimmed, paragraph_quote_depth) {
        // list + item + paragraph + text for a compact list item.
        return 4;
    }
    if trimmed.starts_with('>') {
        return estimate_blockquote_nodes(trimmed, paragraph_quote_depth);
    }
    if is_atx_heading_line(trimmed) {
        return estimate_atx_heading_nodes(trimmed);
    }
    if is_structural_line(trimmed, paragraph_quote_depth) {
        return 1;
    }
    if trimmed.is_empty() { 0 } else { 2 }
}

/// Estimates nodes for an ATX heading: 1 for a heading with no text, 2 when
/// text remains after removing the markers and any closing `#` sequence.
fn estimate_atx_heading_nodes(trimmed: &str) -> usize {
    let after_markers = trimmed.trim_start_matches('#');
    let heading_text = strip_atx_closing_sequence(after_markers.trim_end());
    match heading_text.trim() {
        "" => 1,
        _ => 2,
    }
}

/// Removes a trailing run of `#` from `content` when that run is preceded by
/// ASCII whitespace.
///
/// Content with no trailing `#`, content made up only of `#`, and content
/// whose trailing run directly follows a non-whitespace byte are returned
/// unchanged.
fn strip_atx_closing_sequence(content: &str) -> &str {
    let body = content.trim_end_matches('#');
    let nothing_to_strip = body.len() == content.len() || body.is_empty();
    if nothing_to_strip {
        return content;
    }
    match body.as_bytes().last() {
        Some(byte) if byte.is_ascii_whitespace() => body,
        _ => content,
    }
}

/// Estimates nodes for a blockquote line: one per `>` marker plus the
/// structural estimate of whatever non-empty content follows the markers.
///
/// Returns 0 when the line carries no blockquote marker.
fn estimate_blockquote_nodes(trimmed: &str, paragraph_quote_depth: Option<usize>) -> usize {
    let (depth, rest) = strip_blockquote_markers(trimmed);
    if depth == 0 {
        return 0;
    }
    // Paragraph depth as seen from inside the quote; `None` when the open
    // paragraph is shallower than this line's quote depth.
    let inner_paragraph_depth =
        paragraph_quote_depth.and_then(|outer| outer.checked_sub(depth));
    let inner_nodes = match active_markdown_line(rest) {
        Some(inner) if !inner.is_empty() => {
            estimate_structural_nodes(inner, inner_paragraph_depth)
        }
        _ => 0,
    };
    depth.saturating_add(inner_nodes)
}

/// Strips consecutive leading `>` markers, each with at most one following
/// space or tab, and returns how many were removed plus the remaining text.
fn strip_blockquote_markers(mut line: &str) -> (usize, &str) {
    let mut depth = 0usize;
    loop {
        match line.strip_prefix('>') {
            Some(after_marker) => {
                depth = depth.saturating_add(1);
                line = strip_optional_space_or_tab(after_marker);
            }
            None => return (depth, line),
        }
    }
}

/// Removes a single leading space or tab from `line`, if present.
fn strip_optional_space_or_tab(line: &str) -> &str {
    match line.as_bytes().first().copied() {
        // Space and tab are single-byte, so index 1 is a char boundary.
        Some(b' ' | b'\t') => &line[1..],
        _ => line,
    }
}

/// Reports whether `trimmed` starts a list item, bulleted or numbered, given
/// the quote depth of any currently open paragraph.
fn is_list_item_line_for_context(trimmed: &str, paragraph_quote_depth: Option<usize>) -> bool {
    if is_unordered_list_item_line(trimmed) {
        return true;
    }
    is_ordered_list_item_line_for_context(trimmed, paragraph_quote_depth)
}

/// Classifies a line outside any blockquote as an unordered or ordered list
/// item.
///
/// Returns `None` for lines inside a blockquote and for lines that are not
/// list items.
fn list_state_for_preflight_line(
    trimmed: &str,
    paragraph_quote_depth: Option<usize>,
) -> Option<ListState> {
    let (quote_depth, content) = strip_blockquote_markers(trimmed);
    if quote_depth != 0 {
        return None;
    }
    let kind = if is_unordered_list_item_line(content) {
        ListKind::Unordered
    } else if is_ordered_list_item_line_for_context(trimmed, paragraph_quote_depth) {
        ListKind::Ordered
    } else {
        return None;
    };
    Some(ListState { quote_depth, kind })
}

fn is_unordered_list_item_line(trimmed: &str) -> bool {
    marker_followed_by_space_or_tab(trimmed, b'-')
        || marker_followed_by_space_or_tab(trimmed, b'*')
        || marker_followed_by_space_or_tab(trimmed, b'+')
}

/// Reports whether `trimmed` is an unordered list item with non-whitespace
/// text after the marker.
fn is_nonempty_unordered_list_item_line(trimmed: &str) -> bool {
    match unordered_list_marker_tail(trimmed) {
        Some(tail) => !tail.trim().is_empty(),
        None => false,
    }
}

fn unordered_list_marker_tail(trimmed: &str) -> Option<&str> {
    for marker in [b'-', b'*', b'+'] {
        if let Some(tail) = marker_tail(trimmed, marker)
            && matches!(tail.as_bytes().first().copied(), Some(b' ' | b'\t'))
        {
            return Some(&tail[1..]);
        }
    }
    None
}

/// Returns everything after the first byte of `trimmed` when that byte equals
/// `marker` and at least one more byte follows; otherwise `None`.
///
/// The slice is only taken after the checks succeed. `bool::then_some`
/// evaluates its argument eagerly, so building `&trimmed[1..]` up front would
/// panic on an empty line or on a line starting with a multi-byte character.
fn marker_tail(trimmed: &str, marker: u8) -> Option<&str> {
    let bytes = trimmed.as_bytes();
    if bytes.len() >= 2 && bytes[0] == marker {
        // `get` also returns `None` instead of panicking if byte 1 is not a
        // char boundary (only possible for a non-ASCII `marker`).
        trimmed.get(1..)
    } else {
        None
    }
}

/// Reports whether `trimmed` starts with `marker` immediately followed by a
/// space or tab.
fn marker_followed_by_space_or_tab(trimmed: &str, marker: u8) -> bool {
    matches!(
        trimmed.as_bytes(),
        [first, b' ' | b'\t', ..] if *first == marker
    )
}

/// Reports whether `trimmed` starts an ordered list item in the current
/// context.
///
/// When a paragraph is open at a quote depth at least as deep as this line's,
/// only an item numbered 1 counts; otherwise any ordered marker counts.
fn is_ordered_list_item_line_for_context(
    trimmed: &str,
    paragraph_quote_depth: Option<usize>,
) -> bool {
    match ordered_list_marker_start(trimmed) {
        None => false,
        Some(start) => {
            let (quote_depth, _) = strip_blockquote_markers(trimmed);
            let inside_open_paragraph =
                paragraph_quote_depth.is_some_and(|depth| depth >= quote_depth);
            !inside_open_paragraph || start == 1
        }
    }
}

/// Returns the start number of the ordered list marker on `trimmed`, if any.
fn ordered_list_marker_start(trimmed: &str) -> Option<u64> {
    let (start, _tail) = ordered_list_marker_tail(trimmed)?;
    Some(start)
}

/// Parses an ordered list marker behind any blockquote markers.
///
/// The marker is one to nine ASCII digits, then `.` or `)`, then an ASCII
/// whitespace byte. Returns the parsed number and the text after that
/// whitespace byte.
fn ordered_list_marker_tail(trimmed: &str) -> Option<(u64, &str)> {
    let (_, after_quotes) = strip_blockquote_markers(trimmed);
    let content = active_markdown_line(after_quotes)?;
    let bytes = content.as_bytes();
    let digits = bytes
        .iter()
        .position(|byte| !byte.is_ascii_digit())
        .unwrap_or(bytes.len());
    if !(1..=9).contains(&digits) {
        return None;
    }
    let delimiter = *bytes.get(digits)?;
    let separator = *bytes.get(digits + 1)?;
    if !matches!(delimiter, b'.' | b')') || !separator.is_ascii_whitespace() {
        return None;
    }
    // The prefix is ASCII digits only, so slicing at `digits` is a char boundary.
    let start: u64 = content[..digits].parse().ok()?;
    Some((start, &content[digits + 2..]))
}

/// Reports whether `trimmed`, ignoring ASCII whitespace, consists of three or
/// more copies of a single `-`, `*` or `_`.
fn is_thematic_break_line(trimmed: &str) -> bool {
    let mut significant = trimmed
        .bytes()
        .filter(|byte| !byte.is_ascii_whitespace());
    let marker = match significant.next() {
        Some(byte @ (b'-' | b'*' | b'_')) => byte,
        _ => return false,
    };
    let mut marker_count = 1usize;
    for byte in significant {
        if byte != marker {
            return false;
        }
        marker_count += 1;
    }
    marker_count >= 3
}

fn is_setext_underline_for_paragraph(trimmed: &str, paragraph_quote_depth: Option<usize>) -> bool {
    let Some(paragraph_quote_depth) = paragraph_quote_depth else {
        return false;
    };
    let (quote_depth, content) = strip_blockquote_markers(trimmed);
    if quote_depth != paragraph_quote_depth {
        return false;
    }
    let Some(content) = active_markdown_line(content) else {
        return false;
    };
    is_setext_underline_line(content)
}

/// Reports whether `line`, after trimming surrounding whitespace, is a
/// non-empty run of only `=` or only `-`.
fn is_setext_underline_line(line: &str) -> bool {
    match line.trim().as_bytes() {
        [marker @ (b'=' | b'-'), rest @ ..] => rest.iter().all(|byte| byte == marker),
        _ => false,
    }
}

/// Sums the per-construct inline node estimates for `line`.
///
/// Link reference definitions contribute nothing. GFM autolinks and
/// strikethrough are only counted when `estimate_gfm_extensions` is set.
fn estimate_inline_nodes(
    line: &str,
    reference_labels: &HashSet<String>,
    line_is_reference_definition: bool,
    estimate_gfm_extensions: bool,
) -> usize {
    if line_is_reference_definition {
        return 0;
    }
    let gfm_nodes = match estimate_gfm_extensions {
        true => estimate_gfm_autolink_nodes(line).saturating_add(estimate_strikethrough_nodes(line)),
        false => 0,
    };
    let parts = [
        estimate_explicit_link_nodes(line),
        estimate_reference_link_nodes(line, reference_labels, line_is_reference_definition),
        estimate_image_nodes(line),
        // Paired delimiters: every two occurrences count as one node.
        line.matches("**").count() / 2,
        line.matches("__").count() / 2,
        estimate_single_marker_emphasis_nodes(line),
        line.matches('`').count() / 2,
        estimate_inline_html_nodes(line),
        gfm_nodes,
    ];
    parts
        .iter()
        .fold(0usize, |total, part| total.saturating_add(*part))
}

/// Estimates nodes for inline links of the form `[text](destination)`.
///
/// Each `](` whose opening bracket was not an image opener adds 2. Backslash
/// escapes and matched backtick runs are skipped.
fn estimate_explicit_link_nodes(line: &str) -> usize {
    let bytes = line.as_bytes();
    let mut cursor = 0usize;
    let mut nodes = 0usize;
    // One entry per open `[`: `true` when it was written as `![`.
    let mut open_brackets: Vec<bool> = Vec::new();
    while let Some(&byte) = bytes.get(cursor) {
        cursor = match byte {
            b'\\' => (cursor + 2).min(bytes.len()),
            b'`' => {
                let run = backtick_run_len(bytes, cursor);
                match matching_backtick_run(bytes, cursor + run, run) {
                    Some(closing) => closing + run,
                    None => cursor + run,
                }
            }
            b'[' => {
                open_brackets.push(is_unescaped_image_opener(bytes, cursor));
                cursor + 1
            }
            b']' if bytes.get(cursor + 1).copied() == Some(b'(') => {
                let opened_as_image = open_brackets.pop().unwrap_or(false);
                if !opened_as_image {
                    nodes = nodes.saturating_add(2);
                }
                explicit_link_destination_end(bytes, cursor + 2).unwrap_or(cursor + 2)
            }
            b']' => {
                open_brackets.pop();
                cursor + 1
            }
            _ => cursor + 1,
        };
    }
    nodes
}

/// Reports whether the `[` at `bracket_index` is directly preceded by a `!`
/// that is not itself backslash-escaped.
fn is_unescaped_image_opener(bytes: &[u8], bracket_index: usize) -> bool {
    match bracket_index.checked_sub(1) {
        Some(bang_index) => bytes[bang_index] == b'!' && !is_escaped_byte(bytes, bang_index),
        None => false,
    }
}

/// Reports whether the byte at `index` is preceded by an odd number of
/// consecutive backslashes.
fn is_escaped_byte(bytes: &[u8], index: usize) -> bool {
    let preceding_backslashes = bytes[..index]
        .iter()
        .rev()
        .take_while(|byte| **byte == b'\\')
        .count();
    preceding_backslashes % 2 == 1
}

/// Counts `![` image openers in `line`, skipping backslash escapes and
/// matched backtick runs.
fn estimate_image_nodes(line: &str) -> usize {
    let bytes = line.as_bytes();
    let mut cursor = 0usize;
    let mut images = 0usize;
    while let Some(&byte) = bytes.get(cursor) {
        cursor = match byte {
            b'\\' => (cursor + 2).min(bytes.len()),
            b'`' => {
                let run = backtick_run_len(bytes, cursor);
                match matching_backtick_run(bytes, cursor + run, run) {
                    Some(closing) => closing + run,
                    None => cursor + run,
                }
            }
            b'!' if bytes.get(cursor + 1).copied() == Some(b'[') => {
                images = images.saturating_add(1);
                cursor + 2
            }
            _ => cursor + 1,
        };
    }
    images
}

/// Estimates nodes for bare URL and e-mail autolinks in `line`, adding 2 per
/// autolink found.
///
/// Backslash escapes, matched backtick runs and the destinations of inline
/// links are skipped.
fn estimate_gfm_autolink_nodes(line: &str) -> usize {
    let bytes = line.as_bytes();
    let mut cursor = 0usize;
    let mut nodes = 0usize;
    while let Some(&byte) = bytes.get(cursor) {
        cursor = match byte {
            b'\\' => (cursor + 2).min(bytes.len()),
            b'`' => {
                let run = backtick_run_len(bytes, cursor);
                match matching_backtick_run(bytes, cursor + run, run) {
                    Some(closing) => closing + run,
                    None => cursor + run,
                }
            }
            b']' if bytes.get(cursor + 1).copied() == Some(b'(') => {
                explicit_link_destination_end(bytes, cursor + 2).unwrap_or(cursor + 1)
            }
            _ if is_gfm_url_autolink_start(bytes, cursor) => {
                nodes = nodes.saturating_add(2);
                autolink_token_end(bytes, cursor)
            }
            _ if is_autolink_token_boundary(bytes, cursor) => {
                let token_end = autolink_token_end(bytes, cursor);
                if looks_like_email_autolink(&line[cursor..token_end]) {
                    nodes = nodes.saturating_add(2);
                    token_end
                } else {
                    cursor + 1
                }
            }
            _ => cursor + 1,
        };
    }
    nodes
}

/// Finds the index just past the `)` that closes a link destination.
///
/// `index` is the position right after the opening `(`. Nested parentheses are
/// balanced and backslash-escaped bytes are skipped. Returns `None` when the
/// destination is never closed.
fn explicit_link_destination_end(bytes: &[u8], mut index: usize) -> Option<usize> {
    let mut open_parens = 1usize;
    while let Some(&byte) = bytes.get(index) {
        // Advance first: an escape consumes two bytes, everything else one.
        index = if byte == b'\\' {
            (index + 2).min(bytes.len())
        } else {
            index + 1
        };
        match byte {
            b'(' => open_parens = open_parens.saturating_add(1),
            b')' => {
                open_parens = open_parens.checked_sub(1)?;
                if open_parens == 0 {
                    return Some(index);
                }
            }
            _ => {}
        }
    }
    None
}

/// Reports whether a URL autolink starts at `index`: the position is a token
/// boundary and the text begins with `http://`, `https://` or `www.`
/// (ASCII case-insensitive).
fn is_gfm_url_autolink_start(bytes: &[u8], index: usize) -> bool {
    const URL_PREFIXES: [&[u8]; 3] = [b"http://", b"https://", b"www."];
    if !is_autolink_token_boundary(bytes, index) {
        return false;
    }
    URL_PREFIXES
        .iter()
        .any(|prefix| starts_with_ascii(bytes, index, *prefix))
}

/// Reports whether `bytes` contains `prefix` at `index`, comparing ASCII
/// letters case-insensitively. Out-of-range windows yield `false`.
fn starts_with_ascii(bytes: &[u8], index: usize, prefix: &[u8]) -> bool {
    let window_end = index.saturating_add(prefix.len());
    match bytes.get(index..window_end) {
        Some(window) => window.eq_ignore_ascii_case(prefix),
        None => false,
    }
}

/// Reports whether an autolink token may start at `index`: at the start of the
/// line, or right after ASCII whitespace, `(` or `[`.
fn is_autolink_token_boundary(bytes: &[u8], index: usize) -> bool {
    match index.checked_sub(1) {
        None => true,
        Some(previous_index) => {
            let previous = bytes[previous_index];
            previous.is_ascii_whitespace() || matches!(previous, b'(' | b'[')
        }
    }
}

/// Returns the index of the first ASCII whitespace, `<` or `>` at or after
/// `index`, or the end of `bytes` when none is found.
///
/// An `index` past the end of `bytes` is returned unchanged.
fn autolink_token_end(bytes: &[u8], index: usize) -> usize {
    let tail: &[u8] = bytes.get(index..).unwrap_or(&[]);
    let token_len = tail
        .iter()
        .position(|byte| byte.is_ascii_whitespace() || matches!(*byte, b'<' | b'>'))
        .unwrap_or(tail.len());
    index + token_len
}

/// Heuristically decides whether `token` is an e-mail address.
///
/// Surrounding `.`, `,`, `;`, `:`, `!` and `?` are ignored. The text before
/// the first `@` must be non-empty and made of ASCII alphanumerics or
/// `.`, `_`, `+`, `-`. The text after it must contain a `.` and be made of
/// ASCII alphanumerics, `.` or `-`.
fn looks_like_email_autolink(token: &str) -> bool {
    let token = token.trim_matches(|ch: char| matches!(ch, '.' | ',' | ';' | ':' | '!' | '?'));
    let (local, domain) = match token.split_once('@') {
        Some(parts) => parts,
        None => return false,
    };
    let local_ok = !local.is_empty()
        && local
            .bytes()
            .all(|byte| byte.is_ascii_alphanumeric() || matches!(byte, b'.' | b'_' | b'+' | b'-'));
    let domain_ok = domain.contains('.')
        && domain
            .bytes()
            .all(|byte| byte.is_ascii_alphanumeric() || matches!(byte, b'.' | b'-'));
    local_ok && domain_ok
}

/// Estimates emphasis nodes written with a single `*` or a single `_`.
fn estimate_single_marker_emphasis_nodes(line: &str) -> usize {
    let asterisk_nodes = estimate_delimited_inline_nodes(line, b'*', 1);
    let underscore_nodes = estimate_delimited_inline_nodes(line, b'_', 1);
    asterisk_nodes.saturating_add(underscore_nodes)
}

/// Estimates strikethrough nodes, counting `~~` as the delimiter.
fn estimate_strikethrough_nodes(line: &str) -> usize {
    const TILDES_PER_DELIMITER: usize = 2;
    estimate_delimited_inline_nodes(line, b'~', TILDES_PER_DELIMITER)
}

/// Estimates inline nodes delimited by runs of `marker`.
///
/// With `delimiter_len == 1` only runs of exactly one marker are considered:
/// an opener followed later by a closer counts as one pair. With any other
/// `delimiter_len`, each run contributes `run_len / delimiter_len` delimiters.
/// The result is the delimiter total halved. Backslash escapes and matched
/// backtick runs are skipped.
fn estimate_delimited_inline_nodes(line: &str, marker: u8, delimiter_len: usize) -> usize {
    let bytes = line.as_bytes();
    let mut cursor = 0usize;
    let mut delimiter_total = 0usize;
    let mut opener_pending = false;
    while let Some(&byte) = bytes.get(cursor) {
        cursor = match byte {
            b'\\' => (cursor + 2).min(bytes.len()),
            b'`' => {
                let run = backtick_run_len(bytes, cursor);
                match matching_backtick_run(bytes, cursor + run, run) {
                    Some(closing) => closing + run,
                    None => cursor + run,
                }
            }
            current if current == marker => {
                let run_len = bytes[cursor..]
                    .iter()
                    .take_while(|candidate| **candidate == marker)
                    .count();
                if delimiter_len != 1 {
                    delimiter_total = delimiter_total.saturating_add(run_len / delimiter_len);
                } else if run_len == 1 {
                    let (can_open, can_close) = single_emphasis_marker_sides(bytes, cursor, marker);
                    if opener_pending && can_close {
                        // A completed pair counts as two delimiters.
                        delimiter_total = delimiter_total.saturating_add(2);
                        opener_pending = false;
                    } else if can_open {
                        opener_pending = true;
                    }
                }
                cursor + run_len
            }
            _ => cursor + 1,
        };
    }
    delimiter_total / 2
}

/// Decides whether the single emphasis marker at `index` can open and/or close
/// emphasis, returned as `(can_open, can_close)`.
///
/// Opening needs a following non-whitespace byte; closing needs a preceding
/// non-whitespace byte. An `_` with ASCII alphanumerics on both sides can do
/// neither.
fn single_emphasis_marker_sides(bytes: &[u8], index: usize, marker: u8) -> (bool, bool) {
    let previous = index.checked_sub(1).map(|before| bytes[before]);
    let next = bytes.get(index + 1).copied();
    let intraword_underscore = marker == b'_'
        && previous.is_some_and(|byte| byte.is_ascii_alphanumeric())
        && next.is_some_and(|byte| byte.is_ascii_alphanumeric());
    if intraword_underscore {
        return (false, false);
    }
    let can_open = next.is_some_and(|byte| !byte.is_ascii_whitespace());
    let can_close = previous.is_some_and(|byte| !byte.is_ascii_whitespace());
    (can_open, can_close)
}

/// Estimates how many nodes reference-style links on `line` contribute.
///
/// Returns 0 when no reference labels are known or when the line itself is a
/// reference definition. Otherwise every `[text][label]`, `[text][]` or
/// `[label]` whose normalized label is in `reference_labels` adds 2 to the
/// estimate. Backslash escapes and backtick code spans are skipped, and
/// `[text](` is left alone here.
fn estimate_reference_link_nodes(
    line: &str,
    reference_labels: &HashSet<String>,
    line_is_reference_definition: bool,
) -> usize {
    if line_is_reference_definition || reference_labels.is_empty() {
        return 0;
    }

    let bytes = line.as_bytes();
    let is_defined = |label: &Option<String>| {
        label
            .as_ref()
            .is_some_and(|label| reference_labels.contains(label))
    };

    let mut cursor = 0usize;
    let mut nodes = 0usize;
    // Each arm yields the next cursor position; running past the end stops the scan.
    while let Some(&byte) = bytes.get(cursor) {
        cursor = match byte {
            b'\\' => cursor + 2,
            b'`' => {
                let run = backtick_run_len(bytes, cursor);
                match matching_backtick_run(bytes, cursor + run, run) {
                    Some(closing) => closing + run,
                    None => cursor + run,
                }
            }
            b'[' => match matching_closing_bracket(bytes, cursor + 1) {
                None => cursor + 1,
                Some(text_close) => {
                    let text_label = reference_label_from_bytes(bytes, cursor + 1, text_close);
                    match bytes.get(text_close + 1).copied() {
                        // Inline link: not a reference, resume at the parenthesis.
                        Some(b'(') => text_close + 1,
                        Some(b'[') => {
                            let label_start = text_close + 2;
                            match matching_closing_bracket(bytes, label_start) {
                                None => label_start,
                                Some(label_close) => {
                                    // An empty second pair falls back to the link text.
                                    let label = reference_label_from_bytes(
                                        bytes,
                                        label_start,
                                        label_close,
                                    )
                                    .or(text_label);
                                    if is_defined(&label) {
                                        nodes = nodes.saturating_add(2);
                                    }
                                    label_close + 1
                                }
                            }
                        }
                        _ => {
                            if is_defined(&text_label) {
                                nodes = nodes.saturating_add(2);
                            }
                            text_close + 1
                        }
                    }
                }
            },
            _ => cursor + 1,
        };
    }
    nodes
}

/// Returns the position of the first unescaped `]` at or after `index`, or
/// `None` when the slice ends first. A backslash skips the byte after it.
fn matching_closing_bracket(bytes: &[u8], mut index: usize) -> Option<usize> {
    while let Some(&byte) = bytes.get(index) {
        if byte == b']' {
            return Some(index);
        }
        // Stepping past the end after a trailing backslash simply ends the loop.
        index += if byte == b'\\' { 2 } else { 1 };
    }
    None
}

/// Scans `input` line by line and returns the labels of every link reference
/// definition it recognizes.
///
/// Lines inside an open fenced block or an open HTML block are skipped. The
/// remaining lines are checked, in order, for a reference-title continuation,
/// an opening fence, an opening HTML block, a setext underline and finally a
/// reference definition. Labels are stored in the form produced by the
/// definition parser.
fn collect_link_reference_labels(input: &str) -> HashSet<String> {
    let mut active_fence: Option<ActiveFence> = None;
    let mut active_html_block: Option<ActiveHtmlBlock> = None;
    // Quote depth of the paragraph the previous line belonged to, if any.
    let mut paragraph_quote_depth: Option<usize> = None;
    // Quote depth of the definition seen on the previous line, if any.
    let mut reference_definition_quote_depth: Option<usize> = None;
    // Set while a definition may still receive its title on a later line.
    let mut reference_title_state: Option<ReferenceTitleState> = None;
    let mut labels = HashSet::new();
    let lines = input.lines().collect::<Vec<_>>();
    for (line_index, line) in lines.iter().enumerate() {
        let line = *line;
        // Inside a fence nothing is collected; only the closing fence matters.
        if let Some(active) = active_fence {
            if let Some(fence_line) = fence_line_content(line, active.quote_depth)
                && is_closing_fence(fence_line, active.fence)
            {
                active_fence = None;
            }
            continue;
        }

        if let Some(active) = active_html_block {
            if let Some(html_line) = html_block_content_line(line, active.quote_depth) {
                if html_block_ends_on_line(html_line, active.end) {
                    active_html_block = None;
                }
                continue;
            }
            // The line is not HTML block content: drop the block and treat the
            // line as regular Markdown below.
            active_html_block = None;
        }

        // A line rejected by `active_markdown_line` (presumably a blank line —
        // TODO confirm) resets all paragraph and definition tracking.
        let Some(trimmed) = active_markdown_line(line) else {
            paragraph_quote_depth = None;
            reference_definition_quote_depth = None;
            reference_title_state = None;
            continue;
        };
        // The line is consumed as (part of) a pending definition title; the
        // lines after it are passed along so the helper can look ahead.
        if let Some(next_reference_title_state) = effective_link_reference_title_continuation(
            trimmed,
            &lines[line_index + 1..],
            paragraph_quote_depth,
            reference_title_state,
        ) {
            paragraph_quote_depth = None;
            reference_definition_quote_depth = None;
            reference_title_state = next_reference_title_state;
            continue;
        }
        if let Some(active) = opening_fence_line(trimmed) {
            active_fence = Some(active);
            paragraph_quote_depth = None;
            reference_definition_quote_depth = None;
            reference_title_state = None;
            continue;
        }
        if let Some(active) = opening_html_block_line(trimmed)
            && can_start_html_block(active, paragraph_quote_depth)
        {
            // Only keep the block open when it does not already end on its
            // opening line.
            if let Some(html_line) = html_block_content_line(trimmed, active.quote_depth)
                && !html_block_ends_on_line(html_line, active.end)
            {
                active_html_block = Some(active);
            }
            paragraph_quote_depth = None;
            reference_definition_quote_depth = None;
            reference_title_state = None;
            continue;
        }
        // A setext underline ends the paragraph, but is not considered while
        // the previous line was a reference definition.
        if reference_definition_quote_depth.is_none()
            && is_setext_underline_for_paragraph(trimmed, paragraph_quote_depth)
        {
            paragraph_quote_depth = None;
            reference_definition_quote_depth = None;
            reference_title_state = None;
            continue;
        }

        if let Some(definition) = effective_link_reference_definition(
            trimmed,
            paragraph_quote_depth,
            reference_definition_quote_depth,
        ) {
            labels.insert(definition.label);
            reference_definition_quote_depth = Some(definition.quote_depth);
            // Remember that a title may follow; its delimiter is not known yet.
            reference_title_state = definition.title_pending.then_some(ReferenceTitleState {
                quote_depth: definition.quote_depth,
                closer: None,
            });
        } else {
            reference_definition_quote_depth = None;
            reference_title_state = None;
        }
        // A definition line counts as paragraph context at its own depth;
        // otherwise the depth is derived from the line itself.
        paragraph_quote_depth = reference_definition_quote_depth
            .or_else(|| paragraph_quote_depth_for_line(trimmed, paragraph_quote_depth));
    }
    labels
}

/// A link reference definition recognized on a single line.
struct LinkReferenceDefinition {
    /// Blockquote depth the definition is attributed to.
    quote_depth: usize,
    /// Normalized label taken from between the brackets.
    label: String,
    /// `true` when nothing follows the destination on the definition line, so
    /// a title may still appear on a following line.
    title_pending: bool,
}

/// Tracking state for a link reference definition whose title may continue
/// on following lines.
#[derive(Clone, Copy)]
struct ReferenceTitleState {
    /// Blockquote depth of the definition the title belongs to.
    quote_depth: usize,
    /// Closing delimiter of the title once its opener has been seen; `None`
    /// while the title has not started yet.
    closer: Option<char>,
}

/// Parses `line` as a link reference definition and decides whether it counts
/// in the current paragraph context.
///
/// * When a paragraph is open at a deeper quote depth than the line, the
///   definition is accepted only if the previous line was a definition at that
///   deeper depth; it is then attributed to that depth.
/// * When a paragraph is open at exactly the line's depth, the definition is
///   accepted only if the previous line was a definition at the same depth.
/// * In every other case the parsed definition is returned as is.
fn effective_link_reference_definition(
    line: &str,
    paragraph_quote_depth: Option<usize>,
    reference_definition_quote_depth: Option<usize>,
) -> Option<LinkReferenceDefinition> {
    let mut definition = link_reference_definition_with_quote_depth(line)?;
    match paragraph_quote_depth {
        Some(open_depth) if open_depth > definition.quote_depth => {
            if reference_definition_quote_depth != Some(open_depth) {
                return None;
            }
            definition.quote_depth = open_depth;
            Some(definition)
        }
        Some(open_depth)
            if open_depth == definition.quote_depth
                && reference_definition_quote_depth != Some(open_depth) =>
        {
            // A definition cannot interrupt a paragraph at its own depth.
            None
        }
        _ => Some(definition),
    }
}

/// Decides whether `line` continues the title of a pending link reference
/// definition.
///
/// The return value has three shapes:
/// * `None` — no title is pending, or the line is not part of one.
/// * `Some(None)` — the line belongs to the title and closes it.
/// * `Some(Some(state))` — the line belongs to the title, which is still open;
///   `state` carries the closing delimiter to look for on later lines.
///
/// `remaining_lines` are the lines after `line`; they are only consulted when
/// a title opens on this line without closing, to check that a closer exists
/// further down.
fn effective_link_reference_title_continuation(
    line: &str,
    remaining_lines: &[&str],
    paragraph_quote_depth: Option<usize>,
    reference_title_state: Option<ReferenceTitleState>,
) -> Option<Option<ReferenceTitleState>> {
    let state = reference_title_state?;
    let (quote_depth, content) = strip_blockquote_markers(line);
    let content = active_markdown_line(content)?;
    if reference_title_continuation_stops(content) {
        return None;
    }
    // A line without quote markers, or one shallower than the open paragraph,
    // is attributed to the definition's own depth when that is the depth in
    // effect; otherwise the line's own depth is used.
    let effective_quote_depth = if state.quote_depth > 0 && quote_depth == 0 {
        state.quote_depth
    } else if paragraph_quote_depth
        .is_some_and(|depth| depth > quote_depth && state.quote_depth == depth)
    {
        state.quote_depth
    } else {
        quote_depth
    };
    if effective_quote_depth != state.quote_depth {
        return None;
    }
    match state.closer {
        // Title already open: keep the state unless this line closes it.
        Some(closer) => Some((!link_reference_title_closes(content, closer, 0)).then_some(state)),
        None => {
            // Title not started yet: the line must begin with a title opener.
            let (closer, content_start) = link_reference_title_start(content)?;
            if link_reference_title_closes(content, closer, content_start) {
                Some(None)
            } else {
                // Only treat the line as a title start when a later line
                // actually closes it.
                reference_title_has_future_closer(remaining_lines, state.quote_depth, closer)
                    .then_some(Some(ReferenceTitleState {
                        quote_depth: state.quote_depth,
                        closer: Some(closer),
                    }))
            }
        }
    }
}

fn link_reference_definition_with_quote_depth(line: &str) -> Option<LinkReferenceDefinition> {
    let (quote_depth, content) = strip_blockquote_markers(line);
    active_markdown_line(content)
        .and_then(link_reference_definition)
        .map(|(label, title_pending)| LinkReferenceDefinition {
            quote_depth,
            label,
            title_pending,
        })
}

/// Parses `[label]: destination …` from `line`.
///
/// Returns the normalized label and whether a title may still follow on a
/// later line. Returns `None` when the line does not start with a bracketed
/// label followed by `:`, when the destination/title part is malformed, or
/// when the label normalizes to nothing.
fn link_reference_definition(line: &str) -> Option<(String, bool)> {
    let text = line.trim_start();
    let raw = text.as_bytes();
    if raw.first() != Some(&b'[') {
        return None;
    }
    let label_end = matching_closing_bracket(raw, 1)?;
    if raw.get(label_end + 1) != Some(&b':') {
        return None;
    }
    // `label_end + 1` is the ASCII colon, so `label_end + 2` is a valid boundary.
    let title_pending = link_reference_destination_title_pending(&text[label_end + 2..])?;
    reference_label_from_bytes(raw, 1, label_end).map(|label| (label, title_pending))
}

/// Validates the part of a reference definition that follows the colon.
///
/// Returns `Some(true)` when only a destination is present (a title may follow
/// on another line), `Some(false)` when a complete title follows the
/// destination, and `None` when the destination is missing or malformed or the
/// trailing text is not a valid title.
fn link_reference_destination_title_pending(rest: &str) -> Option<bool> {
    let rest = rest.trim_start();
    if rest.is_empty() {
        return None;
    }
    let after_destination = match rest.strip_prefix('<') {
        Some(bracketed) => bracketed_link_reference_destination_tail(bracketed)?,
        None => unbracketed_link_reference_destination_tail(rest)?,
    };
    let tail = after_destination.trim_start();
    if tail.is_empty() {
        return Some(true);
    }
    if has_link_reference_title(tail) {
        Some(false)
    } else {
        None
    }
}

/// Recognizes a title opener (`"`, `'` or `(`) as the first non-whitespace
/// character of `line`.
///
/// Returns the matching closing delimiter together with the byte offset just
/// past the opener, or `None` when the line does not start a title.
fn link_reference_title_start(line: &str) -> Option<(char, usize)> {
    let body = line.trim_start();
    let leading = line.len() - body.len();
    let closer = match body.chars().next()? {
        '"' => '"',
        '\'' => '\'',
        '(' => ')',
        _ => return None,
    };
    // Every accepted opener is a single ASCII byte.
    Some((closer, leading + 1))
}

/// Reports whether the first unescaped `closer` at or after byte offset
/// `start_index` ends the title, i.e. is followed only by whitespace.
///
/// A backslash escapes the character after it. Returns `false` when no
/// unescaped closer is found.
fn link_reference_title_closes(line: &str, closer: char, start_index: usize) -> bool {
    let mut chars = line
        .char_indices()
        .filter(|(offset, _)| *offset >= start_index);
    while let Some((offset, current)) = chars.next() {
        if current == '\\' {
            // Swallow the escaped character, whatever it is.
            chars.next();
            continue;
        }
        if current == closer {
            return line[offset + current.len_utf8()..].trim().is_empty();
        }
    }
    false
}

/// Looks ahead through `remaining_lines` for a line that closes a title opened
/// with `closer`.
///
/// The search gives up (returning `false`) at the first line that is not title
/// content at `quote_depth` or that stops a title continuation; it returns
/// `true` at the first line on which the title closes.
fn reference_title_has_future_closer(
    remaining_lines: &[&str],
    quote_depth: usize,
    closer: char,
) -> bool {
    for candidate in remaining_lines.iter().copied() {
        match reference_title_content_line(candidate, quote_depth) {
            Some(content) if !reference_title_continuation_stops(content) => {
                if link_reference_title_closes(content, closer, 0) {
                    return true;
                }
            }
            _ => return false,
        }
    }
    false
}

/// Reports whether `content` is a line that ends a multi-line reference title:
/// a blank line, or a line the block helpers classify as an opening fence, ATX
/// heading, thematic break, setext underline, paragraph-interrupting HTML block
/// start, non-empty unordered list item, or non-empty ordered list item that
/// starts at 1.
fn reference_title_continuation_stops(content: &str) -> bool {
    if content.trim().is_empty() || opening_fence(content).is_some() {
        return true;
    }
    if is_atx_heading_line(content)
        || is_thematic_break_line(content)
        || is_setext_underline_line(content)
    {
        return true;
    }
    if opening_html_block_end(content).is_some_and(|(_, can_interrupt)| can_interrupt) {
        return true;
    }
    if is_nonempty_unordered_list_item_line(content) {
        return true;
    }
    ordered_list_marker_tail(content)
        .is_some_and(|(start, tail)| start == 1 && !tail.trim().is_empty())
}

/// Extracts the text of `line` that can belong to a reference title at
/// `quote_depth`.
///
/// A line quoted at exactly `quote_depth` yields its content with the markers
/// stripped. An unquoted line yields itself when the title lives inside a
/// blockquote. Any other depth yields `None`.
fn reference_title_content_line(line: &str, quote_depth: usize) -> Option<&str> {
    let active = active_markdown_line(line)?;
    match strip_blockquote_markers(active) {
        (depth, inner) if depth == quote_depth => active_markdown_line(inner),
        (0, _) if quote_depth > 0 => Some(active),
        _ => None,
    }
}

/// Given the text after the opening `<` of a bracketed destination, returns
/// what follows the closing `>`.
///
/// Returns `None` when an unescaped `<` appears first or when the destination
/// is never closed. A backslash escapes the character after it.
fn bracketed_link_reference_destination_tail(destination: &str) -> Option<&str> {
    let mut chars = destination.char_indices();
    while let Some((offset, current)) = chars.next() {
        match current {
            '\\' => {
                // Skip the escaped character.
                chars.next();
            }
            '<' => return None,
            '>' => return Some(&destination[offset + 1..]),
            _ => {}
        }
    }
    None
}

/// Splits an unbracketed destination off the front of `rest` and returns the
/// text that follows it.
///
/// The destination ends at the first whitespace character, escaped or not.
/// Returns `None` when the destination is empty, contains an unescaped `<`, or
/// has unbalanced unescaped parentheses.
fn unbracketed_link_reference_destination_tail(rest: &str) -> Option<&str> {
    let mut open_parens = 0usize;
    let mut after_backslash = false;
    let mut destination_len = rest.len();
    for (offset, current) in rest.char_indices() {
        // Whitespace terminates the destination even right after a backslash.
        if current.is_whitespace() {
            destination_len = offset;
            break;
        }
        if after_backslash {
            after_backslash = false;
            continue;
        }
        match current {
            '\\' => after_backslash = true,
            '(' => open_parens = open_parens.saturating_add(1),
            ')' => open_parens = open_parens.checked_sub(1)?,
            '<' => return None,
            _ => {}
        }
    }
    if destination_len == 0 || open_parens != 0 {
        None
    } else {
        Some(&rest[destination_len..])
    }
}

/// Reports whether `rest` is a complete, single-line reference title.
///
/// An empty string counts as valid. Otherwise `rest` must start with `"`, `'`
/// or `(`, and the first unescaped matching closer must be followed only by
/// whitespace.
fn has_link_reference_title(rest: &str) -> bool {
    let mut chars = rest.char_indices();
    let closer = match chars.next() {
        None => return true,
        Some((_, '"')) => '"',
        Some((_, '\'')) => '\'',
        Some((_, '(')) => ')',
        Some(_) => return false,
    };
    while let Some((offset, current)) = chars.next() {
        if current == '\\' {
            // Skip the escaped character.
            chars.next();
        } else if current == closer {
            return rest[offset + current.len_utf8()..].trim().is_empty();
        }
    }
    false
}

fn reference_label_from_bytes(bytes: &[u8], start: usize, end: usize) -> Option<String> {
    std::str::from_utf8(bytes.get(start..end)?)
        .ok()
        .and_then(normalize_reference_label)
}

/// Normalizes a reference label: surrounding whitespace is dropped, inner
/// whitespace runs collapse to a single space, and the result is lowercased.
/// Returns `None` for a label with no non-whitespace content.
fn normalize_reference_label(label: &str) -> Option<String> {
    let mut words = label.split_whitespace();
    let first = words.next()?;
    let collapsed = words.fold(String::from(first), |mut joined, word| {
        joined.push(' ');
        joined.push_str(word);
        joined
    });
    Some(collapsed.to_lowercase())
}

/// Counts the positions in `line` that look like the start of an HTML tag:
/// a `<` followed by an ASCII letter, or by `/` and an ASCII letter.
fn estimate_inline_html_nodes(line: &str) -> usize {
    let bytes = line.as_bytes();
    let is_letter_at =
        |offset: usize| bytes.get(offset).is_some_and(|byte| byte.is_ascii_alphabetic());
    bytes
        .iter()
        .enumerate()
        .filter(|&(at, &byte)| {
            byte == b'<'
                && (is_letter_at(at + 1)
                    || (bytes.get(at + 1) == Some(&b'/') && is_letter_at(at + 2)))
        })
        .count()
}

/// Reports whether `trimmed` can be paragraph text: it is non-empty and is not
/// a blockquote line, ATX heading, thematic break, list item (in the given
/// paragraph context), fence opener or table separator row.
fn is_paragraph_content_line(trimmed: &str, paragraph_quote_depth: Option<usize>) -> bool {
    let starts_other_block = trimmed.is_empty()
        || trimmed.starts_with('>')
        || is_atx_heading_line(trimmed)
        || is_thematic_break_line(trimmed)
        || is_list_item_line_for_context(trimmed, paragraph_quote_depth)
        || trimmed.starts_with("```")
        || trimmed.starts_with("~~~")
        || is_table_separator_line(trimmed);
    !starts_other_block
}

/// Returns the blockquote depth of `trimmed` when its content is paragraph
/// text, and `None` otherwise.
///
/// The paragraph depth handed to the content check is the open paragraph's
/// depth minus this line's own quote depth (or `None` if the line is quoted
/// deeper than the paragraph).
fn paragraph_quote_depth_for_line(
    trimmed: &str,
    paragraph_quote_depth: Option<usize>,
) -> Option<usize> {
    let (quote_depth, content) = strip_blockquote_markers(trimmed);
    let content = active_markdown_line(content)?;
    let relative_depth = match paragraph_quote_depth {
        Some(depth) => depth.checked_sub(quote_depth),
        None => None,
    };
    if is_paragraph_content_line(content, relative_depth) {
        Some(quote_depth)
    } else {
        None
    }
}

/// Strips blockquote markers from `trimmed` and, when the remainder opens an
/// HTML block, describes that block together with the quote depth it was
/// found at.
fn opening_html_block_line(trimmed: &str) -> Option<ActiveHtmlBlock> {
    let (quote_depth, content) = strip_blockquote_markers(trimmed);
    let content = active_markdown_line(content)?;
    let (end, can_interrupt_paragraph) = opening_html_block_end(content)?;
    Some(ActiveHtmlBlock {
        end,
        quote_depth,
        can_interrupt_paragraph,
    })
}

/// Classifies `line` as the first line of an HTML block.
///
/// On a match, returns the condition that ends the block and whether this
/// kind of block may interrupt a paragraph. The checks run in order and the
/// first match wins:
///
/// * `<!--` ends at `-->`, `<?` at `?>`, `<![CDATA[` (any ASCII case) at `]]>`,
///   and a `<!` declaration at `>`.
/// * An opening `script`, `pre`, `style` or `textarea` tag ends at its closing
///   tag.
/// * A tag from `HTML_BLOCK_TAGS` ends at a blank line.
/// * Any other complete tag alone on its line also ends at a blank line, but
///   is the only kind that cannot interrupt a paragraph.
///
/// Returns `None` when the line starts no HTML block.
fn opening_html_block_end(line: &str) -> Option<(HtmlBlockEnd, bool)> {
    let trimmed = line.trim_start();
    if trimmed.starts_with("<!--") {
        return Some((HtmlBlockEnd::Contains("-->"), true));
    }
    if trimmed.starts_with("<?") {
        return Some((HtmlBlockEnd::Contains("?>"), true));
    }
    if starts_with_ignore_ascii_case(trimmed, "<![CDATA[") {
        return Some((HtmlBlockEnd::Contains("]]>"), true));
    }
    if starts_with_html_declaration(trimmed) {
        return Some((HtmlBlockEnd::Contains(">"), true));
    }
    if let Some(tag) = ["script", "pre", "style", "textarea"]
        .into_iter()
        .find(|tag| starts_with_html_open_tag(trimmed, tag))
    {
        return Some((HtmlBlockEnd::ClosingTag(tag), true));
    }
    // Only the presence of a block-level tag matters, not which one it is.
    if html_block_tag(trimmed).is_some() {
        return Some((HtmlBlockEnd::BlankLine, true));
    }
    if starts_with_complete_html_tag_line(trimmed) {
        return Some((HtmlBlockEnd::BlankLine, false));
    }
    None
}

/// Reports whether the HTML block described by `active` may start here.
///
/// A block that cannot interrupt a paragraph is rejected only when a
/// paragraph is open at the block's own quote depth.
fn can_start_html_block(active: ActiveHtmlBlock, paragraph_quote_depth: Option<usize>) -> bool {
    match paragraph_quote_depth {
        Some(depth) if depth == active.quote_depth => active.can_interrupt_paragraph,
        _ => true,
    }
}

/// Reports whether `line` starts with an opening `<tag`; closing tags
/// (`</tag`) are not accepted.
fn starts_with_html_open_tag(line: &str, tag: &str) -> bool {
    starts_with_html_tag(line, tag, false)
}

/// Returns the first entry of `HTML_BLOCK_TAGS` that `line` starts with, as
/// either an opening or a closing tag.
fn html_block_tag(line: &str) -> Option<&'static str> {
    HTML_BLOCK_TAGS
        .iter()
        .copied()
        .find(|tag| starts_with_html_tag(line, tag, true))
}

/// Reports whether `line`, ignoring leading whitespace, starts with `<tag`
/// (or `</tag` when `allow_closing` is set).
///
/// The tag name is compared ASCII case-insensitively and must be followed by
/// the end of the line, a space, a tab, `>` or `/`.
fn starts_with_html_tag(line: &str, tag: &str, allow_closing: bool) -> bool {
    let after_angle = match line.trim_start().as_bytes().split_first() {
        Some((b'<', remainder)) => remainder,
        _ => return false,
    };
    let after_slash = match after_angle.split_first() {
        Some((b'/', remainder)) if allow_closing => remainder,
        _ => after_angle,
    };
    let name_len = tag.len();
    let name = match after_slash.get(..name_len) {
        Some(name) => name,
        None => return false,
    };
    name.eq_ignore_ascii_case(tag.as_bytes())
        && matches!(
            after_slash.get(name_len).copied(),
            None | Some(b' ' | b'\t' | b'>' | b'/')
        )
}

/// Reports whether `trimmed` starts with `<!` followed by an ASCII letter.
fn starts_with_html_declaration(trimmed: &str) -> bool {
    matches!(
        trimmed.as_bytes(),
        [b'<', b'!', third, ..] if third.is_ascii_alphabetic()
    )
}

/// Reports whether `line` consists of exactly one complete HTML tag.
///
/// After optional leading whitespace the line must hold `<`, an optional `/`,
/// a tag name (an ASCII letter followed by ASCII alphanumerics, `-` or `_`)
/// and a boundary (end of line, space, tab, `>` or `/`). The first `>` outside
/// single or double quotes must then be followed only by ASCII whitespace.
fn starts_with_complete_html_tag_line(line: &str) -> bool {
    let after_angle = match line.trim_start().as_bytes().split_first() {
        Some((b'<', remainder)) => remainder,
        _ => return false,
    };
    let name_and_rest = match after_angle.split_first() {
        Some((b'/', remainder)) => remainder,
        _ => after_angle,
    };
    if !name_and_rest
        .first()
        .is_some_and(|byte| byte.is_ascii_alphabetic())
    {
        return false;
    }
    let name_len = 1 + name_and_rest[1..]
        .iter()
        .take_while(|byte| byte.is_ascii_alphanumeric() || matches!(**byte, b'-' | b'_'))
        .count();
    let attributes = &name_and_rest[name_len..];
    if !matches!(
        attributes.first().copied(),
        None | Some(b' ' | b'\t' | b'>' | b'/')
    ) {
        return false;
    }

    // Scan for the closing `>`; any `>` inside a quoted attribute value is ignored.
    let mut open_quote: Option<u8> = None;
    for (offset, &byte) in attributes.iter().enumerate() {
        match open_quote {
            Some(quote) => {
                if byte == quote {
                    open_quote = None;
                }
            }
            None => match byte {
                b'"' | b'\'' => open_quote = Some(byte),
                b'>' => {
                    return attributes[offset + 1..]
                        .iter()
                        .all(u8::is_ascii_whitespace);
                }
                _ => {}
            },
        }
    }
    false
}

/// Reports whether `value` begins with `prefix`, comparing bytes ASCII
/// case-insensitively.
fn starts_with_ignore_ascii_case(value: &str, prefix: &str) -> bool {
    value
        .as_bytes()
        .get(..prefix.len())
        .is_some_and(|head| head.eq_ignore_ascii_case(prefix.as_bytes()))
}

/// Reports whether `line` satisfies the end condition `end` of an open HTML
/// block: a blank line, a line containing the given marker, or a line
/// containing the closing tag for the given name.
fn html_block_ends_on_line(line: &str, end: HtmlBlockEnd) -> bool {
    match end {
        HtmlBlockEnd::BlankLine => line.trim().is_empty(),
        HtmlBlockEnd::Contains(marker) => line.contains(marker),
        HtmlBlockEnd::ClosingTag(name) => contains_html_closing_tag(line, name),
    }
}

/// Reports whether `line` contains `</tag` (ASCII case-insensitive) followed,
/// after optional ASCII whitespace, by `>`.
fn contains_html_closing_tag(line: &str, tag: &str) -> bool {
    let pattern = format!("</{tag}");
    let pattern = pattern.as_bytes();
    let haystack = line.as_bytes();
    // `pattern` always holds at least `</`, so the window size is never zero.
    haystack
        .windows(pattern.len())
        .enumerate()
        .any(|(start, window)| {
            window.eq_ignore_ascii_case(pattern)
                && html_closing_tag_boundary(&haystack[start + pattern.len()..])
        })
}

/// Reports whether the first byte of `tail` that is not ASCII whitespace is
/// `>`.
fn html_closing_tag_boundary(tail: &[u8]) -> bool {
    tail.iter().find(|byte| !byte.is_ascii_whitespace()) == Some(&b'>')
}

/// Tag names that start an HTML block ending at the next blank line, whether
/// they appear as an opening or a closing tag. Names are lowercase and are
/// matched ASCII case-insensitively by `html_block_tag`.
///
/// NOTE(review): the list appears to mirror the block-level tag names from the
/// CommonMark HTML-block rules — confirm against the spec version in use.
const HTML_BLOCK_TAGS: &[&str] = &[
    "address",
    "article",
    "aside",
    "base",
    "basefont",
    "blockquote",
    "body",
    "caption",
    "center",
    "col",
    "colgroup",
    "dd",
    "details",
    "dialog",
    "dir",
    "div",
    "dl",
    "dt",
    "fieldset",
    "figcaption",
    "figure",
    "footer",
    "form",
    "frame",
    "frameset",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "head",
    "header",
    "hr",
    "html",
    "iframe",
    "legend",
    "li",
    "link",
    "main",
    "menu",
    "menuitem",
    "nav",
    "noframes",
    "ol",
    "optgroup",
    "option",
    "p",
    "param",
    "search",
    "section",
    "summary",
    "table",
    "tbody",
    "td",
    "tfoot",
    "th",
    "thead",
    "title",
    "tr",
    "track",
    "ul",
];

/// Returns the number of cells `line` splits into as a pipe-table row, or
/// `None` when it is not a table row.
fn pipe_table_cell_count(line: &str) -> Option<usize> {
    let cells = split_table_cells(line)?;
    Some(cells.len())
}

/// Strips blockquote markers from `trimmed` and returns the quote depth
/// together with the remaining active line, or `None` when nothing active is
/// left.
fn table_preflight_line(trimmed: &str) -> Option<(usize, &str)> {
    let (quote_depth, content) = strip_blockquote_markers(trimmed);
    let line = active_markdown_line(content)?;
    Some((quote_depth, line))
}

/// Reports whether `line` is a table separator row, i.e. it splits into table
/// cells and every cell is a separator cell.
fn is_table_separator_line(line: &str) -> bool {
    table_separator_cell_count(line).is_some()
}

/// Returns the number of cells in `line` when it is a table separator row.
///
/// Returns `None` when the line does not split into table cells or when any
/// cell is not a separator cell.
fn table_separator_cell_count(line: &str) -> Option<usize> {
    let cells = split_table_cells(line)?;
    cells
        .iter()
        .all(|cell| is_table_separator_cell(cell))
        .then_some(cells.len())
}

/// Splits a pipe-table row into its raw (untrimmed) cell texts.
///
/// One leading and one trailing pipe are treated as row borders rather than
/// separators. Returns `None` when the line has no cell-separating pipe at all
/// or when nothing but whitespace lies between the borders.
fn split_table_cells(line: &str) -> Option<Vec<&str>> {
    let row = line.trim();
    let pipes = table_pipe_positions(row);
    if pipes.is_empty() {
        return None;
    }

    // `row` holds at least one pipe, so it is non-empty and `len() - 1` is safe.
    let has_leading_border = pipes.first() == Some(&0);
    let has_trailing_border = pipes.last() == Some(&(row.len() - 1));
    let start = usize::from(has_leading_border);
    let end = row.len() - usize::from(has_trailing_border);
    // Must run before slicing `pipes`: a lone `|` is both borders at once.
    if start > end || row[start..end].trim().is_empty() {
        return None;
    }

    let separators = &pipes[start..pipes.len() - usize::from(has_trailing_border)];
    let mut cells = Vec::with_capacity(separators.len() + 1);
    let mut cell_start = start;
    for &pipe in separators {
        cells.push(&row[cell_start..pipe]);
        cell_start = pipe + 1;
    }
    cells.push(&row[cell_start..end]);
    Some(cells)
}

/// Returns the byte offsets of every `|` in `line` that is neither
/// backslash-escaped nor inside a closed backtick code span.
fn table_pipe_positions(line: &str) -> Vec<usize> {
    let bytes = line.as_bytes();
    let mut pipes = Vec::new();
    let mut cursor = 0usize;
    // Each arm yields the next cursor position; running past the end stops the scan.
    while let Some(&byte) = bytes.get(cursor) {
        cursor = match byte {
            b'\\' => cursor + 2,
            b'`' => {
                let run = backtick_run_len(bytes, cursor);
                match matching_backtick_run(bytes, cursor + run, run) {
                    // Jump over the whole code span.
                    Some(closing) => closing + run,
                    // Unclosed run: the backticks are plain text.
                    None => cursor + run,
                }
            }
            b'|' => {
                pipes.push(cursor);
                cursor + 1
            }
            _ => cursor + 1,
        };
    }
    pipes
}

/// Returns the number of consecutive backticks starting at `index`.
///
/// Panics if `index` is greater than `bytes.len()`.
fn backtick_run_len(bytes: &[u8], index: usize) -> usize {
    let tail = &bytes[index..];
    tail.iter()
        .position(|&byte| byte != b'`')
        .unwrap_or(tail.len())
}

/// Finds the start of the next backtick run of exactly `len` backticks at or
/// after `index`. Runs of any other length are skipped as a whole.
fn matching_backtick_run(bytes: &[u8], mut index: usize, len: usize) -> Option<usize> {
    while let Some(&byte) = bytes.get(index) {
        if byte != b'`' {
            index += 1;
            continue;
        }
        let run_len = backtick_run_len(bytes, index);
        if run_len == len {
            return Some(index);
        }
        index += run_len;
    }
    None
}

/// Estimates the nodes for one table row as one plus two per cell, saturating
/// at `usize::MAX`.
fn estimate_table_row_nodes(cells: usize) -> usize {
    cells.saturating_mul(2).saturating_add(1)
}

/// Reports whether `cell` is a table separator cell.
///
/// After trimming and removing at most one leading and one trailing `:`, the
/// cell may contain only hyphens and ASCII whitespace, with at least three
/// hyphens.
///
/// NOTE(review): the GFM table extension also accepts delimiter cells with
/// fewer hyphens (e.g. `:-:`); confirm the three-hyphen threshold is an
/// intentional choice for this estimator.
fn is_table_separator_cell(cell: &str) -> bool {
    let trimmed = cell.trim();
    let without_left_colon = trimmed.strip_prefix(':').unwrap_or(trimmed);
    let core = without_left_colon
        .strip_suffix(':')
        .unwrap_or(without_left_colon);

    let only_hyphens_and_space = core
        .bytes()
        .all(|byte| byte == b'-' || byte.is_ascii_whitespace());
    only_hyphens_and_space && core.bytes().filter(|&byte| byte == b'-').count() >= 3
}

/// Fails with an invalid-input error once `count` exceeds
/// `options.max_markdown_nodes`; a count equal to the limit is accepted.
fn enforce_markdown_node_count(
    count: usize,
    options: &NormalizationOptions,
) -> Result<(), TransformError> {
    if count <= options.max_markdown_nodes {
        return Ok(());
    }
    Err(invalid("input exceeds max_markdown_nodes"))
}

/// Fails with an invalid-input error once `count` exceeds
/// `options.max_markdown_table_cells`; a count equal to the limit is accepted.
fn enforce_markdown_table_cell_count(
    count: usize,
    options: &NormalizationOptions,
) -> Result<(), TransformError> {
    if count <= options.max_markdown_table_cells {
        return Ok(());
    }
    Err(invalid("input exceeds max_markdown_table_cells"))
}

/// Builds a `TransformError` of kind `TransformErrorKind::InvalidInput`
/// carrying `message`.
fn invalid(message: impl Into<String>) -> TransformError {
    // Convert up front so the constructor receives an owned `String`.
    let message: String = message.into();
    TransformError::new(TransformErrorKind::InvalidInput, message)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn preflight_rejects_inline_heavy_input_before_parsing() {
        let input = "[x](https://example.com)".repeat(16);
        let options = NormalizationOptions {
            max_markdown_nodes: 8,
            ..NormalizationOptions::default()
        };

        let err = enforce_markdown_structural_preflight(&input, true, &options)
            .expect_err("inline-heavy input should exceed the preflight node estimate");

        assert_eq!(err.kind, TransformErrorKind::InvalidInput);
        assert!(err.message.contains("max_markdown_nodes"));
    }

    #[test]
    fn preflight_skips_inline_estimate_inside_fenced_code() {
        let input = format!("```\n{}\n```", "[x](https://example.com)".repeat(16));
        let options = NormalizationOptions {
            max_markdown_nodes: 8,
            ..NormalizationOptions::default()
        };

        enforce_markdown_structural_preflight(&input, true, &options)
            .expect("link-like code text should not count as inline nodes");
    }

    #[test]
    fn preflight_skips_inline_estimate_inside_html_block() {
        let input = format!("<pre>\n{}\n</pre>", "[x](https://example.com)\n".repeat(8));
        let options = NormalizationOptions {
            max_markdown_nodes: 2,
            ..NormalizationOptions::default()
        };

        enforce_markdown_structural_preflight(&input, true, &options)
            .expect("link-like text inside an HTML block should not count as Markdown nodes");
    }

    #[test]
    fn preflight_skips_inline_estimate_inside_block_html_tag() {
        let input = format!("<div>\n{}</div>", "[x](https://example.com)\n".repeat(8));
        let options = NormalizationOptions {
            max_markdown_nodes: 2,
            ..NormalizationOptions::default()
        };

        enforce_markdown_structural_preflight(&input, true, &options)
            .expect("link-like text inside a block HTML tag should not count as Markdown nodes");
    }

    #[test]
    fn preflight_skips_inline_estimate_inside_complete_html_tag_block() {
        let input = format!(
            "<custom-element>\n{}</custom-element>",
            "[x](https://example.com)\n".repeat(8)
        );
        let options = NormalizationOptions {
            max_markdown_nodes: 2,
            ..NormalizationOptions::default()
        };

        enforce_markdown_structural_preflight(&input, true, &options).expect(
            "link-like text inside a complete HTML tag block should not count as Markdown nodes",
        );
    }

    #[test]
    fn preflight_does_not_start_complete_html_tag_block_inside_paragraph() {
        let input = format!(
            "paragraph\n<custom-element>\n{}</custom-element>",
            "[x](https://example.com)\n".repeat(4)
        );
        let options = NormalizationOptions {
            max_markdown_nodes: 8,
            ..NormalizationOptions::default()
        };

        let err = enforce_markdown_structural_preflight(&input, true, &options)
            .expect_err("type 7 HTML blocks should not hide Markdown while a paragraph is active");

        assert_eq!(err.kind, TransformErrorKind::InvalidInput);
        assert!(err.message.contains("max_markdown_nodes"));
    }

    #[test]
    fn preflight_does_not_start_complete_html_tag_block_inside_blockquote_paragraph() {
        let input = format!(
            "> paragraph\n> <custom-element>\n> {}</custom-element>",
            "[x](https://example.com)\n".repeat(4)
        );
        let options = NormalizationOptions {
            max_markdown_nodes: 10,
            ..NormalizationOptions::default()
        };

        let err = enforce_markdown_structural_preflight(&input, true, &options).expect_err(
            "type 7 HTML blocks should not hide Markdown inside a blockquote paragraph",
        );

        assert_eq!(err.kind, TransformErrorKind::InvalidInput);
        assert!(err.message.contains("max_markdown_nodes"));
    }

    #[test]
    fn preflight_does_not_start_complete_html_tag_block_after_reference_definition() {
        let input = "[x]: https://example.com\n<custom-element>\n[x](https://example.com)\n</custom-element>";
        let options = NormalizationOptions {
            max_markdown_nodes: 4,
            ..NormalizationOptions::default()
        };

        let err = enforce_markdown_structural_preflight(input, true, &options).expect_err(
            "reference definitions should keep type 7 HTML tags inside the paragraph preflight",
        );

        assert_eq!(err.kind, TransformErrorKind::InvalidInput);
        assert!(err.message.contains("max_markdown_nodes"));
    }

    #[test]
    fn preflight_does_not_start_complete_html_tag_block_after_blockquote_reference_definition() {
        let input = "> [x]: https://example.com\n> <custom-element>\n> [x](https://example.com)\n> </custom-element>";
        let options = NormalizationOptions {
            max_markdown_nodes: 6,
            ..NormalizationOptions::default()
        };

        let err = enforce_markdown_structural_preflight(input, true, &options).expect_err(
            "blockquote reference definitions should keep type 7 HTML tags inside the paragraph preflight",
        );

        assert_eq!(err.kind, TransformErrorKind::InvalidInput);
        assert!(err.message.contains("max_markdown_nodes"));
    }

    #[test]
    fn preflight_skips_table_like_text_inside_blockquote_fenced_code() {
        let input = "> ```\n> | A | B |\n> | --- | --- |\n> | x | y |\n> ```";
        let options = NormalizationOptions {
            max_markdown_table_cells: 1,
            ..NormalizationOptions::default()
        };

        enforce_markdown_structural_preflight(input, true, &options).expect(
            "table-like code text inside a blockquote fence should not count as table cells",
        );
    }

    #[test]
    fn preflight_does_not_skip_content_after_invalid_backtick_fence_info() {
        let input = format!(
            "``` invalid ` info\n{}",
            "[x](https://example.com)\n".repeat(4)
        );
        let options = NormalizationOptions {
            max_markdown_nodes: 8,
            ..NormalizationOptions::default()
        };

        let err = enforce_markdown_structural_preflight(&input, true, &options)
            .expect_err("invalid backtick fence info should not hide later content");

        assert_eq!(err.kind, TransformErrorKind::InvalidInput);
        assert!(err.message.contains("max_markdown_nodes"));
    }

    #[test]
    fn preflight_rejects_content_after_four_space_indented_fence() {
        let input = format!("    ```\n{}", "[x](https://example.com)\n".repeat(4));
        let options = NormalizationOptions {
            max_markdown_nodes: 8,
            ..NormalizationOptions::default()
        };

        let err = enforce_markdown_structural_preflight(&input, true, &options)
            .expect_err("four-space indented fence marker should not hide later content");

        assert_eq!(err.kind, TransformErrorKind::InvalidInput);
        assert!(err.message.contains("max_markdown_nodes"));
    }

    #[test]
    fn preflight_allows_markdown_markers_inside_indented_code() {
        let input = format!("{}{}", "    - x\n".repeat(8), "    # title\n".repeat(8));
        let options = NormalizationOptions {
            max_markdown_nodes: 8,
            ..NormalizationOptions::default()
        };

        enforce_markdown_structural_preflight(&input, true, &options)
            .expect("indented code markers should not count as active Markdown structure");
    }

    #[test]
    fn preflight_allows_blank_lines_without_node_growth() {
        let input = "\n".repeat(20);
        let options = NormalizationOptions {
            max_markdown_nodes: 1,
            ..NormalizationOptions::default()
        };

        enforce_markdown_structural_preflight(&input, true, &options)
            .expect("blank lines should not count as parsed Markdown nodes");
    }

    #[test]
    fn preflight_counts_non_heading_hash_lines_as_paragraphs() {
        let input = "#tag\n".repeat(4);
        let options = NormalizationOptions {
            max_markdown_nodes: 8,
            ..NormalizationOptions::default()
        };

        let err = enforce_markdown_structural_preflight(&input, true, &options)
            .expect_err("non-heading hash lines should count as paragraph/text nodes");

        assert_eq!(err.kind, TransformErrorKind::InvalidInput);
        assert!(err.message.contains("max_markdown_nodes"));
    }

    #[test]
    fn preflight_counts_atx_heading_text_nodes_before_parsing() {
        let input = "# title\n".repeat(5);
        let options = NormalizationOptions {
            max_markdown_nodes: 8,
            ..NormalizationOptions::default()
        };

        let err = enforce_markdown_structural_preflight(&input, true, &options)
            .expect_err("heading text nodes should count toward the preflight node estimate");

        assert_eq!(err.kind, TransformErrorKind::InvalidInput);
        assert!(err.message.contains("max_markdown_nodes"));
    }

    #[test]
    fn preflight_counts_thematic_breaks_as_single_nodes() {
        let input = "***\n---\n_ _ _\n";
        let options = NormalizationOptions {
            max_markdown_nodes: 4,
            ..NormalizationOptions::default()
        };

        enforce_markdown_structural_preflight(input, true, &options)
            .expect("thematic breaks should count as one parsed block node each");
    }

    #[test]
    fn preflight_allows_setext_headings_without_underline_node_growth() {
        let input = "Title\n===\nSubtitle\n---\n";
        let options = NormalizationOptions {
            max_markdown_nodes: 5,
            ..NormalizationOptions::default()
        };

        enforce_markdown_structural_preflight(input, true, &options)
            .expect("setext underline lines should not add paragraph nodes");
    }

    #[test]
    fn preflight_allows_empty_atx_headings_with_closing_markers() {
        let input = "# #\n".repeat(2);
        let options = NormalizationOptions {
            max_markdown_nodes: 3,
            ..NormalizationOptions::default()
        };

        enforce_markdown_structural_preflight(&input, true, &options)
            .expect("closing markers without heading content should not add text nodes");
    }

    #[test]
    fn preflight_rejects_list_heavy_input_before_parsing() {
        let input = "- x\n".repeat(4);
        let options = NormalizationOptions {
            max_markdown_nodes: 8,
            ..NormalizationOptions::default()
        };

        let err = enforce_markdown_structural_preflight(&input, true, &options)
            .expect_err("compact list-heavy input should exceed the preflight node estimate");

        assert_eq!(err.kind, TransformErrorKind::InvalidInput);
        assert!(err.message.contains("max_markdown_nodes"));
    }

    #[test]
    fn preflight_shares_list_container_across_adjacent_items() {
        let input = "- a\n- b\n- c";
        let options = NormalizationOptions {
            max_markdown_nodes: 11,
            ..NormalizationOptions::default()
        };

        enforce_markdown_structural_preflight(input, true, &options)
            .expect("adjacent list items should share one list container estimate");
    }

    #[test]
    fn preflight_treats_non_one_ordered_marker_inside_paragraph_as_text() {
        let input = "intro\n2. not a list\n3. still paragraph\n";
        let options = NormalizationOptions {
            max_markdown_nodes: 7,
            ..NormalizationOptions::default()
        };

        enforce_markdown_structural_preflight(input, true, &options)
            .expect("ordered markers above one should not interrupt an active paragraph");
    }

    #[test]
    fn preflight_keeps_one_ordered_marker_as_paragraph_interrupting_list() {
        let input = "intro\n1. list\n";
        let options = NormalizationOptions {
            max_markdown_nodes: 5,
            ..NormalizationOptions::default()
        };

        let err = enforce_markdown_structural_preflight(input, true, &options)
            .expect_err("ordered marker one can interrupt an active paragraph");

        assert_eq!(err.kind, TransformErrorKind::InvalidInput);
        assert!(err.message.contains("max_markdown_nodes"));
    }

    #[test]
    fn preflight_counts_gfm_autolinks_before_parsing() {
        let input = "https://example.com support@example.com ".repeat(3);
        let options = NormalizationOptions {
            max_markdown_nodes: 8,
            ..NormalizationOptions::default()
        };

        let err = enforce_markdown_structural_preflight(&input, true, &options)
            .expect_err("GFM autolinks should exceed the preflight node estimate");

        assert_eq!(err.kind, TransformErrorKind::InvalidInput);
        assert!(err.message.contains("max_markdown_nodes"));
    }

    #[test]
    fn preflight_ignores_gfm_autolinks_when_extensions_are_disabled() {
        let input = "https://example.com support@example.com ".repeat(3);
        let options = NormalizationOptions {
            max_markdown_nodes: 8,
            ..NormalizationOptions::default()
        };

        enforce_markdown_structural_preflight(&input, false, &options)
            .expect("CommonMark mode should not count GFM bare URL autolinks");
    }

    #[test]
    fn preflight_does_not_double_count_angle_bracket_autolinks_as_gfm_bare_links() {
        let input = "<https://example.com> <user@example.com>";
        let options = NormalizationOptions {
            max_markdown_nodes: 5,
            ..NormalizationOptions::default()
        };

        enforce_markdown_structural_preflight(input, true, &options)
            .expect("angle-bracket autolinks should not also count as GFM bare autolinks");
    }

    #[test]
    fn preflight_counts_single_marker_emphasis_before_parsing() {
        let input = "*x* _y_ ".repeat(4);
        let options = NormalizationOptions {
            max_markdown_nodes: 8,
            ..NormalizationOptions::default()
        };

        let err = enforce_markdown_structural_preflight(&input, true, &options)
            .expect_err("single-marker emphasis should exceed the preflight node estimate");

        assert_eq!(err.kind, TransformErrorKind::InvalidInput);
        assert!(err.message.contains("max_markdown_nodes"));
    }

    #[test]
    fn preflight_counts_gfm_strikethrough_before_parsing() {
        let input = "~~x~~ ".repeat(6);
        let options = NormalizationOptions {
            max_markdown_nodes: 8,
            ..NormalizationOptions::default()
        };

        let err = enforce_markdown_structural_preflight(&input, true, &options)
            .expect_err("GFM strikethrough should exceed the preflight node estimate");

        assert_eq!(err.kind, TransformErrorKind::InvalidInput);
        assert!(err.message.contains("max_markdown_nodes"));
    }

    #[test]
    fn preflight_counts_closing_inline_html_before_parsing() {
        let input = "x </span> ".repeat(4);
        let options = NormalizationOptions {
            max_markdown_nodes: 5,
            ..NormalizationOptions::default()
        };

        let err = enforce_markdown_structural_preflight(&input, true, &options)
            .expect_err("closing inline HTML should exceed the preflight node estimate");

        assert_eq!(err.kind, TransformErrorKind::InvalidInput);
        assert!(err.message.contains("max_markdown_nodes"));
    }

    #[test]
    fn preflight_counts_reference_links_before_parsing() {
        let input = format!("{}\n\n[ref]: https://example.com", "[x][ref] ".repeat(5));
        let options = NormalizationOptions {
            max_markdown_nodes: 8,
            ..NormalizationOptions::default()
        };

        let err = enforce_markdown_structural_preflight(&input, true, &options)
            .expect_err("reference links should exceed the preflight node estimate");

        assert_eq!(err.kind, TransformErrorKind::InvalidInput);
        assert!(err.message.contains("max_markdown_nodes"));
    }

    #[test]
    fn preflight_counts_shortcut_reference_links_before_parsing() {
        let input = format!("{}\n\n[x]: https://example.com", "[x] ".repeat(5));
        let options = NormalizationOptions {
            max_markdown_nodes: 8,
            ..NormalizationOptions::default()
        };

        let err = enforce_markdown_structural_preflight(&input, true, &options)
            .expect_err("shortcut reference links should exceed the preflight node estimate");

        assert_eq!(err.kind, TransformErrorKind::InvalidInput);
        assert!(err.message.contains("max_markdown_nodes"));
    }

    #[test]
    fn preflight_counts_collapsed_reference_links_before_parsing() {
        let input = format!("{}\n\n[x]: https://example.com", "[x][] ".repeat(5));
        let options = NormalizationOptions {
            max_markdown_nodes: 8,
            ..NormalizationOptions::default()
        };

        let err = enforce_markdown_structural_preflight(&input, true, &options)
            .expect_err("collapsed reference links should exceed the preflight node estimate");

        assert_eq!(err.kind, TransformErrorKind::InvalidInput);
        assert!(err.message.contains("max_markdown_nodes"));
    }

    #[test]
    fn preflight_counts_reference_images_before_parsing() {
        for input in [
            format!(
                "{}\n\n[img]: https://example.com/image.png",
                "![alt][img] ".repeat(4)
            ),
            format!(
                "{}\n\n[alt]: https://example.com/image.png",
                "![alt][] ".repeat(4)
            ),
        ] {
            let options = NormalizationOptions {
                max_markdown_nodes: 10,
                ..NormalizationOptions::default()
            };

            let err = enforce_markdown_structural_preflight(&input, true, &options)
                .expect_err("reference images should exceed the preflight node estimate");

            assert_eq!(err.kind, TransformErrorKind::InvalidInput);
            assert!(err.message.contains("max_markdown_nodes"));
        }
    }

    #[test]
    fn preflight_ignores_undefined_reference_labels() {
        let input = "A [not-a-link]\n[real]: not-url";
        let options = NormalizationOptions {
            max_markdown_nodes: 5,
            ..NormalizationOptions::default()
        };

        enforce_markdown_structural_preflight(input, true, &options)
            .expect("undefined reference labels should not count as reference link nodes");
    }

    #[test]
    fn preflight_ignores_reference_text_inside_code_span() {
        let input = format!("{}\n\n[x]: https://example.com", "`[x]` ".repeat(3));
        let options = NormalizationOptions {
            max_markdown_nodes: 8,
            ..NormalizationOptions::default()
        };

        enforce_markdown_structural_preflight(&input, true, &options)
            .expect("bracket text inside code spans should not count as reference link nodes");
    }

    #[test]
    fn preflight_ignores_explicit_link_markers_inside_code_spans() {
        let input = "`a](b)` ".repeat(4);
        let options = NormalizationOptions {
            max_markdown_nodes: 8,
            ..NormalizationOptions::default()
        };

        enforce_markdown_structural_preflight(&input, true, &options)
            .expect("explicit link-like text inside code spans should not count as link nodes");
    }

    #[test]
    fn preflight_ignores_image_markers_inside_code_spans() {
        let input = "`![alt](image.png)` ".repeat(4);
        let options = NormalizationOptions {
            max_markdown_nodes: 8,
            ..NormalizationOptions::default()
        };

        enforce_markdown_structural_preflight(&input, true, &options)
            .expect("image-like text inside code spans should not count as image nodes");
    }

    #[test]
    fn preflight_does_not_count_explicit_images_as_links() {
        let input = "![alt](image.png) ".repeat(4);
        let options = NormalizationOptions {
            max_markdown_nodes: 10,
            ..NormalizationOptions::default()
        };

        enforce_markdown_structural_preflight(&input, true, &options)
            .expect("explicit images should not also count as explicit links");
    }

    #[test]
    fn preflight_counts_escaped_bang_explicit_links_as_links() {
        let input = "\\![alt](image.png) ".repeat(4);
        let options = NormalizationOptions {
            max_markdown_nodes: 10,
            ..NormalizationOptions::default()
        };

        let err = enforce_markdown_structural_preflight(&input, true, &options)
            .expect_err("escaped bang should leave an explicit link to count");

        assert_eq!(err.kind, TransformErrorKind::InvalidInput);
        assert!(err.message.contains("max_markdown_nodes"));
    }

    #[test]
    fn preflight_ignores_reference_definitions_inside_fenced_code() {
        let input = "A [x]\n```\n[x]: https://example.com\n```";
        let options = NormalizationOptions {
            max_markdown_nodes: 4,
            ..NormalizationOptions::default()
        };

        enforce_markdown_structural_preflight(input, true, &options)
            .expect("reference definitions inside fenced code should not enable shortcut links");
    }

    #[test]
    fn preflight_ignores_reference_definitions_inside_html_block() {
        let input = "A [x]\n<div>\n[x]: https://example.com\n</div>";
        let options = NormalizationOptions {
            max_markdown_nodes: 4,
            ..NormalizationOptions::default()
        };

        enforce_markdown_structural_preflight(input, true, &options)
            .expect("reference definitions inside HTML blocks should not enable shortcut links");
    }

    #[test]
    fn preflight_ignores_reference_definition_without_destination() {
        let input = "A [x]\n[x]:";
        let options = NormalizationOptions {
            max_markdown_nodes: 5,
            ..NormalizationOptions::default()
        };

        enforce_markdown_structural_preflight(input, true, &options)
            .expect("reference labels without destinations should not enable shortcut links");
    }

    #[test]
    fn preflight_ignores_blockquote_reference_definition_without_destination() {
        let input = "> A [x]\n> [x]:";
        let options = NormalizationOptions {
            max_markdown_nodes: 7,
            ..NormalizationOptions::default()
        };

        enforce_markdown_structural_preflight(input, true, &options).expect(
            "blockquote reference labels without destinations should not enable shortcut links",
        );
    }

    #[test]
    fn preflight_ignores_reference_definition_with_trailing_text() {
        let input = "A [x] [x] [x] [x] [x]\n[x]: foo bar";
        let options = NormalizationOptions {
            max_markdown_nodes: 5,
            ..NormalizationOptions::default()
        };

        enforce_markdown_structural_preflight(input, true, &options)
            .expect("reference definitions with trailing text should not enable shortcut links");
    }

    #[test]
    fn preflight_ignores_reference_definition_with_unbalanced_destination_parens() {
        let input = "[x]: foo(bar\nA [x]";
        let options = NormalizationOptions {
            max_markdown_nodes: 5,
            ..NormalizationOptions::default()
        };

        enforce_markdown_structural_preflight(input, true, &options)
            .expect("unbalanced destination parens should not enable shortcut links");
    }

    #[test]
    fn preflight_ignores_reference_definition_with_nested_bracketed_destination_start() {
        let input = "[x]: <foo<bar>\nA [x] [x] [x] [x] [x]";
        let options = NormalizationOptions {
            max_markdown_nodes: 7,
            ..NormalizationOptions::default()
        };

        enforce_markdown_structural_preflight(input, true, &options)
            .expect("nested bracketed destination starts should not enable shortcut links");
    }

    #[test]
    fn preflight_ignores_reference_definition_with_escaped_destination_space() {
        let input = "[x]: foo\\ bar\nA [x] [x] [x] [x] [x]";
        let options = NormalizationOptions {
            max_markdown_nodes: 5,
            ..NormalizationOptions::default()
        };

        enforce_markdown_structural_preflight(input, true, &options)
            .expect("escaped spaces should not make unbracketed destinations valid");
    }

    #[test]
    fn preflight_counts_reference_definition_with_balanced_destination_parens() {
        let input = "[x]: foo(bar)\nA [x] [x]";
        let options = NormalizationOptions {
            max_markdown_nodes: 5,
            ..NormalizationOptions::default()
        };

        let err = enforce_markdown_structural_preflight(input, true, &options)
            .expect_err("balanced destination parens should enable shortcut links");

        assert_eq!(err.kind, TransformErrorKind::InvalidInput);
        assert!(err.message.contains("max_markdown_nodes"));
    }

    #[test]
    fn preflight_counts_reference_definition_with_escaped_destination_parens() {
        let input = "[x]: foo\\(bar\\)\nA [x] [x]";
        let options = NormalizationOptions {
            max_markdown_nodes: 5,
            ..NormalizationOptions::default()
        };

        let err = enforce_markdown_structural_preflight(input, true, &options)
            .expect_err("escaped destination parens should keep the reference definition valid");

        assert_eq!(err.kind, TransformErrorKind::InvalidInput);
        assert!(err.message.contains("max_markdown_nodes"));
    }

    #[test]
    fn preflight_does_not_count_reference_definitions_as_paragraphs() {
        let input =
            "[a]: https://example.com/a\n[b]: https://example.com/b\n[c]: https://example.com/c";
        let options = NormalizationOptions {
            max_markdown_nodes: 1,
            ..NormalizationOptions::default()
        };

        enforce_markdown_structural_preflight(input, true, &options)
            .expect("reference definition lines should not add paragraph/text nodes");
    }

    #[test]
    fn preflight_does_not_count_multiline_reference_definition_title_as_paragraph() {
        let input = "[a]: https://example.com/a\n\"title\"";
        let options = NormalizationOptions {
            max_markdown_nodes: 1,
            ..NormalizationOptions::default()
        };

        enforce_markdown_structural_preflight(input, true, &options)
            .expect("reference definition title continuation should not add paragraph/text nodes");
    }

    #[test]
    fn preflight_does_not_count_multiline_reference_definition_title_lines_as_paragraphs() {
        let input = "[a]: https://example.com/a\n\"one\ntwo\"";
        let options = NormalizationOptions {
            max_markdown_nodes: 1,
            ..NormalizationOptions::default()
        };

        enforce_markdown_structural_preflight(input, true, &options)
            .expect("multiline reference definition title should not add paragraph/text nodes");
    }

    #[test]
    fn preflight_ignores_table_text_inside_multiline_reference_definition_title() {
        let input = "[a]: https://example.com/a\n\"title\n| A | B |\n| --- | --- |\nend\"";
        let options = NormalizationOptions {
            max_markdown_table_cells: 1,
            ..NormalizationOptions::default()
        };

        enforce_markdown_structural_preflight(input, true, &options)
            .expect("table-like text inside a multiline reference title should not count cells");
    }

    #[test]
    fn preflight_ignores_table_text_inside_blockquote_lazy_multiline_reference_title() {
        let input = "> [a]: https://example.com/a\n\"title\n| A | B |\n| --- | --- |\nend\"";
        let options = NormalizationOptions {
            max_markdown_table_cells: 1,
            ..NormalizationOptions::default()
        };

        enforce_markdown_structural_preflight(input, true, &options)
            .expect("table-like lazy continuation title text should not count cells");
    }

    #[test]
    fn preflight_counts_table_after_blank_line_in_reference_title_candidate() {
        let input = "[a]: https://example.com/a\n\"title\n\n| A | B |\n| --- | --- |\nend\"";
        let options = NormalizationOptions {
            max_markdown_table_cells: 1,
            ..NormalizationOptions::default()
        };

        let err = enforce_markdown_structural_preflight(input, true, &options)
            .expect_err("blank line should end pending reference title continuation");

        assert_eq!(err.kind, TransformErrorKind::InvalidInput);
        assert!(err.message.contains("max_markdown_table_cells"));
    }

    #[test]
    fn preflight_counts_table_after_blank_line_in_blockquote_lazy_reference_title_candidate() {
        let input = "> [a]: https://example.com/a\n\"title\n\n| A | B |\n| --- | --- |\nend\"";
        let options = NormalizationOptions {
            max_markdown_table_cells: 1,
            ..NormalizationOptions::default()
        };

        let err = enforce_markdown_structural_preflight(input, true, &options)
            .expect_err("blank line should end blockquote lazy reference title continuation");

        assert_eq!(err.kind, TransformErrorKind::InvalidInput);
        assert!(err.message.contains("max_markdown_table_cells"));
    }

    #[test]
    fn preflight_counts_table_after_fence_in_reference_title_candidate() {
        let input =
            "[a]: https://example.com/a\n\"title\n```\ncode\n```\n| A | B |\n| --- | --- |\nend\"";
        let options = NormalizationOptions {
            max_markdown_table_cells: 1,
            ..NormalizationOptions::default()
        };

        let err = enforce_markdown_structural_preflight(input, true, &options)
            .expect_err("code fence should end pending reference title continuation");

        assert_eq!(err.kind, TransformErrorKind::InvalidInput);
        assert!(err.message.contains("max_markdown_table_cells"));
    }

    #[test]
    fn preflight_counts_table_after_fence_in_blockquote_lazy_reference_title_candidate() {
        let input = "> [a]: https://example.com/a\n\"title\n```\ncode\n```\n| A | B |\n| --- | --- |\nend\"";
        let options = NormalizationOptions {
            max_markdown_table_cells: 1,
            ..NormalizationOptions::default()
        };

        let err = enforce_markdown_structural_preflight(input, true, &options)
            .expect_err("code fence should end blockquote lazy reference title continuation");

        assert_eq!(err.kind, TransformErrorKind::InvalidInput);
        assert!(err.message.contains("max_markdown_table_cells"));
    }

    #[test]
    fn preflight_counts_table_after_heading_in_reference_title_candidate() {
        let input =
            "[a]: https://example.com/a\n\"title\n# Heading\n| A | B |\n| --- | --- |\nend\"";
        let options = NormalizationOptions {
            max_markdown_table_cells: 1,
            ..NormalizationOptions::default()
        };

        let err = enforce_markdown_structural_preflight(input, true, &options)
            .expect_err("ATX heading should end pending reference title continuation");

        assert_eq!(err.kind, TransformErrorKind::InvalidInput);
        assert!(err.message.contains("max_markdown_table_cells"));
    }

    /// Same scenario as the plain ATX-heading case, but the reference definition starts
    /// inside a blockquote and the title continues lazily; the table after the heading
    /// must still trip the `max_markdown_table_cells` limit of 1.
    #[test]
    fn preflight_counts_table_after_heading_in_blockquote_lazy_reference_title_candidate() {
        let limits = NormalizationOptions {
            max_markdown_table_cells: 1,
            ..NormalizationOptions::default()
        };
        let markdown = concat!(
            "> [a]: https://example.com/a\n",
            "\"title\n",
            "# Heading\n",
            "| A | B |\n",
            "| --- | --- |\n",
            "end\"",
        );

        let rejection = enforce_markdown_structural_preflight(markdown, true, &limits)
            .expect_err("ATX heading should end blockquote lazy reference title continuation");

        assert_eq!(rejection.kind, TransformErrorKind::InvalidInput);
        assert!(rejection.message.contains("max_markdown_table_cells"));
    }

    /// A `---` line inside an unterminated reference title is expected to end the title
    /// candidate, so the table that follows must trip the `max_markdown_table_cells` limit
    /// of 1.
    #[test]
    fn preflight_counts_table_after_thematic_break_in_reference_title_candidate() {
        let limits = NormalizationOptions {
            max_markdown_table_cells: 1,
            ..NormalizationOptions::default()
        };
        let markdown = concat!(
            "[a]: https://example.com/a\n",
            "\"title\n",
            "---\n",
            "| A | B |\n",
            "| --- | --- |\n",
            "end\"",
        );

        let rejection = enforce_markdown_structural_preflight(markdown, true, &limits)
            .expect_err("thematic break should end pending reference title continuation");

        assert_eq!(rejection.kind, TransformErrorKind::InvalidInput);
        assert!(rejection.message.contains("max_markdown_table_cells"));
    }

    /// A non-empty bullet list item inside an unterminated reference title is expected to
    /// end the title candidate, so the table that follows must trip the
    /// `max_markdown_table_cells` limit of 1.
    #[test]
    fn preflight_counts_table_after_list_item_in_reference_title_candidate() {
        let limits = NormalizationOptions {
            max_markdown_table_cells: 1,
            ..NormalizationOptions::default()
        };
        let markdown = concat!(
            "[a]: https://example.com/a\n",
            "\"title\n",
            "- item\n",
            "| A | B |\n",
            "| --- | --- |\n",
            "end\"",
        );

        let rejection = enforce_markdown_structural_preflight(markdown, true, &limits)
            .expect_err("list item should end pending reference title continuation");

        assert_eq!(rejection.kind, TransformErrorKind::InvalidInput);
        assert!(rejection.message.contains("max_markdown_table_cells"));
    }

    /// A `<script>` HTML block inside an unterminated reference title is expected to end
    /// the title candidate, so the table after `</script>` must trip the
    /// `max_markdown_table_cells` limit of 1.
    #[test]
    fn preflight_counts_table_after_html_block_in_reference_title_candidate() {
        let limits = NormalizationOptions {
            max_markdown_table_cells: 1,
            ..NormalizationOptions::default()
        };
        let markdown = concat!(
            "[a]: https://example.com/a\n",
            "\"title\n",
            "<script>\n",
            "</script>\n",
            "| A | B |\n",
            "| --- | --- |\n",
            "end\"",
        );

        let rejection = enforce_markdown_structural_preflight(markdown, true, &limits)
            .expect_err("HTML block should end pending reference title continuation");

        assert_eq!(rejection.kind, TransformErrorKind::InvalidInput);
        assert!(rejection.message.contains("max_markdown_table_cells"));
    }

    /// An ordered marker that does not start at one (`2.`) is expected to stay part of the
    /// reference title, so the table-looking lines after it must not be counted against a
    /// `max_markdown_table_cells` limit of 1.
    #[test]
    fn preflight_keeps_non_one_ordered_marker_inside_reference_title_candidate() {
        let limits = NormalizationOptions {
            max_markdown_table_cells: 1,
            ..NormalizationOptions::default()
        };
        let markdown = concat!(
            "[a]: https://example.com/a\n",
            "\"title\n",
            "2. item\n",
            "| A | B |\n",
            "| --- | --- |\n",
            "end\"",
        );

        enforce_markdown_structural_preflight(markdown, true, &limits)
            .expect("ordered list markers above one should stay inside the reference title");
    }

    /// A `===` setext underline inside an unterminated reference title is expected to end
    /// the title candidate, so the table that follows must trip the
    /// `max_markdown_table_cells` limit of 1.
    #[test]
    fn preflight_counts_table_after_setext_underline_in_reference_title_candidate() {
        let limits = NormalizationOptions {
            max_markdown_table_cells: 1,
            ..NormalizationOptions::default()
        };
        let markdown = concat!(
            "[a]: https://example.com/a\n",
            "\"title\n",
            "===\n",
            "| A | B |\n",
            "| --- | --- |\n",
            "end\"",
        );

        let rejection = enforce_markdown_structural_preflight(markdown, true, &limits)
            .expect_err("setext underline should end pending reference title continuation");

        assert_eq!(rejection.kind, TransformErrorKind::InvalidInput);
        assert!(rejection.message.contains("max_markdown_table_cells"));
    }

    /// An empty bullet marker (`* ` with no content) is expected to stay part of the
    /// reference title, so the table-looking lines after it must not be counted against a
    /// `max_markdown_table_cells` limit of 1.
    #[test]
    fn preflight_keeps_empty_unordered_marker_inside_reference_title_candidate() {
        let limits = NormalizationOptions {
            max_markdown_table_cells: 1,
            ..NormalizationOptions::default()
        };
        let markdown = concat!(
            "[a]: https://example.com/a\n",
            "\"title\n",
            "* \n",
            "| A | B |\n",
            "| --- | --- |\n",
            "end\"",
        );

        enforce_markdown_structural_preflight(markdown, true, &limits)
            .expect("empty unordered list marker should stay inside the reference title");
    }

    /// An empty ordered marker (`1. ` with no content) is expected to stay part of the
    /// reference title, so the table-looking lines after it must not be counted against a
    /// `max_markdown_table_cells` limit of 1.
    #[test]
    fn preflight_keeps_empty_ordered_marker_inside_reference_title_candidate() {
        let limits = NormalizationOptions {
            max_markdown_table_cells: 1,
            ..NormalizationOptions::default()
        };
        let markdown = concat!(
            "[a]: https://example.com/a\n",
            "\"title\n",
            "1. \n",
            "| A | B |\n",
            "| --- | --- |\n",
            "end\"",
        );

        enforce_markdown_structural_preflight(markdown, true, &limits)
            .expect("empty ordered list marker should stay inside the reference title");
    }

    /// With no title continuation pending, a table directly after a reference definition
    /// must be counted and trip the `max_markdown_table_cells` limit of 1.
    #[test]
    fn preflight_counts_table_after_reference_definition_without_title_continuation() {
        let limits = NormalizationOptions {
            max_markdown_table_cells: 1,
            ..NormalizationOptions::default()
        };
        let markdown = concat!(
            "[a]: https://example.com/a\n",
            "| A | B |\n",
            "| --- | --- |",
        );

        let rejection = enforce_markdown_structural_preflight(markdown, true, &limits).expect_err(
            "table text after a reference definition should still count as table cells",
        );

        assert_eq!(rejection.kind, TransformErrorKind::InvalidInput);
        assert!(rejection.message.contains("max_markdown_table_cells"));
    }

    /// A bracketed destination containing an escaped `<` is expected to remain a valid
    /// reference definition, so the `[x]` shortcut links on the next line are counted and
    /// exceed a `max_markdown_nodes` limit of 5.
    #[test]
    fn preflight_counts_reference_definition_with_escaped_bracketed_destination_start() {
        let limits = NormalizationOptions {
            max_markdown_nodes: 5,
            ..NormalizationOptions::default()
        };
        let markdown = concat!("[x]: <foo\\<bar>\n", "A [x] [x]");

        let rejection = enforce_markdown_structural_preflight(markdown, true, &limits)
            .expect_err("escaped bracketed destination starts should keep the definition valid");

        assert_eq!(rejection.kind, TransformErrorKind::InvalidInput);
        assert!(rejection.message.contains("max_markdown_nodes"));
    }

    /// A definition-like line in a blockquote that carries trailing text (`foo bar`) must
    /// not enable `[x]` shortcut links; the input is expected to pass with
    /// `max_markdown_nodes` set to 7.
    #[test]
    fn preflight_ignores_blockquote_reference_definition_with_trailing_text() {
        let limits = NormalizationOptions {
            max_markdown_nodes: 7,
            ..NormalizationOptions::default()
        };
        let markdown = concat!("> A [x] [x] [x] [x] [x]\n", "> [x]: foo bar");

        enforce_markdown_structural_preflight(markdown, true, &limits).expect(
            "blockquote reference definitions with trailing text should not enable shortcut links",
        );
    }

    /// Definition-like text that continues an open paragraph must not enable `[x]`
    /// shortcut links; the input is expected to pass with `max_markdown_nodes` set to 7.
    #[test]
    fn preflight_ignores_reference_definition_inside_paragraph() {
        let limits = NormalizationOptions {
            max_markdown_nodes: 7,
            ..NormalizationOptions::default()
        };
        let markdown = concat!("intro\n", "[x]: not-url\n", "A [x] [x] [x] [x] [x]");

        enforce_markdown_structural_preflight(markdown, true, &limits)
            .expect("reference-like text inside a paragraph should not enable shortcut links");
    }

    /// `[y]` links that sit on a definition-like paragraph line must still be counted when
    /// `[y]` is defined later; the input is expected to exceed a `max_markdown_nodes`
    /// limit of 8.
    #[test]
    fn preflight_counts_reference_links_on_reference_like_paragraph_line() {
        let limits = NormalizationOptions {
            max_markdown_nodes: 8,
            ..NormalizationOptions::default()
        };
        let markdown = concat!(
            "intro\n",
            "[x]: https://example.com [y] [y]\n",
            "\n",
            "[y]: https://example.com",
        );

        let rejection = enforce_markdown_structural_preflight(markdown, true, &limits)
            .expect_err("reference links on paragraph text should count before parsing");

        assert_eq!(rejection.kind, TransformErrorKind::InvalidInput);
        assert!(rejection.message.contains("max_markdown_nodes"));
    }

    /// Definition-like text that continues a paragraph inside a blockquote must not enable
    /// `[x]` shortcut links; the input is expected to pass with `max_markdown_nodes` set
    /// to 10.
    #[test]
    fn preflight_ignores_blockquote_reference_definition_inside_paragraph() {
        let limits = NormalizationOptions {
            max_markdown_nodes: 10,
            ..NormalizationOptions::default()
        };
        let markdown = concat!(
            "> intro\n",
            "> [x]: not-url\n",
            "> A [x] [x] [x] [x] [x]",
        );

        enforce_markdown_structural_preflight(markdown, true, &limits).expect(
            "reference-like text inside a blockquote paragraph should not enable shortcut links",
        );
    }

    /// Definition-like text that lazily continues a blockquote paragraph (no `>` prefix)
    /// must not enable `[x]` shortcut links; the input is expected to pass with
    /// `max_markdown_nodes` set to 8.
    #[test]
    fn preflight_ignores_blockquote_lazy_continuation_reference_definition_inside_paragraph() {
        let limits = NormalizationOptions {
            max_markdown_nodes: 8,
            ..NormalizationOptions::default()
        };
        let markdown = concat!("> intro\n", "[x]: not-url\n", "A [x] [x] [x] [x] [x]");

        enforce_markdown_structural_preflight(markdown, true, &limits).expect(
            "lazy continuation text inside a blockquote paragraph should not enable shortcut links",
        );
    }

    /// A `[y]` definition that follows a definition-only blockquote line is expected to be
    /// recognised, so the `[y]` shortcut links after it are counted and exceed a
    /// `max_markdown_nodes` limit of 6.
    #[test]
    fn preflight_counts_reference_links_after_blockquote_reference_lazy_continuation() {
        let limits = NormalizationOptions {
            max_markdown_nodes: 6,
            ..NormalizationOptions::default()
        };
        let markdown = concat!(
            "> [x]: https://example.com\n",
            "[y]: https://example.com\n",
            "A [y] [y]",
        );

        let rejection = enforce_markdown_structural_preflight(markdown, true, &limits).expect_err(
            "blockquote reference-definition-only continuation should enable shortcut links",
        );

        assert_eq!(rejection.kind, TransformErrorKind::InvalidInput);
        assert!(rejection.message.contains("max_markdown_nodes"));
    }

    /// `[y]` links on a line that lazily continues a blockquote paragraph must still be
    /// counted when `[y]` is defined later; the input is expected to exceed a
    /// `max_markdown_nodes` limit of 9.
    #[test]
    fn preflight_counts_reference_links_on_blockquote_lazy_continuation_line() {
        let limits = NormalizationOptions {
            max_markdown_nodes: 9,
            ..NormalizationOptions::default()
        };
        let markdown = concat!(
            "> intro\n",
            "[x]: https://example.com [y] [y]\n",
            "\n",
            "[y]: https://example.com",
        );

        let rejection = enforce_markdown_structural_preflight(markdown, true, &limits).expect_err(
            "reference links on blockquote lazy continuation text should count before parsing",
        );

        assert_eq!(rejection.kind, TransformErrorKind::InvalidInput);
        assert!(rejection.message.contains("max_markdown_nodes"));
    }

    /// Four `> - x` lines are expected to push the node estimate past a
    /// `max_markdown_nodes` limit of 12.
    #[test]
    fn preflight_counts_blockquote_list_items_before_parsing() {
        let limits = NormalizationOptions {
            max_markdown_nodes: 12,
            ..NormalizationOptions::default()
        };
        let markdown = "> - x\n".repeat(4);

        let rejection = enforce_markdown_structural_preflight(&markdown, true, &limits)
            .expect_err("blockquote list items should exceed the preflight node estimate");

        assert_eq!(rejection.kind, TransformErrorKind::InvalidInput);
        assert!(rejection.message.contains("max_markdown_nodes"));
    }

    /// List items whose marker is followed by a tab instead of a space must still be
    /// counted: four of them are expected to exceed a `max_markdown_nodes` limit of 8.
    #[test]
    fn preflight_rejects_tab_delimited_list_items_before_parsing() {
        let limits = NormalizationOptions {
            max_markdown_nodes: 8,
            ..NormalizationOptions::default()
        };
        let markdown = "-\tx\n".repeat(4);

        let rejection = enforce_markdown_structural_preflight(&markdown, true, &limits)
            .expect_err("tab-delimited list items should exceed the preflight node estimate");

        assert_eq!(rejection.kind, TransformErrorKind::InvalidInput);
        assert!(rejection.message.contains("max_markdown_nodes"));
    }

    /// Ordinary prose containing `|` (a shell pipeline here) is not a table, so it is
    /// expected to pass even with `max_markdown_table_cells` set to 1.
    #[test]
    fn preflight_allows_pipe_text_without_table_cells() {
        let limits = NormalizationOptions {
            max_markdown_table_cells: 1,
            ..NormalizationOptions::default()
        };
        let markdown = "echo a | sed s/a/b/ | wc\n".repeat(2);

        enforce_markdown_structural_preflight(&markdown, true, &limits)
            .expect("ordinary pipe text should not count as table cells before parsing");
    }

    /// Pipes that are backslash-escaped or enclosed in a code span must not split a table
    /// cell; the table is expected to pass with `max_markdown_table_cells` set to 5.
    #[test]
    fn preflight_ignores_escaped_and_code_span_pipes_in_table_cells() {
        let limits = NormalizationOptions {
            max_markdown_table_cells: 5,
            ..NormalizationOptions::default()
        };
        let markdown = concat!(
            "| Field | Value |\n",
            "| --- | --- |\n",
            "| a \\| b | `c | d` |",
        );

        enforce_markdown_structural_preflight(markdown, true, &limits)
            .expect("escaped and code-span pipes should stay inside their table cells");
    }

    /// A lone backtick does not open a code span, so the pipes after it must still
    /// separate cells: three such body rows are expected to exceed a
    /// `max_markdown_table_cells` limit of 5.
    #[test]
    fn preflight_counts_unmatched_backtick_table_pipes_before_parsing() {
        let limits = NormalizationOptions {
            max_markdown_table_cells: 5,
            ..NormalizationOptions::default()
        };
        let table_head = concat!("| Field | Type |\n", "| --- | --- |\n");
        let table_body = "| `id | string |\n".repeat(3);
        let markdown = format!("{table_head}{table_body}");

        let rejection = enforce_markdown_structural_preflight(&markdown, true, &limits)
            .expect_err("unmatched backticks should not hide later table separators");

        assert_eq!(rejection.kind, TransformErrorKind::InvalidInput);
        assert!(rejection.message.contains("max_markdown_table_cells"));
    }

    /// `</scripture>` merely shares a prefix with `</script>` and must not close the HTML
    /// block, so the list-looking lines after it are expected to pass with
    /// `max_markdown_nodes` set to 3.
    #[test]
    fn preflight_keeps_html_block_open_on_closing_tag_prefix() {
        let limits = NormalizationOptions {
            max_markdown_nodes: 3,
            ..NormalizationOptions::default()
        };
        let markdown = concat!(
            "<script>\n",
            "</scripture>\n",
            "- item\n",
            "- item\n",
            "</script>",
        );

        enforce_markdown_structural_preflight(markdown, true, &limits)
            .expect("closing tag prefixes should not end an HTML block");
    }

    /// A table written entirely inside a blockquote must have its cells counted: header
    /// plus one body row is expected to exceed a `max_markdown_table_cells` limit of 3.
    #[test]
    fn preflight_counts_blockquote_gfm_table_cells_before_parsing() {
        let limits = NormalizationOptions {
            max_markdown_table_cells: 3,
            ..NormalizationOptions::default()
        };
        let markdown = concat!(
            "> | Field | Type |\n",
            "> | --- | --- |\n",
            "> | id | string |",
        );

        let rejection = enforce_markdown_structural_preflight(markdown, true, &limits)
            .expect_err("blockquote tables should count table cells before parsing");

        assert_eq!(rejection.kind, TransformErrorKind::InvalidInput);
        assert!(rejection.message.contains("max_markdown_table_cells"));
    }

    /// A header row outside a blockquote and a delimiter row inside one must not combine
    /// into a table; the input is expected to pass with `max_markdown_table_cells` set
    /// to 1.
    #[test]
    fn preflight_does_not_merge_table_header_across_blockquote_boundary() {
        let limits = NormalizationOptions {
            max_markdown_table_cells: 1,
            ..NormalizationOptions::default()
        };
        let markdown = concat!(
            "| Field | Type |\n",
            "> | --- | --- |\n",
            "> | id | string |",
        );

        enforce_markdown_structural_preflight(markdown, true, &limits)
            .expect("table header and delimiter should not merge across quote boundaries");
    }

    /// Rows that follow a blockquote table without the `>` prefix must not be counted as
    /// part of that table; the input is expected to pass with `max_markdown_table_cells`
    /// set to 3.
    #[test]
    fn preflight_does_not_continue_blockquote_table_after_quote_boundary() {
        let limits = NormalizationOptions {
            max_markdown_table_cells: 3,
            ..NormalizationOptions::default()
        };
        let markdown = concat!(
            "> | Field | Type |\n",
            "> | --- | --- |\n",
            "| id | string |\n",
            "| name | string |",
        );

        enforce_markdown_structural_preflight(markdown, true, &limits)
            .expect("blockquote table rows should not continue after the quote boundary");
    }

    /// A two-column header followed by a three-column delimiter row is not a table, so the
    /// input is expected to pass with `max_markdown_table_cells` set to 1.
    #[test]
    fn preflight_ignores_header_delimiter_mismatch_as_non_table() {
        let limits = NormalizationOptions {
            max_markdown_table_cells: 1,
            ..NormalizationOptions::default()
        };
        let markdown = concat!("| A | B |\n", "| --- | --- | --- |\n", "| x | y |");

        enforce_markdown_structural_preflight(markdown, true, &limits)
            .expect("mismatched header and delimiter columns should not count as a table");
    }

    /// A body row with fewer cells than the header is expected to be counted at header
    /// width: a three-column header plus a one-cell row must exceed a
    /// `max_markdown_table_cells` limit of 5.
    #[test]
    fn preflight_counts_short_table_rows_as_header_width() {
        let limits = NormalizationOptions {
            max_markdown_table_cells: 5,
            ..NormalizationOptions::default()
        };
        let markdown = concat!("| A | B | C |\n", "| --- | --- | --- |\n", "| x |");

        let rejection = enforce_markdown_structural_preflight(markdown, true, &limits)
            .expect_err("short table rows should count padded cells before parsing");

        assert_eq!(rejection.kind, TransformErrorKind::InvalidInput);
        assert!(rejection.message.contains("max_markdown_table_cells"));
    }

    /// A body row with more cells than the header must not be overcounted: a two-column
    /// header plus a four-cell row is expected to pass with `max_markdown_table_cells`
    /// set to 4.
    #[test]
    fn preflight_counts_long_table_rows_as_header_width() {
        let limits = NormalizationOptions {
            max_markdown_table_cells: 4,
            ..NormalizationOptions::default()
        };
        let markdown = concat!(
            "| A | B |\n",
            "| --- | --- |\n",
            "| x | y | ignored | ignored |",
        );

        enforce_markdown_structural_preflight(markdown, true, &limits)
            .expect("extra table row cells should not overcount parsed table cells");
    }

    /// A header-and-delimiter-only table is expected to stay within a `max_markdown_nodes`
    /// limit of 7; the cell limit is set high (100) so only the node estimate is tested.
    #[test]
    fn preflight_does_not_double_count_gfm_table_header_nodes() {
        let limits = NormalizationOptions {
            max_markdown_nodes: 7,
            max_markdown_table_cells: 100,
            ..NormalizationOptions::default()
        };
        let markdown = concat!("| A | B |\n", "| --- | --- |");

        enforce_markdown_structural_preflight(markdown, true, &limits)
            .expect("table header and delimiter fallback nodes should not be double-counted");
    }

    /// A table with one body row is expected to stay within a `max_markdown_nodes` limit
    /// of 12; the cell limit is set high (100) so only the node estimate is tested.
    #[test]
    fn preflight_does_not_double_count_gfm_table_body_row_nodes() {
        let limits = NormalizationOptions {
            max_markdown_nodes: 12,
            max_markdown_table_cells: 100,
            ..NormalizationOptions::default()
        };
        let markdown = concat!("| A | B |\n", "| --- | --- |\n", "| x | y |");

        enforce_markdown_structural_preflight(markdown, true, &limits)
            .expect("table body row fallback nodes should not be double-counted");
    }

    /// The same header-only table wrapped in a blockquote is expected to exceed the
    /// `max_markdown_nodes` limit of 7 that the unquoted variant stays within.
    #[test]
    fn preflight_counts_blockquote_gfm_table_container_node() {
        let limits = NormalizationOptions {
            max_markdown_nodes: 7,
            max_markdown_table_cells: 100,
            ..NormalizationOptions::default()
        };
        let markdown = concat!("> | A | B |\n", "> | --- | --- |");

        let rejection = enforce_markdown_structural_preflight(markdown, true, &limits)
            .expect_err("blockquote table container should count before parsing");

        assert_eq!(rejection.kind, TransformErrorKind::InvalidInput);
        assert!(rejection.message.contains("max_markdown_nodes"));
    }

    /// The same one-body-row table wrapped in a blockquote is expected to exceed the
    /// `max_markdown_nodes` limit of 12 that the unquoted variant stays within.
    #[test]
    fn preflight_counts_blockquote_gfm_table_body_row_nodes() {
        let limits = NormalizationOptions {
            max_markdown_nodes: 12,
            max_markdown_table_cells: 100,
            ..NormalizationOptions::default()
        };
        let markdown = concat!("> | A | B |\n", "> | --- | --- |\n", "> | x | y |");

        let rejection = enforce_markdown_structural_preflight(markdown, true, &limits)
            .expect_err("blockquote table body rows should count before parsing");

        assert_eq!(rejection.kind, TransformErrorKind::InvalidInput);
        assert!(rejection.message.contains("max_markdown_nodes"));
    }

    /// Baseline table check: a two-column table with one body row is expected to exceed a
    /// `max_markdown_table_cells` limit of 1.
    #[test]
    fn preflight_rejects_gfm_table_cells_before_parsing() {
        let limits = NormalizationOptions {
            max_markdown_table_cells: 1,
            ..NormalizationOptions::default()
        };
        let markdown = concat!(
            "| Field | Type |\n",
            "| --- | --- |\n",
            "| id | string |",
        );

        let rejection = enforce_markdown_structural_preflight(markdown, true, &limits)
            .expect_err("table cells should exceed the preflight table-cell estimate");

        assert_eq!(rejection.kind, TransformErrorKind::InvalidInput);
        assert!(rejection.message.contains("max_markdown_table_cells"));
    }

    /// Table rows must contribute to the node estimate: a header plus four body rows is
    /// expected to exceed a `max_markdown_nodes` limit of 8 while the cell limit (100) is
    /// kept out of the way.
    #[test]
    fn preflight_counts_gfm_table_rows_toward_nodes() {
        let limits = NormalizationOptions {
            max_markdown_nodes: 8,
            max_markdown_table_cells: 100,
            ..NormalizationOptions::default()
        };
        let table_head = concat!("| Field | Type |\n", "| --- | --- |\n");
        let table_body = "| id | string |\n".repeat(4);
        let markdown = format!("{table_head}{table_body}");

        let rejection = enforce_markdown_structural_preflight(&markdown, true, &limits)
            .expect_err("table rows should exceed the preflight node estimate");

        assert_eq!(rejection.kind, TransformErrorKind::InvalidInput);
        assert!(rejection.message.contains("max_markdown_nodes"));
    }

    /// Plain paragraphs must contribute to the node estimate: four blank-line-separated
    /// paragraphs are expected to exceed a `max_markdown_nodes` limit of 8.
    #[test]
    fn preflight_counts_plain_paragraphs_toward_nodes() {
        let limits = NormalizationOptions {
            max_markdown_nodes: 8,
            ..NormalizationOptions::default()
        };
        let markdown = "plain\n\n".repeat(4);

        let rejection = enforce_markdown_structural_preflight(&markdown, true, &limits)
            .expect_err("plain paragraphs should exceed the preflight node estimate");

        assert_eq!(rejection.kind, TransformErrorKind::InvalidInput);
        assert!(rejection.message.contains("max_markdown_nodes"));
    }
}