panache-parser 0.5.1

//! Fenced code block parsing utilities.

use crate::parser::utils::chunk_options::hashpipe_comment_prefix;
use crate::syntax::SyntaxKind;
use rowan::GreenNodeBuilder;

use super::blockquotes::{count_blockquote_markers, strip_n_blockquote_markers};
use crate::parser::utils::container_stack::byte_index_at_column;
use crate::parser::utils::helpers::{strip_leading_spaces, strip_newline};

/// Represents the type of code block based on its info string syntax.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CodeBlockType {
    /// Display-only block with shortcut syntax: ```python
    DisplayShortcut { language: String },
    /// Display-only block with explicit Pandoc syntax: ```{.python}
    DisplayExplicit { classes: Vec<String> },
    /// Executable chunk (Quarto/RMarkdown): ```{python}
    Executable { language: String },
    /// Raw block for specific output format: ```{=html}
    Raw { format: String },
    /// No language specified: ```
    Plain,
}

/// Parsed attributes from a code block info string.
#[derive(Debug, Clone, PartialEq)]
pub struct InfoString {
    pub raw: String,
    pub block_type: CodeBlockType,
    pub attributes: Vec<(String, Option<String>)>, // key-value pairs
}

impl InfoString {
    /// Parse an info string into structured attributes.
    pub fn parse(raw: &str) -> Self {
        let trimmed = raw.trim();

        if trimmed.is_empty() {
            return InfoString {
                raw: raw.to_string(),
                block_type: CodeBlockType::Plain,
                attributes: Vec::new(),
            };
        }

        // Check if it starts with '{' - explicit attribute block
        if let Some(stripped) = trimmed.strip_prefix('{')
            && let Some(content) = stripped.strip_suffix('}')
        {
            return Self::parse_explicit(raw, content);
        }

        // Check for mixed form: python {.numberLines}
        if let Some(brace_start) = trimmed.find('{') {
            let language = trimmed[..brace_start].trim();
            if !language.is_empty() && !language.contains(char::is_whitespace) {
                let attr_part = &trimmed[brace_start..];
                if let Some(stripped) = attr_part.strip_prefix('{')
                    && let Some(content) = stripped.strip_suffix('}')
                {
                    let attrs = Self::parse_attributes(content);
                    return InfoString {
                        raw: raw.to_string(),
                        block_type: CodeBlockType::DisplayShortcut {
                            language: language.to_string(),
                        },
                        attributes: attrs,
                    };
                }
            }
        }

        // Otherwise, it's a shortcut form (just the language name)
        // Only take the first word as language
        let language = trimmed.split_whitespace().next().unwrap_or(trimmed);
        InfoString {
            raw: raw.to_string(),
            block_type: CodeBlockType::DisplayShortcut {
                language: language.to_string(),
            },
            attributes: Vec::new(),
        }
    }

    fn parse_explicit(raw: &str, content: &str) -> Self {
        // Check for raw attribute FIRST: {=format}
        // The content should start with '=' and have only alphanumeric chars after
        let trimmed_content = content.trim();
        if let Some(format_name) = trimmed_content.strip_prefix('=') {
            // Validate format name: alphanumeric only, no spaces
            if !format_name.is_empty()
                && format_name.chars().all(|c| c.is_alphanumeric())
                && !format_name.contains(char::is_whitespace)
            {
                return InfoString {
                    raw: raw.to_string(),
                    block_type: CodeBlockType::Raw {
                        format: format_name.to_string(),
                    },
                    attributes: Vec::new(),
                };
            }
        }

        // First, do a preliminary parse to determine block type
        // Use chunk options parser (comma-aware) for initial detection
        let prelim_attrs = Self::parse_chunk_options(content);

        // First non-ID, non-attribute token determines if it's executable or display
        let mut first_lang_token = None;
        for (key, val) in prelim_attrs.iter() {
            if val.is_none() && !key.starts_with('#') {
                first_lang_token = Some(key.as_str());
                break;
            }
        }

        let first_token = first_lang_token.unwrap_or("");

        if first_token.starts_with('.') {
            // Display block: {.python} or {.haskell .numberLines}
            // Re-parse with Pandoc-style parser (space-delimited)
            let attrs = Self::parse_pandoc_attributes(content);

            let classes: Vec<String> = attrs
                .iter()
                .filter(|(k, v)| k.starts_with('.') && v.is_none())
                .map(|(k, _)| k[1..].to_string())
                .collect();

            let non_class_attrs: Vec<(String, Option<String>)> = attrs
                .into_iter()
                .filter(|(k, _)| !k.starts_with('.') || k.contains('='))
                .collect();

            InfoString {
                raw: raw.to_string(),
                block_type: CodeBlockType::DisplayExplicit { classes },
                attributes: non_class_attrs,
            }
        } else if !first_token.is_empty() && !first_token.starts_with('#') {
            // Executable chunk: {python} or {r}
            // Use chunk options parser (comma-delimited)
            let attrs = Self::parse_chunk_options(content);
            let lang_index = attrs.iter().position(|(k, _)| k == first_token).unwrap();

            // Check if there's a second bareword (implicit label in R/Quarto chunks)
            // Pattern: {r mylabel} is equivalent to {r, label=mylabel}
            let mut has_implicit_label = false;
            let implicit_label_value = if lang_index + 1 < attrs.len() {
                if let (label_key, None) = &attrs[lang_index + 1] {
                    // Second bareword after language
                    has_implicit_label = true;
                    Some(label_key.clone())
                } else {
                    None
                }
            } else {
                None
            };

            let mut final_attrs: Vec<(String, Option<String>)> = attrs
                .into_iter()
                .enumerate()
                .filter(|(i, _)| {
                    // Remove language token
                    if *i == lang_index {
                        return false;
                    }
                    // Remove implicit label token (will be added back explicitly)
                    if has_implicit_label && *i == lang_index + 1 {
                        return false;
                    }
                    true
                })
                .map(|(_, attr)| attr)
                .collect();

            // Add explicit label if we found an implicit one
            if let Some(label_val) = implicit_label_value {
                final_attrs.insert(0, ("label".to_string(), Some(label_val)));
            }

            InfoString {
                raw: raw.to_string(),
                block_type: CodeBlockType::Executable {
                    language: first_token.to_string(),
                },
                attributes: final_attrs,
            }
        } else {
            // Just attributes, no language - use Pandoc parser
            let attrs = Self::parse_pandoc_attributes(content);
            InfoString {
                raw: raw.to_string(),
                block_type: CodeBlockType::Plain,
                attributes: attrs,
            }
        }
    }

    /// Parse Pandoc-style attributes for display blocks: {.class #id key="value"}
    /// Spaces are the primary delimiter. Pandoc spec prefers explicit quoting.
    fn parse_pandoc_attributes(content: &str) -> Vec<(String, Option<String>)> {
        let mut attrs = Vec::new();
        let mut chars = content.chars().peekable();

        while chars.peek().is_some() {
            // Skip whitespace
            while matches!(chars.peek(), Some(&' ') | Some(&'\t')) {
                chars.next();
            }

            if chars.peek().is_none() {
                break;
            }

            // Read key
            let mut key = String::new();
            while let Some(&ch) = chars.peek() {
                if ch == '=' || ch == ' ' || ch == '\t' {
                    break;
                }
                key.push(ch);
                chars.next();
            }

            if key.is_empty() {
                break;
            }

            // Skip whitespace
            while matches!(chars.peek(), Some(&' ') | Some(&'\t')) {
                chars.next();
            }

            // Check for value
            if chars.peek() == Some(&'=') {
                chars.next(); // consume '='

                // Skip whitespace after '='
                while matches!(chars.peek(), Some(&' ') | Some(&'\t')) {
                    chars.next();
                }

                // Read value (might be quoted)
                let value = if chars.peek() == Some(&'"') {
                    chars.next(); // consume opening quote
                    let mut val = String::new();
                    while let Some(&ch) = chars.peek() {
                        chars.next();
                        if ch == '"' {
                            break;
                        }
                        if ch == '\\' {
                            if let Some(&next_ch) = chars.peek() {
                                chars.next();
                                val.push(next_ch);
                            }
                        } else {
                            val.push(ch);
                        }
                    }
                    val
                } else {
                    // Unquoted value - read until space
                    let mut val = String::new();
                    while let Some(&ch) = chars.peek() {
                        if ch == ' ' || ch == '\t' {
                            break;
                        }
                        val.push(ch);
                        chars.next();
                    }
                    val
                };

                attrs.push((key, Some(value)));
            } else {
                attrs.push((key, None));
            }
        }

        attrs
    }

    /// Parse Quarto/RMarkdown chunk options: {language, option=value, option2=value2}
    /// Commas are the primary delimiter (R CSV style). Supports unquoted barewords.
    fn parse_chunk_options(content: &str) -> Vec<(String, Option<String>)> {
        let mut attrs = Vec::new();
        let mut chars = content.chars().peekable();

        while chars.peek().is_some() {
            // Skip whitespace and commas
            while matches!(chars.peek(), Some(&' ') | Some(&'\t') | Some(&',')) {
                chars.next();
            }

            if chars.peek().is_none() {
                break;
            }

            // Read key
            let mut key = String::new();
            while let Some(&ch) = chars.peek() {
                if ch == '=' || ch == ' ' || ch == '\t' || ch == ',' {
                    break;
                }
                key.push(ch);
                chars.next();
            }

            if key.is_empty() {
                break;
            }

            // Skip whitespace and commas
            while matches!(chars.peek(), Some(&' ') | Some(&'\t') | Some(&',')) {
                chars.next();
            }

            // Check for value
            if chars.peek() == Some(&'=') {
                chars.next(); // consume '='

                // Skip whitespace and commas after '='
                while matches!(chars.peek(), Some(&' ') | Some(&'\t') | Some(&',')) {
                    chars.next();
                }

                // Read value (might be quoted)
                let value = if chars.peek() == Some(&'"') {
                    chars.next(); // consume opening quote
                    let mut val = String::new();
                    while let Some(&ch) = chars.peek() {
                        chars.next();
                        if ch == '"' {
                            break;
                        }
                        if ch == '\\' {
                            if let Some(&next_ch) = chars.peek() {
                                chars.next();
                                val.push(next_ch);
                            }
                        } else {
                            val.push(ch);
                        }
                    }
                    val
                } else {
                    // Unquoted value - read until comma, space, or tab at depth 0
                    // Track nesting depth for (), [], {} and quote state
                    let mut val = String::new();
                    let mut depth = 0; // Track parentheses/brackets/braces depth
                    let mut in_quote: Option<char> = None; // Track if inside ' or "
                    let mut escaped = false; // Track if previous char was backslash

                    while let Some(&ch) = chars.peek() {
                        // Handle escape sequences
                        if escaped {
                            val.push(ch);
                            chars.next();
                            escaped = false;
                            continue;
                        }

                        if ch == '\\' {
                            val.push(ch);
                            chars.next();
                            escaped = true;
                            continue;
                        }

                        // Handle quotes
                        if let Some(quote_char) = in_quote {
                            val.push(ch);
                            chars.next();
                            if ch == quote_char {
                                in_quote = None; // Close quote
                            }
                            continue;
                        }

                        // Not in a quote - check for quote start
                        if ch == '"' || ch == '\'' {
                            in_quote = Some(ch);
                            val.push(ch);
                            chars.next();
                            continue;
                        }

                        // Track nesting depth (only when not in quotes)
                        if ch == '(' || ch == '[' || ch == '{' {
                            depth += 1;
                            val.push(ch);
                            chars.next();
                            continue;
                        }

                        if ch == ')' || ch == ']' || ch == '}' {
                            depth -= 1;
                            val.push(ch);
                            chars.next();
                            continue;
                        }

                        // Check for delimiters - only break at depth 0
                        if depth == 0 && (ch == ' ' || ch == '\t' || ch == ',') {
                            break;
                        }

                        // Regular character
                        val.push(ch);
                        chars.next();
                    }
                    val
                };

                attrs.push((key, Some(value)));
            } else {
                attrs.push((key, None));
            }
        }

        attrs
    }

    /// Legacy function - kept for backward compatibility in mixed-form parsing
    /// For new code, use parse_pandoc_attributes or parse_chunk_options
    fn parse_attributes(content: &str) -> Vec<(String, Option<String>)> {
        // Default to chunk options parsing (comma-aware)
        Self::parse_chunk_options(content)
    }
}

/// Information about a detected code fence opening.
#[derive(Debug, Clone)]
pub(crate) struct FenceInfo {
    pub fence_char: char,
    pub fence_count: usize,
    pub info_string: String,
}

pub(crate) fn is_gfm_math_fence(fence: &FenceInfo) -> bool {
    fence.info_string.trim() == "math"
}

/// Try to detect a fenced code block opening from content.
/// Returns fence info if this is a valid opening fence.
pub(crate) fn try_parse_fence_open(content: &str) -> Option<FenceInfo> {
    let trimmed = strip_leading_spaces(content);

    // Check for fence opening (``` or ~~~)
    let (fence_char, fence_count) = if trimmed.starts_with('`') {
        let count = trimmed.chars().take_while(|&c| c == '`').count();
        ('`', count)
    } else if trimmed.starts_with('~') {
        let count = trimmed.chars().take_while(|&c| c == '~').count();
        ('~', count)
    } else {
        return None;
    };

    if fence_count < 3 {
        return None;
    }

    let info_string_raw = &trimmed[fence_count..];
    // Strip trailing newline (LF or CRLF) and at most one leading space
    let (info_string_trimmed, _) = strip_newline(info_string_raw);
    let info_string = if let Some(stripped) = info_string_trimmed.strip_prefix(' ') {
        stripped.to_string()
    } else {
        info_string_trimmed.to_string()
    };

    // Backtick-fenced blocks cannot have backticks in the info string.
    if fence_char == '`' && info_string.contains('`') {
        return None;
    }

    Some(FenceInfo {
        fence_char,
        fence_count,
        info_string,
    })
}

fn prepare_fence_open_line<'a>(
    builder: &mut GreenNodeBuilder<'static>,
    source_line: &'a str,
    first_line_override: Option<&'a str>,
    bq_depth: usize,
    base_indent: usize,
) -> (&'a str, &'a str) {
    let first_line = first_line_override.unwrap_or(source_line);

    // Only strip blockquote markers for the *surrounding* blockquote depth.
    // Anything beyond that (e.g. a literal `>` inside the code block) must be preserved.
    let first_inner = if bq_depth > 0 && first_line_override.is_none() {
        strip_n_blockquote_markers(first_line, bq_depth)
    } else {
        if bq_depth > 0 && first_line_override.is_some() && source_line != first_line {
            let stripped = strip_n_blockquote_markers(source_line, bq_depth);
            let prefix_len = source_line.len().saturating_sub(stripped.len());
            if prefix_len > 0 {
                emit_blockquote_prefix_tokens(builder, &source_line[..prefix_len]);
            }
        }
        first_line
    };

    // For lossless parsing: emit the base indent before stripping it
    let first_base_indent = if first_line_override.is_some() {
        0
    } else {
        base_indent
    };
    let first_base_indent_bytes = byte_index_at_column(first_inner, first_base_indent);
    let first_stripped = if first_base_indent > 0 && first_inner.len() >= first_base_indent_bytes {
        let indent_str = &first_inner[..first_base_indent_bytes];
        if !indent_str.is_empty() {
            builder.token(SyntaxKind::WHITESPACE.into(), indent_str);
        }
        &first_inner[first_base_indent_bytes..]
    } else {
        first_inner
    };

    let first_trimmed = strip_leading_spaces(first_stripped);
    let leading_ws_len = first_stripped.len().saturating_sub(first_trimmed.len());
    if leading_ws_len > 0 {
        builder.token(
            SyntaxKind::WHITESPACE.into(),
            &first_stripped[..leading_ws_len],
        );
    }
    (first_trimmed, first_inner)
}

fn emit_blockquote_prefix_tokens(builder: &mut GreenNodeBuilder<'static>, prefix: &str) {
    for ch in prefix.chars() {
        if ch == '>' {
            builder.token(SyntaxKind::BLOCK_QUOTE_MARKER.into(), ">");
        } else {
            let mut buf = [0u8; 4];
            builder.token(SyntaxKind::WHITESPACE.into(), ch.encode_utf8(&mut buf));
        }
    }
}

fn emit_content_line_prefixes<'a>(
    builder: &mut GreenNodeBuilder<'static>,
    content_line: &'a str,
    bq_depth: usize,
    base_indent: usize,
) -> &'a str {
    let after_blockquote = if bq_depth > 0 {
        let stripped = strip_n_blockquote_markers(content_line, bq_depth);
        let prefix_len = content_line.len().saturating_sub(stripped.len());
        if prefix_len > 0 {
            emit_blockquote_prefix_tokens(builder, &content_line[..prefix_len]);
        }
        stripped
    } else {
        content_line
    };

    let base_indent_bytes = byte_index_at_column(after_blockquote, base_indent);
    if base_indent > 0 && after_blockquote.len() >= base_indent_bytes {
        let indent_str = &after_blockquote[..base_indent_bytes];
        if !indent_str.is_empty() {
            builder.token(SyntaxKind::WHITESPACE.into(), indent_str);
        }
        &after_blockquote[base_indent_bytes..]
    } else {
        after_blockquote
    }
}

fn strip_content_line_prefixes(content_line: &str, bq_depth: usize, base_indent: usize) -> &str {
    let after_blockquote = if bq_depth > 0 {
        strip_n_blockquote_markers(content_line, bq_depth)
    } else {
        content_line
    };

    let base_indent_bytes = byte_index_at_column(after_blockquote, base_indent);
    if base_indent > 0 && after_blockquote.len() >= base_indent_bytes {
        &after_blockquote[base_indent_bytes..]
    } else {
        after_blockquote
    }
}

pub(crate) fn compute_hashpipe_preamble_line_count(
    content_lines: &[&str],
    prefix: &str,
    bq_depth: usize,
    base_indent: usize,
) -> usize {
    let mut line_idx = 0usize;

    while line_idx < content_lines.len() {
        let preview_after_indent =
            strip_content_line_prefixes(content_lines[line_idx], bq_depth, base_indent);
        let (preview_without_newline, _) = strip_newline(preview_after_indent);
        if !is_hashpipe_option_line(preview_without_newline, prefix)
            && !is_hashpipe_continuation_line(preview_without_newline, prefix)
        {
            break;
        }
        line_idx += 1;
    }

    line_idx
}

fn emit_hashpipe_option_line(
    builder: &mut GreenNodeBuilder<'static>,
    line_without_newline: &str,
    prefix: &str,
) -> bool {
    if !is_hashpipe_option_line(line_without_newline, prefix) {
        return false;
    }

    let trimmed_start = line_without_newline.trim_start_matches([' ', '\t']);
    let leading_ws_len = line_without_newline
        .len()
        .saturating_sub(trimmed_start.len());
    let after_prefix = &trimmed_start[prefix.len()..];
    let ws_after_prefix_len = after_prefix
        .len()
        .saturating_sub(after_prefix.trim_start_matches([' ', '\t']).len());
    let rest = &after_prefix[ws_after_prefix_len..];
    let Some(colon_idx) = rest.find(':') else {
        return false;
    };

    let key_with_ws = &rest[..colon_idx];
    let key = key_with_ws.trim_end_matches([' ', '\t']);
    if key.is_empty() {
        return false;
    }
    let key_ws_suffix = &key_with_ws[key.len()..];

    let after_colon = &rest[colon_idx + 1..];
    let value_ws_prefix_len = after_colon
        .len()
        .saturating_sub(after_colon.trim_start_matches([' ', '\t']).len());
    let value_with_trailing = &after_colon[value_ws_prefix_len..];
    let value = value_with_trailing.trim_end_matches([' ', '\t']);
    let value_ws_suffix = &value_with_trailing[value.len()..];

    builder.start_node(SyntaxKind::CHUNK_OPTION.into());
    if leading_ws_len > 0 {
        builder.token(
            SyntaxKind::WHITESPACE.into(),
            &line_without_newline[..leading_ws_len],
        );
    }
    builder.token(SyntaxKind::HASHPIPE_PREFIX.into(), prefix);
    if ws_after_prefix_len > 0 {
        builder.token(
            SyntaxKind::WHITESPACE.into(),
            &after_prefix[..ws_after_prefix_len],
        );
    }

    builder.token(SyntaxKind::CHUNK_OPTION_KEY.into(), key);
    if !key_ws_suffix.is_empty() {
        builder.token(SyntaxKind::WHITESPACE.into(), key_ws_suffix);
    }
    builder.token(SyntaxKind::TEXT.into(), ":");
    if value_ws_prefix_len > 0 {
        builder.token(
            SyntaxKind::WHITESPACE.into(),
            &after_colon[..value_ws_prefix_len],
        );
    }

    if !value.is_empty() {
        if let Some(quote) = value.chars().next()
            && (quote == '"' || quote == '\'')
            && value.ends_with(quote)
            && value.len() >= 2
        {
            builder.token(SyntaxKind::CHUNK_OPTION_QUOTE.into(), &value[..1]);
            builder.token(
                SyntaxKind::CHUNK_OPTION_VALUE.into(),
                &value[1..value.len() - 1],
            );
            builder.token(
                SyntaxKind::CHUNK_OPTION_QUOTE.into(),
                &value[value.len() - 1..],
            );
        } else {
            builder.token(SyntaxKind::CHUNK_OPTION_VALUE.into(), value);
        }
    }

    if !value_ws_suffix.is_empty() {
        builder.token(SyntaxKind::WHITESPACE.into(), value_ws_suffix);
    }
    builder.finish_node();
    true
}

fn emit_hashpipe_continuation_line(
    builder: &mut GreenNodeBuilder<'static>,
    line_without_newline: &str,
    prefix: &str,
) -> bool {
    if !is_hashpipe_continuation_line(line_without_newline, prefix) {
        return false;
    }
    let trimmed_start = line_without_newline.trim_start_matches([' ', '\t']);
    let leading_ws_len = line_without_newline
        .len()
        .saturating_sub(trimmed_start.len());
    let after_prefix = &trimmed_start[prefix.len()..];
    let ws_after_prefix_len = after_prefix
        .len()
        .saturating_sub(after_prefix.trim_start_matches([' ', '\t']).len());
    let continuation_with_trailing = &after_prefix[ws_after_prefix_len..];
    let continuation_value = continuation_with_trailing.trim_end_matches([' ', '\t']);
    if continuation_value.is_empty() {
        return false;
    }
    let continuation_ws_suffix = &continuation_with_trailing[continuation_value.len()..];

    builder.start_node(SyntaxKind::CHUNK_OPTION.into());
    if leading_ws_len > 0 {
        builder.token(
            SyntaxKind::WHITESPACE.into(),
            &line_without_newline[..leading_ws_len],
        );
    }
    builder.token(SyntaxKind::HASHPIPE_PREFIX.into(), prefix);
    if ws_after_prefix_len > 0 {
        builder.token(
            SyntaxKind::WHITESPACE.into(),
            &after_prefix[..ws_after_prefix_len],
        );
    }
    builder.token(SyntaxKind::CHUNK_OPTION_VALUE.into(), continuation_value);
    if !continuation_ws_suffix.is_empty() {
        builder.token(SyntaxKind::WHITESPACE.into(), continuation_ws_suffix);
    }
    builder.finish_node();
    true
}

fn is_hashpipe_option_line(line_without_newline: &str, prefix: &str) -> bool {
    let trimmed_start = line_without_newline.trim_start_matches([' ', '\t']);
    if !trimmed_start.starts_with(prefix) {
        return false;
    }
    let after_prefix = &trimmed_start[prefix.len()..];
    let rest = after_prefix.trim_start_matches([' ', '\t']);
    let Some(colon_idx) = rest.find(':') else {
        return false;
    };
    let key = rest[..colon_idx].trim_end_matches([' ', '\t']);
    if key.is_empty() {
        return false;
    }
    true
}

fn is_hashpipe_continuation_line(line_without_newline: &str, prefix: &str) -> bool {
    let trimmed_start = line_without_newline.trim_start_matches([' ', '\t']);
    if !trimmed_start.starts_with(prefix) {
        return false;
    }
    let after_prefix = &trimmed_start[prefix.len()..];
    let Some(first) = after_prefix.chars().next() else {
        return false;
    };
    if first != ' ' && first != '\t' {
        return false;
    }
    !after_prefix.trim_start_matches([' ', '\t']).is_empty()
}

/// Check if a line is a valid closing fence for the given fence info.
pub(crate) fn is_closing_fence(content: &str, fence: &FenceInfo) -> bool {
    let trimmed = strip_leading_spaces(content);

    if !trimmed.starts_with(fence.fence_char) {
        return false;
    }

    let closing_count = trimmed
        .chars()
        .take_while(|&c| c == fence.fence_char)
        .count();

    if closing_count < fence.fence_count {
        return false;
    }

    // Rest of line must be empty
    trimmed[closing_count..].trim().is_empty()
}

/// Emit chunk options as structured CST nodes while preserving all bytes.
/// This parses {r, echo=TRUE, fig.cap="text"} into CHUNK_OPTIONS with individual CHUNK_OPTION nodes.
fn emit_chunk_options(builder: &mut GreenNodeBuilder<'static>, content: &str) {
    if content.trim().is_empty() {
        builder.token(SyntaxKind::TEXT.into(), content);
        return;
    }

    builder.start_node(SyntaxKind::CHUNK_OPTIONS.into());

    let mut pos = 0;
    let bytes = content.as_bytes();

    while pos < bytes.len() {
        // Emit leading whitespace/commas as TEXT
        let ws_start = pos;
        while pos < bytes.len() {
            let ch = bytes[pos] as char;
            if ch != ' ' && ch != '\t' && ch != ',' {
                break;
            }
            pos += 1;
        }
        if pos > ws_start {
            builder.token(SyntaxKind::TEXT.into(), &content[ws_start..pos]);
        }

        if pos >= bytes.len() {
            break;
        }

        // Check if this is a closing brace
        if bytes[pos] as char == '}' {
            builder.token(SyntaxKind::TEXT.into(), &content[pos..pos + 1]);
            pos += 1;
            if pos < bytes.len() {
                builder.token(SyntaxKind::TEXT.into(), &content[pos..]);
            }
            break;
        }

        // Read key
        let key_start = pos;
        while pos < bytes.len() {
            let ch = bytes[pos] as char;
            if ch == '=' || ch == ' ' || ch == '\t' || ch == ',' || ch == '}' {
                break;
            }
            pos += 1;
        }

        if pos == key_start {
            // No key found, emit rest as TEXT
            if pos < bytes.len() {
                builder.token(SyntaxKind::TEXT.into(), &content[pos..]);
            }
            break;
        }

        let key = &content[key_start..pos];

        // Check for whitespace before '='
        let ws_before_eq_start = pos;
        while pos < bytes.len() && matches!(bytes[pos] as char, ' ' | '\t') {
            pos += 1;
        }

        // Check if there's a value (=)
        if pos < bytes.len() && bytes[pos] as char == '=' {
            // Has value - emit as CHUNK_OPTION
            builder.start_node(SyntaxKind::CHUNK_OPTION.into());
            builder.token(SyntaxKind::CHUNK_OPTION_KEY.into(), key);

            // Emit whitespace before '=' if any
            if pos > ws_before_eq_start {
                builder.token(SyntaxKind::TEXT.into(), &content[ws_before_eq_start..pos]);
            }

            builder.token(SyntaxKind::TEXT.into(), "=");
            pos += 1; // consume '='

            // Emit whitespace after '='
            let ws_after_eq_start = pos;
            while pos < bytes.len() && matches!(bytes[pos] as char, ' ' | '\t') {
                pos += 1;
            }
            if pos > ws_after_eq_start {
                builder.token(SyntaxKind::TEXT.into(), &content[ws_after_eq_start..pos]);
            }

            // Parse value (might be quoted)
            if pos < bytes.len() {
                let quote_char = bytes[pos] as char;
                if quote_char == '"' || quote_char == '\'' {
                    // Quoted value
                    builder.token(
                        SyntaxKind::CHUNK_OPTION_QUOTE.into(),
                        &content[pos..pos + 1],
                    );
                    pos += 1; // consume opening quote

                    let val_start = pos;
                    let mut escaped = false;
                    while pos < bytes.len() {
                        let ch = bytes[pos] as char;
                        if !escaped && ch == quote_char {
                            break;
                        }
                        escaped = !escaped && ch == '\\';
                        pos += 1;
                    }

                    if pos > val_start {
                        builder.token(
                            SyntaxKind::CHUNK_OPTION_VALUE.into(),
                            &content[val_start..pos],
                        );
                    }

                    // Emit closing quote
                    if pos < bytes.len() && bytes[pos] as char == quote_char {
                        builder.token(
                            SyntaxKind::CHUNK_OPTION_QUOTE.into(),
                            &content[pos..pos + 1],
                        );
                        pos += 1;
                    }
                } else {
                    // Unquoted value - read until comma, space, closing brace, or balanced delimiter
                    let val_start = pos;
                    let mut depth = 0;

                    while pos < bytes.len() {
                        let ch = bytes[pos] as char;
                        match ch {
                            '(' | '[' | '{' => depth += 1,
                            ')' | ']' => {
                                if depth > 0 {
                                    depth -= 1;
                                } else {
                                    break;
                                }
                            }
                            '}' => {
                                if depth > 0 {
                                    depth -= 1;
                                } else {
                                    break; // End of chunk options
                                }
                            }
                            ',' if depth == 0 => {
                                break; // Next option
                            }
                            ' ' | '\t' if depth == 0 => {
                                break; // Space separator
                            }
                            _ => {}
                        }
                        pos += 1;
                    }

                    if pos > val_start {
                        builder.token(
                            SyntaxKind::CHUNK_OPTION_VALUE.into(),
                            &content[val_start..pos],
                        );
                    }
                }
            }

            builder.finish_node(); // CHUNK_OPTION
        } else {
            // No '=' - this is a label or bareword option
            // Emit any whitespace we skipped as TEXT
            if pos > ws_before_eq_start {
                builder.start_node(SyntaxKind::CHUNK_LABEL.into());
                builder.token(SyntaxKind::TEXT.into(), key);
                builder.finish_node(); // CHUNK_LABEL
                builder.token(SyntaxKind::TEXT.into(), &content[ws_before_eq_start..pos]);
            } else {
                builder.start_node(SyntaxKind::CHUNK_LABEL.into());
                builder.token(SyntaxKind::TEXT.into(), key);
                builder.finish_node(); // CHUNK_LABEL
            }
        }
    }

    builder.finish_node(); // CHUNK_OPTIONS
}

/// Helper to parse info string and emit CodeInfo node with parsed components.
/// This breaks down the info string into its logical parts while preserving all bytes.
fn emit_code_info_node(builder: &mut GreenNodeBuilder<'static>, info_string: &str) {
    builder.start_node(SyntaxKind::CODE_INFO.into());

    let info = InfoString::parse(info_string);

    match &info.block_type {
        CodeBlockType::DisplayShortcut { language } => {
            // Simple case: python or python {.class}
            builder.token(SyntaxKind::CODE_LANGUAGE.into(), language);

            // If there's more after the language, emit it as TEXT
            let after_lang = &info_string[language.len()..];
            if !after_lang.is_empty() {
                builder.token(SyntaxKind::TEXT.into(), after_lang);
            }
        }
        CodeBlockType::Executable { language } => {
            // Quarto: {r} or {r my-label, echo=FALSE}
            builder.token(SyntaxKind::TEXT.into(), "{");
            builder.token(SyntaxKind::CODE_LANGUAGE.into(), language);

            // Parse and emit chunk options
            let start_offset = 1 + language.len(); // Skip "{r"
            if start_offset < info_string.len() {
                let rest = &info_string[start_offset..];
                emit_chunk_options(builder, rest);
            }
        }
        CodeBlockType::DisplayExplicit { classes } => {
            // Pandoc: {.python} or {#id .haskell .numberLines}
            // We need to find the first class in the raw string and emit everything around it

            if let Some(lang) = classes.first() {
                // Find where ".lang" appears in the info string
                let needle = format!(".{}", lang);
                if let Some(lang_start) = info_string.find(&needle) {
                    // Emit everything before the language
                    if lang_start > 0 {
                        builder.token(SyntaxKind::TEXT.into(), &info_string[..lang_start]);
                    }

                    // Emit the dot
                    builder.token(SyntaxKind::TEXT.into(), ".");

                    // Emit the language
                    builder.token(SyntaxKind::CODE_LANGUAGE.into(), lang);

                    // Emit everything after
                    let after_lang_start = lang_start + 1 + lang.len();
                    if after_lang_start < info_string.len() {
                        builder.token(SyntaxKind::TEXT.into(), &info_string[after_lang_start..]);
                    }
                } else {
                    // Couldn't find it, just emit as TEXT
                    builder.token(SyntaxKind::TEXT.into(), info_string);
                }
            } else {
                // No classes
                builder.token(SyntaxKind::TEXT.into(), info_string);
            }
        }
        CodeBlockType::Raw { .. } | CodeBlockType::Plain => {
            // No language, just emit as TEXT
            builder.token(SyntaxKind::TEXT.into(), info_string);
        }
    }

    builder.finish_node(); // CodeInfo
}

/// Parse a fenced code block, consuming lines from the parser.
/// Returns the new position after the code block.
/// Parse a fenced code block, consuming lines from the parser.
/// Returns the new position after the code block.
/// base_indent accounts for container indentation (e.g., footnotes) that should be stripped.
pub(crate) fn parse_fenced_code_block(
    builder: &mut GreenNodeBuilder<'static>,
    lines: &[&str],
    start_pos: usize,
    fence: FenceInfo,
    bq_depth: usize,
    base_indent: usize,
    first_line_override: Option<&str>,
) -> usize {
    // Start code block
    builder.start_node(SyntaxKind::CODE_BLOCK.into());

    // Opening fence
    let (first_trimmed, _first_inner) = prepare_fence_open_line(
        builder,
        lines[start_pos],
        first_line_override,
        bq_depth,
        base_indent,
    );

    builder.start_node(SyntaxKind::CODE_FENCE_OPEN.into());
    builder.token(
        SyntaxKind::CODE_FENCE_MARKER.into(),
        &first_trimmed[..fence.fence_count],
    );

    // Emit any space between fence and info string (for losslessness)
    let after_fence = &first_trimmed[fence.fence_count..];
    if let Some(_space_stripped) = after_fence.strip_prefix(' ') {
        // There was a space - emit it as WHITESPACE
        builder.token(SyntaxKind::WHITESPACE.into(), " ");
        // Parse and emit the info string as a structured node
        if !fence.info_string.is_empty() {
            emit_code_info_node(builder, &fence.info_string);
        }
    } else if !fence.info_string.is_empty() {
        // No space - parse and emit info_string as a structured node
        emit_code_info_node(builder, &fence.info_string);
    }

    // Extract and emit the actual newline from the opening fence line
    let (_, newline_str) = strip_newline(first_trimmed);
    if !newline_str.is_empty() {
        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
    }
    builder.finish_node(); // CodeFenceOpen

    let mut current_pos = start_pos + 1;
    let mut content_lines: Vec<&str> = Vec::new(); // Store original lines for lossless parsing
    let mut found_closing = false;

    while current_pos < lines.len() {
        let line = lines[current_pos];

        // Count blockquote markers to detect leaving the surrounding blockquote.
        let (line_bq_depth, _) = count_blockquote_markers(line);

        // If blockquote depth decreases, code block ends (we've left the blockquote)
        if line_bq_depth < bq_depth {
            break;
        }

        // Strip exactly the surrounding blockquote depth; preserve any additional `>` literally.
        let inner = if bq_depth > 0 {
            strip_n_blockquote_markers(line, bq_depth)
        } else {
            line
        };

        // Strip base indent (footnote context) from content lines for fence detection
        let base_indent_bytes = byte_index_at_column(inner, base_indent);
        let inner_stripped = if base_indent > 0 && inner.len() >= base_indent_bytes {
            &inner[base_indent_bytes..]
        } else {
            inner
        };

        // Check for closing fence
        if is_closing_fence(inner_stripped, &fence) {
            found_closing = true;
            current_pos += 1;
            break;
        }

        // Store the original line for lossless parsing.
        content_lines.push(line);
        current_pos += 1;
    }

    // Add content
    if !content_lines.is_empty() {
        builder.start_node(SyntaxKind::CODE_CONTENT.into());
        let hashpipe_prefix = match InfoString::parse(&fence.info_string).block_type {
            CodeBlockType::Executable { language } => hashpipe_comment_prefix(&language),
            _ => None,
        };

        let mut line_idx = 0usize;
        if let Some(prefix) = hashpipe_prefix {
            let prepared_hashpipe_lines =
                compute_hashpipe_preamble_line_count(&content_lines, prefix, bq_depth, base_indent);
            if prepared_hashpipe_lines > 0 {
                builder.start_node(SyntaxKind::HASHPIPE_YAML_PREAMBLE.into());
                builder.start_node(SyntaxKind::HASHPIPE_YAML_CONTENT.into());
                while line_idx < prepared_hashpipe_lines {
                    let content_line = content_lines[line_idx];
                    let after_indent =
                        emit_content_line_prefixes(builder, content_line, bq_depth, base_indent);
                    let (line_without_newline, newline_str) = strip_newline(after_indent);
                    if !emit_hashpipe_option_line(builder, line_without_newline, prefix) {
                        let _ =
                            emit_hashpipe_continuation_line(builder, line_without_newline, prefix);
                    }
                    if !newline_str.is_empty() {
                        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
                    }
                    line_idx += 1;
                }
                builder.finish_node(); // HASHPIPE_YAML_CONTENT
                builder.finish_node(); // HASHPIPE_YAML_PREAMBLE
            }
        }

        for content_line in content_lines.iter().skip(line_idx) {
            let after_indent =
                emit_content_line_prefixes(builder, content_line, bq_depth, base_indent);
            let (line_without_newline, newline_str) = strip_newline(after_indent);

            if !line_without_newline.is_empty() {
                builder.token(SyntaxKind::TEXT.into(), line_without_newline);
            }

            if !newline_str.is_empty() {
                builder.token(SyntaxKind::NEWLINE.into(), newline_str);
            }
        }
        builder.finish_node(); // CodeContent
    }

    // Closing fence (if found)
    if found_closing {
        let closing_line = lines[current_pos - 1];
        let closing_after_blockquote = if bq_depth > 0 {
            let stripped = strip_n_blockquote_markers(closing_line, bq_depth);
            let prefix_len = closing_line.len().saturating_sub(stripped.len());
            if prefix_len > 0 {
                emit_blockquote_prefix_tokens(builder, &closing_line[..prefix_len]);
            }
            stripped
        } else {
            closing_line
        };

        // Emit base indent for lossless parsing
        let base_indent_bytes = byte_index_at_column(closing_after_blockquote, base_indent);
        if base_indent > 0 && closing_after_blockquote.len() >= base_indent_bytes {
            let indent_str = &closing_after_blockquote[..base_indent_bytes];
            if !indent_str.is_empty() {
                builder.token(SyntaxKind::WHITESPACE.into(), indent_str);
            }
        }

        // Strip base indent to get fence
        let closing_stripped =
            if base_indent > 0 && closing_after_blockquote.len() >= base_indent_bytes {
                &closing_after_blockquote[base_indent_bytes..]
            } else {
                closing_after_blockquote
            };
        let (closing_without_newline, newline_str) = strip_newline(closing_stripped);
        let closing_trimmed_start = strip_leading_spaces(closing_without_newline);
        let leading_ws_len = closing_without_newline.len() - closing_trimmed_start.len();
        let closing_count = closing_trimmed_start
            .chars()
            .take_while(|&c| c == fence.fence_char)
            .count();
        let trailing_after_marker = &closing_trimmed_start[closing_count..];

        builder.start_node(SyntaxKind::CODE_FENCE_CLOSE.into());
        if leading_ws_len > 0 {
            builder.token(
                SyntaxKind::WHITESPACE.into(),
                &closing_without_newline[..leading_ws_len],
            );
        }
        builder.token(
            SyntaxKind::CODE_FENCE_MARKER.into(),
            &closing_trimmed_start[..closing_count],
        );
        if !trailing_after_marker.is_empty() {
            builder.token(SyntaxKind::WHITESPACE.into(), trailing_after_marker);
        }
        if !newline_str.is_empty() {
            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
        }
        builder.finish_node(); // CodeFenceClose
    }

    builder.finish_node(); // CodeBlock

    current_pos
}

/// Parse a GFM math fence (``` math ... ```) as DISPLAY_MATH while preserving bytes.
pub(crate) fn parse_fenced_math_block(
    builder: &mut GreenNodeBuilder<'static>,
    lines: &[&str],
    start_pos: usize,
    fence: FenceInfo,
    bq_depth: usize,
    base_indent: usize,
    first_line_override: Option<&str>,
) -> usize {
    builder.start_node(SyntaxKind::DISPLAY_MATH.into());

    let (first_trimmed, _first_inner) = prepare_fence_open_line(
        builder,
        lines[start_pos],
        first_line_override,
        bq_depth,
        base_indent,
    );
    let (opening_without_newline, opening_newline) = strip_newline(first_trimmed);
    builder.token(
        SyntaxKind::DISPLAY_MATH_MARKER.into(),
        opening_without_newline,
    );
    if !opening_newline.is_empty() {
        builder.token(SyntaxKind::NEWLINE.into(), opening_newline);
    }

    let mut current_pos = start_pos + 1;
    let mut content_lines: Vec<&str> = Vec::new();
    let mut found_closing = false;

    while current_pos < lines.len() {
        let line = lines[current_pos];
        let (line_bq_depth, _) = count_blockquote_markers(line);
        if line_bq_depth < bq_depth {
            break;
        }

        let inner = if bq_depth > 0 {
            strip_n_blockquote_markers(line, bq_depth)
        } else {
            line
        };
        let base_indent_bytes = byte_index_at_column(inner, base_indent);
        let inner_stripped = if base_indent > 0 && inner.len() >= base_indent_bytes {
            &inner[base_indent_bytes..]
        } else {
            inner
        };

        if is_closing_fence(inner_stripped, &fence) {
            found_closing = true;
            current_pos += 1;
            break;
        }

        content_lines.push(line);
        current_pos += 1;
    }

    if !content_lines.is_empty() {
        let mut content = String::new();
        for content_line in content_lines {
            let after_indent =
                emit_content_line_prefixes(builder, content_line, bq_depth, base_indent);
            let (line_without_newline, newline_str) = strip_newline(after_indent);
            content.push_str(line_without_newline);
            content.push_str(newline_str);
        }
        builder.token(SyntaxKind::TEXT.into(), &content);
    }

    if found_closing {
        let closing_line = lines[current_pos - 1];
        let closing_after_blockquote = if bq_depth > 0 {
            let stripped = strip_n_blockquote_markers(closing_line, bq_depth);
            let prefix_len = closing_line.len().saturating_sub(stripped.len());
            if prefix_len > 0 {
                emit_blockquote_prefix_tokens(builder, &closing_line[..prefix_len]);
            }
            stripped
        } else {
            closing_line
        };

        let base_indent_bytes = byte_index_at_column(closing_after_blockquote, base_indent);
        if base_indent > 0 && closing_after_blockquote.len() >= base_indent_bytes {
            let indent_str = &closing_after_blockquote[..base_indent_bytes];
            if !indent_str.is_empty() {
                builder.token(SyntaxKind::WHITESPACE.into(), indent_str);
            }
        }

        let closing_stripped =
            if base_indent > 0 && closing_after_blockquote.len() >= base_indent_bytes {
                &closing_after_blockquote[base_indent_bytes..]
            } else {
                closing_after_blockquote
            };
        let (closing_without_newline, newline_str) = strip_newline(closing_stripped);
        let closing_trimmed_start = strip_leading_spaces(closing_without_newline);
        let leading_ws_len = closing_without_newline.len() - closing_trimmed_start.len();
        let closing_count = closing_trimmed_start
            .chars()
            .take_while(|&c| c == fence.fence_char)
            .count();
        let trailing_after_marker = &closing_trimmed_start[closing_count..];

        if leading_ws_len > 0 {
            builder.token(
                SyntaxKind::WHITESPACE.into(),
                &closing_without_newline[..leading_ws_len],
            );
        }
        builder.token(
            SyntaxKind::DISPLAY_MATH_MARKER.into(),
            &closing_trimmed_start[..closing_count],
        );
        if !trailing_after_marker.is_empty() {
            builder.token(SyntaxKind::WHITESPACE.into(), trailing_after_marker);
        }
        if !newline_str.is_empty() {
            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
        }
    }

    builder.finish_node(); // DisplayMath
    current_pos
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_backtick_fence() {
        let fence = try_parse_fence_open("```python").unwrap();
        assert_eq!(fence.fence_char, '`');
        assert_eq!(fence.fence_count, 3);
        assert_eq!(fence.info_string, "python");
    }

    #[test]
    fn test_tilde_fence() {
        let fence = try_parse_fence_open("~~~").unwrap();
        assert_eq!(fence.fence_char, '~');
        assert_eq!(fence.fence_count, 3);
        assert_eq!(fence.info_string, "");
    }

    #[test]
    fn test_long_fence() {
        let fence = try_parse_fence_open("`````").unwrap();
        assert_eq!(fence.fence_count, 5);
    }

    #[test]
    fn test_two_backticks_invalid() {
        assert!(try_parse_fence_open("``").is_none());
    }

    #[test]
    fn test_backtick_fence_with_backtick_in_info_is_invalid() {
        assert!(try_parse_fence_open("`````hi````there`````").is_none());
    }

    #[test]
    fn test_closing_fence() {
        let fence = FenceInfo {
            fence_char: '`',
            fence_count: 3,
            info_string: String::new(),
        };
        assert!(is_closing_fence("```", &fence));
        assert!(is_closing_fence("````", &fence));
        assert!(!is_closing_fence("``", &fence));
        assert!(!is_closing_fence("~~~", &fence));
    }

    #[test]
    fn test_fenced_code_preserves_leading_gt() {
        let input = "```\n> foo\n```\n";
        let tree = crate::parse(input, None);
        assert_eq!(tree.text().to_string(), input);
    }

    #[test]
    fn test_fenced_code_in_blockquote_preserves_opening_fence_marker() {
        let input = "> ```\n> code\n> ```\n";
        let tree = crate::parse(input, None);
        assert_eq!(tree.text().to_string(), input);
    }

    #[test]
    fn test_fenced_code_in_definition_list_with_unicode_content_does_not_panic() {
        let input = "Term\n: ```\n├── pyproject.toml\n```\n";
        let tree = crate::parse(input, None);
        assert_eq!(tree.text().to_string(), input);
    }

    #[test]
    fn test_info_string_plain() {
        let info = InfoString::parse("");
        assert_eq!(info.block_type, CodeBlockType::Plain);
        assert!(info.attributes.is_empty());
    }

    #[test]
    fn test_info_string_shortcut() {
        let info = InfoString::parse("python");
        assert_eq!(
            info.block_type,
            CodeBlockType::DisplayShortcut {
                language: "python".to_string()
            }
        );
        assert!(info.attributes.is_empty());
    }

    #[test]
    fn test_info_string_shortcut_with_trailing() {
        let info = InfoString::parse("python extra stuff");
        assert_eq!(
            info.block_type,
            CodeBlockType::DisplayShortcut {
                language: "python".to_string()
            }
        );
    }

    #[test]
    fn test_info_string_display_explicit() {
        let info = InfoString::parse("{.python}");
        assert_eq!(
            info.block_type,
            CodeBlockType::DisplayExplicit {
                classes: vec!["python".to_string()]
            }
        );
    }

    #[test]
    fn test_info_string_display_explicit_multiple() {
        let info = InfoString::parse("{.python .numberLines}");
        assert_eq!(
            info.block_type,
            CodeBlockType::DisplayExplicit {
                classes: vec!["python".to_string(), "numberLines".to_string()]
            }
        );
    }

    #[test]
    fn test_info_string_executable() {
        let info = InfoString::parse("{python}");
        assert_eq!(
            info.block_type,
            CodeBlockType::Executable {
                language: "python".to_string()
            }
        );
    }

    #[test]
    fn test_info_string_executable_with_options() {
        let info = InfoString::parse("{python echo=false warning=true}");
        assert_eq!(
            info.block_type,
            CodeBlockType::Executable {
                language: "python".to_string()
            }
        );
        assert_eq!(info.attributes.len(), 2);
        assert_eq!(
            info.attributes[0],
            ("echo".to_string(), Some("false".to_string()))
        );
        assert_eq!(
            info.attributes[1],
            ("warning".to_string(), Some("true".to_string()))
        );
    }

    #[test]
    fn test_info_string_executable_with_commas() {
        let info = InfoString::parse("{r, echo=FALSE, warning=TRUE}");
        assert_eq!(
            info.block_type,
            CodeBlockType::Executable {
                language: "r".to_string()
            }
        );
        assert_eq!(info.attributes.len(), 2);
        assert_eq!(
            info.attributes[0],
            ("echo".to_string(), Some("FALSE".to_string()))
        );
        assert_eq!(
            info.attributes[1],
            ("warning".to_string(), Some("TRUE".to_string()))
        );
    }

    #[test]
    fn test_info_string_executable_mixed_commas_spaces() {
        // R-style with commas and spaces
        let info = InfoString::parse("{r, echo=FALSE, label=\"my chunk\"}");
        assert_eq!(
            info.block_type,
            CodeBlockType::Executable {
                language: "r".to_string()
            }
        );
        assert_eq!(info.attributes.len(), 2);
        assert_eq!(
            info.attributes[0],
            ("echo".to_string(), Some("FALSE".to_string()))
        );
        assert_eq!(
            info.attributes[1],
            ("label".to_string(), Some("my chunk".to_string()))
        );
    }

    #[test]
    fn test_info_string_mixed_shortcut_and_attrs() {
        let info = InfoString::parse("python {.numberLines}");
        assert_eq!(
            info.block_type,
            CodeBlockType::DisplayShortcut {
                language: "python".to_string()
            }
        );
        assert_eq!(info.attributes.len(), 1);
        assert_eq!(info.attributes[0], (".numberLines".to_string(), None));
    }

    #[test]
    fn test_info_string_mixed_with_key_value() {
        let info = InfoString::parse("python {.numberLines startFrom=\"100\"}");
        assert_eq!(
            info.block_type,
            CodeBlockType::DisplayShortcut {
                language: "python".to_string()
            }
        );
        assert_eq!(info.attributes.len(), 2);
        assert_eq!(info.attributes[0], (".numberLines".to_string(), None));
        assert_eq!(
            info.attributes[1],
            ("startFrom".to_string(), Some("100".to_string()))
        );
    }

    #[test]
    fn test_info_string_explicit_with_id_and_classes() {
        let info = InfoString::parse("{#mycode .haskell .numberLines startFrom=\"100\"}");
        assert_eq!(
            info.block_type,
            CodeBlockType::DisplayExplicit {
                classes: vec!["haskell".to_string(), "numberLines".to_string()]
            }
        );
        // Non-class attributes
        let has_id = info.attributes.iter().any(|(k, _)| k == "#mycode");
        let has_start = info
            .attributes
            .iter()
            .any(|(k, v)| k == "startFrom" && v == &Some("100".to_string()));
        assert!(has_id);
        assert!(has_start);
    }

    #[test]
    fn test_info_string_raw_html() {
        let info = InfoString::parse("{=html}");
        assert_eq!(
            info.block_type,
            CodeBlockType::Raw {
                format: "html".to_string()
            }
        );
        assert!(info.attributes.is_empty());
    }

    #[test]
    fn test_info_string_raw_latex() {
        let info = InfoString::parse("{=latex}");
        assert_eq!(
            info.block_type,
            CodeBlockType::Raw {
                format: "latex".to_string()
            }
        );
    }

    #[test]
    fn test_info_string_raw_openxml() {
        let info = InfoString::parse("{=openxml}");
        assert_eq!(
            info.block_type,
            CodeBlockType::Raw {
                format: "openxml".to_string()
            }
        );
    }

    #[test]
    fn test_info_string_raw_ms() {
        let info = InfoString::parse("{=ms}");
        assert_eq!(
            info.block_type,
            CodeBlockType::Raw {
                format: "ms".to_string()
            }
        );
    }

    #[test]
    fn test_info_string_raw_html5() {
        let info = InfoString::parse("{=html5}");
        assert_eq!(
            info.block_type,
            CodeBlockType::Raw {
                format: "html5".to_string()
            }
        );
    }

    #[test]
    fn test_info_string_raw_not_combined_with_attrs() {
        // If there are other attributes with =format, it should not be treated as raw
        let info = InfoString::parse("{=html .class}");
        // This should NOT be parsed as raw because there's more than one attribute
        assert_ne!(
            info.block_type,
            CodeBlockType::Raw {
                format: "html".to_string()
            }
        );
    }

    #[test]
    fn test_parse_pandoc_attributes_spaces() {
        // Pandoc display blocks use spaces as delimiters
        let attrs = InfoString::parse_pandoc_attributes(".python .numberLines startFrom=\"10\"");
        assert_eq!(attrs.len(), 3);
        assert_eq!(attrs[0], (".python".to_string(), None));
        assert_eq!(attrs[1], (".numberLines".to_string(), None));
        assert_eq!(attrs[2], ("startFrom".to_string(), Some("10".to_string())));
    }

    #[test]
    fn test_parse_pandoc_attributes_no_commas() {
        // Commas in Pandoc attributes should be treated as part of the value
        let attrs = InfoString::parse_pandoc_attributes("#id .class key=value");
        assert_eq!(attrs.len(), 3);
        assert_eq!(attrs[0], ("#id".to_string(), None));
        assert_eq!(attrs[1], (".class".to_string(), None));
        assert_eq!(attrs[2], ("key".to_string(), Some("value".to_string())));
    }

    #[test]
    fn test_parse_chunk_options_commas() {
        // Quarto/RMarkdown chunks use commas as delimiters
        let attrs = InfoString::parse_chunk_options("r, echo=FALSE, warning=TRUE");
        assert_eq!(attrs.len(), 3);
        assert_eq!(attrs[0], ("r".to_string(), None));
        assert_eq!(attrs[1], ("echo".to_string(), Some("FALSE".to_string())));
        assert_eq!(attrs[2], ("warning".to_string(), Some("TRUE".to_string())));
    }

    #[test]
    fn test_parse_chunk_options_no_spaces() {
        // Should handle comma-separated without spaces
        let attrs = InfoString::parse_chunk_options("r,echo=FALSE,warning=TRUE");
        assert_eq!(attrs.len(), 3);
        assert_eq!(attrs[0], ("r".to_string(), None));
        assert_eq!(attrs[1], ("echo".to_string(), Some("FALSE".to_string())));
        assert_eq!(attrs[2], ("warning".to_string(), Some("TRUE".to_string())));
    }

    #[test]
    fn test_parse_chunk_options_mixed() {
        // Handle both commas and spaces
        let attrs = InfoString::parse_chunk_options("python echo=False, warning=True");
        assert_eq!(attrs.len(), 3);
        assert_eq!(attrs[0], ("python".to_string(), None));
        assert_eq!(attrs[1], ("echo".to_string(), Some("False".to_string())));
        assert_eq!(attrs[2], ("warning".to_string(), Some("True".to_string())));
    }

    #[test]
    fn test_parse_chunk_options_nested_function_call() {
        // R function calls with nested commas should be treated as single value
        let attrs = InfoString::parse_chunk_options(r#"r pep-cg, dependson=c("foo", "bar")"#);
        assert_eq!(attrs.len(), 3);
        assert_eq!(attrs[0], ("r".to_string(), None));
        assert_eq!(attrs[1], ("pep-cg".to_string(), None));
        assert_eq!(
            attrs[2],
            (
                "dependson".to_string(),
                Some(r#"c("foo", "bar")"#.to_string())
            )
        );
    }

    #[test]
    fn test_parse_chunk_options_nested_with_spaces() {
        // Function call with spaces inside
        let attrs = InfoString::parse_chunk_options(r#"r, cache.path=file.path("cache", "dir")"#);
        assert_eq!(attrs.len(), 2);
        assert_eq!(attrs[0], ("r".to_string(), None));
        assert_eq!(
            attrs[1],
            (
                "cache.path".to_string(),
                Some(r#"file.path("cache", "dir")"#.to_string())
            )
        );
    }

    #[test]
    fn test_parse_chunk_options_deeply_nested() {
        // Multiple levels of nesting
        let attrs = InfoString::parse_chunk_options(r#"r, x=list(a=c(1,2), b=c(3,4))"#);
        assert_eq!(attrs.len(), 2);
        assert_eq!(attrs[0], ("r".to_string(), None));
        assert_eq!(
            attrs[1],
            (
                "x".to_string(),
                Some(r#"list(a=c(1,2), b=c(3,4))"#.to_string())
            )
        );
    }

    #[test]
    fn test_parse_chunk_options_brackets_and_braces() {
        // Test all bracket types
        let attrs = InfoString::parse_chunk_options(r#"r, data=df[rows, cols], config={a:1, b:2}"#);
        assert_eq!(attrs.len(), 3);
        assert_eq!(attrs[0], ("r".to_string(), None));
        assert_eq!(
            attrs[1],
            ("data".to_string(), Some("df[rows, cols]".to_string()))
        );
        assert_eq!(
            attrs[2],
            ("config".to_string(), Some("{a:1, b:2}".to_string()))
        );
    }

    #[test]
    fn test_parse_chunk_options_quotes_with_parens() {
        // Parentheses inside quoted strings shouldn't affect depth tracking
        // Note: The parser strips outer quotes from quoted values
        let attrs = InfoString::parse_chunk_options(r#"r, label="test (with parens)", echo=TRUE"#);
        assert_eq!(attrs.len(), 3);
        assert_eq!(attrs[0], ("r".to_string(), None));
        assert_eq!(
            attrs[1],
            ("label".to_string(), Some("test (with parens)".to_string()))
        );
        assert_eq!(attrs[2], ("echo".to_string(), Some("TRUE".to_string())));
    }

    #[test]
    fn test_parse_chunk_options_escaped_quotes() {
        // Escaped quotes inside string values
        // Note: The parser strips outer quotes and processes escapes
        let attrs = InfoString::parse_chunk_options(r#"r, label="has \"quoted\" text""#);
        assert_eq!(attrs.len(), 2);
        assert_eq!(attrs[0], ("r".to_string(), None));
        assert_eq!(
            attrs[1],
            (
                "label".to_string(),
                Some(r#"has "quoted" text"#.to_string())
            )
        );
    }

    #[test]
    fn test_display_vs_executable_parsing() {
        // Display block should use Pandoc parser (spaces)
        let info1 = InfoString::parse("{.python .numberLines startFrom=\"10\"}");
        assert!(matches!(
            info1.block_type,
            CodeBlockType::DisplayExplicit { .. }
        ));

        // Executable chunk should use chunk options parser (commas)
        let info2 = InfoString::parse("{r, echo=FALSE, warning=TRUE}");
        assert!(matches!(info2.block_type, CodeBlockType::Executable { .. }));
        assert_eq!(info2.attributes.len(), 2);
    }

    #[test]
    fn test_info_string_executable_implicit_label() {
        // {r mylabel} should parse as label=mylabel
        let info = InfoString::parse("{r mylabel}");
        assert!(matches!(
            info.block_type,
            CodeBlockType::Executable { ref language } if language == "r"
        ));
        assert_eq!(info.attributes.len(), 1);
        assert_eq!(
            info.attributes[0],
            ("label".to_string(), Some("mylabel".to_string()))
        );
    }

    #[test]
    fn test_info_string_executable_implicit_label_with_options() {
        // {r mylabel, echo=FALSE} should parse as label=mylabel, echo=FALSE
        let info = InfoString::parse("{r mylabel, echo=FALSE}");
        assert!(matches!(
            info.block_type,
            CodeBlockType::Executable { ref language } if language == "r"
        ));
        assert_eq!(info.attributes.len(), 2);
        assert_eq!(
            info.attributes[0],
            ("label".to_string(), Some("mylabel".to_string()))
        );
        assert_eq!(
            info.attributes[1],
            ("echo".to_string(), Some("FALSE".to_string()))
        );
    }

    #[test]
    fn test_compute_hashpipe_preamble_line_count_for_block_scalar() {
        let content_lines = vec![
            "#| fig-cap: |\n",
            "#|   A caption\n",
            "#|   spanning lines\n",
            "a <- 1\n",
        ];
        let count = compute_hashpipe_preamble_line_count(&content_lines, "#|", 0, 0);
        assert_eq!(count, 3);
    }

    #[test]
    fn test_compute_hashpipe_preamble_line_count_stops_at_non_option() {
        let content_lines = vec!["#| label: fig-plot\n", "plot(1:10)\n", "#| echo: false\n"];
        let count = compute_hashpipe_preamble_line_count(&content_lines, "#|", 0, 0);
        assert_eq!(count, 1);
    }

    #[test]
    fn test_compute_hashpipe_preamble_line_count_stops_at_standalone_prefix() {
        let content_lines = vec!["#| label: fig-plot\n", "#|\n", "plot(1:10)\n"];
        let count = compute_hashpipe_preamble_line_count(&content_lines, "#|", 0, 0);
        assert_eq!(count, 1);
    }
}