Skip to main content

panache_parser/parser/blocks/
code_blocks.rs

1//! Fenced code block parsing utilities.
2
3use crate::parser::utils::chunk_options::hashpipe_comment_prefix;
4use crate::syntax::SyntaxKind;
5use rowan::GreenNodeBuilder;
6
7use super::blockquotes::{count_blockquote_markers, strip_n_blockquote_markers};
8use crate::parser::utils::container_stack::byte_index_at_column;
9use crate::parser::utils::helpers::{
10    strip_leading_spaces, strip_newline, trim_end_spaces_tabs, trim_start_spaces_tabs,
11};
12
/// Represents the type of code block based on its info string syntax.
///
/// Values are produced by [`InfoString::parse`]. Besides the forms shown on
/// each variant, the parser also yields:
/// - `DisplayShortcut` for the mixed form `python {.numberLines}`, and
/// - `Plain` for a braced info string that carries attributes but no
///   language or class token (for example `{#my-id}`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CodeBlockType {
    /// Display-only block with shortcut syntax: ```python
    DisplayShortcut { language: String },
    /// Display-only block with explicit Pandoc syntax: ```{.python}
    DisplayExplicit { classes: Vec<String> },
    /// Executable chunk (Quarto/RMarkdown): ```{python}
    Executable { language: String },
    /// Raw block for specific output format: ```{=html}
    Raw { format: String },
    /// No language specified: ```
    Plain,
}
27
/// Parsed attributes from a code block info string.
///
/// Constructed by [`InfoString::parse`].
#[derive(Debug, Clone, PartialEq)]
pub struct InfoString {
    /// The info string exactly as it was handed to the parser (not trimmed).
    pub raw: String,
    /// Which info-string form was recognised.
    pub block_type: CodeBlockType,
    /// Attributes other than the language/class tokens; the value is `None`
    /// for a bare token that had no `=value` part.
    pub attributes: Vec<(String, Option<String>)>, // key-value pairs
}
35
36impl InfoString {
37    /// Parse an info string into structured attributes.
38    pub fn parse(raw: &str) -> Self {
39        let trimmed = raw.trim();
40
41        if trimmed.is_empty() {
42            return InfoString {
43                raw: raw.to_string(),
44                block_type: CodeBlockType::Plain,
45                attributes: Vec::new(),
46            };
47        }
48
49        // Check if it starts with '{' - explicit attribute block
50        if let Some(stripped) = trimmed.strip_prefix('{')
51            && let Some(content) = stripped.strip_suffix('}')
52        {
53            return Self::parse_explicit(raw, content);
54        }
55
56        // Check for mixed form: python {.numberLines}
57        if let Some(brace_start) = trimmed.find('{') {
58            let language = trimmed[..brace_start].trim();
59            if !language.is_empty() && !language.contains(char::is_whitespace) {
60                let attr_part = &trimmed[brace_start..];
61                if let Some(stripped) = attr_part.strip_prefix('{')
62                    && let Some(content) = stripped.strip_suffix('}')
63                {
64                    let attrs = Self::parse_attributes(content);
65                    return InfoString {
66                        raw: raw.to_string(),
67                        block_type: CodeBlockType::DisplayShortcut {
68                            language: language.to_string(),
69                        },
70                        attributes: attrs,
71                    };
72                }
73            }
74        }
75
76        // Otherwise, it's a shortcut form (just the language name)
77        // Only take the first word as language
78        let language = trimmed.split_whitespace().next().unwrap_or(trimmed);
79        InfoString {
80            raw: raw.to_string(),
81            block_type: CodeBlockType::DisplayShortcut {
82                language: language.to_string(),
83            },
84            attributes: Vec::new(),
85        }
86    }
87
88    fn parse_explicit(raw: &str, content: &str) -> Self {
89        // Check for raw attribute FIRST: {=format}
90        // The content should start with '=' and have only alphanumeric chars after
91        let trimmed_content = content.trim();
92        if let Some(format_name) = trimmed_content.strip_prefix('=') {
93            // Validate format name: alphanumeric only, no spaces
94            if !format_name.is_empty()
95                && format_name.chars().all(|c| c.is_alphanumeric())
96                && !format_name.contains(char::is_whitespace)
97            {
98                return InfoString {
99                    raw: raw.to_string(),
100                    block_type: CodeBlockType::Raw {
101                        format: format_name.to_string(),
102                    },
103                    attributes: Vec::new(),
104                };
105            }
106        }
107
108        // First, do a preliminary parse to determine block type
109        // Use chunk options parser (comma-aware) for initial detection
110        let prelim_attrs = Self::parse_chunk_options(content);
111
112        // First non-ID, non-attribute token determines if it's executable or display
113        let mut first_lang_token = None;
114        for (key, val) in prelim_attrs.iter() {
115            if val.is_none() && !key.starts_with('#') {
116                first_lang_token = Some(key.as_str());
117                break;
118            }
119        }
120
121        let first_token = first_lang_token.unwrap_or("");
122
123        if first_token.starts_with('.') {
124            // Display block: {.python} or {.haskell .numberLines}
125            // Re-parse with Pandoc-style parser (space-delimited)
126            let attrs = Self::parse_pandoc_attributes(content);
127
128            let classes: Vec<String> = attrs
129                .iter()
130                .filter(|(k, v)| k.starts_with('.') && v.is_none())
131                .map(|(k, _)| k[1..].to_string())
132                .collect();
133
134            let non_class_attrs: Vec<(String, Option<String>)> = attrs
135                .into_iter()
136                .filter(|(k, _)| !k.starts_with('.') || k.contains('='))
137                .collect();
138
139            InfoString {
140                raw: raw.to_string(),
141                block_type: CodeBlockType::DisplayExplicit { classes },
142                attributes: non_class_attrs,
143            }
144        } else if !first_token.is_empty() && !first_token.starts_with('#') {
145            // Executable chunk: {python} or {r}
146            // Use chunk options parser (comma-delimited)
147            let attrs = Self::parse_chunk_options(content);
148            let lang_index = attrs.iter().position(|(k, _)| k == first_token).unwrap();
149
150            // Check if there's a second bareword (implicit label in R/Quarto chunks)
151            // Pattern: {r mylabel} is equivalent to {r, label=mylabel}
152            let mut has_implicit_label = false;
153            let implicit_label_value = if lang_index + 1 < attrs.len() {
154                if let (label_key, None) = &attrs[lang_index + 1] {
155                    // Second bareword after language
156                    has_implicit_label = true;
157                    Some(label_key.clone())
158                } else {
159                    None
160                }
161            } else {
162                None
163            };
164
165            let mut final_attrs: Vec<(String, Option<String>)> = attrs
166                .into_iter()
167                .enumerate()
168                .filter(|(i, _)| {
169                    // Remove language token
170                    if *i == lang_index {
171                        return false;
172                    }
173                    // Remove implicit label token (will be added back explicitly)
174                    if has_implicit_label && *i == lang_index + 1 {
175                        return false;
176                    }
177                    true
178                })
179                .map(|(_, attr)| attr)
180                .collect();
181
182            // Add explicit label if we found an implicit one
183            if let Some(label_val) = implicit_label_value {
184                final_attrs.insert(0, ("label".to_string(), Some(label_val)));
185            }
186
187            InfoString {
188                raw: raw.to_string(),
189                block_type: CodeBlockType::Executable {
190                    language: first_token.to_string(),
191                },
192                attributes: final_attrs,
193            }
194        } else {
195            // Just attributes, no language - use Pandoc parser
196            let attrs = Self::parse_pandoc_attributes(content);
197            InfoString {
198                raw: raw.to_string(),
199                block_type: CodeBlockType::Plain,
200                attributes: attrs,
201            }
202        }
203    }
204
205    /// Parse Pandoc-style attributes for display blocks: {.class #id key="value"}
206    /// Spaces are the primary delimiter. Pandoc spec prefers explicit quoting.
207    fn parse_pandoc_attributes(content: &str) -> Vec<(String, Option<String>)> {
208        let mut attrs = Vec::new();
209        let mut chars = content.chars().peekable();
210
211        while chars.peek().is_some() {
212            // Skip whitespace
213            while matches!(chars.peek(), Some(&' ') | Some(&'\t')) {
214                chars.next();
215            }
216
217            if chars.peek().is_none() {
218                break;
219            }
220
221            // Read key
222            let mut key = String::new();
223            while let Some(&ch) = chars.peek() {
224                if ch == '=' || ch == ' ' || ch == '\t' {
225                    break;
226                }
227                key.push(ch);
228                chars.next();
229            }
230
231            if key.is_empty() {
232                break;
233            }
234
235            // Skip whitespace
236            while matches!(chars.peek(), Some(&' ') | Some(&'\t')) {
237                chars.next();
238            }
239
240            // Check for value
241            if chars.peek() == Some(&'=') {
242                chars.next(); // consume '='
243
244                // Skip whitespace after '='
245                while matches!(chars.peek(), Some(&' ') | Some(&'\t')) {
246                    chars.next();
247                }
248
249                // Read value (might be quoted)
250                let value = if chars.peek() == Some(&'"') {
251                    chars.next(); // consume opening quote
252                    let mut val = String::new();
253                    while let Some(&ch) = chars.peek() {
254                        chars.next();
255                        if ch == '"' {
256                            break;
257                        }
258                        if ch == '\\' {
259                            if let Some(&next_ch) = chars.peek() {
260                                chars.next();
261                                val.push(next_ch);
262                            }
263                        } else {
264                            val.push(ch);
265                        }
266                    }
267                    val
268                } else {
269                    // Unquoted value - read until space
270                    let mut val = String::new();
271                    while let Some(&ch) = chars.peek() {
272                        if ch == ' ' || ch == '\t' {
273                            break;
274                        }
275                        val.push(ch);
276                        chars.next();
277                    }
278                    val
279                };
280
281                attrs.push((key, Some(value)));
282            } else {
283                attrs.push((key, None));
284            }
285        }
286
287        attrs
288    }
289
290    /// Parse Quarto/RMarkdown chunk options: {language, option=value, option2=value2}
291    /// Commas are the primary delimiter (R CSV style). Supports unquoted barewords.
292    fn parse_chunk_options(content: &str) -> Vec<(String, Option<String>)> {
293        let mut attrs = Vec::new();
294        let mut chars = content.chars().peekable();
295
296        while chars.peek().is_some() {
297            // Skip whitespace and commas
298            while matches!(chars.peek(), Some(&' ') | Some(&'\t') | Some(&',')) {
299                chars.next();
300            }
301
302            if chars.peek().is_none() {
303                break;
304            }
305
306            // Read key
307            let mut key = String::new();
308            while let Some(&ch) = chars.peek() {
309                if ch == '=' || ch == ' ' || ch == '\t' || ch == ',' {
310                    break;
311                }
312                key.push(ch);
313                chars.next();
314            }
315
316            if key.is_empty() {
317                break;
318            }
319
320            // Skip whitespace and commas
321            while matches!(chars.peek(), Some(&' ') | Some(&'\t') | Some(&',')) {
322                chars.next();
323            }
324
325            // Check for value
326            if chars.peek() == Some(&'=') {
327                chars.next(); // consume '='
328
329                // Skip whitespace and commas after '='
330                while matches!(chars.peek(), Some(&' ') | Some(&'\t') | Some(&',')) {
331                    chars.next();
332                }
333
334                // Read value (might be quoted)
335                let value = if chars.peek() == Some(&'"') {
336                    chars.next(); // consume opening quote
337                    let mut val = String::new();
338                    while let Some(&ch) = chars.peek() {
339                        chars.next();
340                        if ch == '"' {
341                            break;
342                        }
343                        if ch == '\\' {
344                            if let Some(&next_ch) = chars.peek() {
345                                chars.next();
346                                val.push(next_ch);
347                            }
348                        } else {
349                            val.push(ch);
350                        }
351                    }
352                    val
353                } else {
354                    // Unquoted value - read until comma, space, or tab at depth 0
355                    // Track nesting depth for (), [], {} and quote state
356                    let mut val = String::new();
357                    let mut depth = 0; // Track parentheses/brackets/braces depth
358                    let mut in_quote: Option<char> = None; // Track if inside ' or "
359                    let mut escaped = false; // Track if previous char was backslash
360
361                    while let Some(&ch) = chars.peek() {
362                        // Handle escape sequences
363                        if escaped {
364                            val.push(ch);
365                            chars.next();
366                            escaped = false;
367                            continue;
368                        }
369
370                        if ch == '\\' {
371                            val.push(ch);
372                            chars.next();
373                            escaped = true;
374                            continue;
375                        }
376
377                        // Handle quotes
378                        if let Some(quote_char) = in_quote {
379                            val.push(ch);
380                            chars.next();
381                            if ch == quote_char {
382                                in_quote = None; // Close quote
383                            }
384                            continue;
385                        }
386
387                        // Not in a quote - check for quote start
388                        if ch == '"' || ch == '\'' {
389                            in_quote = Some(ch);
390                            val.push(ch);
391                            chars.next();
392                            continue;
393                        }
394
395                        // Track nesting depth (only when not in quotes)
396                        if ch == '(' || ch == '[' || ch == '{' {
397                            depth += 1;
398                            val.push(ch);
399                            chars.next();
400                            continue;
401                        }
402
403                        if ch == ')' || ch == ']' || ch == '}' {
404                            depth -= 1;
405                            val.push(ch);
406                            chars.next();
407                            continue;
408                        }
409
410                        // Check for delimiters - only break at depth 0
411                        if depth == 0 && (ch == ' ' || ch == '\t' || ch == ',') {
412                            break;
413                        }
414
415                        // Regular character
416                        val.push(ch);
417                        chars.next();
418                    }
419                    val
420                };
421
422                attrs.push((key, Some(value)));
423            } else {
424                attrs.push((key, None));
425            }
426        }
427
428        attrs
429    }
430
    /// Legacy function - kept for backward compatibility in mixed-form parsing
    /// For new code, use parse_pandoc_attributes or parse_chunk_options
    ///
    /// `content` is the text between the braces of the mixed form
    /// (`lang {...}`). The result is exactly what `parse_chunk_options`
    /// returns for the same input.
    fn parse_attributes(content: &str) -> Vec<(String, Option<String>)> {
        // Default to chunk options parsing (comma-aware)
        Self::parse_chunk_options(content)
    }
437}
438
/// Information about a detected code fence opening.
///
/// Filled in by `try_parse_fence_open` and consulted by `is_closing_fence`.
#[derive(Debug, Clone)]
pub(crate) struct FenceInfo {
    /// The fence marker character: a backtick or `~`.
    pub fence_char: char,
    /// Number of consecutive marker characters in the opening fence (at least 3).
    pub fence_count: usize,
    /// Text after the fence markers, without the line ending and without one
    /// leading space (if there was one).
    pub info_string: String,
}
446
447pub(crate) fn is_gfm_math_fence(fence: &FenceInfo) -> bool {
448    fence.info_string.trim() == "math"
449}
450
451/// Try to detect a fenced code block opening from content.
452/// Returns fence info if this is a valid opening fence.
453pub(crate) fn try_parse_fence_open(content: &str) -> Option<FenceInfo> {
454    let trimmed = strip_leading_spaces(content);
455
456    // Check for fence opening (``` or ~~~)
457    let (fence_char, fence_count) = if trimmed.starts_with('`') {
458        let count = trimmed.chars().take_while(|&c| c == '`').count();
459        ('`', count)
460    } else if trimmed.starts_with('~') {
461        let count = trimmed.chars().take_while(|&c| c == '~').count();
462        ('~', count)
463    } else {
464        return None;
465    };
466
467    if fence_count < 3 {
468        return None;
469    }
470
471    let info_string_raw = &trimmed[fence_count..];
472    // Strip trailing newline (LF or CRLF) and at most one leading space
473    let (info_string_trimmed, _) = strip_newline(info_string_raw);
474    let info_string = if let Some(stripped) = info_string_trimmed.strip_prefix(' ') {
475        stripped.to_string()
476    } else {
477        info_string_trimmed.to_string()
478    };
479
480    // Backtick-fenced blocks cannot have backticks in the info string.
481    if fence_char == '`' && info_string.contains('`') {
482        return None;
483    }
484
485    Some(FenceInfo {
486        fence_char,
487        fence_count,
488        info_string,
489    })
490}
491
492fn prepare_fence_open_line<'a>(
493    builder: &mut GreenNodeBuilder<'static>,
494    source_line: &'a str,
495    first_line_override: Option<&'a str>,
496    bq_depth: usize,
497    base_indent: usize,
498) -> (&'a str, &'a str) {
499    let first_line = first_line_override.unwrap_or(source_line);
500
501    // Only strip blockquote markers for the *surrounding* blockquote depth.
502    // Anything beyond that (e.g. a literal `>` inside the code block) must be preserved.
503    let first_inner = if bq_depth > 0 && first_line_override.is_none() {
504        strip_n_blockquote_markers(first_line, bq_depth)
505    } else {
506        if bq_depth > 0 && first_line_override.is_some() && source_line != first_line {
507            let stripped = strip_n_blockquote_markers(source_line, bq_depth);
508            let prefix_len = source_line.len().saturating_sub(stripped.len());
509            if prefix_len > 0 {
510                emit_blockquote_prefix_tokens(builder, &source_line[..prefix_len]);
511            }
512        }
513        first_line
514    };
515
516    // For lossless parsing: emit the base indent before stripping it
517    let first_base_indent = if first_line_override.is_some() {
518        0
519    } else {
520        base_indent
521    };
522    let first_base_indent_bytes = byte_index_at_column(first_inner, first_base_indent);
523    let first_stripped = if first_base_indent > 0 && first_inner.len() >= first_base_indent_bytes {
524        let indent_str = &first_inner[..first_base_indent_bytes];
525        if !indent_str.is_empty() {
526            builder.token(SyntaxKind::WHITESPACE.into(), indent_str);
527        }
528        &first_inner[first_base_indent_bytes..]
529    } else {
530        first_inner
531    };
532
533    let first_trimmed = strip_leading_spaces(first_stripped);
534    let leading_ws_len = first_stripped.len().saturating_sub(first_trimmed.len());
535    if leading_ws_len > 0 {
536        builder.token(
537            SyntaxKind::WHITESPACE.into(),
538            &first_stripped[..leading_ws_len],
539        );
540    }
541    (first_trimmed, first_inner)
542}
543
544fn emit_blockquote_prefix_tokens(builder: &mut GreenNodeBuilder<'static>, prefix: &str) {
545    for ch in prefix.chars() {
546        if ch == '>' {
547            builder.token(SyntaxKind::BLOCK_QUOTE_MARKER.into(), ">");
548        } else {
549            let mut buf = [0u8; 4];
550            builder.token(SyntaxKind::WHITESPACE.into(), ch.encode_utf8(&mut buf));
551        }
552    }
553}
554
555fn emit_content_line_prefixes<'a>(
556    builder: &mut GreenNodeBuilder<'static>,
557    content_line: &'a str,
558    bq_depth: usize,
559    base_indent: usize,
560) -> &'a str {
561    let after_blockquote = if bq_depth > 0 {
562        let stripped = strip_n_blockquote_markers(content_line, bq_depth);
563        let prefix_len = content_line.len().saturating_sub(stripped.len());
564        if prefix_len > 0 {
565            emit_blockquote_prefix_tokens(builder, &content_line[..prefix_len]);
566        }
567        stripped
568    } else {
569        content_line
570    };
571
572    let base_indent_bytes = byte_index_at_column(after_blockquote, base_indent);
573    if base_indent > 0 && after_blockquote.len() >= base_indent_bytes {
574        let indent_str = &after_blockquote[..base_indent_bytes];
575        if !indent_str.is_empty() {
576            builder.token(SyntaxKind::WHITESPACE.into(), indent_str);
577        }
578        &after_blockquote[base_indent_bytes..]
579    } else {
580        after_blockquote
581    }
582}
583
584fn strip_content_line_prefixes(content_line: &str, bq_depth: usize, base_indent: usize) -> &str {
585    let after_blockquote = if bq_depth > 0 {
586        strip_n_blockquote_markers(content_line, bq_depth)
587    } else {
588        content_line
589    };
590
591    let base_indent_bytes = byte_index_at_column(after_blockquote, base_indent);
592    if base_indent > 0 && after_blockquote.len() >= base_indent_bytes {
593        &after_blockquote[base_indent_bytes..]
594    } else {
595        after_blockquote
596    }
597}
598
599pub(crate) fn compute_hashpipe_preamble_line_count(
600    content_lines: &[&str],
601    prefix: &str,
602    bq_depth: usize,
603    base_indent: usize,
604) -> usize {
605    let mut line_idx = 0usize;
606
607    while line_idx < content_lines.len() {
608        let preview_after_indent =
609            strip_content_line_prefixes(content_lines[line_idx], bq_depth, base_indent);
610        let (preview_without_newline, _) = strip_newline(preview_after_indent);
611        if !is_hashpipe_option_line(preview_without_newline, prefix)
612            && !is_hashpipe_continuation_line(preview_without_newline, prefix)
613        {
614            break;
615        }
616        line_idx += 1;
617    }
618
619    line_idx
620}
621
622fn emit_hashpipe_option_line(
623    builder: &mut GreenNodeBuilder<'static>,
624    line_without_newline: &str,
625    prefix: &str,
626) -> bool {
627    if !is_hashpipe_option_line(line_without_newline, prefix) {
628        return false;
629    }
630
631    let trimmed_start = trim_start_spaces_tabs(line_without_newline);
632    let leading_ws_len = line_without_newline
633        .len()
634        .saturating_sub(trimmed_start.len());
635    let after_prefix = &trimmed_start[prefix.len()..];
636    let ws_after_prefix_len = after_prefix
637        .len()
638        .saturating_sub(trim_start_spaces_tabs(after_prefix).len());
639    let rest = &after_prefix[ws_after_prefix_len..];
640    let Some(colon_idx) = rest.find(':') else {
641        return false;
642    };
643
644    let key_with_ws = &rest[..colon_idx];
645    let key = trim_end_spaces_tabs(key_with_ws);
646    if key.is_empty() {
647        return false;
648    }
649    let key_ws_suffix = &key_with_ws[key.len()..];
650
651    let after_colon = &rest[colon_idx + 1..];
652    let value_ws_prefix_len = after_colon
653        .len()
654        .saturating_sub(trim_start_spaces_tabs(after_colon).len());
655    let value_with_trailing = &after_colon[value_ws_prefix_len..];
656    let value = trim_end_spaces_tabs(value_with_trailing);
657    let value_ws_suffix = &value_with_trailing[value.len()..];
658
659    builder.start_node(SyntaxKind::CHUNK_OPTION.into());
660    if leading_ws_len > 0 {
661        builder.token(
662            SyntaxKind::WHITESPACE.into(),
663            &line_without_newline[..leading_ws_len],
664        );
665    }
666    builder.token(SyntaxKind::HASHPIPE_PREFIX.into(), prefix);
667    if ws_after_prefix_len > 0 {
668        builder.token(
669            SyntaxKind::WHITESPACE.into(),
670            &after_prefix[..ws_after_prefix_len],
671        );
672    }
673
674    builder.token(SyntaxKind::CHUNK_OPTION_KEY.into(), key);
675    if !key_ws_suffix.is_empty() {
676        builder.token(SyntaxKind::WHITESPACE.into(), key_ws_suffix);
677    }
678    builder.token(SyntaxKind::TEXT.into(), ":");
679    if value_ws_prefix_len > 0 {
680        builder.token(
681            SyntaxKind::WHITESPACE.into(),
682            &after_colon[..value_ws_prefix_len],
683        );
684    }
685
686    if !value.is_empty() {
687        if let Some(quote) = value.chars().next()
688            && (quote == '"' || quote == '\'')
689            && value.ends_with(quote)
690            && value.len() >= 2
691        {
692            builder.token(SyntaxKind::CHUNK_OPTION_QUOTE.into(), &value[..1]);
693            builder.token(
694                SyntaxKind::CHUNK_OPTION_VALUE.into(),
695                &value[1..value.len() - 1],
696            );
697            builder.token(
698                SyntaxKind::CHUNK_OPTION_QUOTE.into(),
699                &value[value.len() - 1..],
700            );
701        } else {
702            builder.token(SyntaxKind::CHUNK_OPTION_VALUE.into(), value);
703        }
704    }
705
706    if !value_ws_suffix.is_empty() {
707        builder.token(SyntaxKind::WHITESPACE.into(), value_ws_suffix);
708    }
709    builder.finish_node();
710    true
711}
712
713fn emit_hashpipe_continuation_line(
714    builder: &mut GreenNodeBuilder<'static>,
715    line_without_newline: &str,
716    prefix: &str,
717) -> bool {
718    if !is_hashpipe_continuation_line(line_without_newline, prefix) {
719        return false;
720    }
721    let trimmed_start = trim_start_spaces_tabs(line_without_newline);
722    let leading_ws_len = line_without_newline
723        .len()
724        .saturating_sub(trimmed_start.len());
725    let after_prefix = &trimmed_start[prefix.len()..];
726    let ws_after_prefix_len = after_prefix
727        .len()
728        .saturating_sub(trim_start_spaces_tabs(after_prefix).len());
729    let continuation_with_trailing = &after_prefix[ws_after_prefix_len..];
730    let continuation_value = trim_end_spaces_tabs(continuation_with_trailing);
731    if continuation_value.is_empty() {
732        return false;
733    }
734    let continuation_ws_suffix = &continuation_with_trailing[continuation_value.len()..];
735
736    builder.start_node(SyntaxKind::CHUNK_OPTION.into());
737    if leading_ws_len > 0 {
738        builder.token(
739            SyntaxKind::WHITESPACE.into(),
740            &line_without_newline[..leading_ws_len],
741        );
742    }
743    builder.token(SyntaxKind::HASHPIPE_PREFIX.into(), prefix);
744    if ws_after_prefix_len > 0 {
745        builder.token(
746            SyntaxKind::WHITESPACE.into(),
747            &after_prefix[..ws_after_prefix_len],
748        );
749    }
750    builder.token(SyntaxKind::CHUNK_OPTION_VALUE.into(), continuation_value);
751    if !continuation_ws_suffix.is_empty() {
752        builder.token(SyntaxKind::WHITESPACE.into(), continuation_ws_suffix);
753    }
754    builder.finish_node();
755    true
756}
757
758fn is_hashpipe_option_line(line_without_newline: &str, prefix: &str) -> bool {
759    let trimmed_start = trim_start_spaces_tabs(line_without_newline);
760    if !trimmed_start.starts_with(prefix) {
761        return false;
762    }
763    let after_prefix = &trimmed_start[prefix.len()..];
764    let rest = trim_start_spaces_tabs(after_prefix);
765    let Some(colon_idx) = rest.find(':') else {
766        return false;
767    };
768    let key = trim_end_spaces_tabs(&rest[..colon_idx]);
769    if key.is_empty() {
770        return false;
771    }
772    true
773}
774
775fn is_hashpipe_continuation_line(line_without_newline: &str, prefix: &str) -> bool {
776    let trimmed_start = trim_start_spaces_tabs(line_without_newline);
777    if !trimmed_start.starts_with(prefix) {
778        return false;
779    }
780    let after_prefix = &trimmed_start[prefix.len()..];
781    let Some(first) = after_prefix.chars().next() else {
782        return false;
783    };
784    if first != ' ' && first != '\t' {
785        return false;
786    }
787    !trim_start_spaces_tabs(after_prefix).is_empty()
788}
789
790/// Check if a line is a valid closing fence for the given fence info.
791pub(crate) fn is_closing_fence(content: &str, fence: &FenceInfo) -> bool {
792    let trimmed = strip_leading_spaces(content);
793
794    if !trimmed.starts_with(fence.fence_char) {
795        return false;
796    }
797
798    let closing_count = trimmed
799        .chars()
800        .take_while(|&c| c == fence.fence_char)
801        .count();
802
803    if closing_count < fence.fence_count {
804        return false;
805    }
806
807    // Rest of line must be empty
808    trimmed[closing_count..].trim().is_empty()
809}
810
811/// Emit chunk options as structured CST nodes while preserving all bytes.
812/// This parses {r, echo=TRUE, fig.cap="text"} into CHUNK_OPTIONS with individual CHUNK_OPTION nodes.
813fn emit_chunk_options(builder: &mut GreenNodeBuilder<'static>, content: &str) {
814    if content.trim().is_empty() {
815        builder.token(SyntaxKind::TEXT.into(), content);
816        return;
817    }
818
819    builder.start_node(SyntaxKind::CHUNK_OPTIONS.into());
820
821    let mut pos = 0;
822    let bytes = content.as_bytes();
823
824    while pos < bytes.len() {
825        // Emit leading whitespace/commas as TEXT
826        let ws_start = pos;
827        while pos < bytes.len() {
828            let ch = bytes[pos] as char;
829            if ch != ' ' && ch != '\t' && ch != ',' {
830                break;
831            }
832            pos += 1;
833        }
834        if pos > ws_start {
835            builder.token(SyntaxKind::TEXT.into(), &content[ws_start..pos]);
836        }
837
838        if pos >= bytes.len() {
839            break;
840        }
841
842        // Check if this is a closing brace
843        if bytes[pos] as char == '}' {
844            builder.token(SyntaxKind::TEXT.into(), &content[pos..pos + 1]);
845            pos += 1;
846            if pos < bytes.len() {
847                builder.token(SyntaxKind::TEXT.into(), &content[pos..]);
848            }
849            break;
850        }
851
852        // Read key
853        let key_start = pos;
854        while pos < bytes.len() {
855            let ch = bytes[pos] as char;
856            if ch == '=' || ch == ' ' || ch == '\t' || ch == ',' || ch == '}' {
857                break;
858            }
859            pos += 1;
860        }
861
862        if pos == key_start {
863            // No key found, emit rest as TEXT
864            if pos < bytes.len() {
865                builder.token(SyntaxKind::TEXT.into(), &content[pos..]);
866            }
867            break;
868        }
869
870        let key = &content[key_start..pos];
871
872        // Check for whitespace before '='
873        let ws_before_eq_start = pos;
874        while pos < bytes.len() && matches!(bytes[pos] as char, ' ' | '\t') {
875            pos += 1;
876        }
877
878        // Check if there's a value (=)
879        if pos < bytes.len() && bytes[pos] as char == '=' {
880            // Has value - emit as CHUNK_OPTION
881            builder.start_node(SyntaxKind::CHUNK_OPTION.into());
882            builder.token(SyntaxKind::CHUNK_OPTION_KEY.into(), key);
883
884            // Emit whitespace before '=' if any
885            if pos > ws_before_eq_start {
886                builder.token(SyntaxKind::TEXT.into(), &content[ws_before_eq_start..pos]);
887            }
888
889            builder.token(SyntaxKind::TEXT.into(), "=");
890            pos += 1; // consume '='
891
892            // Emit whitespace after '='
893            let ws_after_eq_start = pos;
894            while pos < bytes.len() && matches!(bytes[pos] as char, ' ' | '\t') {
895                pos += 1;
896            }
897            if pos > ws_after_eq_start {
898                builder.token(SyntaxKind::TEXT.into(), &content[ws_after_eq_start..pos]);
899            }
900
901            // Parse value (might be quoted)
902            if pos < bytes.len() {
903                let quote_char = bytes[pos] as char;
904                if quote_char == '"' || quote_char == '\'' {
905                    // Quoted value
906                    builder.token(
907                        SyntaxKind::CHUNK_OPTION_QUOTE.into(),
908                        &content[pos..pos + 1],
909                    );
910                    pos += 1; // consume opening quote
911
912                    let val_start = pos;
913                    let mut escaped = false;
914                    while pos < bytes.len() {
915                        let ch = bytes[pos] as char;
916                        if !escaped && ch == quote_char {
917                            break;
918                        }
919                        escaped = !escaped && ch == '\\';
920                        pos += 1;
921                    }
922
923                    if pos > val_start {
924                        builder.token(
925                            SyntaxKind::CHUNK_OPTION_VALUE.into(),
926                            &content[val_start..pos],
927                        );
928                    }
929
930                    // Emit closing quote
931                    if pos < bytes.len() && bytes[pos] as char == quote_char {
932                        builder.token(
933                            SyntaxKind::CHUNK_OPTION_QUOTE.into(),
934                            &content[pos..pos + 1],
935                        );
936                        pos += 1;
937                    }
938                } else {
939                    // Unquoted value - read until comma, space, closing brace, or balanced delimiter
940                    let val_start = pos;
941                    let mut depth = 0;
942
943                    while pos < bytes.len() {
944                        let ch = bytes[pos] as char;
945                        match ch {
946                            '(' | '[' | '{' => depth += 1,
947                            ')' | ']' => {
948                                if depth > 0 {
949                                    depth -= 1;
950                                } else {
951                                    break;
952                                }
953                            }
954                            '}' => {
955                                if depth > 0 {
956                                    depth -= 1;
957                                } else {
958                                    break; // End of chunk options
959                                }
960                            }
961                            ',' if depth == 0 => {
962                                break; // Next option
963                            }
964                            ' ' | '\t' if depth == 0 => {
965                                break; // Space separator
966                            }
967                            _ => {}
968                        }
969                        pos += 1;
970                    }
971
972                    if pos > val_start {
973                        builder.token(
974                            SyntaxKind::CHUNK_OPTION_VALUE.into(),
975                            &content[val_start..pos],
976                        );
977                    }
978                }
979            }
980
981            builder.finish_node(); // CHUNK_OPTION
982        } else {
983            // No '=' - this is a label or bareword option
984            // Emit any whitespace we skipped as TEXT
985            if pos > ws_before_eq_start {
986                builder.start_node(SyntaxKind::CHUNK_LABEL.into());
987                builder.token(SyntaxKind::TEXT.into(), key);
988                builder.finish_node(); // CHUNK_LABEL
989                builder.token(SyntaxKind::TEXT.into(), &content[ws_before_eq_start..pos]);
990            } else {
991                builder.start_node(SyntaxKind::CHUNK_LABEL.into());
992                builder.token(SyntaxKind::TEXT.into(), key);
993                builder.finish_node(); // CHUNK_LABEL
994            }
995        }
996    }
997
998    builder.finish_node(); // CHUNK_OPTIONS
999}
1000
1001/// Helper to parse info string and emit CodeInfo node with parsed components.
1002/// This breaks down the info string into its logical parts while preserving all bytes.
1003fn emit_code_info_node(builder: &mut GreenNodeBuilder<'static>, info_string: &str) {
1004    builder.start_node(SyntaxKind::CODE_INFO.into());
1005
1006    let info = InfoString::parse(info_string);
1007
1008    match &info.block_type {
1009        CodeBlockType::DisplayShortcut { language } => {
1010            // Simple case: python or python {.class}
1011            builder.token(SyntaxKind::CODE_LANGUAGE.into(), language);
1012
1013            // If there's more after the language, emit it as TEXT
1014            let after_lang = &info_string[language.len()..];
1015            if !after_lang.is_empty() {
1016                builder.token(SyntaxKind::TEXT.into(), after_lang);
1017            }
1018        }
1019        CodeBlockType::Executable { language } => {
1020            // Quarto: {r} or {r my-label, echo=FALSE}
1021            builder.token(SyntaxKind::TEXT.into(), "{");
1022            builder.token(SyntaxKind::CODE_LANGUAGE.into(), language);
1023
1024            // Parse and emit chunk options
1025            let start_offset = 1 + language.len(); // Skip "{r"
1026            if start_offset < info_string.len() {
1027                let rest = &info_string[start_offset..];
1028                emit_chunk_options(builder, rest);
1029            }
1030        }
1031        CodeBlockType::DisplayExplicit { classes } => {
1032            // Pandoc: {.python} or {#id .haskell .numberLines}
1033            // We need to find the first class in the raw string and emit everything around it
1034
1035            if let Some(lang) = classes.first() {
1036                // Find where ".lang" appears in the info string
1037                let needle = format!(".{}", lang);
1038                if let Some(lang_start) = info_string.find(&needle) {
1039                    // Emit everything before the language
1040                    if lang_start > 0 {
1041                        builder.token(SyntaxKind::TEXT.into(), &info_string[..lang_start]);
1042                    }
1043
1044                    // Emit the dot
1045                    builder.token(SyntaxKind::TEXT.into(), ".");
1046
1047                    // Emit the language
1048                    builder.token(SyntaxKind::CODE_LANGUAGE.into(), lang);
1049
1050                    // Emit everything after
1051                    let after_lang_start = lang_start + 1 + lang.len();
1052                    if after_lang_start < info_string.len() {
1053                        builder.token(SyntaxKind::TEXT.into(), &info_string[after_lang_start..]);
1054                    }
1055                } else {
1056                    // Couldn't find it, just emit as TEXT
1057                    builder.token(SyntaxKind::TEXT.into(), info_string);
1058                }
1059            } else {
1060                // No classes
1061                builder.token(SyntaxKind::TEXT.into(), info_string);
1062            }
1063        }
1064        CodeBlockType::Raw { .. } | CodeBlockType::Plain => {
1065            // No language, just emit as TEXT
1066            builder.token(SyntaxKind::TEXT.into(), info_string);
1067        }
1068    }
1069
1070    builder.finish_node(); // CodeInfo
1071}
1072
1073/// Parse a fenced code block, consuming lines from the parser.
1074/// Returns the new position after the code block.
1075/// Parse a fenced code block, consuming lines from the parser.
1076/// Returns the new position after the code block.
1077/// base_indent accounts for container indentation (e.g., footnotes) that should be stripped.
1078pub(crate) fn parse_fenced_code_block(
1079    builder: &mut GreenNodeBuilder<'static>,
1080    lines: &[&str],
1081    start_pos: usize,
1082    fence: FenceInfo,
1083    bq_depth: usize,
1084    base_indent: usize,
1085    first_line_override: Option<&str>,
1086) -> usize {
1087    // Start code block
1088    builder.start_node(SyntaxKind::CODE_BLOCK.into());
1089
1090    // Opening fence
1091    let (first_trimmed, _first_inner) = prepare_fence_open_line(
1092        builder,
1093        lines[start_pos],
1094        first_line_override,
1095        bq_depth,
1096        base_indent,
1097    );
1098
1099    builder.start_node(SyntaxKind::CODE_FENCE_OPEN.into());
1100    builder.token(
1101        SyntaxKind::CODE_FENCE_MARKER.into(),
1102        &first_trimmed[..fence.fence_count],
1103    );
1104
1105    // Emit any space between fence and info string (for losslessness)
1106    let after_fence = &first_trimmed[fence.fence_count..];
1107    if let Some(_space_stripped) = after_fence.strip_prefix(' ') {
1108        // There was a space - emit it as WHITESPACE
1109        builder.token(SyntaxKind::WHITESPACE.into(), " ");
1110        // Parse and emit the info string as a structured node
1111        if !fence.info_string.is_empty() {
1112            emit_code_info_node(builder, &fence.info_string);
1113        }
1114    } else if !fence.info_string.is_empty() {
1115        // No space - parse and emit info_string as a structured node
1116        emit_code_info_node(builder, &fence.info_string);
1117    }
1118
1119    // Extract and emit the actual newline from the opening fence line
1120    let (_, newline_str) = strip_newline(first_trimmed);
1121    if !newline_str.is_empty() {
1122        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1123    }
1124    builder.finish_node(); // CodeFenceOpen
1125
1126    let mut current_pos = start_pos + 1;
1127    let mut content_lines: Vec<&str> = Vec::new(); // Store original lines for lossless parsing
1128    let mut found_closing = false;
1129
1130    while current_pos < lines.len() {
1131        let line = lines[current_pos];
1132
1133        // Count blockquote markers to detect leaving the surrounding blockquote.
1134        let (line_bq_depth, _) = count_blockquote_markers(line);
1135
1136        // If blockquote depth decreases, code block ends (we've left the blockquote)
1137        if line_bq_depth < bq_depth {
1138            break;
1139        }
1140
1141        // Strip exactly the surrounding blockquote depth; preserve any additional `>` literally.
1142        let inner = if bq_depth > 0 {
1143            strip_n_blockquote_markers(line, bq_depth)
1144        } else {
1145            line
1146        };
1147
1148        // Strip base indent (footnote context) from content lines for fence detection
1149        let base_indent_bytes = byte_index_at_column(inner, base_indent);
1150        let inner_stripped = if base_indent > 0 && inner.len() >= base_indent_bytes {
1151            &inner[base_indent_bytes..]
1152        } else {
1153            inner
1154        };
1155
1156        // Check for closing fence
1157        if is_closing_fence(inner_stripped, &fence) {
1158            found_closing = true;
1159            current_pos += 1;
1160            break;
1161        }
1162
1163        // Store the original line for lossless parsing.
1164        content_lines.push(line);
1165        current_pos += 1;
1166    }
1167
1168    // Add content
1169    if !content_lines.is_empty() {
1170        builder.start_node(SyntaxKind::CODE_CONTENT.into());
1171        let hashpipe_prefix = match InfoString::parse(&fence.info_string).block_type {
1172            CodeBlockType::Executable { language } => hashpipe_comment_prefix(&language),
1173            _ => None,
1174        };
1175
1176        let mut line_idx = 0usize;
1177        if let Some(prefix) = hashpipe_prefix {
1178            let prepared_hashpipe_lines =
1179                compute_hashpipe_preamble_line_count(&content_lines, prefix, bq_depth, base_indent);
1180            if prepared_hashpipe_lines > 0 {
1181                builder.start_node(SyntaxKind::HASHPIPE_YAML_PREAMBLE.into());
1182                builder.start_node(SyntaxKind::HASHPIPE_YAML_CONTENT.into());
1183                while line_idx < prepared_hashpipe_lines {
1184                    let content_line = content_lines[line_idx];
1185                    let after_indent =
1186                        emit_content_line_prefixes(builder, content_line, bq_depth, base_indent);
1187                    let (line_without_newline, newline_str) = strip_newline(after_indent);
1188                    if !emit_hashpipe_option_line(builder, line_without_newline, prefix) {
1189                        let _ =
1190                            emit_hashpipe_continuation_line(builder, line_without_newline, prefix);
1191                    }
1192                    if !newline_str.is_empty() {
1193                        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1194                    }
1195                    line_idx += 1;
1196                }
1197                builder.finish_node(); // HASHPIPE_YAML_CONTENT
1198                builder.finish_node(); // HASHPIPE_YAML_PREAMBLE
1199            }
1200        }
1201
1202        for content_line in content_lines.iter().skip(line_idx) {
1203            let after_indent =
1204                emit_content_line_prefixes(builder, content_line, bq_depth, base_indent);
1205            let (line_without_newline, newline_str) = strip_newline(after_indent);
1206
1207            if !line_without_newline.is_empty() {
1208                builder.token(SyntaxKind::TEXT.into(), line_without_newline);
1209            }
1210
1211            if !newline_str.is_empty() {
1212                builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1213            }
1214        }
1215        builder.finish_node(); // CodeContent
1216    }
1217
1218    // Closing fence (if found)
1219    if found_closing {
1220        let closing_line = lines[current_pos - 1];
1221        let closing_after_blockquote = if bq_depth > 0 {
1222            let stripped = strip_n_blockquote_markers(closing_line, bq_depth);
1223            let prefix_len = closing_line.len().saturating_sub(stripped.len());
1224            if prefix_len > 0 {
1225                emit_blockquote_prefix_tokens(builder, &closing_line[..prefix_len]);
1226            }
1227            stripped
1228        } else {
1229            closing_line
1230        };
1231
1232        // Emit base indent for lossless parsing
1233        let base_indent_bytes = byte_index_at_column(closing_after_blockquote, base_indent);
1234        if base_indent > 0 && closing_after_blockquote.len() >= base_indent_bytes {
1235            let indent_str = &closing_after_blockquote[..base_indent_bytes];
1236            if !indent_str.is_empty() {
1237                builder.token(SyntaxKind::WHITESPACE.into(), indent_str);
1238            }
1239        }
1240
1241        // Strip base indent to get fence
1242        let closing_stripped =
1243            if base_indent > 0 && closing_after_blockquote.len() >= base_indent_bytes {
1244                &closing_after_blockquote[base_indent_bytes..]
1245            } else {
1246                closing_after_blockquote
1247            };
1248        let (closing_without_newline, newline_str) = strip_newline(closing_stripped);
1249        let closing_trimmed_start = strip_leading_spaces(closing_without_newline);
1250        let leading_ws_len = closing_without_newline.len() - closing_trimmed_start.len();
1251        let closing_count = closing_trimmed_start
1252            .chars()
1253            .take_while(|&c| c == fence.fence_char)
1254            .count();
1255        let trailing_after_marker = &closing_trimmed_start[closing_count..];
1256
1257        builder.start_node(SyntaxKind::CODE_FENCE_CLOSE.into());
1258        if leading_ws_len > 0 {
1259            builder.token(
1260                SyntaxKind::WHITESPACE.into(),
1261                &closing_without_newline[..leading_ws_len],
1262            );
1263        }
1264        builder.token(
1265            SyntaxKind::CODE_FENCE_MARKER.into(),
1266            &closing_trimmed_start[..closing_count],
1267        );
1268        if !trailing_after_marker.is_empty() {
1269            builder.token(SyntaxKind::WHITESPACE.into(), trailing_after_marker);
1270        }
1271        if !newline_str.is_empty() {
1272            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1273        }
1274        builder.finish_node(); // CodeFenceClose
1275    }
1276
1277    builder.finish_node(); // CodeBlock
1278
1279    current_pos
1280}
1281
1282/// Parse a GFM math fence (``` math ... ```) as DISPLAY_MATH while preserving bytes.
1283pub(crate) fn parse_fenced_math_block(
1284    builder: &mut GreenNodeBuilder<'static>,
1285    lines: &[&str],
1286    start_pos: usize,
1287    fence: FenceInfo,
1288    bq_depth: usize,
1289    base_indent: usize,
1290    first_line_override: Option<&str>,
1291) -> usize {
1292    builder.start_node(SyntaxKind::DISPLAY_MATH.into());
1293
1294    let (first_trimmed, _first_inner) = prepare_fence_open_line(
1295        builder,
1296        lines[start_pos],
1297        first_line_override,
1298        bq_depth,
1299        base_indent,
1300    );
1301    let (opening_without_newline, opening_newline) = strip_newline(first_trimmed);
1302    builder.token(
1303        SyntaxKind::DISPLAY_MATH_MARKER.into(),
1304        opening_without_newline,
1305    );
1306    if !opening_newline.is_empty() {
1307        builder.token(SyntaxKind::NEWLINE.into(), opening_newline);
1308    }
1309
1310    let mut current_pos = start_pos + 1;
1311    let mut content_lines: Vec<&str> = Vec::new();
1312    let mut found_closing = false;
1313
1314    while current_pos < lines.len() {
1315        let line = lines[current_pos];
1316        let (line_bq_depth, _) = count_blockquote_markers(line);
1317        if line_bq_depth < bq_depth {
1318            break;
1319        }
1320
1321        let inner = if bq_depth > 0 {
1322            strip_n_blockquote_markers(line, bq_depth)
1323        } else {
1324            line
1325        };
1326        let base_indent_bytes = byte_index_at_column(inner, base_indent);
1327        let inner_stripped = if base_indent > 0 && inner.len() >= base_indent_bytes {
1328            &inner[base_indent_bytes..]
1329        } else {
1330            inner
1331        };
1332
1333        if is_closing_fence(inner_stripped, &fence) {
1334            found_closing = true;
1335            current_pos += 1;
1336            break;
1337        }
1338
1339        content_lines.push(line);
1340        current_pos += 1;
1341    }
1342
1343    if !content_lines.is_empty() {
1344        let mut content = String::new();
1345        for content_line in content_lines {
1346            let after_indent =
1347                emit_content_line_prefixes(builder, content_line, bq_depth, base_indent);
1348            let (line_without_newline, newline_str) = strip_newline(after_indent);
1349            content.push_str(line_without_newline);
1350            content.push_str(newline_str);
1351        }
1352        builder.token(SyntaxKind::TEXT.into(), &content);
1353    }
1354
1355    if found_closing {
1356        let closing_line = lines[current_pos - 1];
1357        let closing_after_blockquote = if bq_depth > 0 {
1358            let stripped = strip_n_blockquote_markers(closing_line, bq_depth);
1359            let prefix_len = closing_line.len().saturating_sub(stripped.len());
1360            if prefix_len > 0 {
1361                emit_blockquote_prefix_tokens(builder, &closing_line[..prefix_len]);
1362            }
1363            stripped
1364        } else {
1365            closing_line
1366        };
1367
1368        let base_indent_bytes = byte_index_at_column(closing_after_blockquote, base_indent);
1369        if base_indent > 0 && closing_after_blockquote.len() >= base_indent_bytes {
1370            let indent_str = &closing_after_blockquote[..base_indent_bytes];
1371            if !indent_str.is_empty() {
1372                builder.token(SyntaxKind::WHITESPACE.into(), indent_str);
1373            }
1374        }
1375
1376        let closing_stripped =
1377            if base_indent > 0 && closing_after_blockquote.len() >= base_indent_bytes {
1378                &closing_after_blockquote[base_indent_bytes..]
1379            } else {
1380                closing_after_blockquote
1381            };
1382        let (closing_without_newline, newline_str) = strip_newline(closing_stripped);
1383        let closing_trimmed_start = strip_leading_spaces(closing_without_newline);
1384        let leading_ws_len = closing_without_newline.len() - closing_trimmed_start.len();
1385        let closing_count = closing_trimmed_start
1386            .chars()
1387            .take_while(|&c| c == fence.fence_char)
1388            .count();
1389        let trailing_after_marker = &closing_trimmed_start[closing_count..];
1390
1391        if leading_ws_len > 0 {
1392            builder.token(
1393                SyntaxKind::WHITESPACE.into(),
1394                &closing_without_newline[..leading_ws_len],
1395            );
1396        }
1397        builder.token(
1398            SyntaxKind::DISPLAY_MATH_MARKER.into(),
1399            &closing_trimmed_start[..closing_count],
1400        );
1401        if !trailing_after_marker.is_empty() {
1402            builder.token(SyntaxKind::WHITESPACE.into(), trailing_after_marker);
1403        }
1404        if !newline_str.is_empty() {
1405            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1406        }
1407    }
1408
1409    builder.finish_node(); // DisplayMath
1410    current_pos
1411}
1412
1413#[cfg(test)]
1414mod tests {
1415    use super::*;
1416
1417    #[test]
1418    fn test_backtick_fence() {
1419        let fence = try_parse_fence_open("```python").unwrap();
1420        assert_eq!(fence.fence_char, '`');
1421        assert_eq!(fence.fence_count, 3);
1422        assert_eq!(fence.info_string, "python");
1423    }
1424
1425    #[test]
1426    fn test_tilde_fence() {
1427        let fence = try_parse_fence_open("~~~").unwrap();
1428        assert_eq!(fence.fence_char, '~');
1429        assert_eq!(fence.fence_count, 3);
1430        assert_eq!(fence.info_string, "");
1431    }
1432
1433    #[test]
1434    fn test_long_fence() {
1435        let fence = try_parse_fence_open("`````").unwrap();
1436        assert_eq!(fence.fence_count, 5);
1437    }
1438
1439    #[test]
1440    fn test_two_backticks_invalid() {
1441        assert!(try_parse_fence_open("``").is_none());
1442    }
1443
1444    #[test]
1445    fn test_backtick_fence_with_backtick_in_info_is_invalid() {
1446        assert!(try_parse_fence_open("`````hi````there`````").is_none());
1447    }
1448
1449    #[test]
1450    fn test_closing_fence() {
1451        let fence = FenceInfo {
1452            fence_char: '`',
1453            fence_count: 3,
1454            info_string: String::new(),
1455        };
1456        assert!(is_closing_fence("```", &fence));
1457        assert!(is_closing_fence("````", &fence));
1458        assert!(!is_closing_fence("``", &fence));
1459        assert!(!is_closing_fence("~~~", &fence));
1460    }
1461
1462    #[test]
1463    fn test_fenced_code_preserves_leading_gt() {
1464        let input = "```\n> foo\n```\n";
1465        let tree = crate::parse(input, None);
1466        assert_eq!(tree.text().to_string(), input);
1467    }
1468
1469    #[test]
1470    fn test_fenced_code_in_blockquote_preserves_opening_fence_marker() {
1471        let input = "> ```\n> code\n> ```\n";
1472        let tree = crate::parse(input, None);
1473        assert_eq!(tree.text().to_string(), input);
1474    }
1475
1476    #[test]
1477    fn test_fenced_code_in_definition_list_with_unicode_content_does_not_panic() {
1478        let input = "Term\n: ```\nā”œā”€ā”€ pyproject.toml\n```\n";
1479        let tree = crate::parse(input, None);
1480        assert_eq!(tree.text().to_string(), input);
1481    }
1482
1483    #[test]
1484    fn test_info_string_plain() {
1485        let info = InfoString::parse("");
1486        assert_eq!(info.block_type, CodeBlockType::Plain);
1487        assert!(info.attributes.is_empty());
1488    }
1489
1490    #[test]
1491    fn test_info_string_shortcut() {
1492        let info = InfoString::parse("python");
1493        assert_eq!(
1494            info.block_type,
1495            CodeBlockType::DisplayShortcut {
1496                language: "python".to_string()
1497            }
1498        );
1499        assert!(info.attributes.is_empty());
1500    }
1501
1502    #[test]
1503    fn test_info_string_shortcut_with_trailing() {
1504        let info = InfoString::parse("python extra stuff");
1505        assert_eq!(
1506            info.block_type,
1507            CodeBlockType::DisplayShortcut {
1508                language: "python".to_string()
1509            }
1510        );
1511    }
1512
1513    #[test]
1514    fn test_info_string_display_explicit() {
1515        let info = InfoString::parse("{.python}");
1516        assert_eq!(
1517            info.block_type,
1518            CodeBlockType::DisplayExplicit {
1519                classes: vec!["python".to_string()]
1520            }
1521        );
1522    }
1523
1524    #[test]
1525    fn test_info_string_display_explicit_multiple() {
1526        let info = InfoString::parse("{.python .numberLines}");
1527        assert_eq!(
1528            info.block_type,
1529            CodeBlockType::DisplayExplicit {
1530                classes: vec!["python".to_string(), "numberLines".to_string()]
1531            }
1532        );
1533    }
1534
1535    #[test]
1536    fn test_info_string_executable() {
1537        let info = InfoString::parse("{python}");
1538        assert_eq!(
1539            info.block_type,
1540            CodeBlockType::Executable {
1541                language: "python".to_string()
1542            }
1543        );
1544    }
1545
1546    #[test]
1547    fn test_info_string_executable_with_options() {
1548        let info = InfoString::parse("{python echo=false warning=true}");
1549        assert_eq!(
1550            info.block_type,
1551            CodeBlockType::Executable {
1552                language: "python".to_string()
1553            }
1554        );
1555        assert_eq!(info.attributes.len(), 2);
1556        assert_eq!(
1557            info.attributes[0],
1558            ("echo".to_string(), Some("false".to_string()))
1559        );
1560        assert_eq!(
1561            info.attributes[1],
1562            ("warning".to_string(), Some("true".to_string()))
1563        );
1564    }
1565
1566    #[test]
1567    fn test_info_string_executable_with_commas() {
1568        let info = InfoString::parse("{r, echo=FALSE, warning=TRUE}");
1569        assert_eq!(
1570            info.block_type,
1571            CodeBlockType::Executable {
1572                language: "r".to_string()
1573            }
1574        );
1575        assert_eq!(info.attributes.len(), 2);
1576        assert_eq!(
1577            info.attributes[0],
1578            ("echo".to_string(), Some("FALSE".to_string()))
1579        );
1580        assert_eq!(
1581            info.attributes[1],
1582            ("warning".to_string(), Some("TRUE".to_string()))
1583        );
1584    }
1585
1586    #[test]
1587    fn test_info_string_executable_mixed_commas_spaces() {
1588        // R-style with commas and spaces
1589        let info = InfoString::parse("{r, echo=FALSE, label=\"my chunk\"}");
1590        assert_eq!(
1591            info.block_type,
1592            CodeBlockType::Executable {
1593                language: "r".to_string()
1594            }
1595        );
1596        assert_eq!(info.attributes.len(), 2);
1597        assert_eq!(
1598            info.attributes[0],
1599            ("echo".to_string(), Some("FALSE".to_string()))
1600        );
1601        assert_eq!(
1602            info.attributes[1],
1603            ("label".to_string(), Some("my chunk".to_string()))
1604        );
1605    }
1606
1607    #[test]
1608    fn test_info_string_mixed_shortcut_and_attrs() {
1609        let info = InfoString::parse("python {.numberLines}");
1610        assert_eq!(
1611            info.block_type,
1612            CodeBlockType::DisplayShortcut {
1613                language: "python".to_string()
1614            }
1615        );
1616        assert_eq!(info.attributes.len(), 1);
1617        assert_eq!(info.attributes[0], (".numberLines".to_string(), None));
1618    }
1619
1620    #[test]
1621    fn test_info_string_mixed_with_key_value() {
1622        let info = InfoString::parse("python {.numberLines startFrom=\"100\"}");
1623        assert_eq!(
1624            info.block_type,
1625            CodeBlockType::DisplayShortcut {
1626                language: "python".to_string()
1627            }
1628        );
1629        assert_eq!(info.attributes.len(), 2);
1630        assert_eq!(info.attributes[0], (".numberLines".to_string(), None));
1631        assert_eq!(
1632            info.attributes[1],
1633            ("startFrom".to_string(), Some("100".to_string()))
1634        );
1635    }
1636
1637    #[test]
1638    fn test_info_string_explicit_with_id_and_classes() {
1639        let info = InfoString::parse("{#mycode .haskell .numberLines startFrom=\"100\"}");
1640        assert_eq!(
1641            info.block_type,
1642            CodeBlockType::DisplayExplicit {
1643                classes: vec!["haskell".to_string(), "numberLines".to_string()]
1644            }
1645        );
1646        // Non-class attributes
1647        let has_id = info.attributes.iter().any(|(k, _)| k == "#mycode");
1648        let has_start = info
1649            .attributes
1650            .iter()
1651            .any(|(k, v)| k == "startFrom" && v == &Some("100".to_string()));
1652        assert!(has_id);
1653        assert!(has_start);
1654    }
1655
1656    #[test]
1657    fn test_info_string_raw_html() {
1658        let info = InfoString::parse("{=html}");
1659        assert_eq!(
1660            info.block_type,
1661            CodeBlockType::Raw {
1662                format: "html".to_string()
1663            }
1664        );
1665        assert!(info.attributes.is_empty());
1666    }
1667
1668    #[test]
1669    fn test_info_string_raw_latex() {
1670        let info = InfoString::parse("{=latex}");
1671        assert_eq!(
1672            info.block_type,
1673            CodeBlockType::Raw {
1674                format: "latex".to_string()
1675            }
1676        );
1677    }
1678
1679    #[test]
1680    fn test_info_string_raw_openxml() {
1681        let info = InfoString::parse("{=openxml}");
1682        assert_eq!(
1683            info.block_type,
1684            CodeBlockType::Raw {
1685                format: "openxml".to_string()
1686            }
1687        );
1688    }
1689
1690    #[test]
1691    fn test_info_string_raw_ms() {
1692        let info = InfoString::parse("{=ms}");
1693        assert_eq!(
1694            info.block_type,
1695            CodeBlockType::Raw {
1696                format: "ms".to_string()
1697            }
1698        );
1699    }
1700
1701    #[test]
1702    fn test_info_string_raw_html5() {
1703        let info = InfoString::parse("{=html5}");
1704        assert_eq!(
1705            info.block_type,
1706            CodeBlockType::Raw {
1707                format: "html5".to_string()
1708            }
1709        );
1710    }
1711
1712    #[test]
1713    fn test_info_string_raw_not_combined_with_attrs() {
1714        // If there are other attributes with =format, it should not be treated as raw
1715        let info = InfoString::parse("{=html .class}");
1716        // This should NOT be parsed as raw because there's more than one attribute
1717        assert_ne!(
1718            info.block_type,
1719            CodeBlockType::Raw {
1720                format: "html".to_string()
1721            }
1722        );
1723    }
1724
1725    #[test]
1726    fn test_parse_pandoc_attributes_spaces() {
1727        // Pandoc display blocks use spaces as delimiters
1728        let attrs = InfoString::parse_pandoc_attributes(".python .numberLines startFrom=\"10\"");
1729        assert_eq!(attrs.len(), 3);
1730        assert_eq!(attrs[0], (".python".to_string(), None));
1731        assert_eq!(attrs[1], (".numberLines".to_string(), None));
1732        assert_eq!(attrs[2], ("startFrom".to_string(), Some("10".to_string())));
1733    }
1734
1735    #[test]
1736    fn test_parse_pandoc_attributes_no_commas() {
1737        // Commas in Pandoc attributes should be treated as part of the value
1738        let attrs = InfoString::parse_pandoc_attributes("#id .class key=value");
1739        assert_eq!(attrs.len(), 3);
1740        assert_eq!(attrs[0], ("#id".to_string(), None));
1741        assert_eq!(attrs[1], (".class".to_string(), None));
1742        assert_eq!(attrs[2], ("key".to_string(), Some("value".to_string())));
1743    }
1744
1745    #[test]
1746    fn test_parse_chunk_options_commas() {
1747        // Quarto/RMarkdown chunks use commas as delimiters
1748        let attrs = InfoString::parse_chunk_options("r, echo=FALSE, warning=TRUE");
1749        assert_eq!(attrs.len(), 3);
1750        assert_eq!(attrs[0], ("r".to_string(), None));
1751        assert_eq!(attrs[1], ("echo".to_string(), Some("FALSE".to_string())));
1752        assert_eq!(attrs[2], ("warning".to_string(), Some("TRUE".to_string())));
1753    }
1754
1755    #[test]
1756    fn test_parse_chunk_options_no_spaces() {
1757        // Should handle comma-separated without spaces
1758        let attrs = InfoString::parse_chunk_options("r,echo=FALSE,warning=TRUE");
1759        assert_eq!(attrs.len(), 3);
1760        assert_eq!(attrs[0], ("r".to_string(), None));
1761        assert_eq!(attrs[1], ("echo".to_string(), Some("FALSE".to_string())));
1762        assert_eq!(attrs[2], ("warning".to_string(), Some("TRUE".to_string())));
1763    }
1764
1765    #[test]
1766    fn test_parse_chunk_options_mixed() {
1767        // Handle both commas and spaces
1768        let attrs = InfoString::parse_chunk_options("python echo=False, warning=True");
1769        assert_eq!(attrs.len(), 3);
1770        assert_eq!(attrs[0], ("python".to_string(), None));
1771        assert_eq!(attrs[1], ("echo".to_string(), Some("False".to_string())));
1772        assert_eq!(attrs[2], ("warning".to_string(), Some("True".to_string())));
1773    }
1774
1775    #[test]
1776    fn test_parse_chunk_options_nested_function_call() {
1777        // R function calls with nested commas should be treated as single value
1778        let attrs = InfoString::parse_chunk_options(r#"r pep-cg, dependson=c("foo", "bar")"#);
1779        assert_eq!(attrs.len(), 3);
1780        assert_eq!(attrs[0], ("r".to_string(), None));
1781        assert_eq!(attrs[1], ("pep-cg".to_string(), None));
1782        assert_eq!(
1783            attrs[2],
1784            (
1785                "dependson".to_string(),
1786                Some(r#"c("foo", "bar")"#.to_string())
1787            )
1788        );
1789    }
1790
1791    #[test]
1792    fn test_parse_chunk_options_nested_with_spaces() {
1793        // Function call with spaces inside
1794        let attrs = InfoString::parse_chunk_options(r#"r, cache.path=file.path("cache", "dir")"#);
1795        assert_eq!(attrs.len(), 2);
1796        assert_eq!(attrs[0], ("r".to_string(), None));
1797        assert_eq!(
1798            attrs[1],
1799            (
1800                "cache.path".to_string(),
1801                Some(r#"file.path("cache", "dir")"#.to_string())
1802            )
1803        );
1804    }
1805
1806    #[test]
1807    fn test_parse_chunk_options_deeply_nested() {
1808        // Multiple levels of nesting
1809        let attrs = InfoString::parse_chunk_options(r#"r, x=list(a=c(1,2), b=c(3,4))"#);
1810        assert_eq!(attrs.len(), 2);
1811        assert_eq!(attrs[0], ("r".to_string(), None));
1812        assert_eq!(
1813            attrs[1],
1814            (
1815                "x".to_string(),
1816                Some(r#"list(a=c(1,2), b=c(3,4))"#.to_string())
1817            )
1818        );
1819    }
1820
1821    #[test]
1822    fn test_parse_chunk_options_brackets_and_braces() {
1823        // Test all bracket types
1824        let attrs = InfoString::parse_chunk_options(r#"r, data=df[rows, cols], config={a:1, b:2}"#);
1825        assert_eq!(attrs.len(), 3);
1826        assert_eq!(attrs[0], ("r".to_string(), None));
1827        assert_eq!(
1828            attrs[1],
1829            ("data".to_string(), Some("df[rows, cols]".to_string()))
1830        );
1831        assert_eq!(
1832            attrs[2],
1833            ("config".to_string(), Some("{a:1, b:2}".to_string()))
1834        );
1835    }
1836
1837    #[test]
1838    fn test_parse_chunk_options_quotes_with_parens() {
1839        // Parentheses inside quoted strings shouldn't affect depth tracking
1840        // Note: The parser strips outer quotes from quoted values
1841        let attrs = InfoString::parse_chunk_options(r#"r, label="test (with parens)", echo=TRUE"#);
1842        assert_eq!(attrs.len(), 3);
1843        assert_eq!(attrs[0], ("r".to_string(), None));
1844        assert_eq!(
1845            attrs[1],
1846            ("label".to_string(), Some("test (with parens)".to_string()))
1847        );
1848        assert_eq!(attrs[2], ("echo".to_string(), Some("TRUE".to_string())));
1849    }
1850
1851    #[test]
1852    fn test_parse_chunk_options_escaped_quotes() {
1853        // Escaped quotes inside string values
1854        // Note: The parser strips outer quotes and processes escapes
1855        let attrs = InfoString::parse_chunk_options(r#"r, label="has \"quoted\" text""#);
1856        assert_eq!(attrs.len(), 2);
1857        assert_eq!(attrs[0], ("r".to_string(), None));
1858        assert_eq!(
1859            attrs[1],
1860            (
1861                "label".to_string(),
1862                Some(r#"has "quoted" text"#.to_string())
1863            )
1864        );
1865    }
1866
1867    #[test]
1868    fn test_display_vs_executable_parsing() {
1869        // Display block should use Pandoc parser (spaces)
1870        let info1 = InfoString::parse("{.python .numberLines startFrom=\"10\"}");
1871        assert!(matches!(
1872            info1.block_type,
1873            CodeBlockType::DisplayExplicit { .. }
1874        ));
1875
1876        // Executable chunk should use chunk options parser (commas)
1877        let info2 = InfoString::parse("{r, echo=FALSE, warning=TRUE}");
1878        assert!(matches!(info2.block_type, CodeBlockType::Executable { .. }));
1879        assert_eq!(info2.attributes.len(), 2);
1880    }
1881
1882    #[test]
1883    fn test_info_string_executable_implicit_label() {
1884        // {r mylabel} should parse as label=mylabel
1885        let info = InfoString::parse("{r mylabel}");
1886        assert!(matches!(
1887            info.block_type,
1888            CodeBlockType::Executable { ref language } if language == "r"
1889        ));
1890        assert_eq!(info.attributes.len(), 1);
1891        assert_eq!(
1892            info.attributes[0],
1893            ("label".to_string(), Some("mylabel".to_string()))
1894        );
1895    }
1896
1897    #[test]
1898    fn test_info_string_executable_implicit_label_with_options() {
1899        // {r mylabel, echo=FALSE} should parse as label=mylabel, echo=FALSE
1900        let info = InfoString::parse("{r mylabel, echo=FALSE}");
1901        assert!(matches!(
1902            info.block_type,
1903            CodeBlockType::Executable { ref language } if language == "r"
1904        ));
1905        assert_eq!(info.attributes.len(), 2);
1906        assert_eq!(
1907            info.attributes[0],
1908            ("label".to_string(), Some("mylabel".to_string()))
1909        );
1910        assert_eq!(
1911            info.attributes[1],
1912            ("echo".to_string(), Some("FALSE".to_string()))
1913        );
1914    }
1915
1916    #[test]
1917    fn test_compute_hashpipe_preamble_line_count_for_block_scalar() {
1918        let content_lines = vec![
1919            "#| fig-cap: |\n",
1920            "#|   A caption\n",
1921            "#|   spanning lines\n",
1922            "a <- 1\n",
1923        ];
1924        let count = compute_hashpipe_preamble_line_count(&content_lines, "#|", 0, 0);
1925        assert_eq!(count, 3);
1926    }
1927
1928    #[test]
1929    fn test_compute_hashpipe_preamble_line_count_stops_at_non_option() {
1930        let content_lines = vec!["#| label: fig-plot\n", "plot(1:10)\n", "#| echo: false\n"];
1931        let count = compute_hashpipe_preamble_line_count(&content_lines, "#|", 0, 0);
1932        assert_eq!(count, 1);
1933    }
1934
1935    #[test]
1936    fn test_compute_hashpipe_preamble_line_count_stops_at_standalone_prefix() {
1937        let content_lines = vec!["#| label: fig-plot\n", "#|\n", "plot(1:10)\n"];
1938        let count = compute_hashpipe_preamble_line_count(&content_lines, "#|", 0, 0);
1939        assert_eq!(count, 1);
1940    }
1941}