Skip to main content

panache_parser/parser/blocks/
code_blocks.rs

1//! Fenced code block parsing utilities.
2
3use crate::parser::utils::chunk_options::hashpipe_comment_prefix;
4use crate::syntax::SyntaxKind;
5use rowan::GreenNodeBuilder;
6
7use super::blockquotes::{count_blockquote_markers, strip_n_blockquote_markers};
8use crate::parser::utils::container_stack::byte_index_at_column;
9use crate::parser::utils::helpers::{strip_leading_spaces, strip_newline};
10
/// Classification of a fenced code block, derived from the syntax of its info string.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CodeBlockType {
    /// Display-only block written with the shortcut form: ```python
    DisplayShortcut { language: String },
    /// Display-only block written with explicit Pandoc attributes: ```{.python}
    ///
    /// `classes` holds the class names without their leading `.`.
    DisplayExplicit { classes: Vec<String> },
    /// Executable chunk (Quarto/RMarkdown): ```{python}
    Executable { language: String },
    /// Raw block targeting one output format: ```{=html}
    Raw { format: String },
    /// Block without any language: ```
    Plain,
}
25
26/// Parsed attributes from a code block info string.
27#[derive(Debug, Clone, PartialEq)]
28pub struct InfoString {
29    pub raw: String,
30    pub block_type: CodeBlockType,
31    pub attributes: Vec<(String, Option<String>)>, // key-value pairs
32}
33
34impl InfoString {
35    /// Parse an info string into structured attributes.
36    pub fn parse(raw: &str) -> Self {
37        let trimmed = raw.trim();
38
39        if trimmed.is_empty() {
40            return InfoString {
41                raw: raw.to_string(),
42                block_type: CodeBlockType::Plain,
43                attributes: Vec::new(),
44            };
45        }
46
47        // Check if it starts with '{' - explicit attribute block
48        if let Some(stripped) = trimmed.strip_prefix('{')
49            && let Some(content) = stripped.strip_suffix('}')
50        {
51            return Self::parse_explicit(raw, content);
52        }
53
54        // Check for mixed form: python {.numberLines}
55        if let Some(brace_start) = trimmed.find('{') {
56            let language = trimmed[..brace_start].trim();
57            if !language.is_empty() && !language.contains(char::is_whitespace) {
58                let attr_part = &trimmed[brace_start..];
59                if let Some(stripped) = attr_part.strip_prefix('{')
60                    && let Some(content) = stripped.strip_suffix('}')
61                {
62                    let attrs = Self::parse_attributes(content);
63                    return InfoString {
64                        raw: raw.to_string(),
65                        block_type: CodeBlockType::DisplayShortcut {
66                            language: language.to_string(),
67                        },
68                        attributes: attrs,
69                    };
70                }
71            }
72        }
73
74        // Otherwise, it's a shortcut form (just the language name)
75        // Only take the first word as language
76        let language = trimmed.split_whitespace().next().unwrap_or(trimmed);
77        InfoString {
78            raw: raw.to_string(),
79            block_type: CodeBlockType::DisplayShortcut {
80                language: language.to_string(),
81            },
82            attributes: Vec::new(),
83        }
84    }
85
86    fn parse_explicit(raw: &str, content: &str) -> Self {
87        // Check for raw attribute FIRST: {=format}
88        // The content should start with '=' and have only alphanumeric chars after
89        let trimmed_content = content.trim();
90        if let Some(format_name) = trimmed_content.strip_prefix('=') {
91            // Validate format name: alphanumeric only, no spaces
92            if !format_name.is_empty()
93                && format_name.chars().all(|c| c.is_alphanumeric())
94                && !format_name.contains(char::is_whitespace)
95            {
96                return InfoString {
97                    raw: raw.to_string(),
98                    block_type: CodeBlockType::Raw {
99                        format: format_name.to_string(),
100                    },
101                    attributes: Vec::new(),
102                };
103            }
104        }
105
106        // First, do a preliminary parse to determine block type
107        // Use chunk options parser (comma-aware) for initial detection
108        let prelim_attrs = Self::parse_chunk_options(content);
109
110        // First non-ID, non-attribute token determines if it's executable or display
111        let mut first_lang_token = None;
112        for (key, val) in prelim_attrs.iter() {
113            if val.is_none() && !key.starts_with('#') {
114                first_lang_token = Some(key.as_str());
115                break;
116            }
117        }
118
119        let first_token = first_lang_token.unwrap_or("");
120
121        if first_token.starts_with('.') {
122            // Display block: {.python} or {.haskell .numberLines}
123            // Re-parse with Pandoc-style parser (space-delimited)
124            let attrs = Self::parse_pandoc_attributes(content);
125
126            let classes: Vec<String> = attrs
127                .iter()
128                .filter(|(k, v)| k.starts_with('.') && v.is_none())
129                .map(|(k, _)| k[1..].to_string())
130                .collect();
131
132            let non_class_attrs: Vec<(String, Option<String>)> = attrs
133                .into_iter()
134                .filter(|(k, _)| !k.starts_with('.') || k.contains('='))
135                .collect();
136
137            InfoString {
138                raw: raw.to_string(),
139                block_type: CodeBlockType::DisplayExplicit { classes },
140                attributes: non_class_attrs,
141            }
142        } else if !first_token.is_empty() && !first_token.starts_with('#') {
143            // Executable chunk: {python} or {r}
144            // Use chunk options parser (comma-delimited)
145            let attrs = Self::parse_chunk_options(content);
146            let lang_index = attrs.iter().position(|(k, _)| k == first_token).unwrap();
147
148            // Check if there's a second bareword (implicit label in R/Quarto chunks)
149            // Pattern: {r mylabel} is equivalent to {r, label=mylabel}
150            let mut has_implicit_label = false;
151            let implicit_label_value = if lang_index + 1 < attrs.len() {
152                if let (label_key, None) = &attrs[lang_index + 1] {
153                    // Second bareword after language
154                    has_implicit_label = true;
155                    Some(label_key.clone())
156                } else {
157                    None
158                }
159            } else {
160                None
161            };
162
163            let mut final_attrs: Vec<(String, Option<String>)> = attrs
164                .into_iter()
165                .enumerate()
166                .filter(|(i, _)| {
167                    // Remove language token
168                    if *i == lang_index {
169                        return false;
170                    }
171                    // Remove implicit label token (will be added back explicitly)
172                    if has_implicit_label && *i == lang_index + 1 {
173                        return false;
174                    }
175                    true
176                })
177                .map(|(_, attr)| attr)
178                .collect();
179
180            // Add explicit label if we found an implicit one
181            if let Some(label_val) = implicit_label_value {
182                final_attrs.insert(0, ("label".to_string(), Some(label_val)));
183            }
184
185            InfoString {
186                raw: raw.to_string(),
187                block_type: CodeBlockType::Executable {
188                    language: first_token.to_string(),
189                },
190                attributes: final_attrs,
191            }
192        } else {
193            // Just attributes, no language - use Pandoc parser
194            let attrs = Self::parse_pandoc_attributes(content);
195            InfoString {
196                raw: raw.to_string(),
197                block_type: CodeBlockType::Plain,
198                attributes: attrs,
199            }
200        }
201    }
202
203    /// Parse Pandoc-style attributes for display blocks: {.class #id key="value"}
204    /// Spaces are the primary delimiter. Pandoc spec prefers explicit quoting.
205    fn parse_pandoc_attributes(content: &str) -> Vec<(String, Option<String>)> {
206        let mut attrs = Vec::new();
207        let mut chars = content.chars().peekable();
208
209        while chars.peek().is_some() {
210            // Skip whitespace
211            while matches!(chars.peek(), Some(&' ') | Some(&'\t')) {
212                chars.next();
213            }
214
215            if chars.peek().is_none() {
216                break;
217            }
218
219            // Read key
220            let mut key = String::new();
221            while let Some(&ch) = chars.peek() {
222                if ch == '=' || ch == ' ' || ch == '\t' {
223                    break;
224                }
225                key.push(ch);
226                chars.next();
227            }
228
229            if key.is_empty() {
230                break;
231            }
232
233            // Skip whitespace
234            while matches!(chars.peek(), Some(&' ') | Some(&'\t')) {
235                chars.next();
236            }
237
238            // Check for value
239            if chars.peek() == Some(&'=') {
240                chars.next(); // consume '='
241
242                // Skip whitespace after '='
243                while matches!(chars.peek(), Some(&' ') | Some(&'\t')) {
244                    chars.next();
245                }
246
247                // Read value (might be quoted)
248                let value = if chars.peek() == Some(&'"') {
249                    chars.next(); // consume opening quote
250                    let mut val = String::new();
251                    while let Some(&ch) = chars.peek() {
252                        chars.next();
253                        if ch == '"' {
254                            break;
255                        }
256                        if ch == '\\' {
257                            if let Some(&next_ch) = chars.peek() {
258                                chars.next();
259                                val.push(next_ch);
260                            }
261                        } else {
262                            val.push(ch);
263                        }
264                    }
265                    val
266                } else {
267                    // Unquoted value - read until space
268                    let mut val = String::new();
269                    while let Some(&ch) = chars.peek() {
270                        if ch == ' ' || ch == '\t' {
271                            break;
272                        }
273                        val.push(ch);
274                        chars.next();
275                    }
276                    val
277                };
278
279                attrs.push((key, Some(value)));
280            } else {
281                attrs.push((key, None));
282            }
283        }
284
285        attrs
286    }
287
288    /// Parse Quarto/RMarkdown chunk options: {language, option=value, option2=value2}
289    /// Commas are the primary delimiter (R CSV style). Supports unquoted barewords.
290    fn parse_chunk_options(content: &str) -> Vec<(String, Option<String>)> {
291        let mut attrs = Vec::new();
292        let mut chars = content.chars().peekable();
293
294        while chars.peek().is_some() {
295            // Skip whitespace and commas
296            while matches!(chars.peek(), Some(&' ') | Some(&'\t') | Some(&',')) {
297                chars.next();
298            }
299
300            if chars.peek().is_none() {
301                break;
302            }
303
304            // Read key
305            let mut key = String::new();
306            while let Some(&ch) = chars.peek() {
307                if ch == '=' || ch == ' ' || ch == '\t' || ch == ',' {
308                    break;
309                }
310                key.push(ch);
311                chars.next();
312            }
313
314            if key.is_empty() {
315                break;
316            }
317
318            // Skip whitespace and commas
319            while matches!(chars.peek(), Some(&' ') | Some(&'\t') | Some(&',')) {
320                chars.next();
321            }
322
323            // Check for value
324            if chars.peek() == Some(&'=') {
325                chars.next(); // consume '='
326
327                // Skip whitespace and commas after '='
328                while matches!(chars.peek(), Some(&' ') | Some(&'\t') | Some(&',')) {
329                    chars.next();
330                }
331
332                // Read value (might be quoted)
333                let value = if chars.peek() == Some(&'"') {
334                    chars.next(); // consume opening quote
335                    let mut val = String::new();
336                    while let Some(&ch) = chars.peek() {
337                        chars.next();
338                        if ch == '"' {
339                            break;
340                        }
341                        if ch == '\\' {
342                            if let Some(&next_ch) = chars.peek() {
343                                chars.next();
344                                val.push(next_ch);
345                            }
346                        } else {
347                            val.push(ch);
348                        }
349                    }
350                    val
351                } else {
352                    // Unquoted value - read until comma, space, or tab at depth 0
353                    // Track nesting depth for (), [], {} and quote state
354                    let mut val = String::new();
355                    let mut depth = 0; // Track parentheses/brackets/braces depth
356                    let mut in_quote: Option<char> = None; // Track if inside ' or "
357                    let mut escaped = false; // Track if previous char was backslash
358
359                    while let Some(&ch) = chars.peek() {
360                        // Handle escape sequences
361                        if escaped {
362                            val.push(ch);
363                            chars.next();
364                            escaped = false;
365                            continue;
366                        }
367
368                        if ch == '\\' {
369                            val.push(ch);
370                            chars.next();
371                            escaped = true;
372                            continue;
373                        }
374
375                        // Handle quotes
376                        if let Some(quote_char) = in_quote {
377                            val.push(ch);
378                            chars.next();
379                            if ch == quote_char {
380                                in_quote = None; // Close quote
381                            }
382                            continue;
383                        }
384
385                        // Not in a quote - check for quote start
386                        if ch == '"' || ch == '\'' {
387                            in_quote = Some(ch);
388                            val.push(ch);
389                            chars.next();
390                            continue;
391                        }
392
393                        // Track nesting depth (only when not in quotes)
394                        if ch == '(' || ch == '[' || ch == '{' {
395                            depth += 1;
396                            val.push(ch);
397                            chars.next();
398                            continue;
399                        }
400
401                        if ch == ')' || ch == ']' || ch == '}' {
402                            depth -= 1;
403                            val.push(ch);
404                            chars.next();
405                            continue;
406                        }
407
408                        // Check for delimiters - only break at depth 0
409                        if depth == 0 && (ch == ' ' || ch == '\t' || ch == ',') {
410                            break;
411                        }
412
413                        // Regular character
414                        val.push(ch);
415                        chars.next();
416                    }
417                    val
418                };
419
420                attrs.push((key, Some(value)));
421            } else {
422                attrs.push((key, None));
423            }
424        }
425
426        attrs
427    }
428
429    /// Legacy function - kept for backward compatibility in mixed-form parsing
430    /// For new code, use parse_pandoc_attributes or parse_chunk_options
431    fn parse_attributes(content: &str) -> Vec<(String, Option<String>)> {
432        // Default to chunk options parsing (comma-aware)
433        Self::parse_chunk_options(content)
434    }
435}
436
/// Description of an opening code fence, as returned by `try_parse_fence_open`.
#[derive(Debug, Clone)]
pub(crate) struct FenceInfo {
    /// Character the fence is built from: a backtick or `~`.
    pub fence_char: char,
    /// Number of fence characters in the opening run.
    pub fence_count: usize,
    /// Text following the fence run, without the line ending and without
    /// one leading space.
    pub info_string: String,
}
444
445pub(crate) fn is_gfm_math_fence(fence: &FenceInfo) -> bool {
446    fence.info_string.trim() == "math"
447}
448
449/// Try to detect a fenced code block opening from content.
450/// Returns fence info if this is a valid opening fence.
451pub(crate) fn try_parse_fence_open(content: &str) -> Option<FenceInfo> {
452    let trimmed = strip_leading_spaces(content);
453
454    // Check for fence opening (``` or ~~~)
455    let (fence_char, fence_count) = if trimmed.starts_with('`') {
456        let count = trimmed.chars().take_while(|&c| c == '`').count();
457        ('`', count)
458    } else if trimmed.starts_with('~') {
459        let count = trimmed.chars().take_while(|&c| c == '~').count();
460        ('~', count)
461    } else {
462        return None;
463    };
464
465    if fence_count < 3 {
466        return None;
467    }
468
469    let info_string_raw = &trimmed[fence_count..];
470    // Strip trailing newline (LF or CRLF) and at most one leading space
471    let (info_string_trimmed, _) = strip_newline(info_string_raw);
472    let info_string = if let Some(stripped) = info_string_trimmed.strip_prefix(' ') {
473        stripped.to_string()
474    } else {
475        info_string_trimmed.to_string()
476    };
477
478    // Backtick-fenced blocks cannot have backticks in the info string.
479    if fence_char == '`' && info_string.contains('`') {
480        return None;
481    }
482
483    Some(FenceInfo {
484        fence_char,
485        fence_count,
486        info_string,
487    })
488}
489
490fn prepare_fence_open_line<'a>(
491    builder: &mut GreenNodeBuilder<'static>,
492    source_line: &'a str,
493    first_line_override: Option<&'a str>,
494    bq_depth: usize,
495    base_indent: usize,
496) -> (&'a str, &'a str) {
497    let first_line = first_line_override.unwrap_or(source_line);
498
499    // Only strip blockquote markers for the *surrounding* blockquote depth.
500    // Anything beyond that (e.g. a literal `>` inside the code block) must be preserved.
501    let first_inner = if bq_depth > 0 && first_line_override.is_none() {
502        strip_n_blockquote_markers(first_line, bq_depth)
503    } else {
504        if bq_depth > 0 && first_line_override.is_some() && source_line != first_line {
505            let stripped = strip_n_blockquote_markers(source_line, bq_depth);
506            let prefix_len = source_line.len().saturating_sub(stripped.len());
507            if prefix_len > 0 {
508                emit_blockquote_prefix_tokens(builder, &source_line[..prefix_len]);
509            }
510        }
511        first_line
512    };
513
514    // For lossless parsing: emit the base indent before stripping it
515    let first_base_indent = if first_line_override.is_some() {
516        0
517    } else {
518        base_indent
519    };
520    let first_base_indent_bytes = byte_index_at_column(first_inner, first_base_indent);
521    let first_stripped = if first_base_indent > 0 && first_inner.len() >= first_base_indent_bytes {
522        let indent_str = &first_inner[..first_base_indent_bytes];
523        if !indent_str.is_empty() {
524            builder.token(SyntaxKind::WHITESPACE.into(), indent_str);
525        }
526        &first_inner[first_base_indent_bytes..]
527    } else {
528        first_inner
529    };
530
531    let first_trimmed = strip_leading_spaces(first_stripped);
532    let leading_ws_len = first_stripped.len().saturating_sub(first_trimmed.len());
533    if leading_ws_len > 0 {
534        builder.token(
535            SyntaxKind::WHITESPACE.into(),
536            &first_stripped[..leading_ws_len],
537        );
538    }
539    (first_trimmed, first_inner)
540}
541
542fn emit_blockquote_prefix_tokens(builder: &mut GreenNodeBuilder<'static>, prefix: &str) {
543    for ch in prefix.chars() {
544        if ch == '>' {
545            builder.token(SyntaxKind::BLOCK_QUOTE_MARKER.into(), ">");
546        } else {
547            let mut buf = [0u8; 4];
548            builder.token(SyntaxKind::WHITESPACE.into(), ch.encode_utf8(&mut buf));
549        }
550    }
551}
552
553fn emit_content_line_prefixes<'a>(
554    builder: &mut GreenNodeBuilder<'static>,
555    content_line: &'a str,
556    bq_depth: usize,
557    base_indent: usize,
558) -> &'a str {
559    let after_blockquote = if bq_depth > 0 {
560        let stripped = strip_n_blockquote_markers(content_line, bq_depth);
561        let prefix_len = content_line.len().saturating_sub(stripped.len());
562        if prefix_len > 0 {
563            emit_blockquote_prefix_tokens(builder, &content_line[..prefix_len]);
564        }
565        stripped
566    } else {
567        content_line
568    };
569
570    let base_indent_bytes = byte_index_at_column(after_blockquote, base_indent);
571    if base_indent > 0 && after_blockquote.len() >= base_indent_bytes {
572        let indent_str = &after_blockquote[..base_indent_bytes];
573        if !indent_str.is_empty() {
574            builder.token(SyntaxKind::WHITESPACE.into(), indent_str);
575        }
576        &after_blockquote[base_indent_bytes..]
577    } else {
578        after_blockquote
579    }
580}
581
582fn strip_content_line_prefixes(content_line: &str, bq_depth: usize, base_indent: usize) -> &str {
583    let after_blockquote = if bq_depth > 0 {
584        strip_n_blockquote_markers(content_line, bq_depth)
585    } else {
586        content_line
587    };
588
589    let base_indent_bytes = byte_index_at_column(after_blockquote, base_indent);
590    if base_indent > 0 && after_blockquote.len() >= base_indent_bytes {
591        &after_blockquote[base_indent_bytes..]
592    } else {
593        after_blockquote
594    }
595}
596
597pub(crate) fn compute_hashpipe_preamble_line_count(
598    content_lines: &[&str],
599    prefix: &str,
600    bq_depth: usize,
601    base_indent: usize,
602) -> usize {
603    let mut line_idx = 0usize;
604
605    while line_idx < content_lines.len() {
606        let preview_after_indent =
607            strip_content_line_prefixes(content_lines[line_idx], bq_depth, base_indent);
608        let (preview_without_newline, _) = strip_newline(preview_after_indent);
609        if !is_hashpipe_option_line(preview_without_newline, prefix)
610            && !is_hashpipe_continuation_line(preview_without_newline, prefix)
611        {
612            break;
613        }
614        line_idx += 1;
615    }
616
617    line_idx
618}
619
620fn emit_hashpipe_option_line(
621    builder: &mut GreenNodeBuilder<'static>,
622    line_without_newline: &str,
623    prefix: &str,
624) -> bool {
625    if !is_hashpipe_option_line(line_without_newline, prefix) {
626        return false;
627    }
628
629    let trimmed_start = line_without_newline.trim_start_matches([' ', '\t']);
630    let leading_ws_len = line_without_newline
631        .len()
632        .saturating_sub(trimmed_start.len());
633    let after_prefix = &trimmed_start[prefix.len()..];
634    let ws_after_prefix_len = after_prefix
635        .len()
636        .saturating_sub(after_prefix.trim_start_matches([' ', '\t']).len());
637    let rest = &after_prefix[ws_after_prefix_len..];
638    let Some(colon_idx) = rest.find(':') else {
639        return false;
640    };
641
642    let key_with_ws = &rest[..colon_idx];
643    let key = key_with_ws.trim_end_matches([' ', '\t']);
644    if key.is_empty() {
645        return false;
646    }
647    let key_ws_suffix = &key_with_ws[key.len()..];
648
649    let after_colon = &rest[colon_idx + 1..];
650    let value_ws_prefix_len = after_colon
651        .len()
652        .saturating_sub(after_colon.trim_start_matches([' ', '\t']).len());
653    let value_with_trailing = &after_colon[value_ws_prefix_len..];
654    let value = value_with_trailing.trim_end_matches([' ', '\t']);
655    let value_ws_suffix = &value_with_trailing[value.len()..];
656
657    builder.start_node(SyntaxKind::CHUNK_OPTION.into());
658    if leading_ws_len > 0 {
659        builder.token(
660            SyntaxKind::WHITESPACE.into(),
661            &line_without_newline[..leading_ws_len],
662        );
663    }
664    builder.token(SyntaxKind::HASHPIPE_PREFIX.into(), prefix);
665    if ws_after_prefix_len > 0 {
666        builder.token(
667            SyntaxKind::WHITESPACE.into(),
668            &after_prefix[..ws_after_prefix_len],
669        );
670    }
671
672    builder.token(SyntaxKind::CHUNK_OPTION_KEY.into(), key);
673    if !key_ws_suffix.is_empty() {
674        builder.token(SyntaxKind::WHITESPACE.into(), key_ws_suffix);
675    }
676    builder.token(SyntaxKind::TEXT.into(), ":");
677    if value_ws_prefix_len > 0 {
678        builder.token(
679            SyntaxKind::WHITESPACE.into(),
680            &after_colon[..value_ws_prefix_len],
681        );
682    }
683
684    if !value.is_empty() {
685        if let Some(quote) = value.chars().next()
686            && (quote == '"' || quote == '\'')
687            && value.ends_with(quote)
688            && value.len() >= 2
689        {
690            builder.token(SyntaxKind::CHUNK_OPTION_QUOTE.into(), &value[..1]);
691            builder.token(
692                SyntaxKind::CHUNK_OPTION_VALUE.into(),
693                &value[1..value.len() - 1],
694            );
695            builder.token(
696                SyntaxKind::CHUNK_OPTION_QUOTE.into(),
697                &value[value.len() - 1..],
698            );
699        } else {
700            builder.token(SyntaxKind::CHUNK_OPTION_VALUE.into(), value);
701        }
702    }
703
704    if !value_ws_suffix.is_empty() {
705        builder.token(SyntaxKind::WHITESPACE.into(), value_ws_suffix);
706    }
707    builder.finish_node();
708    true
709}
710
711fn emit_hashpipe_continuation_line(
712    builder: &mut GreenNodeBuilder<'static>,
713    line_without_newline: &str,
714    prefix: &str,
715) -> bool {
716    if !is_hashpipe_continuation_line(line_without_newline, prefix) {
717        return false;
718    }
719    let trimmed_start = line_without_newline.trim_start_matches([' ', '\t']);
720    let leading_ws_len = line_without_newline
721        .len()
722        .saturating_sub(trimmed_start.len());
723    let after_prefix = &trimmed_start[prefix.len()..];
724    let ws_after_prefix_len = after_prefix
725        .len()
726        .saturating_sub(after_prefix.trim_start_matches([' ', '\t']).len());
727    let continuation_with_trailing = &after_prefix[ws_after_prefix_len..];
728    let continuation_value = continuation_with_trailing.trim_end_matches([' ', '\t']);
729    if continuation_value.is_empty() {
730        return false;
731    }
732    let continuation_ws_suffix = &continuation_with_trailing[continuation_value.len()..];
733
734    builder.start_node(SyntaxKind::CHUNK_OPTION.into());
735    if leading_ws_len > 0 {
736        builder.token(
737            SyntaxKind::WHITESPACE.into(),
738            &line_without_newline[..leading_ws_len],
739        );
740    }
741    builder.token(SyntaxKind::HASHPIPE_PREFIX.into(), prefix);
742    if ws_after_prefix_len > 0 {
743        builder.token(
744            SyntaxKind::WHITESPACE.into(),
745            &after_prefix[..ws_after_prefix_len],
746        );
747    }
748    builder.token(SyntaxKind::CHUNK_OPTION_VALUE.into(), continuation_value);
749    if !continuation_ws_suffix.is_empty() {
750        builder.token(SyntaxKind::WHITESPACE.into(), continuation_ws_suffix);
751    }
752    builder.finish_node();
753    true
754}
755
/// Report whether a line (without its newline) has the shape `<prefix> key: ...`.
///
/// Leading spaces/tabs before the prefix and between prefix and key are
/// allowed. The text before the first `:` must contain a non-blank key.
fn is_hashpipe_option_line(line_without_newline: &str, prefix: &str) -> bool {
    let blank = [' ', '\t'];
    line_without_newline
        .trim_start_matches(blank)
        .strip_prefix(prefix)
        .and_then(|after_prefix| after_prefix.trim_start_matches(blank).split_once(':'))
        .is_some_and(|(key, _)| !key.trim_end_matches(blank).is_empty())
}
772
/// Report whether a line (without its newline) is `<prefix>` followed by at
/// least one space or tab and then some non-blank text.
///
/// Leading spaces/tabs before the prefix are allowed.
fn is_hashpipe_continuation_line(line_without_newline: &str, prefix: &str) -> bool {
    let blank = [' ', '\t'];
    match line_without_newline
        .trim_start_matches(blank)
        .strip_prefix(prefix)
    {
        Some(after_prefix) => {
            after_prefix.starts_with(blank) && !after_prefix.trim_start_matches(blank).is_empty()
        }
        None => false,
    }
}
787
788/// Check if a line is a valid closing fence for the given fence info.
789pub(crate) fn is_closing_fence(content: &str, fence: &FenceInfo) -> bool {
790    let trimmed = strip_leading_spaces(content);
791
792    if !trimmed.starts_with(fence.fence_char) {
793        return false;
794    }
795
796    let closing_count = trimmed
797        .chars()
798        .take_while(|&c| c == fence.fence_char)
799        .count();
800
801    if closing_count < fence.fence_count {
802        return false;
803    }
804
805    // Rest of line must be empty
806    trimmed[closing_count..].trim().is_empty()
807}
808
809/// Emit chunk options as structured CST nodes while preserving all bytes.
810/// This parses {r, echo=TRUE, fig.cap="text"} into CHUNK_OPTIONS with individual CHUNK_OPTION nodes.
811fn emit_chunk_options(builder: &mut GreenNodeBuilder<'static>, content: &str) {
812    if content.trim().is_empty() {
813        builder.token(SyntaxKind::TEXT.into(), content);
814        return;
815    }
816
817    builder.start_node(SyntaxKind::CHUNK_OPTIONS.into());
818
819    let mut pos = 0;
820    let bytes = content.as_bytes();
821
822    while pos < bytes.len() {
823        // Emit leading whitespace/commas as TEXT
824        let ws_start = pos;
825        while pos < bytes.len() {
826            let ch = bytes[pos] as char;
827            if ch != ' ' && ch != '\t' && ch != ',' {
828                break;
829            }
830            pos += 1;
831        }
832        if pos > ws_start {
833            builder.token(SyntaxKind::TEXT.into(), &content[ws_start..pos]);
834        }
835
836        if pos >= bytes.len() {
837            break;
838        }
839
840        // Check if this is a closing brace
841        if bytes[pos] as char == '}' {
842            builder.token(SyntaxKind::TEXT.into(), &content[pos..pos + 1]);
843            pos += 1;
844            if pos < bytes.len() {
845                builder.token(SyntaxKind::TEXT.into(), &content[pos..]);
846            }
847            break;
848        }
849
850        // Read key
851        let key_start = pos;
852        while pos < bytes.len() {
853            let ch = bytes[pos] as char;
854            if ch == '=' || ch == ' ' || ch == '\t' || ch == ',' || ch == '}' {
855                break;
856            }
857            pos += 1;
858        }
859
860        if pos == key_start {
861            // No key found, emit rest as TEXT
862            if pos < bytes.len() {
863                builder.token(SyntaxKind::TEXT.into(), &content[pos..]);
864            }
865            break;
866        }
867
868        let key = &content[key_start..pos];
869
870        // Check for whitespace before '='
871        let ws_before_eq_start = pos;
872        while pos < bytes.len() && matches!(bytes[pos] as char, ' ' | '\t') {
873            pos += 1;
874        }
875
876        // Check if there's a value (=)
877        if pos < bytes.len() && bytes[pos] as char == '=' {
878            // Has value - emit as CHUNK_OPTION
879            builder.start_node(SyntaxKind::CHUNK_OPTION.into());
880            builder.token(SyntaxKind::CHUNK_OPTION_KEY.into(), key);
881
882            // Emit whitespace before '=' if any
883            if pos > ws_before_eq_start {
884                builder.token(SyntaxKind::TEXT.into(), &content[ws_before_eq_start..pos]);
885            }
886
887            builder.token(SyntaxKind::TEXT.into(), "=");
888            pos += 1; // consume '='
889
890            // Emit whitespace after '='
891            let ws_after_eq_start = pos;
892            while pos < bytes.len() && matches!(bytes[pos] as char, ' ' | '\t') {
893                pos += 1;
894            }
895            if pos > ws_after_eq_start {
896                builder.token(SyntaxKind::TEXT.into(), &content[ws_after_eq_start..pos]);
897            }
898
899            // Parse value (might be quoted)
900            if pos < bytes.len() {
901                let quote_char = bytes[pos] as char;
902                if quote_char == '"' || quote_char == '\'' {
903                    // Quoted value
904                    builder.token(
905                        SyntaxKind::CHUNK_OPTION_QUOTE.into(),
906                        &content[pos..pos + 1],
907                    );
908                    pos += 1; // consume opening quote
909
910                    let val_start = pos;
911                    let mut escaped = false;
912                    while pos < bytes.len() {
913                        let ch = bytes[pos] as char;
914                        if !escaped && ch == quote_char {
915                            break;
916                        }
917                        escaped = !escaped && ch == '\\';
918                        pos += 1;
919                    }
920
921                    if pos > val_start {
922                        builder.token(
923                            SyntaxKind::CHUNK_OPTION_VALUE.into(),
924                            &content[val_start..pos],
925                        );
926                    }
927
928                    // Emit closing quote
929                    if pos < bytes.len() && bytes[pos] as char == quote_char {
930                        builder.token(
931                            SyntaxKind::CHUNK_OPTION_QUOTE.into(),
932                            &content[pos..pos + 1],
933                        );
934                        pos += 1;
935                    }
936                } else {
937                    // Unquoted value - read until comma, space, closing brace, or balanced delimiter
938                    let val_start = pos;
939                    let mut depth = 0;
940
941                    while pos < bytes.len() {
942                        let ch = bytes[pos] as char;
943                        match ch {
944                            '(' | '[' | '{' => depth += 1,
945                            ')' | ']' => {
946                                if depth > 0 {
947                                    depth -= 1;
948                                } else {
949                                    break;
950                                }
951                            }
952                            '}' => {
953                                if depth > 0 {
954                                    depth -= 1;
955                                } else {
956                                    break; // End of chunk options
957                                }
958                            }
959                            ',' if depth == 0 => {
960                                break; // Next option
961                            }
962                            ' ' | '\t' if depth == 0 => {
963                                break; // Space separator
964                            }
965                            _ => {}
966                        }
967                        pos += 1;
968                    }
969
970                    if pos > val_start {
971                        builder.token(
972                            SyntaxKind::CHUNK_OPTION_VALUE.into(),
973                            &content[val_start..pos],
974                        );
975                    }
976                }
977            }
978
979            builder.finish_node(); // CHUNK_OPTION
980        } else {
981            // No '=' - this is a label or bareword option
982            // Emit any whitespace we skipped as TEXT
983            if pos > ws_before_eq_start {
984                builder.start_node(SyntaxKind::CHUNK_LABEL.into());
985                builder.token(SyntaxKind::TEXT.into(), key);
986                builder.finish_node(); // CHUNK_LABEL
987                builder.token(SyntaxKind::TEXT.into(), &content[ws_before_eq_start..pos]);
988            } else {
989                builder.start_node(SyntaxKind::CHUNK_LABEL.into());
990                builder.token(SyntaxKind::TEXT.into(), key);
991                builder.finish_node(); // CHUNK_LABEL
992            }
993        }
994    }
995
996    builder.finish_node(); // CHUNK_OPTIONS
997}
998
999/// Helper to parse info string and emit CodeInfo node with parsed components.
1000/// This breaks down the info string into its logical parts while preserving all bytes.
1001fn emit_code_info_node(builder: &mut GreenNodeBuilder<'static>, info_string: &str) {
1002    builder.start_node(SyntaxKind::CODE_INFO.into());
1003
1004    let info = InfoString::parse(info_string);
1005
1006    match &info.block_type {
1007        CodeBlockType::DisplayShortcut { language } => {
1008            // Simple case: python or python {.class}
1009            builder.token(SyntaxKind::CODE_LANGUAGE.into(), language);
1010
1011            // If there's more after the language, emit it as TEXT
1012            let after_lang = &info_string[language.len()..];
1013            if !after_lang.is_empty() {
1014                builder.token(SyntaxKind::TEXT.into(), after_lang);
1015            }
1016        }
1017        CodeBlockType::Executable { language } => {
1018            // Quarto: {r} or {r my-label, echo=FALSE}
1019            builder.token(SyntaxKind::TEXT.into(), "{");
1020            builder.token(SyntaxKind::CODE_LANGUAGE.into(), language);
1021
1022            // Parse and emit chunk options
1023            let start_offset = 1 + language.len(); // Skip "{r"
1024            if start_offset < info_string.len() {
1025                let rest = &info_string[start_offset..];
1026                emit_chunk_options(builder, rest);
1027            }
1028        }
1029        CodeBlockType::DisplayExplicit { classes } => {
1030            // Pandoc: {.python} or {#id .haskell .numberLines}
1031            // We need to find the first class in the raw string and emit everything around it
1032
1033            if let Some(lang) = classes.first() {
1034                // Find where ".lang" appears in the info string
1035                let needle = format!(".{}", lang);
1036                if let Some(lang_start) = info_string.find(&needle) {
1037                    // Emit everything before the language
1038                    if lang_start > 0 {
1039                        builder.token(SyntaxKind::TEXT.into(), &info_string[..lang_start]);
1040                    }
1041
1042                    // Emit the dot
1043                    builder.token(SyntaxKind::TEXT.into(), ".");
1044
1045                    // Emit the language
1046                    builder.token(SyntaxKind::CODE_LANGUAGE.into(), lang);
1047
1048                    // Emit everything after
1049                    let after_lang_start = lang_start + 1 + lang.len();
1050                    if after_lang_start < info_string.len() {
1051                        builder.token(SyntaxKind::TEXT.into(), &info_string[after_lang_start..]);
1052                    }
1053                } else {
1054                    // Couldn't find it, just emit as TEXT
1055                    builder.token(SyntaxKind::TEXT.into(), info_string);
1056                }
1057            } else {
1058                // No classes
1059                builder.token(SyntaxKind::TEXT.into(), info_string);
1060            }
1061        }
1062        CodeBlockType::Raw { .. } | CodeBlockType::Plain => {
1063            // No language, just emit as TEXT
1064            builder.token(SyntaxKind::TEXT.into(), info_string);
1065        }
1066    }
1067
1068    builder.finish_node(); // CodeInfo
1069}
1070
1071/// Parse a fenced code block, consuming lines from the parser.
1072/// Returns the new position after the code block.
1073/// Parse a fenced code block, consuming lines from the parser.
1074/// Returns the new position after the code block.
1075/// base_indent accounts for container indentation (e.g., footnotes) that should be stripped.
1076pub(crate) fn parse_fenced_code_block(
1077    builder: &mut GreenNodeBuilder<'static>,
1078    lines: &[&str],
1079    start_pos: usize,
1080    fence: FenceInfo,
1081    bq_depth: usize,
1082    base_indent: usize,
1083    first_line_override: Option<&str>,
1084) -> usize {
1085    // Start code block
1086    builder.start_node(SyntaxKind::CODE_BLOCK.into());
1087
1088    // Opening fence
1089    let (first_trimmed, _first_inner) = prepare_fence_open_line(
1090        builder,
1091        lines[start_pos],
1092        first_line_override,
1093        bq_depth,
1094        base_indent,
1095    );
1096
1097    builder.start_node(SyntaxKind::CODE_FENCE_OPEN.into());
1098    builder.token(
1099        SyntaxKind::CODE_FENCE_MARKER.into(),
1100        &first_trimmed[..fence.fence_count],
1101    );
1102
1103    // Emit any space between fence and info string (for losslessness)
1104    let after_fence = &first_trimmed[fence.fence_count..];
1105    if let Some(_space_stripped) = after_fence.strip_prefix(' ') {
1106        // There was a space - emit it as WHITESPACE
1107        builder.token(SyntaxKind::WHITESPACE.into(), " ");
1108        // Parse and emit the info string as a structured node
1109        if !fence.info_string.is_empty() {
1110            emit_code_info_node(builder, &fence.info_string);
1111        }
1112    } else if !fence.info_string.is_empty() {
1113        // No space - parse and emit info_string as a structured node
1114        emit_code_info_node(builder, &fence.info_string);
1115    }
1116
1117    // Extract and emit the actual newline from the opening fence line
1118    let (_, newline_str) = strip_newline(first_trimmed);
1119    if !newline_str.is_empty() {
1120        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1121    }
1122    builder.finish_node(); // CodeFenceOpen
1123
1124    let mut current_pos = start_pos + 1;
1125    let mut content_lines: Vec<&str> = Vec::new(); // Store original lines for lossless parsing
1126    let mut found_closing = false;
1127
1128    while current_pos < lines.len() {
1129        let line = lines[current_pos];
1130
1131        // Count blockquote markers to detect leaving the surrounding blockquote.
1132        let (line_bq_depth, _) = count_blockquote_markers(line);
1133
1134        // If blockquote depth decreases, code block ends (we've left the blockquote)
1135        if line_bq_depth < bq_depth {
1136            break;
1137        }
1138
1139        // Strip exactly the surrounding blockquote depth; preserve any additional `>` literally.
1140        let inner = if bq_depth > 0 {
1141            strip_n_blockquote_markers(line, bq_depth)
1142        } else {
1143            line
1144        };
1145
1146        // Strip base indent (footnote context) from content lines for fence detection
1147        let base_indent_bytes = byte_index_at_column(inner, base_indent);
1148        let inner_stripped = if base_indent > 0 && inner.len() >= base_indent_bytes {
1149            &inner[base_indent_bytes..]
1150        } else {
1151            inner
1152        };
1153
1154        // Check for closing fence
1155        if is_closing_fence(inner_stripped, &fence) {
1156            found_closing = true;
1157            current_pos += 1;
1158            break;
1159        }
1160
1161        // Store the original line for lossless parsing.
1162        content_lines.push(line);
1163        current_pos += 1;
1164    }
1165
1166    // Add content
1167    if !content_lines.is_empty() {
1168        builder.start_node(SyntaxKind::CODE_CONTENT.into());
1169        let hashpipe_prefix = match InfoString::parse(&fence.info_string).block_type {
1170            CodeBlockType::Executable { language } => hashpipe_comment_prefix(&language),
1171            _ => None,
1172        };
1173
1174        let mut line_idx = 0usize;
1175        if let Some(prefix) = hashpipe_prefix {
1176            let prepared_hashpipe_lines =
1177                compute_hashpipe_preamble_line_count(&content_lines, prefix, bq_depth, base_indent);
1178            if prepared_hashpipe_lines > 0 {
1179                builder.start_node(SyntaxKind::HASHPIPE_YAML_PREAMBLE.into());
1180                builder.start_node(SyntaxKind::HASHPIPE_YAML_CONTENT.into());
1181                while line_idx < prepared_hashpipe_lines {
1182                    let content_line = content_lines[line_idx];
1183                    let after_indent =
1184                        emit_content_line_prefixes(builder, content_line, bq_depth, base_indent);
1185                    let (line_without_newline, newline_str) = strip_newline(after_indent);
1186                    if !emit_hashpipe_option_line(builder, line_without_newline, prefix) {
1187                        let _ =
1188                            emit_hashpipe_continuation_line(builder, line_without_newline, prefix);
1189                    }
1190                    if !newline_str.is_empty() {
1191                        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1192                    }
1193                    line_idx += 1;
1194                }
1195                builder.finish_node(); // HASHPIPE_YAML_CONTENT
1196                builder.finish_node(); // HASHPIPE_YAML_PREAMBLE
1197            }
1198        }
1199
1200        for content_line in content_lines.iter().skip(line_idx) {
1201            let after_indent =
1202                emit_content_line_prefixes(builder, content_line, bq_depth, base_indent);
1203            let (line_without_newline, newline_str) = strip_newline(after_indent);
1204
1205            if !line_without_newline.is_empty() {
1206                builder.token(SyntaxKind::TEXT.into(), line_without_newline);
1207            }
1208
1209            if !newline_str.is_empty() {
1210                builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1211            }
1212        }
1213        builder.finish_node(); // CodeContent
1214    }
1215
1216    // Closing fence (if found)
1217    if found_closing {
1218        let closing_line = lines[current_pos - 1];
1219        let closing_after_blockquote = if bq_depth > 0 {
1220            let stripped = strip_n_blockquote_markers(closing_line, bq_depth);
1221            let prefix_len = closing_line.len().saturating_sub(stripped.len());
1222            if prefix_len > 0 {
1223                emit_blockquote_prefix_tokens(builder, &closing_line[..prefix_len]);
1224            }
1225            stripped
1226        } else {
1227            closing_line
1228        };
1229
1230        // Emit base indent for lossless parsing
1231        let base_indent_bytes = byte_index_at_column(closing_after_blockquote, base_indent);
1232        if base_indent > 0 && closing_after_blockquote.len() >= base_indent_bytes {
1233            let indent_str = &closing_after_blockquote[..base_indent_bytes];
1234            if !indent_str.is_empty() {
1235                builder.token(SyntaxKind::WHITESPACE.into(), indent_str);
1236            }
1237        }
1238
1239        // Strip base indent to get fence
1240        let closing_stripped =
1241            if base_indent > 0 && closing_after_blockquote.len() >= base_indent_bytes {
1242                &closing_after_blockquote[base_indent_bytes..]
1243            } else {
1244                closing_after_blockquote
1245            };
1246        let (closing_without_newline, newline_str) = strip_newline(closing_stripped);
1247        let closing_trimmed_start = strip_leading_spaces(closing_without_newline);
1248        let leading_ws_len = closing_without_newline.len() - closing_trimmed_start.len();
1249        let closing_count = closing_trimmed_start
1250            .chars()
1251            .take_while(|&c| c == fence.fence_char)
1252            .count();
1253        let trailing_after_marker = &closing_trimmed_start[closing_count..];
1254
1255        builder.start_node(SyntaxKind::CODE_FENCE_CLOSE.into());
1256        if leading_ws_len > 0 {
1257            builder.token(
1258                SyntaxKind::WHITESPACE.into(),
1259                &closing_without_newline[..leading_ws_len],
1260            );
1261        }
1262        builder.token(
1263            SyntaxKind::CODE_FENCE_MARKER.into(),
1264            &closing_trimmed_start[..closing_count],
1265        );
1266        if !trailing_after_marker.is_empty() {
1267            builder.token(SyntaxKind::WHITESPACE.into(), trailing_after_marker);
1268        }
1269        if !newline_str.is_empty() {
1270            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1271        }
1272        builder.finish_node(); // CodeFenceClose
1273    }
1274
1275    builder.finish_node(); // CodeBlock
1276
1277    current_pos
1278}
1279
1280/// Parse a GFM math fence (``` math ... ```) as DISPLAY_MATH while preserving bytes.
1281pub(crate) fn parse_fenced_math_block(
1282    builder: &mut GreenNodeBuilder<'static>,
1283    lines: &[&str],
1284    start_pos: usize,
1285    fence: FenceInfo,
1286    bq_depth: usize,
1287    base_indent: usize,
1288    first_line_override: Option<&str>,
1289) -> usize {
1290    builder.start_node(SyntaxKind::DISPLAY_MATH.into());
1291
1292    let (first_trimmed, _first_inner) = prepare_fence_open_line(
1293        builder,
1294        lines[start_pos],
1295        first_line_override,
1296        bq_depth,
1297        base_indent,
1298    );
1299    let (opening_without_newline, opening_newline) = strip_newline(first_trimmed);
1300    builder.token(
1301        SyntaxKind::DISPLAY_MATH_MARKER.into(),
1302        opening_without_newline,
1303    );
1304    if !opening_newline.is_empty() {
1305        builder.token(SyntaxKind::NEWLINE.into(), opening_newline);
1306    }
1307
1308    let mut current_pos = start_pos + 1;
1309    let mut content_lines: Vec<&str> = Vec::new();
1310    let mut found_closing = false;
1311
1312    while current_pos < lines.len() {
1313        let line = lines[current_pos];
1314        let (line_bq_depth, _) = count_blockquote_markers(line);
1315        if line_bq_depth < bq_depth {
1316            break;
1317        }
1318
1319        let inner = if bq_depth > 0 {
1320            strip_n_blockquote_markers(line, bq_depth)
1321        } else {
1322            line
1323        };
1324        let base_indent_bytes = byte_index_at_column(inner, base_indent);
1325        let inner_stripped = if base_indent > 0 && inner.len() >= base_indent_bytes {
1326            &inner[base_indent_bytes..]
1327        } else {
1328            inner
1329        };
1330
1331        if is_closing_fence(inner_stripped, &fence) {
1332            found_closing = true;
1333            current_pos += 1;
1334            break;
1335        }
1336
1337        content_lines.push(line);
1338        current_pos += 1;
1339    }
1340
1341    if !content_lines.is_empty() {
1342        let mut content = String::new();
1343        for content_line in content_lines {
1344            let after_indent =
1345                emit_content_line_prefixes(builder, content_line, bq_depth, base_indent);
1346            let (line_without_newline, newline_str) = strip_newline(after_indent);
1347            content.push_str(line_without_newline);
1348            content.push_str(newline_str);
1349        }
1350        builder.token(SyntaxKind::TEXT.into(), &content);
1351    }
1352
1353    if found_closing {
1354        let closing_line = lines[current_pos - 1];
1355        let closing_after_blockquote = if bq_depth > 0 {
1356            let stripped = strip_n_blockquote_markers(closing_line, bq_depth);
1357            let prefix_len = closing_line.len().saturating_sub(stripped.len());
1358            if prefix_len > 0 {
1359                emit_blockquote_prefix_tokens(builder, &closing_line[..prefix_len]);
1360            }
1361            stripped
1362        } else {
1363            closing_line
1364        };
1365
1366        let base_indent_bytes = byte_index_at_column(closing_after_blockquote, base_indent);
1367        if base_indent > 0 && closing_after_blockquote.len() >= base_indent_bytes {
1368            let indent_str = &closing_after_blockquote[..base_indent_bytes];
1369            if !indent_str.is_empty() {
1370                builder.token(SyntaxKind::WHITESPACE.into(), indent_str);
1371            }
1372        }
1373
1374        let closing_stripped =
1375            if base_indent > 0 && closing_after_blockquote.len() >= base_indent_bytes {
1376                &closing_after_blockquote[base_indent_bytes..]
1377            } else {
1378                closing_after_blockquote
1379            };
1380        let (closing_without_newline, newline_str) = strip_newline(closing_stripped);
1381        let closing_trimmed_start = strip_leading_spaces(closing_without_newline);
1382        let leading_ws_len = closing_without_newline.len() - closing_trimmed_start.len();
1383        let closing_count = closing_trimmed_start
1384            .chars()
1385            .take_while(|&c| c == fence.fence_char)
1386            .count();
1387        let trailing_after_marker = &closing_trimmed_start[closing_count..];
1388
1389        if leading_ws_len > 0 {
1390            builder.token(
1391                SyntaxKind::WHITESPACE.into(),
1392                &closing_without_newline[..leading_ws_len],
1393            );
1394        }
1395        builder.token(
1396            SyntaxKind::DISPLAY_MATH_MARKER.into(),
1397            &closing_trimmed_start[..closing_count],
1398        );
1399        if !trailing_after_marker.is_empty() {
1400            builder.token(SyntaxKind::WHITESPACE.into(), trailing_after_marker);
1401        }
1402        if !newline_str.is_empty() {
1403            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1404        }
1405    }
1406
1407    builder.finish_node(); // DisplayMath
1408    current_pos
1409}
1410
1411#[cfg(test)]
1412mod tests {
1413    use super::*;
1414
1415    #[test]
1416    fn test_backtick_fence() {
1417        let fence = try_parse_fence_open("```python").unwrap();
1418        assert_eq!(fence.fence_char, '`');
1419        assert_eq!(fence.fence_count, 3);
1420        assert_eq!(fence.info_string, "python");
1421    }
1422
1423    #[test]
1424    fn test_tilde_fence() {
1425        let fence = try_parse_fence_open("~~~").unwrap();
1426        assert_eq!(fence.fence_char, '~');
1427        assert_eq!(fence.fence_count, 3);
1428        assert_eq!(fence.info_string, "");
1429    }
1430
1431    #[test]
1432    fn test_long_fence() {
1433        let fence = try_parse_fence_open("`````").unwrap();
1434        assert_eq!(fence.fence_count, 5);
1435    }
1436
1437    #[test]
1438    fn test_two_backticks_invalid() {
1439        assert!(try_parse_fence_open("``").is_none());
1440    }
1441
1442    #[test]
1443    fn test_backtick_fence_with_backtick_in_info_is_invalid() {
1444        assert!(try_parse_fence_open("`````hi````there`````").is_none());
1445    }
1446
1447    #[test]
1448    fn test_closing_fence() {
1449        let fence = FenceInfo {
1450            fence_char: '`',
1451            fence_count: 3,
1452            info_string: String::new(),
1453        };
1454        assert!(is_closing_fence("```", &fence));
1455        assert!(is_closing_fence("````", &fence));
1456        assert!(!is_closing_fence("``", &fence));
1457        assert!(!is_closing_fence("~~~", &fence));
1458    }
1459
1460    #[test]
1461    fn test_fenced_code_preserves_leading_gt() {
1462        let input = "```\n> foo\n```\n";
1463        let tree = crate::parse(input, None);
1464        assert_eq!(tree.text().to_string(), input);
1465    }
1466
1467    #[test]
1468    fn test_fenced_code_in_blockquote_preserves_opening_fence_marker() {
1469        let input = "> ```\n> code\n> ```\n";
1470        let tree = crate::parse(input, None);
1471        assert_eq!(tree.text().to_string(), input);
1472    }
1473
1474    #[test]
1475    fn test_fenced_code_in_definition_list_with_unicode_content_does_not_panic() {
1476        let input = "Term\n: ```\nā”œā”€ā”€ pyproject.toml\n```\n";
1477        let tree = crate::parse(input, None);
1478        assert_eq!(tree.text().to_string(), input);
1479    }
1480
1481    #[test]
1482    fn test_info_string_plain() {
1483        let info = InfoString::parse("");
1484        assert_eq!(info.block_type, CodeBlockType::Plain);
1485        assert!(info.attributes.is_empty());
1486    }
1487
1488    #[test]
1489    fn test_info_string_shortcut() {
1490        let info = InfoString::parse("python");
1491        assert_eq!(
1492            info.block_type,
1493            CodeBlockType::DisplayShortcut {
1494                language: "python".to_string()
1495            }
1496        );
1497        assert!(info.attributes.is_empty());
1498    }
1499
1500    #[test]
1501    fn test_info_string_shortcut_with_trailing() {
1502        let info = InfoString::parse("python extra stuff");
1503        assert_eq!(
1504            info.block_type,
1505            CodeBlockType::DisplayShortcut {
1506                language: "python".to_string()
1507            }
1508        );
1509    }
1510
1511    #[test]
1512    fn test_info_string_display_explicit() {
1513        let info = InfoString::parse("{.python}");
1514        assert_eq!(
1515            info.block_type,
1516            CodeBlockType::DisplayExplicit {
1517                classes: vec!["python".to_string()]
1518            }
1519        );
1520    }
1521
1522    #[test]
1523    fn test_info_string_display_explicit_multiple() {
1524        let info = InfoString::parse("{.python .numberLines}");
1525        assert_eq!(
1526            info.block_type,
1527            CodeBlockType::DisplayExplicit {
1528                classes: vec!["python".to_string(), "numberLines".to_string()]
1529            }
1530        );
1531    }
1532
1533    #[test]
1534    fn test_info_string_executable() {
1535        let info = InfoString::parse("{python}");
1536        assert_eq!(
1537            info.block_type,
1538            CodeBlockType::Executable {
1539                language: "python".to_string()
1540            }
1541        );
1542    }
1543
1544    #[test]
1545    fn test_info_string_executable_with_options() {
1546        let info = InfoString::parse("{python echo=false warning=true}");
1547        assert_eq!(
1548            info.block_type,
1549            CodeBlockType::Executable {
1550                language: "python".to_string()
1551            }
1552        );
1553        assert_eq!(info.attributes.len(), 2);
1554        assert_eq!(
1555            info.attributes[0],
1556            ("echo".to_string(), Some("false".to_string()))
1557        );
1558        assert_eq!(
1559            info.attributes[1],
1560            ("warning".to_string(), Some("true".to_string()))
1561        );
1562    }
1563
1564    #[test]
1565    fn test_info_string_executable_with_commas() {
1566        let info = InfoString::parse("{r, echo=FALSE, warning=TRUE}");
1567        assert_eq!(
1568            info.block_type,
1569            CodeBlockType::Executable {
1570                language: "r".to_string()
1571            }
1572        );
1573        assert_eq!(info.attributes.len(), 2);
1574        assert_eq!(
1575            info.attributes[0],
1576            ("echo".to_string(), Some("FALSE".to_string()))
1577        );
1578        assert_eq!(
1579            info.attributes[1],
1580            ("warning".to_string(), Some("TRUE".to_string()))
1581        );
1582    }
1583
1584    #[test]
1585    fn test_info_string_executable_mixed_commas_spaces() {
1586        // R-style with commas and spaces
1587        let info = InfoString::parse("{r, echo=FALSE, label=\"my chunk\"}");
1588        assert_eq!(
1589            info.block_type,
1590            CodeBlockType::Executable {
1591                language: "r".to_string()
1592            }
1593        );
1594        assert_eq!(info.attributes.len(), 2);
1595        assert_eq!(
1596            info.attributes[0],
1597            ("echo".to_string(), Some("FALSE".to_string()))
1598        );
1599        assert_eq!(
1600            info.attributes[1],
1601            ("label".to_string(), Some("my chunk".to_string()))
1602        );
1603    }
1604
1605    #[test]
1606    fn test_info_string_mixed_shortcut_and_attrs() {
1607        let info = InfoString::parse("python {.numberLines}");
1608        assert_eq!(
1609            info.block_type,
1610            CodeBlockType::DisplayShortcut {
1611                language: "python".to_string()
1612            }
1613        );
1614        assert_eq!(info.attributes.len(), 1);
1615        assert_eq!(info.attributes[0], (".numberLines".to_string(), None));
1616    }
1617
1618    #[test]
1619    fn test_info_string_mixed_with_key_value() {
1620        let info = InfoString::parse("python {.numberLines startFrom=\"100\"}");
1621        assert_eq!(
1622            info.block_type,
1623            CodeBlockType::DisplayShortcut {
1624                language: "python".to_string()
1625            }
1626        );
1627        assert_eq!(info.attributes.len(), 2);
1628        assert_eq!(info.attributes[0], (".numberLines".to_string(), None));
1629        assert_eq!(
1630            info.attributes[1],
1631            ("startFrom".to_string(), Some("100".to_string()))
1632        );
1633    }
1634
/// An explicit brace block with an id, two classes and a key/value pair:
/// the classes (without dots) land in the block type, while the id and the
/// key/value must be present among the attributes.
#[test]
fn test_info_string_explicit_with_id_and_classes() {
    let parsed = InfoString::parse("{#mycode .haskell .numberLines startFrom=\"100\"}");

    let expected_classes: Vec<String> = ["haskell", "numberLines"]
        .iter()
        .map(|class| class.to_string())
        .collect();
    assert_eq!(
        parsed.block_type,
        CodeBlockType::DisplayExplicit {
            classes: expected_classes,
        }
    );

    // Scan the attribute list once, noting the two non-class entries.
    let mut has_id = false;
    let mut has_start = false;
    for (key, value) in &parsed.attributes {
        if key == "#mycode" {
            has_id = true;
        }
        if key == "startFrom" && value.as_deref() == Some("100") {
            has_start = true;
        }
    }
    assert!(has_id);
    assert!(has_start);
}
1653
/// `{=html}` is a raw block for the `html` format and carries no attributes.
#[test]
fn test_info_string_raw_html() {
    let parsed = InfoString::parse("{=html}");
    let expected_type = CodeBlockType::Raw {
        format: String::from("html"),
    };

    assert_eq!(parsed.block_type, expected_type);
    assert!(parsed.attributes.is_empty());
}
1665
/// `{=latex}` is a raw block whose format is `latex`.
#[test]
fn test_info_string_raw_latex() {
    let parsed = InfoString::parse("{=latex}");
    let expected_type = CodeBlockType::Raw {
        format: String::from("latex"),
    };
    assert_eq!(parsed.block_type, expected_type);
}
1676
/// `{=openxml}` is a raw block whose format is `openxml`.
#[test]
fn test_info_string_raw_openxml() {
    let parsed = InfoString::parse("{=openxml}");
    let expected_type = CodeBlockType::Raw {
        format: String::from("openxml"),
    };
    assert_eq!(parsed.block_type, expected_type);
}
1687
/// `{=ms}` is a raw block whose format is `ms`.
#[test]
fn test_info_string_raw_ms() {
    let parsed = InfoString::parse("{=ms}");
    let expected_type = CodeBlockType::Raw {
        format: String::from("ms"),
    };
    assert_eq!(parsed.block_type, expected_type);
}
1698
/// `{=html5}` is a raw block whose format is `html5` (digits are kept).
#[test]
fn test_info_string_raw_html5() {
    let parsed = InfoString::parse("{=html5}");
    let expected_type = CodeBlockType::Raw {
        format: String::from("html5"),
    };
    assert_eq!(parsed.block_type, expected_type);
}
1709
/// A `=format` token accompanied by another attribute must not yield the
/// raw `html` block type.
///
/// Only the exact value `Raw { format: "html" }` is excluded here; the test
/// does not pin which block type is produced instead.
#[test]
fn test_info_string_raw_not_combined_with_attrs() {
    let parsed = InfoString::parse("{=html .class}");
    let raw_html = CodeBlockType::Raw {
        format: String::from("html"),
    };
    assert_ne!(parsed.block_type, raw_html);
}
1722
/// Pandoc display-block attributes are delimited by spaces: two classes and
/// one quoted key/value pair come back as three entries in source order.
#[test]
fn test_parse_pandoc_attributes_spaces() {
    let flag = |name: &str| (name.to_string(), None::<String>);
    let pair = |key: &str, value: &str| (key.to_string(), Some(value.to_string()));

    let attrs = InfoString::parse_pandoc_attributes(".python .numberLines startFrom=\"10\"");
    let expected = [flag(".python"), flag(".numberLines"), pair("startFrom", "10")];

    assert_eq!(attrs.len(), 3);
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&attrs[idx], want);
    }
}
1732
/// Comma-free Pandoc attributes: an id, a class and an unquoted `key=value`
/// separated only by spaces yield three entries in source order.
#[test]
fn test_parse_pandoc_attributes_no_commas() {
    let flag = |name: &str| (name.to_string(), None::<String>);
    let pair = |key: &str, value: &str| (key.to_string(), Some(value.to_string()));

    let attrs = InfoString::parse_pandoc_attributes("#id .class key=value");
    let expected = [flag("#id"), flag(".class"), pair("key", "value")];

    assert_eq!(attrs.len(), 3);
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&attrs[idx], want);
    }
}
1742
/// Quarto/RMarkdown chunk options are comma-delimited: the engine name and
/// two `key=value` options produce three entries.
#[test]
fn test_parse_chunk_options_commas() {
    let flag = |name: &str| (name.to_string(), None::<String>);
    let pair = |key: &str, value: &str| (key.to_string(), Some(value.to_string()));

    let attrs = InfoString::parse_chunk_options("r, echo=FALSE, warning=TRUE");
    let expected = [flag("r"), pair("echo", "FALSE"), pair("warning", "TRUE")];

    assert_eq!(attrs.len(), 3);
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&attrs[idx], want);
    }
}
1752
/// Commas alone (with no surrounding spaces) are enough to separate chunk
/// options.
#[test]
fn test_parse_chunk_options_no_spaces() {
    let flag = |name: &str| (name.to_string(), None::<String>);
    let pair = |key: &str, value: &str| (key.to_string(), Some(value.to_string()));

    let attrs = InfoString::parse_chunk_options("r,echo=FALSE,warning=TRUE");
    let expected = [flag("r"), pair("echo", "FALSE"), pair("warning", "TRUE")];

    assert_eq!(attrs.len(), 3);
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&attrs[idx], want);
    }
}
1762
/// Chunk options may mix separators: a space after the engine name and a
/// comma between the two `key=value` options still give three entries.
#[test]
fn test_parse_chunk_options_mixed() {
    let flag = |name: &str| (name.to_string(), None::<String>);
    let pair = |key: &str, value: &str| (key.to_string(), Some(value.to_string()));

    let attrs = InfoString::parse_chunk_options("python echo=False, warning=True");
    let expected = [flag("python"), pair("echo", "False"), pair("warning", "True")];

    assert_eq!(attrs.len(), 3);
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&attrs[idx], want);
    }
}
1772
/// A comma inside an R function call belongs to the option value: the whole
/// `c("foo", "bar")` call is kept as the value of `dependson`.
#[test]
fn test_parse_chunk_options_nested_function_call() {
    let attrs = InfoString::parse_chunk_options(r#"r pep-cg, dependson=c("foo", "bar")"#);

    let expected = [
        ("r".to_string(), None),
        ("pep-cg".to_string(), None),
        (
            "dependson".to_string(),
            Some(r#"c("foo", "bar")"#.to_string()),
        ),
    ];

    assert_eq!(attrs.len(), 3);
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&attrs[idx], want);
    }
}
1788
/// Spaces and commas inside a function-call value do not split it: the
/// `file.path(...)` call is returned intact as the value of `cache.path`.
#[test]
fn test_parse_chunk_options_nested_with_spaces() {
    let attrs = InfoString::parse_chunk_options(r#"r, cache.path=file.path("cache", "dir")"#);

    let expected = [
        ("r".to_string(), None),
        (
            "cache.path".to_string(),
            Some(r#"file.path("cache", "dir")"#.to_string()),
        ),
    ];

    assert_eq!(attrs.len(), 2);
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&attrs[idx], want);
    }
}
1803
/// Calls nested inside calls are still a single value: the entire
/// `list(...)` expression, inner commas included, is the value of `x`.
#[test]
fn test_parse_chunk_options_deeply_nested() {
    let attrs = InfoString::parse_chunk_options(r#"r, x=list(a=c(1,2), b=c(3,4))"#);

    let expected = [
        ("r".to_string(), None),
        (
            "x".to_string(),
            Some(r#"list(a=c(1,2), b=c(3,4))"#.to_string()),
        ),
    ];

    assert_eq!(attrs.len(), 2);
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&attrs[idx], want);
    }
}
1818
/// Square brackets and curly braces protect their inner commas as well, so
/// `df[rows, cols]` and `{a:1, b:2}` each stay a single option value.
#[test]
fn test_parse_chunk_options_brackets_and_braces() {
    let pair = |key: &str, value: &str| (key.to_string(), Some(value.to_string()));

    let attrs = InfoString::parse_chunk_options(r#"r, data=df[rows, cols], config={a:1, b:2}"#);
    let expected = [
        ("r".to_string(), None),
        pair("data", "df[rows, cols]"),
        pair("config", "{a:1, b:2}"),
    ];

    assert_eq!(attrs.len(), 3);
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&attrs[idx], want);
    }
}
1834
/// Parentheses inside a quoted string are plain text: they must not affect
/// how the following option is split off, and the value is returned without
/// its surrounding quotes.
#[test]
fn test_parse_chunk_options_quotes_with_parens() {
    let pair = |key: &str, value: &str| (key.to_string(), Some(value.to_string()));

    let attrs = InfoString::parse_chunk_options(r#"r, label="test (with parens)", echo=TRUE"#);
    let expected = [
        ("r".to_string(), None),
        pair("label", "test (with parens)"),
        pair("echo", "TRUE"),
    ];

    assert_eq!(attrs.len(), 3);
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&attrs[idx], want);
    }
}
1848
/// Backslash-escaped quotes inside a quoted value: the result has the outer
/// quotes removed and each `\"` turned into a plain `"`.
#[test]
fn test_parse_chunk_options_escaped_quotes() {
    let attrs = InfoString::parse_chunk_options(r#"r, label="has \"quoted\" text""#);

    let expected = [
        ("r".to_string(), None),
        (
            "label".to_string(),
            Some(r#"has "quoted" text"#.to_string()),
        ),
    ];

    assert_eq!(attrs.len(), 2);
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&attrs[idx], want);
    }
}
1864
/// The two brace syntaxes are told apart: a leading `.class` gives an
/// explicit display block, while a bare engine name gives an executable
/// chunk whose two comma-separated options are reported as attributes.
#[test]
fn test_display_vs_executable_parsing() {
    // Space-delimited Pandoc form.
    let display = InfoString::parse("{.python .numberLines startFrom=\"10\"}");
    let is_display_explicit = matches!(
        display.block_type,
        CodeBlockType::DisplayExplicit { .. }
    );
    assert!(is_display_explicit);

    // Comma-delimited chunk-option form.
    let chunk = InfoString::parse("{r, echo=FALSE, warning=TRUE}");
    let is_executable = matches!(chunk.block_type, CodeBlockType::Executable { .. });
    assert!(is_executable);
    assert_eq!(chunk.attributes.len(), 2);
}
1879
/// In `{r mylabel}` the bare word after the engine name is reported as
/// `label=mylabel`.
#[test]
fn test_info_string_executable_implicit_label() {
    let parsed = InfoString::parse("{r mylabel}");

    assert_eq!(
        parsed.block_type,
        CodeBlockType::Executable {
            language: String::from("r"),
        }
    );

    let expected_attrs: Vec<(String, Option<String>)> =
        vec![(String::from("label"), Some(String::from("mylabel")))];
    assert_eq!(parsed.attributes, expected_attrs);
}
1894
/// In `{r mylabel, echo=FALSE}` the implicit label comes first, followed by
/// the explicit `echo` option.
#[test]
fn test_info_string_executable_implicit_label_with_options() {
    let parsed = InfoString::parse("{r mylabel, echo=FALSE}");

    assert_eq!(
        parsed.block_type,
        CodeBlockType::Executable {
            language: String::from("r"),
        }
    );

    let expected_attrs: Vec<(String, Option<String>)> = vec![
        (String::from("label"), Some(String::from("mylabel"))),
        (String::from("echo"), Some(String::from("FALSE"))),
    ];
    assert_eq!(parsed.attributes, expected_attrs);
}
1913
/// A `#|` option opened with `|` and followed by two indented `#|` lines
/// counts as a three-line preamble; the code line after it is not counted.
#[test]
fn test_compute_hashpipe_preamble_line_count_for_block_scalar() {
    let chunk_body = vec![
        "#| fig-cap: |\n",
        "#|   A caption\n",
        "#|   spanning lines\n",
        "a <- 1\n",
    ];
    assert_eq!(
        compute_hashpipe_preamble_line_count(&chunk_body, "#|", 0, 0),
        3
    );
}
1925
/// Counting ends at the first line that is not a `#|` option: a `#|` line
/// appearing after ordinary code is not part of the preamble.
#[test]
fn test_compute_hashpipe_preamble_line_count_stops_at_non_option() {
    let chunk_body = vec!["#| label: fig-plot\n", "plot(1:10)\n", "#| echo: false\n"];
    assert_eq!(
        compute_hashpipe_preamble_line_count(&chunk_body, "#|", 0, 0),
        1
    );
}
1932
/// A line holding only the `#|` prefix ends the preamble; only the option
/// line before it is counted.
#[test]
fn test_compute_hashpipe_preamble_line_count_stops_at_standalone_prefix() {
    let chunk_body = vec!["#| label: fig-plot\n", "#|\n", "plot(1:10)\n"];
    assert_eq!(
        compute_hashpipe_preamble_line_count(&chunk_body, "#|", 0, 0),
        1
    );
}
1939}