Skip to main content

panache_parser/parser/blocks/
code_blocks.rs

1//! Fenced code block parsing utilities.
2
3use crate::parser::utils::chunk_options::hashpipe_comment_prefix;
4use crate::syntax::SyntaxKind;
5use rowan::GreenNodeBuilder;
6
7use super::blockquotes::{count_blockquote_markers, strip_n_blockquote_markers};
8use super::container_prefix::{ContainerPrefix, StripOp, advance_columns};
9use crate::parser::utils::container_stack::byte_index_at_column;
10
11/// Strip up to `list_content_col` columns of leading whitespace,
12/// stopping at the first non-whitespace byte (newlines stop the scan
13/// rather than being consumed — important on blank lines inside a
14/// fenced code block). Mirrors the legacy
15/// `byte_index_at_column`-based strip used by the formatter.
16pub(crate) fn strip_list_indent(line: &str, list_content_col: usize) -> &str {
17    if list_content_col == 0 {
18        return line;
19    }
20    let idx = byte_index_at_column(line, list_content_col);
21    &line[idx..]
22}
23
24/// Returns `true` iff the outermost active container in `prefix` is a
25/// blockquote (i.e. `prefix.ops()` starts with `BlockQuoteMarker`
26/// before any `ListAdvance`). Used to pick the bq-vs-list strip order
27/// on content/lookahead lines.
28pub(crate) fn bq_outer_of_list(prefix: &ContainerPrefix) -> bool {
29    for op in prefix.ops() {
30        match op {
31            StripOp::BlockQuoteMarker => return true,
32            StripOp::ListAdvance(_) => return false,
33            StripOp::ContentIndent(_) => {}
34        }
35    }
36    false
37}
38
39use crate::parser::utils::helpers::{
40    strip_leading_spaces, strip_newline, trim_end_spaces_tabs, trim_start_spaces_tabs,
41};
42
/// Represents the type of code block based on its info string syntax.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CodeBlockType {
    /// Display-only block with shortcut syntax, e.g. an info string of `python`.
    DisplayShortcut { language: String },
    /// Display-only block with explicit Pandoc syntax, e.g. `{.python}`.
    DisplayExplicit { classes: Vec<String> },
    /// Executable chunk (Quarto/RMarkdown), e.g. `{python}`.
    Executable { language: String },
    /// Raw block for a specific output format, e.g. `{=html}`.
    Raw { format: String },
    /// No language specified (empty info string, or braces holding only attributes).
    Plain,
}

/// Parsed attributes from a code block info string.
#[derive(Debug, Clone, PartialEq)]
pub struct InfoString {
    /// The info string exactly as handed to [`InfoString::parse`] (not trimmed).
    pub raw: String,
    /// Classification derived from the info string syntax.
    pub block_type: CodeBlockType,
    /// Remaining `(key, optional value)` pairs, in source order.
    pub attributes: Vec<(String, Option<String>)>,
}

impl InfoString {
    /// Parse an info string into structured attributes.
    ///
    /// After trimming surrounding whitespace the following forms are tried in
    /// order:
    ///
    /// 1. empty                     -> [`CodeBlockType::Plain`]
    /// 2. `{...}`                   -> explicit attribute block (raw, display,
    ///    executable or attribute-only, see `parse_explicit`)
    /// 3. `lang {...}` (mixed form) -> [`CodeBlockType::DisplayShortcut`] with
    ///    the braces parsed as attributes
    /// 4. anything else             -> [`CodeBlockType::DisplayShortcut`] using
    ///    the first whitespace-separated word as the language
    ///
    /// `raw` is stored verbatim in the result regardless of the form matched.
    pub fn parse(raw: &str) -> Self {
        let trimmed = raw.trim();
        if trimmed.is_empty() {
            return Self::from_parts(raw, CodeBlockType::Plain, Vec::new());
        }

        // The whole info string is one `{...}` attribute block.
        if let Some(content) = Self::strip_braces(trimmed) {
            return Self::parse_explicit(raw, content);
        }

        // Mixed form: a single-word language followed by `{...}`,
        // e.g. `python {.numberLines}`.
        let mixed = trimmed.find('{').and_then(|brace_start| {
            let language = trimmed[..brace_start].trim();
            if language.is_empty() || language.contains(char::is_whitespace) {
                return None;
            }
            Self::strip_braces(&trimmed[brace_start..]).map(|content| (language, content))
        });
        if let Some((language, content)) = mixed {
            return Self::from_parts(
                raw,
                CodeBlockType::DisplayShortcut {
                    language: language.to_string(),
                },
                Self::parse_attributes(content),
            );
        }

        // Shortcut form: only the first word names the language.
        let language = trimmed.split_whitespace().next().unwrap_or(trimmed);
        Self::from_parts(
            raw,
            CodeBlockType::DisplayShortcut {
                language: language.to_string(),
            },
            Vec::new(),
        )
    }

    /// Assemble an `InfoString`, copying `raw` verbatim.
    fn from_parts(
        raw: &str,
        block_type: CodeBlockType,
        attributes: Vec<(String, Option<String>)>,
    ) -> Self {
        InfoString {
            raw: raw.to_string(),
            block_type,
            attributes,
        }
    }

    /// Return the text between a leading `{` and a trailing `}`, if both exist.
    fn strip_braces(text: &str) -> Option<&str> {
        text.strip_prefix('{')
            .and_then(|rest| rest.strip_suffix('}'))
    }

    /// Classify the content of an explicit `{...}` block (braces already removed).
    ///
    /// * `=format` with a non-empty, purely alphanumeric name is a raw block.
    /// * Otherwise the first bareword (a key without a value) that does not
    ///   start with `#` decides: a leading `.` means a display block, any
    ///   other bareword is the language of an executable chunk.
    /// * With no such bareword the block is `Plain` and the content is kept
    ///   as Pandoc-style attributes.
    fn parse_explicit(raw: &str, content: &str) -> Self {
        // Raw attribute is checked first. An alphanumeric-only name cannot
        // contain whitespace, so no separate whitespace check is needed.
        let raw_format = content
            .trim()
            .strip_prefix('=')
            .filter(|name| !name.is_empty() && name.chars().all(char::is_alphanumeric));
        if let Some(format) = raw_format {
            return Self::from_parts(
                raw,
                CodeBlockType::Raw {
                    format: format.to_string(),
                },
                Vec::new(),
            );
        }

        // The comma-aware parse is used for detection and, for executable
        // chunks, reused as the final attribute list. Remembering the index
        // (rather than searching again by name) keeps a valued key that
        // happens to share the language's name, e.g. `{r=1, r}`, from being
        // mistaken for the language token.
        let chunk_attrs = Self::parse_chunk_options(content);
        let lang_index = chunk_attrs
            .iter()
            .position(|(key, value)| value.is_none() && !key.starts_with('#'));

        match lang_index {
            Some(index) if chunk_attrs[index].0.starts_with('.') => {
                Self::display_explicit(raw, content)
            }
            Some(index) => Self::executable(raw, chunk_attrs, index),
            None => Self::from_parts(
                raw,
                CodeBlockType::Plain,
                Self::parse_pandoc_attributes(content),
            ),
        }
    }

    /// Build a display block such as `{.python}` or `{.haskell .numberLines}`.
    ///
    /// The content is re-parsed with the space-delimited Pandoc parser.
    /// Valueless keys starting with `.` become classes (dot removed); every
    /// other pair, including a dotted key that carries a value, is kept as an
    /// attribute in source order.
    fn display_explicit(raw: &str, content: &str) -> Self {
        let (class_pairs, attributes): (Vec<_>, Vec<_>) = Self::parse_pandoc_attributes(content)
            .into_iter()
            .partition(|(key, value)| key.starts_with('.') && value.is_none());

        let classes = class_pairs
            .into_iter()
            .map(|(key, _)| key[1..].to_string())
            .collect();

        Self::from_parts(raw, CodeBlockType::DisplayExplicit { classes }, attributes)
    }

    /// Build an executable chunk such as `{python}` or `{r setup, echo=FALSE}`.
    ///
    /// `attrs` is the comma-aware parse of the content and `lang_index` the
    /// position of the language token in it. A bareword directly after the
    /// language is an implicit label: `{r mylabel}` is treated like
    /// `{r, label=mylabel}`, with the `label` pair placed first.
    fn executable(
        raw: &str,
        mut attrs: Vec<(String, Option<String>)>,
        lang_index: usize,
    ) -> Self {
        let (language, _) = attrs.remove(lang_index);

        // After the removal, whatever followed the language sits at `lang_index`.
        if matches!(attrs.get(lang_index), Some((_, None))) {
            let (label, _) = attrs.remove(lang_index);
            attrs.insert(0, ("label".to_string(), Some(label)));
        }

        Self::from_parts(raw, CodeBlockType::Executable { language }, attrs)
    }

    /// Parse Pandoc-style attributes for display blocks: {.class #id key="value"}
    /// Spaces and tabs are the delimiters; values may be double-quoted.
    fn parse_pandoc_attributes(content: &str) -> Vec<(String, Option<String>)> {
        Self::parse_pairs(content, false)
    }

    /// Parse Quarto/RMarkdown chunk options: {language, option=value, option2=value2}
    /// Commas, spaces and tabs all delimit. Unquoted values may contain
    /// delimiters inside brackets or quotes (e.g. `fig.dim=c(4, 3)`).
    fn parse_chunk_options(content: &str) -> Vec<(String, Option<String>)> {
        Self::parse_pairs(content, true)
    }

    /// Shared scanner behind both attribute parsers.
    ///
    /// `comma_delimited` selects the chunk-option dialect: commas count as
    /// separators and unquoted values are read with bracket/quote awareness.
    /// Scanning stops at the end of input or at a `=` with no key before it.
    fn parse_pairs(content: &str, comma_delimited: bool) -> Vec<(String, Option<String>)> {
        let is_separator =
            |ch: char| ch == ' ' || ch == '\t' || (comma_delimited && ch == ',');

        let mut attrs = Vec::new();
        let mut chars = content.chars().peekable();

        loop {
            Self::skip_while(&mut chars, is_separator);

            let key = Self::take_until(&mut chars, |ch| ch == '=' || is_separator(ch));
            if key.is_empty() {
                break;
            }

            Self::skip_while(&mut chars, is_separator);

            if chars.next_if_eq(&'=').is_none() {
                // Bareword: a key without a value.
                attrs.push((key, None));
                continue;
            }

            Self::skip_while(&mut chars, is_separator);

            let value = if chars.next_if_eq(&'"').is_some() {
                Self::read_quoted_value(&mut chars)
            } else if comma_delimited {
                Self::read_nested_value(&mut chars)
            } else {
                Self::take_until(&mut chars, is_separator)
            };
            attrs.push((key, Some(value)));
        }

        attrs
    }

    /// Advance past every leading character for which `pred` holds.
    fn skip_while<I>(chars: &mut std::iter::Peekable<I>, pred: impl Fn(char) -> bool)
    where
        I: Iterator<Item = char>,
    {
        while chars.next_if(|&ch| pred(ch)).is_some() {}
    }

    /// Collect characters up to (not including) the first one for which `stop` holds.
    fn take_until<I>(chars: &mut std::iter::Peekable<I>, stop: impl Fn(char) -> bool) -> String
    where
        I: Iterator<Item = char>,
    {
        let mut out = String::new();
        while let Some(ch) = chars.next_if(|&ch| !stop(ch)) {
            out.push(ch);
        }
        out
    }

    /// Read a double-quoted value whose opening quote was already consumed.
    ///
    /// The closing quote is consumed but not returned. A backslash makes the
    /// following character literal (the backslash itself is dropped). An
    /// unterminated value runs to the end of input.
    fn read_quoted_value<I>(chars: &mut std::iter::Peekable<I>) -> String
    where
        I: Iterator<Item = char>,
    {
        let mut value = String::new();
        while let Some(ch) = chars.next() {
            match ch {
                '"' => break,
                '\\' => value.extend(chars.next()),
                other => value.push(other),
            }
        }
        value
    }

    /// Read an unquoted chunk-option value.
    ///
    /// The value ends at the first space, tab or comma that is outside any
    /// `()`, `[]`, `{}` nesting and outside any `'...'`/`"..."` quote. All
    /// characters, including backslashes and quotes, are kept verbatim. A
    /// backslash shields the next character from being interpreted.
    fn read_nested_value<I>(chars: &mut std::iter::Peekable<I>) -> String
    where
        I: Iterator<Item = char>,
    {
        let mut value = String::new();
        let mut depth = 0usize;
        let mut in_quote: Option<char> = None;
        let mut escaped = false;

        while let Some(&ch) = chars.peek() {
            if escaped {
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if let Some(quote) = in_quote {
                if ch == quote {
                    in_quote = None;
                }
            } else {
                match ch {
                    '"' | '\'' => in_quote = Some(ch),
                    '(' | '[' | '{' => depth += 1,
                    // Clamp at zero: an unmatched closer must not switch off
                    // delimiter detection for the rest of the option list.
                    ')' | ']' | '}' => depth = depth.saturating_sub(1),
                    ' ' | '\t' | ',' if depth == 0 => break,
                    _ => {}
                }
            }
            value.push(ch);
            chars.next();
        }

        value
    }

    /// Legacy function - kept for backward compatibility in mixed-form parsing
    /// For new code, use parse_pandoc_attributes or parse_chunk_options
    fn parse_attributes(content: &str) -> Vec<(String, Option<String>)> {
        // Default to chunk options parsing (comma-aware)
        Self::parse_chunk_options(content)
    }
}
468
/// Information about a detected code fence opening.
#[derive(Debug, Clone)]
pub(crate) struct FenceInfo {
    /// The character the fence is built from.
    pub fence_char: char,
    /// How many consecutive fence characters open the block.
    pub fence_count: usize,
    /// The text that follows the fence characters on the opening line.
    pub info_string: String,
}

/// Returns `true` when the fence's info string, ignoring surrounding
/// whitespace, is exactly `math`.
pub(crate) fn is_gfm_math_fence(fence: &FenceInfo) -> bool {
    matches!(fence.info_string.trim(), "math")
}
480
481/// Try to detect a fenced code block opening from content.
482/// Returns fence info if this is a valid opening fence.
483pub(crate) fn try_parse_fence_open(content: &str) -> Option<FenceInfo> {
484    let trimmed = strip_leading_spaces(content);
485
486    // Check for fence opening (``` or ~~~)
487    let (fence_char, fence_count) = if trimmed.starts_with('`') {
488        let count = trimmed.chars().take_while(|&c| c == '`').count();
489        ('`', count)
490    } else if trimmed.starts_with('~') {
491        let count = trimmed.chars().take_while(|&c| c == '~').count();
492        ('~', count)
493    } else {
494        return None;
495    };
496
497    if fence_count < 3 {
498        return None;
499    }
500
501    let info_string_raw = &trimmed[fence_count..];
502    // Strip trailing newline (LF or CRLF) and at most one leading space
503    let (info_string_trimmed, _) = strip_newline(info_string_raw);
504    let info_string = if let Some(stripped) = info_string_trimmed.strip_prefix(' ') {
505        stripped.to_string()
506    } else {
507        info_string_trimmed.to_string()
508    };
509
510    // Backtick-fenced blocks cannot have backticks in the info string.
511    if fence_char == '`' && info_string.contains('`') {
512        return None;
513    }
514
515    Some(FenceInfo {
516        fence_char,
517        fence_count,
518        info_string,
519    })
520}
521
522#[allow(clippy::too_many_arguments)]
523fn prepare_fence_open_line<'a>(
524    builder: &mut GreenNodeBuilder<'static>,
525    source_line: &'a str,
526    first_line_override: Option<&'a str>,
527    bq_depth: usize,
528    list_content_col: usize,
529    list_marker_consumed_on_line_0: bool,
530    bq_outer: bool,
531    content_indent: usize,
532) -> (&'a str, &'a str) {
533    // Strip the active container prefix on line 0 in container-stack
534    // order. Bq markers are always upstream-emitted by the blockquote
535    // dispatch and silently consumed here. The list_content_col indent
536    // is upstream-emitted only on a marker-line dispatch
537    // (`list_marker_consumed_on_line_0=true`); on continuation-line
538    // dispatch it must be emitted here as WHITESPACE. Adjacent
539    // WHITESPACE emissions are coalesced into one token for
540    // byte-range-equivalent CST stability.
541    if let Some(first_line) = first_line_override {
542        if bq_depth > 0 && source_line != first_line {
543            let stripped = strip_n_blockquote_markers(source_line, bq_depth);
544            let prefix_len = source_line.len().saturating_sub(stripped.len());
545            if prefix_len > 0 {
546                emit_blockquote_prefix_tokens(builder, &source_line[..prefix_len]);
547            }
548        }
549        let first_trimmed = strip_leading_spaces(first_line);
550        let leading_ws_len = first_line.len().saturating_sub(first_trimmed.len());
551        if leading_ws_len > 0 {
552            builder.token(SyntaxKind::WHITESPACE.into(), &first_line[..leading_ws_len]);
553        }
554        return (first_trimmed, first_line);
555    }
556
557    let mut s: &'a str = source_line;
558    let mut pending_ws_start: Option<usize> = None;
559    let suppress_list = list_marker_consumed_on_line_0;
560
561    let flush_ws = |builder: &mut GreenNodeBuilder<'static>,
562                    pending: &mut Option<usize>,
563                    current_offset: usize| {
564        if let Some(start) = *pending
565            && current_offset > start
566        {
567            builder.token(
568                SyntaxKind::WHITESPACE.into(),
569                &source_line[start..current_offset],
570            );
571        }
572        *pending = None;
573    };
574
575    let do_strip_list = |s: &mut &'a str, pending: &mut Option<usize>| {
576        if list_content_col == 0 {
577            return;
578        }
579        // On a marker-line dispatch (`suppress_list=true`), the list
580        // marker bytes have already been emitted upstream and may not
581        // be whitespace (e.g. `- > ```` has a leading `-`). Use
582        // `advance_columns` which counts columns through any char.
583        // On continuation lines, the leading bytes ARE whitespace
584        // (the list-content-indent) so use the whitespace-only
585        // `strip_list_indent` to stop at non-whitespace.
586        let stripped = if suppress_list {
587            advance_columns(s, list_content_col)
588        } else {
589            strip_list_indent(s, list_content_col)
590        };
591        let consumed = s.len() - stripped.len();
592        if consumed > 0 {
593            let start = source_line.len() - s.len();
594            if !suppress_list && pending.is_none() {
595                *pending = Some(start);
596            }
597            *s = stripped;
598        }
599    };
600
601    let do_strip_bq =
602        |builder: &mut GreenNodeBuilder<'static>, s: &mut &'a str, pending: &mut Option<usize>| {
603            if bq_depth == 0 {
604                return;
605            }
606            let current_offset = source_line.len() - s.len();
607            flush_ws(builder, pending, current_offset);
608            *s = strip_n_blockquote_markers(s, bq_depth);
609        };
610
611    if bq_outer {
612        do_strip_bq(builder, &mut s, &mut pending_ws_start);
613        do_strip_list(&mut s, &mut pending_ws_start);
614    } else {
615        do_strip_list(&mut s, &mut pending_ws_start);
616        do_strip_bq(builder, &mut s, &mut pending_ws_start);
617    }
618
619    // content_indent (footnote/definition) — always emit as WHITESPACE.
620    if content_indent > 0 {
621        let indent_bytes = byte_index_at_column(s, content_indent);
622        if s.len() >= indent_bytes && indent_bytes > 0 {
623            let start = source_line.len() - s.len();
624            if pending_ws_start.is_none() {
625                pending_ws_start = Some(start);
626            }
627            s = &s[indent_bytes..];
628        }
629    }
630
631    let final_offset = source_line.len() - s.len();
632    flush_ws(builder, &mut pending_ws_start, final_offset);
633
634    let first_trimmed = strip_leading_spaces(s);
635    let leading_ws_len = s.len().saturating_sub(first_trimmed.len());
636    if leading_ws_len > 0 {
637        builder.token(SyntaxKind::WHITESPACE.into(), &s[..leading_ws_len]);
638    }
639    (first_trimmed, s)
640}
641
642pub(crate) fn emit_blockquote_prefix_tokens(builder: &mut GreenNodeBuilder<'static>, prefix: &str) {
643    for ch in prefix.chars() {
644        if ch == '>' {
645            builder.token(SyntaxKind::BLOCK_QUOTE_MARKER.into(), ">");
646        } else {
647            let mut buf = [0u8; 4];
648            builder.token(SyntaxKind::WHITESPACE.into(), ch.encode_utf8(&mut buf));
649        }
650    }
651}
652
653pub(crate) fn emit_content_line_prefixes<'a>(
654    builder: &mut GreenNodeBuilder<'static>,
655    content_line: &'a str,
656    bq_depth: usize,
657    list_content_col: usize,
658    bq_outer: bool,
659    content_indent: usize,
660) -> &'a str {
661    // Strip and emit content-line (1+) prefixes in container-stack
662    // order:
663    //   bq_outer=true  → bq markers → list_content_col → content_indent
664    //   bq_outer=false → list_content_col → bq markers → content_indent
665    // Bq markers emit granular tokens (BLOCK_QUOTE_MARKER + WHITESPACE);
666    // list_content_col and content_indent emit WHITESPACE. Adjacent
667    // WHITESPACE emissions are coalesced into one token for
668    // byte-range-equivalent CST stability.
669    let mut s = content_line;
670    let mut pending_ws_start: Option<usize> = None;
671
672    let flush_ws = |builder: &mut GreenNodeBuilder<'static>,
673                    pending: &mut Option<usize>,
674                    current_offset: usize| {
675        if let Some(start) = *pending
676            && current_offset > start
677        {
678            builder.token(
679                SyntaxKind::WHITESPACE.into(),
680                &content_line[start..current_offset],
681            );
682            *pending = None;
683        }
684    };
685
686    let strip_and_remember_list =
687        |s: &mut &'a str, pending: &mut Option<usize>, list_content_col: usize| {
688            if list_content_col == 0 {
689                return;
690            }
691            let stripped = strip_list_indent(s, list_content_col);
692            let consumed = s.len() - stripped.len();
693            if consumed > 0 {
694                let start = content_line.len() - s.len();
695                if pending.is_none() {
696                    *pending = Some(start);
697                }
698                *s = stripped;
699            }
700        };
701
702    let strip_and_emit_bq = |builder: &mut GreenNodeBuilder<'static>,
703                             s: &mut &'a str,
704                             pending: &mut Option<usize>,
705                             bq_depth: usize| {
706        if bq_depth == 0 {
707            return;
708        }
709        let current_offset = content_line.len() - s.len();
710        flush_ws(builder, pending, current_offset);
711        let stripped = strip_n_blockquote_markers(s, bq_depth);
712        let prefix_len = s.len() - stripped.len();
713        if prefix_len > 0 {
714            emit_blockquote_prefix_tokens(builder, &s[..prefix_len]);
715        }
716        *s = stripped;
717    };
718
719    if bq_outer {
720        strip_and_emit_bq(builder, &mut s, &mut pending_ws_start, bq_depth);
721        strip_and_remember_list(&mut s, &mut pending_ws_start, list_content_col);
722    } else {
723        strip_and_remember_list(&mut s, &mut pending_ws_start, list_content_col);
724        strip_and_emit_bq(builder, &mut s, &mut pending_ws_start, bq_depth);
725    }
726
727    if content_indent > 0 {
728        let indent_bytes = byte_index_at_column(s, content_indent);
729        if s.len() >= indent_bytes && indent_bytes > 0 {
730            let start = content_line.len() - s.len();
731            if pending_ws_start.is_none() {
732                pending_ws_start = Some(start);
733            }
734            s = &s[indent_bytes..];
735        }
736    }
737
738    let final_offset = content_line.len() - s.len();
739    flush_ws(builder, &mut pending_ws_start, final_offset);
740    s
741}
742
743fn strip_content_line_prefixes(
744    content_line: &str,
745    bq_depth: usize,
746    list_content_col: usize,
747    bq_outer: bool,
748    content_indent: usize,
749) -> &str {
750    let after_bq_and_list = if bq_outer {
751        let after_bq = if bq_depth > 0 {
752            strip_n_blockquote_markers(content_line, bq_depth)
753        } else {
754            content_line
755        };
756        strip_list_indent(after_bq, list_content_col)
757    } else {
758        let after_list = strip_list_indent(content_line, list_content_col);
759        if bq_depth > 0 {
760            strip_n_blockquote_markers(after_list, bq_depth)
761        } else {
762            after_list
763        }
764    };
765
766    let indent_bytes = byte_index_at_column(after_bq_and_list, content_indent);
767    if content_indent > 0 && after_bq_and_list.len() >= indent_bytes {
768        &after_bq_and_list[indent_bytes..]
769    } else {
770        after_bq_and_list
771    }
772}
773
774pub(crate) fn compute_hashpipe_preamble_line_count(
775    content_lines: &[&str],
776    prefix: &str,
777    bq_depth: usize,
778    list_content_col: usize,
779    bq_outer: bool,
780    content_indent: usize,
781) -> usize {
782    let mut line_idx = 0usize;
783
784    while line_idx < content_lines.len() {
785        let preview_after_indent = strip_content_line_prefixes(
786            content_lines[line_idx],
787            bq_depth,
788            list_content_col,
789            bq_outer,
790            content_indent,
791        );
792        let (preview_without_newline, _) = strip_newline(preview_after_indent);
793        if !is_hashpipe_option_line(preview_without_newline, prefix)
794            && !is_hashpipe_continuation_line(preview_without_newline, prefix)
795        {
796            break;
797        }
798        line_idx += 1;
799    }
800
801    line_idx
802}
803
804fn emit_hashpipe_option_line(
805    builder: &mut GreenNodeBuilder<'static>,
806    line_without_newline: &str,
807    prefix: &str,
808) -> bool {
809    if !is_hashpipe_option_line(line_without_newline, prefix) {
810        return false;
811    }
812
813    let trimmed_start = trim_start_spaces_tabs(line_without_newline);
814    let leading_ws_len = line_without_newline
815        .len()
816        .saturating_sub(trimmed_start.len());
817    let after_prefix = &trimmed_start[prefix.len()..];
818    let ws_after_prefix_len = after_prefix
819        .len()
820        .saturating_sub(trim_start_spaces_tabs(after_prefix).len());
821    let rest = &after_prefix[ws_after_prefix_len..];
822    let Some(colon_idx) = rest.find(':') else {
823        return false;
824    };
825
826    let key_with_ws = &rest[..colon_idx];
827    let key = trim_end_spaces_tabs(key_with_ws);
828    if key.is_empty() {
829        return false;
830    }
831    let key_ws_suffix = &key_with_ws[key.len()..];
832
833    let after_colon = &rest[colon_idx + 1..];
834    let value_ws_prefix_len = after_colon
835        .len()
836        .saturating_sub(trim_start_spaces_tabs(after_colon).len());
837    let value_with_trailing = &after_colon[value_ws_prefix_len..];
838    let value = trim_end_spaces_tabs(value_with_trailing);
839    let value_ws_suffix = &value_with_trailing[value.len()..];
840
841    builder.start_node(SyntaxKind::CHUNK_OPTION.into());
842    if leading_ws_len > 0 {
843        builder.token(
844            SyntaxKind::WHITESPACE.into(),
845            &line_without_newline[..leading_ws_len],
846        );
847    }
848    builder.token(SyntaxKind::HASHPIPE_PREFIX.into(), prefix);
849    if ws_after_prefix_len > 0 {
850        builder.token(
851            SyntaxKind::WHITESPACE.into(),
852            &after_prefix[..ws_after_prefix_len],
853        );
854    }
855
856    builder.token(SyntaxKind::CHUNK_OPTION_KEY.into(), key);
857    if !key_ws_suffix.is_empty() {
858        builder.token(SyntaxKind::WHITESPACE.into(), key_ws_suffix);
859    }
860    builder.token(SyntaxKind::TEXT.into(), ":");
861    if value_ws_prefix_len > 0 {
862        builder.token(
863            SyntaxKind::WHITESPACE.into(),
864            &after_colon[..value_ws_prefix_len],
865        );
866    }
867
868    if !value.is_empty() {
869        if let Some(quote) = value.chars().next()
870            && (quote == '"' || quote == '\'')
871            && value.ends_with(quote)
872            && value.len() >= 2
873        {
874            builder.token(SyntaxKind::CHUNK_OPTION_QUOTE.into(), &value[..1]);
875            builder.token(
876                SyntaxKind::CHUNK_OPTION_VALUE.into(),
877                &value[1..value.len() - 1],
878            );
879            builder.token(
880                SyntaxKind::CHUNK_OPTION_QUOTE.into(),
881                &value[value.len() - 1..],
882            );
883        } else {
884            builder.token(SyntaxKind::CHUNK_OPTION_VALUE.into(), value);
885        }
886    }
887
888    if !value_ws_suffix.is_empty() {
889        builder.token(SyntaxKind::WHITESPACE.into(), value_ws_suffix);
890    }
891    builder.finish_node();
892    true
893}
894
895fn emit_hashpipe_continuation_line(
896    builder: &mut GreenNodeBuilder<'static>,
897    line_without_newline: &str,
898    prefix: &str,
899) -> bool {
900    if !is_hashpipe_continuation_line(line_without_newline, prefix) {
901        return false;
902    }
903    let trimmed_start = trim_start_spaces_tabs(line_without_newline);
904    let leading_ws_len = line_without_newline
905        .len()
906        .saturating_sub(trimmed_start.len());
907    let after_prefix = &trimmed_start[prefix.len()..];
908    let ws_after_prefix_len = after_prefix
909        .len()
910        .saturating_sub(trim_start_spaces_tabs(after_prefix).len());
911    let continuation_with_trailing = &after_prefix[ws_after_prefix_len..];
912    let continuation_value = trim_end_spaces_tabs(continuation_with_trailing);
913    if continuation_value.is_empty() {
914        return false;
915    }
916    let continuation_ws_suffix = &continuation_with_trailing[continuation_value.len()..];
917
918    builder.start_node(SyntaxKind::CHUNK_OPTION.into());
919    if leading_ws_len > 0 {
920        builder.token(
921            SyntaxKind::WHITESPACE.into(),
922            &line_without_newline[..leading_ws_len],
923        );
924    }
925    builder.token(SyntaxKind::HASHPIPE_PREFIX.into(), prefix);
926    if ws_after_prefix_len > 0 {
927        builder.token(
928            SyntaxKind::WHITESPACE.into(),
929            &after_prefix[..ws_after_prefix_len],
930        );
931    }
932    builder.token(SyntaxKind::CHUNK_OPTION_VALUE.into(), continuation_value);
933    if !continuation_ws_suffix.is_empty() {
934        builder.token(SyntaxKind::WHITESPACE.into(), continuation_ws_suffix);
935    }
936    builder.finish_node();
937    true
938}
939
940fn is_hashpipe_option_line(line_without_newline: &str, prefix: &str) -> bool {
941    let trimmed_start = trim_start_spaces_tabs(line_without_newline);
942    if !trimmed_start.starts_with(prefix) {
943        return false;
944    }
945    let after_prefix = &trimmed_start[prefix.len()..];
946    let rest = trim_start_spaces_tabs(after_prefix);
947    let Some(colon_idx) = rest.find(':') else {
948        return false;
949    };
950    let key = trim_end_spaces_tabs(&rest[..colon_idx]);
951    if key.is_empty() {
952        return false;
953    }
954    true
955}
956
957fn is_hashpipe_continuation_line(line_without_newline: &str, prefix: &str) -> bool {
958    let trimmed_start = trim_start_spaces_tabs(line_without_newline);
959    if !trimmed_start.starts_with(prefix) {
960        return false;
961    }
962    let after_prefix = &trimmed_start[prefix.len()..];
963    let Some(first) = after_prefix.chars().next() else {
964        return false;
965    };
966    if first != ' ' && first != '\t' {
967        return false;
968    }
969    !trim_start_spaces_tabs(after_prefix).is_empty()
970}
971
972/// Check if a line is a valid closing fence for the given fence info.
973pub(crate) fn is_closing_fence(content: &str, fence: &FenceInfo) -> bool {
974    let trimmed = strip_leading_spaces(content);
975
976    if !trimmed.starts_with(fence.fence_char) {
977        return false;
978    }
979
980    let closing_count = trimmed
981        .chars()
982        .take_while(|&c| c == fence.fence_char)
983        .count();
984
985    if closing_count < fence.fence_count {
986        return false;
987    }
988
989    // Rest of line must be empty
990    trimmed[closing_count..].trim().is_empty()
991}
992
993/// Emit chunk options as structured CST nodes while preserving all bytes.
994/// This parses {r, echo=TRUE, fig.cap="text"} into CHUNK_OPTIONS with individual CHUNK_OPTION nodes.
995fn emit_chunk_options(builder: &mut GreenNodeBuilder<'static>, content: &str) {
996    if content.trim().is_empty() {
997        builder.token(SyntaxKind::TEXT.into(), content);
998        return;
999    }
1000
1001    builder.start_node(SyntaxKind::CHUNK_OPTIONS.into());
1002
1003    let mut pos = 0;
1004    let bytes = content.as_bytes();
1005
1006    while pos < bytes.len() {
1007        // Emit leading whitespace/commas as TEXT
1008        let ws_start = pos;
1009        while pos < bytes.len() {
1010            let ch = bytes[pos] as char;
1011            if ch != ' ' && ch != '\t' && ch != ',' {
1012                break;
1013            }
1014            pos += 1;
1015        }
1016        if pos > ws_start {
1017            builder.token(SyntaxKind::TEXT.into(), &content[ws_start..pos]);
1018        }
1019
1020        if pos >= bytes.len() {
1021            break;
1022        }
1023
1024        // Check if this is a closing brace
1025        if bytes[pos] as char == '}' {
1026            builder.token(SyntaxKind::TEXT.into(), &content[pos..pos + 1]);
1027            pos += 1;
1028            if pos < bytes.len() {
1029                builder.token(SyntaxKind::TEXT.into(), &content[pos..]);
1030            }
1031            break;
1032        }
1033
1034        // Read key
1035        let key_start = pos;
1036        while pos < bytes.len() {
1037            let ch = bytes[pos] as char;
1038            if ch == '=' || ch == ' ' || ch == '\t' || ch == ',' || ch == '}' {
1039                break;
1040            }
1041            pos += 1;
1042        }
1043
1044        if pos == key_start {
1045            // No key found, emit rest as TEXT
1046            if pos < bytes.len() {
1047                builder.token(SyntaxKind::TEXT.into(), &content[pos..]);
1048            }
1049            break;
1050        }
1051
1052        let key = &content[key_start..pos];
1053
1054        // Check for whitespace before '='
1055        let ws_before_eq_start = pos;
1056        while pos < bytes.len() && matches!(bytes[pos] as char, ' ' | '\t') {
1057            pos += 1;
1058        }
1059
1060        // Check if there's a value (=)
1061        if pos < bytes.len() && bytes[pos] as char == '=' {
1062            // Has value - emit as CHUNK_OPTION
1063            builder.start_node(SyntaxKind::CHUNK_OPTION.into());
1064            builder.token(SyntaxKind::CHUNK_OPTION_KEY.into(), key);
1065
1066            // Emit whitespace before '=' if any
1067            if pos > ws_before_eq_start {
1068                builder.token(SyntaxKind::TEXT.into(), &content[ws_before_eq_start..pos]);
1069            }
1070
1071            builder.token(SyntaxKind::TEXT.into(), "=");
1072            pos += 1; // consume '='
1073
1074            // Emit whitespace after '='
1075            let ws_after_eq_start = pos;
1076            while pos < bytes.len() && matches!(bytes[pos] as char, ' ' | '\t') {
1077                pos += 1;
1078            }
1079            if pos > ws_after_eq_start {
1080                builder.token(SyntaxKind::TEXT.into(), &content[ws_after_eq_start..pos]);
1081            }
1082
1083            // Parse value (might be quoted)
1084            if pos < bytes.len() {
1085                let quote_char = bytes[pos] as char;
1086                if quote_char == '"' || quote_char == '\'' {
1087                    // Quoted value
1088                    builder.token(
1089                        SyntaxKind::CHUNK_OPTION_QUOTE.into(),
1090                        &content[pos..pos + 1],
1091                    );
1092                    pos += 1; // consume opening quote
1093
1094                    let val_start = pos;
1095                    let mut escaped = false;
1096                    while pos < bytes.len() {
1097                        let ch = bytes[pos] as char;
1098                        if !escaped && ch == quote_char {
1099                            break;
1100                        }
1101                        escaped = !escaped && ch == '\\';
1102                        pos += 1;
1103                    }
1104
1105                    if pos > val_start {
1106                        builder.token(
1107                            SyntaxKind::CHUNK_OPTION_VALUE.into(),
1108                            &content[val_start..pos],
1109                        );
1110                    }
1111
1112                    // Emit closing quote
1113                    if pos < bytes.len() && bytes[pos] as char == quote_char {
1114                        builder.token(
1115                            SyntaxKind::CHUNK_OPTION_QUOTE.into(),
1116                            &content[pos..pos + 1],
1117                        );
1118                        pos += 1;
1119                    }
1120                } else {
1121                    // Unquoted value - read until comma, space, closing brace, or balanced delimiter
1122                    let val_start = pos;
1123                    let mut depth = 0;
1124
1125                    while pos < bytes.len() {
1126                        let ch = bytes[pos] as char;
1127                        match ch {
1128                            '(' | '[' | '{' => depth += 1,
1129                            ')' | ']' => {
1130                                if depth > 0 {
1131                                    depth -= 1;
1132                                } else {
1133                                    break;
1134                                }
1135                            }
1136                            '}' => {
1137                                if depth > 0 {
1138                                    depth -= 1;
1139                                } else {
1140                                    break; // End of chunk options
1141                                }
1142                            }
1143                            ',' if depth == 0 => {
1144                                break; // Next option
1145                            }
1146                            ' ' | '\t' if depth == 0 => {
1147                                break; // Space separator
1148                            }
1149                            _ => {}
1150                        }
1151                        pos += 1;
1152                    }
1153
1154                    if pos > val_start {
1155                        builder.token(
1156                            SyntaxKind::CHUNK_OPTION_VALUE.into(),
1157                            &content[val_start..pos],
1158                        );
1159                    }
1160                }
1161            }
1162
1163            builder.finish_node(); // CHUNK_OPTION
1164        } else {
1165            // No '=' - this is a label or bareword option
1166            // Emit any whitespace we skipped as TEXT
1167            if pos > ws_before_eq_start {
1168                builder.start_node(SyntaxKind::CHUNK_LABEL.into());
1169                builder.token(SyntaxKind::TEXT.into(), key);
1170                builder.finish_node(); // CHUNK_LABEL
1171                builder.token(SyntaxKind::TEXT.into(), &content[ws_before_eq_start..pos]);
1172            } else {
1173                builder.start_node(SyntaxKind::CHUNK_LABEL.into());
1174                builder.token(SyntaxKind::TEXT.into(), key);
1175                builder.finish_node(); // CHUNK_LABEL
1176            }
1177        }
1178    }
1179
1180    builder.finish_node(); // CHUNK_OPTIONS
1181}
1182
1183/// Helper to parse info string and emit CodeInfo node with parsed components.
1184/// This breaks down the info string into its logical parts while preserving all bytes.
1185fn emit_code_info_node(builder: &mut GreenNodeBuilder<'static>, info_string: &str) {
1186    builder.start_node(SyntaxKind::CODE_INFO.into());
1187
1188    let info = InfoString::parse(info_string);
1189
1190    match &info.block_type {
1191        CodeBlockType::DisplayShortcut { language } => {
1192            // Simple case: python or python {.class}
1193            builder.token(SyntaxKind::CODE_LANGUAGE.into(), language);
1194
1195            // If there's more after the language, emit it as TEXT
1196            let after_lang = &info_string[language.len()..];
1197            if !after_lang.is_empty() {
1198                builder.token(SyntaxKind::TEXT.into(), after_lang);
1199            }
1200        }
1201        CodeBlockType::Executable { language } => {
1202            // Quarto: {r} or {r my-label, echo=FALSE}
1203            builder.token(SyntaxKind::TEXT.into(), "{");
1204            builder.token(SyntaxKind::CODE_LANGUAGE.into(), language);
1205
1206            // Parse and emit chunk options
1207            let start_offset = 1 + language.len(); // Skip "{r"
1208            if start_offset < info_string.len() {
1209                let rest = &info_string[start_offset..];
1210                emit_chunk_options(builder, rest);
1211            }
1212        }
1213        CodeBlockType::DisplayExplicit { classes } => {
1214            // Pandoc: {.python} or {#id .haskell .numberLines}
1215            // We need to find the first class in the raw string and emit everything around it
1216
1217            if let Some(lang) = classes.first() {
1218                // Find where ".lang" appears in the info string
1219                let needle = format!(".{}", lang);
1220                if let Some(lang_start) = info_string.find(&needle) {
1221                    // Emit everything before the language
1222                    if lang_start > 0 {
1223                        builder.token(SyntaxKind::TEXT.into(), &info_string[..lang_start]);
1224                    }
1225
1226                    // Emit the dot
1227                    builder.token(SyntaxKind::TEXT.into(), ".");
1228
1229                    // Emit the language
1230                    builder.token(SyntaxKind::CODE_LANGUAGE.into(), lang);
1231
1232                    // Emit everything after
1233                    let after_lang_start = lang_start + 1 + lang.len();
1234                    if after_lang_start < info_string.len() {
1235                        builder.token(SyntaxKind::TEXT.into(), &info_string[after_lang_start..]);
1236                    }
1237                } else {
1238                    // Couldn't find it, just emit as TEXT
1239                    builder.token(SyntaxKind::TEXT.into(), info_string);
1240                }
1241            } else {
1242                // No classes
1243                builder.token(SyntaxKind::TEXT.into(), info_string);
1244            }
1245        }
1246        CodeBlockType::Raw { .. } | CodeBlockType::Plain => {
1247            // No language, just emit as TEXT
1248            builder.token(SyntaxKind::TEXT.into(), info_string);
1249        }
1250    }
1251
1252    builder.finish_node(); // CodeInfo
1253}
1254
1255/// Parse a fenced code block, consuming lines from the parser.
1256/// Returns the new position after the code block.
1257/// Parse a fenced code block, consuming lines from the parser.
1258/// Returns the new position after the code block.
1259/// list_content_col + content_indent account for container indentation
1260/// (list-item indent + footnote/definition base indent) that should be
1261/// stripped from each line. `bq_outer` flips the bq-vs-list strip
1262/// order to match the container stack.
1263#[allow(clippy::too_many_arguments)]
1264pub(crate) fn parse_fenced_code_block(
1265    builder: &mut GreenNodeBuilder<'static>,
1266    lines: &[&str],
1267    start_pos: usize,
1268    fence: FenceInfo,
1269    bq_depth: usize,
1270    list_content_col: usize,
1271    list_marker_consumed_on_line_0: bool,
1272    bq_outer: bool,
1273    content_indent: usize,
1274    first_line_override: Option<&str>,
1275) -> usize {
1276    // Start code block
1277    builder.start_node(SyntaxKind::CODE_BLOCK.into());
1278
1279    // Opening fence
1280    let (first_trimmed, _first_inner) = prepare_fence_open_line(
1281        builder,
1282        lines[start_pos],
1283        first_line_override,
1284        bq_depth,
1285        list_content_col,
1286        list_marker_consumed_on_line_0,
1287        bq_outer,
1288        content_indent,
1289    );
1290
1291    builder.start_node(SyntaxKind::CODE_FENCE_OPEN.into());
1292    builder.token(
1293        SyntaxKind::CODE_FENCE_MARKER.into(),
1294        &first_trimmed[..fence.fence_count],
1295    );
1296
1297    // Emit any space between fence and info string (for losslessness)
1298    let after_fence = &first_trimmed[fence.fence_count..];
1299    if let Some(_space_stripped) = after_fence.strip_prefix(' ') {
1300        // There was a space - emit it as WHITESPACE
1301        builder.token(SyntaxKind::WHITESPACE.into(), " ");
1302        // Parse and emit the info string as a structured node
1303        if !fence.info_string.is_empty() {
1304            emit_code_info_node(builder, &fence.info_string);
1305        }
1306    } else if !fence.info_string.is_empty() {
1307        // No space - parse and emit info_string as a structured node
1308        emit_code_info_node(builder, &fence.info_string);
1309    }
1310
1311    // Extract and emit the actual newline from the opening fence line
1312    let (_, newline_str) = strip_newline(first_trimmed);
1313    if !newline_str.is_empty() {
1314        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1315    }
1316    builder.finish_node(); // CodeFenceOpen
1317
1318    let mut current_pos = start_pos + 1;
1319    let mut content_lines: Vec<&str> = Vec::new(); // Store original lines for lossless parsing
1320    let mut found_closing = false;
1321
1322    while current_pos < lines.len() {
1323        let line = lines[current_pos];
1324
1325        // Strip container prefix in stack order so the closing-fence
1326        // probe sees the post-prefix content.
1327        let after_bq_and_list = if bq_outer {
1328            let after_bq = if bq_depth > 0 {
1329                strip_n_blockquote_markers(line, bq_depth)
1330            } else {
1331                line
1332            };
1333            strip_list_indent(after_bq, list_content_col)
1334        } else {
1335            let after_list = strip_list_indent(line, list_content_col);
1336            if bq_depth > 0 {
1337                strip_n_blockquote_markers(after_list, bq_depth)
1338            } else {
1339                after_list
1340            }
1341        };
1342
1343        // Count blockquote markers on the *post-list-stripped*-or-raw
1344        // line to detect leaving the surrounding blockquote. For
1345        // bq_outer=true we already stripped bq markers, so probe the
1346        // raw line; for bq_outer=false we stripped list indent first,
1347        // so probe the post-list slice.
1348        let probe = if bq_outer {
1349            line
1350        } else {
1351            strip_list_indent(line, list_content_col)
1352        };
1353        let (line_bq_depth, _) = count_blockquote_markers(probe);
1354        if line_bq_depth < bq_depth {
1355            break;
1356        }
1357
1358        let indent_bytes = byte_index_at_column(after_bq_and_list, content_indent);
1359        let inner_stripped = if content_indent > 0 && after_bq_and_list.len() >= indent_bytes {
1360            &after_bq_and_list[indent_bytes..]
1361        } else {
1362            after_bq_and_list
1363        };
1364
1365        if is_closing_fence(inner_stripped, &fence) {
1366            found_closing = true;
1367            current_pos += 1;
1368            break;
1369        }
1370
1371        content_lines.push(line);
1372        current_pos += 1;
1373    }
1374
1375    // Add content
1376    if !content_lines.is_empty() {
1377        builder.start_node(SyntaxKind::CODE_CONTENT.into());
1378        let hashpipe_prefix = match InfoString::parse(&fence.info_string).block_type {
1379            CodeBlockType::Executable { language } => hashpipe_comment_prefix(&language),
1380            _ => None,
1381        };
1382
1383        let mut line_idx = 0usize;
1384        if let Some(prefix) = hashpipe_prefix {
1385            let prepared_hashpipe_lines = compute_hashpipe_preamble_line_count(
1386                &content_lines,
1387                prefix,
1388                bq_depth,
1389                list_content_col,
1390                bq_outer,
1391                content_indent,
1392            );
1393            if prepared_hashpipe_lines > 0 {
1394                builder.start_node(SyntaxKind::HASHPIPE_YAML_PREAMBLE.into());
1395                builder.start_node(SyntaxKind::HASHPIPE_YAML_CONTENT.into());
1396                while line_idx < prepared_hashpipe_lines {
1397                    let content_line = content_lines[line_idx];
1398                    let after_indent = emit_content_line_prefixes(
1399                        builder,
1400                        content_line,
1401                        bq_depth,
1402                        list_content_col,
1403                        bq_outer,
1404                        content_indent,
1405                    );
1406                    let (line_without_newline, newline_str) = strip_newline(after_indent);
1407                    if !emit_hashpipe_option_line(builder, line_without_newline, prefix) {
1408                        let _ =
1409                            emit_hashpipe_continuation_line(builder, line_without_newline, prefix);
1410                    }
1411                    if !newline_str.is_empty() {
1412                        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1413                    }
1414                    line_idx += 1;
1415                }
1416                builder.finish_node(); // HASHPIPE_YAML_CONTENT
1417                builder.finish_node(); // HASHPIPE_YAML_PREAMBLE
1418            }
1419        }
1420
1421        for content_line in content_lines.iter().skip(line_idx) {
1422            let after_indent = emit_content_line_prefixes(
1423                builder,
1424                content_line,
1425                bq_depth,
1426                list_content_col,
1427                bq_outer,
1428                content_indent,
1429            );
1430            let (line_without_newline, newline_str) = strip_newline(after_indent);
1431
1432            if !line_without_newline.is_empty() {
1433                builder.token(SyntaxKind::TEXT.into(), line_without_newline);
1434            }
1435
1436            if !newline_str.is_empty() {
1437                builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1438            }
1439        }
1440        builder.finish_node(); // CodeContent
1441    }
1442
1443    // Closing fence (if found)
1444    if found_closing {
1445        let closing_line = lines[current_pos - 1];
1446
1447        let closing_stripped = emit_content_line_prefixes(
1448            builder,
1449            closing_line,
1450            bq_depth,
1451            list_content_col,
1452            bq_outer,
1453            content_indent,
1454        );
1455        let (closing_without_newline, newline_str) = strip_newline(closing_stripped);
1456        let closing_trimmed_start = strip_leading_spaces(closing_without_newline);
1457        let leading_ws_len = closing_without_newline.len() - closing_trimmed_start.len();
1458        let closing_count = closing_trimmed_start
1459            .chars()
1460            .take_while(|&c| c == fence.fence_char)
1461            .count();
1462        let trailing_after_marker = &closing_trimmed_start[closing_count..];
1463
1464        builder.start_node(SyntaxKind::CODE_FENCE_CLOSE.into());
1465        if leading_ws_len > 0 {
1466            builder.token(
1467                SyntaxKind::WHITESPACE.into(),
1468                &closing_without_newline[..leading_ws_len],
1469            );
1470        }
1471        builder.token(
1472            SyntaxKind::CODE_FENCE_MARKER.into(),
1473            &closing_trimmed_start[..closing_count],
1474        );
1475        if !trailing_after_marker.is_empty() {
1476            builder.token(SyntaxKind::WHITESPACE.into(), trailing_after_marker);
1477        }
1478        if !newline_str.is_empty() {
1479            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1480        }
1481        builder.finish_node(); // CodeFenceClose
1482    }
1483
1484    builder.finish_node(); // CodeBlock
1485
1486    current_pos
1487}
1488
1489/// Parse a GFM math fence (``` math ... ```) as DISPLAY_MATH while preserving bytes.
1490#[allow(clippy::too_many_arguments)]
1491pub(crate) fn parse_fenced_math_block(
1492    builder: &mut GreenNodeBuilder<'static>,
1493    lines: &[&str],
1494    start_pos: usize,
1495    fence: FenceInfo,
1496    bq_depth: usize,
1497    list_content_col: usize,
1498    list_marker_consumed_on_line_0: bool,
1499    bq_outer: bool,
1500    content_indent: usize,
1501    first_line_override: Option<&str>,
1502) -> usize {
1503    builder.start_node(SyntaxKind::DISPLAY_MATH.into());
1504
1505    let (first_trimmed, _first_inner) = prepare_fence_open_line(
1506        builder,
1507        lines[start_pos],
1508        first_line_override,
1509        bq_depth,
1510        list_content_col,
1511        list_marker_consumed_on_line_0,
1512        bq_outer,
1513        content_indent,
1514    );
1515    let (opening_without_newline, opening_newline) = strip_newline(first_trimmed);
1516    builder.token(
1517        SyntaxKind::DISPLAY_MATH_MARKER.into(),
1518        opening_without_newline,
1519    );
1520    if !opening_newline.is_empty() {
1521        builder.token(SyntaxKind::NEWLINE.into(), opening_newline);
1522    }
1523
1524    let mut current_pos = start_pos + 1;
1525    let mut content_lines: Vec<&str> = Vec::new();
1526    let mut found_closing = false;
1527
1528    while current_pos < lines.len() {
1529        let line = lines[current_pos];
1530
1531        let after_bq_and_list = if bq_outer {
1532            let after_bq = if bq_depth > 0 {
1533                strip_n_blockquote_markers(line, bq_depth)
1534            } else {
1535                line
1536            };
1537            strip_list_indent(after_bq, list_content_col)
1538        } else {
1539            let after_list = strip_list_indent(line, list_content_col);
1540            if bq_depth > 0 {
1541                strip_n_blockquote_markers(after_list, bq_depth)
1542            } else {
1543                after_list
1544            }
1545        };
1546
1547        let probe = if bq_outer {
1548            line
1549        } else {
1550            strip_list_indent(line, list_content_col)
1551        };
1552        let (line_bq_depth, _) = count_blockquote_markers(probe);
1553        if line_bq_depth < bq_depth {
1554            break;
1555        }
1556
1557        let indent_bytes = byte_index_at_column(after_bq_and_list, content_indent);
1558        let inner_stripped = if content_indent > 0 && after_bq_and_list.len() >= indent_bytes {
1559            &after_bq_and_list[indent_bytes..]
1560        } else {
1561            after_bq_and_list
1562        };
1563
1564        if is_closing_fence(inner_stripped, &fence) {
1565            found_closing = true;
1566            current_pos += 1;
1567            break;
1568        }
1569
1570        content_lines.push(line);
1571        current_pos += 1;
1572    }
1573
1574    if !content_lines.is_empty() {
1575        let mut content = String::new();
1576        for content_line in content_lines {
1577            let after_indent = emit_content_line_prefixes(
1578                builder,
1579                content_line,
1580                bq_depth,
1581                list_content_col,
1582                bq_outer,
1583                content_indent,
1584            );
1585            let (line_without_newline, newline_str) = strip_newline(after_indent);
1586            content.push_str(line_without_newline);
1587            content.push_str(newline_str);
1588        }
1589        builder.token(SyntaxKind::TEXT.into(), &content);
1590    }
1591
1592    if found_closing {
1593        let closing_line = lines[current_pos - 1];
1594
1595        let closing_stripped = emit_content_line_prefixes(
1596            builder,
1597            closing_line,
1598            bq_depth,
1599            list_content_col,
1600            bq_outer,
1601            content_indent,
1602        );
1603        let (closing_without_newline, newline_str) = strip_newline(closing_stripped);
1604        let closing_trimmed_start = strip_leading_spaces(closing_without_newline);
1605        let leading_ws_len = closing_without_newline.len() - closing_trimmed_start.len();
1606        let closing_count = closing_trimmed_start
1607            .chars()
1608            .take_while(|&c| c == fence.fence_char)
1609            .count();
1610        let trailing_after_marker = &closing_trimmed_start[closing_count..];
1611
1612        if leading_ws_len > 0 {
1613            builder.token(
1614                SyntaxKind::WHITESPACE.into(),
1615                &closing_without_newline[..leading_ws_len],
1616            );
1617        }
1618        builder.token(
1619            SyntaxKind::DISPLAY_MATH_MARKER.into(),
1620            &closing_trimmed_start[..closing_count],
1621        );
1622        if !trailing_after_marker.is_empty() {
1623            builder.token(SyntaxKind::WHITESPACE.into(), trailing_after_marker);
1624        }
1625        if !newline_str.is_empty() {
1626            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1627        }
1628    }
1629
1630    builder.finish_node(); // DisplayMath
1631    current_pos
1632}
1633
#[cfg(test)]
mod tests {
    use super::*;

    // ------------------------------------------------------------------
    // Shared helpers
    // ------------------------------------------------------------------

    /// Expected block type for a bare-language info string (e.g. `python`).
    fn shortcut(language: &str) -> CodeBlockType {
        CodeBlockType::DisplayShortcut {
            language: language.to_owned(),
        }
    }

    /// Expected block type for an explicit display block (e.g. `{.python}`).
    fn explicit(classes: &[&str]) -> CodeBlockType {
        CodeBlockType::DisplayExplicit {
            classes: classes.iter().map(|class| (*class).to_owned()).collect(),
        }
    }

    /// Expected block type for an executable chunk (e.g. `{python}`).
    fn executable(language: &str) -> CodeBlockType {
        CodeBlockType::Executable {
            language: language.to_owned(),
        }
    }

    /// Expected block type for a raw block (e.g. `{=html}`).
    fn raw(format: &str) -> CodeBlockType {
        CodeBlockType::Raw {
            format: format.to_owned(),
        }
    }

    /// Asserts that `actual` has exactly the `(key, value)` pairs listed in
    /// `expected`, in order. A `None` value denotes a key without `=value`.
    #[track_caller]
    fn assert_attrs(actual: &[(String, Option<String>)], expected: &[(&str, Option<&str>)]) {
        assert_eq!(actual.len(), expected.len());
        for (got, want) in actual.iter().zip(expected) {
            assert_eq!((got.0.as_str(), got.1.as_deref()), *want);
        }
    }

    /// Parses `input` and asserts that the tree's text equals the input.
    #[track_caller]
    fn assert_lossless(input: &str) {
        let tree = crate::parse(input, None);
        assert_eq!(tree.text().to_string(), input);
    }

    /// Runs the hashpipe preamble counter with the `#|` prefix and the same
    /// zero/false container arguments for every test.
    fn preamble_len(content_lines: Vec<&str>) -> usize {
        compute_hashpipe_preamble_line_count(&content_lines, "#|", 0, 0, false, 0)
    }

    // ------------------------------------------------------------------
    // Fence detection
    // ------------------------------------------------------------------

    #[test]
    fn test_backtick_fence() {
        let opened = try_parse_fence_open("```python").unwrap();
        assert_eq!(opened.fence_char, '`');
        assert_eq!(opened.fence_count, 3);
        assert_eq!(opened.info_string, "python");
    }

    #[test]
    fn test_tilde_fence() {
        let opened = try_parse_fence_open("~~~").unwrap();
        assert_eq!(opened.fence_char, '~');
        assert_eq!(opened.fence_count, 3);
        assert_eq!(opened.info_string, "");
    }

    #[test]
    fn test_long_fence() {
        let opened = try_parse_fence_open("`````").unwrap();
        assert_eq!(opened.fence_count, 5);
    }

    #[test]
    fn test_two_backticks_invalid() {
        assert!(try_parse_fence_open("``").is_none());
    }

    #[test]
    fn test_backtick_fence_with_backtick_in_info_is_invalid() {
        assert!(try_parse_fence_open("`````hi````there`````").is_none());
    }

    #[test]
    fn test_closing_fence() {
        let opener = FenceInfo {
            fence_char: '`',
            fence_count: 3,
            info_string: String::new(),
        };
        // (candidate line, whether it should close a three-backtick opener)
        let cases = [("```", true), ("````", true), ("``", false), ("~~~", false)];
        for (line, should_close) in cases {
            assert_eq!(is_closing_fence(line, &opener), should_close);
        }
    }

    // ------------------------------------------------------------------
    // Round-trip checks through the full parser
    // ------------------------------------------------------------------

    #[test]
    fn test_fenced_code_preserves_leading_gt() {
        assert_lossless("```\n> foo\n```\n");
    }

    #[test]
    fn test_fenced_code_in_blockquote_preserves_opening_fence_marker() {
        assert_lossless("> ```\n> code\n> ```\n");
    }

    #[test]
    fn test_fenced_code_in_definition_list_with_unicode_content_does_not_panic() {
        assert_lossless("Term\n: ```\nā”œā”€ā”€ pyproject.toml\n```\n");
    }

    // ------------------------------------------------------------------
    // Info string classification
    // ------------------------------------------------------------------

    #[test]
    fn test_info_string_plain() {
        let parsed = InfoString::parse("");
        assert_eq!(parsed.block_type, CodeBlockType::Plain);
        assert!(parsed.attributes.is_empty());
    }

    #[test]
    fn test_info_string_shortcut() {
        let parsed = InfoString::parse("python");
        assert_eq!(parsed.block_type, shortcut("python"));
        assert!(parsed.attributes.is_empty());
    }

    #[test]
    fn test_info_string_shortcut_with_trailing() {
        let parsed = InfoString::parse("python extra stuff");
        assert_eq!(parsed.block_type, shortcut("python"));
    }

    #[test]
    fn test_info_string_display_explicit() {
        let parsed = InfoString::parse("{.python}");
        assert_eq!(parsed.block_type, explicit(&["python"]));
    }

    #[test]
    fn test_info_string_display_explicit_multiple() {
        let parsed = InfoString::parse("{.python .numberLines}");
        assert_eq!(parsed.block_type, explicit(&["python", "numberLines"]));
    }

    #[test]
    fn test_info_string_executable() {
        let parsed = InfoString::parse("{python}");
        assert_eq!(parsed.block_type, executable("python"));
    }

    #[test]
    fn test_info_string_executable_with_options() {
        let parsed = InfoString::parse("{python echo=false warning=true}");
        assert_eq!(parsed.block_type, executable("python"));
        assert_attrs(
            &parsed.attributes,
            &[("echo", Some("false")), ("warning", Some("true"))],
        );
    }

    #[test]
    fn test_info_string_executable_with_commas() {
        let parsed = InfoString::parse("{r, echo=FALSE, warning=TRUE}");
        assert_eq!(parsed.block_type, executable("r"));
        assert_attrs(
            &parsed.attributes,
            &[("echo", Some("FALSE")), ("warning", Some("TRUE"))],
        );
    }

    #[test]
    fn test_info_string_executable_mixed_commas_spaces() {
        // R-style options separated by commas, with a quoted value holding a space.
        let parsed = InfoString::parse(r#"{r, echo=FALSE, label="my chunk"}"#);
        assert_eq!(parsed.block_type, executable("r"));
        assert_attrs(
            &parsed.attributes,
            &[("echo", Some("FALSE")), ("label", Some("my chunk"))],
        );
    }

    #[test]
    fn test_info_string_mixed_shortcut_and_attrs() {
        let parsed = InfoString::parse("python {.numberLines}");
        assert_eq!(parsed.block_type, shortcut("python"));
        assert_attrs(&parsed.attributes, &[(".numberLines", None)]);
    }

    #[test]
    fn test_info_string_mixed_with_key_value() {
        let parsed = InfoString::parse(r#"python {.numberLines startFrom="100"}"#);
        assert_eq!(parsed.block_type, shortcut("python"));
        assert_attrs(
            &parsed.attributes,
            &[(".numberLines", None), ("startFrom", Some("100"))],
        );
    }

    #[test]
    fn test_info_string_explicit_with_id_and_classes() {
        let parsed = InfoString::parse(r#"{#mycode .haskell .numberLines startFrom="100"}"#);
        assert_eq!(parsed.block_type, explicit(&["haskell", "numberLines"]));

        // The id and the key=value pair must be present among the attributes;
        // their position is not asserted.
        let has_id = parsed.attributes.iter().any(|(key, _)| key == "#mycode");
        let has_start = parsed
            .attributes
            .iter()
            .any(|(key, value)| key == "startFrom" && value.as_deref() == Some("100"));
        assert!(has_id);
        assert!(has_start);
    }

    #[test]
    fn test_info_string_raw_html() {
        let parsed = InfoString::parse("{=html}");
        assert_eq!(parsed.block_type, raw("html"));
        assert!(parsed.attributes.is_empty());
    }

    #[test]
    fn test_info_string_raw_latex() {
        let parsed = InfoString::parse("{=latex}");
        assert_eq!(parsed.block_type, raw("latex"));
    }

    #[test]
    fn test_info_string_raw_openxml() {
        let parsed = InfoString::parse("{=openxml}");
        assert_eq!(parsed.block_type, raw("openxml"));
    }

    #[test]
    fn test_info_string_raw_ms() {
        let parsed = InfoString::parse("{=ms}");
        assert_eq!(parsed.block_type, raw("ms"));
    }

    #[test]
    fn test_info_string_raw_html5() {
        let parsed = InfoString::parse("{=html5}");
        assert_eq!(parsed.block_type, raw("html5"));
    }

    #[test]
    fn test_info_string_raw_not_combined_with_attrs() {
        // A `=format` token accompanied by another attribute must not be
        // classified as a raw block.
        let parsed = InfoString::parse("{=html .class}");
        assert_ne!(parsed.block_type, raw("html"));
    }

    // ------------------------------------------------------------------
    // Pandoc attribute parsing (space-delimited)
    // ------------------------------------------------------------------

    #[test]
    fn test_parse_pandoc_attributes_spaces() {
        // Pandoc display blocks separate attributes with spaces.
        let parsed = InfoString::parse_pandoc_attributes(r#".python .numberLines startFrom="10""#);
        assert_attrs(
            &parsed,
            &[
                (".python", None),
                (".numberLines", None),
                ("startFrom", Some("10")),
            ],
        );
    }

    #[test]
    fn test_parse_pandoc_attributes_no_commas() {
        // Input without any commas: id, class and key=value tokens separated
        // by spaces each become their own attribute.
        let parsed = InfoString::parse_pandoc_attributes("#id .class key=value");
        assert_attrs(
            &parsed,
            &[("#id", None), (".class", None), ("key", Some("value"))],
        );
    }

    // ------------------------------------------------------------------
    // Chunk option parsing (comma-delimited, Quarto/RMarkdown style)
    // ------------------------------------------------------------------

    #[test]
    fn test_parse_chunk_options_commas() {
        // Quarto/RMarkdown chunks separate options with commas.
        let parsed = InfoString::parse_chunk_options("r, echo=FALSE, warning=TRUE");
        assert_attrs(
            &parsed,
            &[
                ("r", None),
                ("echo", Some("FALSE")),
                ("warning", Some("TRUE")),
            ],
        );
    }

    #[test]
    fn test_parse_chunk_options_no_spaces() {
        // Commas without any surrounding whitespace.
        let parsed = InfoString::parse_chunk_options("r,echo=FALSE,warning=TRUE");
        assert_attrs(
            &parsed,
            &[
                ("r", None),
                ("echo", Some("FALSE")),
                ("warning", Some("TRUE")),
            ],
        );
    }

    #[test]
    fn test_parse_chunk_options_mixed() {
        // Spaces and commas used as separators in the same option list.
        let parsed = InfoString::parse_chunk_options("python echo=False, warning=True");
        assert_attrs(
            &parsed,
            &[
                ("python", None),
                ("echo", Some("False")),
                ("warning", Some("True")),
            ],
        );
    }

    #[test]
    fn test_parse_chunk_options_nested_function_call() {
        // Commas nested inside an R function call stay within a single value.
        let parsed = InfoString::parse_chunk_options(r#"r pep-cg, dependson=c("foo", "bar")"#);
        assert_attrs(
            &parsed,
            &[
                ("r", None),
                ("pep-cg", None),
                ("dependson", Some(r#"c("foo", "bar")"#)),
            ],
        );
    }

    #[test]
    fn test_parse_chunk_options_nested_with_spaces() {
        // Function call whose arguments are separated by ", ".
        let parsed =
            InfoString::parse_chunk_options(r#"r, cache.path=file.path("cache", "dir")"#);
        assert_attrs(
            &parsed,
            &[
                ("r", None),
                ("cache.path", Some(r#"file.path("cache", "dir")"#)),
            ],
        );
    }

    #[test]
    fn test_parse_chunk_options_deeply_nested() {
        // More than one level of parenthesis nesting.
        let parsed = InfoString::parse_chunk_options(r#"r, x=list(a=c(1,2), b=c(3,4))"#);
        assert_attrs(
            &parsed,
            &[("r", None), ("x", Some(r#"list(a=c(1,2), b=c(3,4))"#))],
        );
    }

    #[test]
    fn test_parse_chunk_options_brackets_and_braces() {
        // Square brackets and curly braces also protect inner commas.
        let parsed =
            InfoString::parse_chunk_options(r#"r, data=df[rows, cols], config={a:1, b:2}"#);
        assert_attrs(
            &parsed,
            &[
                ("r", None),
                ("data", Some("df[rows, cols]")),
                ("config", Some("{a:1, b:2}")),
            ],
        );
    }

    #[test]
    fn test_parse_chunk_options_quotes_with_parens() {
        // Parentheses inside a quoted string must not affect depth tracking.
        // The expected value has the outer quotes removed.
        let parsed =
            InfoString::parse_chunk_options(r#"r, label="test (with parens)", echo=TRUE"#);
        assert_attrs(
            &parsed,
            &[
                ("r", None),
                ("label", Some("test (with parens)")),
                ("echo", Some("TRUE")),
            ],
        );
    }

    #[test]
    fn test_parse_chunk_options_escaped_quotes() {
        // Backslash-escaped quotes inside a quoted value: the expected value
        // has the outer quotes removed and the escapes resolved.
        let parsed = InfoString::parse_chunk_options(r#"r, label="has \"quoted\" text""#);
        assert_attrs(
            &parsed,
            &[("r", None), ("label", Some(r#"has "quoted" text"#))],
        );
    }

    #[test]
    fn test_display_vs_executable_parsing() {
        // A display block is classified as explicit display...
        let display = InfoString::parse(r#"{.python .numberLines startFrom="10"}"#);
        assert!(matches!(
            display.block_type,
            CodeBlockType::DisplayExplicit { .. }
        ));

        // ...while a comma-separated chunk header is executable and yields
        // two attributes.
        let chunk = InfoString::parse("{r, echo=FALSE, warning=TRUE}");
        assert!(matches!(chunk.block_type, CodeBlockType::Executable { .. }));
        assert_eq!(chunk.attributes.len(), 2);
    }

    #[test]
    fn test_info_string_executable_implicit_label() {
        // `{r mylabel}` is read as `label=mylabel`.
        let parsed = InfoString::parse("{r mylabel}");
        assert_eq!(parsed.block_type, executable("r"));
        assert_attrs(&parsed.attributes, &[("label", Some("mylabel"))]);
    }

    #[test]
    fn test_info_string_executable_implicit_label_with_options() {
        // `{r mylabel, echo=FALSE}` is read as `label=mylabel, echo=FALSE`.
        let parsed = InfoString::parse("{r mylabel, echo=FALSE}");
        assert_eq!(parsed.block_type, executable("r"));
        assert_attrs(
            &parsed.attributes,
            &[("label", Some("mylabel")), ("echo", Some("FALSE"))],
        );
    }

    // ------------------------------------------------------------------
    // Hashpipe preamble detection
    // ------------------------------------------------------------------

    #[test]
    fn test_compute_hashpipe_preamble_line_count_for_block_scalar() {
        let counted = preamble_len(vec![
            "#| fig-cap: |\n",
            "#|   A caption\n",
            "#|   spanning lines\n",
            "a <- 1\n",
        ]);
        assert_eq!(counted, 3);
    }

    #[test]
    fn test_compute_hashpipe_preamble_line_count_stops_at_non_option() {
        let counted = preamble_len(vec![
            "#| label: fig-plot\n",
            "plot(1:10)\n",
            "#| echo: false\n",
        ]);
        assert_eq!(counted, 1);
    }

    #[test]
    fn test_compute_hashpipe_preamble_line_count_stops_at_standalone_prefix() {
        let counted = preamble_len(vec!["#| label: fig-plot\n", "#|\n", "plot(1:10)\n"]);
        assert_eq!(counted, 1);
    }
}