Skip to main content

panache_parser/parser/blocks/
code_blocks.rs

1//! Fenced code block parsing utilities.
2
3use crate::parser::diagnostics::{Diagnostics, SyntaxError, SyntaxErrorSource};
4use crate::parser::utils::chunk_options::hashpipe_comment_prefix;
5use crate::syntax::SyntaxKind;
6use rowan::{GreenNodeBuilder, TextRange};
7
8use super::blockquotes::{count_blockquote_markers, strip_n_blockquote_markers};
9use super::container_prefix::{StrippedLines, advance_columns};
10use crate::parser::utils::container_stack::byte_index_at_column;
11use crate::parser::utils::tree_copy::copy_green_children;
12use crate::parser::yaml::{locate_yaml_diagnostic, parse_stream_with_prefix};
13
14// Container-prefix primitives live in `container_prefix.rs` (the lower
15// layer that hosts `StrippedLines`); re-export so existing call sites in
16// this module, `tables.rs`, `line_blocks.rs`, and `block_dispatcher.rs`
17// keep their `code_blocks::…` import paths working.
18pub(crate) use super::container_prefix::{
19    bq_outer_of_list, emit_blockquote_prefix_tokens, strip_list_indent,
20};
21
22use crate::parser::utils::helpers::{
23    strip_leading_spaces, strip_newline, trim_end_spaces_tabs, trim_start_spaces_tabs,
24};
25
/// Represents the type of code block based on its info string syntax.
///
/// Produced by `InfoString::parse`; each variant corresponds to one spelling
/// of the text that follows the opening fence.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CodeBlockType {
    /// Display-only block with shortcut syntax: ```python
    DisplayShortcut { language: String },
    /// Display-only block with explicit Pandoc syntax: ```{.python}
    ///
    /// `classes` holds the class names with the leading `.` removed.
    DisplayExplicit { classes: Vec<String> },
    /// Executable chunk (Quarto/RMarkdown): ```{python}
    Executable { language: String },
    /// Raw block for specific output format: ```{=html}
    ///
    /// `format` is the text after `=` (alphanumeric characters only).
    Raw { format: String },
    /// No language specified: ```
    Plain,
}
40
/// Parsed attributes from a code block info string.
#[derive(Debug, Clone, PartialEq)]
pub struct InfoString {
    /// The info string exactly as it was handed to `InfoString::parse`
    /// (untrimmed).
    pub raw: String,
    /// Classification of the block derived from the info string syntax.
    pub block_type: CodeBlockType,
    /// Remaining attributes in source order. A `None` value means the token
    /// appeared bare (no `=value`); `#id` tokens keep their `#` prefix.
    pub attributes: Vec<(String, Option<String>)>, // key-value pairs
}
48
49impl InfoString {
50    /// Parse an info string into structured attributes.
51    pub fn parse(raw: &str) -> Self {
52        let trimmed = raw.trim();
53
54        if trimmed.is_empty() {
55            return InfoString {
56                raw: raw.to_string(),
57                block_type: CodeBlockType::Plain,
58                attributes: Vec::new(),
59            };
60        }
61
62        // Check if it starts with '{' - explicit attribute block
63        if let Some(stripped) = trimmed.strip_prefix('{')
64            && let Some(content) = stripped.strip_suffix('}')
65        {
66            return Self::parse_explicit(raw, content);
67        }
68
69        // Check for mixed form: python {.numberLines}
70        if let Some(brace_start) = trimmed.find('{') {
71            let language = trimmed[..brace_start].trim();
72            if !language.is_empty() && !language.contains(char::is_whitespace) {
73                let attr_part = &trimmed[brace_start..];
74                if let Some(stripped) = attr_part.strip_prefix('{')
75                    && let Some(content) = stripped.strip_suffix('}')
76                {
77                    let attrs = Self::parse_attributes(content);
78                    return InfoString {
79                        raw: raw.to_string(),
80                        block_type: CodeBlockType::DisplayShortcut {
81                            language: language.to_string(),
82                        },
83                        attributes: attrs,
84                    };
85                }
86            }
87        }
88
89        // Otherwise, it's a shortcut form (just the language name)
90        // Only take the first word as language
91        let language = trimmed.split_whitespace().next().unwrap_or(trimmed);
92        InfoString {
93            raw: raw.to_string(),
94            block_type: CodeBlockType::DisplayShortcut {
95                language: language.to_string(),
96            },
97            attributes: Vec::new(),
98        }
99    }
100
101    fn parse_explicit(raw: &str, content: &str) -> Self {
102        // Check for raw attribute FIRST: {=format}
103        // The content should start with '=' and have only alphanumeric chars after
104        let trimmed_content = content.trim();
105        if let Some(format_name) = trimmed_content.strip_prefix('=') {
106            // Validate format name: alphanumeric only, no spaces
107            if !format_name.is_empty()
108                && format_name.chars().all(|c| c.is_alphanumeric())
109                && !format_name.contains(char::is_whitespace)
110            {
111                return InfoString {
112                    raw: raw.to_string(),
113                    block_type: CodeBlockType::Raw {
114                        format: format_name.to_string(),
115                    },
116                    attributes: Vec::new(),
117                };
118            }
119        }
120
121        // First, do a preliminary parse to determine block type
122        // Use chunk options parser (comma-aware) for initial detection
123        let prelim_attrs = Self::parse_chunk_options(content);
124
125        // First non-ID, non-attribute token determines if it's executable or display
126        let mut first_lang_token = None;
127        for (key, val) in prelim_attrs.iter() {
128            if val.is_none() && !key.starts_with('#') {
129                first_lang_token = Some(key.as_str());
130                break;
131            }
132        }
133
134        let first_token = first_lang_token.unwrap_or("");
135
136        if first_token.starts_with('.') {
137            // Display block: {.python} or {.haskell .numberLines}
138            // Re-parse with Pandoc-style parser (space-delimited)
139            let attrs = Self::parse_pandoc_attributes(content);
140
141            let classes: Vec<String> = attrs
142                .iter()
143                .filter(|(k, v)| k.starts_with('.') && v.is_none())
144                .map(|(k, _)| k[1..].to_string())
145                .collect();
146
147            let non_class_attrs: Vec<(String, Option<String>)> = attrs
148                .into_iter()
149                .filter(|(k, _)| !k.starts_with('.') || k.contains('='))
150                .collect();
151
152            InfoString {
153                raw: raw.to_string(),
154                block_type: CodeBlockType::DisplayExplicit { classes },
155                attributes: non_class_attrs,
156            }
157        } else if !first_token.is_empty() && !first_token.starts_with('#') {
158            // Executable chunk: {python} or {r}
159            // Use chunk options parser (comma-delimited)
160            let attrs = Self::parse_chunk_options(content);
161            let lang_index = attrs.iter().position(|(k, _)| k == first_token).unwrap();
162
163            // Check if there's a second bareword (implicit label in R/Quarto chunks)
164            // Pattern: {r mylabel} is equivalent to {r, label=mylabel}.
165            // Skip tokens that are actually class (`.foo`) or id (`#foo`)
166            // attributes — those are not labels.
167            let mut has_implicit_label = false;
168            let implicit_label_value = if lang_index + 1 < attrs.len() {
169                let (label_key, val) = &attrs[lang_index + 1];
170                if val.is_none() && !label_key.starts_with('.') && !label_key.starts_with('#') {
171                    has_implicit_label = true;
172                    Some(label_key.clone())
173                } else {
174                    None
175                }
176            } else {
177                None
178            };
179
180            let mut final_attrs: Vec<(String, Option<String>)> = attrs
181                .into_iter()
182                .enumerate()
183                .filter(|(i, _)| {
184                    // Remove language token
185                    if *i == lang_index {
186                        return false;
187                    }
188                    // Remove implicit label token (will be added back explicitly)
189                    if has_implicit_label && *i == lang_index + 1 {
190                        return false;
191                    }
192                    true
193                })
194                .map(|(_, attr)| attr)
195                .collect();
196
197            // Add explicit label if we found an implicit one
198            if let Some(label_val) = implicit_label_value {
199                final_attrs.insert(0, ("label".to_string(), Some(label_val)));
200            }
201
202            InfoString {
203                raw: raw.to_string(),
204                block_type: CodeBlockType::Executable {
205                    language: first_token.to_string(),
206                },
207                attributes: final_attrs,
208            }
209        } else {
210            // Just attributes, no language - use Pandoc parser
211            let attrs = Self::parse_pandoc_attributes(content);
212            InfoString {
213                raw: raw.to_string(),
214                block_type: CodeBlockType::Plain,
215                attributes: attrs,
216            }
217        }
218    }
219
220    /// Parse Pandoc-style attributes for display blocks: {.class #id key="value"}
221    /// Spaces are the primary delimiter. Pandoc spec prefers explicit quoting.
222    fn parse_pandoc_attributes(content: &str) -> Vec<(String, Option<String>)> {
223        let mut attrs = Vec::new();
224        let mut chars = content.chars().peekable();
225
226        while chars.peek().is_some() {
227            // Skip whitespace
228            while matches!(chars.peek(), Some(&' ') | Some(&'\t')) {
229                chars.next();
230            }
231
232            if chars.peek().is_none() {
233                break;
234            }
235
236            // Read key
237            let mut key = String::new();
238            while let Some(&ch) = chars.peek() {
239                if ch == '=' || ch == ' ' || ch == '\t' {
240                    break;
241                }
242                key.push(ch);
243                chars.next();
244            }
245
246            if key.is_empty() {
247                break;
248            }
249
250            // Skip whitespace
251            while matches!(chars.peek(), Some(&' ') | Some(&'\t')) {
252                chars.next();
253            }
254
255            // Check for value
256            if chars.peek() == Some(&'=') {
257                chars.next(); // consume '='
258
259                // Skip whitespace after '='
260                while matches!(chars.peek(), Some(&' ') | Some(&'\t')) {
261                    chars.next();
262                }
263
264                // Read value (might be quoted)
265                let value = if chars.peek() == Some(&'"') {
266                    chars.next(); // consume opening quote
267                    let mut val = String::new();
268                    while let Some(&ch) = chars.peek() {
269                        chars.next();
270                        if ch == '"' {
271                            break;
272                        }
273                        if ch == '\\' {
274                            if let Some(&next_ch) = chars.peek() {
275                                chars.next();
276                                val.push(next_ch);
277                            }
278                        } else {
279                            val.push(ch);
280                        }
281                    }
282                    val
283                } else {
284                    // Unquoted value - read until space
285                    let mut val = String::new();
286                    while let Some(&ch) = chars.peek() {
287                        if ch == ' ' || ch == '\t' {
288                            break;
289                        }
290                        val.push(ch);
291                        chars.next();
292                    }
293                    val
294                };
295
296                attrs.push((key, Some(value)));
297            } else {
298                attrs.push((key, None));
299            }
300        }
301
302        attrs
303    }
304
305    /// Parse Quarto/RMarkdown chunk options: {language, option=value, option2=value2}
306    /// Commas are the primary delimiter (R CSV style). Supports unquoted barewords.
307    fn parse_chunk_options(content: &str) -> Vec<(String, Option<String>)> {
308        let mut attrs = Vec::new();
309        let mut chars = content.chars().peekable();
310
311        while chars.peek().is_some() {
312            // Skip whitespace and commas
313            while matches!(chars.peek(), Some(&' ') | Some(&'\t') | Some(&',')) {
314                chars.next();
315            }
316
317            if chars.peek().is_none() {
318                break;
319            }
320
321            // Read key
322            let mut key = String::new();
323            while let Some(&ch) = chars.peek() {
324                if ch == '=' || ch == ' ' || ch == '\t' || ch == ',' {
325                    break;
326                }
327                key.push(ch);
328                chars.next();
329            }
330
331            if key.is_empty() {
332                break;
333            }
334
335            // Skip whitespace and commas
336            while matches!(chars.peek(), Some(&' ') | Some(&'\t') | Some(&',')) {
337                chars.next();
338            }
339
340            // Check for value
341            if chars.peek() == Some(&'=') {
342                chars.next(); // consume '='
343
344                // Skip whitespace and commas after '='
345                while matches!(chars.peek(), Some(&' ') | Some(&'\t') | Some(&',')) {
346                    chars.next();
347                }
348
349                // Read value (might be quoted)
350                let value = if chars.peek() == Some(&'"') {
351                    chars.next(); // consume opening quote
352                    let mut val = String::new();
353                    while let Some(&ch) = chars.peek() {
354                        chars.next();
355                        if ch == '"' {
356                            break;
357                        }
358                        if ch == '\\' {
359                            if let Some(&next_ch) = chars.peek() {
360                                chars.next();
361                                val.push(next_ch);
362                            }
363                        } else {
364                            val.push(ch);
365                        }
366                    }
367                    val
368                } else {
369                    // Unquoted value - read until comma, space, or tab at depth 0
370                    // Track nesting depth for (), [], {} and quote state
371                    let mut val = String::new();
372                    let mut depth = 0; // Track parentheses/brackets/braces depth
373                    let mut in_quote: Option<char> = None; // Track if inside ' or "
374                    let mut escaped = false; // Track if previous char was backslash
375
376                    while let Some(&ch) = chars.peek() {
377                        // Handle escape sequences
378                        if escaped {
379                            val.push(ch);
380                            chars.next();
381                            escaped = false;
382                            continue;
383                        }
384
385                        if ch == '\\' {
386                            val.push(ch);
387                            chars.next();
388                            escaped = true;
389                            continue;
390                        }
391
392                        // Handle quotes
393                        if let Some(quote_char) = in_quote {
394                            val.push(ch);
395                            chars.next();
396                            if ch == quote_char {
397                                in_quote = None; // Close quote
398                            }
399                            continue;
400                        }
401
402                        // Not in a quote - check for quote start
403                        if ch == '"' || ch == '\'' {
404                            in_quote = Some(ch);
405                            val.push(ch);
406                            chars.next();
407                            continue;
408                        }
409
410                        // Track nesting depth (only when not in quotes)
411                        if ch == '(' || ch == '[' || ch == '{' {
412                            depth += 1;
413                            val.push(ch);
414                            chars.next();
415                            continue;
416                        }
417
418                        if ch == ')' || ch == ']' || ch == '}' {
419                            depth -= 1;
420                            val.push(ch);
421                            chars.next();
422                            continue;
423                        }
424
425                        // Check for delimiters - only break at depth 0
426                        if depth == 0 && (ch == ' ' || ch == '\t' || ch == ',') {
427                            break;
428                        }
429
430                        // Regular character
431                        val.push(ch);
432                        chars.next();
433                    }
434                    val
435                };
436
437                attrs.push((key, Some(value)));
438            } else {
439                attrs.push((key, None));
440            }
441        }
442
443        attrs
444    }
445
446    /// Legacy function - kept for backward compatibility in mixed-form parsing
447    /// For new code, use parse_pandoc_attributes or parse_chunk_options
448    fn parse_attributes(content: &str) -> Vec<(String, Option<String>)> {
449        // Default to chunk options parsing (comma-aware)
450        Self::parse_chunk_options(content)
451    }
452}
453
/// Information about a detected code fence opening.
#[derive(Debug, Clone)]
pub(crate) struct FenceInfo {
    /// The fence character; `try_parse_fence_open` only yields '`' or '~'.
    pub fence_char: char,
    /// Length of the opening run of `fence_char` (at least 3 when produced
    /// by `try_parse_fence_open`).
    pub fence_count: usize,
    /// Text after the fence run, with the trailing newline and at most one
    /// leading space removed.
    pub info_string: String,
}
461
462pub(crate) fn is_gfm_math_fence(fence: &FenceInfo) -> bool {
463    fence.info_string.trim() == "math"
464}
465
466/// Try to detect a fenced code block opening from content.
467/// Returns fence info if this is a valid opening fence.
468pub(crate) fn try_parse_fence_open(
469    content: &str,
470    dialect: crate::options::Dialect,
471) -> Option<FenceInfo> {
472    let trimmed = strip_leading_spaces(content);
473
474    // Check for fence opening (``` or ~~~)
475    let (fence_char, fence_count) = if trimmed.starts_with('`') {
476        let count = trimmed.chars().take_while(|&c| c == '`').count();
477        ('`', count)
478    } else if trimmed.starts_with('~') {
479        let count = trimmed.chars().take_while(|&c| c == '~').count();
480        ('~', count)
481    } else {
482        return None;
483    };
484
485    if fence_count < 3 {
486        return None;
487    }
488
489    let info_string_raw = &trimmed[fence_count..];
490    // Strip trailing newline (LF or CRLF) and at most one leading space
491    let (info_string_trimmed, _) = strip_newline(info_string_raw);
492    let info_string = if let Some(stripped) = info_string_trimmed.strip_prefix(' ') {
493        stripped.to_string()
494    } else {
495        info_string_trimmed.to_string()
496    };
497
498    // Backtick-fenced blocks cannot have backticks in the info string.
499    if fence_char == '`' && info_string.contains('`') {
500        return None;
501    }
502
503    // In Pandoc-markdown, a fence info string is valid only as one of:
504    //   `lang`            a single bare language word,
505    //   `{attrs}`         a brace-delimited attribute block, or
506    //   `lang {attrs}`    a single language word plus an attribute block,
507    // with nothing trailing after the attribute block. Anything else — a
508    // multi-word bare info string (```` ```haskell foo ````), a word before
509    // the brace (```` ```a b {.x} ````), or content after the closing brace
510    // (```` ```{.x} foo ````) — is not a code fence: pandoc reads the backtick
511    // run as an inline code span (and a tilde run as plain inline text).
512    // CommonMark and GFM instead take the first word as the language class and
513    // accept the rest, so this restriction is gated to the Pandoc dialect.
514    if dialect == crate::options::Dialect::Pandoc {
515        let bare = info_string.trim();
516        if !bare.is_empty() {
517            let is_valid = if let Some(brace_start) = bare.find('{') {
518                let before = bare[..brace_start].trim();
519                !before.contains(char::is_whitespace) && bare.ends_with('}')
520            } else {
521                bare.split_whitespace().nth(1).is_none()
522            };
523            if !is_valid {
524                return None;
525            }
526        }
527    }
528
529    Some(FenceInfo {
530        fence_char,
531        fence_count,
532        info_string,
533    })
534}
535
/// Strip the container prefix from a fence's opening line, emitting the
/// prefix tokens this function is responsible for, and return the remainder.
///
/// Returns `(trimmed, untrimmed)`. `untrimmed` is the line with the container
/// prefix removed (or `first_line_override` verbatim when one is supplied);
/// `trimmed` is `untrimmed` after `strip_leading_spaces`. The bytes between
/// the two, if any, are emitted as a `WHITESPACE` token.
///
/// * `source_line` – the full opening line as it appears in the source.
/// * `first_line_override` – when `Some`, taken as the already-stripped line.
///   In that case only the blockquote prefix of `source_line` is emitted
///   (when `bq_depth > 0` and the two strings differ).
/// * `bq_depth` – number of blockquote markers to strip.
/// * `list_content_col` – list-content columns to strip; `0` disables it.
/// * `list_marker_consumed_on_line_0` – when true the list prefix is skipped
///   without emitting anything; otherwise it is emitted as `WHITESPACE`.
/// * `bq_outer` – strip blockquote markers before the list indent when true,
///   after it otherwise.
/// * `content_indent` – extra indent columns, emitted as `WHITESPACE` when
///   non-zero.
// One argument per container dimension; see the parameter list above.
#[allow(clippy::too_many_arguments)]
fn prepare_fence_open_line<'a>(
    builder: &mut GreenNodeBuilder<'static>,
    source_line: &'a str,
    first_line_override: Option<&'a str>,
    bq_depth: usize,
    list_content_col: usize,
    list_marker_consumed_on_line_0: bool,
    bq_outer: bool,
    content_indent: usize,
) -> (&'a str, &'a str) {
    // Strip the active container prefix on line 0 in container-stack
    // order. Bq markers are always upstream-emitted by the blockquote
    // dispatch and silently consumed here. The list_content_col indent
    // is upstream-emitted only on a marker-line dispatch
    // (`list_marker_consumed_on_line_0=true`); on continuation-line
    // dispatch it must be emitted here as WHITESPACE. Adjacent
    // WHITESPACE emissions are coalesced into one token for
    // byte-range-equivalent CST stability.
    if let Some(first_line) = first_line_override {
        // Override path: the caller already stripped the prefix, so the only
        // prefix bytes left to account for are the blockquote markers.
        if bq_depth > 0 && source_line != first_line {
            let stripped = strip_n_blockquote_markers(source_line, bq_depth);
            let prefix_len = source_line.len().saturating_sub(stripped.len());
            if prefix_len > 0 {
                emit_blockquote_prefix_tokens(builder, &source_line[..prefix_len]);
            }
        }
        let first_trimmed = strip_leading_spaces(first_line);
        let leading_ws_len = first_line.len().saturating_sub(first_trimmed.len());
        if leading_ws_len > 0 {
            builder.token(SyntaxKind::WHITESPACE.into(), &first_line[..leading_ws_len]);
        }
        return (first_trimmed, first_line);
    }

    // `s` is the not-yet-consumed tail of `source_line`; byte offsets into
    // `source_line` are recovered as `source_line.len() - s.len()`.
    let mut s: &'a str = source_line;
    // Start offset of a WHITESPACE run that has been consumed but not yet
    // emitted; lets adjacent runs be flushed as a single token.
    let mut pending_ws_start: Option<usize> = None;
    let suppress_list = list_marker_consumed_on_line_0;

    // Emit the pending WHITESPACE run (if non-empty) ending at
    // `current_offset`, then clear it.
    let flush_ws = |builder: &mut GreenNodeBuilder<'static>,
                    pending: &mut Option<usize>,
                    current_offset: usize| {
        if let Some(start) = *pending
            && current_offset > start
        {
            builder.token(
                SyntaxKind::WHITESPACE.into(),
                &source_line[start..current_offset],
            );
        }
        *pending = None;
    };

    // Advance `s` past the list prefix, recording it as pending WHITESPACE
    // unless the list prefix is suppressed.
    let do_strip_list = |s: &mut &'a str, pending: &mut Option<usize>| {
        if list_content_col == 0 {
            return;
        }
        // On a marker-line dispatch (`suppress_list=true`), the list
        // marker bytes have already been emitted upstream and may not
        // be whitespace (e.g. `- > ```` has a leading `-`). Use
        // `advance_columns` which counts columns through any char.
        // On continuation lines, the leading bytes ARE whitespace
        // (the list-content-indent) so use the whitespace-only
        // `strip_list_indent` to stop at non-whitespace.
        let stripped = if suppress_list {
            advance_columns(s, list_content_col)
        } else {
            strip_list_indent(s, list_content_col)
        };
        let consumed = s.len() - stripped.len();
        if consumed > 0 {
            let start = source_line.len() - s.len();
            if !suppress_list && pending.is_none() {
                *pending = Some(start);
            }
            *s = stripped;
        }
    };

    // Advance `s` past the blockquote markers. Pending WHITESPACE is flushed
    // first so it is emitted before the markers are skipped; the markers
    // themselves are not emitted here.
    let do_strip_bq =
        |builder: &mut GreenNodeBuilder<'static>, s: &mut &'a str, pending: &mut Option<usize>| {
            if bq_depth == 0 {
                return;
            }
            let current_offset = source_line.len() - s.len();
            flush_ws(builder, pending, current_offset);
            *s = strip_n_blockquote_markers(s, bq_depth);
        };

    // Strip in container-stack order: outermost container first.
    if bq_outer {
        do_strip_bq(builder, &mut s, &mut pending_ws_start);
        do_strip_list(&mut s, &mut pending_ws_start);
    } else {
        do_strip_list(&mut s, &mut pending_ws_start);
        do_strip_bq(builder, &mut s, &mut pending_ws_start);
    }

    // content_indent (footnote/definition) — always emit as WHITESPACE.
    if content_indent > 0 {
        let indent_bytes = byte_index_at_column(s, content_indent);
        if s.len() >= indent_bytes && indent_bytes > 0 {
            let start = source_line.len() - s.len();
            // Extend an existing pending run rather than starting a new one.
            if pending_ws_start.is_none() {
                pending_ws_start = Some(start);
            }
            s = &s[indent_bytes..];
        }
    }

    let final_offset = source_line.len() - s.len();
    flush_ws(builder, &mut pending_ws_start, final_offset);

    // Leading spaces that remain in front of the fence get their own token.
    let first_trimmed = strip_leading_spaces(s);
    let leading_ws_len = s.len().saturating_sub(first_trimmed.len());
    if leading_ws_len > 0 {
        builder.token(SyntaxKind::WHITESPACE.into(), &s[..leading_ws_len]);
    }
    (first_trimmed, s)
}
655
656fn strip_content_line_prefixes(
657    content_line: &str,
658    bq_depth: usize,
659    list_content_col: usize,
660    bq_outer: bool,
661    content_indent: usize,
662) -> &str {
663    let after_bq_and_list = if bq_outer {
664        let after_bq = if bq_depth > 0 {
665            strip_n_blockquote_markers(content_line, bq_depth)
666        } else {
667            content_line
668        };
669        strip_list_indent(after_bq, list_content_col)
670    } else {
671        let after_list = strip_list_indent(content_line, list_content_col);
672        if bq_depth > 0 {
673            strip_n_blockquote_markers(after_list, bq_depth)
674        } else {
675            after_list
676        }
677    };
678
679    let indent_bytes = byte_index_at_column(after_bq_and_list, content_indent);
680    if content_indent > 0 && after_bq_and_list.len() >= indent_bytes {
681        &after_bq_and_list[indent_bytes..]
682    } else {
683        after_bq_and_list
684    }
685}
686
687pub(crate) fn compute_hashpipe_preamble_line_count(
688    content_lines: &[&str],
689    prefix: &str,
690    bq_depth: usize,
691    list_content_col: usize,
692    bq_outer: bool,
693    content_indent: usize,
694) -> usize {
695    let preview = |idx: usize| -> Option<&str> {
696        let line = content_lines.get(idx)?;
697        let after_indent =
698            strip_content_line_prefixes(line, bq_depth, list_content_col, bq_outer, content_indent);
699        Some(strip_newline(after_indent).0)
700    };
701
702    let mut line_idx = 0usize;
703    while let Some(preview_without_newline) = preview(line_idx) {
704        if is_hashpipe_option_line(preview_without_newline, prefix)
705            || is_hashpipe_continuation_line(preview_without_newline, prefix)
706        {
707            line_idx += 1;
708            continue;
709        }
710        // A blank `#|` line continues the preamble only when followed by another
711        // prefixed line — i.e. it is a blank interior line of a block scalar
712        // (issue_201). A trailing blank `#|` before body code ends the preamble.
713        if is_hashpipe_blank_line(preview_without_newline, prefix)
714            && preview(line_idx + 1)
715                .is_some_and(|next| trim_start_spaces_tabs(next).starts_with(prefix))
716        {
717            line_idx += 1;
718            continue;
719        }
720        break;
721    }
722
723    line_idx
724}
725
726/// Compute the composite per-line prefix marker for a hashpipe preamble:
727/// the uniform container prefix (blockquote markers / list indent /
728/// content indent) plus any leading whitespace up to and including the
729/// hashpipe comment marker (`prefix`), taken from the first preamble line.
730///
731/// Within a preamble the container prefix is uniform per line, so matching
732/// this composite marker via `strip_prefix` lets the prefix-aware YAML
733/// parser splice a nested (list-/blockquote-indented) cell exactly as a
734/// top-level one, peeling the whole prefix into one `YAML_LINE_PREFIX`
735/// leaf. A non-uniform preamble fails validation and falls back to opaque
736/// tokens.
737fn hashpipe_composite_marker<'a>(
738    first_line: &'a str,
739    prefix: &str,
740    bq_depth: usize,
741    list_content_col: usize,
742    bq_outer: bool,
743    content_indent: usize,
744) -> &'a str {
745    let after_container = strip_content_line_prefixes(
746        first_line,
747        bq_depth,
748        list_content_col,
749        bq_outer,
750        content_indent,
751    );
752    let container_len = first_line.len() - after_container.len();
753    let ws_before = after_container.len() - trim_start_spaces_tabs(after_container).len();
754    let marker_len = (container_len + ws_before + prefix.len()).min(first_line.len());
755    &first_line[..marker_len]
756}
757
758fn is_hashpipe_option_line(line_without_newline: &str, prefix: &str) -> bool {
759    let trimmed_start = trim_start_spaces_tabs(line_without_newline);
760    if !trimmed_start.starts_with(prefix) {
761        return false;
762    }
763    let after_prefix = &trimmed_start[prefix.len()..];
764    let rest = trim_start_spaces_tabs(after_prefix);
765    let Some(colon_idx) = rest.find(':') else {
766        return false;
767    };
768    let key = trim_end_spaces_tabs(&rest[..colon_idx]);
769    if key.is_empty() {
770        return false;
771    }
772    true
773}
774
775fn is_hashpipe_continuation_line(line_without_newline: &str, prefix: &str) -> bool {
776    let trimmed_start = trim_start_spaces_tabs(line_without_newline);
777    if !trimmed_start.starts_with(prefix) {
778        return false;
779    }
780    let after_prefix = &trimmed_start[prefix.len()..];
781    let Some(first) = after_prefix.chars().next() else {
782        return false;
783    };
784    if first != ' ' && first != '\t' {
785        return false;
786    }
787    !trim_start_spaces_tabs(after_prefix).is_empty()
788}
789
790/// A bare/blank hashpipe line — the marker followed only by optional whitespace
791/// (e.g. `#|`). Such a line is a valid blank *inside* a block scalar (the
792/// `issue_201` literal-with-blank-line case) or a trailing blank in the preamble,
793/// so it continues the preamble rather than ending it. Without this, the
794/// preamble scan stops at the blank and the parser truncates the block scalar,
795/// embedding only the lines before it.
796fn is_hashpipe_blank_line(line_without_newline: &str, prefix: &str) -> bool {
797    let trimmed_start = trim_start_spaces_tabs(line_without_newline);
798    let Some(after_prefix) = trimmed_start.strip_prefix(prefix) else {
799        return false;
800    };
801    trim_start_spaces_tabs(after_prefix).is_empty()
802}
803
804/// Check if a line is a valid closing fence for the given fence info.
805pub(crate) fn is_closing_fence(content: &str, fence: &FenceInfo) -> bool {
806    let trimmed = strip_leading_spaces(content);
807
808    if !trimmed.starts_with(fence.fence_char) {
809        return false;
810    }
811
812    let closing_count = trimmed
813        .chars()
814        .take_while(|&c| c == fence.fence_char)
815        .count();
816
817    if closing_count < fence.fence_count {
818        return false;
819    }
820
821    // Rest of line must be empty
822    trimmed[closing_count..].trim().is_empty()
823}
824
825/// Emit chunk options as structured CST nodes while preserving all bytes.
826/// This parses {r, echo=TRUE, fig.cap="text"} into CHUNK_OPTIONS with individual CHUNK_OPTION nodes.
827fn emit_chunk_options(builder: &mut GreenNodeBuilder<'static>, content: &str) {
828    if content.trim().is_empty() {
829        builder.token(SyntaxKind::TEXT.into(), content);
830        return;
831    }
832
833    builder.start_node(SyntaxKind::CHUNK_OPTIONS.into());
834
835    let mut pos = 0;
836    let bytes = content.as_bytes();
837
838    while pos < bytes.len() {
839        // Emit leading whitespace/commas as TEXT
840        let ws_start = pos;
841        while pos < bytes.len() {
842            let ch = bytes[pos] as char;
843            if ch != ' ' && ch != '\t' && ch != ',' {
844                break;
845            }
846            pos += 1;
847        }
848        if pos > ws_start {
849            builder.token(SyntaxKind::TEXT.into(), &content[ws_start..pos]);
850        }
851
852        if pos >= bytes.len() {
853            break;
854        }
855
856        // Check if this is a closing brace
857        if bytes[pos] as char == '}' {
858            builder.token(SyntaxKind::TEXT.into(), &content[pos..pos + 1]);
859            pos += 1;
860            if pos < bytes.len() {
861                builder.token(SyntaxKind::TEXT.into(), &content[pos..]);
862            }
863            break;
864        }
865
866        // Read key
867        let key_start = pos;
868        while pos < bytes.len() {
869            let ch = bytes[pos] as char;
870            if ch == '=' || ch == ' ' || ch == '\t' || ch == ',' || ch == '}' {
871                break;
872            }
873            pos += 1;
874        }
875
876        if pos == key_start {
877            // No key found, emit rest as TEXT
878            if pos < bytes.len() {
879                builder.token(SyntaxKind::TEXT.into(), &content[pos..]);
880            }
881            break;
882        }
883
884        let key = &content[key_start..pos];
885
886        // Check for whitespace before '='
887        let ws_before_eq_start = pos;
888        while pos < bytes.len() && matches!(bytes[pos] as char, ' ' | '\t') {
889            pos += 1;
890        }
891
892        // Check if there's a value (=)
893        if pos < bytes.len() && bytes[pos] as char == '=' {
894            // Has value - emit as CHUNK_OPTION
895            builder.start_node(SyntaxKind::CHUNK_OPTION.into());
896            builder.token(SyntaxKind::CHUNK_OPTION_KEY.into(), key);
897
898            // Emit whitespace before '=' if any
899            if pos > ws_before_eq_start {
900                builder.token(SyntaxKind::TEXT.into(), &content[ws_before_eq_start..pos]);
901            }
902
903            builder.token(SyntaxKind::TEXT.into(), "=");
904            pos += 1; // consume '='
905
906            // Emit whitespace after '='
907            let ws_after_eq_start = pos;
908            while pos < bytes.len() && matches!(bytes[pos] as char, ' ' | '\t') {
909                pos += 1;
910            }
911            if pos > ws_after_eq_start {
912                builder.token(SyntaxKind::TEXT.into(), &content[ws_after_eq_start..pos]);
913            }
914
915            // Parse value (might be quoted)
916            if pos < bytes.len() {
917                let quote_char = bytes[pos] as char;
918                if quote_char == '"' || quote_char == '\'' {
919                    // Quoted value
920                    builder.token(
921                        SyntaxKind::CHUNK_OPTION_QUOTE.into(),
922                        &content[pos..pos + 1],
923                    );
924                    pos += 1; // consume opening quote
925
926                    let val_start = pos;
927                    let mut escaped = false;
928                    while pos < bytes.len() {
929                        let ch = bytes[pos] as char;
930                        if !escaped && ch == quote_char {
931                            break;
932                        }
933                        escaped = !escaped && ch == '\\';
934                        pos += 1;
935                    }
936
937                    if pos > val_start {
938                        builder.token(
939                            SyntaxKind::CHUNK_OPTION_VALUE.into(),
940                            &content[val_start..pos],
941                        );
942                    }
943
944                    // Emit closing quote
945                    if pos < bytes.len() && bytes[pos] as char == quote_char {
946                        builder.token(
947                            SyntaxKind::CHUNK_OPTION_QUOTE.into(),
948                            &content[pos..pos + 1],
949                        );
950                        pos += 1;
951                    }
952                } else {
953                    // Unquoted value - read until comma, space, closing brace, or balanced delimiter
954                    let val_start = pos;
955                    let mut depth = 0;
956
957                    while pos < bytes.len() {
958                        let ch = bytes[pos] as char;
959                        match ch {
960                            '(' | '[' | '{' => depth += 1,
961                            ')' | ']' => {
962                                if depth > 0 {
963                                    depth -= 1;
964                                } else {
965                                    break;
966                                }
967                            }
968                            '}' => {
969                                if depth > 0 {
970                                    depth -= 1;
971                                } else {
972                                    break; // End of chunk options
973                                }
974                            }
975                            ',' if depth == 0 => {
976                                break; // Next option
977                            }
978                            ' ' | '\t' if depth == 0 => {
979                                break; // Space separator
980                            }
981                            _ => {}
982                        }
983                        pos += 1;
984                    }
985
986                    if pos > val_start {
987                        builder.token(
988                            SyntaxKind::CHUNK_OPTION_VALUE.into(),
989                            &content[val_start..pos],
990                        );
991                    }
992                }
993            }
994
995            builder.finish_node(); // CHUNK_OPTION
996        } else {
997            // No '=' - classify by prefix: '.foo' is a class, '#foo' is an id,
998            // anything else is a chunk label (e.g. `{r mylabel}`).
999            let kind = match key.as_bytes().first() {
1000                Some(b'.') => SyntaxKind::ATTR_CLASS,
1001                Some(b'#') => SyntaxKind::ATTR_ID,
1002                _ => SyntaxKind::CHUNK_LABEL,
1003            };
1004            builder.start_node(kind.into());
1005            builder.token(SyntaxKind::TEXT.into(), key);
1006            builder.finish_node();
1007            if pos > ws_before_eq_start {
1008                builder.token(SyntaxKind::TEXT.into(), &content[ws_before_eq_start..pos]);
1009            }
1010        }
1011    }
1012
1013    builder.finish_node(); // CHUNK_OPTIONS
1014}
1015
1016/// Helper to parse info string and emit CodeInfo node with parsed components.
1017/// This breaks down the info string into its logical parts while preserving all bytes.
1018fn emit_code_info_node(builder: &mut GreenNodeBuilder<'static>, info_string: &str) {
1019    builder.start_node(SyntaxKind::CODE_INFO.into());
1020
1021    let info = InfoString::parse(info_string);
1022
1023    match &info.block_type {
1024        CodeBlockType::DisplayShortcut { language } => {
1025            // Simple case: python or python {.class}
1026            builder.token(SyntaxKind::CODE_LANGUAGE.into(), language);
1027
1028            // If there's more after the language, emit it as TEXT
1029            let after_lang = &info_string[language.len()..];
1030            if !after_lang.is_empty() {
1031                builder.token(SyntaxKind::TEXT.into(), after_lang);
1032            }
1033        }
1034        CodeBlockType::Executable { language } => {
1035            // Quarto: {r} or {r my-label, echo=FALSE}
1036            builder.token(SyntaxKind::TEXT.into(), "{");
1037            builder.token(SyntaxKind::CODE_LANGUAGE.into(), language);
1038
1039            // Parse and emit chunk options
1040            let start_offset = 1 + language.len(); // Skip "{r"
1041            if start_offset < info_string.len() {
1042                let rest = &info_string[start_offset..];
1043                emit_chunk_options(builder, rest);
1044            }
1045        }
1046        CodeBlockType::DisplayExplicit { classes } => {
1047            // Pandoc: {.python} or {#id .haskell .numberLines}
1048            // We need to find the first class in the raw string and emit everything around it
1049
1050            if let Some(lang) = classes.first() {
1051                // Find where ".lang" appears in the info string
1052                let needle = format!(".{}", lang);
1053                if let Some(lang_start) = info_string.find(&needle) {
1054                    // Emit everything before the language
1055                    if lang_start > 0 {
1056                        builder.token(SyntaxKind::TEXT.into(), &info_string[..lang_start]);
1057                    }
1058
1059                    // Emit the dot
1060                    builder.token(SyntaxKind::TEXT.into(), ".");
1061
1062                    // Emit the language
1063                    builder.token(SyntaxKind::CODE_LANGUAGE.into(), lang);
1064
1065                    // Emit everything after
1066                    let after_lang_start = lang_start + 1 + lang.len();
1067                    if after_lang_start < info_string.len() {
1068                        builder.token(SyntaxKind::TEXT.into(), &info_string[after_lang_start..]);
1069                    }
1070                } else {
1071                    // Couldn't find it, just emit as TEXT
1072                    builder.token(SyntaxKind::TEXT.into(), info_string);
1073                }
1074            } else {
1075                // No classes
1076                builder.token(SyntaxKind::TEXT.into(), info_string);
1077            }
1078        }
1079        CodeBlockType::Raw { .. } | CodeBlockType::Plain => {
1080            // No language, just emit as TEXT
1081            builder.token(SyntaxKind::TEXT.into(), info_string);
1082        }
1083    }
1084
1085    builder.finish_node(); // CodeInfo
1086}
1087
1088/// Parse a fenced code block, consuming lines from the parser.
1089/// Parse a fenced code block, consuming lines from the parser.
1090/// Returns the new position after the code block.
1091///
1092/// All container geometry (blockquote depth, list-item indent,
1093/// footnote/definition base indent, and the bq-vs-list strip order) is
1094/// derived from `window.prefix()`; detection scans and the open-fence
1095/// emitter read those derived scalars, and content/closing-fence lines
1096/// re-emit their container prefix via [`StrippedLines::emit_prefix_at`].
1097pub(crate) fn parse_fenced_code_block(
1098    builder: &mut GreenNodeBuilder<'static>,
1099    window: &StrippedLines<'_, '_>,
1100    fence: FenceInfo,
1101    first_line_override: Option<&str>,
1102    diags: &Diagnostics,
1103) -> usize {
1104    let lines = window.raw();
1105    let start_pos = window.pos();
1106    let prefix = window.prefix();
1107    let bq_depth = prefix.bq_depth();
1108    let list_content_col = prefix.list_content_col();
1109    let list_marker_consumed_on_line_0 = prefix.list_marker_consumed_on_line_0;
1110    let bq_outer = bq_outer_of_list(prefix);
1111    let content_indent = prefix.content_indent();
1112
1113    // Start code block
1114    builder.start_node(SyntaxKind::CODE_BLOCK.into());
1115
1116    // Opening fence
1117    let (first_trimmed, _first_inner) = prepare_fence_open_line(
1118        builder,
1119        lines[start_pos],
1120        first_line_override,
1121        bq_depth,
1122        list_content_col,
1123        list_marker_consumed_on_line_0,
1124        bq_outer,
1125        content_indent,
1126    );
1127
1128    builder.start_node(SyntaxKind::CODE_FENCE_OPEN.into());
1129    builder.token(
1130        SyntaxKind::CODE_FENCE_MARKER.into(),
1131        &first_trimmed[..fence.fence_count],
1132    );
1133
1134    // Emit any space between fence and info string (for losslessness)
1135    let after_fence = &first_trimmed[fence.fence_count..];
1136    if let Some(_space_stripped) = after_fence.strip_prefix(' ') {
1137        // There was a space - emit it as WHITESPACE
1138        builder.token(SyntaxKind::WHITESPACE.into(), " ");
1139        // Parse and emit the info string as a structured node
1140        if !fence.info_string.is_empty() {
1141            emit_code_info_node(builder, &fence.info_string);
1142        }
1143    } else if !fence.info_string.is_empty() {
1144        // No space - parse and emit info_string as a structured node
1145        emit_code_info_node(builder, &fence.info_string);
1146    }
1147
1148    // Extract and emit the actual newline from the opening fence line
1149    let (_, newline_str) = strip_newline(first_trimmed);
1150    if !newline_str.is_empty() {
1151        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1152    }
1153    builder.finish_node(); // CodeFenceOpen
1154
1155    let mut current_pos = start_pos + 1;
1156    let mut content_lines: Vec<&str> = Vec::new(); // Store original lines for lossless parsing
1157    let mut found_closing = false;
1158
1159    while current_pos < lines.len() {
1160        let line = lines[current_pos];
1161
1162        // Count blockquote markers to detect leaving the surrounding
1163        // blockquote. For bq_outer=true probe the raw line (bq markers
1164        // lead); for bq_outer=false strip the list indent first, then
1165        // probe the post-list slice. This forward-scan termination has no
1166        // `StrippedLines` equivalent, so it stays inline.
1167        let probe = if bq_outer {
1168            line
1169        } else {
1170            strip_list_indent(line, list_content_col)
1171        };
1172        let (line_bq_depth, _) = count_blockquote_markers(probe);
1173        if line_bq_depth < bq_depth {
1174            break;
1175        }
1176
1177        // Detection only (emits nothing): the same 2-bucket container
1178        // strip the emission path applies via `emit_content_line_prefixes`
1179        // / `emit_prefix_at`, kept here rather than `strip_at` (a per-op
1180        // walk) to stay byte-identical in interleaved nesting.
1181        let inner_stripped =
1182            strip_content_line_prefixes(line, bq_depth, list_content_col, bq_outer, content_indent);
1183
1184        if is_closing_fence(inner_stripped, &fence) {
1185            found_closing = true;
1186            current_pos += 1;
1187            break;
1188        }
1189
1190        content_lines.push(line);
1191        current_pos += 1;
1192    }
1193
1194    // Add content
1195    if !content_lines.is_empty() {
1196        builder.start_node(SyntaxKind::CODE_CONTENT.into());
1197        let hashpipe_prefix = match InfoString::parse(&fence.info_string).block_type {
1198            CodeBlockType::Executable { language } => hashpipe_comment_prefix(&language),
1199            _ => None,
1200        };
1201
1202        let mut line_idx = 0usize;
1203        if let Some(prefix) = hashpipe_prefix {
1204            let prepared_hashpipe_lines = compute_hashpipe_preamble_line_count(
1205                &content_lines,
1206                prefix,
1207                bq_depth,
1208                list_content_col,
1209                bq_outer,
1210                content_indent,
1211            );
1212            if prepared_hashpipe_lines > 0 {
1213                builder.start_node(SyntaxKind::HASHPIPE_YAML_PREAMBLE.into());
1214                builder.start_node(SyntaxKind::HASHPIPE_YAML_CONTENT.into());
1215
1216                // Exact host bytes of the preamble region: the lines retain
1217                // their trailing LF/CRLF, so concatenation rebuilds the
1218                // source between the open fence and the body exactly.
1219                let content: String = content_lines[..prepared_hashpipe_lines].concat();
1220                // Composite per-line marker (container prefix + `#|`). Uniform
1221                // across the preamble, so a nested cell splices as a top-level
1222                // one (see `hashpipe_composite_marker`).
1223                let marker = hashpipe_composite_marker(
1224                    content_lines[0],
1225                    prefix,
1226                    bq_depth,
1227                    list_content_col,
1228                    bq_outer,
1229                    content_indent,
1230                );
1231
1232                if let Some((diag, start_off, end_off)) = locate_yaml_diagnostic(&content, marker) {
1233                    // Malformed hashpipe YAML: record the syntax error at its
1234                    // host position — the parser already computed the verdict,
1235                    // so it surfaces the diagnostic here instead of discarding
1236                    // it (the linter would otherwise re-parse to recover it).
1237                    // `content` is `content_lines[..n]` concatenated and those
1238                    // lines are subslices of the host input, so the preamble's
1239                    // host start is their pointer offset from line 0.
1240                    let host_start =
1241                        content_lines[0].as_ptr() as usize - lines[0].as_ptr() as usize;
1242                    diags.push(SyntaxError {
1243                        range: TextRange::new(
1244                            ((host_start + start_off) as u32).into(),
1245                            ((host_start + end_off) as u32).into(),
1246                        ),
1247                        message: diag.message.to_string(),
1248                        source: SyntaxErrorSource::Yaml,
1249                    });
1250                    // Fall back to opaque line tokens (container prefix + TEXT +
1251                    // NEWLINE), preserving the bytes without imposing a
1252                    // structure that didn't parse.
1253                    while line_idx < prepared_hashpipe_lines {
1254                        let after_indent = window.emit_prefix_at(builder, start_pos + 1 + line_idx);
1255                        let (line_without_newline, newline_str) = strip_newline(after_indent);
1256                        if !line_without_newline.is_empty() {
1257                            builder.token(SyntaxKind::TEXT.into(), line_without_newline);
1258                        }
1259                        if !newline_str.is_empty() {
1260                            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1261                        }
1262                        line_idx += 1;
1263                    }
1264                } else {
1265                    // Valid: splice the prefix-aware YAML subtree. Token ranges
1266                    // are host ranges directly, the composite prefix peeled into
1267                    // `YAML_LINE_PREFIX` trivia. Mirrors the frontmatter
1268                    // `emit_yaml_block` validate→splice→fallback pattern.
1269                    let stream = parse_stream_with_prefix(&content, marker)
1270                        .green()
1271                        .into_owned();
1272                    copy_green_children(builder, &stream);
1273                }
1274                // Whether spliced or fallback, the preamble lines are consumed.
1275                line_idx = prepared_hashpipe_lines;
1276
1277                builder.finish_node(); // HASHPIPE_YAML_CONTENT
1278                builder.finish_node(); // HASHPIPE_YAML_PREAMBLE
1279            }
1280        }
1281
1282        for k in line_idx..content_lines.len() {
1283            let after_indent = window.emit_prefix_at(builder, start_pos + 1 + k);
1284            let (line_without_newline, newline_str) = strip_newline(after_indent);
1285
1286            if !line_without_newline.is_empty() {
1287                builder.token(SyntaxKind::TEXT.into(), line_without_newline);
1288            }
1289
1290            if !newline_str.is_empty() {
1291                builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1292            }
1293        }
1294        builder.finish_node(); // CodeContent
1295    }
1296
1297    // Closing fence (if found)
1298    if found_closing {
1299        let closing_stripped = window.emit_prefix_at(builder, current_pos - 1);
1300        let (closing_without_newline, newline_str) = strip_newline(closing_stripped);
1301        let closing_trimmed_start = strip_leading_spaces(closing_without_newline);
1302        let leading_ws_len = closing_without_newline.len() - closing_trimmed_start.len();
1303        let closing_count = closing_trimmed_start
1304            .chars()
1305            .take_while(|&c| c == fence.fence_char)
1306            .count();
1307        let trailing_after_marker = &closing_trimmed_start[closing_count..];
1308
1309        builder.start_node(SyntaxKind::CODE_FENCE_CLOSE.into());
1310        if leading_ws_len > 0 {
1311            builder.token(
1312                SyntaxKind::WHITESPACE.into(),
1313                &closing_without_newline[..leading_ws_len],
1314            );
1315        }
1316        builder.token(
1317            SyntaxKind::CODE_FENCE_MARKER.into(),
1318            &closing_trimmed_start[..closing_count],
1319        );
1320        if !trailing_after_marker.is_empty() {
1321            builder.token(SyntaxKind::WHITESPACE.into(), trailing_after_marker);
1322        }
1323        if !newline_str.is_empty() {
1324            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1325        }
1326        builder.finish_node(); // CodeFenceClose
1327    }
1328
1329    builder.finish_node(); // CodeBlock
1330
1331    current_pos
1332}
1333
1334/// Parse a GFM math fence (``` math ... ```) as DISPLAY_MATH while preserving bytes.
1335///
1336/// Container geometry is derived from `window.prefix()`, mirroring
1337/// [`parse_fenced_code_block`].
1338pub(crate) fn parse_fenced_math_block(
1339    builder: &mut GreenNodeBuilder<'static>,
1340    window: &StrippedLines<'_, '_>,
1341    fence: FenceInfo,
1342    first_line_override: Option<&str>,
1343) -> usize {
1344    let lines = window.raw();
1345    let start_pos = window.pos();
1346    let prefix = window.prefix();
1347    let bq_depth = prefix.bq_depth();
1348    let list_content_col = prefix.list_content_col();
1349    let list_marker_consumed_on_line_0 = prefix.list_marker_consumed_on_line_0;
1350    let bq_outer = bq_outer_of_list(prefix);
1351    let content_indent = prefix.content_indent();
1352
1353    builder.start_node(SyntaxKind::DISPLAY_MATH.into());
1354
1355    let (first_trimmed, _first_inner) = prepare_fence_open_line(
1356        builder,
1357        lines[start_pos],
1358        first_line_override,
1359        bq_depth,
1360        list_content_col,
1361        list_marker_consumed_on_line_0,
1362        bq_outer,
1363        content_indent,
1364    );
1365    let (opening_without_newline, opening_newline) = strip_newline(first_trimmed);
1366    builder.token(
1367        SyntaxKind::DISPLAY_MATH_MARKER.into(),
1368        opening_without_newline,
1369    );
1370    if !opening_newline.is_empty() {
1371        builder.token(SyntaxKind::NEWLINE.into(), opening_newline);
1372    }
1373
1374    let mut current_pos = start_pos + 1;
1375    let mut content_lines: Vec<&str> = Vec::new();
1376    let mut found_closing = false;
1377
1378    while current_pos < lines.len() {
1379        let line = lines[current_pos];
1380
1381        // Forward-scan termination on blockquote depth — stays inline (no
1382        // `StrippedLines` equivalent), mirroring `parse_fenced_code_block`.
1383        let probe = if bq_outer {
1384            line
1385        } else {
1386            strip_list_indent(line, list_content_col)
1387        };
1388        let (line_bq_depth, _) = count_blockquote_markers(probe);
1389        if line_bq_depth < bq_depth {
1390            break;
1391        }
1392
1393        // Detection only (emits nothing): same 2-bucket strip as emission.
1394        let inner_stripped =
1395            strip_content_line_prefixes(line, bq_depth, list_content_col, bq_outer, content_indent);
1396
1397        if is_closing_fence(inner_stripped, &fence) {
1398            found_closing = true;
1399            current_pos += 1;
1400            break;
1401        }
1402
1403        content_lines.push(line);
1404        current_pos += 1;
1405    }
1406
1407    if !content_lines.is_empty() {
1408        let mut content = String::new();
1409        for k in 0..content_lines.len() {
1410            let after_indent = window.emit_prefix_at(builder, start_pos + 1 + k);
1411            let (line_without_newline, newline_str) = strip_newline(after_indent);
1412            content.push_str(line_without_newline);
1413            content.push_str(newline_str);
1414        }
1415        builder.token(SyntaxKind::TEXT.into(), &content);
1416    }
1417
1418    if found_closing {
1419        let closing_stripped = window.emit_prefix_at(builder, current_pos - 1);
1420        let (closing_without_newline, newline_str) = strip_newline(closing_stripped);
1421        let closing_trimmed_start = strip_leading_spaces(closing_without_newline);
1422        let leading_ws_len = closing_without_newline.len() - closing_trimmed_start.len();
1423        let closing_count = closing_trimmed_start
1424            .chars()
1425            .take_while(|&c| c == fence.fence_char)
1426            .count();
1427        let trailing_after_marker = &closing_trimmed_start[closing_count..];
1428
1429        if leading_ws_len > 0 {
1430            builder.token(
1431                SyntaxKind::WHITESPACE.into(),
1432                &closing_without_newline[..leading_ws_len],
1433            );
1434        }
1435        builder.token(
1436            SyntaxKind::DISPLAY_MATH_MARKER.into(),
1437            &closing_trimmed_start[..closing_count],
1438        );
1439        if !trailing_after_marker.is_empty() {
1440            builder.token(SyntaxKind::WHITESPACE.into(), trailing_after_marker);
1441        }
1442        if !newline_str.is_empty() {
1443            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1444        }
1445    }
1446
1447    builder.finish_node(); // DisplayMath
1448    current_pos
1449}
1450
1451#[cfg(test)]
1452mod tests {
1453    use super::*;
1454
1455    use crate::options::Dialect;
1456
1457    #[test]
1458    fn test_backtick_fence() {
1459        let fence = try_parse_fence_open("```python", Dialect::Pandoc).unwrap();
1460        assert_eq!(fence.fence_char, '`');
1461        assert_eq!(fence.fence_count, 3);
1462        assert_eq!(fence.info_string, "python");
1463    }
1464
1465    #[test]
1466    fn multiword_bare_info_is_not_a_fence_in_pandoc() {
1467        // ```haskell foo => inline code span in pandoc-markdown, not a fence.
1468        assert!(try_parse_fence_open("```haskell foo", Dialect::Pandoc).is_none());
1469        assert!(try_parse_fence_open("~~~haskell foo", Dialect::Pandoc).is_none());
1470        assert!(try_parse_fence_open("```@example foo bar", Dialect::Pandoc).is_none());
1471        // A single bare word (with surrounding space) is still a valid fence.
1472        assert!(try_parse_fence_open("```haskell ", Dialect::Pandoc).is_some());
1473        assert!(try_parse_fence_open("``` haskell", Dialect::Pandoc).is_some());
1474        // Braced attribute forms carry their own whitespace and stay valid.
1475        assert!(try_parse_fence_open("```{.haskell .foo}", Dialect::Pandoc).is_some());
1476        // Mixed `lang {attrs}` form (e.g. Quarto's `bash {filename="..."}`)
1477        // is valid; extra words or trailing content after the brace are not.
1478        assert!(try_parse_fence_open("```bash {filename=\"Terminal\"}", Dialect::Pandoc).is_some());
1479        assert!(try_parse_fence_open("```haskell {.numberLines}", Dialect::Pandoc).is_some());
1480        assert!(try_parse_fence_open("```haskell {.numberLines} foo", Dialect::Pandoc).is_none());
1481        assert!(try_parse_fence_open("```haskell foo {.x}", Dialect::Pandoc).is_none());
1482        assert!(try_parse_fence_open("```{.x} foo", Dialect::Pandoc).is_none());
1483    }
1484
1485    #[test]
1486    fn multiword_bare_info_is_a_fence_in_commonmark() {
1487        // CommonMark/GFM take the first word as the language class and keep
1488        // the rest of the info string, so the fence is still recognized.
1489        let fence = try_parse_fence_open("```haskell foo", Dialect::CommonMark).unwrap();
1490        assert_eq!(fence.info_string, "haskell foo");
1491        assert!(try_parse_fence_open("~~~haskell foo", Dialect::CommonMark).is_some());
1492    }
1493
1494    #[test]
1495    fn hashpipe_preamble_includes_blank_line_in_block_scalar() {
1496        // A blank `#|` line inside a literal block scalar must stay in the
1497        // preamble (issue_201) — otherwise the scalar is truncated.
1498        let lines = [
1499            "#| fig-alt: |\n",
1500            "#|   First paragraph.\n",
1501            "#|\n",
1502            "#|   Second paragraph.\n",
1503            "plot(1)\n",
1504        ];
1505        assert_eq!(
1506            compute_hashpipe_preamble_line_count(&lines, "#|", 0, 0, false, 0),
1507            4
1508        );
1509    }
1510
1511    #[test]
1512    fn hashpipe_blank_line_predicate() {
1513        assert!(is_hashpipe_blank_line("#|", "#|"));
1514        assert!(is_hashpipe_blank_line("#|   ", "#|"));
1515        assert!(!is_hashpipe_blank_line("#| key: v", "#|"));
1516        assert!(!is_hashpipe_blank_line("plot(1)", "#|"));
1517    }
1518
1519    #[test]
1520    fn test_tilde_fence() {
1521        let fence = try_parse_fence_open("~~~", Dialect::Pandoc).unwrap();
1522        assert_eq!(fence.fence_char, '~');
1523        assert_eq!(fence.fence_count, 3);
1524        assert_eq!(fence.info_string, "");
1525    }
1526
1527    #[test]
1528    fn test_long_fence() {
1529        let fence = try_parse_fence_open("`````", Dialect::Pandoc).unwrap();
1530        assert_eq!(fence.fence_count, 5);
1531    }
1532
1533    #[test]
1534    fn test_two_backticks_invalid() {
1535        assert!(try_parse_fence_open("``", Dialect::Pandoc).is_none());
1536    }
1537
1538    #[test]
1539    fn test_backtick_fence_with_backtick_in_info_is_invalid() {
1540        assert!(try_parse_fence_open("`````hi````there`````", Dialect::Pandoc).is_none());
1541    }
1542
1543    #[test]
1544    fn test_closing_fence() {
1545        let fence = FenceInfo {
1546            fence_char: '`',
1547            fence_count: 3,
1548            info_string: String::new(),
1549        };
1550        assert!(is_closing_fence("```", &fence));
1551        assert!(is_closing_fence("````", &fence));
1552        assert!(!is_closing_fence("``", &fence));
1553        assert!(!is_closing_fence("~~~", &fence));
1554    }
1555
1556    #[test]
1557    fn test_fenced_code_preserves_leading_gt() {
1558        let input = "```\n> foo\n```\n";
1559        let tree = crate::parse(input, None);
1560        assert_eq!(tree.text().to_string(), input);
1561    }
1562
1563    #[test]
1564    fn test_fenced_code_in_blockquote_preserves_opening_fence_marker() {
1565        let input = "> ```\n> code\n> ```\n";
1566        let tree = crate::parse(input, None);
1567        assert_eq!(tree.text().to_string(), input);
1568    }
1569
1570    #[test]
1571    fn test_fenced_code_in_definition_list_with_unicode_content_does_not_panic() {
1572        let input = "Term\n: ```\n├── pyproject.toml\n```\n";
1573        let tree = crate::parse(input, None);
1574        assert_eq!(tree.text().to_string(), input);
1575    }
1576
1577    #[test]
1578    fn test_info_string_plain() {
1579        let info = InfoString::parse("");
1580        assert_eq!(info.block_type, CodeBlockType::Plain);
1581        assert!(info.attributes.is_empty());
1582    }
1583
1584    #[test]
1585    fn test_info_string_shortcut() {
1586        let info = InfoString::parse("python");
1587        assert_eq!(
1588            info.block_type,
1589            CodeBlockType::DisplayShortcut {
1590                language: "python".to_string()
1591            }
1592        );
1593        assert!(info.attributes.is_empty());
1594    }
1595
1596    #[test]
1597    fn test_info_string_shortcut_with_trailing() {
1598        let info = InfoString::parse("python extra stuff");
1599        assert_eq!(
1600            info.block_type,
1601            CodeBlockType::DisplayShortcut {
1602                language: "python".to_string()
1603            }
1604        );
1605    }
1606
1607    #[test]
1608    fn test_info_string_display_explicit() {
1609        let info = InfoString::parse("{.python}");
1610        assert_eq!(
1611            info.block_type,
1612            CodeBlockType::DisplayExplicit {
1613                classes: vec!["python".to_string()]
1614            }
1615        );
1616    }
1617
1618    #[test]
1619    fn test_info_string_display_explicit_multiple() {
1620        let info = InfoString::parse("{.python .numberLines}");
1621        assert_eq!(
1622            info.block_type,
1623            CodeBlockType::DisplayExplicit {
1624                classes: vec!["python".to_string(), "numberLines".to_string()]
1625            }
1626        );
1627    }
1628
1629    #[test]
1630    fn test_info_string_executable() {
1631        let info = InfoString::parse("{python}");
1632        assert_eq!(
1633            info.block_type,
1634            CodeBlockType::Executable {
1635                language: "python".to_string()
1636            }
1637        );
1638    }
1639
1640    #[test]
1641    fn test_info_string_executable_with_options() {
1642        let info = InfoString::parse("{python echo=false warning=true}");
1643        assert_eq!(
1644            info.block_type,
1645            CodeBlockType::Executable {
1646                language: "python".to_string()
1647            }
1648        );
1649        assert_eq!(info.attributes.len(), 2);
1650        assert_eq!(
1651            info.attributes[0],
1652            ("echo".to_string(), Some("false".to_string()))
1653        );
1654        assert_eq!(
1655            info.attributes[1],
1656            ("warning".to_string(), Some("true".to_string()))
1657        );
1658    }
1659
1660    #[test]
1661    fn test_info_string_executable_with_commas() {
1662        let info = InfoString::parse("{r, echo=FALSE, warning=TRUE}");
1663        assert_eq!(
1664            info.block_type,
1665            CodeBlockType::Executable {
1666                language: "r".to_string()
1667            }
1668        );
1669        assert_eq!(info.attributes.len(), 2);
1670        assert_eq!(
1671            info.attributes[0],
1672            ("echo".to_string(), Some("FALSE".to_string()))
1673        );
1674        assert_eq!(
1675            info.attributes[1],
1676            ("warning".to_string(), Some("TRUE".to_string()))
1677        );
1678    }
1679
1680    #[test]
1681    fn test_info_string_executable_mixed_commas_spaces() {
1682        // R-style with commas and spaces
1683        let info = InfoString::parse("{r, echo=FALSE, label=\"my chunk\"}");
1684        assert_eq!(
1685            info.block_type,
1686            CodeBlockType::Executable {
1687                language: "r".to_string()
1688            }
1689        );
1690        assert_eq!(info.attributes.len(), 2);
1691        assert_eq!(
1692            info.attributes[0],
1693            ("echo".to_string(), Some("FALSE".to_string()))
1694        );
1695        assert_eq!(
1696            info.attributes[1],
1697            ("label".to_string(), Some("my chunk".to_string()))
1698        );
1699    }
1700
1701    #[test]
1702    fn test_info_string_mixed_shortcut_and_attrs() {
1703        let info = InfoString::parse("python {.numberLines}");
1704        assert_eq!(
1705            info.block_type,
1706            CodeBlockType::DisplayShortcut {
1707                language: "python".to_string()
1708            }
1709        );
1710        assert_eq!(info.attributes.len(), 1);
1711        assert_eq!(info.attributes[0], (".numberLines".to_string(), None));
1712    }
1713
1714    #[test]
1715    fn test_info_string_mixed_with_key_value() {
1716        let info = InfoString::parse("python {.numberLines startFrom=\"100\"}");
1717        assert_eq!(
1718            info.block_type,
1719            CodeBlockType::DisplayShortcut {
1720                language: "python".to_string()
1721            }
1722        );
1723        assert_eq!(info.attributes.len(), 2);
1724        assert_eq!(info.attributes[0], (".numberLines".to_string(), None));
1725        assert_eq!(
1726            info.attributes[1],
1727            ("startFrom".to_string(), Some("100".to_string()))
1728        );
1729    }
1730
1731    #[test]
1732    fn test_info_string_explicit_with_id_and_classes() {
1733        let info = InfoString::parse("{#mycode .haskell .numberLines startFrom=\"100\"}");
1734        assert_eq!(
1735            info.block_type,
1736            CodeBlockType::DisplayExplicit {
1737                classes: vec!["haskell".to_string(), "numberLines".to_string()]
1738            }
1739        );
1740        // Non-class attributes
1741        let has_id = info.attributes.iter().any(|(k, _)| k == "#mycode");
1742        let has_start = info
1743            .attributes
1744            .iter()
1745            .any(|(k, v)| k == "startFrom" && v == &Some("100".to_string()));
1746        assert!(has_id);
1747        assert!(has_start);
1748    }
1749
1750    #[test]
1751    fn test_info_string_raw_html() {
1752        let info = InfoString::parse("{=html}");
1753        assert_eq!(
1754            info.block_type,
1755            CodeBlockType::Raw {
1756                format: "html".to_string()
1757            }
1758        );
1759        assert!(info.attributes.is_empty());
1760    }
1761
1762    #[test]
1763    fn test_info_string_raw_latex() {
1764        let info = InfoString::parse("{=latex}");
1765        assert_eq!(
1766            info.block_type,
1767            CodeBlockType::Raw {
1768                format: "latex".to_string()
1769            }
1770        );
1771    }
1772
1773    #[test]
1774    fn test_info_string_raw_openxml() {
1775        let info = InfoString::parse("{=openxml}");
1776        assert_eq!(
1777            info.block_type,
1778            CodeBlockType::Raw {
1779                format: "openxml".to_string()
1780            }
1781        );
1782    }
1783
1784    #[test]
1785    fn test_info_string_raw_ms() {
1786        let info = InfoString::parse("{=ms}");
1787        assert_eq!(
1788            info.block_type,
1789            CodeBlockType::Raw {
1790                format: "ms".to_string()
1791            }
1792        );
1793    }
1794
1795    #[test]
1796    fn test_info_string_raw_html5() {
1797        let info = InfoString::parse("{=html5}");
1798        assert_eq!(
1799            info.block_type,
1800            CodeBlockType::Raw {
1801                format: "html5".to_string()
1802            }
1803        );
1804    }
1805
1806    #[test]
1807    fn test_info_string_raw_not_combined_with_attrs() {
1808        // If there are other attributes with =format, it should not be treated as raw
1809        let info = InfoString::parse("{=html .class}");
1810        // This should NOT be parsed as raw because there's more than one attribute
1811        assert_ne!(
1812            info.block_type,
1813            CodeBlockType::Raw {
1814                format: "html".to_string()
1815            }
1816        );
1817    }
1818
1819    #[test]
1820    fn test_parse_pandoc_attributes_spaces() {
1821        // Pandoc display blocks use spaces as delimiters
1822        let attrs = InfoString::parse_pandoc_attributes(".python .numberLines startFrom=\"10\"");
1823        assert_eq!(attrs.len(), 3);
1824        assert_eq!(attrs[0], (".python".to_string(), None));
1825        assert_eq!(attrs[1], (".numberLines".to_string(), None));
1826        assert_eq!(attrs[2], ("startFrom".to_string(), Some("10".to_string())));
1827    }
1828
1829    #[test]
1830    fn test_parse_pandoc_attributes_no_commas() {
1831        // Commas in Pandoc attributes should be treated as part of the value
1832        let attrs = InfoString::parse_pandoc_attributes("#id .class key=value");
1833        assert_eq!(attrs.len(), 3);
1834        assert_eq!(attrs[0], ("#id".to_string(), None));
1835        assert_eq!(attrs[1], (".class".to_string(), None));
1836        assert_eq!(attrs[2], ("key".to_string(), Some("value".to_string())));
1837    }
1838
1839    #[test]
1840    fn test_parse_chunk_options_commas() {
1841        // Quarto/RMarkdown chunks use commas as delimiters
1842        let attrs = InfoString::parse_chunk_options("r, echo=FALSE, warning=TRUE");
1843        assert_eq!(attrs.len(), 3);
1844        assert_eq!(attrs[0], ("r".to_string(), None));
1845        assert_eq!(attrs[1], ("echo".to_string(), Some("FALSE".to_string())));
1846        assert_eq!(attrs[2], ("warning".to_string(), Some("TRUE".to_string())));
1847    }
1848
1849    #[test]
1850    fn test_parse_chunk_options_no_spaces() {
1851        // Should handle comma-separated without spaces
1852        let attrs = InfoString::parse_chunk_options("r,echo=FALSE,warning=TRUE");
1853        assert_eq!(attrs.len(), 3);
1854        assert_eq!(attrs[0], ("r".to_string(), None));
1855        assert_eq!(attrs[1], ("echo".to_string(), Some("FALSE".to_string())));
1856        assert_eq!(attrs[2], ("warning".to_string(), Some("TRUE".to_string())));
1857    }
1858
1859    #[test]
1860    fn test_parse_chunk_options_mixed() {
1861        // Handle both commas and spaces
1862        let attrs = InfoString::parse_chunk_options("python echo=False, warning=True");
1863        assert_eq!(attrs.len(), 3);
1864        assert_eq!(attrs[0], ("python".to_string(), None));
1865        assert_eq!(attrs[1], ("echo".to_string(), Some("False".to_string())));
1866        assert_eq!(attrs[2], ("warning".to_string(), Some("True".to_string())));
1867    }
1868
1869    #[test]
1870    fn test_parse_chunk_options_nested_function_call() {
1871        // R function calls with nested commas should be treated as single value
1872        let attrs = InfoString::parse_chunk_options(r#"r pep-cg, dependson=c("foo", "bar")"#);
1873        assert_eq!(attrs.len(), 3);
1874        assert_eq!(attrs[0], ("r".to_string(), None));
1875        assert_eq!(attrs[1], ("pep-cg".to_string(), None));
1876        assert_eq!(
1877            attrs[2],
1878            (
1879                "dependson".to_string(),
1880                Some(r#"c("foo", "bar")"#.to_string())
1881            )
1882        );
1883    }
1884
1885    #[test]
1886    fn test_parse_chunk_options_nested_with_spaces() {
1887        // Function call with spaces inside
1888        let attrs = InfoString::parse_chunk_options(r#"r, cache.path=file.path("cache", "dir")"#);
1889        assert_eq!(attrs.len(), 2);
1890        assert_eq!(attrs[0], ("r".to_string(), None));
1891        assert_eq!(
1892            attrs[1],
1893            (
1894                "cache.path".to_string(),
1895                Some(r#"file.path("cache", "dir")"#.to_string())
1896            )
1897        );
1898    }
1899
1900    #[test]
1901    fn test_parse_chunk_options_deeply_nested() {
1902        // Multiple levels of nesting
1903        let attrs = InfoString::parse_chunk_options(r#"r, x=list(a=c(1,2), b=c(3,4))"#);
1904        assert_eq!(attrs.len(), 2);
1905        assert_eq!(attrs[0], ("r".to_string(), None));
1906        assert_eq!(
1907            attrs[1],
1908            (
1909                "x".to_string(),
1910                Some(r#"list(a=c(1,2), b=c(3,4))"#.to_string())
1911            )
1912        );
1913    }
1914
1915    #[test]
1916    fn test_parse_chunk_options_brackets_and_braces() {
1917        // Test all bracket types
1918        let attrs = InfoString::parse_chunk_options(r#"r, data=df[rows, cols], config={a:1, b:2}"#);
1919        assert_eq!(attrs.len(), 3);
1920        assert_eq!(attrs[0], ("r".to_string(), None));
1921        assert_eq!(
1922            attrs[1],
1923            ("data".to_string(), Some("df[rows, cols]".to_string()))
1924        );
1925        assert_eq!(
1926            attrs[2],
1927            ("config".to_string(), Some("{a:1, b:2}".to_string()))
1928        );
1929    }
1930
1931    #[test]
1932    fn test_parse_chunk_options_quotes_with_parens() {
1933        // Parentheses inside quoted strings shouldn't affect depth tracking
1934        // Note: The parser strips outer quotes from quoted values
1935        let attrs = InfoString::parse_chunk_options(r#"r, label="test (with parens)", echo=TRUE"#);
1936        assert_eq!(attrs.len(), 3);
1937        assert_eq!(attrs[0], ("r".to_string(), None));
1938        assert_eq!(
1939            attrs[1],
1940            ("label".to_string(), Some("test (with parens)".to_string()))
1941        );
1942        assert_eq!(attrs[2], ("echo".to_string(), Some("TRUE".to_string())));
1943    }
1944
1945    #[test]
1946    fn test_parse_chunk_options_escaped_quotes() {
1947        // Escaped quotes inside string values
1948        // Note: The parser strips outer quotes and processes escapes
1949        let attrs = InfoString::parse_chunk_options(r#"r, label="has \"quoted\" text""#);
1950        assert_eq!(attrs.len(), 2);
1951        assert_eq!(attrs[0], ("r".to_string(), None));
1952        assert_eq!(
1953            attrs[1],
1954            (
1955                "label".to_string(),
1956                Some(r#"has "quoted" text"#.to_string())
1957            )
1958        );
1959    }
1960
1961    #[test]
1962    fn test_display_vs_executable_parsing() {
1963        // Display block should use Pandoc parser (spaces)
1964        let info1 = InfoString::parse("{.python .numberLines startFrom=\"10\"}");
1965        assert!(matches!(
1966            info1.block_type,
1967            CodeBlockType::DisplayExplicit { .. }
1968        ));
1969
1970        // Executable chunk should use chunk options parser (commas)
1971        let info2 = InfoString::parse("{r, echo=FALSE, warning=TRUE}");
1972        assert!(matches!(info2.block_type, CodeBlockType::Executable { .. }));
1973        assert_eq!(info2.attributes.len(), 2);
1974    }
1975
1976    #[test]
1977    fn test_info_string_executable_implicit_label() {
1978        // {r mylabel} should parse as label=mylabel
1979        let info = InfoString::parse("{r mylabel}");
1980        assert!(matches!(
1981            info.block_type,
1982            CodeBlockType::Executable { ref language } if language == "r"
1983        ));
1984        assert_eq!(info.attributes.len(), 1);
1985        assert_eq!(
1986            info.attributes[0],
1987            ("label".to_string(), Some("mylabel".to_string()))
1988        );
1989    }
1990
1991    #[test]
1992    fn test_info_string_executable_implicit_label_with_options() {
1993        // {r mylabel, echo=FALSE} should parse as label=mylabel, echo=FALSE
1994        let info = InfoString::parse("{r mylabel, echo=FALSE}");
1995        assert!(matches!(
1996            info.block_type,
1997            CodeBlockType::Executable { ref language } if language == "r"
1998        ));
1999        assert_eq!(info.attributes.len(), 2);
2000        assert_eq!(
2001            info.attributes[0],
2002            ("label".to_string(), Some("mylabel".to_string()))
2003        );
2004        assert_eq!(
2005            info.attributes[1],
2006            ("echo".to_string(), Some("FALSE".to_string()))
2007        );
2008    }
2009
2010    #[test]
2011    fn test_compute_hashpipe_preamble_line_count_for_block_scalar() {
2012        let content_lines = vec![
2013            "#| fig-cap: |\n",
2014            "#|   A caption\n",
2015            "#|   spanning lines\n",
2016            "a <- 1\n",
2017        ];
2018        let count = compute_hashpipe_preamble_line_count(&content_lines, "#|", 0, 0, false, 0);
2019        assert_eq!(count, 3);
2020    }
2021
2022    #[test]
2023    fn test_compute_hashpipe_preamble_line_count_stops_at_non_option() {
2024        let content_lines = vec!["#| label: fig-plot\n", "plot(1:10)\n", "#| echo: false\n"];
2025        let count = compute_hashpipe_preamble_line_count(&content_lines, "#|", 0, 0, false, 0);
2026        assert_eq!(count, 1);
2027    }
2028
2029    #[test]
2030    fn test_compute_hashpipe_preamble_line_count_stops_at_standalone_prefix() {
2031        let content_lines = vec!["#| label: fig-plot\n", "#|\n", "plot(1:10)\n"];
2032        let count = compute_hashpipe_preamble_line_count(&content_lines, "#|", 0, 0, false, 0);
2033        assert_eq!(count, 1);
2034    }
2035}