Skip to main content

panache_parser/parser/blocks/
code_blocks.rs

1//! Fenced code block parsing utilities.
2
3use crate::parser::diagnostics::{Diagnostics, SyntaxError, SyntaxErrorSource};
4use crate::parser::utils::attributes::emit_code_info_attrs;
5use crate::parser::utils::chunk_options::hashpipe_comment_prefix;
6use crate::syntax::SyntaxKind;
7use rowan::{GreenNodeBuilder, TextRange};
8
9use super::blockquotes::{count_blockquote_markers, strip_n_blockquote_markers};
10use super::container_prefix::{StrippedLines, advance_columns};
11use crate::options::Flavor;
12use crate::parser::utils::container_stack::byte_index_at_column;
13use crate::parser::utils::tree_copy::copy_green_children;
14use crate::parser::yaml::{
15    YamlValidationContext, locate_yaml_diagnostic_ctx, parse_stream_with_prefix,
16};
17
18// Container-prefix primitives live in `container_prefix.rs` (the lower
19// layer that hosts `StrippedLines`); re-export so existing call sites in
20// this module, `tables.rs`, `line_blocks.rs`, and `block_dispatcher.rs`
21// keep their `code_blocks::…` import paths working.
22pub(crate) use super::container_prefix::{
23    bq_outer_of_list, emit_blockquote_prefix_tokens, strip_list_indent,
24};
25
26use crate::parser::utils::helpers::{
27    strip_leading_spaces, strip_newline, trim_end_spaces_tabs, trim_start_spaces_tabs,
28};
29
/// Represents the type of code block based on its info string syntax.
///
/// The variant is chosen by `InfoString::parse` purely from the shape of
/// the info string; the block's content is never inspected.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CodeBlockType {
    /// Display-only block with shortcut syntax: ```python
    ///
    /// Also used for the mixed form `python {.numberLines}`. `language` is
    /// the bare word in front of any attribute block.
    DisplayShortcut { language: String },
    /// Display-only block with explicit Pandoc syntax: ```{.python}
    ///
    /// `classes` holds the class names with their leading `.` removed, in
    /// source order.
    DisplayExplicit { classes: Vec<String> },
    /// Executable chunk (Quarto/RMarkdown): ```{python}
    ///
    /// `language` is the first bare token inside the braces that is neither
    /// a `.class` nor an `#id` and carries no `=value`.
    Executable { language: String },
    /// Raw block for specific output format: ```{=html}
    ///
    /// `format` is the alphanumeric name following the `=`.
    Raw { format: String },
    /// No language specified: ```
    ///
    /// Also produced for a brace block that contains only ids and/or
    /// `key=value` pairs, i.e. no language or class token.
    Plain,
}
44
/// Parsed attributes from a code block info string.
///
/// Built by `InfoString::parse`.
#[derive(Debug, Clone, PartialEq)]
pub struct InfoString {
    /// The info string exactly as it was handed to the parser (not trimmed).
    pub raw: String,
    /// Classification of the block derived from the info string's syntax.
    pub block_type: CodeBlockType,
    /// Attributes as `(key, optional value)` pairs. Valueless tokens such as
    /// `#id` keep their sigil in the key and have `None` as value.
    pub attributes: Vec<(String, Option<String>)>, // key-value pairs
}
52
53impl InfoString {
54    /// Parse an info string into structured attributes.
55    pub fn parse(raw: &str) -> Self {
56        let trimmed = raw.trim();
57
58        if trimmed.is_empty() {
59            return InfoString {
60                raw: raw.to_string(),
61                block_type: CodeBlockType::Plain,
62                attributes: Vec::new(),
63            };
64        }
65
66        // Check if it starts with '{' - explicit attribute block
67        if let Some(stripped) = trimmed.strip_prefix('{')
68            && let Some(content) = stripped.strip_suffix('}')
69        {
70            return Self::parse_explicit(raw, content);
71        }
72
73        // Check for mixed form: python {.numberLines}
74        if let Some(brace_start) = trimmed.find('{') {
75            let language = trimmed[..brace_start].trim();
76            if !language.is_empty() && !language.contains(char::is_whitespace) {
77                let attr_part = &trimmed[brace_start..];
78                if let Some(stripped) = attr_part.strip_prefix('{')
79                    && let Some(content) = stripped.strip_suffix('}')
80                {
81                    let attrs = Self::parse_attributes(content);
82                    return InfoString {
83                        raw: raw.to_string(),
84                        block_type: CodeBlockType::DisplayShortcut {
85                            language: language.to_string(),
86                        },
87                        attributes: attrs,
88                    };
89                }
90            }
91        }
92
93        // Otherwise, it's a shortcut form (just the language name)
94        // Only take the first word as language
95        let language = trimmed.split_whitespace().next().unwrap_or(trimmed);
96        InfoString {
97            raw: raw.to_string(),
98            block_type: CodeBlockType::DisplayShortcut {
99                language: language.to_string(),
100            },
101            attributes: Vec::new(),
102        }
103    }
104
105    fn parse_explicit(raw: &str, content: &str) -> Self {
106        // Check for raw attribute FIRST: {=format}
107        // The content should start with '=' and have only alphanumeric chars after
108        let trimmed_content = content.trim();
109        if let Some(format_name) = trimmed_content.strip_prefix('=') {
110            // Validate format name: alphanumeric only, no spaces
111            if !format_name.is_empty()
112                && format_name.chars().all(|c| c.is_alphanumeric())
113                && !format_name.contains(char::is_whitespace)
114            {
115                return InfoString {
116                    raw: raw.to_string(),
117                    block_type: CodeBlockType::Raw {
118                        format: format_name.to_string(),
119                    },
120                    attributes: Vec::new(),
121                };
122            }
123        }
124
125        // First, do a preliminary parse to determine block type
126        // Use chunk options parser (comma-aware) for initial detection
127        let prelim_attrs = Self::parse_chunk_options(content);
128
129        // First non-ID, non-attribute token determines if it's executable or display
130        let mut first_lang_token = None;
131        for (key, val) in prelim_attrs.iter() {
132            if val.is_none() && !key.starts_with('#') {
133                first_lang_token = Some(key.as_str());
134                break;
135            }
136        }
137
138        let first_token = first_lang_token.unwrap_or("");
139
140        if first_token.starts_with('.') {
141            // Display block: {.python} or {.haskell .numberLines}
142            // Re-parse with Pandoc-style parser (space-delimited)
143            let attrs = Self::parse_pandoc_attributes(content);
144
145            let classes: Vec<String> = attrs
146                .iter()
147                .filter(|(k, v)| k.starts_with('.') && v.is_none())
148                .map(|(k, _)| k[1..].to_string())
149                .collect();
150
151            let non_class_attrs: Vec<(String, Option<String>)> = attrs
152                .into_iter()
153                .filter(|(k, _)| !k.starts_with('.') || k.contains('='))
154                .collect();
155
156            InfoString {
157                raw: raw.to_string(),
158                block_type: CodeBlockType::DisplayExplicit { classes },
159                attributes: non_class_attrs,
160            }
161        } else if !first_token.is_empty() && !first_token.starts_with('#') {
162            // Executable chunk: {python} or {r}
163            // Use chunk options parser (comma-delimited)
164            let attrs = Self::parse_chunk_options(content);
165            let lang_index = attrs.iter().position(|(k, _)| k == first_token).unwrap();
166
167            // Check if there's a second bareword (implicit label in R/Quarto chunks)
168            // Pattern: {r mylabel} is equivalent to {r, label=mylabel}.
169            // Skip tokens that are actually class (`.foo`) or id (`#foo`)
170            // attributes — those are not labels.
171            let mut has_implicit_label = false;
172            let implicit_label_value = if lang_index + 1 < attrs.len() {
173                let (label_key, val) = &attrs[lang_index + 1];
174                if val.is_none() && !label_key.starts_with('.') && !label_key.starts_with('#') {
175                    has_implicit_label = true;
176                    Some(label_key.clone())
177                } else {
178                    None
179                }
180            } else {
181                None
182            };
183
184            let mut final_attrs: Vec<(String, Option<String>)> = attrs
185                .into_iter()
186                .enumerate()
187                .filter(|(i, _)| {
188                    // Remove language token
189                    if *i == lang_index {
190                        return false;
191                    }
192                    // Remove implicit label token (will be added back explicitly)
193                    if has_implicit_label && *i == lang_index + 1 {
194                        return false;
195                    }
196                    true
197                })
198                .map(|(_, attr)| attr)
199                .collect();
200
201            // Add explicit label if we found an implicit one
202            if let Some(label_val) = implicit_label_value {
203                final_attrs.insert(0, ("label".to_string(), Some(label_val)));
204            }
205
206            InfoString {
207                raw: raw.to_string(),
208                block_type: CodeBlockType::Executable {
209                    language: first_token.to_string(),
210                },
211                attributes: final_attrs,
212            }
213        } else {
214            // Just attributes, no language - use Pandoc parser
215            let attrs = Self::parse_pandoc_attributes(content);
216            InfoString {
217                raw: raw.to_string(),
218                block_type: CodeBlockType::Plain,
219                attributes: attrs,
220            }
221        }
222    }
223
224    /// Parse Pandoc-style attributes for display blocks: {.class #id key="value"}
225    /// Spaces are the primary delimiter. Pandoc spec prefers explicit quoting.
226    fn parse_pandoc_attributes(content: &str) -> Vec<(String, Option<String>)> {
227        let mut attrs = Vec::new();
228        let mut chars = content.chars().peekable();
229
230        while chars.peek().is_some() {
231            // Skip whitespace
232            while matches!(chars.peek(), Some(&' ') | Some(&'\t')) {
233                chars.next();
234            }
235
236            if chars.peek().is_none() {
237                break;
238            }
239
240            // Read key
241            let mut key = String::new();
242            while let Some(&ch) = chars.peek() {
243                if ch == '=' || ch == ' ' || ch == '\t' {
244                    break;
245                }
246                key.push(ch);
247                chars.next();
248            }
249
250            if key.is_empty() {
251                break;
252            }
253
254            // Skip whitespace
255            while matches!(chars.peek(), Some(&' ') | Some(&'\t')) {
256                chars.next();
257            }
258
259            // Check for value
260            if chars.peek() == Some(&'=') {
261                chars.next(); // consume '='
262
263                // Skip whitespace after '='
264                while matches!(chars.peek(), Some(&' ') | Some(&'\t')) {
265                    chars.next();
266                }
267
268                // Read value (might be quoted)
269                let value = if chars.peek() == Some(&'"') {
270                    chars.next(); // consume opening quote
271                    let mut val = String::new();
272                    while let Some(&ch) = chars.peek() {
273                        chars.next();
274                        if ch == '"' {
275                            break;
276                        }
277                        if ch == '\\' {
278                            if let Some(&next_ch) = chars.peek() {
279                                chars.next();
280                                val.push(next_ch);
281                            }
282                        } else {
283                            val.push(ch);
284                        }
285                    }
286                    val
287                } else {
288                    // Unquoted value - read until space
289                    let mut val = String::new();
290                    while let Some(&ch) = chars.peek() {
291                        if ch == ' ' || ch == '\t' {
292                            break;
293                        }
294                        val.push(ch);
295                        chars.next();
296                    }
297                    val
298                };
299
300                attrs.push((key, Some(value)));
301            } else {
302                attrs.push((key, None));
303            }
304        }
305
306        attrs
307    }
308
309    /// Parse Quarto/RMarkdown chunk options: {language, option=value, option2=value2}
310    /// Commas are the primary delimiter (R CSV style). Supports unquoted barewords.
311    fn parse_chunk_options(content: &str) -> Vec<(String, Option<String>)> {
312        let mut attrs = Vec::new();
313        let mut chars = content.chars().peekable();
314
315        while chars.peek().is_some() {
316            // Skip whitespace and commas
317            while matches!(chars.peek(), Some(&' ') | Some(&'\t') | Some(&',')) {
318                chars.next();
319            }
320
321            if chars.peek().is_none() {
322                break;
323            }
324
325            // Read key
326            let mut key = String::new();
327            while let Some(&ch) = chars.peek() {
328                if ch == '=' || ch == ' ' || ch == '\t' || ch == ',' {
329                    break;
330                }
331                key.push(ch);
332                chars.next();
333            }
334
335            if key.is_empty() {
336                break;
337            }
338
339            // Skip whitespace and commas
340            while matches!(chars.peek(), Some(&' ') | Some(&'\t') | Some(&',')) {
341                chars.next();
342            }
343
344            // Check for value
345            if chars.peek() == Some(&'=') {
346                chars.next(); // consume '='
347
348                // Skip whitespace and commas after '='
349                while matches!(chars.peek(), Some(&' ') | Some(&'\t') | Some(&',')) {
350                    chars.next();
351                }
352
353                // Read value (might be quoted)
354                let value = if chars.peek() == Some(&'"') {
355                    chars.next(); // consume opening quote
356                    let mut val = String::new();
357                    while let Some(&ch) = chars.peek() {
358                        chars.next();
359                        if ch == '"' {
360                            break;
361                        }
362                        if ch == '\\' {
363                            if let Some(&next_ch) = chars.peek() {
364                                chars.next();
365                                val.push(next_ch);
366                            }
367                        } else {
368                            val.push(ch);
369                        }
370                    }
371                    val
372                } else {
373                    // Unquoted value - read until comma, space, or tab at depth 0
374                    // Track nesting depth for (), [], {} and quote state
375                    let mut val = String::new();
376                    let mut depth = 0; // Track parentheses/brackets/braces depth
377                    let mut in_quote: Option<char> = None; // Track if inside ' or "
378                    let mut escaped = false; // Track if previous char was backslash
379
380                    while let Some(&ch) = chars.peek() {
381                        // Handle escape sequences
382                        if escaped {
383                            val.push(ch);
384                            chars.next();
385                            escaped = false;
386                            continue;
387                        }
388
389                        if ch == '\\' {
390                            val.push(ch);
391                            chars.next();
392                            escaped = true;
393                            continue;
394                        }
395
396                        // Handle quotes
397                        if let Some(quote_char) = in_quote {
398                            val.push(ch);
399                            chars.next();
400                            if ch == quote_char {
401                                in_quote = None; // Close quote
402                            }
403                            continue;
404                        }
405
406                        // Not in a quote - check for quote start
407                        if ch == '"' || ch == '\'' {
408                            in_quote = Some(ch);
409                            val.push(ch);
410                            chars.next();
411                            continue;
412                        }
413
414                        // Track nesting depth (only when not in quotes)
415                        if ch == '(' || ch == '[' || ch == '{' {
416                            depth += 1;
417                            val.push(ch);
418                            chars.next();
419                            continue;
420                        }
421
422                        if ch == ')' || ch == ']' || ch == '}' {
423                            depth -= 1;
424                            val.push(ch);
425                            chars.next();
426                            continue;
427                        }
428
429                        // Check for delimiters - only break at depth 0
430                        if depth == 0 && (ch == ' ' || ch == '\t' || ch == ',') {
431                            break;
432                        }
433
434                        // Regular character
435                        val.push(ch);
436                        chars.next();
437                    }
438                    val
439                };
440
441                attrs.push((key, Some(value)));
442            } else {
443                attrs.push((key, None));
444            }
445        }
446
447        attrs
448    }
449
450    /// Legacy function - kept for backward compatibility in mixed-form parsing
451    /// For new code, use parse_pandoc_attributes or parse_chunk_options
452    fn parse_attributes(content: &str) -> Vec<(String, Option<String>)> {
453        // Default to chunk options parsing (comma-aware)
454        Self::parse_chunk_options(content)
455    }
456}
457
/// Information about a detected code fence opening.
///
/// Produced by `try_parse_fence_open` and consulted by `is_closing_fence`
/// to recognise the matching closing line.
#[derive(Debug, Clone)]
pub(crate) struct FenceInfo {
    /// The character the fence run is made of (a backtick or a tilde when
    /// produced by `try_parse_fence_open`).
    pub fence_char: char,
    /// Length of the opening fence run; a closing fence must be at least
    /// this long.
    pub fence_count: usize,
    /// Text following the fence run, with the line ending and at most one
    /// leading space removed.
    pub info_string: String,
}
465
466pub(crate) fn is_gfm_math_fence(fence: &FenceInfo) -> bool {
467    fence.info_string.trim() == "math"
468}
469
470/// Try to detect a fenced code block opening from content.
471/// Returns fence info if this is a valid opening fence.
472pub(crate) fn try_parse_fence_open(
473    content: &str,
474    dialect: crate::options::Dialect,
475) -> Option<FenceInfo> {
476    let trimmed = strip_leading_spaces(content);
477
478    // Check for fence opening (``` or ~~~)
479    let (fence_char, fence_count) = if trimmed.starts_with('`') {
480        let count = trimmed.chars().take_while(|&c| c == '`').count();
481        ('`', count)
482    } else if trimmed.starts_with('~') {
483        let count = trimmed.chars().take_while(|&c| c == '~').count();
484        ('~', count)
485    } else {
486        return None;
487    };
488
489    if fence_count < 3 {
490        return None;
491    }
492
493    let info_string_raw = &trimmed[fence_count..];
494    // Strip trailing newline (LF or CRLF) and at most one leading space
495    let (info_string_trimmed, _) = strip_newline(info_string_raw);
496    let info_string = if let Some(stripped) = info_string_trimmed.strip_prefix(' ') {
497        stripped.to_string()
498    } else {
499        info_string_trimmed.to_string()
500    };
501
502    // Backtick-fenced blocks cannot have backticks in the info string.
503    if fence_char == '`' && info_string.contains('`') {
504        return None;
505    }
506
507    // In Pandoc-markdown, a fence info string is valid only as one of:
508    //   `lang`            a single bare language word,
509    //   `{attrs}`         a brace-delimited attribute block, or
510    //   `lang {attrs}`    a single language word plus an attribute block,
511    // with nothing trailing after the attribute block. Anything else — a
512    // multi-word bare info string (```` ```haskell foo ````), a word before
513    // the brace (```` ```a b {.x} ````), or content after the closing brace
514    // (```` ```{.x} foo ````) — is not a code fence: pandoc reads the backtick
515    // run as an inline code span (and a tilde run as plain inline text).
516    // CommonMark and GFM instead take the first word as the language class and
517    // accept the rest, so this restriction is gated to the Pandoc dialect.
518    if dialect == crate::options::Dialect::Pandoc {
519        let bare = info_string.trim();
520        if !bare.is_empty() {
521            let is_valid = if let Some(brace_start) = bare.find('{') {
522                let before = bare[..brace_start].trim();
523                !before.contains(char::is_whitespace) && bare.ends_with('}')
524            } else {
525                bare.split_whitespace().nth(1).is_none()
526            };
527            if !is_valid {
528                return None;
529            }
530        }
531    }
532
533    Some(FenceInfo {
534        fence_char,
535        fence_count,
536        info_string,
537    })
538}
539
/// Consume the container prefix of a fence's opening line, emitting the
/// tokens this function is responsible for, and return the remaining text.
///
/// Returns `(trimmed, untrimmed)`: `untrimmed` is the line with the
/// container prefix removed, and `trimmed` is `untrimmed` after
/// `strip_leading_spaces`. The spaces removed by that last step are emitted
/// as a `WHITESPACE` token.
///
/// * `builder` - receives the `WHITESPACE` / blockquote-prefix tokens.
/// * `source_line` - the complete source line.
/// * `first_line_override` - when `Some`, it is taken as the already
///   prefix-stripped line and the stripping below is skipped; if it differs
///   from `source_line` and `bq_depth > 0`, the blockquote prefix found on
///   `source_line` is emitted first.
/// * `bq_depth` - number of blockquote markers to strip.
/// * `list_content_col` - list indent to strip, in columns (`0` = none).
/// * `list_marker_consumed_on_line_0` - when `true`, the list columns are
///   skipped without being emitted.
/// * `bq_outer` - `true` strips blockquote markers before the list indent,
///   `false` strips the list indent first.
/// * `content_indent` - additional indent, in columns, stripped after the
///   list/blockquote prefix and emitted as `WHITESPACE`.
// The argument list mirrors the container state tracked by the callers.
#[allow(clippy::too_many_arguments)]
fn prepare_fence_open_line<'a>(
    builder: &mut GreenNodeBuilder<'static>,
    source_line: &'a str,
    first_line_override: Option<&'a str>,
    bq_depth: usize,
    list_content_col: usize,
    list_marker_consumed_on_line_0: bool,
    bq_outer: bool,
    content_indent: usize,
) -> (&'a str, &'a str) {
    // Strip the active container prefix on line 0 in container-stack
    // order. Bq markers are always upstream-emitted by the blockquote
    // dispatch and silently consumed here. The list_content_col indent
    // is upstream-emitted only on a marker-line dispatch
    // (`list_marker_consumed_on_line_0=true`); on continuation-line
    // dispatch it must be emitted here as WHITESPACE. Adjacent
    // WHITESPACE emissions are coalesced into one token for
    // byte-range-equivalent CST stability.
    if let Some(first_line) = first_line_override {
        // Override path: only the blockquote prefix (if any) and the leading
        // spaces of the override line are emitted.
        if bq_depth > 0 && source_line != first_line {
            let stripped = strip_n_blockquote_markers(source_line, bq_depth);
            let prefix_len = source_line.len().saturating_sub(stripped.len());
            if prefix_len > 0 {
                emit_blockquote_prefix_tokens(builder, &source_line[..prefix_len]);
            }
        }
        let first_trimmed = strip_leading_spaces(first_line);
        let leading_ws_len = first_line.len().saturating_sub(first_trimmed.len());
        if leading_ws_len > 0 {
            builder.token(SyntaxKind::WHITESPACE.into(), &first_line[..leading_ws_len]);
        }
        return (first_trimmed, first_line);
    }

    // `s` is the not-yet-consumed tail of `source_line`; byte offsets into
    // `source_line` are recovered as `source_line.len() - s.len()`.
    let mut s: &'a str = source_line;
    // Start offset of a whitespace run that still has to be emitted.
    let mut pending_ws_start: Option<usize> = None;
    let suppress_list = list_marker_consumed_on_line_0;

    // Emit the pending whitespace run (if non-empty) ending at
    // `current_offset`, then clear it.
    let flush_ws = |builder: &mut GreenNodeBuilder<'static>,
                    pending: &mut Option<usize>,
                    current_offset: usize| {
        if let Some(start) = *pending
            && current_offset > start
        {
            builder.token(
                SyntaxKind::WHITESPACE.into(),
                &source_line[start..current_offset],
            );
        }
        *pending = None;
    };

    // Advance `s` past the list indent; record it as pending whitespace
    // unless the list prefix is suppressed.
    let do_strip_list = |s: &mut &'a str, pending: &mut Option<usize>| {
        if list_content_col == 0 {
            return;
        }
        // On a marker-line dispatch (`suppress_list=true`), the list
        // marker bytes have already been emitted upstream and may not
        // be whitespace (e.g. `- > ```` has a leading `-`). Use
        // `advance_columns` which counts columns through any char.
        // On continuation lines, the leading bytes ARE whitespace
        // (the list-content-indent) so use the whitespace-only
        // `strip_list_indent` to stop at non-whitespace.
        let stripped = if suppress_list {
            advance_columns(s, list_content_col)
        } else {
            strip_list_indent(s, list_content_col)
        };
        let consumed = s.len() - stripped.len();
        if consumed > 0 {
            let start = source_line.len() - s.len();
            if !suppress_list && pending.is_none() {
                *pending = Some(start);
            }
            *s = stripped;
        }
    };

    // Flush whitespace gathered so far, then advance `s` past the
    // blockquote markers without emitting them.
    let do_strip_bq =
        |builder: &mut GreenNodeBuilder<'static>, s: &mut &'a str, pending: &mut Option<usize>| {
            if bq_depth == 0 {
                return;
            }
            let current_offset = source_line.len() - s.len();
            flush_ws(builder, pending, current_offset);
            *s = strip_n_blockquote_markers(s, bq_depth);
        };

    // Strip in container-stack order: outermost container first.
    if bq_outer {
        do_strip_bq(builder, &mut s, &mut pending_ws_start);
        do_strip_list(&mut s, &mut pending_ws_start);
    } else {
        do_strip_list(&mut s, &mut pending_ws_start);
        do_strip_bq(builder, &mut s, &mut pending_ws_start);
    }

    // content_indent (footnote/definition) — always emit as WHITESPACE.
    if content_indent > 0 {
        let indent_bytes = byte_index_at_column(s, content_indent);
        if s.len() >= indent_bytes && indent_bytes > 0 {
            let start = source_line.len() - s.len();
            // Extend an already pending run instead of starting a new one so
            // that adjacent whitespace becomes a single token.
            if pending_ws_start.is_none() {
                pending_ws_start = Some(start);
            }
            s = &s[indent_bytes..];
        }
    }

    let final_offset = source_line.len() - s.len();
    flush_ws(builder, &mut pending_ws_start, final_offset);

    // Leading spaces in front of the fence itself get their own token.
    let first_trimmed = strip_leading_spaces(s);
    let leading_ws_len = s.len().saturating_sub(first_trimmed.len());
    if leading_ws_len > 0 {
        builder.token(SyntaxKind::WHITESPACE.into(), &s[..leading_ws_len]);
    }
    (first_trimmed, s)
}
659
660fn strip_content_line_prefixes(
661    content_line: &str,
662    bq_depth: usize,
663    list_content_col: usize,
664    bq_outer: bool,
665    content_indent: usize,
666) -> &str {
667    let after_bq_and_list = if bq_outer {
668        let after_bq = if bq_depth > 0 {
669            strip_n_blockquote_markers(content_line, bq_depth)
670        } else {
671            content_line
672        };
673        strip_list_indent(after_bq, list_content_col)
674    } else {
675        let after_list = strip_list_indent(content_line, list_content_col);
676        if bq_depth > 0 {
677            strip_n_blockquote_markers(after_list, bq_depth)
678        } else {
679            after_list
680        }
681    };
682
683    let indent_bytes = byte_index_at_column(after_bq_and_list, content_indent);
684    if content_indent > 0 && after_bq_and_list.len() >= indent_bytes {
685        &after_bq_and_list[indent_bytes..]
686    } else {
687        after_bq_and_list
688    }
689}
690
691pub(crate) fn compute_hashpipe_preamble_line_count(
692    content_lines: &[&str],
693    prefix: &str,
694    bq_depth: usize,
695    list_content_col: usize,
696    bq_outer: bool,
697    content_indent: usize,
698) -> usize {
699    let preview = |idx: usize| -> Option<&str> {
700        let line = content_lines.get(idx)?;
701        let after_indent =
702            strip_content_line_prefixes(line, bq_depth, list_content_col, bq_outer, content_indent);
703        Some(strip_newline(after_indent).0)
704    };
705
706    let mut line_idx = 0usize;
707    while let Some(preview_without_newline) = preview(line_idx) {
708        if is_hashpipe_option_line(preview_without_newline, prefix)
709            || is_hashpipe_continuation_line(preview_without_newline, prefix)
710        {
711            line_idx += 1;
712            continue;
713        }
714        // A blank `#|` line continues the preamble only when followed by another
715        // prefixed line — i.e. it is a blank interior line of a block scalar
716        // (issue_201). A trailing blank `#|` before body code ends the preamble.
717        if is_hashpipe_blank_line(preview_without_newline, prefix)
718            && preview(line_idx + 1)
719                .is_some_and(|next| trim_start_spaces_tabs(next).starts_with(prefix))
720        {
721            line_idx += 1;
722            continue;
723        }
724        break;
725    }
726
727    line_idx
728}
729
730/// Compute the composite per-line prefix marker for a hashpipe preamble:
731/// the uniform container prefix (blockquote markers / list indent /
732/// content indent) plus any leading whitespace up to and including the
733/// hashpipe comment marker (`prefix`), taken from the first preamble line.
734///
735/// Within a preamble the container prefix is uniform per line, so matching
736/// this composite marker via `strip_prefix` lets the prefix-aware YAML
737/// parser splice a nested (list-/blockquote-indented) cell exactly as a
738/// top-level one, peeling the whole prefix into one `YAML_LINE_PREFIX`
739/// leaf. A non-uniform preamble fails validation and falls back to opaque
740/// tokens.
741fn hashpipe_composite_marker<'a>(
742    first_line: &'a str,
743    prefix: &str,
744    bq_depth: usize,
745    list_content_col: usize,
746    bq_outer: bool,
747    content_indent: usize,
748) -> &'a str {
749    let after_container = strip_content_line_prefixes(
750        first_line,
751        bq_depth,
752        list_content_col,
753        bq_outer,
754        content_indent,
755    );
756    let container_len = first_line.len() - after_container.len();
757    let ws_before = after_container.len() - trim_start_spaces_tabs(after_container).len();
758    let marker_len = (container_len + ws_before + prefix.len()).min(first_line.len());
759    &first_line[..marker_len]
760}
761
762fn is_hashpipe_option_line(line_without_newline: &str, prefix: &str) -> bool {
763    let trimmed_start = trim_start_spaces_tabs(line_without_newline);
764    if !trimmed_start.starts_with(prefix) {
765        return false;
766    }
767    let after_prefix = &trimmed_start[prefix.len()..];
768    let rest = trim_start_spaces_tabs(after_prefix);
769    let Some(colon_idx) = rest.find(':') else {
770        return false;
771    };
772    let key = trim_end_spaces_tabs(&rest[..colon_idx]);
773    if key.is_empty() {
774        return false;
775    }
776    true
777}
778
779fn is_hashpipe_continuation_line(line_without_newline: &str, prefix: &str) -> bool {
780    let trimmed_start = trim_start_spaces_tabs(line_without_newline);
781    if !trimmed_start.starts_with(prefix) {
782        return false;
783    }
784    let after_prefix = &trimmed_start[prefix.len()..];
785    let Some(first) = after_prefix.chars().next() else {
786        return false;
787    };
788    if first != ' ' && first != '\t' {
789        return false;
790    }
791    !trim_start_spaces_tabs(after_prefix).is_empty()
792}
793
794/// A bare/blank hashpipe line — the marker followed only by optional whitespace
795/// (e.g. `#|`). Such a line is a valid blank *inside* a block scalar (the
796/// `issue_201` literal-with-blank-line case) or a trailing blank in the preamble,
797/// so it continues the preamble rather than ending it. Without this, the
798/// preamble scan stops at the blank and the parser truncates the block scalar,
799/// embedding only the lines before it.
800fn is_hashpipe_blank_line(line_without_newline: &str, prefix: &str) -> bool {
801    let trimmed_start = trim_start_spaces_tabs(line_without_newline);
802    let Some(after_prefix) = trimmed_start.strip_prefix(prefix) else {
803        return false;
804    };
805    trim_start_spaces_tabs(after_prefix).is_empty()
806}
807
808/// Check if a line is a valid closing fence for the given fence info.
809pub(crate) fn is_closing_fence(content: &str, fence: &FenceInfo) -> bool {
810    let trimmed = strip_leading_spaces(content);
811
812    if !trimmed.starts_with(fence.fence_char) {
813        return false;
814    }
815
816    let closing_count = trimmed
817        .chars()
818        .take_while(|&c| c == fence.fence_char)
819        .count();
820
821    if closing_count < fence.fence_count {
822        return false;
823    }
824
825    // Rest of line must be empty
826    trimmed[closing_count..].trim().is_empty()
827}
828
829/// Emit chunk options as structured CST nodes while preserving all bytes.
830/// This parses {r, echo=TRUE, fig.cap="text"} into CHUNK_OPTIONS with individual CHUNK_OPTION nodes.
831fn emit_chunk_options(builder: &mut GreenNodeBuilder<'static>, content: &str) {
832    if content.trim().is_empty() {
833        builder.token(SyntaxKind::TEXT.into(), content);
834        return;
835    }
836
837    builder.start_node(SyntaxKind::CHUNK_OPTIONS.into());
838
839    let mut pos = 0;
840    let bytes = content.as_bytes();
841
842    while pos < bytes.len() {
843        // Emit leading whitespace/commas as TEXT
844        let ws_start = pos;
845        while pos < bytes.len() {
846            let ch = bytes[pos] as char;
847            if ch != ' ' && ch != '\t' && ch != ',' {
848                break;
849            }
850            pos += 1;
851        }
852        if pos > ws_start {
853            builder.token(SyntaxKind::TEXT.into(), &content[ws_start..pos]);
854        }
855
856        if pos >= bytes.len() {
857            break;
858        }
859
860        // Check if this is a closing brace
861        if bytes[pos] as char == '}' {
862            builder.token(SyntaxKind::TEXT.into(), &content[pos..pos + 1]);
863            pos += 1;
864            if pos < bytes.len() {
865                builder.token(SyntaxKind::TEXT.into(), &content[pos..]);
866            }
867            break;
868        }
869
870        // Read key
871        let key_start = pos;
872        while pos < bytes.len() {
873            let ch = bytes[pos] as char;
874            if ch == '=' || ch == ' ' || ch == '\t' || ch == ',' || ch == '}' {
875                break;
876            }
877            pos += 1;
878        }
879
880        if pos == key_start {
881            // No key found, emit rest as TEXT
882            if pos < bytes.len() {
883                builder.token(SyntaxKind::TEXT.into(), &content[pos..]);
884            }
885            break;
886        }
887
888        let key = &content[key_start..pos];
889
890        // Check for whitespace before '='
891        let ws_before_eq_start = pos;
892        while pos < bytes.len() && matches!(bytes[pos] as char, ' ' | '\t') {
893            pos += 1;
894        }
895
896        // Check if there's a value (=)
897        if pos < bytes.len() && bytes[pos] as char == '=' {
898            // Has value - emit as CHUNK_OPTION
899            builder.start_node(SyntaxKind::CHUNK_OPTION.into());
900            builder.token(SyntaxKind::CHUNK_OPTION_KEY.into(), key);
901
902            // Emit whitespace before '=' if any
903            if pos > ws_before_eq_start {
904                builder.token(SyntaxKind::TEXT.into(), &content[ws_before_eq_start..pos]);
905            }
906
907            builder.token(SyntaxKind::TEXT.into(), "=");
908            pos += 1; // consume '='
909
910            // Emit whitespace after '='
911            let ws_after_eq_start = pos;
912            while pos < bytes.len() && matches!(bytes[pos] as char, ' ' | '\t') {
913                pos += 1;
914            }
915            if pos > ws_after_eq_start {
916                builder.token(SyntaxKind::TEXT.into(), &content[ws_after_eq_start..pos]);
917            }
918
919            // Parse value (might be quoted)
920            if pos < bytes.len() {
921                let quote_char = bytes[pos] as char;
922                if quote_char == '"' || quote_char == '\'' {
923                    // Quoted value
924                    builder.token(
925                        SyntaxKind::CHUNK_OPTION_QUOTE.into(),
926                        &content[pos..pos + 1],
927                    );
928                    pos += 1; // consume opening quote
929
930                    let val_start = pos;
931                    let mut escaped = false;
932                    while pos < bytes.len() {
933                        let ch = bytes[pos] as char;
934                        if !escaped && ch == quote_char {
935                            break;
936                        }
937                        escaped = !escaped && ch == '\\';
938                        pos += 1;
939                    }
940
941                    if pos > val_start {
942                        builder.token(
943                            SyntaxKind::CHUNK_OPTION_VALUE.into(),
944                            &content[val_start..pos],
945                        );
946                    }
947
948                    // Emit closing quote
949                    if pos < bytes.len() && bytes[pos] as char == quote_char {
950                        builder.token(
951                            SyntaxKind::CHUNK_OPTION_QUOTE.into(),
952                            &content[pos..pos + 1],
953                        );
954                        pos += 1;
955                    }
956                } else {
957                    // Unquoted value - read until comma, space, closing brace, or balanced delimiter
958                    let val_start = pos;
959                    let mut depth = 0;
960
961                    while pos < bytes.len() {
962                        let ch = bytes[pos] as char;
963                        match ch {
964                            '(' | '[' | '{' => depth += 1,
965                            ')' | ']' => {
966                                if depth > 0 {
967                                    depth -= 1;
968                                } else {
969                                    break;
970                                }
971                            }
972                            '}' => {
973                                if depth > 0 {
974                                    depth -= 1;
975                                } else {
976                                    break; // End of chunk options
977                                }
978                            }
979                            ',' if depth == 0 => {
980                                break; // Next option
981                            }
982                            ' ' | '\t' if depth == 0 => {
983                                break; // Space separator
984                            }
985                            _ => {}
986                        }
987                        pos += 1;
988                    }
989
990                    if pos > val_start {
991                        builder.token(
992                            SyntaxKind::CHUNK_OPTION_VALUE.into(),
993                            &content[val_start..pos],
994                        );
995                    }
996                }
997            }
998
999            builder.finish_node(); // CHUNK_OPTION
1000        } else {
1001            // No '=' - classify by prefix: '.foo' is a class, '#foo' is an id,
1002            // anything else is a chunk label (e.g. `{r mylabel}`).
1003            let kind = match key.as_bytes().first() {
1004                Some(b'.') => SyntaxKind::ATTR_CLASS,
1005                Some(b'#') => SyntaxKind::ATTR_ID,
1006                _ => SyntaxKind::CHUNK_LABEL,
1007            };
1008            builder.start_node(kind.into());
1009            builder.token(SyntaxKind::TEXT.into(), key);
1010            builder.finish_node();
1011            if pos > ws_before_eq_start {
1012                builder.token(SyntaxKind::TEXT.into(), &content[ws_before_eq_start..pos]);
1013            }
1014        }
1015    }
1016
1017    builder.finish_node(); // CHUNK_OPTIONS
1018}
1019
1020/// Helper to parse info string and emit CodeInfo node with parsed components.
1021/// This breaks down the info string into its logical parts while preserving all bytes.
1022fn emit_code_info_node(builder: &mut GreenNodeBuilder<'static>, info_string: &str) {
1023    builder.start_node(SyntaxKind::CODE_INFO.into());
1024
1025    let info = InfoString::parse(info_string);
1026
1027    match &info.block_type {
1028        CodeBlockType::DisplayShortcut { language } => {
1029            // Simple case: python or python {.class}
1030            builder.token(SyntaxKind::CODE_LANGUAGE.into(), language);
1031
1032            // Structure a trailing `{...}` attribute block (the language is
1033            // already emitted, so no carve). Falls back to one opaque TEXT token
1034            // for unrecognized remainders, preserving the prior shape.
1035            let after_lang = &info_string[language.len()..];
1036            if !after_lang.is_empty()
1037                && !emit_code_info_attrs(builder, after_lang, /* carve */ false)
1038            {
1039                builder.token(SyntaxKind::TEXT.into(), after_lang);
1040            }
1041        }
1042        CodeBlockType::Executable { language } => {
1043            // Quarto: {r} or {r my-label, echo=FALSE}
1044            builder.token(SyntaxKind::TEXT.into(), "{");
1045            builder.token(SyntaxKind::CODE_LANGUAGE.into(), language);
1046
1047            // Parse and emit chunk options
1048            let start_offset = 1 + language.len(); // Skip "{r"
1049            if start_offset < info_string.len() {
1050                let rest = &info_string[start_offset..];
1051                emit_chunk_options(builder, rest);
1052            }
1053        }
1054        CodeBlockType::DisplayExplicit { .. } => {
1055            // Pandoc: `{.python}` or `{#id .haskell .numberLines startFrom="10"}`.
1056            // Structure the `{...}` body into ATTR_* children, carving the first
1057            // `.class` out as the CODE_LANGUAGE token (language-first semantics).
1058            // Falls back to one opaque TEXT token when the body is unrecognized,
1059            // preserving the prior shape.
1060            if !emit_code_info_attrs(builder, info_string, /* carve */ true) {
1061                builder.token(SyntaxKind::TEXT.into(), info_string);
1062            }
1063        }
1064        CodeBlockType::Raw { .. } | CodeBlockType::Plain => {
1065            // No language, just emit as TEXT
1066            builder.token(SyntaxKind::TEXT.into(), info_string);
1067        }
1068    }
1069
1070    builder.finish_node(); // CodeInfo
1071}
1072
1073/// Parse a fenced code block, consuming lines from the parser.
1074/// Parse a fenced code block, consuming lines from the parser.
1075/// Returns the new position after the code block.
1076///
1077/// All container geometry (blockquote depth, list-item indent,
1078/// footnote/definition base indent, and the bq-vs-list strip order) is
1079/// derived from `window.prefix()`; detection scans and the open-fence
1080/// emitter read those derived scalars, and content/closing-fence lines
1081/// re-emit their container prefix via [`StrippedLines::emit_prefix_at`].
1082pub(crate) fn parse_fenced_code_block(
1083    builder: &mut GreenNodeBuilder<'static>,
1084    window: &StrippedLines<'_, '_>,
1085    fence: FenceInfo,
1086    first_line_override: Option<&str>,
1087    diags: &Diagnostics,
1088    flavor: Flavor,
1089) -> usize {
1090    let lines = window.raw();
1091    let start_pos = window.pos();
1092    let prefix = window.prefix();
1093    let bq_depth = prefix.bq_depth();
1094    let list_content_col = prefix.list_content_col();
1095    let list_marker_consumed_on_line_0 = prefix.list_marker_consumed_on_line_0;
1096    let bq_outer = bq_outer_of_list(prefix);
1097    let content_indent = prefix.content_indent();
1098
1099    // Start code block
1100    builder.start_node(SyntaxKind::CODE_BLOCK.into());
1101
1102    // Opening fence
1103    let (first_trimmed, _first_inner) = prepare_fence_open_line(
1104        builder,
1105        lines[start_pos],
1106        first_line_override,
1107        bq_depth,
1108        list_content_col,
1109        list_marker_consumed_on_line_0,
1110        bq_outer,
1111        content_indent,
1112    );
1113
1114    builder.start_node(SyntaxKind::CODE_FENCE_OPEN.into());
1115    builder.token(
1116        SyntaxKind::CODE_FENCE_MARKER.into(),
1117        &first_trimmed[..fence.fence_count],
1118    );
1119
1120    // Emit any space between fence and info string (for losslessness)
1121    let after_fence = &first_trimmed[fence.fence_count..];
1122    if let Some(_space_stripped) = after_fence.strip_prefix(' ') {
1123        // There was a space - emit it as WHITESPACE
1124        builder.token(SyntaxKind::WHITESPACE.into(), " ");
1125        // Parse and emit the info string as a structured node
1126        if !fence.info_string.is_empty() {
1127            emit_code_info_node(builder, &fence.info_string);
1128        }
1129    } else if !fence.info_string.is_empty() {
1130        // No space - parse and emit info_string as a structured node
1131        emit_code_info_node(builder, &fence.info_string);
1132    }
1133
1134    // Extract and emit the actual newline from the opening fence line
1135    let (_, newline_str) = strip_newline(first_trimmed);
1136    if !newline_str.is_empty() {
1137        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1138    }
1139    builder.finish_node(); // CodeFenceOpen
1140
1141    let mut current_pos = start_pos + 1;
1142    let mut content_lines: Vec<&str> = Vec::new(); // Store original lines for lossless parsing
1143    let mut found_closing = false;
1144
1145    while current_pos < lines.len() {
1146        let line = lines[current_pos];
1147
1148        // Count blockquote markers to detect leaving the surrounding
1149        // blockquote. For bq_outer=true probe the raw line (bq markers
1150        // lead); for bq_outer=false strip the list indent first, then
1151        // probe the post-list slice. This forward-scan termination has no
1152        // `StrippedLines` equivalent, so it stays inline.
1153        let probe = if bq_outer {
1154            line
1155        } else {
1156            strip_list_indent(line, list_content_col)
1157        };
1158        let (line_bq_depth, _) = count_blockquote_markers(probe);
1159        if line_bq_depth < bq_depth {
1160            break;
1161        }
1162
1163        // Detection only (emits nothing): the same 2-bucket container
1164        // strip the emission path applies via `emit_content_line_prefixes`
1165        // / `emit_prefix_at`, kept here rather than `strip_at` (a per-op
1166        // walk) to stay byte-identical in interleaved nesting.
1167        let inner_stripped =
1168            strip_content_line_prefixes(line, bq_depth, list_content_col, bq_outer, content_indent);
1169
1170        if is_closing_fence(inner_stripped, &fence) {
1171            found_closing = true;
1172            current_pos += 1;
1173            break;
1174        }
1175
1176        content_lines.push(line);
1177        current_pos += 1;
1178    }
1179
1180    // Add content
1181    if !content_lines.is_empty() {
1182        builder.start_node(SyntaxKind::CODE_CONTENT.into());
1183        let hashpipe_prefix = match InfoString::parse(&fence.info_string).block_type {
1184            CodeBlockType::Executable { language } => hashpipe_comment_prefix(&language),
1185            _ => None,
1186        };
1187
1188        let mut line_idx = 0usize;
1189        if let Some(prefix) = hashpipe_prefix {
1190            let prepared_hashpipe_lines = compute_hashpipe_preamble_line_count(
1191                &content_lines,
1192                prefix,
1193                bq_depth,
1194                list_content_col,
1195                bq_outer,
1196                content_indent,
1197            );
1198            if prepared_hashpipe_lines > 0 {
1199                builder.start_node(SyntaxKind::HASHPIPE_YAML_PREAMBLE.into());
1200                builder.start_node(SyntaxKind::HASHPIPE_YAML_CONTENT.into());
1201
1202                // Exact host bytes of the preamble region: the lines retain
1203                // their trailing LF/CRLF, so concatenation rebuilds the
1204                // source between the open fence and the body exactly.
1205                let content: String = content_lines[..prepared_hashpipe_lines].concat();
1206                // Composite per-line marker (container prefix + `#|`). Uniform
1207                // across the preamble, so a nested cell splices as a top-level
1208                // one (see `hashpipe_composite_marker`).
1209                let marker = hashpipe_composite_marker(
1210                    content_lines[0],
1211                    prefix,
1212                    bq_depth,
1213                    list_content_col,
1214                    bq_outer,
1215                    content_indent,
1216                );
1217
1218                let yaml_ctx = YamlValidationContext::hashpipe(flavor);
1219                if let Some((diag, start_off, end_off)) =
1220                    locate_yaml_diagnostic_ctx(&content, marker, yaml_ctx)
1221                {
1222                    // Malformed hashpipe YAML: record the syntax error at its
1223                    // host position — the parser already computed the verdict,
1224                    // so it surfaces the diagnostic here instead of discarding
1225                    // it (the linter would otherwise re-parse to recover it).
1226                    // `content` is `content_lines[..n]` concatenated and those
1227                    // lines are subslices of the host input, so the preamble's
1228                    // host start is their pointer offset from line 0.
1229                    let host_start =
1230                        content_lines[0].as_ptr() as usize - lines[0].as_ptr() as usize;
1231                    diags.push(SyntaxError {
1232                        range: TextRange::new(
1233                            ((host_start + start_off) as u32).into(),
1234                            ((host_start + end_off) as u32).into(),
1235                        ),
1236                        message: diag.message.to_string(),
1237                        source: SyntaxErrorSource::Yaml,
1238                    });
1239                    // Fall back to opaque line tokens (container prefix + TEXT +
1240                    // NEWLINE), preserving the bytes without imposing a
1241                    // structure that didn't parse.
1242                    while line_idx < prepared_hashpipe_lines {
1243                        let after_indent = window.emit_prefix_at(builder, start_pos + 1 + line_idx);
1244                        let (line_without_newline, newline_str) = strip_newline(after_indent);
1245                        if !line_without_newline.is_empty() {
1246                            builder.token(SyntaxKind::TEXT.into(), line_without_newline);
1247                        }
1248                        if !newline_str.is_empty() {
1249                            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1250                        }
1251                        line_idx += 1;
1252                    }
1253                } else {
1254                    // Valid: splice the prefix-aware YAML subtree. Token ranges
1255                    // are host ranges directly, the composite prefix peeled into
1256                    // `YAML_LINE_PREFIX` trivia. Mirrors the frontmatter
1257                    // `emit_yaml_block` validate→splice→fallback pattern.
1258                    let stream = parse_stream_with_prefix(&content, marker)
1259                        .green()
1260                        .into_owned();
1261                    copy_green_children(builder, &stream);
1262                }
1263                // Whether spliced or fallback, the preamble lines are consumed.
1264                line_idx = prepared_hashpipe_lines;
1265
1266                builder.finish_node(); // HASHPIPE_YAML_CONTENT
1267                builder.finish_node(); // HASHPIPE_YAML_PREAMBLE
1268            }
1269        }
1270
1271        for k in line_idx..content_lines.len() {
1272            let after_indent = window.emit_prefix_at(builder, start_pos + 1 + k);
1273            let (line_without_newline, newline_str) = strip_newline(after_indent);
1274
1275            if !line_without_newline.is_empty() {
1276                builder.token(SyntaxKind::TEXT.into(), line_without_newline);
1277            }
1278
1279            if !newline_str.is_empty() {
1280                builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1281            }
1282        }
1283        builder.finish_node(); // CodeContent
1284    }
1285
1286    // Closing fence (if found)
1287    if found_closing {
1288        let closing_stripped = window.emit_prefix_at(builder, current_pos - 1);
1289        let (closing_without_newline, newline_str) = strip_newline(closing_stripped);
1290        let closing_trimmed_start = strip_leading_spaces(closing_without_newline);
1291        let leading_ws_len = closing_without_newline.len() - closing_trimmed_start.len();
1292        let closing_count = closing_trimmed_start
1293            .chars()
1294            .take_while(|&c| c == fence.fence_char)
1295            .count();
1296        let trailing_after_marker = &closing_trimmed_start[closing_count..];
1297
1298        builder.start_node(SyntaxKind::CODE_FENCE_CLOSE.into());
1299        if leading_ws_len > 0 {
1300            builder.token(
1301                SyntaxKind::WHITESPACE.into(),
1302                &closing_without_newline[..leading_ws_len],
1303            );
1304        }
1305        builder.token(
1306            SyntaxKind::CODE_FENCE_MARKER.into(),
1307            &closing_trimmed_start[..closing_count],
1308        );
1309        if !trailing_after_marker.is_empty() {
1310            builder.token(SyntaxKind::WHITESPACE.into(), trailing_after_marker);
1311        }
1312        if !newline_str.is_empty() {
1313            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1314        }
1315        builder.finish_node(); // CodeFenceClose
1316    }
1317
1318    builder.finish_node(); // CodeBlock
1319
1320    current_pos
1321}
1322
1323/// Parse a GFM math fence (``` math ... ```) as DISPLAY_MATH while preserving bytes.
1324///
1325/// Container geometry is derived from `window.prefix()`, mirroring
1326/// [`parse_fenced_code_block`].
1327pub(crate) fn parse_fenced_math_block(
1328    builder: &mut GreenNodeBuilder<'static>,
1329    window: &StrippedLines<'_, '_>,
1330    fence: FenceInfo,
1331    first_line_override: Option<&str>,
1332) -> usize {
1333    let lines = window.raw();
1334    let start_pos = window.pos();
1335    let prefix = window.prefix();
1336    let bq_depth = prefix.bq_depth();
1337    let list_content_col = prefix.list_content_col();
1338    let list_marker_consumed_on_line_0 = prefix.list_marker_consumed_on_line_0;
1339    let bq_outer = bq_outer_of_list(prefix);
1340    let content_indent = prefix.content_indent();
1341
1342    builder.start_node(SyntaxKind::DISPLAY_MATH.into());
1343
1344    let (first_trimmed, _first_inner) = prepare_fence_open_line(
1345        builder,
1346        lines[start_pos],
1347        first_line_override,
1348        bq_depth,
1349        list_content_col,
1350        list_marker_consumed_on_line_0,
1351        bq_outer,
1352        content_indent,
1353    );
1354    let (opening_without_newline, opening_newline) = strip_newline(first_trimmed);
1355    builder.token(
1356        SyntaxKind::DISPLAY_MATH_MARKER.into(),
1357        opening_without_newline,
1358    );
1359    if !opening_newline.is_empty() {
1360        builder.token(SyntaxKind::NEWLINE.into(), opening_newline);
1361    }
1362
1363    let mut current_pos = start_pos + 1;
1364    let mut content_lines: Vec<&str> = Vec::new();
1365    let mut found_closing = false;
1366
1367    while current_pos < lines.len() {
1368        let line = lines[current_pos];
1369
1370        // Forward-scan termination on blockquote depth — stays inline (no
1371        // `StrippedLines` equivalent), mirroring `parse_fenced_code_block`.
1372        let probe = if bq_outer {
1373            line
1374        } else {
1375            strip_list_indent(line, list_content_col)
1376        };
1377        let (line_bq_depth, _) = count_blockquote_markers(probe);
1378        if line_bq_depth < bq_depth {
1379            break;
1380        }
1381
1382        // Detection only (emits nothing): same 2-bucket strip as emission.
1383        let inner_stripped =
1384            strip_content_line_prefixes(line, bq_depth, list_content_col, bq_outer, content_indent);
1385
1386        if is_closing_fence(inner_stripped, &fence) {
1387            found_closing = true;
1388            current_pos += 1;
1389            break;
1390        }
1391
1392        content_lines.push(line);
1393        current_pos += 1;
1394    }
1395
1396    if !content_lines.is_empty() {
1397        let mut content = String::new();
1398        for k in 0..content_lines.len() {
1399            let after_indent = window.emit_prefix_at(builder, start_pos + 1 + k);
1400            let (line_without_newline, newline_str) = strip_newline(after_indent);
1401            content.push_str(line_without_newline);
1402            content.push_str(newline_str);
1403        }
1404        builder.token(SyntaxKind::TEXT.into(), &content);
1405    }
1406
1407    if found_closing {
1408        let closing_stripped = window.emit_prefix_at(builder, current_pos - 1);
1409        let (closing_without_newline, newline_str) = strip_newline(closing_stripped);
1410        let closing_trimmed_start = strip_leading_spaces(closing_without_newline);
1411        let leading_ws_len = closing_without_newline.len() - closing_trimmed_start.len();
1412        let closing_count = closing_trimmed_start
1413            .chars()
1414            .take_while(|&c| c == fence.fence_char)
1415            .count();
1416        let trailing_after_marker = &closing_trimmed_start[closing_count..];
1417
1418        if leading_ws_len > 0 {
1419            builder.token(
1420                SyntaxKind::WHITESPACE.into(),
1421                &closing_without_newline[..leading_ws_len],
1422            );
1423        }
1424        builder.token(
1425            SyntaxKind::DISPLAY_MATH_MARKER.into(),
1426            &closing_trimmed_start[..closing_count],
1427        );
1428        if !trailing_after_marker.is_empty() {
1429            builder.token(SyntaxKind::WHITESPACE.into(), trailing_after_marker);
1430        }
1431        if !newline_str.is_empty() {
1432            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1433        }
1434    }
1435
1436    builder.finish_node(); // DisplayMath
1437    current_pos
1438}
1439
1440#[cfg(test)]
1441mod tests {
1442    use super::*;
1443
1444    use crate::options::Dialect;
1445
1446    #[test]
1447    fn test_backtick_fence() {
1448        let fence = try_parse_fence_open("```python", Dialect::Pandoc).unwrap();
1449        assert_eq!(fence.fence_char, '`');
1450        assert_eq!(fence.fence_count, 3);
1451        assert_eq!(fence.info_string, "python");
1452    }
1453
1454    #[test]
1455    fn multiword_bare_info_is_not_a_fence_in_pandoc() {
1456        // ```haskell foo => inline code span in pandoc-markdown, not a fence.
1457        assert!(try_parse_fence_open("```haskell foo", Dialect::Pandoc).is_none());
1458        assert!(try_parse_fence_open("~~~haskell foo", Dialect::Pandoc).is_none());
1459        assert!(try_parse_fence_open("```@example foo bar", Dialect::Pandoc).is_none());
1460        // A single bare word (with surrounding space) is still a valid fence.
1461        assert!(try_parse_fence_open("```haskell ", Dialect::Pandoc).is_some());
1462        assert!(try_parse_fence_open("``` haskell", Dialect::Pandoc).is_some());
1463        // Braced attribute forms carry their own whitespace and stay valid.
1464        assert!(try_parse_fence_open("```{.haskell .foo}", Dialect::Pandoc).is_some());
1465        // Mixed `lang {attrs}` form (e.g. Quarto's `bash {filename="..."}`)
1466        // is valid; extra words or trailing content after the brace are not.
1467        assert!(try_parse_fence_open("```bash {filename=\"Terminal\"}", Dialect::Pandoc).is_some());
1468        assert!(try_parse_fence_open("```haskell {.numberLines}", Dialect::Pandoc).is_some());
1469        assert!(try_parse_fence_open("```haskell {.numberLines} foo", Dialect::Pandoc).is_none());
1470        assert!(try_parse_fence_open("```haskell foo {.x}", Dialect::Pandoc).is_none());
1471        assert!(try_parse_fence_open("```{.x} foo", Dialect::Pandoc).is_none());
1472    }
1473
1474    #[test]
1475    fn multiword_bare_info_is_a_fence_in_commonmark() {
1476        // CommonMark/GFM take the first word as the language class and keep
1477        // the rest of the info string, so the fence is still recognized.
1478        let fence = try_parse_fence_open("```haskell foo", Dialect::CommonMark).unwrap();
1479        assert_eq!(fence.info_string, "haskell foo");
1480        assert!(try_parse_fence_open("~~~haskell foo", Dialect::CommonMark).is_some());
1481    }
1482
1483    #[test]
1484    fn hashpipe_preamble_includes_blank_line_in_block_scalar() {
1485        // A blank `#|` line inside a literal block scalar must stay in the
1486        // preamble (issue_201) — otherwise the scalar is truncated.
1487        let lines = [
1488            "#| fig-alt: |\n",
1489            "#|   First paragraph.\n",
1490            "#|\n",
1491            "#|   Second paragraph.\n",
1492            "plot(1)\n",
1493        ];
1494        assert_eq!(
1495            compute_hashpipe_preamble_line_count(&lines, "#|", 0, 0, false, 0),
1496            4
1497        );
1498    }
1499
1500    #[test]
1501    fn hashpipe_blank_line_predicate() {
1502        assert!(is_hashpipe_blank_line("#|", "#|"));
1503        assert!(is_hashpipe_blank_line("#|   ", "#|"));
1504        assert!(!is_hashpipe_blank_line("#| key: v", "#|"));
1505        assert!(!is_hashpipe_blank_line("plot(1)", "#|"));
1506    }
1507
1508    #[test]
1509    fn test_tilde_fence() {
1510        let fence = try_parse_fence_open("~~~", Dialect::Pandoc).unwrap();
1511        assert_eq!(fence.fence_char, '~');
1512        assert_eq!(fence.fence_count, 3);
1513        assert_eq!(fence.info_string, "");
1514    }
1515
1516    #[test]
1517    fn test_long_fence() {
1518        let fence = try_parse_fence_open("`````", Dialect::Pandoc).unwrap();
1519        assert_eq!(fence.fence_count, 5);
1520    }
1521
1522    #[test]
1523    fn test_two_backticks_invalid() {
1524        assert!(try_parse_fence_open("``", Dialect::Pandoc).is_none());
1525    }
1526
1527    #[test]
1528    fn test_backtick_fence_with_backtick_in_info_is_invalid() {
1529        assert!(try_parse_fence_open("`````hi````there`````", Dialect::Pandoc).is_none());
1530    }
1531
1532    #[test]
1533    fn test_closing_fence() {
1534        let fence = FenceInfo {
1535            fence_char: '`',
1536            fence_count: 3,
1537            info_string: String::new(),
1538        };
1539        assert!(is_closing_fence("```", &fence));
1540        assert!(is_closing_fence("````", &fence));
1541        assert!(!is_closing_fence("``", &fence));
1542        assert!(!is_closing_fence("~~~", &fence));
1543    }
1544
1545    #[test]
1546    fn test_fenced_code_preserves_leading_gt() {
1547        let input = "```\n> foo\n```\n";
1548        let tree = crate::parse(input, None);
1549        assert_eq!(tree.text().to_string(), input);
1550    }
1551
1552    #[test]
1553    fn test_fenced_code_in_blockquote_preserves_opening_fence_marker() {
1554        let input = "> ```\n> code\n> ```\n";
1555        let tree = crate::parse(input, None);
1556        assert_eq!(tree.text().to_string(), input);
1557    }
1558
1559    #[test]
1560    fn test_fenced_code_in_definition_list_with_unicode_content_does_not_panic() {
1561        let input = "Term\n: ```\n├── pyproject.toml\n```\n";
1562        let tree = crate::parse(input, None);
1563        assert_eq!(tree.text().to_string(), input);
1564    }
1565
1566    #[test]
1567    fn test_info_string_plain() {
1568        let info = InfoString::parse("");
1569        assert_eq!(info.block_type, CodeBlockType::Plain);
1570        assert!(info.attributes.is_empty());
1571    }
1572
1573    #[test]
1574    fn test_info_string_shortcut() {
1575        let info = InfoString::parse("python");
1576        assert_eq!(
1577            info.block_type,
1578            CodeBlockType::DisplayShortcut {
1579                language: "python".to_string()
1580            }
1581        );
1582        assert!(info.attributes.is_empty());
1583    }
1584
1585    #[test]
1586    fn test_info_string_shortcut_with_trailing() {
1587        let info = InfoString::parse("python extra stuff");
1588        assert_eq!(
1589            info.block_type,
1590            CodeBlockType::DisplayShortcut {
1591                language: "python".to_string()
1592            }
1593        );
1594    }
1595
1596    #[test]
1597    fn test_info_string_display_explicit() {
1598        let info = InfoString::parse("{.python}");
1599        assert_eq!(
1600            info.block_type,
1601            CodeBlockType::DisplayExplicit {
1602                classes: vec!["python".to_string()]
1603            }
1604        );
1605    }
1606
1607    #[test]
1608    fn test_info_string_display_explicit_multiple() {
1609        let info = InfoString::parse("{.python .numberLines}");
1610        assert_eq!(
1611            info.block_type,
1612            CodeBlockType::DisplayExplicit {
1613                classes: vec!["python".to_string(), "numberLines".to_string()]
1614            }
1615        );
1616    }
1617
1618    #[test]
1619    fn test_info_string_executable() {
1620        let info = InfoString::parse("{python}");
1621        assert_eq!(
1622            info.block_type,
1623            CodeBlockType::Executable {
1624                language: "python".to_string()
1625            }
1626        );
1627    }
1628
1629    #[test]
1630    fn test_info_string_executable_with_options() {
1631        let info = InfoString::parse("{python echo=false warning=true}");
1632        assert_eq!(
1633            info.block_type,
1634            CodeBlockType::Executable {
1635                language: "python".to_string()
1636            }
1637        );
1638        assert_eq!(info.attributes.len(), 2);
1639        assert_eq!(
1640            info.attributes[0],
1641            ("echo".to_string(), Some("false".to_string()))
1642        );
1643        assert_eq!(
1644            info.attributes[1],
1645            ("warning".to_string(), Some("true".to_string()))
1646        );
1647    }
1648
1649    #[test]
1650    fn test_info_string_executable_with_commas() {
1651        let info = InfoString::parse("{r, echo=FALSE, warning=TRUE}");
1652        assert_eq!(
1653            info.block_type,
1654            CodeBlockType::Executable {
1655                language: "r".to_string()
1656            }
1657        );
1658        assert_eq!(info.attributes.len(), 2);
1659        assert_eq!(
1660            info.attributes[0],
1661            ("echo".to_string(), Some("FALSE".to_string()))
1662        );
1663        assert_eq!(
1664            info.attributes[1],
1665            ("warning".to_string(), Some("TRUE".to_string()))
1666        );
1667    }
1668
1669    #[test]
1670    fn test_info_string_executable_mixed_commas_spaces() {
1671        // R-style with commas and spaces
1672        let info = InfoString::parse("{r, echo=FALSE, label=\"my chunk\"}");
1673        assert_eq!(
1674            info.block_type,
1675            CodeBlockType::Executable {
1676                language: "r".to_string()
1677            }
1678        );
1679        assert_eq!(info.attributes.len(), 2);
1680        assert_eq!(
1681            info.attributes[0],
1682            ("echo".to_string(), Some("FALSE".to_string()))
1683        );
1684        assert_eq!(
1685            info.attributes[1],
1686            ("label".to_string(), Some("my chunk".to_string()))
1687        );
1688    }
1689
1690    #[test]
1691    fn test_info_string_mixed_shortcut_and_attrs() {
1692        let info = InfoString::parse("python {.numberLines}");
1693        assert_eq!(
1694            info.block_type,
1695            CodeBlockType::DisplayShortcut {
1696                language: "python".to_string()
1697            }
1698        );
1699        assert_eq!(info.attributes.len(), 1);
1700        assert_eq!(info.attributes[0], (".numberLines".to_string(), None));
1701    }
1702
1703    #[test]
1704    fn test_info_string_mixed_with_key_value() {
1705        let info = InfoString::parse("python {.numberLines startFrom=\"100\"}");
1706        assert_eq!(
1707            info.block_type,
1708            CodeBlockType::DisplayShortcut {
1709                language: "python".to_string()
1710            }
1711        );
1712        assert_eq!(info.attributes.len(), 2);
1713        assert_eq!(info.attributes[0], (".numberLines".to_string(), None));
1714        assert_eq!(
1715            info.attributes[1],
1716            ("startFrom".to_string(), Some("100".to_string()))
1717        );
1718    }
1719
1720    #[test]
1721    fn test_info_string_explicit_with_id_and_classes() {
1722        let info = InfoString::parse("{#mycode .haskell .numberLines startFrom=\"100\"}");
1723        assert_eq!(
1724            info.block_type,
1725            CodeBlockType::DisplayExplicit {
1726                classes: vec!["haskell".to_string(), "numberLines".to_string()]
1727            }
1728        );
1729        // Non-class attributes
1730        let has_id = info.attributes.iter().any(|(k, _)| k == "#mycode");
1731        let has_start = info
1732            .attributes
1733            .iter()
1734            .any(|(k, v)| k == "startFrom" && v == &Some("100".to_string()));
1735        assert!(has_id);
1736        assert!(has_start);
1737    }
1738
1739    #[test]
1740    fn test_info_string_raw_html() {
1741        let info = InfoString::parse("{=html}");
1742        assert_eq!(
1743            info.block_type,
1744            CodeBlockType::Raw {
1745                format: "html".to_string()
1746            }
1747        );
1748        assert!(info.attributes.is_empty());
1749    }
1750
1751    #[test]
1752    fn test_info_string_raw_latex() {
1753        let info = InfoString::parse("{=latex}");
1754        assert_eq!(
1755            info.block_type,
1756            CodeBlockType::Raw {
1757                format: "latex".to_string()
1758            }
1759        );
1760    }
1761
1762    #[test]
1763    fn test_info_string_raw_openxml() {
1764        let info = InfoString::parse("{=openxml}");
1765        assert_eq!(
1766            info.block_type,
1767            CodeBlockType::Raw {
1768                format: "openxml".to_string()
1769            }
1770        );
1771    }
1772
1773    #[test]
1774    fn test_info_string_raw_ms() {
1775        let info = InfoString::parse("{=ms}");
1776        assert_eq!(
1777            info.block_type,
1778            CodeBlockType::Raw {
1779                format: "ms".to_string()
1780            }
1781        );
1782    }
1783
1784    #[test]
1785    fn test_info_string_raw_html5() {
1786        let info = InfoString::parse("{=html5}");
1787        assert_eq!(
1788            info.block_type,
1789            CodeBlockType::Raw {
1790                format: "html5".to_string()
1791            }
1792        );
1793    }
1794
1795    #[test]
1796    fn test_info_string_raw_not_combined_with_attrs() {
1797        // If there are other attributes with =format, it should not be treated as raw
1798        let info = InfoString::parse("{=html .class}");
1799        // This should NOT be parsed as raw because there's more than one attribute
1800        assert_ne!(
1801            info.block_type,
1802            CodeBlockType::Raw {
1803                format: "html".to_string()
1804            }
1805        );
1806    }
1807
1808    #[test]
1809    fn test_parse_pandoc_attributes_spaces() {
1810        // Pandoc display blocks use spaces as delimiters
1811        let attrs = InfoString::parse_pandoc_attributes(".python .numberLines startFrom=\"10\"");
1812        assert_eq!(attrs.len(), 3);
1813        assert_eq!(attrs[0], (".python".to_string(), None));
1814        assert_eq!(attrs[1], (".numberLines".to_string(), None));
1815        assert_eq!(attrs[2], ("startFrom".to_string(), Some("10".to_string())));
1816    }
1817
1818    #[test]
1819    fn test_parse_pandoc_attributes_no_commas() {
1820        // Commas in Pandoc attributes should be treated as part of the value
1821        let attrs = InfoString::parse_pandoc_attributes("#id .class key=value");
1822        assert_eq!(attrs.len(), 3);
1823        assert_eq!(attrs[0], ("#id".to_string(), None));
1824        assert_eq!(attrs[1], (".class".to_string(), None));
1825        assert_eq!(attrs[2], ("key".to_string(), Some("value".to_string())));
1826    }
1827
1828    #[test]
1829    fn test_parse_chunk_options_commas() {
1830        // Quarto/RMarkdown chunks use commas as delimiters
1831        let attrs = InfoString::parse_chunk_options("r, echo=FALSE, warning=TRUE");
1832        assert_eq!(attrs.len(), 3);
1833        assert_eq!(attrs[0], ("r".to_string(), None));
1834        assert_eq!(attrs[1], ("echo".to_string(), Some("FALSE".to_string())));
1835        assert_eq!(attrs[2], ("warning".to_string(), Some("TRUE".to_string())));
1836    }
1837
1838    #[test]
1839    fn test_parse_chunk_options_no_spaces() {
1840        // Should handle comma-separated without spaces
1841        let attrs = InfoString::parse_chunk_options("r,echo=FALSE,warning=TRUE");
1842        assert_eq!(attrs.len(), 3);
1843        assert_eq!(attrs[0], ("r".to_string(), None));
1844        assert_eq!(attrs[1], ("echo".to_string(), Some("FALSE".to_string())));
1845        assert_eq!(attrs[2], ("warning".to_string(), Some("TRUE".to_string())));
1846    }
1847
1848    #[test]
1849    fn test_parse_chunk_options_mixed() {
1850        // Handle both commas and spaces
1851        let attrs = InfoString::parse_chunk_options("python echo=False, warning=True");
1852        assert_eq!(attrs.len(), 3);
1853        assert_eq!(attrs[0], ("python".to_string(), None));
1854        assert_eq!(attrs[1], ("echo".to_string(), Some("False".to_string())));
1855        assert_eq!(attrs[2], ("warning".to_string(), Some("True".to_string())));
1856    }
1857
1858    #[test]
1859    fn test_parse_chunk_options_nested_function_call() {
1860        // R function calls with nested commas should be treated as single value
1861        let attrs = InfoString::parse_chunk_options(r#"r pep-cg, dependson=c("foo", "bar")"#);
1862        assert_eq!(attrs.len(), 3);
1863        assert_eq!(attrs[0], ("r".to_string(), None));
1864        assert_eq!(attrs[1], ("pep-cg".to_string(), None));
1865        assert_eq!(
1866            attrs[2],
1867            (
1868                "dependson".to_string(),
1869                Some(r#"c("foo", "bar")"#.to_string())
1870            )
1871        );
1872    }
1873
1874    #[test]
1875    fn test_parse_chunk_options_nested_with_spaces() {
1876        // Function call with spaces inside
1877        let attrs = InfoString::parse_chunk_options(r#"r, cache.path=file.path("cache", "dir")"#);
1878        assert_eq!(attrs.len(), 2);
1879        assert_eq!(attrs[0], ("r".to_string(), None));
1880        assert_eq!(
1881            attrs[1],
1882            (
1883                "cache.path".to_string(),
1884                Some(r#"file.path("cache", "dir")"#.to_string())
1885            )
1886        );
1887    }
1888
1889    #[test]
1890    fn test_parse_chunk_options_deeply_nested() {
1891        // Multiple levels of nesting
1892        let attrs = InfoString::parse_chunk_options(r#"r, x=list(a=c(1,2), b=c(3,4))"#);
1893        assert_eq!(attrs.len(), 2);
1894        assert_eq!(attrs[0], ("r".to_string(), None));
1895        assert_eq!(
1896            attrs[1],
1897            (
1898                "x".to_string(),
1899                Some(r#"list(a=c(1,2), b=c(3,4))"#.to_string())
1900            )
1901        );
1902    }
1903
1904    #[test]
1905    fn test_parse_chunk_options_brackets_and_braces() {
1906        // Test all bracket types
1907        let attrs = InfoString::parse_chunk_options(r#"r, data=df[rows, cols], config={a:1, b:2}"#);
1908        assert_eq!(attrs.len(), 3);
1909        assert_eq!(attrs[0], ("r".to_string(), None));
1910        assert_eq!(
1911            attrs[1],
1912            ("data".to_string(), Some("df[rows, cols]".to_string()))
1913        );
1914        assert_eq!(
1915            attrs[2],
1916            ("config".to_string(), Some("{a:1, b:2}".to_string()))
1917        );
1918    }
1919
1920    #[test]
1921    fn test_parse_chunk_options_quotes_with_parens() {
1922        // Parentheses inside quoted strings shouldn't affect depth tracking
1923        // Note: The parser strips outer quotes from quoted values
1924        let attrs = InfoString::parse_chunk_options(r#"r, label="test (with parens)", echo=TRUE"#);
1925        assert_eq!(attrs.len(), 3);
1926        assert_eq!(attrs[0], ("r".to_string(), None));
1927        assert_eq!(
1928            attrs[1],
1929            ("label".to_string(), Some("test (with parens)".to_string()))
1930        );
1931        assert_eq!(attrs[2], ("echo".to_string(), Some("TRUE".to_string())));
1932    }
1933
1934    #[test]
1935    fn test_parse_chunk_options_escaped_quotes() {
1936        // Escaped quotes inside string values
1937        // Note: The parser strips outer quotes and processes escapes
1938        let attrs = InfoString::parse_chunk_options(r#"r, label="has \"quoted\" text""#);
1939        assert_eq!(attrs.len(), 2);
1940        assert_eq!(attrs[0], ("r".to_string(), None));
1941        assert_eq!(
1942            attrs[1],
1943            (
1944                "label".to_string(),
1945                Some(r#"has "quoted" text"#.to_string())
1946            )
1947        );
1948    }
1949
1950    #[test]
1951    fn test_display_vs_executable_parsing() {
1952        // Display block should use Pandoc parser (spaces)
1953        let info1 = InfoString::parse("{.python .numberLines startFrom=\"10\"}");
1954        assert!(matches!(
1955            info1.block_type,
1956            CodeBlockType::DisplayExplicit { .. }
1957        ));
1958
1959        // Executable chunk should use chunk options parser (commas)
1960        let info2 = InfoString::parse("{r, echo=FALSE, warning=TRUE}");
1961        assert!(matches!(info2.block_type, CodeBlockType::Executable { .. }));
1962        assert_eq!(info2.attributes.len(), 2);
1963    }
1964
1965    #[test]
1966    fn test_info_string_executable_implicit_label() {
1967        // {r mylabel} should parse as label=mylabel
1968        let info = InfoString::parse("{r mylabel}");
1969        assert!(matches!(
1970            info.block_type,
1971            CodeBlockType::Executable { ref language } if language == "r"
1972        ));
1973        assert_eq!(info.attributes.len(), 1);
1974        assert_eq!(
1975            info.attributes[0],
1976            ("label".to_string(), Some("mylabel".to_string()))
1977        );
1978    }
1979
1980    #[test]
1981    fn test_info_string_executable_implicit_label_with_options() {
1982        // {r mylabel, echo=FALSE} should parse as label=mylabel, echo=FALSE
1983        let info = InfoString::parse("{r mylabel, echo=FALSE}");
1984        assert!(matches!(
1985            info.block_type,
1986            CodeBlockType::Executable { ref language } if language == "r"
1987        ));
1988        assert_eq!(info.attributes.len(), 2);
1989        assert_eq!(
1990            info.attributes[0],
1991            ("label".to_string(), Some("mylabel".to_string()))
1992        );
1993        assert_eq!(
1994            info.attributes[1],
1995            ("echo".to_string(), Some("FALSE".to_string()))
1996        );
1997    }
1998
1999    #[test]
2000    fn test_compute_hashpipe_preamble_line_count_for_block_scalar() {
2001        let content_lines = vec![
2002            "#| fig-cap: |\n",
2003            "#|   A caption\n",
2004            "#|   spanning lines\n",
2005            "a <- 1\n",
2006        ];
2007        let count = compute_hashpipe_preamble_line_count(&content_lines, "#|", 0, 0, false, 0);
2008        assert_eq!(count, 3);
2009    }
2010
2011    #[test]
2012    fn test_compute_hashpipe_preamble_line_count_stops_at_non_option() {
2013        let content_lines = vec!["#| label: fig-plot\n", "plot(1:10)\n", "#| echo: false\n"];
2014        let count = compute_hashpipe_preamble_line_count(&content_lines, "#|", 0, 0, false, 0);
2015        assert_eq!(count, 1);
2016    }
2017
2018    #[test]
2019    fn test_compute_hashpipe_preamble_line_count_stops_at_standalone_prefix() {
2020        let content_lines = vec!["#| label: fig-plot\n", "#|\n", "plot(1:10)\n"];
2021        let count = compute_hashpipe_preamble_line_count(&content_lines, "#|", 0, 0, false, 0);
2022        assert_eq!(count, 1);
2023    }
2024}