Skip to main content

panache_parser/parser/blocks/
code_blocks.rs

1//! Fenced code block parsing utilities.
2
3use crate::parser::diagnostics::{Diagnostics, SyntaxError, SyntaxErrorSource};
4use crate::parser::utils::chunk_options::hashpipe_comment_prefix;
5use crate::syntax::SyntaxKind;
6use rowan::{GreenNodeBuilder, TextRange};
7
8use super::blockquotes::{count_blockquote_markers, strip_n_blockquote_markers};
9use super::container_prefix::{StrippedLines, advance_columns};
10use crate::options::Flavor;
11use crate::parser::utils::container_stack::byte_index_at_column;
12use crate::parser::utils::tree_copy::copy_green_children;
13use crate::parser::yaml::{
14    YamlValidationContext, locate_yaml_diagnostic_ctx, parse_stream_with_prefix,
15};
16
17// Container-prefix primitives live in `container_prefix.rs` (the lower
18// layer that hosts `StrippedLines`); re-export so existing call sites in
19// this module, `tables.rs`, `line_blocks.rs`, and `block_dispatcher.rs`
20// keep their `code_blocks::…` import paths working.
21pub(crate) use super::container_prefix::{
22    bq_outer_of_list, emit_blockquote_prefix_tokens, strip_list_indent,
23};
24
25use crate::parser::utils::helpers::{
26    strip_leading_spaces, strip_newline, trim_end_spaces_tabs, trim_start_spaces_tabs,
27};
28
/// Represents the type of code block based on its info string syntax.
///
/// The variant is chosen by [`InfoString::parse`] purely from the spelling of
/// the info string that follows the opening fence.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CodeBlockType {
    /// Display-only block with shortcut syntax: ```` ```python ````.
    ///
    /// `language` is the first whitespace-separated word of the info string.
    /// The mixed form ```` ```python {.numberLines} ```` also maps here.
    DisplayShortcut { language: String },
    /// Display-only block with explicit Pandoc syntax: ```` ```{.python} ````.
    ///
    /// `classes` holds the class names with the leading `.` removed.
    DisplayExplicit { classes: Vec<String> },
    /// Executable chunk (Quarto/RMarkdown): ```` ```{python} ````.
    ///
    /// `language` is the first bare token inside the braces.
    Executable { language: String },
    /// Raw block for specific output format: ```` ```{=html} ````.
    ///
    /// `format` is the alphanumeric name following the `=`.
    Raw { format: String },
    /// No language specified: an empty info string, or a brace block that
    /// carries only attributes (for example an id or `key=value` pairs).
    Plain,
}
43
/// Parsed attributes from a code block info string.
#[derive(Debug, Clone, PartialEq)]
pub struct InfoString {
    /// The info string exactly as it was handed to the parser (not trimmed).
    pub raw: String,
    /// Classification of the block derived from the info string syntax.
    pub block_type: CodeBlockType,
    /// Attributes as `(key, value)` pairs. The value is `None` for bare
    /// tokens that have no `=value` part.
    pub attributes: Vec<(String, Option<String>)>, // key-value pairs
}
51
52impl InfoString {
53    /// Parse an info string into structured attributes.
54    pub fn parse(raw: &str) -> Self {
55        let trimmed = raw.trim();
56
57        if trimmed.is_empty() {
58            return InfoString {
59                raw: raw.to_string(),
60                block_type: CodeBlockType::Plain,
61                attributes: Vec::new(),
62            };
63        }
64
65        // Check if it starts with '{' - explicit attribute block
66        if let Some(stripped) = trimmed.strip_prefix('{')
67            && let Some(content) = stripped.strip_suffix('}')
68        {
69            return Self::parse_explicit(raw, content);
70        }
71
72        // Check for mixed form: python {.numberLines}
73        if let Some(brace_start) = trimmed.find('{') {
74            let language = trimmed[..brace_start].trim();
75            if !language.is_empty() && !language.contains(char::is_whitespace) {
76                let attr_part = &trimmed[brace_start..];
77                if let Some(stripped) = attr_part.strip_prefix('{')
78                    && let Some(content) = stripped.strip_suffix('}')
79                {
80                    let attrs = Self::parse_attributes(content);
81                    return InfoString {
82                        raw: raw.to_string(),
83                        block_type: CodeBlockType::DisplayShortcut {
84                            language: language.to_string(),
85                        },
86                        attributes: attrs,
87                    };
88                }
89            }
90        }
91
92        // Otherwise, it's a shortcut form (just the language name)
93        // Only take the first word as language
94        let language = trimmed.split_whitespace().next().unwrap_or(trimmed);
95        InfoString {
96            raw: raw.to_string(),
97            block_type: CodeBlockType::DisplayShortcut {
98                language: language.to_string(),
99            },
100            attributes: Vec::new(),
101        }
102    }
103
104    fn parse_explicit(raw: &str, content: &str) -> Self {
105        // Check for raw attribute FIRST: {=format}
106        // The content should start with '=' and have only alphanumeric chars after
107        let trimmed_content = content.trim();
108        if let Some(format_name) = trimmed_content.strip_prefix('=') {
109            // Validate format name: alphanumeric only, no spaces
110            if !format_name.is_empty()
111                && format_name.chars().all(|c| c.is_alphanumeric())
112                && !format_name.contains(char::is_whitespace)
113            {
114                return InfoString {
115                    raw: raw.to_string(),
116                    block_type: CodeBlockType::Raw {
117                        format: format_name.to_string(),
118                    },
119                    attributes: Vec::new(),
120                };
121            }
122        }
123
124        // First, do a preliminary parse to determine block type
125        // Use chunk options parser (comma-aware) for initial detection
126        let prelim_attrs = Self::parse_chunk_options(content);
127
128        // First non-ID, non-attribute token determines if it's executable or display
129        let mut first_lang_token = None;
130        for (key, val) in prelim_attrs.iter() {
131            if val.is_none() && !key.starts_with('#') {
132                first_lang_token = Some(key.as_str());
133                break;
134            }
135        }
136
137        let first_token = first_lang_token.unwrap_or("");
138
139        if first_token.starts_with('.') {
140            // Display block: {.python} or {.haskell .numberLines}
141            // Re-parse with Pandoc-style parser (space-delimited)
142            let attrs = Self::parse_pandoc_attributes(content);
143
144            let classes: Vec<String> = attrs
145                .iter()
146                .filter(|(k, v)| k.starts_with('.') && v.is_none())
147                .map(|(k, _)| k[1..].to_string())
148                .collect();
149
150            let non_class_attrs: Vec<(String, Option<String>)> = attrs
151                .into_iter()
152                .filter(|(k, _)| !k.starts_with('.') || k.contains('='))
153                .collect();
154
155            InfoString {
156                raw: raw.to_string(),
157                block_type: CodeBlockType::DisplayExplicit { classes },
158                attributes: non_class_attrs,
159            }
160        } else if !first_token.is_empty() && !first_token.starts_with('#') {
161            // Executable chunk: {python} or {r}
162            // Use chunk options parser (comma-delimited)
163            let attrs = Self::parse_chunk_options(content);
164            let lang_index = attrs.iter().position(|(k, _)| k == first_token).unwrap();
165
166            // Check if there's a second bareword (implicit label in R/Quarto chunks)
167            // Pattern: {r mylabel} is equivalent to {r, label=mylabel}.
168            // Skip tokens that are actually class (`.foo`) or id (`#foo`)
169            // attributes — those are not labels.
170            let mut has_implicit_label = false;
171            let implicit_label_value = if lang_index + 1 < attrs.len() {
172                let (label_key, val) = &attrs[lang_index + 1];
173                if val.is_none() && !label_key.starts_with('.') && !label_key.starts_with('#') {
174                    has_implicit_label = true;
175                    Some(label_key.clone())
176                } else {
177                    None
178                }
179            } else {
180                None
181            };
182
183            let mut final_attrs: Vec<(String, Option<String>)> = attrs
184                .into_iter()
185                .enumerate()
186                .filter(|(i, _)| {
187                    // Remove language token
188                    if *i == lang_index {
189                        return false;
190                    }
191                    // Remove implicit label token (will be added back explicitly)
192                    if has_implicit_label && *i == lang_index + 1 {
193                        return false;
194                    }
195                    true
196                })
197                .map(|(_, attr)| attr)
198                .collect();
199
200            // Add explicit label if we found an implicit one
201            if let Some(label_val) = implicit_label_value {
202                final_attrs.insert(0, ("label".to_string(), Some(label_val)));
203            }
204
205            InfoString {
206                raw: raw.to_string(),
207                block_type: CodeBlockType::Executable {
208                    language: first_token.to_string(),
209                },
210                attributes: final_attrs,
211            }
212        } else {
213            // Just attributes, no language - use Pandoc parser
214            let attrs = Self::parse_pandoc_attributes(content);
215            InfoString {
216                raw: raw.to_string(),
217                block_type: CodeBlockType::Plain,
218                attributes: attrs,
219            }
220        }
221    }
222
223    /// Parse Pandoc-style attributes for display blocks: {.class #id key="value"}
224    /// Spaces are the primary delimiter. Pandoc spec prefers explicit quoting.
225    fn parse_pandoc_attributes(content: &str) -> Vec<(String, Option<String>)> {
226        let mut attrs = Vec::new();
227        let mut chars = content.chars().peekable();
228
229        while chars.peek().is_some() {
230            // Skip whitespace
231            while matches!(chars.peek(), Some(&' ') | Some(&'\t')) {
232                chars.next();
233            }
234
235            if chars.peek().is_none() {
236                break;
237            }
238
239            // Read key
240            let mut key = String::new();
241            while let Some(&ch) = chars.peek() {
242                if ch == '=' || ch == ' ' || ch == '\t' {
243                    break;
244                }
245                key.push(ch);
246                chars.next();
247            }
248
249            if key.is_empty() {
250                break;
251            }
252
253            // Skip whitespace
254            while matches!(chars.peek(), Some(&' ') | Some(&'\t')) {
255                chars.next();
256            }
257
258            // Check for value
259            if chars.peek() == Some(&'=') {
260                chars.next(); // consume '='
261
262                // Skip whitespace after '='
263                while matches!(chars.peek(), Some(&' ') | Some(&'\t')) {
264                    chars.next();
265                }
266
267                // Read value (might be quoted)
268                let value = if chars.peek() == Some(&'"') {
269                    chars.next(); // consume opening quote
270                    let mut val = String::new();
271                    while let Some(&ch) = chars.peek() {
272                        chars.next();
273                        if ch == '"' {
274                            break;
275                        }
276                        if ch == '\\' {
277                            if let Some(&next_ch) = chars.peek() {
278                                chars.next();
279                                val.push(next_ch);
280                            }
281                        } else {
282                            val.push(ch);
283                        }
284                    }
285                    val
286                } else {
287                    // Unquoted value - read until space
288                    let mut val = String::new();
289                    while let Some(&ch) = chars.peek() {
290                        if ch == ' ' || ch == '\t' {
291                            break;
292                        }
293                        val.push(ch);
294                        chars.next();
295                    }
296                    val
297                };
298
299                attrs.push((key, Some(value)));
300            } else {
301                attrs.push((key, None));
302            }
303        }
304
305        attrs
306    }
307
308    /// Parse Quarto/RMarkdown chunk options: {language, option=value, option2=value2}
309    /// Commas are the primary delimiter (R CSV style). Supports unquoted barewords.
310    fn parse_chunk_options(content: &str) -> Vec<(String, Option<String>)> {
311        let mut attrs = Vec::new();
312        let mut chars = content.chars().peekable();
313
314        while chars.peek().is_some() {
315            // Skip whitespace and commas
316            while matches!(chars.peek(), Some(&' ') | Some(&'\t') | Some(&',')) {
317                chars.next();
318            }
319
320            if chars.peek().is_none() {
321                break;
322            }
323
324            // Read key
325            let mut key = String::new();
326            while let Some(&ch) = chars.peek() {
327                if ch == '=' || ch == ' ' || ch == '\t' || ch == ',' {
328                    break;
329                }
330                key.push(ch);
331                chars.next();
332            }
333
334            if key.is_empty() {
335                break;
336            }
337
338            // Skip whitespace and commas
339            while matches!(chars.peek(), Some(&' ') | Some(&'\t') | Some(&',')) {
340                chars.next();
341            }
342
343            // Check for value
344            if chars.peek() == Some(&'=') {
345                chars.next(); // consume '='
346
347                // Skip whitespace and commas after '='
348                while matches!(chars.peek(), Some(&' ') | Some(&'\t') | Some(&',')) {
349                    chars.next();
350                }
351
352                // Read value (might be quoted)
353                let value = if chars.peek() == Some(&'"') {
354                    chars.next(); // consume opening quote
355                    let mut val = String::new();
356                    while let Some(&ch) = chars.peek() {
357                        chars.next();
358                        if ch == '"' {
359                            break;
360                        }
361                        if ch == '\\' {
362                            if let Some(&next_ch) = chars.peek() {
363                                chars.next();
364                                val.push(next_ch);
365                            }
366                        } else {
367                            val.push(ch);
368                        }
369                    }
370                    val
371                } else {
372                    // Unquoted value - read until comma, space, or tab at depth 0
373                    // Track nesting depth for (), [], {} and quote state
374                    let mut val = String::new();
375                    let mut depth = 0; // Track parentheses/brackets/braces depth
376                    let mut in_quote: Option<char> = None; // Track if inside ' or "
377                    let mut escaped = false; // Track if previous char was backslash
378
379                    while let Some(&ch) = chars.peek() {
380                        // Handle escape sequences
381                        if escaped {
382                            val.push(ch);
383                            chars.next();
384                            escaped = false;
385                            continue;
386                        }
387
388                        if ch == '\\' {
389                            val.push(ch);
390                            chars.next();
391                            escaped = true;
392                            continue;
393                        }
394
395                        // Handle quotes
396                        if let Some(quote_char) = in_quote {
397                            val.push(ch);
398                            chars.next();
399                            if ch == quote_char {
400                                in_quote = None; // Close quote
401                            }
402                            continue;
403                        }
404
405                        // Not in a quote - check for quote start
406                        if ch == '"' || ch == '\'' {
407                            in_quote = Some(ch);
408                            val.push(ch);
409                            chars.next();
410                            continue;
411                        }
412
413                        // Track nesting depth (only when not in quotes)
414                        if ch == '(' || ch == '[' || ch == '{' {
415                            depth += 1;
416                            val.push(ch);
417                            chars.next();
418                            continue;
419                        }
420
421                        if ch == ')' || ch == ']' || ch == '}' {
422                            depth -= 1;
423                            val.push(ch);
424                            chars.next();
425                            continue;
426                        }
427
428                        // Check for delimiters - only break at depth 0
429                        if depth == 0 && (ch == ' ' || ch == '\t' || ch == ',') {
430                            break;
431                        }
432
433                        // Regular character
434                        val.push(ch);
435                        chars.next();
436                    }
437                    val
438                };
439
440                attrs.push((key, Some(value)));
441            } else {
442                attrs.push((key, None));
443            }
444        }
445
446        attrs
447    }
448
449    /// Legacy function - kept for backward compatibility in mixed-form parsing
450    /// For new code, use parse_pandoc_attributes or parse_chunk_options
451    fn parse_attributes(content: &str) -> Vec<(String, Option<String>)> {
452        // Default to chunk options parsing (comma-aware)
453        Self::parse_chunk_options(content)
454    }
455}
456
/// Information about a detected code fence opening.
#[derive(Debug, Clone)]
pub(crate) struct FenceInfo {
    /// The character the fence is made of (`` ` `` or `~` when produced by
    /// `try_parse_fence_open`).
    pub fence_char: char,
    /// Number of consecutive fence characters in the opening run; a closing
    /// fence must have at least this many.
    pub fence_count: usize,
    /// Text following the fence run on the opening line, with the line ending
    /// and at most one leading space removed.
    pub info_string: String,
}
464
465pub(crate) fn is_gfm_math_fence(fence: &FenceInfo) -> bool {
466    fence.info_string.trim() == "math"
467}
468
469/// Try to detect a fenced code block opening from content.
470/// Returns fence info if this is a valid opening fence.
471pub(crate) fn try_parse_fence_open(
472    content: &str,
473    dialect: crate::options::Dialect,
474) -> Option<FenceInfo> {
475    let trimmed = strip_leading_spaces(content);
476
477    // Check for fence opening (``` or ~~~)
478    let (fence_char, fence_count) = if trimmed.starts_with('`') {
479        let count = trimmed.chars().take_while(|&c| c == '`').count();
480        ('`', count)
481    } else if trimmed.starts_with('~') {
482        let count = trimmed.chars().take_while(|&c| c == '~').count();
483        ('~', count)
484    } else {
485        return None;
486    };
487
488    if fence_count < 3 {
489        return None;
490    }
491
492    let info_string_raw = &trimmed[fence_count..];
493    // Strip trailing newline (LF or CRLF) and at most one leading space
494    let (info_string_trimmed, _) = strip_newline(info_string_raw);
495    let info_string = if let Some(stripped) = info_string_trimmed.strip_prefix(' ') {
496        stripped.to_string()
497    } else {
498        info_string_trimmed.to_string()
499    };
500
501    // Backtick-fenced blocks cannot have backticks in the info string.
502    if fence_char == '`' && info_string.contains('`') {
503        return None;
504    }
505
506    // In Pandoc-markdown, a fence info string is valid only as one of:
507    //   `lang`            a single bare language word,
508    //   `{attrs}`         a brace-delimited attribute block, or
509    //   `lang {attrs}`    a single language word plus an attribute block,
510    // with nothing trailing after the attribute block. Anything else — a
511    // multi-word bare info string (```` ```haskell foo ````), a word before
512    // the brace (```` ```a b {.x} ````), or content after the closing brace
513    // (```` ```{.x} foo ````) — is not a code fence: pandoc reads the backtick
514    // run as an inline code span (and a tilde run as plain inline text).
515    // CommonMark and GFM instead take the first word as the language class and
516    // accept the rest, so this restriction is gated to the Pandoc dialect.
517    if dialect == crate::options::Dialect::Pandoc {
518        let bare = info_string.trim();
519        if !bare.is_empty() {
520            let is_valid = if let Some(brace_start) = bare.find('{') {
521                let before = bare[..brace_start].trim();
522                !before.contains(char::is_whitespace) && bare.ends_with('}')
523            } else {
524                bare.split_whitespace().nth(1).is_none()
525            };
526            if !is_valid {
527                return None;
528            }
529        }
530    }
531
532    Some(FenceInfo {
533        fence_char,
534        fence_count,
535        info_string,
536    })
537}
538
/// Strip the container prefix from a fence-opening line, emitting the
/// whitespace parts of that prefix into `builder` along the way.
///
/// Returns `(trimmed, untrimmed)`: `untrimmed` is the line after the container
/// prefix has been removed, and `trimmed` is the same text after
/// `strip_leading_spaces` (the removed leading spaces are emitted as a
/// `WHITESPACE` token).
///
/// * `first_line_override` — when `Some`, it is used as the already-stripped
///   line; only the blockquote prefix of `source_line` (if any) and the
///   override's leading spaces are emitted.
/// * `bq_depth` — number of blockquote markers to strip.
/// * `list_content_col` — list content column to strip; `0` disables it.
/// * `list_marker_consumed_on_line_0` — when `true`, the list prefix is
///   skipped without emitting a token for it.
/// * `bq_outer` — when `true`, blockquote markers are stripped before the
///   list indent; otherwise the list indent comes first.
/// * `content_indent` — extra indent columns to strip and emit as whitespace.
#[allow(clippy::too_many_arguments)]
fn prepare_fence_open_line<'a>(
    builder: &mut GreenNodeBuilder<'static>,
    source_line: &'a str,
    first_line_override: Option<&'a str>,
    bq_depth: usize,
    list_content_col: usize,
    list_marker_consumed_on_line_0: bool,
    bq_outer: bool,
    content_indent: usize,
) -> (&'a str, &'a str) {
    // Strip the active container prefix on line 0 in container-stack
    // order. Bq markers are always upstream-emitted by the blockquote
    // dispatch and silently consumed here. The list_content_col indent
    // is upstream-emitted only on a marker-line dispatch
    // (`list_marker_consumed_on_line_0=true`); on continuation-line
    // dispatch it must be emitted here as WHITESPACE. Adjacent
    // WHITESPACE emissions are coalesced into one token for
    // byte-range-equivalent CST stability.
    if let Some(first_line) = first_line_override {
        // Override path: the caller supplies the stripped line. If it differs
        // from the source line inside a blockquote, the bytes removed by
        // blockquote stripping are emitted as blockquote prefix tokens.
        if bq_depth > 0 && source_line != first_line {
            let stripped = strip_n_blockquote_markers(source_line, bq_depth);
            let prefix_len = source_line.len().saturating_sub(stripped.len());
            if prefix_len > 0 {
                emit_blockquote_prefix_tokens(builder, &source_line[..prefix_len]);
            }
        }
        let first_trimmed = strip_leading_spaces(first_line);
        let leading_ws_len = first_line.len().saturating_sub(first_trimmed.len());
        if leading_ws_len > 0 {
            builder.token(SyntaxKind::WHITESPACE.into(), &first_line[..leading_ws_len]);
        }
        return (first_trimmed, first_line);
    }

    // `s` is the not-yet-consumed tail of `source_line`; offsets into
    // `source_line` are recovered as `source_line.len() - s.len()`.
    let mut s: &'a str = source_line;
    // Start offset of a whitespace run that still has to be emitted.
    let mut pending_ws_start: Option<usize> = None;
    let suppress_list = list_marker_consumed_on_line_0;

    // Emit the pending whitespace run (if non-empty) ending at
    // `current_offset`, then clear the pending marker.
    let flush_ws = |builder: &mut GreenNodeBuilder<'static>,
                    pending: &mut Option<usize>,
                    current_offset: usize| {
        if let Some(start) = *pending
            && current_offset > start
        {
            builder.token(
                SyntaxKind::WHITESPACE.into(),
                &source_line[start..current_offset],
            );
        }
        *pending = None;
    };

    // Advance `s` past the list prefix; record it as pending whitespace
    // unless the list prefix is suppressed.
    let do_strip_list = |s: &mut &'a str, pending: &mut Option<usize>| {
        if list_content_col == 0 {
            return;
        }
        // On a marker-line dispatch (`suppress_list=true`), the list
        // marker bytes have already been emitted upstream and may not
        // be whitespace (e.g. `- > ```` has a leading `-`). Use
        // `advance_columns` which counts columns through any char.
        // On continuation lines, the leading bytes ARE whitespace
        // (the list-content-indent) so use the whitespace-only
        // `strip_list_indent` to stop at non-whitespace.
        let stripped = if suppress_list {
            advance_columns(s, list_content_col)
        } else {
            strip_list_indent(s, list_content_col)
        };
        let consumed = s.len() - stripped.len();
        if consumed > 0 {
            let start = source_line.len() - s.len();
            if !suppress_list && pending.is_none() {
                *pending = Some(start);
            }
            *s = stripped;
        }
    };

    // Flush any whitespace pending before the blockquote markers, then
    // advance `s` past them; no token is emitted here for the markers.
    let do_strip_bq =
        |builder: &mut GreenNodeBuilder<'static>, s: &mut &'a str, pending: &mut Option<usize>| {
            if bq_depth == 0 {
                return;
            }
            let current_offset = source_line.len() - s.len();
            flush_ws(builder, pending, current_offset);
            *s = strip_n_blockquote_markers(s, bq_depth);
        };

    // Apply the two strips in container order.
    if bq_outer {
        do_strip_bq(builder, &mut s, &mut pending_ws_start);
        do_strip_list(&mut s, &mut pending_ws_start);
    } else {
        do_strip_list(&mut s, &mut pending_ws_start);
        do_strip_bq(builder, &mut s, &mut pending_ws_start);
    }

    // content_indent (footnote/definition) — always emit as WHITESPACE.
    if content_indent > 0 {
        let indent_bytes = byte_index_at_column(s, content_indent);
        if s.len() >= indent_bytes && indent_bytes > 0 {
            let start = source_line.len() - s.len();
            // Extend an already-pending run rather than starting a new one,
            // so adjacent whitespace becomes a single token.
            if pending_ws_start.is_none() {
                pending_ws_start = Some(start);
            }
            s = &s[indent_bytes..];
        }
    }

    let final_offset = source_line.len() - s.len();
    flush_ws(builder, &mut pending_ws_start, final_offset);

    // Leading spaces in front of the fence itself are emitted as a separate
    // WHITESPACE token.
    let first_trimmed = strip_leading_spaces(s);
    let leading_ws_len = s.len().saturating_sub(first_trimmed.len());
    if leading_ws_len > 0 {
        builder.token(SyntaxKind::WHITESPACE.into(), &s[..leading_ws_len]);
    }
    (first_trimmed, s)
}
658
659fn strip_content_line_prefixes(
660    content_line: &str,
661    bq_depth: usize,
662    list_content_col: usize,
663    bq_outer: bool,
664    content_indent: usize,
665) -> &str {
666    let after_bq_and_list = if bq_outer {
667        let after_bq = if bq_depth > 0 {
668            strip_n_blockquote_markers(content_line, bq_depth)
669        } else {
670            content_line
671        };
672        strip_list_indent(after_bq, list_content_col)
673    } else {
674        let after_list = strip_list_indent(content_line, list_content_col);
675        if bq_depth > 0 {
676            strip_n_blockquote_markers(after_list, bq_depth)
677        } else {
678            after_list
679        }
680    };
681
682    let indent_bytes = byte_index_at_column(after_bq_and_list, content_indent);
683    if content_indent > 0 && after_bq_and_list.len() >= indent_bytes {
684        &after_bq_and_list[indent_bytes..]
685    } else {
686        after_bq_and_list
687    }
688}
689
690pub(crate) fn compute_hashpipe_preamble_line_count(
691    content_lines: &[&str],
692    prefix: &str,
693    bq_depth: usize,
694    list_content_col: usize,
695    bq_outer: bool,
696    content_indent: usize,
697) -> usize {
698    let preview = |idx: usize| -> Option<&str> {
699        let line = content_lines.get(idx)?;
700        let after_indent =
701            strip_content_line_prefixes(line, bq_depth, list_content_col, bq_outer, content_indent);
702        Some(strip_newline(after_indent).0)
703    };
704
705    let mut line_idx = 0usize;
706    while let Some(preview_without_newline) = preview(line_idx) {
707        if is_hashpipe_option_line(preview_without_newline, prefix)
708            || is_hashpipe_continuation_line(preview_without_newline, prefix)
709        {
710            line_idx += 1;
711            continue;
712        }
713        // A blank `#|` line continues the preamble only when followed by another
714        // prefixed line — i.e. it is a blank interior line of a block scalar
715        // (issue_201). A trailing blank `#|` before body code ends the preamble.
716        if is_hashpipe_blank_line(preview_without_newline, prefix)
717            && preview(line_idx + 1)
718                .is_some_and(|next| trim_start_spaces_tabs(next).starts_with(prefix))
719        {
720            line_idx += 1;
721            continue;
722        }
723        break;
724    }
725
726    line_idx
727}
728
729/// Compute the composite per-line prefix marker for a hashpipe preamble:
730/// the uniform container prefix (blockquote markers / list indent /
731/// content indent) plus any leading whitespace up to and including the
732/// hashpipe comment marker (`prefix`), taken from the first preamble line.
733///
734/// Within a preamble the container prefix is uniform per line, so matching
735/// this composite marker via `strip_prefix` lets the prefix-aware YAML
736/// parser splice a nested (list-/blockquote-indented) cell exactly as a
737/// top-level one, peeling the whole prefix into one `YAML_LINE_PREFIX`
738/// leaf. A non-uniform preamble fails validation and falls back to opaque
739/// tokens.
740fn hashpipe_composite_marker<'a>(
741    first_line: &'a str,
742    prefix: &str,
743    bq_depth: usize,
744    list_content_col: usize,
745    bq_outer: bool,
746    content_indent: usize,
747) -> &'a str {
748    let after_container = strip_content_line_prefixes(
749        first_line,
750        bq_depth,
751        list_content_col,
752        bq_outer,
753        content_indent,
754    );
755    let container_len = first_line.len() - after_container.len();
756    let ws_before = after_container.len() - trim_start_spaces_tabs(after_container).len();
757    let marker_len = (container_len + ws_before + prefix.len()).min(first_line.len());
758    &first_line[..marker_len]
759}
760
761fn is_hashpipe_option_line(line_without_newline: &str, prefix: &str) -> bool {
762    let trimmed_start = trim_start_spaces_tabs(line_without_newline);
763    if !trimmed_start.starts_with(prefix) {
764        return false;
765    }
766    let after_prefix = &trimmed_start[prefix.len()..];
767    let rest = trim_start_spaces_tabs(after_prefix);
768    let Some(colon_idx) = rest.find(':') else {
769        return false;
770    };
771    let key = trim_end_spaces_tabs(&rest[..colon_idx]);
772    if key.is_empty() {
773        return false;
774    }
775    true
776}
777
778fn is_hashpipe_continuation_line(line_without_newline: &str, prefix: &str) -> bool {
779    let trimmed_start = trim_start_spaces_tabs(line_without_newline);
780    if !trimmed_start.starts_with(prefix) {
781        return false;
782    }
783    let after_prefix = &trimmed_start[prefix.len()..];
784    let Some(first) = after_prefix.chars().next() else {
785        return false;
786    };
787    if first != ' ' && first != '\t' {
788        return false;
789    }
790    !trim_start_spaces_tabs(after_prefix).is_empty()
791}
792
793/// A bare/blank hashpipe line — the marker followed only by optional whitespace
794/// (e.g. `#|`). Such a line is a valid blank *inside* a block scalar (the
795/// `issue_201` literal-with-blank-line case) or a trailing blank in the preamble,
796/// so it continues the preamble rather than ending it. Without this, the
797/// preamble scan stops at the blank and the parser truncates the block scalar,
798/// embedding only the lines before it.
799fn is_hashpipe_blank_line(line_without_newline: &str, prefix: &str) -> bool {
800    let trimmed_start = trim_start_spaces_tabs(line_without_newline);
801    let Some(after_prefix) = trimmed_start.strip_prefix(prefix) else {
802        return false;
803    };
804    trim_start_spaces_tabs(after_prefix).is_empty()
805}
806
807/// Check if a line is a valid closing fence for the given fence info.
808pub(crate) fn is_closing_fence(content: &str, fence: &FenceInfo) -> bool {
809    let trimmed = strip_leading_spaces(content);
810
811    if !trimmed.starts_with(fence.fence_char) {
812        return false;
813    }
814
815    let closing_count = trimmed
816        .chars()
817        .take_while(|&c| c == fence.fence_char)
818        .count();
819
820    if closing_count < fence.fence_count {
821        return false;
822    }
823
824    // Rest of line must be empty
825    trimmed[closing_count..].trim().is_empty()
826}
827
828/// Emit chunk options as structured CST nodes while preserving all bytes.
829/// This parses {r, echo=TRUE, fig.cap="text"} into CHUNK_OPTIONS with individual CHUNK_OPTION nodes.
830fn emit_chunk_options(builder: &mut GreenNodeBuilder<'static>, content: &str) {
831    if content.trim().is_empty() {
832        builder.token(SyntaxKind::TEXT.into(), content);
833        return;
834    }
835
836    builder.start_node(SyntaxKind::CHUNK_OPTIONS.into());
837
838    let mut pos = 0;
839    let bytes = content.as_bytes();
840
841    while pos < bytes.len() {
842        // Emit leading whitespace/commas as TEXT
843        let ws_start = pos;
844        while pos < bytes.len() {
845            let ch = bytes[pos] as char;
846            if ch != ' ' && ch != '\t' && ch != ',' {
847                break;
848            }
849            pos += 1;
850        }
851        if pos > ws_start {
852            builder.token(SyntaxKind::TEXT.into(), &content[ws_start..pos]);
853        }
854
855        if pos >= bytes.len() {
856            break;
857        }
858
859        // Check if this is a closing brace
860        if bytes[pos] as char == '}' {
861            builder.token(SyntaxKind::TEXT.into(), &content[pos..pos + 1]);
862            pos += 1;
863            if pos < bytes.len() {
864                builder.token(SyntaxKind::TEXT.into(), &content[pos..]);
865            }
866            break;
867        }
868
869        // Read key
870        let key_start = pos;
871        while pos < bytes.len() {
872            let ch = bytes[pos] as char;
873            if ch == '=' || ch == ' ' || ch == '\t' || ch == ',' || ch == '}' {
874                break;
875            }
876            pos += 1;
877        }
878
879        if pos == key_start {
880            // No key found, emit rest as TEXT
881            if pos < bytes.len() {
882                builder.token(SyntaxKind::TEXT.into(), &content[pos..]);
883            }
884            break;
885        }
886
887        let key = &content[key_start..pos];
888
889        // Check for whitespace before '='
890        let ws_before_eq_start = pos;
891        while pos < bytes.len() && matches!(bytes[pos] as char, ' ' | '\t') {
892            pos += 1;
893        }
894
895        // Check if there's a value (=)
896        if pos < bytes.len() && bytes[pos] as char == '=' {
897            // Has value - emit as CHUNK_OPTION
898            builder.start_node(SyntaxKind::CHUNK_OPTION.into());
899            builder.token(SyntaxKind::CHUNK_OPTION_KEY.into(), key);
900
901            // Emit whitespace before '=' if any
902            if pos > ws_before_eq_start {
903                builder.token(SyntaxKind::TEXT.into(), &content[ws_before_eq_start..pos]);
904            }
905
906            builder.token(SyntaxKind::TEXT.into(), "=");
907            pos += 1; // consume '='
908
909            // Emit whitespace after '='
910            let ws_after_eq_start = pos;
911            while pos < bytes.len() && matches!(bytes[pos] as char, ' ' | '\t') {
912                pos += 1;
913            }
914            if pos > ws_after_eq_start {
915                builder.token(SyntaxKind::TEXT.into(), &content[ws_after_eq_start..pos]);
916            }
917
918            // Parse value (might be quoted)
919            if pos < bytes.len() {
920                let quote_char = bytes[pos] as char;
921                if quote_char == '"' || quote_char == '\'' {
922                    // Quoted value
923                    builder.token(
924                        SyntaxKind::CHUNK_OPTION_QUOTE.into(),
925                        &content[pos..pos + 1],
926                    );
927                    pos += 1; // consume opening quote
928
929                    let val_start = pos;
930                    let mut escaped = false;
931                    while pos < bytes.len() {
932                        let ch = bytes[pos] as char;
933                        if !escaped && ch == quote_char {
934                            break;
935                        }
936                        escaped = !escaped && ch == '\\';
937                        pos += 1;
938                    }
939
940                    if pos > val_start {
941                        builder.token(
942                            SyntaxKind::CHUNK_OPTION_VALUE.into(),
943                            &content[val_start..pos],
944                        );
945                    }
946
947                    // Emit closing quote
948                    if pos < bytes.len() && bytes[pos] as char == quote_char {
949                        builder.token(
950                            SyntaxKind::CHUNK_OPTION_QUOTE.into(),
951                            &content[pos..pos + 1],
952                        );
953                        pos += 1;
954                    }
955                } else {
956                    // Unquoted value - read until comma, space, closing brace, or balanced delimiter
957                    let val_start = pos;
958                    let mut depth = 0;
959
960                    while pos < bytes.len() {
961                        let ch = bytes[pos] as char;
962                        match ch {
963                            '(' | '[' | '{' => depth += 1,
964                            ')' | ']' => {
965                                if depth > 0 {
966                                    depth -= 1;
967                                } else {
968                                    break;
969                                }
970                            }
971                            '}' => {
972                                if depth > 0 {
973                                    depth -= 1;
974                                } else {
975                                    break; // End of chunk options
976                                }
977                            }
978                            ',' if depth == 0 => {
979                                break; // Next option
980                            }
981                            ' ' | '\t' if depth == 0 => {
982                                break; // Space separator
983                            }
984                            _ => {}
985                        }
986                        pos += 1;
987                    }
988
989                    if pos > val_start {
990                        builder.token(
991                            SyntaxKind::CHUNK_OPTION_VALUE.into(),
992                            &content[val_start..pos],
993                        );
994                    }
995                }
996            }
997
998            builder.finish_node(); // CHUNK_OPTION
999        } else {
1000            // No '=' - classify by prefix: '.foo' is a class, '#foo' is an id,
1001            // anything else is a chunk label (e.g. `{r mylabel}`).
1002            let kind = match key.as_bytes().first() {
1003                Some(b'.') => SyntaxKind::ATTR_CLASS,
1004                Some(b'#') => SyntaxKind::ATTR_ID,
1005                _ => SyntaxKind::CHUNK_LABEL,
1006            };
1007            builder.start_node(kind.into());
1008            builder.token(SyntaxKind::TEXT.into(), key);
1009            builder.finish_node();
1010            if pos > ws_before_eq_start {
1011                builder.token(SyntaxKind::TEXT.into(), &content[ws_before_eq_start..pos]);
1012            }
1013        }
1014    }
1015
1016    builder.finish_node(); // CHUNK_OPTIONS
1017}
1018
1019/// Helper to parse info string and emit CodeInfo node with parsed components.
1020/// This breaks down the info string into its logical parts while preserving all bytes.
1021fn emit_code_info_node(builder: &mut GreenNodeBuilder<'static>, info_string: &str) {
1022    builder.start_node(SyntaxKind::CODE_INFO.into());
1023
1024    let info = InfoString::parse(info_string);
1025
1026    match &info.block_type {
1027        CodeBlockType::DisplayShortcut { language } => {
1028            // Simple case: python or python {.class}
1029            builder.token(SyntaxKind::CODE_LANGUAGE.into(), language);
1030
1031            // If there's more after the language, emit it as TEXT
1032            let after_lang = &info_string[language.len()..];
1033            if !after_lang.is_empty() {
1034                builder.token(SyntaxKind::TEXT.into(), after_lang);
1035            }
1036        }
1037        CodeBlockType::Executable { language } => {
1038            // Quarto: {r} or {r my-label, echo=FALSE}
1039            builder.token(SyntaxKind::TEXT.into(), "{");
1040            builder.token(SyntaxKind::CODE_LANGUAGE.into(), language);
1041
1042            // Parse and emit chunk options
1043            let start_offset = 1 + language.len(); // Skip "{r"
1044            if start_offset < info_string.len() {
1045                let rest = &info_string[start_offset..];
1046                emit_chunk_options(builder, rest);
1047            }
1048        }
1049        CodeBlockType::DisplayExplicit { classes } => {
1050            // Pandoc: {.python} or {#id .haskell .numberLines}
1051            // We need to find the first class in the raw string and emit everything around it
1052
1053            if let Some(lang) = classes.first() {
1054                // Find where ".lang" appears in the info string
1055                let needle = format!(".{}", lang);
1056                if let Some(lang_start) = info_string.find(&needle) {
1057                    // Emit everything before the language
1058                    if lang_start > 0 {
1059                        builder.token(SyntaxKind::TEXT.into(), &info_string[..lang_start]);
1060                    }
1061
1062                    // Emit the dot
1063                    builder.token(SyntaxKind::TEXT.into(), ".");
1064
1065                    // Emit the language
1066                    builder.token(SyntaxKind::CODE_LANGUAGE.into(), lang);
1067
1068                    // Emit everything after
1069                    let after_lang_start = lang_start + 1 + lang.len();
1070                    if after_lang_start < info_string.len() {
1071                        builder.token(SyntaxKind::TEXT.into(), &info_string[after_lang_start..]);
1072                    }
1073                } else {
1074                    // Couldn't find it, just emit as TEXT
1075                    builder.token(SyntaxKind::TEXT.into(), info_string);
1076                }
1077            } else {
1078                // No classes
1079                builder.token(SyntaxKind::TEXT.into(), info_string);
1080            }
1081        }
1082        CodeBlockType::Raw { .. } | CodeBlockType::Plain => {
1083            // No language, just emit as TEXT
1084            builder.token(SyntaxKind::TEXT.into(), info_string);
1085        }
1086    }
1087
1088    builder.finish_node(); // CodeInfo
1089}
1090
1091/// Parse a fenced code block, consuming lines from the parser.
1092/// Parse a fenced code block, consuming lines from the parser.
1093/// Returns the new position after the code block.
1094///
1095/// All container geometry (blockquote depth, list-item indent,
1096/// footnote/definition base indent, and the bq-vs-list strip order) is
1097/// derived from `window.prefix()`; detection scans and the open-fence
1098/// emitter read those derived scalars, and content/closing-fence lines
1099/// re-emit their container prefix via [`StrippedLines::emit_prefix_at`].
1100pub(crate) fn parse_fenced_code_block(
1101    builder: &mut GreenNodeBuilder<'static>,
1102    window: &StrippedLines<'_, '_>,
1103    fence: FenceInfo,
1104    first_line_override: Option<&str>,
1105    diags: &Diagnostics,
1106    flavor: Flavor,
1107) -> usize {
1108    let lines = window.raw();
1109    let start_pos = window.pos();
1110    let prefix = window.prefix();
1111    let bq_depth = prefix.bq_depth();
1112    let list_content_col = prefix.list_content_col();
1113    let list_marker_consumed_on_line_0 = prefix.list_marker_consumed_on_line_0;
1114    let bq_outer = bq_outer_of_list(prefix);
1115    let content_indent = prefix.content_indent();
1116
1117    // Start code block
1118    builder.start_node(SyntaxKind::CODE_BLOCK.into());
1119
1120    // Opening fence
1121    let (first_trimmed, _first_inner) = prepare_fence_open_line(
1122        builder,
1123        lines[start_pos],
1124        first_line_override,
1125        bq_depth,
1126        list_content_col,
1127        list_marker_consumed_on_line_0,
1128        bq_outer,
1129        content_indent,
1130    );
1131
1132    builder.start_node(SyntaxKind::CODE_FENCE_OPEN.into());
1133    builder.token(
1134        SyntaxKind::CODE_FENCE_MARKER.into(),
1135        &first_trimmed[..fence.fence_count],
1136    );
1137
1138    // Emit any space between fence and info string (for losslessness)
1139    let after_fence = &first_trimmed[fence.fence_count..];
1140    if let Some(_space_stripped) = after_fence.strip_prefix(' ') {
1141        // There was a space - emit it as WHITESPACE
1142        builder.token(SyntaxKind::WHITESPACE.into(), " ");
1143        // Parse and emit the info string as a structured node
1144        if !fence.info_string.is_empty() {
1145            emit_code_info_node(builder, &fence.info_string);
1146        }
1147    } else if !fence.info_string.is_empty() {
1148        // No space - parse and emit info_string as a structured node
1149        emit_code_info_node(builder, &fence.info_string);
1150    }
1151
1152    // Extract and emit the actual newline from the opening fence line
1153    let (_, newline_str) = strip_newline(first_trimmed);
1154    if !newline_str.is_empty() {
1155        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1156    }
1157    builder.finish_node(); // CodeFenceOpen
1158
1159    let mut current_pos = start_pos + 1;
1160    let mut content_lines: Vec<&str> = Vec::new(); // Store original lines for lossless parsing
1161    let mut found_closing = false;
1162
1163    while current_pos < lines.len() {
1164        let line = lines[current_pos];
1165
1166        // Count blockquote markers to detect leaving the surrounding
1167        // blockquote. For bq_outer=true probe the raw line (bq markers
1168        // lead); for bq_outer=false strip the list indent first, then
1169        // probe the post-list slice. This forward-scan termination has no
1170        // `StrippedLines` equivalent, so it stays inline.
1171        let probe = if bq_outer {
1172            line
1173        } else {
1174            strip_list_indent(line, list_content_col)
1175        };
1176        let (line_bq_depth, _) = count_blockquote_markers(probe);
1177        if line_bq_depth < bq_depth {
1178            break;
1179        }
1180
1181        // Detection only (emits nothing): the same 2-bucket container
1182        // strip the emission path applies via `emit_content_line_prefixes`
1183        // / `emit_prefix_at`, kept here rather than `strip_at` (a per-op
1184        // walk) to stay byte-identical in interleaved nesting.
1185        let inner_stripped =
1186            strip_content_line_prefixes(line, bq_depth, list_content_col, bq_outer, content_indent);
1187
1188        if is_closing_fence(inner_stripped, &fence) {
1189            found_closing = true;
1190            current_pos += 1;
1191            break;
1192        }
1193
1194        content_lines.push(line);
1195        current_pos += 1;
1196    }
1197
1198    // Add content
1199    if !content_lines.is_empty() {
1200        builder.start_node(SyntaxKind::CODE_CONTENT.into());
1201        let hashpipe_prefix = match InfoString::parse(&fence.info_string).block_type {
1202            CodeBlockType::Executable { language } => hashpipe_comment_prefix(&language),
1203            _ => None,
1204        };
1205
1206        let mut line_idx = 0usize;
1207        if let Some(prefix) = hashpipe_prefix {
1208            let prepared_hashpipe_lines = compute_hashpipe_preamble_line_count(
1209                &content_lines,
1210                prefix,
1211                bq_depth,
1212                list_content_col,
1213                bq_outer,
1214                content_indent,
1215            );
1216            if prepared_hashpipe_lines > 0 {
1217                builder.start_node(SyntaxKind::HASHPIPE_YAML_PREAMBLE.into());
1218                builder.start_node(SyntaxKind::HASHPIPE_YAML_CONTENT.into());
1219
1220                // Exact host bytes of the preamble region: the lines retain
1221                // their trailing LF/CRLF, so concatenation rebuilds the
1222                // source between the open fence and the body exactly.
1223                let content: String = content_lines[..prepared_hashpipe_lines].concat();
1224                // Composite per-line marker (container prefix + `#|`). Uniform
1225                // across the preamble, so a nested cell splices as a top-level
1226                // one (see `hashpipe_composite_marker`).
1227                let marker = hashpipe_composite_marker(
1228                    content_lines[0],
1229                    prefix,
1230                    bq_depth,
1231                    list_content_col,
1232                    bq_outer,
1233                    content_indent,
1234                );
1235
1236                let yaml_ctx = YamlValidationContext::hashpipe(flavor);
1237                if let Some((diag, start_off, end_off)) =
1238                    locate_yaml_diagnostic_ctx(&content, marker, yaml_ctx)
1239                {
1240                    // Malformed hashpipe YAML: record the syntax error at its
1241                    // host position — the parser already computed the verdict,
1242                    // so it surfaces the diagnostic here instead of discarding
1243                    // it (the linter would otherwise re-parse to recover it).
1244                    // `content` is `content_lines[..n]` concatenated and those
1245                    // lines are subslices of the host input, so the preamble's
1246                    // host start is their pointer offset from line 0.
1247                    let host_start =
1248                        content_lines[0].as_ptr() as usize - lines[0].as_ptr() as usize;
1249                    diags.push(SyntaxError {
1250                        range: TextRange::new(
1251                            ((host_start + start_off) as u32).into(),
1252                            ((host_start + end_off) as u32).into(),
1253                        ),
1254                        message: diag.message.to_string(),
1255                        source: SyntaxErrorSource::Yaml,
1256                    });
1257                    // Fall back to opaque line tokens (container prefix + TEXT +
1258                    // NEWLINE), preserving the bytes without imposing a
1259                    // structure that didn't parse.
1260                    while line_idx < prepared_hashpipe_lines {
1261                        let after_indent = window.emit_prefix_at(builder, start_pos + 1 + line_idx);
1262                        let (line_without_newline, newline_str) = strip_newline(after_indent);
1263                        if !line_without_newline.is_empty() {
1264                            builder.token(SyntaxKind::TEXT.into(), line_without_newline);
1265                        }
1266                        if !newline_str.is_empty() {
1267                            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1268                        }
1269                        line_idx += 1;
1270                    }
1271                } else {
1272                    // Valid: splice the prefix-aware YAML subtree. Token ranges
1273                    // are host ranges directly, the composite prefix peeled into
1274                    // `YAML_LINE_PREFIX` trivia. Mirrors the frontmatter
1275                    // `emit_yaml_block` validate→splice→fallback pattern.
1276                    let stream = parse_stream_with_prefix(&content, marker)
1277                        .green()
1278                        .into_owned();
1279                    copy_green_children(builder, &stream);
1280                }
1281                // Whether spliced or fallback, the preamble lines are consumed.
1282                line_idx = prepared_hashpipe_lines;
1283
1284                builder.finish_node(); // HASHPIPE_YAML_CONTENT
1285                builder.finish_node(); // HASHPIPE_YAML_PREAMBLE
1286            }
1287        }
1288
1289        for k in line_idx..content_lines.len() {
1290            let after_indent = window.emit_prefix_at(builder, start_pos + 1 + k);
1291            let (line_without_newline, newline_str) = strip_newline(after_indent);
1292
1293            if !line_without_newline.is_empty() {
1294                builder.token(SyntaxKind::TEXT.into(), line_without_newline);
1295            }
1296
1297            if !newline_str.is_empty() {
1298                builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1299            }
1300        }
1301        builder.finish_node(); // CodeContent
1302    }
1303
1304    // Closing fence (if found)
1305    if found_closing {
1306        let closing_stripped = window.emit_prefix_at(builder, current_pos - 1);
1307        let (closing_without_newline, newline_str) = strip_newline(closing_stripped);
1308        let closing_trimmed_start = strip_leading_spaces(closing_without_newline);
1309        let leading_ws_len = closing_without_newline.len() - closing_trimmed_start.len();
1310        let closing_count = closing_trimmed_start
1311            .chars()
1312            .take_while(|&c| c == fence.fence_char)
1313            .count();
1314        let trailing_after_marker = &closing_trimmed_start[closing_count..];
1315
1316        builder.start_node(SyntaxKind::CODE_FENCE_CLOSE.into());
1317        if leading_ws_len > 0 {
1318            builder.token(
1319                SyntaxKind::WHITESPACE.into(),
1320                &closing_without_newline[..leading_ws_len],
1321            );
1322        }
1323        builder.token(
1324            SyntaxKind::CODE_FENCE_MARKER.into(),
1325            &closing_trimmed_start[..closing_count],
1326        );
1327        if !trailing_after_marker.is_empty() {
1328            builder.token(SyntaxKind::WHITESPACE.into(), trailing_after_marker);
1329        }
1330        if !newline_str.is_empty() {
1331            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1332        }
1333        builder.finish_node(); // CodeFenceClose
1334    }
1335
1336    builder.finish_node(); // CodeBlock
1337
1338    current_pos
1339}
1340
1341/// Parse a GFM math fence (``` math ... ```) as DISPLAY_MATH while preserving bytes.
1342///
1343/// Container geometry is derived from `window.prefix()`, mirroring
1344/// [`parse_fenced_code_block`].
1345pub(crate) fn parse_fenced_math_block(
1346    builder: &mut GreenNodeBuilder<'static>,
1347    window: &StrippedLines<'_, '_>,
1348    fence: FenceInfo,
1349    first_line_override: Option<&str>,
1350) -> usize {
1351    let lines = window.raw();
1352    let start_pos = window.pos();
1353    let prefix = window.prefix();
1354    let bq_depth = prefix.bq_depth();
1355    let list_content_col = prefix.list_content_col();
1356    let list_marker_consumed_on_line_0 = prefix.list_marker_consumed_on_line_0;
1357    let bq_outer = bq_outer_of_list(prefix);
1358    let content_indent = prefix.content_indent();
1359
1360    builder.start_node(SyntaxKind::DISPLAY_MATH.into());
1361
1362    let (first_trimmed, _first_inner) = prepare_fence_open_line(
1363        builder,
1364        lines[start_pos],
1365        first_line_override,
1366        bq_depth,
1367        list_content_col,
1368        list_marker_consumed_on_line_0,
1369        bq_outer,
1370        content_indent,
1371    );
1372    let (opening_without_newline, opening_newline) = strip_newline(first_trimmed);
1373    builder.token(
1374        SyntaxKind::DISPLAY_MATH_MARKER.into(),
1375        opening_without_newline,
1376    );
1377    if !opening_newline.is_empty() {
1378        builder.token(SyntaxKind::NEWLINE.into(), opening_newline);
1379    }
1380
1381    let mut current_pos = start_pos + 1;
1382    let mut content_lines: Vec<&str> = Vec::new();
1383    let mut found_closing = false;
1384
1385    while current_pos < lines.len() {
1386        let line = lines[current_pos];
1387
1388        // Forward-scan termination on blockquote depth — stays inline (no
1389        // `StrippedLines` equivalent), mirroring `parse_fenced_code_block`.
1390        let probe = if bq_outer {
1391            line
1392        } else {
1393            strip_list_indent(line, list_content_col)
1394        };
1395        let (line_bq_depth, _) = count_blockquote_markers(probe);
1396        if line_bq_depth < bq_depth {
1397            break;
1398        }
1399
1400        // Detection only (emits nothing): same 2-bucket strip as emission.
1401        let inner_stripped =
1402            strip_content_line_prefixes(line, bq_depth, list_content_col, bq_outer, content_indent);
1403
1404        if is_closing_fence(inner_stripped, &fence) {
1405            found_closing = true;
1406            current_pos += 1;
1407            break;
1408        }
1409
1410        content_lines.push(line);
1411        current_pos += 1;
1412    }
1413
1414    if !content_lines.is_empty() {
1415        let mut content = String::new();
1416        for k in 0..content_lines.len() {
1417            let after_indent = window.emit_prefix_at(builder, start_pos + 1 + k);
1418            let (line_without_newline, newline_str) = strip_newline(after_indent);
1419            content.push_str(line_without_newline);
1420            content.push_str(newline_str);
1421        }
1422        builder.token(SyntaxKind::TEXT.into(), &content);
1423    }
1424
1425    if found_closing {
1426        let closing_stripped = window.emit_prefix_at(builder, current_pos - 1);
1427        let (closing_without_newline, newline_str) = strip_newline(closing_stripped);
1428        let closing_trimmed_start = strip_leading_spaces(closing_without_newline);
1429        let leading_ws_len = closing_without_newline.len() - closing_trimmed_start.len();
1430        let closing_count = closing_trimmed_start
1431            .chars()
1432            .take_while(|&c| c == fence.fence_char)
1433            .count();
1434        let trailing_after_marker = &closing_trimmed_start[closing_count..];
1435
1436        if leading_ws_len > 0 {
1437            builder.token(
1438                SyntaxKind::WHITESPACE.into(),
1439                &closing_without_newline[..leading_ws_len],
1440            );
1441        }
1442        builder.token(
1443            SyntaxKind::DISPLAY_MATH_MARKER.into(),
1444            &closing_trimmed_start[..closing_count],
1445        );
1446        if !trailing_after_marker.is_empty() {
1447            builder.token(SyntaxKind::WHITESPACE.into(), trailing_after_marker);
1448        }
1449        if !newline_str.is_empty() {
1450            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1451        }
1452    }
1453
1454    builder.finish_node(); // DisplayMath
1455    current_pos
1456}
1457
1458#[cfg(test)]
1459mod tests {
1460    use super::*;
1461
1462    use crate::options::Dialect;
1463
1464    #[test]
1465    fn test_backtick_fence() {
1466        let fence = try_parse_fence_open("```python", Dialect::Pandoc).unwrap();
1467        assert_eq!(fence.fence_char, '`');
1468        assert_eq!(fence.fence_count, 3);
1469        assert_eq!(fence.info_string, "python");
1470    }
1471
1472    #[test]
1473    fn multiword_bare_info_is_not_a_fence_in_pandoc() {
1474        // ```haskell foo => inline code span in pandoc-markdown, not a fence.
1475        assert!(try_parse_fence_open("```haskell foo", Dialect::Pandoc).is_none());
1476        assert!(try_parse_fence_open("~~~haskell foo", Dialect::Pandoc).is_none());
1477        assert!(try_parse_fence_open("```@example foo bar", Dialect::Pandoc).is_none());
1478        // A single bare word (with surrounding space) is still a valid fence.
1479        assert!(try_parse_fence_open("```haskell ", Dialect::Pandoc).is_some());
1480        assert!(try_parse_fence_open("``` haskell", Dialect::Pandoc).is_some());
1481        // Braced attribute forms carry their own whitespace and stay valid.
1482        assert!(try_parse_fence_open("```{.haskell .foo}", Dialect::Pandoc).is_some());
1483        // Mixed `lang {attrs}` form (e.g. Quarto's `bash {filename="..."}`)
1484        // is valid; extra words or trailing content after the brace are not.
1485        assert!(try_parse_fence_open("```bash {filename=\"Terminal\"}", Dialect::Pandoc).is_some());
1486        assert!(try_parse_fence_open("```haskell {.numberLines}", Dialect::Pandoc).is_some());
1487        assert!(try_parse_fence_open("```haskell {.numberLines} foo", Dialect::Pandoc).is_none());
1488        assert!(try_parse_fence_open("```haskell foo {.x}", Dialect::Pandoc).is_none());
1489        assert!(try_parse_fence_open("```{.x} foo", Dialect::Pandoc).is_none());
1490    }
1491
1492    #[test]
1493    fn multiword_bare_info_is_a_fence_in_commonmark() {
1494        // CommonMark/GFM take the first word as the language class and keep
1495        // the rest of the info string, so the fence is still recognized.
1496        let fence = try_parse_fence_open("```haskell foo", Dialect::CommonMark).unwrap();
1497        assert_eq!(fence.info_string, "haskell foo");
1498        assert!(try_parse_fence_open("~~~haskell foo", Dialect::CommonMark).is_some());
1499    }
1500
1501    #[test]
1502    fn hashpipe_preamble_includes_blank_line_in_block_scalar() {
1503        // A blank `#|` line inside a literal block scalar must stay in the
1504        // preamble (issue_201) — otherwise the scalar is truncated.
1505        let lines = [
1506            "#| fig-alt: |\n",
1507            "#|   First paragraph.\n",
1508            "#|\n",
1509            "#|   Second paragraph.\n",
1510            "plot(1)\n",
1511        ];
1512        assert_eq!(
1513            compute_hashpipe_preamble_line_count(&lines, "#|", 0, 0, false, 0),
1514            4
1515        );
1516    }
1517
1518    #[test]
1519    fn hashpipe_blank_line_predicate() {
1520        assert!(is_hashpipe_blank_line("#|", "#|"));
1521        assert!(is_hashpipe_blank_line("#|   ", "#|"));
1522        assert!(!is_hashpipe_blank_line("#| key: v", "#|"));
1523        assert!(!is_hashpipe_blank_line("plot(1)", "#|"));
1524    }
1525
1526    #[test]
1527    fn test_tilde_fence() {
1528        let fence = try_parse_fence_open("~~~", Dialect::Pandoc).unwrap();
1529        assert_eq!(fence.fence_char, '~');
1530        assert_eq!(fence.fence_count, 3);
1531        assert_eq!(fence.info_string, "");
1532    }
1533
1534    #[test]
1535    fn test_long_fence() {
1536        let fence = try_parse_fence_open("`````", Dialect::Pandoc).unwrap();
1537        assert_eq!(fence.fence_count, 5);
1538    }
1539
1540    #[test]
1541    fn test_two_backticks_invalid() {
1542        assert!(try_parse_fence_open("``", Dialect::Pandoc).is_none());
1543    }
1544
1545    #[test]
1546    fn test_backtick_fence_with_backtick_in_info_is_invalid() {
1547        assert!(try_parse_fence_open("`````hi````there`````", Dialect::Pandoc).is_none());
1548    }
1549
1550    #[test]
1551    fn test_closing_fence() {
1552        let fence = FenceInfo {
1553            fence_char: '`',
1554            fence_count: 3,
1555            info_string: String::new(),
1556        };
1557        assert!(is_closing_fence("```", &fence));
1558        assert!(is_closing_fence("````", &fence));
1559        assert!(!is_closing_fence("``", &fence));
1560        assert!(!is_closing_fence("~~~", &fence));
1561    }
1562
1563    #[test]
1564    fn test_fenced_code_preserves_leading_gt() {
1565        let input = "```\n> foo\n```\n";
1566        let tree = crate::parse(input, None);
1567        assert_eq!(tree.text().to_string(), input);
1568    }
1569
1570    #[test]
1571    fn test_fenced_code_in_blockquote_preserves_opening_fence_marker() {
1572        let input = "> ```\n> code\n> ```\n";
1573        let tree = crate::parse(input, None);
1574        assert_eq!(tree.text().to_string(), input);
1575    }
1576
1577    #[test]
1578    fn test_fenced_code_in_definition_list_with_unicode_content_does_not_panic() {
1579        let input = "Term\n: ```\n├── pyproject.toml\n```\n";
1580        let tree = crate::parse(input, None);
1581        assert_eq!(tree.text().to_string(), input);
1582    }
1583
1584    #[test]
1585    fn test_info_string_plain() {
1586        let info = InfoString::parse("");
1587        assert_eq!(info.block_type, CodeBlockType::Plain);
1588        assert!(info.attributes.is_empty());
1589    }
1590
1591    #[test]
1592    fn test_info_string_shortcut() {
1593        let info = InfoString::parse("python");
1594        assert_eq!(
1595            info.block_type,
1596            CodeBlockType::DisplayShortcut {
1597                language: "python".to_string()
1598            }
1599        );
1600        assert!(info.attributes.is_empty());
1601    }
1602
1603    #[test]
1604    fn test_info_string_shortcut_with_trailing() {
1605        let info = InfoString::parse("python extra stuff");
1606        assert_eq!(
1607            info.block_type,
1608            CodeBlockType::DisplayShortcut {
1609                language: "python".to_string()
1610            }
1611        );
1612    }
1613
1614    #[test]
1615    fn test_info_string_display_explicit() {
1616        let info = InfoString::parse("{.python}");
1617        assert_eq!(
1618            info.block_type,
1619            CodeBlockType::DisplayExplicit {
1620                classes: vec!["python".to_string()]
1621            }
1622        );
1623    }
1624
1625    #[test]
1626    fn test_info_string_display_explicit_multiple() {
1627        let info = InfoString::parse("{.python .numberLines}");
1628        assert_eq!(
1629            info.block_type,
1630            CodeBlockType::DisplayExplicit {
1631                classes: vec!["python".to_string(), "numberLines".to_string()]
1632            }
1633        );
1634    }
1635
1636    #[test]
1637    fn test_info_string_executable() {
1638        let info = InfoString::parse("{python}");
1639        assert_eq!(
1640            info.block_type,
1641            CodeBlockType::Executable {
1642                language: "python".to_string()
1643            }
1644        );
1645    }
1646
1647    #[test]
1648    fn test_info_string_executable_with_options() {
1649        let info = InfoString::parse("{python echo=false warning=true}");
1650        assert_eq!(
1651            info.block_type,
1652            CodeBlockType::Executable {
1653                language: "python".to_string()
1654            }
1655        );
1656        assert_eq!(info.attributes.len(), 2);
1657        assert_eq!(
1658            info.attributes[0],
1659            ("echo".to_string(), Some("false".to_string()))
1660        );
1661        assert_eq!(
1662            info.attributes[1],
1663            ("warning".to_string(), Some("true".to_string()))
1664        );
1665    }
1666
1667    #[test]
1668    fn test_info_string_executable_with_commas() {
1669        let info = InfoString::parse("{r, echo=FALSE, warning=TRUE}");
1670        assert_eq!(
1671            info.block_type,
1672            CodeBlockType::Executable {
1673                language: "r".to_string()
1674            }
1675        );
1676        assert_eq!(info.attributes.len(), 2);
1677        assert_eq!(
1678            info.attributes[0],
1679            ("echo".to_string(), Some("FALSE".to_string()))
1680        );
1681        assert_eq!(
1682            info.attributes[1],
1683            ("warning".to_string(), Some("TRUE".to_string()))
1684        );
1685    }
1686
1687    #[test]
1688    fn test_info_string_executable_mixed_commas_spaces() {
1689        // R-style with commas and spaces
1690        let info = InfoString::parse("{r, echo=FALSE, label=\"my chunk\"}");
1691        assert_eq!(
1692            info.block_type,
1693            CodeBlockType::Executable {
1694                language: "r".to_string()
1695            }
1696        );
1697        assert_eq!(info.attributes.len(), 2);
1698        assert_eq!(
1699            info.attributes[0],
1700            ("echo".to_string(), Some("FALSE".to_string()))
1701        );
1702        assert_eq!(
1703            info.attributes[1],
1704            ("label".to_string(), Some("my chunk".to_string()))
1705        );
1706    }
1707
1708    #[test]
1709    fn test_info_string_mixed_shortcut_and_attrs() {
1710        let info = InfoString::parse("python {.numberLines}");
1711        assert_eq!(
1712            info.block_type,
1713            CodeBlockType::DisplayShortcut {
1714                language: "python".to_string()
1715            }
1716        );
1717        assert_eq!(info.attributes.len(), 1);
1718        assert_eq!(info.attributes[0], (".numberLines".to_string(), None));
1719    }
1720
1721    #[test]
1722    fn test_info_string_mixed_with_key_value() {
1723        let info = InfoString::parse("python {.numberLines startFrom=\"100\"}");
1724        assert_eq!(
1725            info.block_type,
1726            CodeBlockType::DisplayShortcut {
1727                language: "python".to_string()
1728            }
1729        );
1730        assert_eq!(info.attributes.len(), 2);
1731        assert_eq!(info.attributes[0], (".numberLines".to_string(), None));
1732        assert_eq!(
1733            info.attributes[1],
1734            ("startFrom".to_string(), Some("100".to_string()))
1735        );
1736    }
1737
1738    #[test]
1739    fn test_info_string_explicit_with_id_and_classes() {
1740        let info = InfoString::parse("{#mycode .haskell .numberLines startFrom=\"100\"}");
1741        assert_eq!(
1742            info.block_type,
1743            CodeBlockType::DisplayExplicit {
1744                classes: vec!["haskell".to_string(), "numberLines".to_string()]
1745            }
1746        );
1747        // Non-class attributes
1748        let has_id = info.attributes.iter().any(|(k, _)| k == "#mycode");
1749        let has_start = info
1750            .attributes
1751            .iter()
1752            .any(|(k, v)| k == "startFrom" && v == &Some("100".to_string()));
1753        assert!(has_id);
1754        assert!(has_start);
1755    }
1756
1757    #[test]
1758    fn test_info_string_raw_html() {
1759        let info = InfoString::parse("{=html}");
1760        assert_eq!(
1761            info.block_type,
1762            CodeBlockType::Raw {
1763                format: "html".to_string()
1764            }
1765        );
1766        assert!(info.attributes.is_empty());
1767    }
1768
1769    #[test]
1770    fn test_info_string_raw_latex() {
1771        let info = InfoString::parse("{=latex}");
1772        assert_eq!(
1773            info.block_type,
1774            CodeBlockType::Raw {
1775                format: "latex".to_string()
1776            }
1777        );
1778    }
1779
1780    #[test]
1781    fn test_info_string_raw_openxml() {
1782        let info = InfoString::parse("{=openxml}");
1783        assert_eq!(
1784            info.block_type,
1785            CodeBlockType::Raw {
1786                format: "openxml".to_string()
1787            }
1788        );
1789    }
1790
1791    #[test]
1792    fn test_info_string_raw_ms() {
1793        let info = InfoString::parse("{=ms}");
1794        assert_eq!(
1795            info.block_type,
1796            CodeBlockType::Raw {
1797                format: "ms".to_string()
1798            }
1799        );
1800    }
1801
1802    #[test]
1803    fn test_info_string_raw_html5() {
1804        let info = InfoString::parse("{=html5}");
1805        assert_eq!(
1806            info.block_type,
1807            CodeBlockType::Raw {
1808                format: "html5".to_string()
1809            }
1810        );
1811    }
1812
1813    #[test]
1814    fn test_info_string_raw_not_combined_with_attrs() {
1815        // If there are other attributes with =format, it should not be treated as raw
1816        let info = InfoString::parse("{=html .class}");
1817        // This should NOT be parsed as raw because there's more than one attribute
1818        assert_ne!(
1819            info.block_type,
1820            CodeBlockType::Raw {
1821                format: "html".to_string()
1822            }
1823        );
1824    }
1825
1826    #[test]
1827    fn test_parse_pandoc_attributes_spaces() {
1828        // Pandoc display blocks use spaces as delimiters
1829        let attrs = InfoString::parse_pandoc_attributes(".python .numberLines startFrom=\"10\"");
1830        assert_eq!(attrs.len(), 3);
1831        assert_eq!(attrs[0], (".python".to_string(), None));
1832        assert_eq!(attrs[1], (".numberLines".to_string(), None));
1833        assert_eq!(attrs[2], ("startFrom".to_string(), Some("10".to_string())));
1834    }
1835
1836    #[test]
1837    fn test_parse_pandoc_attributes_no_commas() {
1838        // Commas in Pandoc attributes should be treated as part of the value
1839        let attrs = InfoString::parse_pandoc_attributes("#id .class key=value");
1840        assert_eq!(attrs.len(), 3);
1841        assert_eq!(attrs[0], ("#id".to_string(), None));
1842        assert_eq!(attrs[1], (".class".to_string(), None));
1843        assert_eq!(attrs[2], ("key".to_string(), Some("value".to_string())));
1844    }
1845
1846    #[test]
1847    fn test_parse_chunk_options_commas() {
1848        // Quarto/RMarkdown chunks use commas as delimiters
1849        let attrs = InfoString::parse_chunk_options("r, echo=FALSE, warning=TRUE");
1850        assert_eq!(attrs.len(), 3);
1851        assert_eq!(attrs[0], ("r".to_string(), None));
1852        assert_eq!(attrs[1], ("echo".to_string(), Some("FALSE".to_string())));
1853        assert_eq!(attrs[2], ("warning".to_string(), Some("TRUE".to_string())));
1854    }
1855
1856    #[test]
1857    fn test_parse_chunk_options_no_spaces() {
1858        // Should handle comma-separated without spaces
1859        let attrs = InfoString::parse_chunk_options("r,echo=FALSE,warning=TRUE");
1860        assert_eq!(attrs.len(), 3);
1861        assert_eq!(attrs[0], ("r".to_string(), None));
1862        assert_eq!(attrs[1], ("echo".to_string(), Some("FALSE".to_string())));
1863        assert_eq!(attrs[2], ("warning".to_string(), Some("TRUE".to_string())));
1864    }
1865
1866    #[test]
1867    fn test_parse_chunk_options_mixed() {
1868        // Handle both commas and spaces
1869        let attrs = InfoString::parse_chunk_options("python echo=False, warning=True");
1870        assert_eq!(attrs.len(), 3);
1871        assert_eq!(attrs[0], ("python".to_string(), None));
1872        assert_eq!(attrs[1], ("echo".to_string(), Some("False".to_string())));
1873        assert_eq!(attrs[2], ("warning".to_string(), Some("True".to_string())));
1874    }
1875
1876    #[test]
1877    fn test_parse_chunk_options_nested_function_call() {
1878        // R function calls with nested commas should be treated as single value
1879        let attrs = InfoString::parse_chunk_options(r#"r pep-cg, dependson=c("foo", "bar")"#);
1880        assert_eq!(attrs.len(), 3);
1881        assert_eq!(attrs[0], ("r".to_string(), None));
1882        assert_eq!(attrs[1], ("pep-cg".to_string(), None));
1883        assert_eq!(
1884            attrs[2],
1885            (
1886                "dependson".to_string(),
1887                Some(r#"c("foo", "bar")"#.to_string())
1888            )
1889        );
1890    }
1891
1892    #[test]
1893    fn test_parse_chunk_options_nested_with_spaces() {
1894        // Function call with spaces inside
1895        let attrs = InfoString::parse_chunk_options(r#"r, cache.path=file.path("cache", "dir")"#);
1896        assert_eq!(attrs.len(), 2);
1897        assert_eq!(attrs[0], ("r".to_string(), None));
1898        assert_eq!(
1899            attrs[1],
1900            (
1901                "cache.path".to_string(),
1902                Some(r#"file.path("cache", "dir")"#.to_string())
1903            )
1904        );
1905    }
1906
1907    #[test]
1908    fn test_parse_chunk_options_deeply_nested() {
1909        // Multiple levels of nesting
1910        let attrs = InfoString::parse_chunk_options(r#"r, x=list(a=c(1,2), b=c(3,4))"#);
1911        assert_eq!(attrs.len(), 2);
1912        assert_eq!(attrs[0], ("r".to_string(), None));
1913        assert_eq!(
1914            attrs[1],
1915            (
1916                "x".to_string(),
1917                Some(r#"list(a=c(1,2), b=c(3,4))"#.to_string())
1918            )
1919        );
1920    }
1921
1922    #[test]
1923    fn test_parse_chunk_options_brackets_and_braces() {
1924        // Test all bracket types
1925        let attrs = InfoString::parse_chunk_options(r#"r, data=df[rows, cols], config={a:1, b:2}"#);
1926        assert_eq!(attrs.len(), 3);
1927        assert_eq!(attrs[0], ("r".to_string(), None));
1928        assert_eq!(
1929            attrs[1],
1930            ("data".to_string(), Some("df[rows, cols]".to_string()))
1931        );
1932        assert_eq!(
1933            attrs[2],
1934            ("config".to_string(), Some("{a:1, b:2}".to_string()))
1935        );
1936    }
1937
1938    #[test]
1939    fn test_parse_chunk_options_quotes_with_parens() {
1940        // Parentheses inside quoted strings shouldn't affect depth tracking
1941        // Note: The parser strips outer quotes from quoted values
1942        let attrs = InfoString::parse_chunk_options(r#"r, label="test (with parens)", echo=TRUE"#);
1943        assert_eq!(attrs.len(), 3);
1944        assert_eq!(attrs[0], ("r".to_string(), None));
1945        assert_eq!(
1946            attrs[1],
1947            ("label".to_string(), Some("test (with parens)".to_string()))
1948        );
1949        assert_eq!(attrs[2], ("echo".to_string(), Some("TRUE".to_string())));
1950    }
1951
1952    #[test]
1953    fn test_parse_chunk_options_escaped_quotes() {
1954        // Escaped quotes inside string values
1955        // Note: The parser strips outer quotes and processes escapes
1956        let attrs = InfoString::parse_chunk_options(r#"r, label="has \"quoted\" text""#);
1957        assert_eq!(attrs.len(), 2);
1958        assert_eq!(attrs[0], ("r".to_string(), None));
1959        assert_eq!(
1960            attrs[1],
1961            (
1962                "label".to_string(),
1963                Some(r#"has "quoted" text"#.to_string())
1964            )
1965        );
1966    }
1967
1968    #[test]
1969    fn test_display_vs_executable_parsing() {
1970        // Display block should use Pandoc parser (spaces)
1971        let info1 = InfoString::parse("{.python .numberLines startFrom=\"10\"}");
1972        assert!(matches!(
1973            info1.block_type,
1974            CodeBlockType::DisplayExplicit { .. }
1975        ));
1976
1977        // Executable chunk should use chunk options parser (commas)
1978        let info2 = InfoString::parse("{r, echo=FALSE, warning=TRUE}");
1979        assert!(matches!(info2.block_type, CodeBlockType::Executable { .. }));
1980        assert_eq!(info2.attributes.len(), 2);
1981    }
1982
1983    #[test]
1984    fn test_info_string_executable_implicit_label() {
1985        // {r mylabel} should parse as label=mylabel
1986        let info = InfoString::parse("{r mylabel}");
1987        assert!(matches!(
1988            info.block_type,
1989            CodeBlockType::Executable { ref language } if language == "r"
1990        ));
1991        assert_eq!(info.attributes.len(), 1);
1992        assert_eq!(
1993            info.attributes[0],
1994            ("label".to_string(), Some("mylabel".to_string()))
1995        );
1996    }
1997
1998    #[test]
1999    fn test_info_string_executable_implicit_label_with_options() {
2000        // {r mylabel, echo=FALSE} should parse as label=mylabel, echo=FALSE
2001        let info = InfoString::parse("{r mylabel, echo=FALSE}");
2002        assert!(matches!(
2003            info.block_type,
2004            CodeBlockType::Executable { ref language } if language == "r"
2005        ));
2006        assert_eq!(info.attributes.len(), 2);
2007        assert_eq!(
2008            info.attributes[0],
2009            ("label".to_string(), Some("mylabel".to_string()))
2010        );
2011        assert_eq!(
2012            info.attributes[1],
2013            ("echo".to_string(), Some("FALSE".to_string()))
2014        );
2015    }
2016
2017    #[test]
2018    fn test_compute_hashpipe_preamble_line_count_for_block_scalar() {
2019        let content_lines = vec![
2020            "#| fig-cap: |\n",
2021            "#|   A caption\n",
2022            "#|   spanning lines\n",
2023            "a <- 1\n",
2024        ];
2025        let count = compute_hashpipe_preamble_line_count(&content_lines, "#|", 0, 0, false, 0);
2026        assert_eq!(count, 3);
2027    }
2028
2029    #[test]
2030    fn test_compute_hashpipe_preamble_line_count_stops_at_non_option() {
2031        let content_lines = vec!["#| label: fig-plot\n", "plot(1:10)\n", "#| echo: false\n"];
2032        let count = compute_hashpipe_preamble_line_count(&content_lines, "#|", 0, 0, false, 0);
2033        assert_eq!(count, 1);
2034    }
2035
2036    #[test]
2037    fn test_compute_hashpipe_preamble_line_count_stops_at_standalone_prefix() {
2038        let content_lines = vec!["#| label: fig-plot\n", "#|\n", "plot(1:10)\n"];
2039        let count = compute_hashpipe_preamble_line_count(&content_lines, "#|", 0, 0, false, 0);
2040        assert_eq!(count, 1);
2041    }
2042}