Skip to main content

panache_parser/parser/blocks/
code_blocks.rs

1//! Fenced code block parsing utilities.
2
3use crate::parser::utils::chunk_options::hashpipe_comment_prefix;
4use crate::syntax::SyntaxKind;
5use rowan::GreenNodeBuilder;
6
7use super::blockquotes::{count_blockquote_markers, strip_n_blockquote_markers};
8use crate::parser::utils::container_stack::byte_index_at_column;
9use crate::parser::utils::helpers::{strip_leading_spaces, strip_newline};
10
/// Classification of a fenced code block, derived from the syntax of its
/// info string (the text that follows the opening fence).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CodeBlockType {
    /// Display-only block written with the bare-language shortcut, e.g.
    /// a fence followed by `python`.
    DisplayShortcut { language: String },
    /// Display-only block written with explicit Pandoc attributes, e.g.
    /// `{.python}`. `classes` holds the class names without the leading dot.
    DisplayExplicit { classes: Vec<String> },
    /// Executable chunk (Quarto/RMarkdown), e.g. `{python}` or `{r}`.
    Executable { language: String },
    /// Raw block targeting one output format, e.g. `{=html}`.
    Raw { format: String },
    /// Block with no language at all (empty info string or attributes only).
    Plain,
}
25
26/// Parsed attributes from a code block info string.
27#[derive(Debug, Clone, PartialEq)]
28pub struct InfoString {
29    pub raw: String,
30    pub block_type: CodeBlockType,
31    pub attributes: Vec<(String, Option<String>)>, // key-value pairs
32}
33
34impl InfoString {
35    /// Parse an info string into structured attributes.
36    pub fn parse(raw: &str) -> Self {
37        let trimmed = raw.trim();
38
39        if trimmed.is_empty() {
40            return InfoString {
41                raw: raw.to_string(),
42                block_type: CodeBlockType::Plain,
43                attributes: Vec::new(),
44            };
45        }
46
47        // Check if it starts with '{' - explicit attribute block
48        if let Some(stripped) = trimmed.strip_prefix('{')
49            && let Some(content) = stripped.strip_suffix('}')
50        {
51            return Self::parse_explicit(raw, content);
52        }
53
54        // Check for mixed form: python {.numberLines}
55        if let Some(brace_start) = trimmed.find('{') {
56            let language = trimmed[..brace_start].trim();
57            if !language.is_empty() && !language.contains(char::is_whitespace) {
58                let attr_part = &trimmed[brace_start..];
59                if let Some(stripped) = attr_part.strip_prefix('{')
60                    && let Some(content) = stripped.strip_suffix('}')
61                {
62                    let attrs = Self::parse_attributes(content);
63                    return InfoString {
64                        raw: raw.to_string(),
65                        block_type: CodeBlockType::DisplayShortcut {
66                            language: language.to_string(),
67                        },
68                        attributes: attrs,
69                    };
70                }
71            }
72        }
73
74        // Otherwise, it's a shortcut form (just the language name)
75        // Only take the first word as language
76        let language = trimmed.split_whitespace().next().unwrap_or(trimmed);
77        InfoString {
78            raw: raw.to_string(),
79            block_type: CodeBlockType::DisplayShortcut {
80                language: language.to_string(),
81            },
82            attributes: Vec::new(),
83        }
84    }
85
86    fn parse_explicit(raw: &str, content: &str) -> Self {
87        // Check for raw attribute FIRST: {=format}
88        // The content should start with '=' and have only alphanumeric chars after
89        let trimmed_content = content.trim();
90        if let Some(format_name) = trimmed_content.strip_prefix('=') {
91            // Validate format name: alphanumeric only, no spaces
92            if !format_name.is_empty()
93                && format_name.chars().all(|c| c.is_alphanumeric())
94                && !format_name.contains(char::is_whitespace)
95            {
96                return InfoString {
97                    raw: raw.to_string(),
98                    block_type: CodeBlockType::Raw {
99                        format: format_name.to_string(),
100                    },
101                    attributes: Vec::new(),
102                };
103            }
104        }
105
106        // First, do a preliminary parse to determine block type
107        // Use chunk options parser (comma-aware) for initial detection
108        let prelim_attrs = Self::parse_chunk_options(content);
109
110        // First non-ID, non-attribute token determines if it's executable or display
111        let mut first_lang_token = None;
112        for (key, val) in prelim_attrs.iter() {
113            if val.is_none() && !key.starts_with('#') {
114                first_lang_token = Some(key.as_str());
115                break;
116            }
117        }
118
119        let first_token = first_lang_token.unwrap_or("");
120
121        if first_token.starts_with('.') {
122            // Display block: {.python} or {.haskell .numberLines}
123            // Re-parse with Pandoc-style parser (space-delimited)
124            let attrs = Self::parse_pandoc_attributes(content);
125
126            let classes: Vec<String> = attrs
127                .iter()
128                .filter(|(k, v)| k.starts_with('.') && v.is_none())
129                .map(|(k, _)| k[1..].to_string())
130                .collect();
131
132            let non_class_attrs: Vec<(String, Option<String>)> = attrs
133                .into_iter()
134                .filter(|(k, _)| !k.starts_with('.') || k.contains('='))
135                .collect();
136
137            InfoString {
138                raw: raw.to_string(),
139                block_type: CodeBlockType::DisplayExplicit { classes },
140                attributes: non_class_attrs,
141            }
142        } else if !first_token.is_empty() && !first_token.starts_with('#') {
143            // Executable chunk: {python} or {r}
144            // Use chunk options parser (comma-delimited)
145            let attrs = Self::parse_chunk_options(content);
146            let lang_index = attrs.iter().position(|(k, _)| k == first_token).unwrap();
147
148            // Check if there's a second bareword (implicit label in R/Quarto chunks)
149            // Pattern: {r mylabel} is equivalent to {r, label=mylabel}
150            let mut has_implicit_label = false;
151            let implicit_label_value = if lang_index + 1 < attrs.len() {
152                if let (label_key, None) = &attrs[lang_index + 1] {
153                    // Second bareword after language
154                    has_implicit_label = true;
155                    Some(label_key.clone())
156                } else {
157                    None
158                }
159            } else {
160                None
161            };
162
163            let mut final_attrs: Vec<(String, Option<String>)> = attrs
164                .into_iter()
165                .enumerate()
166                .filter(|(i, _)| {
167                    // Remove language token
168                    if *i == lang_index {
169                        return false;
170                    }
171                    // Remove implicit label token (will be added back explicitly)
172                    if has_implicit_label && *i == lang_index + 1 {
173                        return false;
174                    }
175                    true
176                })
177                .map(|(_, attr)| attr)
178                .collect();
179
180            // Add explicit label if we found an implicit one
181            if let Some(label_val) = implicit_label_value {
182                final_attrs.insert(0, ("label".to_string(), Some(label_val)));
183            }
184
185            InfoString {
186                raw: raw.to_string(),
187                block_type: CodeBlockType::Executable {
188                    language: first_token.to_string(),
189                },
190                attributes: final_attrs,
191            }
192        } else {
193            // Just attributes, no language - use Pandoc parser
194            let attrs = Self::parse_pandoc_attributes(content);
195            InfoString {
196                raw: raw.to_string(),
197                block_type: CodeBlockType::Plain,
198                attributes: attrs,
199            }
200        }
201    }
202
203    /// Parse Pandoc-style attributes for display blocks: {.class #id key="value"}
204    /// Spaces are the primary delimiter. Pandoc spec prefers explicit quoting.
205    fn parse_pandoc_attributes(content: &str) -> Vec<(String, Option<String>)> {
206        let mut attrs = Vec::new();
207        let mut chars = content.chars().peekable();
208
209        while chars.peek().is_some() {
210            // Skip whitespace
211            while matches!(chars.peek(), Some(&' ') | Some(&'\t')) {
212                chars.next();
213            }
214
215            if chars.peek().is_none() {
216                break;
217            }
218
219            // Read key
220            let mut key = String::new();
221            while let Some(&ch) = chars.peek() {
222                if ch == '=' || ch == ' ' || ch == '\t' {
223                    break;
224                }
225                key.push(ch);
226                chars.next();
227            }
228
229            if key.is_empty() {
230                break;
231            }
232
233            // Skip whitespace
234            while matches!(chars.peek(), Some(&' ') | Some(&'\t')) {
235                chars.next();
236            }
237
238            // Check for value
239            if chars.peek() == Some(&'=') {
240                chars.next(); // consume '='
241
242                // Skip whitespace after '='
243                while matches!(chars.peek(), Some(&' ') | Some(&'\t')) {
244                    chars.next();
245                }
246
247                // Read value (might be quoted)
248                let value = if chars.peek() == Some(&'"') {
249                    chars.next(); // consume opening quote
250                    let mut val = String::new();
251                    while let Some(&ch) = chars.peek() {
252                        chars.next();
253                        if ch == '"' {
254                            break;
255                        }
256                        if ch == '\\' {
257                            if let Some(&next_ch) = chars.peek() {
258                                chars.next();
259                                val.push(next_ch);
260                            }
261                        } else {
262                            val.push(ch);
263                        }
264                    }
265                    val
266                } else {
267                    // Unquoted value - read until space
268                    let mut val = String::new();
269                    while let Some(&ch) = chars.peek() {
270                        if ch == ' ' || ch == '\t' {
271                            break;
272                        }
273                        val.push(ch);
274                        chars.next();
275                    }
276                    val
277                };
278
279                attrs.push((key, Some(value)));
280            } else {
281                attrs.push((key, None));
282            }
283        }
284
285        attrs
286    }
287
288    /// Parse Quarto/RMarkdown chunk options: {language, option=value, option2=value2}
289    /// Commas are the primary delimiter (R CSV style). Supports unquoted barewords.
290    fn parse_chunk_options(content: &str) -> Vec<(String, Option<String>)> {
291        let mut attrs = Vec::new();
292        let mut chars = content.chars().peekable();
293
294        while chars.peek().is_some() {
295            // Skip whitespace and commas
296            while matches!(chars.peek(), Some(&' ') | Some(&'\t') | Some(&',')) {
297                chars.next();
298            }
299
300            if chars.peek().is_none() {
301                break;
302            }
303
304            // Read key
305            let mut key = String::new();
306            while let Some(&ch) = chars.peek() {
307                if ch == '=' || ch == ' ' || ch == '\t' || ch == ',' {
308                    break;
309                }
310                key.push(ch);
311                chars.next();
312            }
313
314            if key.is_empty() {
315                break;
316            }
317
318            // Skip whitespace and commas
319            while matches!(chars.peek(), Some(&' ') | Some(&'\t') | Some(&',')) {
320                chars.next();
321            }
322
323            // Check for value
324            if chars.peek() == Some(&'=') {
325                chars.next(); // consume '='
326
327                // Skip whitespace and commas after '='
328                while matches!(chars.peek(), Some(&' ') | Some(&'\t') | Some(&',')) {
329                    chars.next();
330                }
331
332                // Read value (might be quoted)
333                let value = if chars.peek() == Some(&'"') {
334                    chars.next(); // consume opening quote
335                    let mut val = String::new();
336                    while let Some(&ch) = chars.peek() {
337                        chars.next();
338                        if ch == '"' {
339                            break;
340                        }
341                        if ch == '\\' {
342                            if let Some(&next_ch) = chars.peek() {
343                                chars.next();
344                                val.push(next_ch);
345                            }
346                        } else {
347                            val.push(ch);
348                        }
349                    }
350                    val
351                } else {
352                    // Unquoted value - read until comma, space, or tab at depth 0
353                    // Track nesting depth for (), [], {} and quote state
354                    let mut val = String::new();
355                    let mut depth = 0; // Track parentheses/brackets/braces depth
356                    let mut in_quote: Option<char> = None; // Track if inside ' or "
357                    let mut escaped = false; // Track if previous char was backslash
358
359                    while let Some(&ch) = chars.peek() {
360                        // Handle escape sequences
361                        if escaped {
362                            val.push(ch);
363                            chars.next();
364                            escaped = false;
365                            continue;
366                        }
367
368                        if ch == '\\' {
369                            val.push(ch);
370                            chars.next();
371                            escaped = true;
372                            continue;
373                        }
374
375                        // Handle quotes
376                        if let Some(quote_char) = in_quote {
377                            val.push(ch);
378                            chars.next();
379                            if ch == quote_char {
380                                in_quote = None; // Close quote
381                            }
382                            continue;
383                        }
384
385                        // Not in a quote - check for quote start
386                        if ch == '"' || ch == '\'' {
387                            in_quote = Some(ch);
388                            val.push(ch);
389                            chars.next();
390                            continue;
391                        }
392
393                        // Track nesting depth (only when not in quotes)
394                        if ch == '(' || ch == '[' || ch == '{' {
395                            depth += 1;
396                            val.push(ch);
397                            chars.next();
398                            continue;
399                        }
400
401                        if ch == ')' || ch == ']' || ch == '}' {
402                            depth -= 1;
403                            val.push(ch);
404                            chars.next();
405                            continue;
406                        }
407
408                        // Check for delimiters - only break at depth 0
409                        if depth == 0 && (ch == ' ' || ch == '\t' || ch == ',') {
410                            break;
411                        }
412
413                        // Regular character
414                        val.push(ch);
415                        chars.next();
416                    }
417                    val
418                };
419
420                attrs.push((key, Some(value)));
421            } else {
422                attrs.push((key, None));
423            }
424        }
425
426        attrs
427    }
428
429    /// Legacy function - kept for backward compatibility in mixed-form parsing
430    /// For new code, use parse_pandoc_attributes or parse_chunk_options
431    fn parse_attributes(content: &str) -> Vec<(String, Option<String>)> {
432        // Default to chunk options parsing (comma-aware)
433        Self::parse_chunk_options(content)
434    }
435}
436
/// Description of an opening code fence that has been recognised.
#[derive(Debug, Clone)]
pub(crate) struct FenceInfo {
    /// The character the fence is built from (a backtick or `~`).
    pub fence_char: char,
    /// How many fence characters the opening fence contains.
    pub fence_count: usize,
    /// Text following the fence characters on the opening line.
    pub info_string: String,
}
444
445pub(crate) fn is_gfm_math_fence(fence: &FenceInfo) -> bool {
446    fence.info_string.trim() == "math"
447}
448
449/// Try to detect a fenced code block opening from content.
450/// Returns fence info if this is a valid opening fence.
451pub(crate) fn try_parse_fence_open(content: &str) -> Option<FenceInfo> {
452    let trimmed = strip_leading_spaces(content);
453
454    // Check for fence opening (``` or ~~~)
455    let (fence_char, fence_count) = if trimmed.starts_with('`') {
456        let count = trimmed.chars().take_while(|&c| c == '`').count();
457        ('`', count)
458    } else if trimmed.starts_with('~') {
459        let count = trimmed.chars().take_while(|&c| c == '~').count();
460        ('~', count)
461    } else {
462        return None;
463    };
464
465    if fence_count < 3 {
466        return None;
467    }
468
469    let info_string_raw = &trimmed[fence_count..];
470    // Strip trailing newline (LF or CRLF) and at most one leading space
471    let (info_string_trimmed, _) = strip_newline(info_string_raw);
472    let info_string = if let Some(stripped) = info_string_trimmed.strip_prefix(' ') {
473        stripped.to_string()
474    } else {
475        info_string_trimmed.to_string()
476    };
477
478    // Backtick-fenced blocks cannot have backticks in the info string.
479    if fence_char == '`' && info_string.contains('`') {
480        return None;
481    }
482
483    Some(FenceInfo {
484        fence_char,
485        fence_count,
486        info_string,
487    })
488}
489
490fn prepare_fence_open_line<'a>(
491    builder: &mut GreenNodeBuilder<'static>,
492    source_line: &'a str,
493    first_line_override: Option<&'a str>,
494    bq_depth: usize,
495    base_indent: usize,
496) -> (&'a str, &'a str) {
497    let first_line = first_line_override.unwrap_or(source_line);
498
499    // Only strip blockquote markers for the *surrounding* blockquote depth.
500    // Anything beyond that (e.g. a literal `>` inside the code block) must be preserved.
501    let first_inner = if bq_depth > 0 && first_line_override.is_none() {
502        strip_n_blockquote_markers(first_line, bq_depth)
503    } else {
504        if bq_depth > 0 && first_line_override.is_some() && source_line != first_line {
505            let stripped = strip_n_blockquote_markers(source_line, bq_depth);
506            let prefix_len = source_line.len().saturating_sub(stripped.len());
507            if prefix_len > 0 {
508                emit_blockquote_prefix_tokens(builder, &source_line[..prefix_len]);
509            }
510        }
511        first_line
512    };
513
514    // For lossless parsing: emit the base indent before stripping it
515    let first_base_indent = if first_line_override.is_some() {
516        0
517    } else {
518        base_indent
519    };
520    let first_base_indent_bytes = byte_index_at_column(first_inner, first_base_indent);
521    let first_stripped = if first_base_indent > 0 && first_inner.len() >= first_base_indent_bytes {
522        let indent_str = &first_inner[..first_base_indent_bytes];
523        if !indent_str.is_empty() {
524            builder.token(SyntaxKind::WHITESPACE.into(), indent_str);
525        }
526        &first_inner[first_base_indent_bytes..]
527    } else {
528        first_inner
529    };
530
531    let first_trimmed = strip_leading_spaces(first_stripped);
532    let leading_ws_len = first_stripped.len().saturating_sub(first_trimmed.len());
533    if leading_ws_len > 0 {
534        builder.token(
535            SyntaxKind::WHITESPACE.into(),
536            &first_stripped[..leading_ws_len],
537        );
538    }
539    (first_trimmed, first_inner)
540}
541
542fn emit_blockquote_prefix_tokens(builder: &mut GreenNodeBuilder<'static>, prefix: &str) {
543    for ch in prefix.chars() {
544        if ch == '>' {
545            builder.token(SyntaxKind::BLOCK_QUOTE_MARKER.into(), ">");
546        } else {
547            let mut buf = [0u8; 4];
548            builder.token(SyntaxKind::WHITESPACE.into(), ch.encode_utf8(&mut buf));
549        }
550    }
551}
552
553fn emit_content_line_prefixes<'a>(
554    builder: &mut GreenNodeBuilder<'static>,
555    content_line: &'a str,
556    bq_depth: usize,
557    base_indent: usize,
558) -> &'a str {
559    let after_blockquote = if bq_depth > 0 {
560        let stripped = strip_n_blockquote_markers(content_line, bq_depth);
561        let prefix_len = content_line.len().saturating_sub(stripped.len());
562        if prefix_len > 0 {
563            emit_blockquote_prefix_tokens(builder, &content_line[..prefix_len]);
564        }
565        stripped
566    } else {
567        content_line
568    };
569
570    let base_indent_bytes = byte_index_at_column(after_blockquote, base_indent);
571    if base_indent > 0 && after_blockquote.len() >= base_indent_bytes {
572        let indent_str = &after_blockquote[..base_indent_bytes];
573        if !indent_str.is_empty() {
574            builder.token(SyntaxKind::WHITESPACE.into(), indent_str);
575        }
576        &after_blockquote[base_indent_bytes..]
577    } else {
578        after_blockquote
579    }
580}
581
582fn strip_content_line_prefixes(content_line: &str, bq_depth: usize, base_indent: usize) -> &str {
583    let after_blockquote = if bq_depth > 0 {
584        strip_n_blockquote_markers(content_line, bq_depth)
585    } else {
586        content_line
587    };
588
589    let base_indent_bytes = byte_index_at_column(after_blockquote, base_indent);
590    if base_indent > 0 && after_blockquote.len() >= base_indent_bytes {
591        &after_blockquote[base_indent_bytes..]
592    } else {
593        after_blockquote
594    }
595}
596
597pub(crate) fn compute_hashpipe_preamble_line_count(
598    content_lines: &[&str],
599    prefix: &str,
600    bq_depth: usize,
601    base_indent: usize,
602) -> usize {
603    let mut line_idx = 0usize;
604
605    while line_idx < content_lines.len() {
606        let preview_after_indent =
607            strip_content_line_prefixes(content_lines[line_idx], bq_depth, base_indent);
608        let (preview_without_newline, _) = strip_newline(preview_after_indent);
609        if !is_hashpipe_option_line(preview_without_newline, prefix) {
610            break;
611        }
612        line_idx += 1;
613
614        let option_value = hashpipe_option_value(preview_without_newline, prefix);
615        let mut multiline_value = option_value
616            .as_ref()
617            .filter(|value| is_unclosed_double_quoted(value))
618            .cloned();
619        let in_block_scalar = option_value
620            .as_ref()
621            .is_some_and(|value| is_yaml_block_scalar_indicator(value));
622        let in_indented_value = option_value.as_ref().is_some_and(|value| value.is_empty());
623
624        while multiline_value.is_some() || in_block_scalar || in_indented_value {
625            if line_idx >= content_lines.len() {
626                break;
627            }
628            let continuation_without_prefixes =
629                strip_content_line_prefixes(content_lines[line_idx], bq_depth, base_indent);
630            let (continuation_without_newline, _) = strip_newline(continuation_without_prefixes);
631
632            if in_block_scalar || in_indented_value {
633                if !is_hashpipe_block_scalar_continuation_line(continuation_without_newline, prefix)
634                {
635                    break;
636                }
637                line_idx += 1;
638                continue;
639            }
640
641            if let Some(mut current_value) = multiline_value.take() {
642                let Some(continuation_value) =
643                    hashpipe_continuation_value(continuation_without_newline, prefix)
644                else {
645                    break;
646                };
647                line_idx += 1;
648
649                if !current_value.ends_with(' ') {
650                    current_value.push(' ');
651                }
652                current_value.push_str(&continuation_value);
653                if is_unclosed_double_quoted(&current_value) {
654                    multiline_value = Some(current_value);
655                }
656            }
657        }
658    }
659
660    line_idx
661}
662
663fn emit_hashpipe_option_line(
664    builder: &mut GreenNodeBuilder<'static>,
665    line_without_newline: &str,
666    prefix: &str,
667) -> bool {
668    if !is_hashpipe_option_line(line_without_newline, prefix) {
669        return false;
670    }
671
672    let trimmed_start = line_without_newline.trim_start_matches([' ', '\t']);
673    let leading_ws_len = line_without_newline
674        .len()
675        .saturating_sub(trimmed_start.len());
676    let after_prefix = &trimmed_start[prefix.len()..];
677    let ws_after_prefix_len = after_prefix
678        .len()
679        .saturating_sub(after_prefix.trim_start_matches([' ', '\t']).len());
680    let rest = &after_prefix[ws_after_prefix_len..];
681    let Some(colon_idx) = rest.find(':') else {
682        return false;
683    };
684
685    let key_with_ws = &rest[..colon_idx];
686    let key = key_with_ws.trim_end_matches([' ', '\t']);
687    if key.is_empty() {
688        return false;
689    }
690    let key_ws_suffix = &key_with_ws[key.len()..];
691
692    let after_colon = &rest[colon_idx + 1..];
693    let value_ws_prefix_len = after_colon
694        .len()
695        .saturating_sub(after_colon.trim_start_matches([' ', '\t']).len());
696    let value_with_trailing = &after_colon[value_ws_prefix_len..];
697    let value = value_with_trailing.trim_end_matches([' ', '\t']);
698    let value_ws_suffix = &value_with_trailing[value.len()..];
699
700    builder.start_node(SyntaxKind::CHUNK_OPTION.into());
701    if leading_ws_len > 0 {
702        builder.token(
703            SyntaxKind::WHITESPACE.into(),
704            &line_without_newline[..leading_ws_len],
705        );
706    }
707    builder.token(SyntaxKind::HASHPIPE_PREFIX.into(), prefix);
708    if ws_after_prefix_len > 0 {
709        builder.token(
710            SyntaxKind::WHITESPACE.into(),
711            &after_prefix[..ws_after_prefix_len],
712        );
713    }
714
715    builder.token(SyntaxKind::CHUNK_OPTION_KEY.into(), key);
716    if !key_ws_suffix.is_empty() {
717        builder.token(SyntaxKind::WHITESPACE.into(), key_ws_suffix);
718    }
719    builder.token(SyntaxKind::TEXT.into(), ":");
720    if value_ws_prefix_len > 0 {
721        builder.token(
722            SyntaxKind::WHITESPACE.into(),
723            &after_colon[..value_ws_prefix_len],
724        );
725    }
726
727    if !value.is_empty() {
728        if let Some(quote) = value.chars().next()
729            && (quote == '"' || quote == '\'')
730            && value.ends_with(quote)
731            && value.len() >= 2
732        {
733            builder.token(SyntaxKind::CHUNK_OPTION_QUOTE.into(), &value[..1]);
734            builder.token(
735                SyntaxKind::CHUNK_OPTION_VALUE.into(),
736                &value[1..value.len() - 1],
737            );
738            builder.token(
739                SyntaxKind::CHUNK_OPTION_QUOTE.into(),
740                &value[value.len() - 1..],
741            );
742        } else {
743            builder.token(SyntaxKind::CHUNK_OPTION_VALUE.into(), value);
744        }
745    }
746
747    if !value_ws_suffix.is_empty() {
748        builder.token(SyntaxKind::WHITESPACE.into(), value_ws_suffix);
749    }
750    builder.finish_node();
751    true
752}
753
754fn emit_hashpipe_continuation_line(
755    builder: &mut GreenNodeBuilder<'static>,
756    line_without_newline: &str,
757    prefix: &str,
758) -> bool {
759    if !is_hashpipe_continuation_line(line_without_newline, prefix) {
760        return false;
761    }
762    let trimmed_start = line_without_newline.trim_start_matches([' ', '\t']);
763    let leading_ws_len = line_without_newline
764        .len()
765        .saturating_sub(trimmed_start.len());
766    let after_prefix = &trimmed_start[prefix.len()..];
767    let ws_after_prefix_len = after_prefix
768        .len()
769        .saturating_sub(after_prefix.trim_start_matches([' ', '\t']).len());
770    let continuation_with_trailing = &after_prefix[ws_after_prefix_len..];
771    let continuation_value = continuation_with_trailing.trim_end_matches([' ', '\t']);
772    if continuation_value.is_empty() {
773        return false;
774    }
775    let continuation_ws_suffix = &continuation_with_trailing[continuation_value.len()..];
776
777    builder.start_node(SyntaxKind::CHUNK_OPTION.into());
778    if leading_ws_len > 0 {
779        builder.token(
780            SyntaxKind::WHITESPACE.into(),
781            &line_without_newline[..leading_ws_len],
782        );
783    }
784    builder.token(SyntaxKind::HASHPIPE_PREFIX.into(), prefix);
785    if ws_after_prefix_len > 0 {
786        builder.token(
787            SyntaxKind::WHITESPACE.into(),
788            &after_prefix[..ws_after_prefix_len],
789        );
790    }
791    builder.token(SyntaxKind::CHUNK_OPTION_VALUE.into(), continuation_value);
792    if !continuation_ws_suffix.is_empty() {
793        builder.token(SyntaxKind::WHITESPACE.into(), continuation_ws_suffix);
794    }
795    builder.finish_node();
796    true
797}
798
/// Returns `true` when the line looks like `<prefix> key: ...`: optional
/// leading spaces/tabs, then `prefix`, then a non-blank key followed by a
/// colon. The key is everything before the first `:` after the prefix.
fn is_hashpipe_option_line(line_without_newline: &str, prefix: &str) -> bool {
    const BLANKS: [char; 2] = [' ', '\t'];

    line_without_newline
        .trim_start_matches(BLANKS)
        .strip_prefix(prefix)
        .and_then(|after_prefix| after_prefix.trim_start_matches(BLANKS).split_once(':'))
        .is_some_and(|(key, _)| !key.trim_end_matches(BLANKS).is_empty())
}
815
/// Returns `true` when the line is `<prefix>` (after optional leading
/// spaces/tabs) followed by at least one space or tab and then some further
/// text.
fn is_hashpipe_continuation_line(line_without_newline: &str, prefix: &str) -> bool {
    const BLANKS: [char; 2] = [' ', '\t'];

    match line_without_newline
        .trim_start_matches(BLANKS)
        .strip_prefix(prefix)
    {
        // The prefix must be separated from the text by whitespace.
        Some(after_prefix) if after_prefix.starts_with(BLANKS) => {
            !after_prefix.trim_start_matches(BLANKS).is_empty()
        }
        _ => false,
    }
}
830
831fn hashpipe_option_value(line_without_newline: &str, prefix: &str) -> Option<String> {
832    if !is_hashpipe_option_line(line_without_newline, prefix) {
833        return None;
834    }
835    let trimmed_start = line_without_newline.trim_start_matches([' ', '\t']);
836    let after_prefix = &trimmed_start[prefix.len()..];
837    let rest = after_prefix.trim_start_matches([' ', '\t']);
838    let colon_idx = rest.find(':')?;
839    let value = rest[colon_idx + 1..]
840        .trim_start_matches([' ', '\t'])
841        .trim_end_matches(['\r', '\n']);
842    Some(value.to_string())
843}
844
845fn hashpipe_continuation_value(line_without_newline: &str, prefix: &str) -> Option<String> {
846    if !is_hashpipe_continuation_line(line_without_newline, prefix) {
847        return None;
848    }
849    let trimmed_start = line_without_newline.trim_start_matches([' ', '\t']);
850    let after_prefix = &trimmed_start[prefix.len()..];
851    Some(
852        after_prefix
853            .trim_start_matches([' ', '\t'])
854            .trim_end_matches(['\r', '\n'])
855            .to_string(),
856    )
857}
858
/// Report whether `value` is a YAML block scalar header such as `|`, `>`, `|-` or `>+2`.
///
/// Surrounding whitespace is ignored. The first character must be `|` or `>`;
/// every following character must be `+`, `-`, or an ASCII digit.
fn is_yaml_block_scalar_indicator(value: &str) -> bool {
    match value.trim().strip_prefix(['|', '>']) {
        Some(modifiers) => modifiers
            .chars()
            .all(|c| matches!(c, '+' | '-' | '0'..='9')),
        None => false,
    }
}
873
/// Count the spaces and tabs at the start of `text`.
fn leading_ws_count(text: &str) -> usize {
    // Spaces and tabs are one byte each, so the byte difference equals the count.
    text.len() - text.trim_start_matches([' ', '\t']).len()
}
877
878fn is_hashpipe_block_scalar_continuation_line(line_without_newline: &str, prefix: &str) -> bool {
879    let trimmed_start = line_without_newline.trim_start_matches([' ', '\t']);
880    if !trimmed_start.starts_with(prefix) {
881        return false;
882    }
883    let after_prefix = &trimmed_start[prefix.len()..];
884    let text = after_prefix.trim_end_matches([' ', '\t']);
885    if text.is_empty() {
886        return true;
887    }
888    leading_ws_count(after_prefix) >= 2
889}
890
/// Report whether `value` opens a double-quoted string that is never closed.
///
/// The value must start with `"`. Unescaped double quotes are then toggled on
/// and off; a backslash escapes the character that follows it. The string is
/// unclosed when an odd number of unescaped quotes was seen.
fn is_unclosed_double_quoted(value: &str) -> bool {
    if !value.starts_with('"') {
        return false;
    }

    let mut inside_quotes = false;
    let mut chars = value.chars();
    while let Some(c) = chars.next() {
        match c {
            // Skip whatever follows the backslash; it can neither open nor close.
            '\\' => {
                chars.next();
            }
            '"' => inside_quotes = !inside_quotes,
            _ => {}
        }
    }
    inside_quotes
}
912
913/// Check if a line is a valid closing fence for the given fence info.
914pub(crate) fn is_closing_fence(content: &str, fence: &FenceInfo) -> bool {
915    let trimmed = strip_leading_spaces(content);
916
917    if !trimmed.starts_with(fence.fence_char) {
918        return false;
919    }
920
921    let closing_count = trimmed
922        .chars()
923        .take_while(|&c| c == fence.fence_char)
924        .count();
925
926    if closing_count < fence.fence_count {
927        return false;
928    }
929
930    // Rest of line must be empty
931    trimmed[closing_count..].trim().is_empty()
932}
933
934/// Emit chunk options as structured CST nodes while preserving all bytes.
935/// This parses {r, echo=TRUE, fig.cap="text"} into CHUNK_OPTIONS with individual CHUNK_OPTION nodes.
936fn emit_chunk_options(builder: &mut GreenNodeBuilder<'static>, content: &str) {
937    if content.trim().is_empty() {
938        builder.token(SyntaxKind::TEXT.into(), content);
939        return;
940    }
941
942    builder.start_node(SyntaxKind::CHUNK_OPTIONS.into());
943
944    let mut pos = 0;
945    let bytes = content.as_bytes();
946
947    while pos < bytes.len() {
948        // Emit leading whitespace/commas as TEXT
949        let ws_start = pos;
950        while pos < bytes.len() {
951            let ch = bytes[pos] as char;
952            if ch != ' ' && ch != '\t' && ch != ',' {
953                break;
954            }
955            pos += 1;
956        }
957        if pos > ws_start {
958            builder.token(SyntaxKind::TEXT.into(), &content[ws_start..pos]);
959        }
960
961        if pos >= bytes.len() {
962            break;
963        }
964
965        // Check if this is a closing brace
966        if bytes[pos] as char == '}' {
967            builder.token(SyntaxKind::TEXT.into(), &content[pos..pos + 1]);
968            pos += 1;
969            if pos < bytes.len() {
970                builder.token(SyntaxKind::TEXT.into(), &content[pos..]);
971            }
972            break;
973        }
974
975        // Read key
976        let key_start = pos;
977        while pos < bytes.len() {
978            let ch = bytes[pos] as char;
979            if ch == '=' || ch == ' ' || ch == '\t' || ch == ',' || ch == '}' {
980                break;
981            }
982            pos += 1;
983        }
984
985        if pos == key_start {
986            // No key found, emit rest as TEXT
987            if pos < bytes.len() {
988                builder.token(SyntaxKind::TEXT.into(), &content[pos..]);
989            }
990            break;
991        }
992
993        let key = &content[key_start..pos];
994
995        // Check for whitespace before '='
996        let ws_before_eq_start = pos;
997        while pos < bytes.len() && matches!(bytes[pos] as char, ' ' | '\t') {
998            pos += 1;
999        }
1000
1001        // Check if there's a value (=)
1002        if pos < bytes.len() && bytes[pos] as char == '=' {
1003            // Has value - emit as CHUNK_OPTION
1004            builder.start_node(SyntaxKind::CHUNK_OPTION.into());
1005            builder.token(SyntaxKind::CHUNK_OPTION_KEY.into(), key);
1006
1007            // Emit whitespace before '=' if any
1008            if pos > ws_before_eq_start {
1009                builder.token(SyntaxKind::TEXT.into(), &content[ws_before_eq_start..pos]);
1010            }
1011
1012            builder.token(SyntaxKind::TEXT.into(), "=");
1013            pos += 1; // consume '='
1014
1015            // Emit whitespace after '='
1016            let ws_after_eq_start = pos;
1017            while pos < bytes.len() && matches!(bytes[pos] as char, ' ' | '\t') {
1018                pos += 1;
1019            }
1020            if pos > ws_after_eq_start {
1021                builder.token(SyntaxKind::TEXT.into(), &content[ws_after_eq_start..pos]);
1022            }
1023
1024            // Parse value (might be quoted)
1025            if pos < bytes.len() {
1026                let quote_char = bytes[pos] as char;
1027                if quote_char == '"' || quote_char == '\'' {
1028                    // Quoted value
1029                    builder.token(
1030                        SyntaxKind::CHUNK_OPTION_QUOTE.into(),
1031                        &content[pos..pos + 1],
1032                    );
1033                    pos += 1; // consume opening quote
1034
1035                    let val_start = pos;
1036                    let mut escaped = false;
1037                    while pos < bytes.len() {
1038                        let ch = bytes[pos] as char;
1039                        if !escaped && ch == quote_char {
1040                            break;
1041                        }
1042                        escaped = !escaped && ch == '\\';
1043                        pos += 1;
1044                    }
1045
1046                    if pos > val_start {
1047                        builder.token(
1048                            SyntaxKind::CHUNK_OPTION_VALUE.into(),
1049                            &content[val_start..pos],
1050                        );
1051                    }
1052
1053                    // Emit closing quote
1054                    if pos < bytes.len() && bytes[pos] as char == quote_char {
1055                        builder.token(
1056                            SyntaxKind::CHUNK_OPTION_QUOTE.into(),
1057                            &content[pos..pos + 1],
1058                        );
1059                        pos += 1;
1060                    }
1061                } else {
1062                    // Unquoted value - read until comma, space, closing brace, or balanced delimiter
1063                    let val_start = pos;
1064                    let mut depth = 0;
1065
1066                    while pos < bytes.len() {
1067                        let ch = bytes[pos] as char;
1068                        match ch {
1069                            '(' | '[' | '{' => depth += 1,
1070                            ')' | ']' => {
1071                                if depth > 0 {
1072                                    depth -= 1;
1073                                } else {
1074                                    break;
1075                                }
1076                            }
1077                            '}' => {
1078                                if depth > 0 {
1079                                    depth -= 1;
1080                                } else {
1081                                    break; // End of chunk options
1082                                }
1083                            }
1084                            ',' => {
1085                                if depth == 0 {
1086                                    break; // Next option
1087                                }
1088                            }
1089                            ' ' | '\t' => {
1090                                if depth == 0 {
1091                                    break; // Space separator
1092                                }
1093                            }
1094                            _ => {}
1095                        }
1096                        pos += 1;
1097                    }
1098
1099                    if pos > val_start {
1100                        builder.token(
1101                            SyntaxKind::CHUNK_OPTION_VALUE.into(),
1102                            &content[val_start..pos],
1103                        );
1104                    }
1105                }
1106            }
1107
1108            builder.finish_node(); // CHUNK_OPTION
1109        } else {
1110            // No '=' - this is a label or bareword option
1111            // Emit any whitespace we skipped as TEXT
1112            if pos > ws_before_eq_start {
1113                builder.start_node(SyntaxKind::CHUNK_LABEL.into());
1114                builder.token(SyntaxKind::TEXT.into(), key);
1115                builder.finish_node(); // CHUNK_LABEL
1116                builder.token(SyntaxKind::TEXT.into(), &content[ws_before_eq_start..pos]);
1117            } else {
1118                builder.start_node(SyntaxKind::CHUNK_LABEL.into());
1119                builder.token(SyntaxKind::TEXT.into(), key);
1120                builder.finish_node(); // CHUNK_LABEL
1121            }
1122        }
1123    }
1124
1125    builder.finish_node(); // CHUNK_OPTIONS
1126}
1127
1128/// Helper to parse info string and emit CodeInfo node with parsed components.
1129/// This breaks down the info string into its logical parts while preserving all bytes.
1130fn emit_code_info_node(builder: &mut GreenNodeBuilder<'static>, info_string: &str) {
1131    builder.start_node(SyntaxKind::CODE_INFO.into());
1132
1133    let info = InfoString::parse(info_string);
1134
1135    match &info.block_type {
1136        CodeBlockType::DisplayShortcut { language } => {
1137            // Simple case: python or python {.class}
1138            builder.token(SyntaxKind::CODE_LANGUAGE.into(), language);
1139
1140            // If there's more after the language, emit it as TEXT
1141            let after_lang = &info_string[language.len()..];
1142            if !after_lang.is_empty() {
1143                builder.token(SyntaxKind::TEXT.into(), after_lang);
1144            }
1145        }
1146        CodeBlockType::Executable { language } => {
1147            // Quarto: {r} or {r my-label, echo=FALSE}
1148            builder.token(SyntaxKind::TEXT.into(), "{");
1149            builder.token(SyntaxKind::CODE_LANGUAGE.into(), language);
1150
1151            // Parse and emit chunk options
1152            let start_offset = 1 + language.len(); // Skip "{r"
1153            if start_offset < info_string.len() {
1154                let rest = &info_string[start_offset..];
1155                emit_chunk_options(builder, rest);
1156            }
1157        }
1158        CodeBlockType::DisplayExplicit { classes } => {
1159            // Pandoc: {.python} or {#id .haskell .numberLines}
1160            // We need to find the first class in the raw string and emit everything around it
1161
1162            if let Some(lang) = classes.first() {
1163                // Find where ".lang" appears in the info string
1164                let needle = format!(".{}", lang);
1165                if let Some(lang_start) = info_string.find(&needle) {
1166                    // Emit everything before the language
1167                    if lang_start > 0 {
1168                        builder.token(SyntaxKind::TEXT.into(), &info_string[..lang_start]);
1169                    }
1170
1171                    // Emit the dot
1172                    builder.token(SyntaxKind::TEXT.into(), ".");
1173
1174                    // Emit the language
1175                    builder.token(SyntaxKind::CODE_LANGUAGE.into(), lang);
1176
1177                    // Emit everything after
1178                    let after_lang_start = lang_start + 1 + lang.len();
1179                    if after_lang_start < info_string.len() {
1180                        builder.token(SyntaxKind::TEXT.into(), &info_string[after_lang_start..]);
1181                    }
1182                } else {
1183                    // Couldn't find it, just emit as TEXT
1184                    builder.token(SyntaxKind::TEXT.into(), info_string);
1185                }
1186            } else {
1187                // No classes
1188                builder.token(SyntaxKind::TEXT.into(), info_string);
1189            }
1190        }
1191        CodeBlockType::Raw { .. } | CodeBlockType::Plain => {
1192            // No language, just emit as TEXT
1193            builder.token(SyntaxKind::TEXT.into(), info_string);
1194        }
1195    }
1196
1197    builder.finish_node(); // CodeInfo
1198}
1199
1200/// Parse a fenced code block, consuming lines from the parser.
1201/// Returns the new position after the code block.
1202/// Parse a fenced code block, consuming lines from the parser.
1203/// Returns the new position after the code block.
1204/// base_indent accounts for container indentation (e.g., footnotes) that should be stripped.
1205pub(crate) fn parse_fenced_code_block(
1206    builder: &mut GreenNodeBuilder<'static>,
1207    lines: &[&str],
1208    start_pos: usize,
1209    fence: FenceInfo,
1210    bq_depth: usize,
1211    base_indent: usize,
1212    first_line_override: Option<&str>,
1213) -> usize {
1214    // Start code block
1215    builder.start_node(SyntaxKind::CODE_BLOCK.into());
1216
1217    // Opening fence
1218    let (first_trimmed, _first_inner) = prepare_fence_open_line(
1219        builder,
1220        lines[start_pos],
1221        first_line_override,
1222        bq_depth,
1223        base_indent,
1224    );
1225
1226    builder.start_node(SyntaxKind::CODE_FENCE_OPEN.into());
1227    builder.token(
1228        SyntaxKind::CODE_FENCE_MARKER.into(),
1229        &first_trimmed[..fence.fence_count],
1230    );
1231
1232    // Emit any space between fence and info string (for losslessness)
1233    let after_fence = &first_trimmed[fence.fence_count..];
1234    if let Some(_space_stripped) = after_fence.strip_prefix(' ') {
1235        // There was a space - emit it as WHITESPACE
1236        builder.token(SyntaxKind::WHITESPACE.into(), " ");
1237        // Parse and emit the info string as a structured node
1238        if !fence.info_string.is_empty() {
1239            emit_code_info_node(builder, &fence.info_string);
1240        }
1241    } else if !fence.info_string.is_empty() {
1242        // No space - parse and emit info_string as a structured node
1243        emit_code_info_node(builder, &fence.info_string);
1244    }
1245
1246    // Extract and emit the actual newline from the opening fence line
1247    let (_, newline_str) = strip_newline(first_trimmed);
1248    if !newline_str.is_empty() {
1249        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1250    }
1251    builder.finish_node(); // CodeFenceOpen
1252
1253    let mut current_pos = start_pos + 1;
1254    let mut content_lines: Vec<&str> = Vec::new(); // Store original lines for lossless parsing
1255    let mut found_closing = false;
1256
1257    while current_pos < lines.len() {
1258        let line = lines[current_pos];
1259
1260        // Count blockquote markers to detect leaving the surrounding blockquote.
1261        let (line_bq_depth, _) = count_blockquote_markers(line);
1262
1263        // If blockquote depth decreases, code block ends (we've left the blockquote)
1264        if line_bq_depth < bq_depth {
1265            break;
1266        }
1267
1268        // Strip exactly the surrounding blockquote depth; preserve any additional `>` literally.
1269        let inner = if bq_depth > 0 {
1270            strip_n_blockquote_markers(line, bq_depth)
1271        } else {
1272            line
1273        };
1274
1275        // Strip base indent (footnote context) from content lines for fence detection
1276        let base_indent_bytes = byte_index_at_column(inner, base_indent);
1277        let inner_stripped = if base_indent > 0 && inner.len() >= base_indent_bytes {
1278            &inner[base_indent_bytes..]
1279        } else {
1280            inner
1281        };
1282
1283        // Check for closing fence
1284        if is_closing_fence(inner_stripped, &fence) {
1285            found_closing = true;
1286            current_pos += 1;
1287            break;
1288        }
1289
1290        // Store the original line for lossless parsing.
1291        content_lines.push(line);
1292        current_pos += 1;
1293    }
1294
1295    // Add content
1296    if !content_lines.is_empty() {
1297        builder.start_node(SyntaxKind::CODE_CONTENT.into());
1298        let hashpipe_prefix = match InfoString::parse(&fence.info_string).block_type {
1299            CodeBlockType::Executable { language } => hashpipe_comment_prefix(&language),
1300            _ => None,
1301        };
1302
1303        let mut line_idx = 0usize;
1304        if let Some(prefix) = hashpipe_prefix {
1305            let prepared_hashpipe_lines =
1306                compute_hashpipe_preamble_line_count(&content_lines, prefix, bq_depth, base_indent);
1307            if prepared_hashpipe_lines > 0 {
1308                builder.start_node(SyntaxKind::HASHPIPE_YAML_PREAMBLE.into());
1309                builder.start_node(SyntaxKind::HASHPIPE_YAML_CONTENT.into());
1310                while line_idx < prepared_hashpipe_lines {
1311                    let content_line = content_lines[line_idx];
1312                    let preview_after_indent =
1313                        strip_content_line_prefixes(content_line, bq_depth, base_indent);
1314                    let (preview_without_newline, _) = strip_newline(preview_after_indent);
1315                    if !is_hashpipe_option_line(preview_without_newline, prefix) {
1316                        break;
1317                    }
1318
1319                    let after_indent =
1320                        emit_content_line_prefixes(builder, content_line, bq_depth, base_indent);
1321                    let (line_without_newline, newline_str) = strip_newline(after_indent);
1322                    let _ = emit_hashpipe_option_line(builder, line_without_newline, prefix);
1323                    if !newline_str.is_empty() {
1324                        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1325                    }
1326                    line_idx += 1;
1327
1328                    let option_value = hashpipe_option_value(line_without_newline, prefix);
1329                    let mut multiline_value = option_value
1330                        .as_ref()
1331                        .filter(|value| is_unclosed_double_quoted(value))
1332                        .cloned();
1333                    let in_block_scalar = option_value
1334                        .as_ref()
1335                        .is_some_and(|value| is_yaml_block_scalar_indicator(value));
1336                    let in_indented_value =
1337                        option_value.as_ref().is_some_and(|value| value.is_empty());
1338
1339                    while multiline_value.is_some() || in_block_scalar || in_indented_value {
1340                        if line_idx >= prepared_hashpipe_lines || line_idx >= content_lines.len() {
1341                            break;
1342                        }
1343                        let continuation_line = content_lines[line_idx];
1344                        let continuation_preview =
1345                            strip_content_line_prefixes(continuation_line, bq_depth, base_indent);
1346                        let (continuation_without_newline, _continuation_newline) =
1347                            strip_newline(continuation_preview);
1348
1349                        if in_block_scalar || in_indented_value {
1350                            if !is_hashpipe_block_scalar_continuation_line(
1351                                continuation_without_newline,
1352                                prefix,
1353                            ) {
1354                                break;
1355                            }
1356                            let continuation_after_indent = emit_content_line_prefixes(
1357                                builder,
1358                                continuation_line,
1359                                bq_depth,
1360                                base_indent,
1361                            );
1362                            let (continuation_without_newline, continuation_newline) =
1363                                strip_newline(continuation_after_indent);
1364                            if !emit_hashpipe_continuation_line(
1365                                builder,
1366                                continuation_without_newline,
1367                                prefix,
1368                            ) {
1369                                break;
1370                            }
1371                            if !continuation_newline.is_empty() {
1372                                builder.token(SyntaxKind::NEWLINE.into(), continuation_newline);
1373                            }
1374                            line_idx += 1;
1375                            continue;
1376                        }
1377
1378                        if let Some(mut current_value) = multiline_value.take() {
1379                            let Some(continuation_value) =
1380                                hashpipe_continuation_value(continuation_without_newline, prefix)
1381                            else {
1382                                break;
1383                            };
1384                            let continuation_after_indent = emit_content_line_prefixes(
1385                                builder,
1386                                continuation_line,
1387                                bq_depth,
1388                                base_indent,
1389                            );
1390                            let (continuation_without_newline, continuation_newline) =
1391                                strip_newline(continuation_after_indent);
1392
1393                            if !emit_hashpipe_continuation_line(
1394                                builder,
1395                                continuation_without_newline,
1396                                prefix,
1397                            ) {
1398                                break;
1399                            }
1400                            if !continuation_newline.is_empty() {
1401                                builder.token(SyntaxKind::NEWLINE.into(), continuation_newline);
1402                            }
1403                            line_idx += 1;
1404
1405                            if !current_value.ends_with(' ') {
1406                                current_value.push(' ');
1407                            }
1408                            current_value.push_str(&continuation_value);
1409                            if is_unclosed_double_quoted(&current_value) {
1410                                multiline_value = Some(current_value);
1411                            }
1412                        }
1413                    }
1414                }
1415                builder.finish_node(); // HASHPIPE_YAML_CONTENT
1416                builder.finish_node(); // HASHPIPE_YAML_PREAMBLE
1417            }
1418        }
1419
1420        for content_line in content_lines.iter().skip(line_idx) {
1421            let after_indent =
1422                emit_content_line_prefixes(builder, content_line, bq_depth, base_indent);
1423            let (line_without_newline, newline_str) = strip_newline(after_indent);
1424
1425            if !line_without_newline.is_empty() {
1426                builder.token(SyntaxKind::TEXT.into(), line_without_newline);
1427            }
1428
1429            if !newline_str.is_empty() {
1430                builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1431            }
1432        }
1433        builder.finish_node(); // CodeContent
1434    }
1435
1436    // Closing fence (if found)
1437    if found_closing {
1438        let closing_line = lines[current_pos - 1];
1439        let closing_after_blockquote = if bq_depth > 0 {
1440            let stripped = strip_n_blockquote_markers(closing_line, bq_depth);
1441            let prefix_len = closing_line.len().saturating_sub(stripped.len());
1442            if prefix_len > 0 {
1443                emit_blockquote_prefix_tokens(builder, &closing_line[..prefix_len]);
1444            }
1445            stripped
1446        } else {
1447            closing_line
1448        };
1449
1450        // Emit base indent for lossless parsing
1451        let base_indent_bytes = byte_index_at_column(closing_after_blockquote, base_indent);
1452        if base_indent > 0 && closing_after_blockquote.len() >= base_indent_bytes {
1453            let indent_str = &closing_after_blockquote[..base_indent_bytes];
1454            if !indent_str.is_empty() {
1455                builder.token(SyntaxKind::WHITESPACE.into(), indent_str);
1456            }
1457        }
1458
1459        // Strip base indent to get fence
1460        let closing_stripped =
1461            if base_indent > 0 && closing_after_blockquote.len() >= base_indent_bytes {
1462                &closing_after_blockquote[base_indent_bytes..]
1463            } else {
1464                closing_after_blockquote
1465            };
1466        let (closing_without_newline, newline_str) = strip_newline(closing_stripped);
1467        let closing_trimmed_start = strip_leading_spaces(closing_without_newline);
1468        let leading_ws_len = closing_without_newline.len() - closing_trimmed_start.len();
1469        let closing_count = closing_trimmed_start
1470            .chars()
1471            .take_while(|&c| c == fence.fence_char)
1472            .count();
1473        let trailing_after_marker = &closing_trimmed_start[closing_count..];
1474
1475        builder.start_node(SyntaxKind::CODE_FENCE_CLOSE.into());
1476        if leading_ws_len > 0 {
1477            builder.token(
1478                SyntaxKind::WHITESPACE.into(),
1479                &closing_without_newline[..leading_ws_len],
1480            );
1481        }
1482        builder.token(
1483            SyntaxKind::CODE_FENCE_MARKER.into(),
1484            &closing_trimmed_start[..closing_count],
1485        );
1486        if !trailing_after_marker.is_empty() {
1487            builder.token(SyntaxKind::WHITESPACE.into(), trailing_after_marker);
1488        }
1489        if !newline_str.is_empty() {
1490            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1491        }
1492        builder.finish_node(); // CodeFenceClose
1493    }
1494
1495    builder.finish_node(); // CodeBlock
1496
1497    current_pos
1498}
1499
1500/// Parse a GFM math fence (``` math ... ```) as DISPLAY_MATH while preserving bytes.
1501pub(crate) fn parse_fenced_math_block(
1502    builder: &mut GreenNodeBuilder<'static>,
1503    lines: &[&str],
1504    start_pos: usize,
1505    fence: FenceInfo,
1506    bq_depth: usize,
1507    base_indent: usize,
1508    first_line_override: Option<&str>,
1509) -> usize {
1510    builder.start_node(SyntaxKind::DISPLAY_MATH.into());
1511
1512    let (first_trimmed, _first_inner) = prepare_fence_open_line(
1513        builder,
1514        lines[start_pos],
1515        first_line_override,
1516        bq_depth,
1517        base_indent,
1518    );
1519    let (opening_without_newline, opening_newline) = strip_newline(first_trimmed);
1520    builder.token(
1521        SyntaxKind::DISPLAY_MATH_MARKER.into(),
1522        opening_without_newline,
1523    );
1524    if !opening_newline.is_empty() {
1525        builder.token(SyntaxKind::NEWLINE.into(), opening_newline);
1526    }
1527
1528    let mut current_pos = start_pos + 1;
1529    let mut content_lines: Vec<&str> = Vec::new();
1530    let mut found_closing = false;
1531
1532    while current_pos < lines.len() {
1533        let line = lines[current_pos];
1534        let (line_bq_depth, _) = count_blockquote_markers(line);
1535        if line_bq_depth < bq_depth {
1536            break;
1537        }
1538
1539        let inner = if bq_depth > 0 {
1540            strip_n_blockquote_markers(line, bq_depth)
1541        } else {
1542            line
1543        };
1544        let base_indent_bytes = byte_index_at_column(inner, base_indent);
1545        let inner_stripped = if base_indent > 0 && inner.len() >= base_indent_bytes {
1546            &inner[base_indent_bytes..]
1547        } else {
1548            inner
1549        };
1550
1551        if is_closing_fence(inner_stripped, &fence) {
1552            found_closing = true;
1553            current_pos += 1;
1554            break;
1555        }
1556
1557        content_lines.push(line);
1558        current_pos += 1;
1559    }
1560
1561    if !content_lines.is_empty() {
1562        let mut content = String::new();
1563        for content_line in content_lines {
1564            let after_indent =
1565                emit_content_line_prefixes(builder, content_line, bq_depth, base_indent);
1566            let (line_without_newline, newline_str) = strip_newline(after_indent);
1567            content.push_str(line_without_newline);
1568            content.push_str(newline_str);
1569        }
1570        builder.token(SyntaxKind::TEXT.into(), &content);
1571    }
1572
1573    if found_closing {
1574        let closing_line = lines[current_pos - 1];
1575        let closing_after_blockquote = if bq_depth > 0 {
1576            let stripped = strip_n_blockquote_markers(closing_line, bq_depth);
1577            let prefix_len = closing_line.len().saturating_sub(stripped.len());
1578            if prefix_len > 0 {
1579                emit_blockquote_prefix_tokens(builder, &closing_line[..prefix_len]);
1580            }
1581            stripped
1582        } else {
1583            closing_line
1584        };
1585
1586        let base_indent_bytes = byte_index_at_column(closing_after_blockquote, base_indent);
1587        if base_indent > 0 && closing_after_blockquote.len() >= base_indent_bytes {
1588            let indent_str = &closing_after_blockquote[..base_indent_bytes];
1589            if !indent_str.is_empty() {
1590                builder.token(SyntaxKind::WHITESPACE.into(), indent_str);
1591            }
1592        }
1593
1594        let closing_stripped =
1595            if base_indent > 0 && closing_after_blockquote.len() >= base_indent_bytes {
1596                &closing_after_blockquote[base_indent_bytes..]
1597            } else {
1598                closing_after_blockquote
1599            };
1600        let (closing_without_newline, newline_str) = strip_newline(closing_stripped);
1601        let closing_trimmed_start = strip_leading_spaces(closing_without_newline);
1602        let leading_ws_len = closing_without_newline.len() - closing_trimmed_start.len();
1603        let closing_count = closing_trimmed_start
1604            .chars()
1605            .take_while(|&c| c == fence.fence_char)
1606            .count();
1607        let trailing_after_marker = &closing_trimmed_start[closing_count..];
1608
1609        if leading_ws_len > 0 {
1610            builder.token(
1611                SyntaxKind::WHITESPACE.into(),
1612                &closing_without_newline[..leading_ws_len],
1613            );
1614        }
1615        builder.token(
1616            SyntaxKind::DISPLAY_MATH_MARKER.into(),
1617            &closing_trimmed_start[..closing_count],
1618        );
1619        if !trailing_after_marker.is_empty() {
1620            builder.token(SyntaxKind::WHITESPACE.into(), trailing_after_marker);
1621        }
1622        if !newline_str.is_empty() {
1623            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1624        }
1625    }
1626
1627    builder.finish_node(); // DisplayMath
1628    current_pos
1629}
1630
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `key=value` attribute entry in the shape stored by `InfoString`.
    fn pair(key: &str, value: &str) -> (String, Option<String>) {
        (key.to_string(), Some(value.to_string()))
    }

    /// Builds a value-less attribute entry (a bare token such as `.class` or `#id`).
    fn bare(key: &str) -> (String, Option<String>) {
        (key.to_string(), None)
    }

    /// Expected block type for shortcut syntax such as "```python".
    fn shortcut_block(language: &str) -> CodeBlockType {
        CodeBlockType::DisplayShortcut {
            language: language.to_string(),
        }
    }

    /// Expected block type for an executable chunk such as "```{python}".
    fn executable_block(language: &str) -> CodeBlockType {
        CodeBlockType::Executable {
            language: language.to_string(),
        }
    }

    /// Expected block type for a raw block such as "```{=html}".
    fn raw_block(format: &str) -> CodeBlockType {
        CodeBlockType::Raw {
            format: format.to_string(),
        }
    }

    /// Expected block type for explicit display syntax such as "```{.python}".
    fn explicit_block(classes: &[&str]) -> CodeBlockType {
        CodeBlockType::DisplayExplicit {
            classes: classes.iter().map(|class| class.to_string()).collect(),
        }
    }

    /// Parses `input` through the crate entry point and checks that the tree
    /// text reproduces the input exactly.
    fn assert_lossless_roundtrip(input: &str) {
        let tree = crate::parse(input, None);
        assert_eq!(tree.text().to_string(), input);
    }

    // ---- Fence detection -------------------------------------------------

    #[test]
    fn test_backtick_fence() {
        let opened = try_parse_fence_open("```python").unwrap();
        assert_eq!(
            (
                opened.fence_char,
                opened.fence_count,
                opened.info_string.as_str()
            ),
            ('`', 3, "python")
        );
    }

    #[test]
    fn test_tilde_fence() {
        let opened = try_parse_fence_open("~~~").unwrap();
        assert_eq!(
            (
                opened.fence_char,
                opened.fence_count,
                opened.info_string.as_str()
            ),
            ('~', 3, "")
        );
    }

    #[test]
    fn test_long_fence() {
        let opened = try_parse_fence_open("`````").unwrap();
        assert_eq!(opened.fence_count, 5);
    }

    #[test]
    fn test_two_backticks_invalid() {
        // Two fence characters are not enough to open a block.
        let attempt = try_parse_fence_open("``");
        assert!(attempt.is_none());
    }

    #[test]
    fn test_backtick_fence_with_backtick_in_info_is_invalid() {
        // Backticks appearing after the opening run must reject the fence.
        let attempt = try_parse_fence_open("`````hi````there`````");
        assert!(attempt.is_none());
    }

    #[test]
    fn test_closing_fence() {
        let opener = FenceInfo {
            fence_char: '`',
            fence_count: 3,
            info_string: String::new(),
        };
        // (candidate line, whether it should close a three-backtick fence)
        let cases = [
            ("```", true),
            ("````", true),
            ("``", false),
            ("~~~", false),
        ];
        for (candidate, should_close) in cases {
            assert_eq!(is_closing_fence(candidate, &opener), should_close);
        }
    }

    // ---- Lossless round-trips through the full parser ---------------------

    #[test]
    fn test_fenced_code_preserves_leading_gt() {
        assert_lossless_roundtrip("```\n> foo\n```\n");
    }

    #[test]
    fn test_fenced_code_in_blockquote_preserves_opening_fence_marker() {
        assert_lossless_roundtrip("> ```\n> code\n> ```\n");
    }

    #[test]
    fn test_fenced_code_in_definition_list_with_unicode_content_does_not_panic() {
        // Multi-byte content inside a fence nested in a definition list.
        assert_lossless_roundtrip("Term\n: ```\nā”œā”€ā”€ pyproject.toml\n```\n");
    }

    // ---- InfoString::parse: plain and shortcut forms ----------------------

    #[test]
    fn test_info_string_plain() {
        let parsed = InfoString::parse("");
        assert_eq!(parsed.block_type, CodeBlockType::Plain);
        assert!(parsed.attributes.is_empty());
    }

    #[test]
    fn test_info_string_shortcut() {
        let parsed = InfoString::parse("python");
        assert_eq!(parsed.block_type, shortcut_block("python"));
        assert!(parsed.attributes.is_empty());
    }

    #[test]
    fn test_info_string_shortcut_with_trailing() {
        // Only the first word is taken as the language.
        let parsed = InfoString::parse("python extra stuff");
        assert_eq!(parsed.block_type, shortcut_block("python"));
    }

    // ---- InfoString::parse: explicit display blocks -----------------------

    #[test]
    fn test_info_string_display_explicit() {
        let parsed = InfoString::parse("{.python}");
        assert_eq!(parsed.block_type, explicit_block(&["python"]));
    }

    #[test]
    fn test_info_string_display_explicit_multiple() {
        let parsed = InfoString::parse("{.python .numberLines}");
        assert_eq!(
            parsed.block_type,
            explicit_block(&["python", "numberLines"])
        );
    }

    // ---- InfoString::parse: executable chunks -----------------------------

    #[test]
    fn test_info_string_executable() {
        let parsed = InfoString::parse("{python}");
        assert_eq!(parsed.block_type, executable_block("python"));
    }

    #[test]
    fn test_info_string_executable_with_options() {
        let parsed = InfoString::parse("{python echo=false warning=true}");
        assert_eq!(parsed.block_type, executable_block("python"));
        assert_eq!(
            parsed.attributes,
            vec![pair("echo", "false"), pair("warning", "true")]
        );
    }

    #[test]
    fn test_info_string_executable_with_commas() {
        let parsed = InfoString::parse("{r, echo=FALSE, warning=TRUE}");
        assert_eq!(parsed.block_type, executable_block("r"));
        assert_eq!(
            parsed.attributes,
            vec![pair("echo", "FALSE"), pair("warning", "TRUE")]
        );
    }

    #[test]
    fn test_info_string_executable_mixed_commas_spaces() {
        // Comma-separated options where one quoted value contains a space.
        let parsed = InfoString::parse("{r, echo=FALSE, label=\"my chunk\"}");
        assert_eq!(parsed.block_type, executable_block("r"));
        assert_eq!(
            parsed.attributes,
            vec![pair("echo", "FALSE"), pair("label", "my chunk")]
        );
    }

    // ---- InfoString::parse: shortcut language followed by attributes ------

    #[test]
    fn test_info_string_mixed_shortcut_and_attrs() {
        let parsed = InfoString::parse("python {.numberLines}");
        assert_eq!(parsed.block_type, shortcut_block("python"));
        assert_eq!(parsed.attributes, vec![bare(".numberLines")]);
    }

    #[test]
    fn test_info_string_mixed_with_key_value() {
        let parsed = InfoString::parse("python {.numberLines startFrom=\"100\"}");
        assert_eq!(parsed.block_type, shortcut_block("python"));
        assert_eq!(
            parsed.attributes,
            vec![bare(".numberLines"), pair("startFrom", "100")]
        );
    }

    #[test]
    fn test_info_string_explicit_with_id_and_classes() {
        let parsed = InfoString::parse("{#mycode .haskell .numberLines startFrom=\"100\"}");
        assert_eq!(
            parsed.block_type,
            explicit_block(&["haskell", "numberLines"])
        );
        // The id and the key=value pair are expected among the attributes.
        let id_present = parsed.attributes.iter().any(|(key, _)| key == "#mycode");
        let start_present = parsed.attributes.contains(&pair("startFrom", "100"));
        assert!(id_present);
        assert!(start_present);
    }

    // ---- InfoString::parse: raw blocks ------------------------------------

    #[test]
    fn test_info_string_raw_html() {
        let parsed = InfoString::parse("{=html}");
        assert_eq!(parsed.block_type, raw_block("html"));
        assert!(parsed.attributes.is_empty());
    }

    #[test]
    fn test_info_string_raw_latex() {
        let parsed = InfoString::parse("{=latex}");
        assert_eq!(parsed.block_type, raw_block("latex"));
    }

    #[test]
    fn test_info_string_raw_openxml() {
        let parsed = InfoString::parse("{=openxml}");
        assert_eq!(parsed.block_type, raw_block("openxml"));
    }

    #[test]
    fn test_info_string_raw_ms() {
        let parsed = InfoString::parse("{=ms}");
        assert_eq!(parsed.block_type, raw_block("ms"));
    }

    #[test]
    fn test_info_string_raw_html5() {
        let parsed = InfoString::parse("{=html5}");
        assert_eq!(parsed.block_type, raw_block("html5"));
    }

    #[test]
    fn test_info_string_raw_not_combined_with_attrs() {
        // A `=format` token accompanied by another attribute must not yield
        // the raw-html block type.
        let parsed = InfoString::parse("{=html .class}");
        assert_ne!(parsed.block_type, raw_block("html"));
    }

    // ---- Attribute tokenizers ---------------------------------------------

    #[test]
    fn test_parse_pandoc_attributes_spaces() {
        // Space-separated classes followed by a quoted key=value pair.
        let parsed = InfoString::parse_pandoc_attributes(".python .numberLines startFrom=\"10\"");
        assert_eq!(parsed.len(), 3);
        assert_eq!(parsed[0], bare(".python"));
        assert_eq!(parsed[1], bare(".numberLines"));
        assert_eq!(parsed[2], pair("startFrom", "10"));
    }

    #[test]
    fn test_parse_pandoc_attributes_no_commas() {
        // Id, class and key=value separated by spaces only (no commas present).
        let parsed = InfoString::parse_pandoc_attributes("#id .class key=value");
        assert_eq!(parsed.len(), 3);
        assert_eq!(parsed[0], bare("#id"));
        assert_eq!(parsed[1], bare(".class"));
        assert_eq!(parsed[2], pair("key", "value"));
    }

    #[test]
    fn test_parse_chunk_options_commas() {
        // Comma-plus-space separated chunk options.
        let parsed = InfoString::parse_chunk_options("r, echo=FALSE, warning=TRUE");
        assert_eq!(parsed.len(), 3);
        assert_eq!(parsed[0], bare("r"));
        assert_eq!(parsed[1], pair("echo", "FALSE"));
        assert_eq!(parsed[2], pair("warning", "TRUE"));
    }

    #[test]
    fn test_parse_chunk_options_no_spaces() {
        // Same options as above, with commas only.
        let parsed = InfoString::parse_chunk_options("r,echo=FALSE,warning=TRUE");
        assert_eq!(parsed.len(), 3);
        assert_eq!(parsed[0], bare("r"));
        assert_eq!(parsed[1], pair("echo", "FALSE"));
        assert_eq!(parsed[2], pair("warning", "TRUE"));
    }

    #[test]
    fn test_parse_chunk_options_mixed() {
        // A space after the language and a comma between the options.
        let parsed = InfoString::parse_chunk_options("python echo=False, warning=True");
        assert_eq!(parsed.len(), 3);
        assert_eq!(parsed[0], bare("python"));
        assert_eq!(parsed[1], pair("echo", "False"));
        assert_eq!(parsed[2], pair("warning", "True"));
    }

    #[test]
    fn test_parse_chunk_options_nested_function_call() {
        // Commas inside a call's parentheses stay within one value.
        let parsed = InfoString::parse_chunk_options(r#"r pep-cg, dependson=c("foo", "bar")"#);
        assert_eq!(parsed.len(), 3);
        assert_eq!(parsed[0], bare("r"));
        assert_eq!(parsed[1], bare("pep-cg"));
        assert_eq!(parsed[2], pair("dependson", r#"c("foo", "bar")"#));
    }

    #[test]
    fn test_parse_chunk_options_nested_with_spaces() {
        // Call value containing both commas and spaces.
        let parsed = InfoString::parse_chunk_options(r#"r, cache.path=file.path("cache", "dir")"#);
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed[0], bare("r"));
        assert_eq!(
            parsed[1],
            pair("cache.path", r#"file.path("cache", "dir")"#)
        );
    }

    #[test]
    fn test_parse_chunk_options_deeply_nested() {
        // Calls nested inside another call.
        let parsed = InfoString::parse_chunk_options(r#"r, x=list(a=c(1,2), b=c(3,4))"#);
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed[0], bare("r"));
        assert_eq!(parsed[1], pair("x", r#"list(a=c(1,2), b=c(3,4))"#));
    }

    #[test]
    fn test_parse_chunk_options_brackets_and_braces() {
        // Square brackets and curly braces also group their contents.
        let parsed =
            InfoString::parse_chunk_options(r#"r, data=df[rows, cols], config={a:1, b:2}"#);
        assert_eq!(parsed.len(), 3);
        assert_eq!(parsed[0], bare("r"));
        assert_eq!(parsed[1], pair("data", "df[rows, cols]"));
        assert_eq!(parsed[2], pair("config", "{a:1, b:2}"));
    }

    #[test]
    fn test_parse_chunk_options_quotes_with_parens() {
        // Parentheses inside a quoted string do not start a group; the
        // expected value has the surrounding quotes removed.
        let parsed =
            InfoString::parse_chunk_options(r#"r, label="test (with parens)", echo=TRUE"#);
        assert_eq!(parsed.len(), 3);
        assert_eq!(parsed[0], bare("r"));
        assert_eq!(parsed[1], pair("label", "test (with parens)"));
        assert_eq!(parsed[2], pair("echo", "TRUE"));
    }

    #[test]
    fn test_parse_chunk_options_escaped_quotes() {
        // Backslash-escaped quotes inside a quoted value; the expected value
        // has the outer quotes removed and the escapes resolved.
        let parsed = InfoString::parse_chunk_options(r#"r, label="has \"quoted\" text""#);
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed[0], bare("r"));
        assert_eq!(parsed[1], pair("label", r#"has "quoted" text"#));
    }

    // ---- Display vs. executable dispatch ----------------------------------

    #[test]
    fn test_display_vs_executable_parsing() {
        // Leading `.class` token: explicit display block.
        let display = InfoString::parse("{.python .numberLines startFrom=\"10\"}");
        assert!(matches!(
            display.block_type,
            CodeBlockType::DisplayExplicit { .. }
        ));

        // Leading bare language token: executable chunk with two options.
        let chunk = InfoString::parse("{r, echo=FALSE, warning=TRUE}");
        assert!(matches!(
            chunk.block_type,
            CodeBlockType::Executable { .. }
        ));
        assert_eq!(chunk.attributes.len(), 2);
    }

    #[test]
    fn test_info_string_executable_implicit_label() {
        // A bare word after the language is reported as `label=<word>`.
        let parsed = InfoString::parse("{r mylabel}");
        assert_eq!(parsed.block_type, executable_block("r"));
        assert_eq!(parsed.attributes, vec![pair("label", "mylabel")]);
    }

    #[test]
    fn test_info_string_executable_implicit_label_with_options() {
        // Implicit label followed by an ordinary option.
        let parsed = InfoString::parse("{r mylabel, echo=FALSE}");
        assert_eq!(parsed.block_type, executable_block("r"));
        assert_eq!(
            parsed.attributes,
            vec![pair("label", "mylabel"), pair("echo", "FALSE")]
        );
    }

    // ---- Hashpipe preamble counting ---------------------------------------

    #[test]
    fn test_compute_hashpipe_preamble_line_count_for_block_scalar() {
        // A block-scalar option and its two continuation lines count as three
        // preamble lines; the code line after them is not counted.
        let body = vec![
            "#| fig-cap: |\n",
            "#|   A caption\n",
            "#|   spanning lines\n",
            "a <- 1\n",
        ];
        assert_eq!(compute_hashpipe_preamble_line_count(&body, "#|", 0, 0), 3);
    }

    #[test]
    fn test_compute_hashpipe_preamble_line_count_stops_at_non_option() {
        // An option line that appears after ordinary code is not counted.
        let body = vec!["#| label: fig-plot\n", "plot(1:10)\n", "#| echo: false\n"];
        assert_eq!(compute_hashpipe_preamble_line_count(&body, "#|", 0, 0), 1);
    }
}