Skip to main content

panache_parser/parser/blocks/
code_blocks.rs

1//! Fenced code block parsing utilities.
2
3use crate::parser::utils::chunk_options::hashpipe_comment_prefix;
4use crate::syntax::SyntaxKind;
5use rowan::GreenNodeBuilder;
6
7use super::blockquotes::{count_blockquote_markers, strip_n_blockquote_markers};
8use super::container_prefix::{StrippedLines, advance_columns};
9use crate::parser::utils::container_stack::byte_index_at_column;
10
11// Container-prefix primitives live in `container_prefix.rs` (the lower
12// layer that hosts `StrippedLines`); re-export so existing call sites in
13// this module, `tables.rs`, `line_blocks.rs`, and `block_dispatcher.rs`
14// keep their `code_blocks::…` import paths working.
15pub(crate) use super::container_prefix::{
16    bq_outer_of_list, emit_blockquote_prefix_tokens, strip_list_indent,
17};
18
19use crate::parser::utils::helpers::{
20    strip_leading_spaces, strip_newline, trim_end_spaces_tabs, trim_start_spaces_tabs,
21};
22
/// Represents the type of code block based on its info string syntax.
///
/// The variant is chosen by [`InfoString::parse`] from the text that follows
/// the opening fence.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CodeBlockType {
    /// Display-only block with shortcut syntax: ```python
    ///
    /// Also produced for the mixed form `python {.numberLines}`; `language`
    /// is the word in front of the brace group, or the first
    /// whitespace-delimited word when there is no brace group.
    DisplayShortcut { language: String },
    /// Display-only block with explicit Pandoc syntax: ```{.python}
    ///
    /// `classes` holds the bare `.class` tokens with the leading dot removed.
    DisplayExplicit { classes: Vec<String> },
    /// Executable chunk (Quarto/RMarkdown): ```{python}
    ///
    /// `language` is the first bare token inside the braces that starts with
    /// neither `.` nor `#`.
    Executable { language: String },
    /// Raw block for specific output format: ```{=html}
    ///
    /// `format` is the alphanumeric name following the `=`.
    Raw { format: String },
    /// No language specified: ```
    ///
    /// Also used for a brace group that carries attributes but no language
    /// or class token.
    Plain,
}
37
/// Parsed attributes from a code block info string.
#[derive(Debug, Clone, PartialEq)]
pub struct InfoString {
    /// The info string exactly as it was handed to `InfoString::parse`
    /// (not trimmed).
    pub raw: String,
    /// Kind of block the info string describes.
    pub block_type: CodeBlockType,
    /// Attributes in parse order; the value is `None` for bare tokens.
    /// For executable chunks an implicit label is stored first under the
    /// key `label`.
    pub attributes: Vec<(String, Option<String>)>, // key-value pairs
}
45
46impl InfoString {
47    /// Parse an info string into structured attributes.
48    pub fn parse(raw: &str) -> Self {
49        let trimmed = raw.trim();
50
51        if trimmed.is_empty() {
52            return InfoString {
53                raw: raw.to_string(),
54                block_type: CodeBlockType::Plain,
55                attributes: Vec::new(),
56            };
57        }
58
59        // Check if it starts with '{' - explicit attribute block
60        if let Some(stripped) = trimmed.strip_prefix('{')
61            && let Some(content) = stripped.strip_suffix('}')
62        {
63            return Self::parse_explicit(raw, content);
64        }
65
66        // Check for mixed form: python {.numberLines}
67        if let Some(brace_start) = trimmed.find('{') {
68            let language = trimmed[..brace_start].trim();
69            if !language.is_empty() && !language.contains(char::is_whitespace) {
70                let attr_part = &trimmed[brace_start..];
71                if let Some(stripped) = attr_part.strip_prefix('{')
72                    && let Some(content) = stripped.strip_suffix('}')
73                {
74                    let attrs = Self::parse_attributes(content);
75                    return InfoString {
76                        raw: raw.to_string(),
77                        block_type: CodeBlockType::DisplayShortcut {
78                            language: language.to_string(),
79                        },
80                        attributes: attrs,
81                    };
82                }
83            }
84        }
85
86        // Otherwise, it's a shortcut form (just the language name)
87        // Only take the first word as language
88        let language = trimmed.split_whitespace().next().unwrap_or(trimmed);
89        InfoString {
90            raw: raw.to_string(),
91            block_type: CodeBlockType::DisplayShortcut {
92                language: language.to_string(),
93            },
94            attributes: Vec::new(),
95        }
96    }
97
98    fn parse_explicit(raw: &str, content: &str) -> Self {
99        // Check for raw attribute FIRST: {=format}
100        // The content should start with '=' and have only alphanumeric chars after
101        let trimmed_content = content.trim();
102        if let Some(format_name) = trimmed_content.strip_prefix('=') {
103            // Validate format name: alphanumeric only, no spaces
104            if !format_name.is_empty()
105                && format_name.chars().all(|c| c.is_alphanumeric())
106                && !format_name.contains(char::is_whitespace)
107            {
108                return InfoString {
109                    raw: raw.to_string(),
110                    block_type: CodeBlockType::Raw {
111                        format: format_name.to_string(),
112                    },
113                    attributes: Vec::new(),
114                };
115            }
116        }
117
118        // First, do a preliminary parse to determine block type
119        // Use chunk options parser (comma-aware) for initial detection
120        let prelim_attrs = Self::parse_chunk_options(content);
121
122        // First non-ID, non-attribute token determines if it's executable or display
123        let mut first_lang_token = None;
124        for (key, val) in prelim_attrs.iter() {
125            if val.is_none() && !key.starts_with('#') {
126                first_lang_token = Some(key.as_str());
127                break;
128            }
129        }
130
131        let first_token = first_lang_token.unwrap_or("");
132
133        if first_token.starts_with('.') {
134            // Display block: {.python} or {.haskell .numberLines}
135            // Re-parse with Pandoc-style parser (space-delimited)
136            let attrs = Self::parse_pandoc_attributes(content);
137
138            let classes: Vec<String> = attrs
139                .iter()
140                .filter(|(k, v)| k.starts_with('.') && v.is_none())
141                .map(|(k, _)| k[1..].to_string())
142                .collect();
143
144            let non_class_attrs: Vec<(String, Option<String>)> = attrs
145                .into_iter()
146                .filter(|(k, _)| !k.starts_with('.') || k.contains('='))
147                .collect();
148
149            InfoString {
150                raw: raw.to_string(),
151                block_type: CodeBlockType::DisplayExplicit { classes },
152                attributes: non_class_attrs,
153            }
154        } else if !first_token.is_empty() && !first_token.starts_with('#') {
155            // Executable chunk: {python} or {r}
156            // Use chunk options parser (comma-delimited)
157            let attrs = Self::parse_chunk_options(content);
158            let lang_index = attrs.iter().position(|(k, _)| k == first_token).unwrap();
159
160            // Check if there's a second bareword (implicit label in R/Quarto chunks)
161            // Pattern: {r mylabel} is equivalent to {r, label=mylabel}.
162            // Skip tokens that are actually class (`.foo`) or id (`#foo`)
163            // attributes — those are not labels.
164            let mut has_implicit_label = false;
165            let implicit_label_value = if lang_index + 1 < attrs.len() {
166                let (label_key, val) = &attrs[lang_index + 1];
167                if val.is_none() && !label_key.starts_with('.') && !label_key.starts_with('#') {
168                    has_implicit_label = true;
169                    Some(label_key.clone())
170                } else {
171                    None
172                }
173            } else {
174                None
175            };
176
177            let mut final_attrs: Vec<(String, Option<String>)> = attrs
178                .into_iter()
179                .enumerate()
180                .filter(|(i, _)| {
181                    // Remove language token
182                    if *i == lang_index {
183                        return false;
184                    }
185                    // Remove implicit label token (will be added back explicitly)
186                    if has_implicit_label && *i == lang_index + 1 {
187                        return false;
188                    }
189                    true
190                })
191                .map(|(_, attr)| attr)
192                .collect();
193
194            // Add explicit label if we found an implicit one
195            if let Some(label_val) = implicit_label_value {
196                final_attrs.insert(0, ("label".to_string(), Some(label_val)));
197            }
198
199            InfoString {
200                raw: raw.to_string(),
201                block_type: CodeBlockType::Executable {
202                    language: first_token.to_string(),
203                },
204                attributes: final_attrs,
205            }
206        } else {
207            // Just attributes, no language - use Pandoc parser
208            let attrs = Self::parse_pandoc_attributes(content);
209            InfoString {
210                raw: raw.to_string(),
211                block_type: CodeBlockType::Plain,
212                attributes: attrs,
213            }
214        }
215    }
216
217    /// Parse Pandoc-style attributes for display blocks: {.class #id key="value"}
218    /// Spaces are the primary delimiter. Pandoc spec prefers explicit quoting.
219    fn parse_pandoc_attributes(content: &str) -> Vec<(String, Option<String>)> {
220        let mut attrs = Vec::new();
221        let mut chars = content.chars().peekable();
222
223        while chars.peek().is_some() {
224            // Skip whitespace
225            while matches!(chars.peek(), Some(&' ') | Some(&'\t')) {
226                chars.next();
227            }
228
229            if chars.peek().is_none() {
230                break;
231            }
232
233            // Read key
234            let mut key = String::new();
235            while let Some(&ch) = chars.peek() {
236                if ch == '=' || ch == ' ' || ch == '\t' {
237                    break;
238                }
239                key.push(ch);
240                chars.next();
241            }
242
243            if key.is_empty() {
244                break;
245            }
246
247            // Skip whitespace
248            while matches!(chars.peek(), Some(&' ') | Some(&'\t')) {
249                chars.next();
250            }
251
252            // Check for value
253            if chars.peek() == Some(&'=') {
254                chars.next(); // consume '='
255
256                // Skip whitespace after '='
257                while matches!(chars.peek(), Some(&' ') | Some(&'\t')) {
258                    chars.next();
259                }
260
261                // Read value (might be quoted)
262                let value = if chars.peek() == Some(&'"') {
263                    chars.next(); // consume opening quote
264                    let mut val = String::new();
265                    while let Some(&ch) = chars.peek() {
266                        chars.next();
267                        if ch == '"' {
268                            break;
269                        }
270                        if ch == '\\' {
271                            if let Some(&next_ch) = chars.peek() {
272                                chars.next();
273                                val.push(next_ch);
274                            }
275                        } else {
276                            val.push(ch);
277                        }
278                    }
279                    val
280                } else {
281                    // Unquoted value - read until space
282                    let mut val = String::new();
283                    while let Some(&ch) = chars.peek() {
284                        if ch == ' ' || ch == '\t' {
285                            break;
286                        }
287                        val.push(ch);
288                        chars.next();
289                    }
290                    val
291                };
292
293                attrs.push((key, Some(value)));
294            } else {
295                attrs.push((key, None));
296            }
297        }
298
299        attrs
300    }
301
302    /// Parse Quarto/RMarkdown chunk options: {language, option=value, option2=value2}
303    /// Commas are the primary delimiter (R CSV style). Supports unquoted barewords.
304    fn parse_chunk_options(content: &str) -> Vec<(String, Option<String>)> {
305        let mut attrs = Vec::new();
306        let mut chars = content.chars().peekable();
307
308        while chars.peek().is_some() {
309            // Skip whitespace and commas
310            while matches!(chars.peek(), Some(&' ') | Some(&'\t') | Some(&',')) {
311                chars.next();
312            }
313
314            if chars.peek().is_none() {
315                break;
316            }
317
318            // Read key
319            let mut key = String::new();
320            while let Some(&ch) = chars.peek() {
321                if ch == '=' || ch == ' ' || ch == '\t' || ch == ',' {
322                    break;
323                }
324                key.push(ch);
325                chars.next();
326            }
327
328            if key.is_empty() {
329                break;
330            }
331
332            // Skip whitespace and commas
333            while matches!(chars.peek(), Some(&' ') | Some(&'\t') | Some(&',')) {
334                chars.next();
335            }
336
337            // Check for value
338            if chars.peek() == Some(&'=') {
339                chars.next(); // consume '='
340
341                // Skip whitespace and commas after '='
342                while matches!(chars.peek(), Some(&' ') | Some(&'\t') | Some(&',')) {
343                    chars.next();
344                }
345
346                // Read value (might be quoted)
347                let value = if chars.peek() == Some(&'"') {
348                    chars.next(); // consume opening quote
349                    let mut val = String::new();
350                    while let Some(&ch) = chars.peek() {
351                        chars.next();
352                        if ch == '"' {
353                            break;
354                        }
355                        if ch == '\\' {
356                            if let Some(&next_ch) = chars.peek() {
357                                chars.next();
358                                val.push(next_ch);
359                            }
360                        } else {
361                            val.push(ch);
362                        }
363                    }
364                    val
365                } else {
366                    // Unquoted value - read until comma, space, or tab at depth 0
367                    // Track nesting depth for (), [], {} and quote state
368                    let mut val = String::new();
369                    let mut depth = 0; // Track parentheses/brackets/braces depth
370                    let mut in_quote: Option<char> = None; // Track if inside ' or "
371                    let mut escaped = false; // Track if previous char was backslash
372
373                    while let Some(&ch) = chars.peek() {
374                        // Handle escape sequences
375                        if escaped {
376                            val.push(ch);
377                            chars.next();
378                            escaped = false;
379                            continue;
380                        }
381
382                        if ch == '\\' {
383                            val.push(ch);
384                            chars.next();
385                            escaped = true;
386                            continue;
387                        }
388
389                        // Handle quotes
390                        if let Some(quote_char) = in_quote {
391                            val.push(ch);
392                            chars.next();
393                            if ch == quote_char {
394                                in_quote = None; // Close quote
395                            }
396                            continue;
397                        }
398
399                        // Not in a quote - check for quote start
400                        if ch == '"' || ch == '\'' {
401                            in_quote = Some(ch);
402                            val.push(ch);
403                            chars.next();
404                            continue;
405                        }
406
407                        // Track nesting depth (only when not in quotes)
408                        if ch == '(' || ch == '[' || ch == '{' {
409                            depth += 1;
410                            val.push(ch);
411                            chars.next();
412                            continue;
413                        }
414
415                        if ch == ')' || ch == ']' || ch == '}' {
416                            depth -= 1;
417                            val.push(ch);
418                            chars.next();
419                            continue;
420                        }
421
422                        // Check for delimiters - only break at depth 0
423                        if depth == 0 && (ch == ' ' || ch == '\t' || ch == ',') {
424                            break;
425                        }
426
427                        // Regular character
428                        val.push(ch);
429                        chars.next();
430                    }
431                    val
432                };
433
434                attrs.push((key, Some(value)));
435            } else {
436                attrs.push((key, None));
437            }
438        }
439
440        attrs
441    }
442
443    /// Legacy function - kept for backward compatibility in mixed-form parsing
444    /// For new code, use parse_pandoc_attributes or parse_chunk_options
445    fn parse_attributes(content: &str) -> Vec<(String, Option<String>)> {
446        // Default to chunk options parsing (comma-aware)
447        Self::parse_chunk_options(content)
448    }
449}
450
/// Information about a detected code fence opening.
#[derive(Debug, Clone)]
pub(crate) struct FenceInfo {
    /// The fence character: '`' or '~' when produced by `try_parse_fence_open`.
    pub fence_char: char,
    /// Length of the opening run of `fence_char` (at least 3 when produced
    /// by `try_parse_fence_open`).
    pub fence_count: usize,
    /// Text following the fence run, with the line ending and at most one
    /// leading space removed.
    pub info_string: String,
}
458
459pub(crate) fn is_gfm_math_fence(fence: &FenceInfo) -> bool {
460    fence.info_string.trim() == "math"
461}
462
463/// Try to detect a fenced code block opening from content.
464/// Returns fence info if this is a valid opening fence.
465pub(crate) fn try_parse_fence_open(content: &str) -> Option<FenceInfo> {
466    let trimmed = strip_leading_spaces(content);
467
468    // Check for fence opening (``` or ~~~)
469    let (fence_char, fence_count) = if trimmed.starts_with('`') {
470        let count = trimmed.chars().take_while(|&c| c == '`').count();
471        ('`', count)
472    } else if trimmed.starts_with('~') {
473        let count = trimmed.chars().take_while(|&c| c == '~').count();
474        ('~', count)
475    } else {
476        return None;
477    };
478
479    if fence_count < 3 {
480        return None;
481    }
482
483    let info_string_raw = &trimmed[fence_count..];
484    // Strip trailing newline (LF or CRLF) and at most one leading space
485    let (info_string_trimmed, _) = strip_newline(info_string_raw);
486    let info_string = if let Some(stripped) = info_string_trimmed.strip_prefix(' ') {
487        stripped.to_string()
488    } else {
489        info_string_trimmed.to_string()
490    };
491
492    // Backtick-fenced blocks cannot have backticks in the info string.
493    if fence_char == '`' && info_string.contains('`') {
494        return None;
495    }
496
497    Some(FenceInfo {
498        fence_char,
499        fence_count,
500        info_string,
501    })
502}
503
/// Consume the container prefix in front of a fence-opening line, emitting
/// the tokens this function is responsible for, and return the remaining
/// content.
///
/// Returns `(trimmed, untrimmed)`: `untrimmed` is the line after the
/// container prefixes were removed, `trimmed` is the same text after
/// `strip_leading_spaces`. The spaces between the two are emitted as a
/// WHITESPACE token here.
///
/// * `source_line` - the full line as it appears in the source.
/// * `first_line_override` - when `Some`, it is used as the line content
///   and no list/content-indent stripping is done; blockquote prefix tokens
///   are emitted only if `bq_depth > 0` and it differs from `source_line`.
/// * `bq_depth` - number of blockquote markers to strip.
/// * `list_content_col` - list indent, in columns, to strip (0 disables).
/// * `list_marker_consumed_on_line_0` - when `true` the list columns are
///   skipped with `advance_columns` and not emitted as WHITESPACE.
/// * `bq_outer` - `true` strips blockquote markers before the list indent,
///   `false` strips the list indent first.
/// * `content_indent` - extra indent, in columns, stripped last and always
///   emitted as WHITESPACE.
#[allow(clippy::too_many_arguments)]
fn prepare_fence_open_line<'a>(
    builder: &mut GreenNodeBuilder<'static>,
    source_line: &'a str,
    first_line_override: Option<&'a str>,
    bq_depth: usize,
    list_content_col: usize,
    list_marker_consumed_on_line_0: bool,
    bq_outer: bool,
    content_indent: usize,
) -> (&'a str, &'a str) {
    // Strip the active container prefix on line 0 in container-stack
    // order. Bq markers are always upstream-emitted by the blockquote
    // dispatch and silently consumed here. The list_content_col indent
    // is upstream-emitted only on a marker-line dispatch
    // (`list_marker_consumed_on_line_0=true`); on continuation-line
    // dispatch it must be emitted here as WHITESPACE. Adjacent
    // WHITESPACE emissions are coalesced into one token for
    // byte-range-equivalent CST stability.
    if let Some(first_line) = first_line_override {
        // Override path: the caller supplies the content; only the
        // blockquote prefix (if any) and leading spaces are emitted.
        if bq_depth > 0 && source_line != first_line {
            let stripped = strip_n_blockquote_markers(source_line, bq_depth);
            let prefix_len = source_line.len().saturating_sub(stripped.len());
            if prefix_len > 0 {
                emit_blockquote_prefix_tokens(builder, &source_line[..prefix_len]);
            }
        }
        let first_trimmed = strip_leading_spaces(first_line);
        let leading_ws_len = first_line.len().saturating_sub(first_trimmed.len());
        if leading_ws_len > 0 {
            builder.token(SyntaxKind::WHITESPACE.into(), &first_line[..leading_ws_len]);
        }
        return (first_trimmed, first_line);
    }

    // `s` is the not-yet-consumed tail of `source_line`; byte offsets into
    // `source_line` are recovered as `source_line.len() - s.len()`.
    let mut s: &'a str = source_line;
    // Start offset of a WHITESPACE run that is queued but not yet emitted.
    let mut pending_ws_start: Option<usize> = None;
    let suppress_list = list_marker_consumed_on_line_0;

    // Emit the queued WHITESPACE run (if non-empty) up to `current_offset`
    // and clear the queue.
    let flush_ws = |builder: &mut GreenNodeBuilder<'static>,
                    pending: &mut Option<usize>,
                    current_offset: usize| {
        if let Some(start) = *pending
            && current_offset > start
        {
            builder.token(
                SyntaxKind::WHITESPACE.into(),
                &source_line[start..current_offset],
            );
        }
        *pending = None;
    };

    let do_strip_list = |s: &mut &'a str, pending: &mut Option<usize>| {
        if list_content_col == 0 {
            return;
        }
        // On a marker-line dispatch (`suppress_list=true`), the list
        // marker bytes have already been emitted upstream and may not
        // be whitespace (e.g. `- > ```` has a leading `-`). Use
        // `advance_columns` which counts columns through any char.
        // On continuation lines, the leading bytes ARE whitespace
        // (the list-content-indent) so use the whitespace-only
        // `strip_list_indent` to stop at non-whitespace.
        let stripped = if suppress_list {
            advance_columns(s, list_content_col)
        } else {
            strip_list_indent(s, list_content_col)
        };
        let consumed = s.len() - stripped.len();
        if consumed > 0 {
            let start = source_line.len() - s.len();
            // Queue the indent for emission unless upstream already emitted it.
            if !suppress_list && pending.is_none() {
                *pending = Some(start);
            }
            *s = stripped;
        }
    };

    let do_strip_bq =
        |builder: &mut GreenNodeBuilder<'static>, s: &mut &'a str, pending: &mut Option<usize>| {
            if bq_depth == 0 {
                return;
            }
            // Any queued whitespace must be emitted before the markers are
            // skipped, so a WHITESPACE token never spans them.
            let current_offset = source_line.len() - s.len();
            flush_ws(builder, pending, current_offset);
            *s = strip_n_blockquote_markers(s, bq_depth);
        };

    if bq_outer {
        do_strip_bq(builder, &mut s, &mut pending_ws_start);
        do_strip_list(&mut s, &mut pending_ws_start);
    } else {
        do_strip_list(&mut s, &mut pending_ws_start);
        do_strip_bq(builder, &mut s, &mut pending_ws_start);
    }

    // content_indent (footnote/definition) — always emit as WHITESPACE.
    if content_indent > 0 {
        let indent_bytes = byte_index_at_column(s, content_indent);
        if s.len() >= indent_bytes && indent_bytes > 0 {
            let start = source_line.len() - s.len();
            // Extend an already-queued run instead of starting a new one.
            if pending_ws_start.is_none() {
                pending_ws_start = Some(start);
            }
            s = &s[indent_bytes..];
        }
    }

    let final_offset = source_line.len() - s.len();
    flush_ws(builder, &mut pending_ws_start, final_offset);

    // Remaining leading spaces in front of the fence get their own token.
    let first_trimmed = strip_leading_spaces(s);
    let leading_ws_len = s.len().saturating_sub(first_trimmed.len());
    if leading_ws_len > 0 {
        builder.token(SyntaxKind::WHITESPACE.into(), &s[..leading_ws_len]);
    }
    (first_trimmed, s)
}
623
624fn strip_content_line_prefixes(
625    content_line: &str,
626    bq_depth: usize,
627    list_content_col: usize,
628    bq_outer: bool,
629    content_indent: usize,
630) -> &str {
631    let after_bq_and_list = if bq_outer {
632        let after_bq = if bq_depth > 0 {
633            strip_n_blockquote_markers(content_line, bq_depth)
634        } else {
635            content_line
636        };
637        strip_list_indent(after_bq, list_content_col)
638    } else {
639        let after_list = strip_list_indent(content_line, list_content_col);
640        if bq_depth > 0 {
641            strip_n_blockquote_markers(after_list, bq_depth)
642        } else {
643            after_list
644        }
645    };
646
647    let indent_bytes = byte_index_at_column(after_bq_and_list, content_indent);
648    if content_indent > 0 && after_bq_and_list.len() >= indent_bytes {
649        &after_bq_and_list[indent_bytes..]
650    } else {
651        after_bq_and_list
652    }
653}
654
655pub(crate) fn compute_hashpipe_preamble_line_count(
656    content_lines: &[&str],
657    prefix: &str,
658    bq_depth: usize,
659    list_content_col: usize,
660    bq_outer: bool,
661    content_indent: usize,
662) -> usize {
663    let mut line_idx = 0usize;
664
665    while line_idx < content_lines.len() {
666        let preview_after_indent = strip_content_line_prefixes(
667            content_lines[line_idx],
668            bq_depth,
669            list_content_col,
670            bq_outer,
671            content_indent,
672        );
673        let (preview_without_newline, _) = strip_newline(preview_after_indent);
674        if !is_hashpipe_option_line(preview_without_newline, prefix)
675            && !is_hashpipe_continuation_line(preview_without_newline, prefix)
676        {
677            break;
678        }
679        line_idx += 1;
680    }
681
682    line_idx
683}
684
685fn emit_hashpipe_option_line(
686    builder: &mut GreenNodeBuilder<'static>,
687    line_without_newline: &str,
688    prefix: &str,
689) -> bool {
690    if !is_hashpipe_option_line(line_without_newline, prefix) {
691        return false;
692    }
693
694    let trimmed_start = trim_start_spaces_tabs(line_without_newline);
695    let leading_ws_len = line_without_newline
696        .len()
697        .saturating_sub(trimmed_start.len());
698    let after_prefix = &trimmed_start[prefix.len()..];
699    let ws_after_prefix_len = after_prefix
700        .len()
701        .saturating_sub(trim_start_spaces_tabs(after_prefix).len());
702    let rest = &after_prefix[ws_after_prefix_len..];
703    let Some(colon_idx) = rest.find(':') else {
704        return false;
705    };
706
707    let key_with_ws = &rest[..colon_idx];
708    let key = trim_end_spaces_tabs(key_with_ws);
709    if key.is_empty() {
710        return false;
711    }
712    let key_ws_suffix = &key_with_ws[key.len()..];
713
714    let after_colon = &rest[colon_idx + 1..];
715    let value_ws_prefix_len = after_colon
716        .len()
717        .saturating_sub(trim_start_spaces_tabs(after_colon).len());
718    let value_with_trailing = &after_colon[value_ws_prefix_len..];
719    let value = trim_end_spaces_tabs(value_with_trailing);
720    let value_ws_suffix = &value_with_trailing[value.len()..];
721
722    builder.start_node(SyntaxKind::CHUNK_OPTION.into());
723    if leading_ws_len > 0 {
724        builder.token(
725            SyntaxKind::WHITESPACE.into(),
726            &line_without_newline[..leading_ws_len],
727        );
728    }
729    builder.token(SyntaxKind::HASHPIPE_PREFIX.into(), prefix);
730    if ws_after_prefix_len > 0 {
731        builder.token(
732            SyntaxKind::WHITESPACE.into(),
733            &after_prefix[..ws_after_prefix_len],
734        );
735    }
736
737    builder.token(SyntaxKind::CHUNK_OPTION_KEY.into(), key);
738    if !key_ws_suffix.is_empty() {
739        builder.token(SyntaxKind::WHITESPACE.into(), key_ws_suffix);
740    }
741    builder.token(SyntaxKind::TEXT.into(), ":");
742    if value_ws_prefix_len > 0 {
743        builder.token(
744            SyntaxKind::WHITESPACE.into(),
745            &after_colon[..value_ws_prefix_len],
746        );
747    }
748
749    if !value.is_empty() {
750        if let Some(quote) = value.chars().next()
751            && (quote == '"' || quote == '\'')
752            && value.ends_with(quote)
753            && value.len() >= 2
754        {
755            builder.token(SyntaxKind::CHUNK_OPTION_QUOTE.into(), &value[..1]);
756            builder.token(
757                SyntaxKind::CHUNK_OPTION_VALUE.into(),
758                &value[1..value.len() - 1],
759            );
760            builder.token(
761                SyntaxKind::CHUNK_OPTION_QUOTE.into(),
762                &value[value.len() - 1..],
763            );
764        } else {
765            builder.token(SyntaxKind::CHUNK_OPTION_VALUE.into(), value);
766        }
767    }
768
769    if !value_ws_suffix.is_empty() {
770        builder.token(SyntaxKind::WHITESPACE.into(), value_ws_suffix);
771    }
772    builder.finish_node();
773    true
774}
775
776fn emit_hashpipe_continuation_line(
777    builder: &mut GreenNodeBuilder<'static>,
778    line_without_newline: &str,
779    prefix: &str,
780) -> bool {
781    if !is_hashpipe_continuation_line(line_without_newline, prefix) {
782        return false;
783    }
784    let trimmed_start = trim_start_spaces_tabs(line_without_newline);
785    let leading_ws_len = line_without_newline
786        .len()
787        .saturating_sub(trimmed_start.len());
788    let after_prefix = &trimmed_start[prefix.len()..];
789    let ws_after_prefix_len = after_prefix
790        .len()
791        .saturating_sub(trim_start_spaces_tabs(after_prefix).len());
792    let continuation_with_trailing = &after_prefix[ws_after_prefix_len..];
793    let continuation_value = trim_end_spaces_tabs(continuation_with_trailing);
794    if continuation_value.is_empty() {
795        return false;
796    }
797    let continuation_ws_suffix = &continuation_with_trailing[continuation_value.len()..];
798
799    builder.start_node(SyntaxKind::CHUNK_OPTION.into());
800    if leading_ws_len > 0 {
801        builder.token(
802            SyntaxKind::WHITESPACE.into(),
803            &line_without_newline[..leading_ws_len],
804        );
805    }
806    builder.token(SyntaxKind::HASHPIPE_PREFIX.into(), prefix);
807    if ws_after_prefix_len > 0 {
808        builder.token(
809            SyntaxKind::WHITESPACE.into(),
810            &after_prefix[..ws_after_prefix_len],
811        );
812    }
813    builder.token(SyntaxKind::CHUNK_OPTION_VALUE.into(), continuation_value);
814    if !continuation_ws_suffix.is_empty() {
815        builder.token(SyntaxKind::WHITESPACE.into(), continuation_ws_suffix);
816    }
817    builder.finish_node();
818    true
819}
820
821fn is_hashpipe_option_line(line_without_newline: &str, prefix: &str) -> bool {
822    let trimmed_start = trim_start_spaces_tabs(line_without_newline);
823    if !trimmed_start.starts_with(prefix) {
824        return false;
825    }
826    let after_prefix = &trimmed_start[prefix.len()..];
827    let rest = trim_start_spaces_tabs(after_prefix);
828    let Some(colon_idx) = rest.find(':') else {
829        return false;
830    };
831    let key = trim_end_spaces_tabs(&rest[..colon_idx]);
832    if key.is_empty() {
833        return false;
834    }
835    true
836}
837
838fn is_hashpipe_continuation_line(line_without_newline: &str, prefix: &str) -> bool {
839    let trimmed_start = trim_start_spaces_tabs(line_without_newline);
840    if !trimmed_start.starts_with(prefix) {
841        return false;
842    }
843    let after_prefix = &trimmed_start[prefix.len()..];
844    let Some(first) = after_prefix.chars().next() else {
845        return false;
846    };
847    if first != ' ' && first != '\t' {
848        return false;
849    }
850    !trim_start_spaces_tabs(after_prefix).is_empty()
851}
852
853/// Check if a line is a valid closing fence for the given fence info.
854pub(crate) fn is_closing_fence(content: &str, fence: &FenceInfo) -> bool {
855    let trimmed = strip_leading_spaces(content);
856
857    if !trimmed.starts_with(fence.fence_char) {
858        return false;
859    }
860
861    let closing_count = trimmed
862        .chars()
863        .take_while(|&c| c == fence.fence_char)
864        .count();
865
866    if closing_count < fence.fence_count {
867        return false;
868    }
869
870    // Rest of line must be empty
871    trimmed[closing_count..].trim().is_empty()
872}
873
/// Emit chunk options as structured CST nodes while preserving all bytes.
/// This parses {r, echo=TRUE, fig.cap="text"} into CHUNK_OPTIONS with individual CHUNK_OPTION nodes.
///
/// `content` is scanned left to right with a byte cursor and every byte is
/// handed to `builder` exactly once:
///
/// * whitespace-only `content` becomes a single `TEXT` token and no
///   `CHUNK_OPTIONS` node is opened;
/// * runs of spaces, tabs and commas between entries become `TEXT`;
/// * a `}` found where an entry would start is emitted as `TEXT`, followed by
///   one more `TEXT` token holding anything after it, and scanning stops;
/// * `key=value` becomes a `CHUNK_OPTION` node (`CHUNK_OPTION_KEY`, `TEXT` for
///   the `=` and surrounding blanks, then either a quoted value framed by
///   `CHUNK_OPTION_QUOTE` tokens or a bare `CHUNK_OPTION_VALUE`);
/// * a bare word becomes `ATTR_CLASS` (leading `.`), `ATTR_ID` (leading `#`)
///   or `CHUNK_LABEL`, each wrapping one `TEXT` token.
fn emit_chunk_options(builder: &mut GreenNodeBuilder<'static>, content: &str) {
    // Nothing to structure: pass the blank text through untouched.
    if content.trim().is_empty() {
        builder.token(SyntaxKind::TEXT.into(), content);
        return;
    }

    builder.start_node(SyntaxKind::CHUNK_OPTIONS.into());

    // Byte cursor into `content`. Every delimiter tested below is ASCII, so
    // the cursor only ever stops on an ASCII byte or at the end of input.
    let mut pos = 0;
    let bytes = content.as_bytes();

    while pos < bytes.len() {
        // Emit leading whitespace/commas as TEXT
        let ws_start = pos;
        while pos < bytes.len() {
            let ch = bytes[pos] as char;
            if ch != ' ' && ch != '\t' && ch != ',' {
                break;
            }
            pos += 1;
        }
        if pos > ws_start {
            builder.token(SyntaxKind::TEXT.into(), &content[ws_start..pos]);
        }

        if pos >= bytes.len() {
            break;
        }

        // Check if this is a closing brace
        if bytes[pos] as char == '}' {
            builder.token(SyntaxKind::TEXT.into(), &content[pos..pos + 1]);
            pos += 1;
            // Whatever trails the brace is kept verbatim as one TEXT token.
            if pos < bytes.len() {
                builder.token(SyntaxKind::TEXT.into(), &content[pos..]);
            }
            break;
        }

        // Read key
        let key_start = pos;
        while pos < bytes.len() {
            let ch = bytes[pos] as char;
            if ch == '=' || ch == ' ' || ch == '\t' || ch == ',' || ch == '}' {
                break;
            }
            pos += 1;
        }

        // Separators and '}' were consumed above, so an empty key here means
        // the cursor is sitting on a stray '='.
        if pos == key_start {
            // No key found, emit rest as TEXT
            if pos < bytes.len() {
                builder.token(SyntaxKind::TEXT.into(), &content[pos..]);
            }
            break;
        }

        let key = &content[key_start..pos];

        // Check for whitespace before '='
        // (the run is remembered so it can be emitted in either branch below)
        let ws_before_eq_start = pos;
        while pos < bytes.len() && matches!(bytes[pos] as char, ' ' | '\t') {
            pos += 1;
        }

        // Check if there's a value (=)
        if pos < bytes.len() && bytes[pos] as char == '=' {
            // Has value - emit as CHUNK_OPTION
            builder.start_node(SyntaxKind::CHUNK_OPTION.into());
            builder.token(SyntaxKind::CHUNK_OPTION_KEY.into(), key);

            // Emit whitespace before '=' if any
            if pos > ws_before_eq_start {
                builder.token(SyntaxKind::TEXT.into(), &content[ws_before_eq_start..pos]);
            }

            builder.token(SyntaxKind::TEXT.into(), "=");
            pos += 1; // consume '='

            // Emit whitespace after '='
            let ws_after_eq_start = pos;
            while pos < bytes.len() && matches!(bytes[pos] as char, ' ' | '\t') {
                pos += 1;
            }
            if pos > ws_after_eq_start {
                builder.token(SyntaxKind::TEXT.into(), &content[ws_after_eq_start..pos]);
            }

            // Parse value (might be quoted)
            if pos < bytes.len() {
                let quote_char = bytes[pos] as char;
                if quote_char == '"' || quote_char == '\'' {
                    // Quoted value
                    builder.token(
                        SyntaxKind::CHUNK_OPTION_QUOTE.into(),
                        &content[pos..pos + 1],
                    );
                    pos += 1; // consume opening quote

                    let val_start = pos;
                    // `escaped` is true when the previous byte was an
                    // unescaped backslash, so a quote right after it does not
                    // terminate the value.
                    let mut escaped = false;
                    while pos < bytes.len() {
                        let ch = bytes[pos] as char;
                        if !escaped && ch == quote_char {
                            break;
                        }
                        escaped = !escaped && ch == '\\';
                        pos += 1;
                    }

                    if pos > val_start {
                        builder.token(
                            SyntaxKind::CHUNK_OPTION_VALUE.into(),
                            &content[val_start..pos],
                        );
                    }

                    // Emit closing quote
                    // (absent when the value ran to the end of `content`)
                    if pos < bytes.len() && bytes[pos] as char == quote_char {
                        builder.token(
                            SyntaxKind::CHUNK_OPTION_QUOTE.into(),
                            &content[pos..pos + 1],
                        );
                        pos += 1;
                    }
                } else {
                    // Unquoted value - read until comma, space, closing brace, or balanced delimiter
                    let val_start = pos;
                    // Nesting level of (), [] and {} opened inside the value;
                    // separators only end the value at level zero.
                    let mut depth = 0;

                    while pos < bytes.len() {
                        let ch = bytes[pos] as char;
                        match ch {
                            '(' | '[' | '{' => depth += 1,
                            ')' | ']' => {
                                if depth > 0 {
                                    depth -= 1;
                                } else {
                                    break;
                                }
                            }
                            '}' => {
                                if depth > 0 {
                                    depth -= 1;
                                } else {
                                    break; // End of chunk options
                                }
                            }
                            ',' if depth == 0 => {
                                break; // Next option
                            }
                            ' ' | '\t' if depth == 0 => {
                                break; // Space separator
                            }
                            _ => {}
                        }
                        pos += 1;
                    }

                    if pos > val_start {
                        builder.token(
                            SyntaxKind::CHUNK_OPTION_VALUE.into(),
                            &content[val_start..pos],
                        );
                    }
                }
            }

            builder.finish_node(); // CHUNK_OPTION
        } else {
            // No '=' - classify by prefix: '.foo' is a class, '#foo' is an id,
            // anything else is a chunk label (e.g. `{r mylabel}`).
            let kind = match key.as_bytes().first() {
                Some(b'.') => SyntaxKind::ATTR_CLASS,
                Some(b'#') => SyntaxKind::ATTR_ID,
                _ => SyntaxKind::CHUNK_LABEL,
            };
            builder.start_node(kind.into());
            builder.token(SyntaxKind::TEXT.into(), key);
            builder.finish_node();
            // The blanks skipped while looking for '=' belong after the node.
            if pos > ws_before_eq_start {
                builder.token(SyntaxKind::TEXT.into(), &content[ws_before_eq_start..pos]);
            }
        }
    }

    builder.finish_node(); // CHUNK_OPTIONS
}
1064
1065/// Helper to parse info string and emit CodeInfo node with parsed components.
1066/// This breaks down the info string into its logical parts while preserving all bytes.
1067fn emit_code_info_node(builder: &mut GreenNodeBuilder<'static>, info_string: &str) {
1068    builder.start_node(SyntaxKind::CODE_INFO.into());
1069
1070    let info = InfoString::parse(info_string);
1071
1072    match &info.block_type {
1073        CodeBlockType::DisplayShortcut { language } => {
1074            // Simple case: python or python {.class}
1075            builder.token(SyntaxKind::CODE_LANGUAGE.into(), language);
1076
1077            // If there's more after the language, emit it as TEXT
1078            let after_lang = &info_string[language.len()..];
1079            if !after_lang.is_empty() {
1080                builder.token(SyntaxKind::TEXT.into(), after_lang);
1081            }
1082        }
1083        CodeBlockType::Executable { language } => {
1084            // Quarto: {r} or {r my-label, echo=FALSE}
1085            builder.token(SyntaxKind::TEXT.into(), "{");
1086            builder.token(SyntaxKind::CODE_LANGUAGE.into(), language);
1087
1088            // Parse and emit chunk options
1089            let start_offset = 1 + language.len(); // Skip "{r"
1090            if start_offset < info_string.len() {
1091                let rest = &info_string[start_offset..];
1092                emit_chunk_options(builder, rest);
1093            }
1094        }
1095        CodeBlockType::DisplayExplicit { classes } => {
1096            // Pandoc: {.python} or {#id .haskell .numberLines}
1097            // We need to find the first class in the raw string and emit everything around it
1098
1099            if let Some(lang) = classes.first() {
1100                // Find where ".lang" appears in the info string
1101                let needle = format!(".{}", lang);
1102                if let Some(lang_start) = info_string.find(&needle) {
1103                    // Emit everything before the language
1104                    if lang_start > 0 {
1105                        builder.token(SyntaxKind::TEXT.into(), &info_string[..lang_start]);
1106                    }
1107
1108                    // Emit the dot
1109                    builder.token(SyntaxKind::TEXT.into(), ".");
1110
1111                    // Emit the language
1112                    builder.token(SyntaxKind::CODE_LANGUAGE.into(), lang);
1113
1114                    // Emit everything after
1115                    let after_lang_start = lang_start + 1 + lang.len();
1116                    if after_lang_start < info_string.len() {
1117                        builder.token(SyntaxKind::TEXT.into(), &info_string[after_lang_start..]);
1118                    }
1119                } else {
1120                    // Couldn't find it, just emit as TEXT
1121                    builder.token(SyntaxKind::TEXT.into(), info_string);
1122                }
1123            } else {
1124                // No classes
1125                builder.token(SyntaxKind::TEXT.into(), info_string);
1126            }
1127        }
1128        CodeBlockType::Raw { .. } | CodeBlockType::Plain => {
1129            // No language, just emit as TEXT
1130            builder.token(SyntaxKind::TEXT.into(), info_string);
1131        }
1132    }
1133
1134    builder.finish_node(); // CodeInfo
1135}
1136
1137/// Parse a fenced code block, consuming lines from the parser.
1138/// Parse a fenced code block, consuming lines from the parser.
1139/// Returns the new position after the code block.
1140///
1141/// All container geometry (blockquote depth, list-item indent,
1142/// footnote/definition base indent, and the bq-vs-list strip order) is
1143/// derived from `window.prefix()`; detection scans and the open-fence
1144/// emitter read those derived scalars, and content/closing-fence lines
1145/// re-emit their container prefix via [`StrippedLines::emit_prefix_at`].
1146pub(crate) fn parse_fenced_code_block(
1147    builder: &mut GreenNodeBuilder<'static>,
1148    window: &StrippedLines<'_, '_>,
1149    fence: FenceInfo,
1150    first_line_override: Option<&str>,
1151) -> usize {
1152    let lines = window.raw();
1153    let start_pos = window.pos();
1154    let prefix = window.prefix();
1155    let bq_depth = prefix.bq_depth();
1156    let list_content_col = prefix.list_content_col();
1157    let list_marker_consumed_on_line_0 = prefix.list_marker_consumed_on_line_0;
1158    let bq_outer = bq_outer_of_list(prefix);
1159    let content_indent = prefix.content_indent();
1160
1161    // Start code block
1162    builder.start_node(SyntaxKind::CODE_BLOCK.into());
1163
1164    // Opening fence
1165    let (first_trimmed, _first_inner) = prepare_fence_open_line(
1166        builder,
1167        lines[start_pos],
1168        first_line_override,
1169        bq_depth,
1170        list_content_col,
1171        list_marker_consumed_on_line_0,
1172        bq_outer,
1173        content_indent,
1174    );
1175
1176    builder.start_node(SyntaxKind::CODE_FENCE_OPEN.into());
1177    builder.token(
1178        SyntaxKind::CODE_FENCE_MARKER.into(),
1179        &first_trimmed[..fence.fence_count],
1180    );
1181
1182    // Emit any space between fence and info string (for losslessness)
1183    let after_fence = &first_trimmed[fence.fence_count..];
1184    if let Some(_space_stripped) = after_fence.strip_prefix(' ') {
1185        // There was a space - emit it as WHITESPACE
1186        builder.token(SyntaxKind::WHITESPACE.into(), " ");
1187        // Parse and emit the info string as a structured node
1188        if !fence.info_string.is_empty() {
1189            emit_code_info_node(builder, &fence.info_string);
1190        }
1191    } else if !fence.info_string.is_empty() {
1192        // No space - parse and emit info_string as a structured node
1193        emit_code_info_node(builder, &fence.info_string);
1194    }
1195
1196    // Extract and emit the actual newline from the opening fence line
1197    let (_, newline_str) = strip_newline(first_trimmed);
1198    if !newline_str.is_empty() {
1199        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1200    }
1201    builder.finish_node(); // CodeFenceOpen
1202
1203    let mut current_pos = start_pos + 1;
1204    let mut content_lines: Vec<&str> = Vec::new(); // Store original lines for lossless parsing
1205    let mut found_closing = false;
1206
1207    while current_pos < lines.len() {
1208        let line = lines[current_pos];
1209
1210        // Count blockquote markers to detect leaving the surrounding
1211        // blockquote. For bq_outer=true probe the raw line (bq markers
1212        // lead); for bq_outer=false strip the list indent first, then
1213        // probe the post-list slice. This forward-scan termination has no
1214        // `StrippedLines` equivalent, so it stays inline.
1215        let probe = if bq_outer {
1216            line
1217        } else {
1218            strip_list_indent(line, list_content_col)
1219        };
1220        let (line_bq_depth, _) = count_blockquote_markers(probe);
1221        if line_bq_depth < bq_depth {
1222            break;
1223        }
1224
1225        // Detection only (emits nothing): the same 2-bucket container
1226        // strip the emission path applies via `emit_content_line_prefixes`
1227        // / `emit_prefix_at`, kept here rather than `strip_at` (a per-op
1228        // walk) to stay byte-identical in interleaved nesting.
1229        let inner_stripped =
1230            strip_content_line_prefixes(line, bq_depth, list_content_col, bq_outer, content_indent);
1231
1232        if is_closing_fence(inner_stripped, &fence) {
1233            found_closing = true;
1234            current_pos += 1;
1235            break;
1236        }
1237
1238        content_lines.push(line);
1239        current_pos += 1;
1240    }
1241
1242    // Add content
1243    if !content_lines.is_empty() {
1244        builder.start_node(SyntaxKind::CODE_CONTENT.into());
1245        let hashpipe_prefix = match InfoString::parse(&fence.info_string).block_type {
1246            CodeBlockType::Executable { language } => hashpipe_comment_prefix(&language),
1247            _ => None,
1248        };
1249
1250        let mut line_idx = 0usize;
1251        if let Some(prefix) = hashpipe_prefix {
1252            let prepared_hashpipe_lines = compute_hashpipe_preamble_line_count(
1253                &content_lines,
1254                prefix,
1255                bq_depth,
1256                list_content_col,
1257                bq_outer,
1258                content_indent,
1259            );
1260            if prepared_hashpipe_lines > 0 {
1261                builder.start_node(SyntaxKind::HASHPIPE_YAML_PREAMBLE.into());
1262                builder.start_node(SyntaxKind::HASHPIPE_YAML_CONTENT.into());
1263                while line_idx < prepared_hashpipe_lines {
1264                    let after_indent = window.emit_prefix_at(builder, start_pos + 1 + line_idx);
1265                    let (line_without_newline, newline_str) = strip_newline(after_indent);
1266                    if !emit_hashpipe_option_line(builder, line_without_newline, prefix) {
1267                        let _ =
1268                            emit_hashpipe_continuation_line(builder, line_without_newline, prefix);
1269                    }
1270                    if !newline_str.is_empty() {
1271                        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1272                    }
1273                    line_idx += 1;
1274                }
1275                builder.finish_node(); // HASHPIPE_YAML_CONTENT
1276                builder.finish_node(); // HASHPIPE_YAML_PREAMBLE
1277            }
1278        }
1279
1280        for k in line_idx..content_lines.len() {
1281            let after_indent = window.emit_prefix_at(builder, start_pos + 1 + k);
1282            let (line_without_newline, newline_str) = strip_newline(after_indent);
1283
1284            if !line_without_newline.is_empty() {
1285                builder.token(SyntaxKind::TEXT.into(), line_without_newline);
1286            }
1287
1288            if !newline_str.is_empty() {
1289                builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1290            }
1291        }
1292        builder.finish_node(); // CodeContent
1293    }
1294
1295    // Closing fence (if found)
1296    if found_closing {
1297        let closing_stripped = window.emit_prefix_at(builder, current_pos - 1);
1298        let (closing_without_newline, newline_str) = strip_newline(closing_stripped);
1299        let closing_trimmed_start = strip_leading_spaces(closing_without_newline);
1300        let leading_ws_len = closing_without_newline.len() - closing_trimmed_start.len();
1301        let closing_count = closing_trimmed_start
1302            .chars()
1303            .take_while(|&c| c == fence.fence_char)
1304            .count();
1305        let trailing_after_marker = &closing_trimmed_start[closing_count..];
1306
1307        builder.start_node(SyntaxKind::CODE_FENCE_CLOSE.into());
1308        if leading_ws_len > 0 {
1309            builder.token(
1310                SyntaxKind::WHITESPACE.into(),
1311                &closing_without_newline[..leading_ws_len],
1312            );
1313        }
1314        builder.token(
1315            SyntaxKind::CODE_FENCE_MARKER.into(),
1316            &closing_trimmed_start[..closing_count],
1317        );
1318        if !trailing_after_marker.is_empty() {
1319            builder.token(SyntaxKind::WHITESPACE.into(), trailing_after_marker);
1320        }
1321        if !newline_str.is_empty() {
1322            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1323        }
1324        builder.finish_node(); // CodeFenceClose
1325    }
1326
1327    builder.finish_node(); // CodeBlock
1328
1329    current_pos
1330}
1331
1332/// Parse a GFM math fence (``` math ... ```) as DISPLAY_MATH while preserving bytes.
1333///
1334/// Container geometry is derived from `window.prefix()`, mirroring
1335/// [`parse_fenced_code_block`].
1336pub(crate) fn parse_fenced_math_block(
1337    builder: &mut GreenNodeBuilder<'static>,
1338    window: &StrippedLines<'_, '_>,
1339    fence: FenceInfo,
1340    first_line_override: Option<&str>,
1341) -> usize {
1342    let lines = window.raw();
1343    let start_pos = window.pos();
1344    let prefix = window.prefix();
1345    let bq_depth = prefix.bq_depth();
1346    let list_content_col = prefix.list_content_col();
1347    let list_marker_consumed_on_line_0 = prefix.list_marker_consumed_on_line_0;
1348    let bq_outer = bq_outer_of_list(prefix);
1349    let content_indent = prefix.content_indent();
1350
1351    builder.start_node(SyntaxKind::DISPLAY_MATH.into());
1352
1353    let (first_trimmed, _first_inner) = prepare_fence_open_line(
1354        builder,
1355        lines[start_pos],
1356        first_line_override,
1357        bq_depth,
1358        list_content_col,
1359        list_marker_consumed_on_line_0,
1360        bq_outer,
1361        content_indent,
1362    );
1363    let (opening_without_newline, opening_newline) = strip_newline(first_trimmed);
1364    builder.token(
1365        SyntaxKind::DISPLAY_MATH_MARKER.into(),
1366        opening_without_newline,
1367    );
1368    if !opening_newline.is_empty() {
1369        builder.token(SyntaxKind::NEWLINE.into(), opening_newline);
1370    }
1371
1372    let mut current_pos = start_pos + 1;
1373    let mut content_lines: Vec<&str> = Vec::new();
1374    let mut found_closing = false;
1375
1376    while current_pos < lines.len() {
1377        let line = lines[current_pos];
1378
1379        // Forward-scan termination on blockquote depth — stays inline (no
1380        // `StrippedLines` equivalent), mirroring `parse_fenced_code_block`.
1381        let probe = if bq_outer {
1382            line
1383        } else {
1384            strip_list_indent(line, list_content_col)
1385        };
1386        let (line_bq_depth, _) = count_blockquote_markers(probe);
1387        if line_bq_depth < bq_depth {
1388            break;
1389        }
1390
1391        // Detection only (emits nothing): same 2-bucket strip as emission.
1392        let inner_stripped =
1393            strip_content_line_prefixes(line, bq_depth, list_content_col, bq_outer, content_indent);
1394
1395        if is_closing_fence(inner_stripped, &fence) {
1396            found_closing = true;
1397            current_pos += 1;
1398            break;
1399        }
1400
1401        content_lines.push(line);
1402        current_pos += 1;
1403    }
1404
1405    if !content_lines.is_empty() {
1406        let mut content = String::new();
1407        for k in 0..content_lines.len() {
1408            let after_indent = window.emit_prefix_at(builder, start_pos + 1 + k);
1409            let (line_without_newline, newline_str) = strip_newline(after_indent);
1410            content.push_str(line_without_newline);
1411            content.push_str(newline_str);
1412        }
1413        builder.token(SyntaxKind::TEXT.into(), &content);
1414    }
1415
1416    if found_closing {
1417        let closing_stripped = window.emit_prefix_at(builder, current_pos - 1);
1418        let (closing_without_newline, newline_str) = strip_newline(closing_stripped);
1419        let closing_trimmed_start = strip_leading_spaces(closing_without_newline);
1420        let leading_ws_len = closing_without_newline.len() - closing_trimmed_start.len();
1421        let closing_count = closing_trimmed_start
1422            .chars()
1423            .take_while(|&c| c == fence.fence_char)
1424            .count();
1425        let trailing_after_marker = &closing_trimmed_start[closing_count..];
1426
1427        if leading_ws_len > 0 {
1428            builder.token(
1429                SyntaxKind::WHITESPACE.into(),
1430                &closing_without_newline[..leading_ws_len],
1431            );
1432        }
1433        builder.token(
1434            SyntaxKind::DISPLAY_MATH_MARKER.into(),
1435            &closing_trimmed_start[..closing_count],
1436        );
1437        if !trailing_after_marker.is_empty() {
1438            builder.token(SyntaxKind::WHITESPACE.into(), trailing_after_marker);
1439        }
1440        if !newline_str.is_empty() {
1441            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1442        }
1443    }
1444
1445    builder.finish_node(); // DisplayMath
1446    current_pos
1447}
1448
1449#[cfg(test)]
1450mod tests {
1451    use super::*;
1452
1453    #[test]
1454    fn test_backtick_fence() {
1455        let fence = try_parse_fence_open("```python").unwrap();
1456        assert_eq!(fence.fence_char, '`');
1457        assert_eq!(fence.fence_count, 3);
1458        assert_eq!(fence.info_string, "python");
1459    }
1460
1461    #[test]
1462    fn test_tilde_fence() {
1463        let fence = try_parse_fence_open("~~~").unwrap();
1464        assert_eq!(fence.fence_char, '~');
1465        assert_eq!(fence.fence_count, 3);
1466        assert_eq!(fence.info_string, "");
1467    }
1468
1469    #[test]
1470    fn test_long_fence() {
1471        let fence = try_parse_fence_open("`````").unwrap();
1472        assert_eq!(fence.fence_count, 5);
1473    }
1474
1475    #[test]
1476    fn test_two_backticks_invalid() {
1477        assert!(try_parse_fence_open("``").is_none());
1478    }
1479
1480    #[test]
1481    fn test_backtick_fence_with_backtick_in_info_is_invalid() {
1482        assert!(try_parse_fence_open("`````hi````there`````").is_none());
1483    }
1484
1485    #[test]
1486    fn test_closing_fence() {
1487        let fence = FenceInfo {
1488            fence_char: '`',
1489            fence_count: 3,
1490            info_string: String::new(),
1491        };
1492        assert!(is_closing_fence("```", &fence));
1493        assert!(is_closing_fence("````", &fence));
1494        assert!(!is_closing_fence("``", &fence));
1495        assert!(!is_closing_fence("~~~", &fence));
1496    }
1497
1498    #[test]
1499    fn test_fenced_code_preserves_leading_gt() {
1500        let input = "```\n> foo\n```\n";
1501        let tree = crate::parse(input, None);
1502        assert_eq!(tree.text().to_string(), input);
1503    }
1504
1505    #[test]
1506    fn test_fenced_code_in_blockquote_preserves_opening_fence_marker() {
1507        let input = "> ```\n> code\n> ```\n";
1508        let tree = crate::parse(input, None);
1509        assert_eq!(tree.text().to_string(), input);
1510    }
1511
1512    #[test]
1513    fn test_fenced_code_in_definition_list_with_unicode_content_does_not_panic() {
1514        let input = "Term\n: ```\n├── pyproject.toml\n```\n";
1515        let tree = crate::parse(input, None);
1516        assert_eq!(tree.text().to_string(), input);
1517    }
1518
1519    #[test]
1520    fn test_info_string_plain() {
1521        let info = InfoString::parse("");
1522        assert_eq!(info.block_type, CodeBlockType::Plain);
1523        assert!(info.attributes.is_empty());
1524    }
1525
1526    #[test]
1527    fn test_info_string_shortcut() {
1528        let info = InfoString::parse("python");
1529        assert_eq!(
1530            info.block_type,
1531            CodeBlockType::DisplayShortcut {
1532                language: "python".to_string()
1533            }
1534        );
1535        assert!(info.attributes.is_empty());
1536    }
1537
1538    #[test]
1539    fn test_info_string_shortcut_with_trailing() {
1540        let info = InfoString::parse("python extra stuff");
1541        assert_eq!(
1542            info.block_type,
1543            CodeBlockType::DisplayShortcut {
1544                language: "python".to_string()
1545            }
1546        );
1547    }
1548
1549    #[test]
1550    fn test_info_string_display_explicit() {
1551        let info = InfoString::parse("{.python}");
1552        assert_eq!(
1553            info.block_type,
1554            CodeBlockType::DisplayExplicit {
1555                classes: vec!["python".to_string()]
1556            }
1557        );
1558    }
1559
1560    #[test]
1561    fn test_info_string_display_explicit_multiple() {
1562        let info = InfoString::parse("{.python .numberLines}");
1563        assert_eq!(
1564            info.block_type,
1565            CodeBlockType::DisplayExplicit {
1566                classes: vec!["python".to_string(), "numberLines".to_string()]
1567            }
1568        );
1569    }
1570
1571    #[test]
1572    fn test_info_string_executable() {
1573        let info = InfoString::parse("{python}");
1574        assert_eq!(
1575            info.block_type,
1576            CodeBlockType::Executable {
1577                language: "python".to_string()
1578            }
1579        );
1580    }
1581
1582    #[test]
1583    fn test_info_string_executable_with_options() {
1584        let info = InfoString::parse("{python echo=false warning=true}");
1585        assert_eq!(
1586            info.block_type,
1587            CodeBlockType::Executable {
1588                language: "python".to_string()
1589            }
1590        );
1591        assert_eq!(info.attributes.len(), 2);
1592        assert_eq!(
1593            info.attributes[0],
1594            ("echo".to_string(), Some("false".to_string()))
1595        );
1596        assert_eq!(
1597            info.attributes[1],
1598            ("warning".to_string(), Some("true".to_string()))
1599        );
1600    }
1601
1602    #[test]
1603    fn test_info_string_executable_with_commas() {
1604        let info = InfoString::parse("{r, echo=FALSE, warning=TRUE}");
1605        assert_eq!(
1606            info.block_type,
1607            CodeBlockType::Executable {
1608                language: "r".to_string()
1609            }
1610        );
1611        assert_eq!(info.attributes.len(), 2);
1612        assert_eq!(
1613            info.attributes[0],
1614            ("echo".to_string(), Some("FALSE".to_string()))
1615        );
1616        assert_eq!(
1617            info.attributes[1],
1618            ("warning".to_string(), Some("TRUE".to_string()))
1619        );
1620    }
1621
1622    #[test]
1623    fn test_info_string_executable_mixed_commas_spaces() {
1624        // R-style with commas and spaces
1625        let info = InfoString::parse("{r, echo=FALSE, label=\"my chunk\"}");
1626        assert_eq!(
1627            info.block_type,
1628            CodeBlockType::Executable {
1629                language: "r".to_string()
1630            }
1631        );
1632        assert_eq!(info.attributes.len(), 2);
1633        assert_eq!(
1634            info.attributes[0],
1635            ("echo".to_string(), Some("FALSE".to_string()))
1636        );
1637        assert_eq!(
1638            info.attributes[1],
1639            ("label".to_string(), Some("my chunk".to_string()))
1640        );
1641    }
1642
1643    #[test]
1644    fn test_info_string_mixed_shortcut_and_attrs() {
1645        let info = InfoString::parse("python {.numberLines}");
1646        assert_eq!(
1647            info.block_type,
1648            CodeBlockType::DisplayShortcut {
1649                language: "python".to_string()
1650            }
1651        );
1652        assert_eq!(info.attributes.len(), 1);
1653        assert_eq!(info.attributes[0], (".numberLines".to_string(), None));
1654    }
1655
1656    #[test]
1657    fn test_info_string_mixed_with_key_value() {
1658        let info = InfoString::parse("python {.numberLines startFrom=\"100\"}");
1659        assert_eq!(
1660            info.block_type,
1661            CodeBlockType::DisplayShortcut {
1662                language: "python".to_string()
1663            }
1664        );
1665        assert_eq!(info.attributes.len(), 2);
1666        assert_eq!(info.attributes[0], (".numberLines".to_string(), None));
1667        assert_eq!(
1668            info.attributes[1],
1669            ("startFrom".to_string(), Some("100".to_string()))
1670        );
1671    }
1672
1673    #[test]
1674    fn test_info_string_explicit_with_id_and_classes() {
1675        let info = InfoString::parse("{#mycode .haskell .numberLines startFrom=\"100\"}");
1676        assert_eq!(
1677            info.block_type,
1678            CodeBlockType::DisplayExplicit {
1679                classes: vec!["haskell".to_string(), "numberLines".to_string()]
1680            }
1681        );
1682        // Non-class attributes
1683        let has_id = info.attributes.iter().any(|(k, _)| k == "#mycode");
1684        let has_start = info
1685            .attributes
1686            .iter()
1687            .any(|(k, v)| k == "startFrom" && v == &Some("100".to_string()));
1688        assert!(has_id);
1689        assert!(has_start);
1690    }
1691
1692    #[test]
1693    fn test_info_string_raw_html() {
1694        let info = InfoString::parse("{=html}");
1695        assert_eq!(
1696            info.block_type,
1697            CodeBlockType::Raw {
1698                format: "html".to_string()
1699            }
1700        );
1701        assert!(info.attributes.is_empty());
1702    }
1703
1704    #[test]
1705    fn test_info_string_raw_latex() {
1706        let info = InfoString::parse("{=latex}");
1707        assert_eq!(
1708            info.block_type,
1709            CodeBlockType::Raw {
1710                format: "latex".to_string()
1711            }
1712        );
1713    }
1714
1715    #[test]
1716    fn test_info_string_raw_openxml() {
1717        let info = InfoString::parse("{=openxml}");
1718        assert_eq!(
1719            info.block_type,
1720            CodeBlockType::Raw {
1721                format: "openxml".to_string()
1722            }
1723        );
1724    }
1725
1726    #[test]
1727    fn test_info_string_raw_ms() {
1728        let info = InfoString::parse("{=ms}");
1729        assert_eq!(
1730            info.block_type,
1731            CodeBlockType::Raw {
1732                format: "ms".to_string()
1733            }
1734        );
1735    }
1736
1737    #[test]
1738    fn test_info_string_raw_html5() {
1739        let info = InfoString::parse("{=html5}");
1740        assert_eq!(
1741            info.block_type,
1742            CodeBlockType::Raw {
1743                format: "html5".to_string()
1744            }
1745        );
1746    }
1747
1748    #[test]
1749    fn test_info_string_raw_not_combined_with_attrs() {
1750        // If there are other attributes with =format, it should not be treated as raw
1751        let info = InfoString::parse("{=html .class}");
1752        // This should NOT be parsed as raw because there's more than one attribute
1753        assert_ne!(
1754            info.block_type,
1755            CodeBlockType::Raw {
1756                format: "html".to_string()
1757            }
1758        );
1759    }
1760
1761    #[test]
1762    fn test_parse_pandoc_attributes_spaces() {
1763        // Pandoc display blocks use spaces as delimiters
1764        let attrs = InfoString::parse_pandoc_attributes(".python .numberLines startFrom=\"10\"");
1765        assert_eq!(attrs.len(), 3);
1766        assert_eq!(attrs[0], (".python".to_string(), None));
1767        assert_eq!(attrs[1], (".numberLines".to_string(), None));
1768        assert_eq!(attrs[2], ("startFrom".to_string(), Some("10".to_string())));
1769    }
1770
1771    #[test]
1772    fn test_parse_pandoc_attributes_no_commas() {
1773        // Commas in Pandoc attributes should be treated as part of the value
1774        let attrs = InfoString::parse_pandoc_attributes("#id .class key=value");
1775        assert_eq!(attrs.len(), 3);
1776        assert_eq!(attrs[0], ("#id".to_string(), None));
1777        assert_eq!(attrs[1], (".class".to_string(), None));
1778        assert_eq!(attrs[2], ("key".to_string(), Some("value".to_string())));
1779    }
1780
1781    #[test]
1782    fn test_parse_chunk_options_commas() {
1783        // Quarto/RMarkdown chunks use commas as delimiters
1784        let attrs = InfoString::parse_chunk_options("r, echo=FALSE, warning=TRUE");
1785        assert_eq!(attrs.len(), 3);
1786        assert_eq!(attrs[0], ("r".to_string(), None));
1787        assert_eq!(attrs[1], ("echo".to_string(), Some("FALSE".to_string())));
1788        assert_eq!(attrs[2], ("warning".to_string(), Some("TRUE".to_string())));
1789    }
1790
1791    #[test]
1792    fn test_parse_chunk_options_no_spaces() {
1793        // Should handle comma-separated without spaces
1794        let attrs = InfoString::parse_chunk_options("r,echo=FALSE,warning=TRUE");
1795        assert_eq!(attrs.len(), 3);
1796        assert_eq!(attrs[0], ("r".to_string(), None));
1797        assert_eq!(attrs[1], ("echo".to_string(), Some("FALSE".to_string())));
1798        assert_eq!(attrs[2], ("warning".to_string(), Some("TRUE".to_string())));
1799    }
1800
1801    #[test]
1802    fn test_parse_chunk_options_mixed() {
1803        // Handle both commas and spaces
1804        let attrs = InfoString::parse_chunk_options("python echo=False, warning=True");
1805        assert_eq!(attrs.len(), 3);
1806        assert_eq!(attrs[0], ("python".to_string(), None));
1807        assert_eq!(attrs[1], ("echo".to_string(), Some("False".to_string())));
1808        assert_eq!(attrs[2], ("warning".to_string(), Some("True".to_string())));
1809    }
1810
1811    #[test]
1812    fn test_parse_chunk_options_nested_function_call() {
1813        // R function calls with nested commas should be treated as single value
1814        let attrs = InfoString::parse_chunk_options(r#"r pep-cg, dependson=c("foo", "bar")"#);
1815        assert_eq!(attrs.len(), 3);
1816        assert_eq!(attrs[0], ("r".to_string(), None));
1817        assert_eq!(attrs[1], ("pep-cg".to_string(), None));
1818        assert_eq!(
1819            attrs[2],
1820            (
1821                "dependson".to_string(),
1822                Some(r#"c("foo", "bar")"#.to_string())
1823            )
1824        );
1825    }
1826
1827    #[test]
1828    fn test_parse_chunk_options_nested_with_spaces() {
1829        // Function call with spaces inside
1830        let attrs = InfoString::parse_chunk_options(r#"r, cache.path=file.path("cache", "dir")"#);
1831        assert_eq!(attrs.len(), 2);
1832        assert_eq!(attrs[0], ("r".to_string(), None));
1833        assert_eq!(
1834            attrs[1],
1835            (
1836                "cache.path".to_string(),
1837                Some(r#"file.path("cache", "dir")"#.to_string())
1838            )
1839        );
1840    }
1841
1842    #[test]
1843    fn test_parse_chunk_options_deeply_nested() {
1844        // Multiple levels of nesting
1845        let attrs = InfoString::parse_chunk_options(r#"r, x=list(a=c(1,2), b=c(3,4))"#);
1846        assert_eq!(attrs.len(), 2);
1847        assert_eq!(attrs[0], ("r".to_string(), None));
1848        assert_eq!(
1849            attrs[1],
1850            (
1851                "x".to_string(),
1852                Some(r#"list(a=c(1,2), b=c(3,4))"#.to_string())
1853            )
1854        );
1855    }
1856
1857    #[test]
1858    fn test_parse_chunk_options_brackets_and_braces() {
1859        // Test all bracket types
1860        let attrs = InfoString::parse_chunk_options(r#"r, data=df[rows, cols], config={a:1, b:2}"#);
1861        assert_eq!(attrs.len(), 3);
1862        assert_eq!(attrs[0], ("r".to_string(), None));
1863        assert_eq!(
1864            attrs[1],
1865            ("data".to_string(), Some("df[rows, cols]".to_string()))
1866        );
1867        assert_eq!(
1868            attrs[2],
1869            ("config".to_string(), Some("{a:1, b:2}".to_string()))
1870        );
1871    }
1872
1873    #[test]
1874    fn test_parse_chunk_options_quotes_with_parens() {
1875        // Parentheses inside quoted strings shouldn't affect depth tracking
1876        // Note: The parser strips outer quotes from quoted values
1877        let attrs = InfoString::parse_chunk_options(r#"r, label="test (with parens)", echo=TRUE"#);
1878        assert_eq!(attrs.len(), 3);
1879        assert_eq!(attrs[0], ("r".to_string(), None));
1880        assert_eq!(
1881            attrs[1],
1882            ("label".to_string(), Some("test (with parens)".to_string()))
1883        );
1884        assert_eq!(attrs[2], ("echo".to_string(), Some("TRUE".to_string())));
1885    }
1886
1887    #[test]
1888    fn test_parse_chunk_options_escaped_quotes() {
1889        // Escaped quotes inside string values
1890        // Note: The parser strips outer quotes and processes escapes
1891        let attrs = InfoString::parse_chunk_options(r#"r, label="has \"quoted\" text""#);
1892        assert_eq!(attrs.len(), 2);
1893        assert_eq!(attrs[0], ("r".to_string(), None));
1894        assert_eq!(
1895            attrs[1],
1896            (
1897                "label".to_string(),
1898                Some(r#"has "quoted" text"#.to_string())
1899            )
1900        );
1901    }
1902
1903    #[test]
1904    fn test_display_vs_executable_parsing() {
1905        // Display block should use Pandoc parser (spaces)
1906        let info1 = InfoString::parse("{.python .numberLines startFrom=\"10\"}");
1907        assert!(matches!(
1908            info1.block_type,
1909            CodeBlockType::DisplayExplicit { .. }
1910        ));
1911
1912        // Executable chunk should use chunk options parser (commas)
1913        let info2 = InfoString::parse("{r, echo=FALSE, warning=TRUE}");
1914        assert!(matches!(info2.block_type, CodeBlockType::Executable { .. }));
1915        assert_eq!(info2.attributes.len(), 2);
1916    }
1917
1918    #[test]
1919    fn test_info_string_executable_implicit_label() {
1920        // {r mylabel} should parse as label=mylabel
1921        let info = InfoString::parse("{r mylabel}");
1922        assert!(matches!(
1923            info.block_type,
1924            CodeBlockType::Executable { ref language } if language == "r"
1925        ));
1926        assert_eq!(info.attributes.len(), 1);
1927        assert_eq!(
1928            info.attributes[0],
1929            ("label".to_string(), Some("mylabel".to_string()))
1930        );
1931    }
1932
1933    #[test]
1934    fn test_info_string_executable_implicit_label_with_options() {
1935        // {r mylabel, echo=FALSE} should parse as label=mylabel, echo=FALSE
1936        let info = InfoString::parse("{r mylabel, echo=FALSE}");
1937        assert!(matches!(
1938            info.block_type,
1939            CodeBlockType::Executable { ref language } if language == "r"
1940        ));
1941        assert_eq!(info.attributes.len(), 2);
1942        assert_eq!(
1943            info.attributes[0],
1944            ("label".to_string(), Some("mylabel".to_string()))
1945        );
1946        assert_eq!(
1947            info.attributes[1],
1948            ("echo".to_string(), Some("FALSE".to_string()))
1949        );
1950    }
1951
1952    #[test]
1953    fn test_compute_hashpipe_preamble_line_count_for_block_scalar() {
1954        let content_lines = vec![
1955            "#| fig-cap: |\n",
1956            "#|   A caption\n",
1957            "#|   spanning lines\n",
1958            "a <- 1\n",
1959        ];
1960        let count = compute_hashpipe_preamble_line_count(&content_lines, "#|", 0, 0, false, 0);
1961        assert_eq!(count, 3);
1962    }
1963
1964    #[test]
1965    fn test_compute_hashpipe_preamble_line_count_stops_at_non_option() {
1966        let content_lines = vec!["#| label: fig-plot\n", "plot(1:10)\n", "#| echo: false\n"];
1967        let count = compute_hashpipe_preamble_line_count(&content_lines, "#|", 0, 0, false, 0);
1968        assert_eq!(count, 1);
1969    }
1970
1971    #[test]
1972    fn test_compute_hashpipe_preamble_line_count_stops_at_standalone_prefix() {
1973        let content_lines = vec!["#| label: fig-plot\n", "#|\n", "plot(1:10)\n"];
1974        let count = compute_hashpipe_preamble_line_count(&content_lines, "#|", 0, 0, false, 0);
1975        assert_eq!(count, 1);
1976    }
1977}