Skip to main content

panache_parser/parser/blocks/
code_blocks.rs

1//! Fenced code block parsing utilities.
2
3use crate::parser::utils::chunk_options::hashpipe_comment_prefix;
4use crate::syntax::SyntaxKind;
5use rowan::GreenNodeBuilder;
6
7use super::blockquotes::{count_blockquote_markers, strip_n_blockquote_markers};
8use super::container_prefix::{StrippedLines, advance_columns};
9use crate::parser::utils::container_stack::byte_index_at_column;
10
11// Container-prefix primitives live in `container_prefix.rs` (the lower
12// layer that hosts `StrippedLines`); re-export so existing call sites in
13// this module, `tables.rs`, `line_blocks.rs`, and `block_dispatcher.rs`
14// keep their `code_blocks::…` import paths working.
15pub(crate) use super::container_prefix::{
16    bq_outer_of_list, emit_blockquote_prefix_tokens, strip_list_indent,
17};
18
19use crate::parser::utils::helpers::{
20    strip_leading_spaces, strip_newline, trim_end_spaces_tabs, trim_start_spaces_tabs,
21};
22
/// Represents the type of code block based on its info string syntax.
///
/// The variant is chosen by `InfoString::parse` from the text that follows
/// the opening fence.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CodeBlockType {
    /// Display-only block with shortcut syntax: ```python
    ///
    /// Also used for the mixed form `python {.numberLines}`, where the
    /// brace group only contributes attributes.
    DisplayShortcut { language: String },
    /// Display-only block with explicit Pandoc syntax: ```{.python}
    ///
    /// `classes` holds the class names with the leading `.` removed.
    DisplayExplicit { classes: Vec<String> },
    /// Executable chunk (Quarto/RMarkdown): ```{python}
    ///
    /// `language` is the first bare token inside the braces that does not
    /// start with `.` or `#`.
    Executable { language: String },
    /// Raw block for specific output format: ```{=html}
    ///
    /// `format` is the alphanumeric name following the `=`.
    Raw { format: String },
    /// No language specified: ```
    ///
    /// Also used for a brace group that carries attributes but no language.
    Plain,
}
37
/// Parsed attributes from a code block info string.
#[derive(Debug, Clone, PartialEq)]
pub struct InfoString {
    /// The info string exactly as it was handed to `InfoString::parse`
    /// (not trimmed).
    pub raw: String,
    /// Classification of the block derived from the info string syntax.
    pub block_type: CodeBlockType,
    /// Attributes in source order as `(key, value)` pairs; `value` is `None`
    /// for bare tokens that have no `=value` part.
    pub attributes: Vec<(String, Option<String>)>,
}
45
46impl InfoString {
47    /// Parse an info string into structured attributes.
48    pub fn parse(raw: &str) -> Self {
49        let trimmed = raw.trim();
50
51        if trimmed.is_empty() {
52            return InfoString {
53                raw: raw.to_string(),
54                block_type: CodeBlockType::Plain,
55                attributes: Vec::new(),
56            };
57        }
58
59        // Check if it starts with '{' - explicit attribute block
60        if let Some(stripped) = trimmed.strip_prefix('{')
61            && let Some(content) = stripped.strip_suffix('}')
62        {
63            return Self::parse_explicit(raw, content);
64        }
65
66        // Check for mixed form: python {.numberLines}
67        if let Some(brace_start) = trimmed.find('{') {
68            let language = trimmed[..brace_start].trim();
69            if !language.is_empty() && !language.contains(char::is_whitespace) {
70                let attr_part = &trimmed[brace_start..];
71                if let Some(stripped) = attr_part.strip_prefix('{')
72                    && let Some(content) = stripped.strip_suffix('}')
73                {
74                    let attrs = Self::parse_attributes(content);
75                    return InfoString {
76                        raw: raw.to_string(),
77                        block_type: CodeBlockType::DisplayShortcut {
78                            language: language.to_string(),
79                        },
80                        attributes: attrs,
81                    };
82                }
83            }
84        }
85
86        // Otherwise, it's a shortcut form (just the language name)
87        // Only take the first word as language
88        let language = trimmed.split_whitespace().next().unwrap_or(trimmed);
89        InfoString {
90            raw: raw.to_string(),
91            block_type: CodeBlockType::DisplayShortcut {
92                language: language.to_string(),
93            },
94            attributes: Vec::new(),
95        }
96    }
97
98    fn parse_explicit(raw: &str, content: &str) -> Self {
99        // Check for raw attribute FIRST: {=format}
100        // The content should start with '=' and have only alphanumeric chars after
101        let trimmed_content = content.trim();
102        if let Some(format_name) = trimmed_content.strip_prefix('=') {
103            // Validate format name: alphanumeric only, no spaces
104            if !format_name.is_empty()
105                && format_name.chars().all(|c| c.is_alphanumeric())
106                && !format_name.contains(char::is_whitespace)
107            {
108                return InfoString {
109                    raw: raw.to_string(),
110                    block_type: CodeBlockType::Raw {
111                        format: format_name.to_string(),
112                    },
113                    attributes: Vec::new(),
114                };
115            }
116        }
117
118        // First, do a preliminary parse to determine block type
119        // Use chunk options parser (comma-aware) for initial detection
120        let prelim_attrs = Self::parse_chunk_options(content);
121
122        // First non-ID, non-attribute token determines if it's executable or display
123        let mut first_lang_token = None;
124        for (key, val) in prelim_attrs.iter() {
125            if val.is_none() && !key.starts_with('#') {
126                first_lang_token = Some(key.as_str());
127                break;
128            }
129        }
130
131        let first_token = first_lang_token.unwrap_or("");
132
133        if first_token.starts_with('.') {
134            // Display block: {.python} or {.haskell .numberLines}
135            // Re-parse with Pandoc-style parser (space-delimited)
136            let attrs = Self::parse_pandoc_attributes(content);
137
138            let classes: Vec<String> = attrs
139                .iter()
140                .filter(|(k, v)| k.starts_with('.') && v.is_none())
141                .map(|(k, _)| k[1..].to_string())
142                .collect();
143
144            let non_class_attrs: Vec<(String, Option<String>)> = attrs
145                .into_iter()
146                .filter(|(k, _)| !k.starts_with('.') || k.contains('='))
147                .collect();
148
149            InfoString {
150                raw: raw.to_string(),
151                block_type: CodeBlockType::DisplayExplicit { classes },
152                attributes: non_class_attrs,
153            }
154        } else if !first_token.is_empty() && !first_token.starts_with('#') {
155            // Executable chunk: {python} or {r}
156            // Use chunk options parser (comma-delimited)
157            let attrs = Self::parse_chunk_options(content);
158            let lang_index = attrs.iter().position(|(k, _)| k == first_token).unwrap();
159
160            // Check if there's a second bareword (implicit label in R/Quarto chunks)
161            // Pattern: {r mylabel} is equivalent to {r, label=mylabel}
162            let mut has_implicit_label = false;
163            let implicit_label_value = if lang_index + 1 < attrs.len() {
164                if let (label_key, None) = &attrs[lang_index + 1] {
165                    // Second bareword after language
166                    has_implicit_label = true;
167                    Some(label_key.clone())
168                } else {
169                    None
170                }
171            } else {
172                None
173            };
174
175            let mut final_attrs: Vec<(String, Option<String>)> = attrs
176                .into_iter()
177                .enumerate()
178                .filter(|(i, _)| {
179                    // Remove language token
180                    if *i == lang_index {
181                        return false;
182                    }
183                    // Remove implicit label token (will be added back explicitly)
184                    if has_implicit_label && *i == lang_index + 1 {
185                        return false;
186                    }
187                    true
188                })
189                .map(|(_, attr)| attr)
190                .collect();
191
192            // Add explicit label if we found an implicit one
193            if let Some(label_val) = implicit_label_value {
194                final_attrs.insert(0, ("label".to_string(), Some(label_val)));
195            }
196
197            InfoString {
198                raw: raw.to_string(),
199                block_type: CodeBlockType::Executable {
200                    language: first_token.to_string(),
201                },
202                attributes: final_attrs,
203            }
204        } else {
205            // Just attributes, no language - use Pandoc parser
206            let attrs = Self::parse_pandoc_attributes(content);
207            InfoString {
208                raw: raw.to_string(),
209                block_type: CodeBlockType::Plain,
210                attributes: attrs,
211            }
212        }
213    }
214
215    /// Parse Pandoc-style attributes for display blocks: {.class #id key="value"}
216    /// Spaces are the primary delimiter. Pandoc spec prefers explicit quoting.
217    fn parse_pandoc_attributes(content: &str) -> Vec<(String, Option<String>)> {
218        let mut attrs = Vec::new();
219        let mut chars = content.chars().peekable();
220
221        while chars.peek().is_some() {
222            // Skip whitespace
223            while matches!(chars.peek(), Some(&' ') | Some(&'\t')) {
224                chars.next();
225            }
226
227            if chars.peek().is_none() {
228                break;
229            }
230
231            // Read key
232            let mut key = String::new();
233            while let Some(&ch) = chars.peek() {
234                if ch == '=' || ch == ' ' || ch == '\t' {
235                    break;
236                }
237                key.push(ch);
238                chars.next();
239            }
240
241            if key.is_empty() {
242                break;
243            }
244
245            // Skip whitespace
246            while matches!(chars.peek(), Some(&' ') | Some(&'\t')) {
247                chars.next();
248            }
249
250            // Check for value
251            if chars.peek() == Some(&'=') {
252                chars.next(); // consume '='
253
254                // Skip whitespace after '='
255                while matches!(chars.peek(), Some(&' ') | Some(&'\t')) {
256                    chars.next();
257                }
258
259                // Read value (might be quoted)
260                let value = if chars.peek() == Some(&'"') {
261                    chars.next(); // consume opening quote
262                    let mut val = String::new();
263                    while let Some(&ch) = chars.peek() {
264                        chars.next();
265                        if ch == '"' {
266                            break;
267                        }
268                        if ch == '\\' {
269                            if let Some(&next_ch) = chars.peek() {
270                                chars.next();
271                                val.push(next_ch);
272                            }
273                        } else {
274                            val.push(ch);
275                        }
276                    }
277                    val
278                } else {
279                    // Unquoted value - read until space
280                    let mut val = String::new();
281                    while let Some(&ch) = chars.peek() {
282                        if ch == ' ' || ch == '\t' {
283                            break;
284                        }
285                        val.push(ch);
286                        chars.next();
287                    }
288                    val
289                };
290
291                attrs.push((key, Some(value)));
292            } else {
293                attrs.push((key, None));
294            }
295        }
296
297        attrs
298    }
299
300    /// Parse Quarto/RMarkdown chunk options: {language, option=value, option2=value2}
301    /// Commas are the primary delimiter (R CSV style). Supports unquoted barewords.
302    fn parse_chunk_options(content: &str) -> Vec<(String, Option<String>)> {
303        let mut attrs = Vec::new();
304        let mut chars = content.chars().peekable();
305
306        while chars.peek().is_some() {
307            // Skip whitespace and commas
308            while matches!(chars.peek(), Some(&' ') | Some(&'\t') | Some(&',')) {
309                chars.next();
310            }
311
312            if chars.peek().is_none() {
313                break;
314            }
315
316            // Read key
317            let mut key = String::new();
318            while let Some(&ch) = chars.peek() {
319                if ch == '=' || ch == ' ' || ch == '\t' || ch == ',' {
320                    break;
321                }
322                key.push(ch);
323                chars.next();
324            }
325
326            if key.is_empty() {
327                break;
328            }
329
330            // Skip whitespace and commas
331            while matches!(chars.peek(), Some(&' ') | Some(&'\t') | Some(&',')) {
332                chars.next();
333            }
334
335            // Check for value
336            if chars.peek() == Some(&'=') {
337                chars.next(); // consume '='
338
339                // Skip whitespace and commas after '='
340                while matches!(chars.peek(), Some(&' ') | Some(&'\t') | Some(&',')) {
341                    chars.next();
342                }
343
344                // Read value (might be quoted)
345                let value = if chars.peek() == Some(&'"') {
346                    chars.next(); // consume opening quote
347                    let mut val = String::new();
348                    while let Some(&ch) = chars.peek() {
349                        chars.next();
350                        if ch == '"' {
351                            break;
352                        }
353                        if ch == '\\' {
354                            if let Some(&next_ch) = chars.peek() {
355                                chars.next();
356                                val.push(next_ch);
357                            }
358                        } else {
359                            val.push(ch);
360                        }
361                    }
362                    val
363                } else {
364                    // Unquoted value - read until comma, space, or tab at depth 0
365                    // Track nesting depth for (), [], {} and quote state
366                    let mut val = String::new();
367                    let mut depth = 0; // Track parentheses/brackets/braces depth
368                    let mut in_quote: Option<char> = None; // Track if inside ' or "
369                    let mut escaped = false; // Track if previous char was backslash
370
371                    while let Some(&ch) = chars.peek() {
372                        // Handle escape sequences
373                        if escaped {
374                            val.push(ch);
375                            chars.next();
376                            escaped = false;
377                            continue;
378                        }
379
380                        if ch == '\\' {
381                            val.push(ch);
382                            chars.next();
383                            escaped = true;
384                            continue;
385                        }
386
387                        // Handle quotes
388                        if let Some(quote_char) = in_quote {
389                            val.push(ch);
390                            chars.next();
391                            if ch == quote_char {
392                                in_quote = None; // Close quote
393                            }
394                            continue;
395                        }
396
397                        // Not in a quote - check for quote start
398                        if ch == '"' || ch == '\'' {
399                            in_quote = Some(ch);
400                            val.push(ch);
401                            chars.next();
402                            continue;
403                        }
404
405                        // Track nesting depth (only when not in quotes)
406                        if ch == '(' || ch == '[' || ch == '{' {
407                            depth += 1;
408                            val.push(ch);
409                            chars.next();
410                            continue;
411                        }
412
413                        if ch == ')' || ch == ']' || ch == '}' {
414                            depth -= 1;
415                            val.push(ch);
416                            chars.next();
417                            continue;
418                        }
419
420                        // Check for delimiters - only break at depth 0
421                        if depth == 0 && (ch == ' ' || ch == '\t' || ch == ',') {
422                            break;
423                        }
424
425                        // Regular character
426                        val.push(ch);
427                        chars.next();
428                    }
429                    val
430                };
431
432                attrs.push((key, Some(value)));
433            } else {
434                attrs.push((key, None));
435            }
436        }
437
438        attrs
439    }
440
441    /// Legacy function - kept for backward compatibility in mixed-form parsing
442    /// For new code, use parse_pandoc_attributes or parse_chunk_options
443    fn parse_attributes(content: &str) -> Vec<(String, Option<String>)> {
444        // Default to chunk options parsing (comma-aware)
445        Self::parse_chunk_options(content)
446    }
447}
448
/// Information about a detected code fence opening.
///
/// Values built by `try_parse_fence_open` always have a `fence_char` of
/// '`' or '~' and a `fence_count` of at least 3.
#[derive(Debug, Clone)]
pub(crate) struct FenceInfo {
    /// The character the fence is made of.
    pub fence_char: char,
    /// Number of consecutive fence characters in the opening fence; a closing
    /// fence needs at least this many (see `is_closing_fence`).
    pub fence_count: usize,
    /// Text following the fence characters, with the line ending and at most
    /// one leading space removed.
    pub info_string: String,
}
456
457pub(crate) fn is_gfm_math_fence(fence: &FenceInfo) -> bool {
458    fence.info_string.trim() == "math"
459}
460
461/// Try to detect a fenced code block opening from content.
462/// Returns fence info if this is a valid opening fence.
463pub(crate) fn try_parse_fence_open(content: &str) -> Option<FenceInfo> {
464    let trimmed = strip_leading_spaces(content);
465
466    // Check for fence opening (``` or ~~~)
467    let (fence_char, fence_count) = if trimmed.starts_with('`') {
468        let count = trimmed.chars().take_while(|&c| c == '`').count();
469        ('`', count)
470    } else if trimmed.starts_with('~') {
471        let count = trimmed.chars().take_while(|&c| c == '~').count();
472        ('~', count)
473    } else {
474        return None;
475    };
476
477    if fence_count < 3 {
478        return None;
479    }
480
481    let info_string_raw = &trimmed[fence_count..];
482    // Strip trailing newline (LF or CRLF) and at most one leading space
483    let (info_string_trimmed, _) = strip_newline(info_string_raw);
484    let info_string = if let Some(stripped) = info_string_trimmed.strip_prefix(' ') {
485        stripped.to_string()
486    } else {
487        info_string_trimmed.to_string()
488    };
489
490    // Backtick-fenced blocks cannot have backticks in the info string.
491    if fence_char == '`' && info_string.contains('`') {
492        return None;
493    }
494
495    Some(FenceInfo {
496        fence_char,
497        fence_count,
498        info_string,
499    })
500}
501
/// Strip the container prefix from the line that opens a fenced code block,
/// emitting the tokens this function is responsible for, and return the
/// remaining text.
///
/// Returns `(trimmed, untrimmed)`: `untrimmed` is the line with the container
/// prefix removed, and `trimmed` is that same text after
/// `strip_leading_spaces`. Any leading spaces removed between the two are
/// emitted as one WHITESPACE token.
///
/// When `first_line_override` is `Some`, it is taken as the already-stripped
/// line. The only prefix handling left is emitting blockquote prefix tokens
/// for the part of `source_line` that `strip_n_blockquote_markers` removes,
/// and only when `bq_depth > 0` and `source_line` differs from the override.
///
/// Otherwise the prefix is stripped from `source_line` itself: blockquote
/// markers and list indent in the order selected by `bq_outer`, then
/// `content_indent` columns.
// The argument count reflects the container state passed in by the caller.
#[allow(clippy::too_many_arguments)]
fn prepare_fence_open_line<'a>(
    builder: &mut GreenNodeBuilder<'static>,
    source_line: &'a str,
    first_line_override: Option<&'a str>,
    bq_depth: usize,
    list_content_col: usize,
    list_marker_consumed_on_line_0: bool,
    bq_outer: bool,
    content_indent: usize,
) -> (&'a str, &'a str) {
    // Strip the active container prefix on line 0 in container-stack
    // order. Bq markers are always upstream-emitted by the blockquote
    // dispatch and silently consumed here. The list_content_col indent
    // is upstream-emitted only on a marker-line dispatch
    // (`list_marker_consumed_on_line_0=true`); on continuation-line
    // dispatch it must be emitted here as WHITESPACE. Adjacent
    // WHITESPACE emissions are coalesced into one token for
    // byte-range-equivalent CST stability.
    if let Some(first_line) = first_line_override {
        if bq_depth > 0 && source_line != first_line {
            let stripped = strip_n_blockquote_markers(source_line, bq_depth);
            // Bytes removed from the front of `source_line` by the strip.
            let prefix_len = source_line.len().saturating_sub(stripped.len());
            if prefix_len > 0 {
                emit_blockquote_prefix_tokens(builder, &source_line[..prefix_len]);
            }
        }
        let first_trimmed = strip_leading_spaces(first_line);
        let leading_ws_len = first_line.len().saturating_sub(first_trimmed.len());
        if leading_ws_len > 0 {
            builder.token(SyntaxKind::WHITESPACE.into(), &first_line[..leading_ws_len]);
        }
        return (first_trimmed, first_line);
    }

    // `s` is the not-yet-consumed tail of `source_line`; its offset within
    // `source_line` is always `source_line.len() - s.len()`.
    let mut s: &'a str = source_line;
    // Start offset of a WHITESPACE run that has been consumed but not yet
    // emitted, so adjacent runs can be flushed as a single token.
    let mut pending_ws_start: Option<usize> = None;
    let suppress_list = list_marker_consumed_on_line_0;

    // Emit `source_line[start..current_offset]` as WHITESPACE if a run is
    // pending and non-empty, then clear the pending marker either way.
    let flush_ws = |builder: &mut GreenNodeBuilder<'static>,
                    pending: &mut Option<usize>,
                    current_offset: usize| {
        if let Some(start) = *pending
            && current_offset > start
        {
            builder.token(
                SyntaxKind::WHITESPACE.into(),
                &source_line[start..current_offset],
            );
        }
        *pending = None;
    };

    // Advance `s` past the list-content indent, recording it as pending
    // whitespace unless it was already emitted upstream.
    let do_strip_list = |s: &mut &'a str, pending: &mut Option<usize>| {
        if list_content_col == 0 {
            return;
        }
        // On a marker-line dispatch (`suppress_list=true`), the list
        // marker bytes have already been emitted upstream and may not
        // be whitespace (e.g. `- > ```` has a leading `-`). Use
        // `advance_columns` which counts columns through any char.
        // On continuation lines, the leading bytes ARE whitespace
        // (the list-content-indent) so use the whitespace-only
        // `strip_list_indent` to stop at non-whitespace.
        let stripped = if suppress_list {
            advance_columns(s, list_content_col)
        } else {
            strip_list_indent(s, list_content_col)
        };
        let consumed = s.len() - stripped.len();
        if consumed > 0 {
            let start = source_line.len() - s.len();
            if !suppress_list && pending.is_none() {
                *pending = Some(start);
            }
            *s = stripped;
        }
    };

    // Flush any pending whitespace (it must precede the markers in the
    // token stream), then advance `s` past the blockquote markers without
    // emitting them here.
    let do_strip_bq =
        |builder: &mut GreenNodeBuilder<'static>, s: &mut &'a str, pending: &mut Option<usize>| {
            if bq_depth == 0 {
                return;
            }
            let current_offset = source_line.len() - s.len();
            flush_ws(builder, pending, current_offset);
            *s = strip_n_blockquote_markers(s, bq_depth);
        };

    // Strip in container-stack order: outermost container first.
    if bq_outer {
        do_strip_bq(builder, &mut s, &mut pending_ws_start);
        do_strip_list(&mut s, &mut pending_ws_start);
    } else {
        do_strip_list(&mut s, &mut pending_ws_start);
        do_strip_bq(builder, &mut s, &mut pending_ws_start);
    }

    // content_indent (footnote/definition) — always emit as WHITESPACE.
    if content_indent > 0 {
        let indent_bytes = byte_index_at_column(s, content_indent);
        if s.len() >= indent_bytes && indent_bytes > 0 {
            let start = source_line.len() - s.len();
            // Extend an already-pending run instead of starting a new one.
            if pending_ws_start.is_none() {
                pending_ws_start = Some(start);
            }
            s = &s[indent_bytes..];
        }
    }

    // Emit whatever whitespace is still pending as one token.
    let final_offset = source_line.len() - s.len();
    flush_ws(builder, &mut pending_ws_start, final_offset);

    // Leading spaces in front of the fence itself get their own token.
    let first_trimmed = strip_leading_spaces(s);
    let leading_ws_len = s.len().saturating_sub(first_trimmed.len());
    if leading_ws_len > 0 {
        builder.token(SyntaxKind::WHITESPACE.into(), &s[..leading_ws_len]);
    }
    (first_trimmed, s)
}
621
622fn strip_content_line_prefixes(
623    content_line: &str,
624    bq_depth: usize,
625    list_content_col: usize,
626    bq_outer: bool,
627    content_indent: usize,
628) -> &str {
629    let after_bq_and_list = if bq_outer {
630        let after_bq = if bq_depth > 0 {
631            strip_n_blockquote_markers(content_line, bq_depth)
632        } else {
633            content_line
634        };
635        strip_list_indent(after_bq, list_content_col)
636    } else {
637        let after_list = strip_list_indent(content_line, list_content_col);
638        if bq_depth > 0 {
639            strip_n_blockquote_markers(after_list, bq_depth)
640        } else {
641            after_list
642        }
643    };
644
645    let indent_bytes = byte_index_at_column(after_bq_and_list, content_indent);
646    if content_indent > 0 && after_bq_and_list.len() >= indent_bytes {
647        &after_bq_and_list[indent_bytes..]
648    } else {
649        after_bq_and_list
650    }
651}
652
653pub(crate) fn compute_hashpipe_preamble_line_count(
654    content_lines: &[&str],
655    prefix: &str,
656    bq_depth: usize,
657    list_content_col: usize,
658    bq_outer: bool,
659    content_indent: usize,
660) -> usize {
661    let mut line_idx = 0usize;
662
663    while line_idx < content_lines.len() {
664        let preview_after_indent = strip_content_line_prefixes(
665            content_lines[line_idx],
666            bq_depth,
667            list_content_col,
668            bq_outer,
669            content_indent,
670        );
671        let (preview_without_newline, _) = strip_newline(preview_after_indent);
672        if !is_hashpipe_option_line(preview_without_newline, prefix)
673            && !is_hashpipe_continuation_line(preview_without_newline, prefix)
674        {
675            break;
676        }
677        line_idx += 1;
678    }
679
680    line_idx
681}
682
683fn emit_hashpipe_option_line(
684    builder: &mut GreenNodeBuilder<'static>,
685    line_without_newline: &str,
686    prefix: &str,
687) -> bool {
688    if !is_hashpipe_option_line(line_without_newline, prefix) {
689        return false;
690    }
691
692    let trimmed_start = trim_start_spaces_tabs(line_without_newline);
693    let leading_ws_len = line_without_newline
694        .len()
695        .saturating_sub(trimmed_start.len());
696    let after_prefix = &trimmed_start[prefix.len()..];
697    let ws_after_prefix_len = after_prefix
698        .len()
699        .saturating_sub(trim_start_spaces_tabs(after_prefix).len());
700    let rest = &after_prefix[ws_after_prefix_len..];
701    let Some(colon_idx) = rest.find(':') else {
702        return false;
703    };
704
705    let key_with_ws = &rest[..colon_idx];
706    let key = trim_end_spaces_tabs(key_with_ws);
707    if key.is_empty() {
708        return false;
709    }
710    let key_ws_suffix = &key_with_ws[key.len()..];
711
712    let after_colon = &rest[colon_idx + 1..];
713    let value_ws_prefix_len = after_colon
714        .len()
715        .saturating_sub(trim_start_spaces_tabs(after_colon).len());
716    let value_with_trailing = &after_colon[value_ws_prefix_len..];
717    let value = trim_end_spaces_tabs(value_with_trailing);
718    let value_ws_suffix = &value_with_trailing[value.len()..];
719
720    builder.start_node(SyntaxKind::CHUNK_OPTION.into());
721    if leading_ws_len > 0 {
722        builder.token(
723            SyntaxKind::WHITESPACE.into(),
724            &line_without_newline[..leading_ws_len],
725        );
726    }
727    builder.token(SyntaxKind::HASHPIPE_PREFIX.into(), prefix);
728    if ws_after_prefix_len > 0 {
729        builder.token(
730            SyntaxKind::WHITESPACE.into(),
731            &after_prefix[..ws_after_prefix_len],
732        );
733    }
734
735    builder.token(SyntaxKind::CHUNK_OPTION_KEY.into(), key);
736    if !key_ws_suffix.is_empty() {
737        builder.token(SyntaxKind::WHITESPACE.into(), key_ws_suffix);
738    }
739    builder.token(SyntaxKind::TEXT.into(), ":");
740    if value_ws_prefix_len > 0 {
741        builder.token(
742            SyntaxKind::WHITESPACE.into(),
743            &after_colon[..value_ws_prefix_len],
744        );
745    }
746
747    if !value.is_empty() {
748        if let Some(quote) = value.chars().next()
749            && (quote == '"' || quote == '\'')
750            && value.ends_with(quote)
751            && value.len() >= 2
752        {
753            builder.token(SyntaxKind::CHUNK_OPTION_QUOTE.into(), &value[..1]);
754            builder.token(
755                SyntaxKind::CHUNK_OPTION_VALUE.into(),
756                &value[1..value.len() - 1],
757            );
758            builder.token(
759                SyntaxKind::CHUNK_OPTION_QUOTE.into(),
760                &value[value.len() - 1..],
761            );
762        } else {
763            builder.token(SyntaxKind::CHUNK_OPTION_VALUE.into(), value);
764        }
765    }
766
767    if !value_ws_suffix.is_empty() {
768        builder.token(SyntaxKind::WHITESPACE.into(), value_ws_suffix);
769    }
770    builder.finish_node();
771    true
772}
773
774fn emit_hashpipe_continuation_line(
775    builder: &mut GreenNodeBuilder<'static>,
776    line_without_newline: &str,
777    prefix: &str,
778) -> bool {
779    if !is_hashpipe_continuation_line(line_without_newline, prefix) {
780        return false;
781    }
782    let trimmed_start = trim_start_spaces_tabs(line_without_newline);
783    let leading_ws_len = line_without_newline
784        .len()
785        .saturating_sub(trimmed_start.len());
786    let after_prefix = &trimmed_start[prefix.len()..];
787    let ws_after_prefix_len = after_prefix
788        .len()
789        .saturating_sub(trim_start_spaces_tabs(after_prefix).len());
790    let continuation_with_trailing = &after_prefix[ws_after_prefix_len..];
791    let continuation_value = trim_end_spaces_tabs(continuation_with_trailing);
792    if continuation_value.is_empty() {
793        return false;
794    }
795    let continuation_ws_suffix = &continuation_with_trailing[continuation_value.len()..];
796
797    builder.start_node(SyntaxKind::CHUNK_OPTION.into());
798    if leading_ws_len > 0 {
799        builder.token(
800            SyntaxKind::WHITESPACE.into(),
801            &line_without_newline[..leading_ws_len],
802        );
803    }
804    builder.token(SyntaxKind::HASHPIPE_PREFIX.into(), prefix);
805    if ws_after_prefix_len > 0 {
806        builder.token(
807            SyntaxKind::WHITESPACE.into(),
808            &after_prefix[..ws_after_prefix_len],
809        );
810    }
811    builder.token(SyntaxKind::CHUNK_OPTION_VALUE.into(), continuation_value);
812    if !continuation_ws_suffix.is_empty() {
813        builder.token(SyntaxKind::WHITESPACE.into(), continuation_ws_suffix);
814    }
815    builder.finish_node();
816    true
817}
818
819fn is_hashpipe_option_line(line_without_newline: &str, prefix: &str) -> bool {
820    let trimmed_start = trim_start_spaces_tabs(line_without_newline);
821    if !trimmed_start.starts_with(prefix) {
822        return false;
823    }
824    let after_prefix = &trimmed_start[prefix.len()..];
825    let rest = trim_start_spaces_tabs(after_prefix);
826    let Some(colon_idx) = rest.find(':') else {
827        return false;
828    };
829    let key = trim_end_spaces_tabs(&rest[..colon_idx]);
830    if key.is_empty() {
831        return false;
832    }
833    true
834}
835
836fn is_hashpipe_continuation_line(line_without_newline: &str, prefix: &str) -> bool {
837    let trimmed_start = trim_start_spaces_tabs(line_without_newline);
838    if !trimmed_start.starts_with(prefix) {
839        return false;
840    }
841    let after_prefix = &trimmed_start[prefix.len()..];
842    let Some(first) = after_prefix.chars().next() else {
843        return false;
844    };
845    if first != ' ' && first != '\t' {
846        return false;
847    }
848    !trim_start_spaces_tabs(after_prefix).is_empty()
849}
850
851/// Check if a line is a valid closing fence for the given fence info.
852pub(crate) fn is_closing_fence(content: &str, fence: &FenceInfo) -> bool {
853    let trimmed = strip_leading_spaces(content);
854
855    if !trimmed.starts_with(fence.fence_char) {
856        return false;
857    }
858
859    let closing_count = trimmed
860        .chars()
861        .take_while(|&c| c == fence.fence_char)
862        .count();
863
864    if closing_count < fence.fence_count {
865        return false;
866    }
867
868    // Rest of line must be empty
869    trimmed[closing_count..].trim().is_empty()
870}
871
872/// Emit chunk options as structured CST nodes while preserving all bytes.
873/// This parses {r, echo=TRUE, fig.cap="text"} into CHUNK_OPTIONS with individual CHUNK_OPTION nodes.
874fn emit_chunk_options(builder: &mut GreenNodeBuilder<'static>, content: &str) {
875    if content.trim().is_empty() {
876        builder.token(SyntaxKind::TEXT.into(), content);
877        return;
878    }
879
880    builder.start_node(SyntaxKind::CHUNK_OPTIONS.into());
881
882    let mut pos = 0;
883    let bytes = content.as_bytes();
884
885    while pos < bytes.len() {
886        // Emit leading whitespace/commas as TEXT
887        let ws_start = pos;
888        while pos < bytes.len() {
889            let ch = bytes[pos] as char;
890            if ch != ' ' && ch != '\t' && ch != ',' {
891                break;
892            }
893            pos += 1;
894        }
895        if pos > ws_start {
896            builder.token(SyntaxKind::TEXT.into(), &content[ws_start..pos]);
897        }
898
899        if pos >= bytes.len() {
900            break;
901        }
902
903        // Check if this is a closing brace
904        if bytes[pos] as char == '}' {
905            builder.token(SyntaxKind::TEXT.into(), &content[pos..pos + 1]);
906            pos += 1;
907            if pos < bytes.len() {
908                builder.token(SyntaxKind::TEXT.into(), &content[pos..]);
909            }
910            break;
911        }
912
913        // Read key
914        let key_start = pos;
915        while pos < bytes.len() {
916            let ch = bytes[pos] as char;
917            if ch == '=' || ch == ' ' || ch == '\t' || ch == ',' || ch == '}' {
918                break;
919            }
920            pos += 1;
921        }
922
923        if pos == key_start {
924            // No key found, emit rest as TEXT
925            if pos < bytes.len() {
926                builder.token(SyntaxKind::TEXT.into(), &content[pos..]);
927            }
928            break;
929        }
930
931        let key = &content[key_start..pos];
932
933        // Check for whitespace before '='
934        let ws_before_eq_start = pos;
935        while pos < bytes.len() && matches!(bytes[pos] as char, ' ' | '\t') {
936            pos += 1;
937        }
938
939        // Check if there's a value (=)
940        if pos < bytes.len() && bytes[pos] as char == '=' {
941            // Has value - emit as CHUNK_OPTION
942            builder.start_node(SyntaxKind::CHUNK_OPTION.into());
943            builder.token(SyntaxKind::CHUNK_OPTION_KEY.into(), key);
944
945            // Emit whitespace before '=' if any
946            if pos > ws_before_eq_start {
947                builder.token(SyntaxKind::TEXT.into(), &content[ws_before_eq_start..pos]);
948            }
949
950            builder.token(SyntaxKind::TEXT.into(), "=");
951            pos += 1; // consume '='
952
953            // Emit whitespace after '='
954            let ws_after_eq_start = pos;
955            while pos < bytes.len() && matches!(bytes[pos] as char, ' ' | '\t') {
956                pos += 1;
957            }
958            if pos > ws_after_eq_start {
959                builder.token(SyntaxKind::TEXT.into(), &content[ws_after_eq_start..pos]);
960            }
961
962            // Parse value (might be quoted)
963            if pos < bytes.len() {
964                let quote_char = bytes[pos] as char;
965                if quote_char == '"' || quote_char == '\'' {
966                    // Quoted value
967                    builder.token(
968                        SyntaxKind::CHUNK_OPTION_QUOTE.into(),
969                        &content[pos..pos + 1],
970                    );
971                    pos += 1; // consume opening quote
972
973                    let val_start = pos;
974                    let mut escaped = false;
975                    while pos < bytes.len() {
976                        let ch = bytes[pos] as char;
977                        if !escaped && ch == quote_char {
978                            break;
979                        }
980                        escaped = !escaped && ch == '\\';
981                        pos += 1;
982                    }
983
984                    if pos > val_start {
985                        builder.token(
986                            SyntaxKind::CHUNK_OPTION_VALUE.into(),
987                            &content[val_start..pos],
988                        );
989                    }
990
991                    // Emit closing quote
992                    if pos < bytes.len() && bytes[pos] as char == quote_char {
993                        builder.token(
994                            SyntaxKind::CHUNK_OPTION_QUOTE.into(),
995                            &content[pos..pos + 1],
996                        );
997                        pos += 1;
998                    }
999                } else {
1000                    // Unquoted value - read until comma, space, closing brace, or balanced delimiter
1001                    let val_start = pos;
1002                    let mut depth = 0;
1003
1004                    while pos < bytes.len() {
1005                        let ch = bytes[pos] as char;
1006                        match ch {
1007                            '(' | '[' | '{' => depth += 1,
1008                            ')' | ']' => {
1009                                if depth > 0 {
1010                                    depth -= 1;
1011                                } else {
1012                                    break;
1013                                }
1014                            }
1015                            '}' => {
1016                                if depth > 0 {
1017                                    depth -= 1;
1018                                } else {
1019                                    break; // End of chunk options
1020                                }
1021                            }
1022                            ',' if depth == 0 => {
1023                                break; // Next option
1024                            }
1025                            ' ' | '\t' if depth == 0 => {
1026                                break; // Space separator
1027                            }
1028                            _ => {}
1029                        }
1030                        pos += 1;
1031                    }
1032
1033                    if pos > val_start {
1034                        builder.token(
1035                            SyntaxKind::CHUNK_OPTION_VALUE.into(),
1036                            &content[val_start..pos],
1037                        );
1038                    }
1039                }
1040            }
1041
1042            builder.finish_node(); // CHUNK_OPTION
1043        } else {
1044            // No '=' - this is a label or bareword option
1045            // Emit any whitespace we skipped as TEXT
1046            if pos > ws_before_eq_start {
1047                builder.start_node(SyntaxKind::CHUNK_LABEL.into());
1048                builder.token(SyntaxKind::TEXT.into(), key);
1049                builder.finish_node(); // CHUNK_LABEL
1050                builder.token(SyntaxKind::TEXT.into(), &content[ws_before_eq_start..pos]);
1051            } else {
1052                builder.start_node(SyntaxKind::CHUNK_LABEL.into());
1053                builder.token(SyntaxKind::TEXT.into(), key);
1054                builder.finish_node(); // CHUNK_LABEL
1055            }
1056        }
1057    }
1058
1059    builder.finish_node(); // CHUNK_OPTIONS
1060}
1061
1062/// Helper to parse info string and emit CodeInfo node with parsed components.
1063/// This breaks down the info string into its logical parts while preserving all bytes.
1064fn emit_code_info_node(builder: &mut GreenNodeBuilder<'static>, info_string: &str) {
1065    builder.start_node(SyntaxKind::CODE_INFO.into());
1066
1067    let info = InfoString::parse(info_string);
1068
1069    match &info.block_type {
1070        CodeBlockType::DisplayShortcut { language } => {
1071            // Simple case: python or python {.class}
1072            builder.token(SyntaxKind::CODE_LANGUAGE.into(), language);
1073
1074            // If there's more after the language, emit it as TEXT
1075            let after_lang = &info_string[language.len()..];
1076            if !after_lang.is_empty() {
1077                builder.token(SyntaxKind::TEXT.into(), after_lang);
1078            }
1079        }
1080        CodeBlockType::Executable { language } => {
1081            // Quarto: {r} or {r my-label, echo=FALSE}
1082            builder.token(SyntaxKind::TEXT.into(), "{");
1083            builder.token(SyntaxKind::CODE_LANGUAGE.into(), language);
1084
1085            // Parse and emit chunk options
1086            let start_offset = 1 + language.len(); // Skip "{r"
1087            if start_offset < info_string.len() {
1088                let rest = &info_string[start_offset..];
1089                emit_chunk_options(builder, rest);
1090            }
1091        }
1092        CodeBlockType::DisplayExplicit { classes } => {
1093            // Pandoc: {.python} or {#id .haskell .numberLines}
1094            // We need to find the first class in the raw string and emit everything around it
1095
1096            if let Some(lang) = classes.first() {
1097                // Find where ".lang" appears in the info string
1098                let needle = format!(".{}", lang);
1099                if let Some(lang_start) = info_string.find(&needle) {
1100                    // Emit everything before the language
1101                    if lang_start > 0 {
1102                        builder.token(SyntaxKind::TEXT.into(), &info_string[..lang_start]);
1103                    }
1104
1105                    // Emit the dot
1106                    builder.token(SyntaxKind::TEXT.into(), ".");
1107
1108                    // Emit the language
1109                    builder.token(SyntaxKind::CODE_LANGUAGE.into(), lang);
1110
1111                    // Emit everything after
1112                    let after_lang_start = lang_start + 1 + lang.len();
1113                    if after_lang_start < info_string.len() {
1114                        builder.token(SyntaxKind::TEXT.into(), &info_string[after_lang_start..]);
1115                    }
1116                } else {
1117                    // Couldn't find it, just emit as TEXT
1118                    builder.token(SyntaxKind::TEXT.into(), info_string);
1119                }
1120            } else {
1121                // No classes
1122                builder.token(SyntaxKind::TEXT.into(), info_string);
1123            }
1124        }
1125        CodeBlockType::Raw { .. } | CodeBlockType::Plain => {
1126            // No language, just emit as TEXT
1127            builder.token(SyntaxKind::TEXT.into(), info_string);
1128        }
1129    }
1130
1131    builder.finish_node(); // CodeInfo
1132}
1133
1134/// Parse a fenced code block, consuming lines from the parser.
1135/// Parse a fenced code block, consuming lines from the parser.
1136/// Returns the new position after the code block.
1137///
1138/// All container geometry (blockquote depth, list-item indent,
1139/// footnote/definition base indent, and the bq-vs-list strip order) is
1140/// derived from `window.prefix()`; detection scans and the open-fence
1141/// emitter read those derived scalars, and content/closing-fence lines
1142/// re-emit their container prefix via [`StrippedLines::emit_prefix_at`].
1143pub(crate) fn parse_fenced_code_block(
1144    builder: &mut GreenNodeBuilder<'static>,
1145    window: &StrippedLines<'_, '_>,
1146    fence: FenceInfo,
1147    first_line_override: Option<&str>,
1148) -> usize {
1149    let lines = window.raw();
1150    let start_pos = window.pos();
1151    let prefix = window.prefix();
1152    let bq_depth = prefix.bq_depth();
1153    let list_content_col = prefix.list_content_col();
1154    let list_marker_consumed_on_line_0 = prefix.list_marker_consumed_on_line_0;
1155    let bq_outer = bq_outer_of_list(prefix);
1156    let content_indent = prefix.content_indent();
1157
1158    // Start code block
1159    builder.start_node(SyntaxKind::CODE_BLOCK.into());
1160
1161    // Opening fence
1162    let (first_trimmed, _first_inner) = prepare_fence_open_line(
1163        builder,
1164        lines[start_pos],
1165        first_line_override,
1166        bq_depth,
1167        list_content_col,
1168        list_marker_consumed_on_line_0,
1169        bq_outer,
1170        content_indent,
1171    );
1172
1173    builder.start_node(SyntaxKind::CODE_FENCE_OPEN.into());
1174    builder.token(
1175        SyntaxKind::CODE_FENCE_MARKER.into(),
1176        &first_trimmed[..fence.fence_count],
1177    );
1178
1179    // Emit any space between fence and info string (for losslessness)
1180    let after_fence = &first_trimmed[fence.fence_count..];
1181    if let Some(_space_stripped) = after_fence.strip_prefix(' ') {
1182        // There was a space - emit it as WHITESPACE
1183        builder.token(SyntaxKind::WHITESPACE.into(), " ");
1184        // Parse and emit the info string as a structured node
1185        if !fence.info_string.is_empty() {
1186            emit_code_info_node(builder, &fence.info_string);
1187        }
1188    } else if !fence.info_string.is_empty() {
1189        // No space - parse and emit info_string as a structured node
1190        emit_code_info_node(builder, &fence.info_string);
1191    }
1192
1193    // Extract and emit the actual newline from the opening fence line
1194    let (_, newline_str) = strip_newline(first_trimmed);
1195    if !newline_str.is_empty() {
1196        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1197    }
1198    builder.finish_node(); // CodeFenceOpen
1199
1200    let mut current_pos = start_pos + 1;
1201    let mut content_lines: Vec<&str> = Vec::new(); // Store original lines for lossless parsing
1202    let mut found_closing = false;
1203
1204    while current_pos < lines.len() {
1205        let line = lines[current_pos];
1206
1207        // Count blockquote markers to detect leaving the surrounding
1208        // blockquote. For bq_outer=true probe the raw line (bq markers
1209        // lead); for bq_outer=false strip the list indent first, then
1210        // probe the post-list slice. This forward-scan termination has no
1211        // `StrippedLines` equivalent, so it stays inline.
1212        let probe = if bq_outer {
1213            line
1214        } else {
1215            strip_list_indent(line, list_content_col)
1216        };
1217        let (line_bq_depth, _) = count_blockquote_markers(probe);
1218        if line_bq_depth < bq_depth {
1219            break;
1220        }
1221
1222        // Detection only (emits nothing): the same 2-bucket container
1223        // strip the emission path applies via `emit_content_line_prefixes`
1224        // / `emit_prefix_at`, kept here rather than `strip_at` (a per-op
1225        // walk) to stay byte-identical in interleaved nesting.
1226        let inner_stripped =
1227            strip_content_line_prefixes(line, bq_depth, list_content_col, bq_outer, content_indent);
1228
1229        if is_closing_fence(inner_stripped, &fence) {
1230            found_closing = true;
1231            current_pos += 1;
1232            break;
1233        }
1234
1235        content_lines.push(line);
1236        current_pos += 1;
1237    }
1238
1239    // Add content
1240    if !content_lines.is_empty() {
1241        builder.start_node(SyntaxKind::CODE_CONTENT.into());
1242        let hashpipe_prefix = match InfoString::parse(&fence.info_string).block_type {
1243            CodeBlockType::Executable { language } => hashpipe_comment_prefix(&language),
1244            _ => None,
1245        };
1246
1247        let mut line_idx = 0usize;
1248        if let Some(prefix) = hashpipe_prefix {
1249            let prepared_hashpipe_lines = compute_hashpipe_preamble_line_count(
1250                &content_lines,
1251                prefix,
1252                bq_depth,
1253                list_content_col,
1254                bq_outer,
1255                content_indent,
1256            );
1257            if prepared_hashpipe_lines > 0 {
1258                builder.start_node(SyntaxKind::HASHPIPE_YAML_PREAMBLE.into());
1259                builder.start_node(SyntaxKind::HASHPIPE_YAML_CONTENT.into());
1260                while line_idx < prepared_hashpipe_lines {
1261                    let after_indent = window.emit_prefix_at(builder, start_pos + 1 + line_idx);
1262                    let (line_without_newline, newline_str) = strip_newline(after_indent);
1263                    if !emit_hashpipe_option_line(builder, line_without_newline, prefix) {
1264                        let _ =
1265                            emit_hashpipe_continuation_line(builder, line_without_newline, prefix);
1266                    }
1267                    if !newline_str.is_empty() {
1268                        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1269                    }
1270                    line_idx += 1;
1271                }
1272                builder.finish_node(); // HASHPIPE_YAML_CONTENT
1273                builder.finish_node(); // HASHPIPE_YAML_PREAMBLE
1274            }
1275        }
1276
1277        for k in line_idx..content_lines.len() {
1278            let after_indent = window.emit_prefix_at(builder, start_pos + 1 + k);
1279            let (line_without_newline, newline_str) = strip_newline(after_indent);
1280
1281            if !line_without_newline.is_empty() {
1282                builder.token(SyntaxKind::TEXT.into(), line_without_newline);
1283            }
1284
1285            if !newline_str.is_empty() {
1286                builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1287            }
1288        }
1289        builder.finish_node(); // CodeContent
1290    }
1291
1292    // Closing fence (if found)
1293    if found_closing {
1294        let closing_stripped = window.emit_prefix_at(builder, current_pos - 1);
1295        let (closing_without_newline, newline_str) = strip_newline(closing_stripped);
1296        let closing_trimmed_start = strip_leading_spaces(closing_without_newline);
1297        let leading_ws_len = closing_without_newline.len() - closing_trimmed_start.len();
1298        let closing_count = closing_trimmed_start
1299            .chars()
1300            .take_while(|&c| c == fence.fence_char)
1301            .count();
1302        let trailing_after_marker = &closing_trimmed_start[closing_count..];
1303
1304        builder.start_node(SyntaxKind::CODE_FENCE_CLOSE.into());
1305        if leading_ws_len > 0 {
1306            builder.token(
1307                SyntaxKind::WHITESPACE.into(),
1308                &closing_without_newline[..leading_ws_len],
1309            );
1310        }
1311        builder.token(
1312            SyntaxKind::CODE_FENCE_MARKER.into(),
1313            &closing_trimmed_start[..closing_count],
1314        );
1315        if !trailing_after_marker.is_empty() {
1316            builder.token(SyntaxKind::WHITESPACE.into(), trailing_after_marker);
1317        }
1318        if !newline_str.is_empty() {
1319            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1320        }
1321        builder.finish_node(); // CodeFenceClose
1322    }
1323
1324    builder.finish_node(); // CodeBlock
1325
1326    current_pos
1327}
1328
1329/// Parse a GFM math fence (``` math ... ```) as DISPLAY_MATH while preserving bytes.
1330///
1331/// Container geometry is derived from `window.prefix()`, mirroring
1332/// [`parse_fenced_code_block`].
1333pub(crate) fn parse_fenced_math_block(
1334    builder: &mut GreenNodeBuilder<'static>,
1335    window: &StrippedLines<'_, '_>,
1336    fence: FenceInfo,
1337    first_line_override: Option<&str>,
1338) -> usize {
1339    let lines = window.raw();
1340    let start_pos = window.pos();
1341    let prefix = window.prefix();
1342    let bq_depth = prefix.bq_depth();
1343    let list_content_col = prefix.list_content_col();
1344    let list_marker_consumed_on_line_0 = prefix.list_marker_consumed_on_line_0;
1345    let bq_outer = bq_outer_of_list(prefix);
1346    let content_indent = prefix.content_indent();
1347
1348    builder.start_node(SyntaxKind::DISPLAY_MATH.into());
1349
1350    let (first_trimmed, _first_inner) = prepare_fence_open_line(
1351        builder,
1352        lines[start_pos],
1353        first_line_override,
1354        bq_depth,
1355        list_content_col,
1356        list_marker_consumed_on_line_0,
1357        bq_outer,
1358        content_indent,
1359    );
1360    let (opening_without_newline, opening_newline) = strip_newline(first_trimmed);
1361    builder.token(
1362        SyntaxKind::DISPLAY_MATH_MARKER.into(),
1363        opening_without_newline,
1364    );
1365    if !opening_newline.is_empty() {
1366        builder.token(SyntaxKind::NEWLINE.into(), opening_newline);
1367    }
1368
1369    let mut current_pos = start_pos + 1;
1370    let mut content_lines: Vec<&str> = Vec::new();
1371    let mut found_closing = false;
1372
1373    while current_pos < lines.len() {
1374        let line = lines[current_pos];
1375
1376        // Forward-scan termination on blockquote depth — stays inline (no
1377        // `StrippedLines` equivalent), mirroring `parse_fenced_code_block`.
1378        let probe = if bq_outer {
1379            line
1380        } else {
1381            strip_list_indent(line, list_content_col)
1382        };
1383        let (line_bq_depth, _) = count_blockquote_markers(probe);
1384        if line_bq_depth < bq_depth {
1385            break;
1386        }
1387
1388        // Detection only (emits nothing): same 2-bucket strip as emission.
1389        let inner_stripped =
1390            strip_content_line_prefixes(line, bq_depth, list_content_col, bq_outer, content_indent);
1391
1392        if is_closing_fence(inner_stripped, &fence) {
1393            found_closing = true;
1394            current_pos += 1;
1395            break;
1396        }
1397
1398        content_lines.push(line);
1399        current_pos += 1;
1400    }
1401
1402    if !content_lines.is_empty() {
1403        let mut content = String::new();
1404        for k in 0..content_lines.len() {
1405            let after_indent = window.emit_prefix_at(builder, start_pos + 1 + k);
1406            let (line_without_newline, newline_str) = strip_newline(after_indent);
1407            content.push_str(line_without_newline);
1408            content.push_str(newline_str);
1409        }
1410        builder.token(SyntaxKind::TEXT.into(), &content);
1411    }
1412
1413    if found_closing {
1414        let closing_stripped = window.emit_prefix_at(builder, current_pos - 1);
1415        let (closing_without_newline, newline_str) = strip_newline(closing_stripped);
1416        let closing_trimmed_start = strip_leading_spaces(closing_without_newline);
1417        let leading_ws_len = closing_without_newline.len() - closing_trimmed_start.len();
1418        let closing_count = closing_trimmed_start
1419            .chars()
1420            .take_while(|&c| c == fence.fence_char)
1421            .count();
1422        let trailing_after_marker = &closing_trimmed_start[closing_count..];
1423
1424        if leading_ws_len > 0 {
1425            builder.token(
1426                SyntaxKind::WHITESPACE.into(),
1427                &closing_without_newline[..leading_ws_len],
1428            );
1429        }
1430        builder.token(
1431            SyntaxKind::DISPLAY_MATH_MARKER.into(),
1432            &closing_trimmed_start[..closing_count],
1433        );
1434        if !trailing_after_marker.is_empty() {
1435            builder.token(SyntaxKind::WHITESPACE.into(), trailing_after_marker);
1436        }
1437        if !newline_str.is_empty() {
1438            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1439        }
1440    }
1441
1442    builder.finish_node(); // DisplayMath
1443    current_pos
1444}
1445
1446#[cfg(test)]
1447mod tests {
1448    use super::*;
1449
1450    #[test]
1451    fn test_backtick_fence() {
1452        let fence = try_parse_fence_open("```python").unwrap();
1453        assert_eq!(fence.fence_char, '`');
1454        assert_eq!(fence.fence_count, 3);
1455        assert_eq!(fence.info_string, "python");
1456    }
1457
1458    #[test]
1459    fn test_tilde_fence() {
1460        let fence = try_parse_fence_open("~~~").unwrap();
1461        assert_eq!(fence.fence_char, '~');
1462        assert_eq!(fence.fence_count, 3);
1463        assert_eq!(fence.info_string, "");
1464    }
1465
1466    #[test]
1467    fn test_long_fence() {
1468        let fence = try_parse_fence_open("`````").unwrap();
1469        assert_eq!(fence.fence_count, 5);
1470    }
1471
1472    #[test]
1473    fn test_two_backticks_invalid() {
1474        assert!(try_parse_fence_open("``").is_none());
1475    }
1476
1477    #[test]
1478    fn test_backtick_fence_with_backtick_in_info_is_invalid() {
1479        assert!(try_parse_fence_open("`````hi````there`````").is_none());
1480    }
1481
1482    #[test]
1483    fn test_closing_fence() {
1484        let fence = FenceInfo {
1485            fence_char: '`',
1486            fence_count: 3,
1487            info_string: String::new(),
1488        };
1489        assert!(is_closing_fence("```", &fence));
1490        assert!(is_closing_fence("````", &fence));
1491        assert!(!is_closing_fence("``", &fence));
1492        assert!(!is_closing_fence("~~~", &fence));
1493    }
1494
1495    #[test]
1496    fn test_fenced_code_preserves_leading_gt() {
1497        let input = "```\n> foo\n```\n";
1498        let tree = crate::parse(input, None);
1499        assert_eq!(tree.text().to_string(), input);
1500    }
1501
1502    #[test]
1503    fn test_fenced_code_in_blockquote_preserves_opening_fence_marker() {
1504        let input = "> ```\n> code\n> ```\n";
1505        let tree = crate::parse(input, None);
1506        assert_eq!(tree.text().to_string(), input);
1507    }
1508
1509    #[test]
1510    fn test_fenced_code_in_definition_list_with_unicode_content_does_not_panic() {
1511        let input = "Term\n: ```\n├── pyproject.toml\n```\n";
1512        let tree = crate::parse(input, None);
1513        assert_eq!(tree.text().to_string(), input);
1514    }
1515
1516    #[test]
1517    fn test_info_string_plain() {
1518        let info = InfoString::parse("");
1519        assert_eq!(info.block_type, CodeBlockType::Plain);
1520        assert!(info.attributes.is_empty());
1521    }
1522
1523    #[test]
1524    fn test_info_string_shortcut() {
1525        let info = InfoString::parse("python");
1526        assert_eq!(
1527            info.block_type,
1528            CodeBlockType::DisplayShortcut {
1529                language: "python".to_string()
1530            }
1531        );
1532        assert!(info.attributes.is_empty());
1533    }
1534
1535    #[test]
1536    fn test_info_string_shortcut_with_trailing() {
1537        let info = InfoString::parse("python extra stuff");
1538        assert_eq!(
1539            info.block_type,
1540            CodeBlockType::DisplayShortcut {
1541                language: "python".to_string()
1542            }
1543        );
1544    }
1545
1546    #[test]
1547    fn test_info_string_display_explicit() {
1548        let info = InfoString::parse("{.python}");
1549        assert_eq!(
1550            info.block_type,
1551            CodeBlockType::DisplayExplicit {
1552                classes: vec!["python".to_string()]
1553            }
1554        );
1555    }
1556
1557    #[test]
1558    fn test_info_string_display_explicit_multiple() {
1559        let info = InfoString::parse("{.python .numberLines}");
1560        assert_eq!(
1561            info.block_type,
1562            CodeBlockType::DisplayExplicit {
1563                classes: vec!["python".to_string(), "numberLines".to_string()]
1564            }
1565        );
1566    }
1567
1568    #[test]
1569    fn test_info_string_executable() {
1570        let info = InfoString::parse("{python}");
1571        assert_eq!(
1572            info.block_type,
1573            CodeBlockType::Executable {
1574                language: "python".to_string()
1575            }
1576        );
1577    }
1578
1579    #[test]
1580    fn test_info_string_executable_with_options() {
1581        let info = InfoString::parse("{python echo=false warning=true}");
1582        assert_eq!(
1583            info.block_type,
1584            CodeBlockType::Executable {
1585                language: "python".to_string()
1586            }
1587        );
1588        assert_eq!(info.attributes.len(), 2);
1589        assert_eq!(
1590            info.attributes[0],
1591            ("echo".to_string(), Some("false".to_string()))
1592        );
1593        assert_eq!(
1594            info.attributes[1],
1595            ("warning".to_string(), Some("true".to_string()))
1596        );
1597    }
1598
1599    #[test]
1600    fn test_info_string_executable_with_commas() {
1601        let info = InfoString::parse("{r, echo=FALSE, warning=TRUE}");
1602        assert_eq!(
1603            info.block_type,
1604            CodeBlockType::Executable {
1605                language: "r".to_string()
1606            }
1607        );
1608        assert_eq!(info.attributes.len(), 2);
1609        assert_eq!(
1610            info.attributes[0],
1611            ("echo".to_string(), Some("FALSE".to_string()))
1612        );
1613        assert_eq!(
1614            info.attributes[1],
1615            ("warning".to_string(), Some("TRUE".to_string()))
1616        );
1617    }
1618
1619    #[test]
1620    fn test_info_string_executable_mixed_commas_spaces() {
1621        // R-style with commas and spaces
1622        let info = InfoString::parse("{r, echo=FALSE, label=\"my chunk\"}");
1623        assert_eq!(
1624            info.block_type,
1625            CodeBlockType::Executable {
1626                language: "r".to_string()
1627            }
1628        );
1629        assert_eq!(info.attributes.len(), 2);
1630        assert_eq!(
1631            info.attributes[0],
1632            ("echo".to_string(), Some("FALSE".to_string()))
1633        );
1634        assert_eq!(
1635            info.attributes[1],
1636            ("label".to_string(), Some("my chunk".to_string()))
1637        );
1638    }
1639
1640    #[test]
1641    fn test_info_string_mixed_shortcut_and_attrs() {
1642        let info = InfoString::parse("python {.numberLines}");
1643        assert_eq!(
1644            info.block_type,
1645            CodeBlockType::DisplayShortcut {
1646                language: "python".to_string()
1647            }
1648        );
1649        assert_eq!(info.attributes.len(), 1);
1650        assert_eq!(info.attributes[0], (".numberLines".to_string(), None));
1651    }
1652
1653    #[test]
1654    fn test_info_string_mixed_with_key_value() {
1655        let info = InfoString::parse("python {.numberLines startFrom=\"100\"}");
1656        assert_eq!(
1657            info.block_type,
1658            CodeBlockType::DisplayShortcut {
1659                language: "python".to_string()
1660            }
1661        );
1662        assert_eq!(info.attributes.len(), 2);
1663        assert_eq!(info.attributes[0], (".numberLines".to_string(), None));
1664        assert_eq!(
1665            info.attributes[1],
1666            ("startFrom".to_string(), Some("100".to_string()))
1667        );
1668    }
1669
1670    #[test]
1671    fn test_info_string_explicit_with_id_and_classes() {
1672        let info = InfoString::parse("{#mycode .haskell .numberLines startFrom=\"100\"}");
1673        assert_eq!(
1674            info.block_type,
1675            CodeBlockType::DisplayExplicit {
1676                classes: vec!["haskell".to_string(), "numberLines".to_string()]
1677            }
1678        );
1679        // Non-class attributes
1680        let has_id = info.attributes.iter().any(|(k, _)| k == "#mycode");
1681        let has_start = info
1682            .attributes
1683            .iter()
1684            .any(|(k, v)| k == "startFrom" && v == &Some("100".to_string()));
1685        assert!(has_id);
1686        assert!(has_start);
1687    }
1688
1689    #[test]
1690    fn test_info_string_raw_html() {
1691        let info = InfoString::parse("{=html}");
1692        assert_eq!(
1693            info.block_type,
1694            CodeBlockType::Raw {
1695                format: "html".to_string()
1696            }
1697        );
1698        assert!(info.attributes.is_empty());
1699    }
1700
1701    #[test]
1702    fn test_info_string_raw_latex() {
1703        let info = InfoString::parse("{=latex}");
1704        assert_eq!(
1705            info.block_type,
1706            CodeBlockType::Raw {
1707                format: "latex".to_string()
1708            }
1709        );
1710    }
1711
1712    #[test]
1713    fn test_info_string_raw_openxml() {
1714        let info = InfoString::parse("{=openxml}");
1715        assert_eq!(
1716            info.block_type,
1717            CodeBlockType::Raw {
1718                format: "openxml".to_string()
1719            }
1720        );
1721    }
1722
1723    #[test]
1724    fn test_info_string_raw_ms() {
1725        let info = InfoString::parse("{=ms}");
1726        assert_eq!(
1727            info.block_type,
1728            CodeBlockType::Raw {
1729                format: "ms".to_string()
1730            }
1731        );
1732    }
1733
1734    #[test]
1735    fn test_info_string_raw_html5() {
1736        let info = InfoString::parse("{=html5}");
1737        assert_eq!(
1738            info.block_type,
1739            CodeBlockType::Raw {
1740                format: "html5".to_string()
1741            }
1742        );
1743    }
1744
1745    #[test]
1746    fn test_info_string_raw_not_combined_with_attrs() {
1747        // If there are other attributes with =format, it should not be treated as raw
1748        let info = InfoString::parse("{=html .class}");
1749        // This should NOT be parsed as raw because there's more than one attribute
1750        assert_ne!(
1751            info.block_type,
1752            CodeBlockType::Raw {
1753                format: "html".to_string()
1754            }
1755        );
1756    }
1757
1758    #[test]
1759    fn test_parse_pandoc_attributes_spaces() {
1760        // Pandoc display blocks use spaces as delimiters
1761        let attrs = InfoString::parse_pandoc_attributes(".python .numberLines startFrom=\"10\"");
1762        assert_eq!(attrs.len(), 3);
1763        assert_eq!(attrs[0], (".python".to_string(), None));
1764        assert_eq!(attrs[1], (".numberLines".to_string(), None));
1765        assert_eq!(attrs[2], ("startFrom".to_string(), Some("10".to_string())));
1766    }
1767
1768    #[test]
1769    fn test_parse_pandoc_attributes_no_commas() {
1770        // Commas in Pandoc attributes should be treated as part of the value
1771        let attrs = InfoString::parse_pandoc_attributes("#id .class key=value");
1772        assert_eq!(attrs.len(), 3);
1773        assert_eq!(attrs[0], ("#id".to_string(), None));
1774        assert_eq!(attrs[1], (".class".to_string(), None));
1775        assert_eq!(attrs[2], ("key".to_string(), Some("value".to_string())));
1776    }
1777
1778    #[test]
1779    fn test_parse_chunk_options_commas() {
1780        // Quarto/RMarkdown chunks use commas as delimiters
1781        let attrs = InfoString::parse_chunk_options("r, echo=FALSE, warning=TRUE");
1782        assert_eq!(attrs.len(), 3);
1783        assert_eq!(attrs[0], ("r".to_string(), None));
1784        assert_eq!(attrs[1], ("echo".to_string(), Some("FALSE".to_string())));
1785        assert_eq!(attrs[2], ("warning".to_string(), Some("TRUE".to_string())));
1786    }
1787
1788    #[test]
1789    fn test_parse_chunk_options_no_spaces() {
1790        // Should handle comma-separated without spaces
1791        let attrs = InfoString::parse_chunk_options("r,echo=FALSE,warning=TRUE");
1792        assert_eq!(attrs.len(), 3);
1793        assert_eq!(attrs[0], ("r".to_string(), None));
1794        assert_eq!(attrs[1], ("echo".to_string(), Some("FALSE".to_string())));
1795        assert_eq!(attrs[2], ("warning".to_string(), Some("TRUE".to_string())));
1796    }
1797
1798    #[test]
1799    fn test_parse_chunk_options_mixed() {
1800        // Handle both commas and spaces
1801        let attrs = InfoString::parse_chunk_options("python echo=False, warning=True");
1802        assert_eq!(attrs.len(), 3);
1803        assert_eq!(attrs[0], ("python".to_string(), None));
1804        assert_eq!(attrs[1], ("echo".to_string(), Some("False".to_string())));
1805        assert_eq!(attrs[2], ("warning".to_string(), Some("True".to_string())));
1806    }
1807
1808    #[test]
1809    fn test_parse_chunk_options_nested_function_call() {
1810        // R function calls with nested commas should be treated as single value
1811        let attrs = InfoString::parse_chunk_options(r#"r pep-cg, dependson=c("foo", "bar")"#);
1812        assert_eq!(attrs.len(), 3);
1813        assert_eq!(attrs[0], ("r".to_string(), None));
1814        assert_eq!(attrs[1], ("pep-cg".to_string(), None));
1815        assert_eq!(
1816            attrs[2],
1817            (
1818                "dependson".to_string(),
1819                Some(r#"c("foo", "bar")"#.to_string())
1820            )
1821        );
1822    }
1823
1824    #[test]
1825    fn test_parse_chunk_options_nested_with_spaces() {
1826        // Function call with spaces inside
1827        let attrs = InfoString::parse_chunk_options(r#"r, cache.path=file.path("cache", "dir")"#);
1828        assert_eq!(attrs.len(), 2);
1829        assert_eq!(attrs[0], ("r".to_string(), None));
1830        assert_eq!(
1831            attrs[1],
1832            (
1833                "cache.path".to_string(),
1834                Some(r#"file.path("cache", "dir")"#.to_string())
1835            )
1836        );
1837    }
1838
1839    #[test]
1840    fn test_parse_chunk_options_deeply_nested() {
1841        // Multiple levels of nesting
1842        let attrs = InfoString::parse_chunk_options(r#"r, x=list(a=c(1,2), b=c(3,4))"#);
1843        assert_eq!(attrs.len(), 2);
1844        assert_eq!(attrs[0], ("r".to_string(), None));
1845        assert_eq!(
1846            attrs[1],
1847            (
1848                "x".to_string(),
1849                Some(r#"list(a=c(1,2), b=c(3,4))"#.to_string())
1850            )
1851        );
1852    }
1853
1854    #[test]
1855    fn test_parse_chunk_options_brackets_and_braces() {
1856        // Test all bracket types
1857        let attrs = InfoString::parse_chunk_options(r#"r, data=df[rows, cols], config={a:1, b:2}"#);
1858        assert_eq!(attrs.len(), 3);
1859        assert_eq!(attrs[0], ("r".to_string(), None));
1860        assert_eq!(
1861            attrs[1],
1862            ("data".to_string(), Some("df[rows, cols]".to_string()))
1863        );
1864        assert_eq!(
1865            attrs[2],
1866            ("config".to_string(), Some("{a:1, b:2}".to_string()))
1867        );
1868    }
1869
1870    #[test]
1871    fn test_parse_chunk_options_quotes_with_parens() {
1872        // Parentheses inside quoted strings shouldn't affect depth tracking
1873        // Note: The parser strips outer quotes from quoted values
1874        let attrs = InfoString::parse_chunk_options(r#"r, label="test (with parens)", echo=TRUE"#);
1875        assert_eq!(attrs.len(), 3);
1876        assert_eq!(attrs[0], ("r".to_string(), None));
1877        assert_eq!(
1878            attrs[1],
1879            ("label".to_string(), Some("test (with parens)".to_string()))
1880        );
1881        assert_eq!(attrs[2], ("echo".to_string(), Some("TRUE".to_string())));
1882    }
1883
1884    #[test]
1885    fn test_parse_chunk_options_escaped_quotes() {
1886        // Escaped quotes inside string values
1887        // Note: The parser strips outer quotes and processes escapes
1888        let attrs = InfoString::parse_chunk_options(r#"r, label="has \"quoted\" text""#);
1889        assert_eq!(attrs.len(), 2);
1890        assert_eq!(attrs[0], ("r".to_string(), None));
1891        assert_eq!(
1892            attrs[1],
1893            (
1894                "label".to_string(),
1895                Some(r#"has "quoted" text"#.to_string())
1896            )
1897        );
1898    }
1899
1900    #[test]
1901    fn test_display_vs_executable_parsing() {
1902        // Display block should use Pandoc parser (spaces)
1903        let info1 = InfoString::parse("{.python .numberLines startFrom=\"10\"}");
1904        assert!(matches!(
1905            info1.block_type,
1906            CodeBlockType::DisplayExplicit { .. }
1907        ));
1908
1909        // Executable chunk should use chunk options parser (commas)
1910        let info2 = InfoString::parse("{r, echo=FALSE, warning=TRUE}");
1911        assert!(matches!(info2.block_type, CodeBlockType::Executable { .. }));
1912        assert_eq!(info2.attributes.len(), 2);
1913    }
1914
1915    #[test]
1916    fn test_info_string_executable_implicit_label() {
1917        // {r mylabel} should parse as label=mylabel
1918        let info = InfoString::parse("{r mylabel}");
1919        assert!(matches!(
1920            info.block_type,
1921            CodeBlockType::Executable { ref language } if language == "r"
1922        ));
1923        assert_eq!(info.attributes.len(), 1);
1924        assert_eq!(
1925            info.attributes[0],
1926            ("label".to_string(), Some("mylabel".to_string()))
1927        );
1928    }
1929
1930    #[test]
1931    fn test_info_string_executable_implicit_label_with_options() {
1932        // {r mylabel, echo=FALSE} should parse as label=mylabel, echo=FALSE
1933        let info = InfoString::parse("{r mylabel, echo=FALSE}");
1934        assert!(matches!(
1935            info.block_type,
1936            CodeBlockType::Executable { ref language } if language == "r"
1937        ));
1938        assert_eq!(info.attributes.len(), 2);
1939        assert_eq!(
1940            info.attributes[0],
1941            ("label".to_string(), Some("mylabel".to_string()))
1942        );
1943        assert_eq!(
1944            info.attributes[1],
1945            ("echo".to_string(), Some("FALSE".to_string()))
1946        );
1947    }
1948
1949    #[test]
1950    fn test_compute_hashpipe_preamble_line_count_for_block_scalar() {
1951        let content_lines = vec![
1952            "#| fig-cap: |\n",
1953            "#|   A caption\n",
1954            "#|   spanning lines\n",
1955            "a <- 1\n",
1956        ];
1957        let count = compute_hashpipe_preamble_line_count(&content_lines, "#|", 0, 0, false, 0);
1958        assert_eq!(count, 3);
1959    }
1960
1961    #[test]
1962    fn test_compute_hashpipe_preamble_line_count_stops_at_non_option() {
1963        let content_lines = vec!["#| label: fig-plot\n", "plot(1:10)\n", "#| echo: false\n"];
1964        let count = compute_hashpipe_preamble_line_count(&content_lines, "#|", 0, 0, false, 0);
1965        assert_eq!(count, 1);
1966    }
1967
1968    #[test]
1969    fn test_compute_hashpipe_preamble_line_count_stops_at_standalone_prefix() {
1970        let content_lines = vec!["#| label: fig-plot\n", "#|\n", "plot(1:10)\n"];
1971        let count = compute_hashpipe_preamble_line_count(&content_lines, "#|", 0, 0, false, 0);
1972        assert_eq!(count, 1);
1973    }
1974}