Skip to main content

panache_parser/parser/blocks/
code_blocks.rs

1//! Fenced code block parsing utilities.
2
3use crate::parser::utils::chunk_options::hashpipe_comment_prefix;
4use crate::syntax::SyntaxKind;
5use rowan::GreenNodeBuilder;
6
7use super::blockquotes::{count_blockquote_markers, strip_n_blockquote_markers};
8use crate::parser::utils::container_stack::byte_index_at_column;
9use crate::parser::utils::helpers::{strip_leading_spaces, strip_newline};
10
/// Represents the type of code block based on its info string syntax.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CodeBlockType {
    /// Display-only block with shortcut syntax: ```python
    DisplayShortcut { language: String },
    /// Display-only block with explicit Pandoc syntax: ```{.python}
    DisplayExplicit { classes: Vec<String> },
    /// Executable chunk (Quarto/RMarkdown): ```{python}
    Executable { language: String },
    /// Raw block for specific output format: ```{=html}
    Raw { format: String },
    /// No language specified: ```
    Plain,
}

/// Parsed attributes from a code block info string.
#[derive(Debug, Clone, PartialEq)]
pub struct InfoString {
    /// The info string exactly as it was handed to [`InfoString::parse`].
    pub raw: String,
    /// Classification derived from the syntax of the info string.
    pub block_type: CodeBlockType,
    /// Key/value pairs in source order; bare tokens carry `None` as value.
    pub attributes: Vec<(String, Option<String>)>,
}

impl InfoString {
    /// Parse an info string into structured attributes.
    ///
    /// The forms are tried in this order:
    /// 1. empty or whitespace-only: [`CodeBlockType::Plain`], no attributes;
    /// 2. `{...}` spanning the whole (trimmed) string: explicit form, see
    ///    `parse_explicit`;
    /// 3. `lang {...}` where `lang` is a single word: `DisplayShortcut` with the
    ///    braces' content parsed as attributes;
    /// 4. anything else: `DisplayShortcut` whose language is the first
    ///    whitespace-separated word.
    ///
    /// `raw` is always stored unmodified in the result.
    pub fn parse(raw: &str) -> Self {
        let trimmed = raw.trim();

        if trimmed.is_empty() {
            return InfoString {
                raw: raw.to_string(),
                block_type: CodeBlockType::Plain,
                attributes: Vec::new(),
            };
        }

        // Explicit attribute block: the whole string is `{...}`.
        if let Some(content) = Self::strip_braces(trimmed) {
            return Self::parse_explicit(raw, content);
        }

        // Mixed form: python {.numberLines}
        if let Some(brace_start) = trimmed.find('{') {
            let language = trimmed[..brace_start].trim();
            if !language.is_empty() && !language.contains(char::is_whitespace) {
                if let Some(content) = Self::strip_braces(&trimmed[brace_start..]) {
                    return InfoString {
                        raw: raw.to_string(),
                        block_type: CodeBlockType::DisplayShortcut {
                            language: language.to_string(),
                        },
                        attributes: Self::parse_attributes(content),
                    };
                }
            }
        }

        // Shortcut form: only the first word counts as the language.
        let language = trimmed.split_whitespace().next().unwrap_or(trimmed);
        InfoString {
            raw: raw.to_string(),
            block_type: CodeBlockType::DisplayShortcut {
                language: language.to_string(),
            },
            attributes: Vec::new(),
        }
    }

    /// Return the text between a leading `{` and a trailing `}`, or `None`
    /// when `text` is not wrapped in exactly that pair.
    fn strip_braces(text: &str) -> Option<&str> {
        text.strip_prefix('{')
            .and_then(|inner| inner.strip_suffix('}'))
    }

    /// Classify and parse the content of a `{...}` info string (braces removed).
    ///
    /// * `=format` (alphanumeric format name) yields `Raw`.
    /// * Otherwise the first bare token that is not an `#id` decides:
    ///   a leading `.` yields `DisplayExplicit`, any other token `Executable`.
    /// * With no such token the block is `Plain` and only carries attributes.
    fn parse_explicit(raw: &str, content: &str) -> Self {
        // Raw attribute is checked first: {=format}
        if let Some(format_name) = content.trim().strip_prefix('=') {
            // An all-alphanumeric name cannot contain whitespace, so one test suffices.
            if !format_name.is_empty() && format_name.chars().all(char::is_alphanumeric) {
                return InfoString {
                    raw: raw.to_string(),
                    block_type: CodeBlockType::Raw {
                        format: format_name.to_string(),
                    },
                    attributes: Vec::new(),
                };
            }
        }

        // The comma-aware chunk parser is used to find the deciding token.
        let mut chunk_attrs = Self::parse_chunk_options(content);

        // Index of the first bare (value-less) token that is not an `#id`.
        // Matching on the entry itself rather than on its key text keeps an
        // earlier `key=value` pair with the same key from being mistaken for
        // the language token.
        let lang_index = chunk_attrs
            .iter()
            .position(|(key, value)| value.is_none() && !key.starts_with('#'));

        let lang_index = match lang_index {
            Some(index) => index,
            None => {
                // Just attributes, no language - use Pandoc parser
                return InfoString {
                    raw: raw.to_string(),
                    block_type: CodeBlockType::Plain,
                    attributes: Self::parse_pandoc_attributes(content),
                };
            }
        };

        if chunk_attrs[lang_index].0.starts_with('.') {
            // Display block: {.python} or {.haskell .numberLines}
            // Re-parse with the space-delimited Pandoc parser.
            let mut classes = Vec::new();
            let mut attributes = Vec::new();
            for (key, value) in Self::parse_pandoc_attributes(content) {
                if !key.starts_with('.') {
                    attributes.push((key, value));
                } else if value.is_none() {
                    classes.push(key[1..].to_string());
                }
                // NOTE(review): a dotted key that carries a value (`.a=b`) is
                // neither a class nor kept as an attribute. This matches the
                // previous behaviour; confirm whether dropping it is intended.
            }

            return InfoString {
                raw: raw.to_string(),
                block_type: CodeBlockType::DisplayExplicit { classes },
                attributes,
            };
        }

        // Executable chunk: {python} or {r}
        let (language, _) = chunk_attrs.remove(lang_index);

        // A bare token directly after the language is an implicit label:
        // {r mylabel} is equivalent to {r, label=mylabel}.
        let has_implicit_label = matches!(chunk_attrs.get(lang_index), Some((_, None)));
        if has_implicit_label {
            let (label, _) = chunk_attrs.remove(lang_index);
            chunk_attrs.insert(0, ("label".to_string(), Some(label)));
        }

        InfoString {
            raw: raw.to_string(),
            block_type: CodeBlockType::Executable { language },
            attributes: chunk_attrs,
        }
    }

    /// Read the remainder of a double-quoted value.
    ///
    /// The opening quote must already be consumed. Reading stops after the
    /// closing quote (which is consumed but not returned) or at end of input.
    /// A backslash makes the following character literal and is itself dropped.
    fn read_quoted_value(chars: &mut impl Iterator<Item = char>) -> String {
        let mut value = String::new();
        while let Some(ch) = chars.next() {
            match ch {
                '"' => break,
                '\\' => {
                    if let Some(escaped) = chars.next() {
                        value.push(escaped);
                    }
                }
                other => value.push(other),
            }
        }
        value
    }

    /// Parse Pandoc-style attributes for display blocks: {.class #id key="value"}
    /// Spaces and tabs are the only delimiters; whitespace around `=` is tolerated.
    ///
    /// Returns the entries in source order. Parsing stops at the first position
    /// where no key can be read (for example a stray `=`).
    fn parse_pandoc_attributes(content: &str) -> Vec<(String, Option<String>)> {
        fn is_blank(ch: &char) -> bool {
            matches!(*ch, ' ' | '\t')
        }

        let mut attrs = Vec::new();
        let mut chars = content.chars().peekable();

        loop {
            while chars.next_if(is_blank).is_some() {}

            // Key: everything up to `=` or a blank.
            let mut key = String::new();
            while let Some(ch) = chars.next_if(|c| *c != '=' && !is_blank(c)) {
                key.push(ch);
            }
            if key.is_empty() {
                // End of input, or a character that cannot start a key.
                break;
            }

            while chars.next_if(is_blank).is_some() {}

            let value = if chars.next_if_eq(&'=').is_some() {
                while chars.next_if(is_blank).is_some() {}

                if chars.next_if_eq(&'"').is_some() {
                    Some(Self::read_quoted_value(&mut chars))
                } else {
                    // Unquoted value - read until the next blank.
                    let mut unquoted = String::new();
                    while let Some(ch) = chars.next_if(|c| !is_blank(c)) {
                        unquoted.push(ch);
                    }
                    Some(unquoted)
                }
            } else {
                None
            };

            attrs.push((key, value));
        }

        attrs
    }

    /// Parse Quarto/RMarkdown chunk options: {language, option=value, option2=value2}
    /// Commas, spaces and tabs all separate entries. Supports unquoted barewords.
    ///
    /// Unquoted values may contain separators while inside `()`, `[]`, `{}` or
    /// a `'...'`/`"..."` string, so `fig.dim=c(5, 3)` stays one value. Such
    /// values are returned verbatim (quotes and backslashes included), whereas
    /// a value that *starts* with `"` is unquoted and unescaped.
    fn parse_chunk_options(content: &str) -> Vec<(String, Option<String>)> {
        fn is_separator(ch: &char) -> bool {
            matches!(*ch, ' ' | '\t' | ',')
        }

        let mut attrs = Vec::new();
        let mut chars = content.chars().peekable();

        loop {
            while chars.next_if(is_separator).is_some() {}

            // Key: everything up to `=` or a separator.
            let mut key = String::new();
            while let Some(ch) = chars.next_if(|c| *c != '=' && !is_separator(c)) {
                key.push(ch);
            }
            if key.is_empty() {
                // End of input, or a character that cannot start a key.
                break;
            }

            while chars.next_if(is_separator).is_some() {}

            if chars.next_if_eq(&'=').is_none() {
                attrs.push((key, None));
                continue;
            }

            while chars.next_if(is_separator).is_some() {}

            let value = if chars.next_if_eq(&'"').is_some() {
                Self::read_quoted_value(&mut chars)
            } else {
                let mut unquoted = String::new();
                // Nesting level of (), [] and {}. It saturates at zero so that a
                // stray closing bracket cannot push it negative and thereby
                // disable splitting for the rest of the input.
                let mut depth = 0usize;
                let mut in_quote: Option<char> = None;
                let mut escaped = false;

                while let Some(&ch) = chars.peek() {
                    if escaped {
                        // Character after a backslash is always literal.
                        escaped = false;
                    } else if ch == '\\' {
                        escaped = true;
                    } else if let Some(quote_char) = in_quote {
                        if ch == quote_char {
                            in_quote = None;
                        }
                    } else {
                        match ch {
                            '"' | '\'' => in_quote = Some(ch),
                            '(' | '[' | '{' => depth += 1,
                            ')' | ']' | '}' => depth = depth.saturating_sub(1),
                            // A separator outside any nesting ends the value
                            // and is left for the next iteration to skip.
                            ' ' | '\t' | ',' if depth == 0 => break,
                            _ => {}
                        }
                    }
                    unquoted.push(ch);
                    chars.next();
                }
                unquoted
            };

            attrs.push((key, Some(value)));
        }

        attrs
    }

    /// Legacy function - kept for backward compatibility in mixed-form parsing
    /// For new code, use parse_pandoc_attributes or parse_chunk_options
    fn parse_attributes(content: &str) -> Vec<(String, Option<String>)> {
        // Default to chunk options parsing (comma-aware)
        Self::parse_chunk_options(content)
    }
}
436
/// Information about a detected code fence opening.
#[derive(Debug, Clone)]
pub(crate) struct FenceInfo {
    /// Character the fence run is made of.
    pub fence_char: char,
    /// Number of consecutive fence characters in the opening run.
    pub fence_count: usize,
    /// Text that follows the fence run on the opening line.
    pub info_string: String,
}

/// Report whether the fence's info string, ignoring surrounding whitespace,
/// is exactly `math`.
pub(crate) fn is_gfm_math_fence(fence: &FenceInfo) -> bool {
    matches!(fence.info_string.trim(), "math")
}
448
449/// Try to detect a fenced code block opening from content.
450/// Returns fence info if this is a valid opening fence.
451pub(crate) fn try_parse_fence_open(content: &str) -> Option<FenceInfo> {
452    let trimmed = strip_leading_spaces(content);
453
454    // Check for fence opening (``` or ~~~)
455    let (fence_char, fence_count) = if trimmed.starts_with('`') {
456        let count = trimmed.chars().take_while(|&c| c == '`').count();
457        ('`', count)
458    } else if trimmed.starts_with('~') {
459        let count = trimmed.chars().take_while(|&c| c == '~').count();
460        ('~', count)
461    } else {
462        return None;
463    };
464
465    if fence_count < 3 {
466        return None;
467    }
468
469    let info_string_raw = &trimmed[fence_count..];
470    // Strip trailing newline (LF or CRLF) and at most one leading space
471    let (info_string_trimmed, _) = strip_newline(info_string_raw);
472    let info_string = if let Some(stripped) = info_string_trimmed.strip_prefix(' ') {
473        stripped.to_string()
474    } else {
475        info_string_trimmed.to_string()
476    };
477
478    // Backtick-fenced blocks cannot have backticks in the info string.
479    if fence_char == '`' && info_string.contains('`') {
480        return None;
481    }
482
483    Some(FenceInfo {
484        fence_char,
485        fence_count,
486        info_string,
487    })
488}
489
490fn prepare_fence_open_line<'a>(
491    builder: &mut GreenNodeBuilder<'static>,
492    source_line: &'a str,
493    first_line_override: Option<&'a str>,
494    bq_depth: usize,
495    base_indent: usize,
496) -> (&'a str, &'a str) {
497    let first_line = first_line_override.unwrap_or(source_line);
498
499    // Only strip blockquote markers for the *surrounding* blockquote depth.
500    // Anything beyond that (e.g. a literal `>` inside the code block) must be preserved.
501    let first_inner = if bq_depth > 0 && first_line_override.is_none() {
502        strip_n_blockquote_markers(first_line, bq_depth)
503    } else {
504        if bq_depth > 0 && first_line_override.is_some() && source_line != first_line {
505            let stripped = strip_n_blockquote_markers(source_line, bq_depth);
506            let prefix_len = source_line.len().saturating_sub(stripped.len());
507            if prefix_len > 0 {
508                emit_blockquote_prefix_tokens(builder, &source_line[..prefix_len]);
509            }
510        }
511        first_line
512    };
513
514    // For lossless parsing: emit the base indent before stripping it
515    let first_base_indent = if first_line_override.is_some() {
516        0
517    } else {
518        base_indent
519    };
520    let first_base_indent_bytes = byte_index_at_column(first_inner, first_base_indent);
521    let first_stripped = if first_base_indent > 0 && first_inner.len() >= first_base_indent_bytes {
522        let indent_str = &first_inner[..first_base_indent_bytes];
523        if !indent_str.is_empty() {
524            builder.token(SyntaxKind::WHITESPACE.into(), indent_str);
525        }
526        &first_inner[first_base_indent_bytes..]
527    } else {
528        first_inner
529    };
530
531    let first_trimmed = strip_leading_spaces(first_stripped);
532    let leading_ws_len = first_stripped.len().saturating_sub(first_trimmed.len());
533    if leading_ws_len > 0 {
534        builder.token(
535            SyntaxKind::WHITESPACE.into(),
536            &first_stripped[..leading_ws_len],
537        );
538    }
539    (first_trimmed, first_inner)
540}
541
542fn emit_blockquote_prefix_tokens(builder: &mut GreenNodeBuilder<'static>, prefix: &str) {
543    for ch in prefix.chars() {
544        if ch == '>' {
545            builder.token(SyntaxKind::BLOCK_QUOTE_MARKER.into(), ">");
546        } else {
547            let mut buf = [0u8; 4];
548            builder.token(SyntaxKind::WHITESPACE.into(), ch.encode_utf8(&mut buf));
549        }
550    }
551}
552
553fn emit_content_line_prefixes<'a>(
554    builder: &mut GreenNodeBuilder<'static>,
555    content_line: &'a str,
556    bq_depth: usize,
557    base_indent: usize,
558) -> &'a str {
559    let after_blockquote = if bq_depth > 0 {
560        let stripped = strip_n_blockquote_markers(content_line, bq_depth);
561        let prefix_len = content_line.len().saturating_sub(stripped.len());
562        if prefix_len > 0 {
563            emit_blockquote_prefix_tokens(builder, &content_line[..prefix_len]);
564        }
565        stripped
566    } else {
567        content_line
568    };
569
570    let base_indent_bytes = byte_index_at_column(after_blockquote, base_indent);
571    if base_indent > 0 && after_blockquote.len() >= base_indent_bytes {
572        let indent_str = &after_blockquote[..base_indent_bytes];
573        if !indent_str.is_empty() {
574            builder.token(SyntaxKind::WHITESPACE.into(), indent_str);
575        }
576        &after_blockquote[base_indent_bytes..]
577    } else {
578        after_blockquote
579    }
580}
581
582fn strip_content_line_prefixes(content_line: &str, bq_depth: usize, base_indent: usize) -> &str {
583    let after_blockquote = if bq_depth > 0 {
584        strip_n_blockquote_markers(content_line, bq_depth)
585    } else {
586        content_line
587    };
588
589    let base_indent_bytes = byte_index_at_column(after_blockquote, base_indent);
590    if base_indent > 0 && after_blockquote.len() >= base_indent_bytes {
591        &after_blockquote[base_indent_bytes..]
592    } else {
593        after_blockquote
594    }
595}
596
597pub(crate) fn compute_hashpipe_preamble_line_count(
598    content_lines: &[&str],
599    prefix: &str,
600    bq_depth: usize,
601    base_indent: usize,
602) -> usize {
603    let mut line_idx = 0usize;
604
605    while line_idx < content_lines.len() {
606        let preview_after_indent =
607            strip_content_line_prefixes(content_lines[line_idx], bq_depth, base_indent);
608        let (preview_without_newline, _) = strip_newline(preview_after_indent);
609        let trimmed = preview_without_newline.trim_start_matches([' ', '\t']);
610        if !trimmed.starts_with(prefix) {
611            break;
612        }
613        line_idx += 1;
614    }
615
616    line_idx
617}
618
619fn emit_hashpipe_option_line(
620    builder: &mut GreenNodeBuilder<'static>,
621    line_without_newline: &str,
622    prefix: &str,
623) -> bool {
624    if !is_hashpipe_option_line(line_without_newline, prefix) {
625        return false;
626    }
627
628    let trimmed_start = line_without_newline.trim_start_matches([' ', '\t']);
629    let leading_ws_len = line_without_newline
630        .len()
631        .saturating_sub(trimmed_start.len());
632    let after_prefix = &trimmed_start[prefix.len()..];
633    let ws_after_prefix_len = after_prefix
634        .len()
635        .saturating_sub(after_prefix.trim_start_matches([' ', '\t']).len());
636    let rest = &after_prefix[ws_after_prefix_len..];
637    let Some(colon_idx) = rest.find(':') else {
638        return false;
639    };
640
641    let key_with_ws = &rest[..colon_idx];
642    let key = key_with_ws.trim_end_matches([' ', '\t']);
643    if key.is_empty() {
644        return false;
645    }
646    let key_ws_suffix = &key_with_ws[key.len()..];
647
648    let after_colon = &rest[colon_idx + 1..];
649    let value_ws_prefix_len = after_colon
650        .len()
651        .saturating_sub(after_colon.trim_start_matches([' ', '\t']).len());
652    let value_with_trailing = &after_colon[value_ws_prefix_len..];
653    let value = value_with_trailing.trim_end_matches([' ', '\t']);
654    let value_ws_suffix = &value_with_trailing[value.len()..];
655
656    builder.start_node(SyntaxKind::CHUNK_OPTION.into());
657    if leading_ws_len > 0 {
658        builder.token(
659            SyntaxKind::WHITESPACE.into(),
660            &line_without_newline[..leading_ws_len],
661        );
662    }
663    builder.token(SyntaxKind::HASHPIPE_PREFIX.into(), prefix);
664    if ws_after_prefix_len > 0 {
665        builder.token(
666            SyntaxKind::WHITESPACE.into(),
667            &after_prefix[..ws_after_prefix_len],
668        );
669    }
670
671    builder.token(SyntaxKind::CHUNK_OPTION_KEY.into(), key);
672    if !key_ws_suffix.is_empty() {
673        builder.token(SyntaxKind::WHITESPACE.into(), key_ws_suffix);
674    }
675    builder.token(SyntaxKind::TEXT.into(), ":");
676    if value_ws_prefix_len > 0 {
677        builder.token(
678            SyntaxKind::WHITESPACE.into(),
679            &after_colon[..value_ws_prefix_len],
680        );
681    }
682
683    if !value.is_empty() {
684        if let Some(quote) = value.chars().next()
685            && (quote == '"' || quote == '\'')
686            && value.ends_with(quote)
687            && value.len() >= 2
688        {
689            builder.token(SyntaxKind::CHUNK_OPTION_QUOTE.into(), &value[..1]);
690            builder.token(
691                SyntaxKind::CHUNK_OPTION_VALUE.into(),
692                &value[1..value.len() - 1],
693            );
694            builder.token(
695                SyntaxKind::CHUNK_OPTION_QUOTE.into(),
696                &value[value.len() - 1..],
697            );
698        } else {
699            builder.token(SyntaxKind::CHUNK_OPTION_VALUE.into(), value);
700        }
701    }
702
703    if !value_ws_suffix.is_empty() {
704        builder.token(SyntaxKind::WHITESPACE.into(), value_ws_suffix);
705    }
706    builder.finish_node();
707    true
708}
709
710fn emit_hashpipe_continuation_line(
711    builder: &mut GreenNodeBuilder<'static>,
712    line_without_newline: &str,
713    prefix: &str,
714) -> bool {
715    if !is_hashpipe_continuation_line(line_without_newline, prefix) {
716        return false;
717    }
718    let trimmed_start = line_without_newline.trim_start_matches([' ', '\t']);
719    let leading_ws_len = line_without_newline
720        .len()
721        .saturating_sub(trimmed_start.len());
722    let after_prefix = &trimmed_start[prefix.len()..];
723    let ws_after_prefix_len = after_prefix
724        .len()
725        .saturating_sub(after_prefix.trim_start_matches([' ', '\t']).len());
726    let continuation_with_trailing = &after_prefix[ws_after_prefix_len..];
727    let continuation_value = continuation_with_trailing.trim_end_matches([' ', '\t']);
728    if continuation_value.is_empty() {
729        return false;
730    }
731    let continuation_ws_suffix = &continuation_with_trailing[continuation_value.len()..];
732
733    builder.start_node(SyntaxKind::CHUNK_OPTION.into());
734    if leading_ws_len > 0 {
735        builder.token(
736            SyntaxKind::WHITESPACE.into(),
737            &line_without_newline[..leading_ws_len],
738        );
739    }
740    builder.token(SyntaxKind::HASHPIPE_PREFIX.into(), prefix);
741    if ws_after_prefix_len > 0 {
742        builder.token(
743            SyntaxKind::WHITESPACE.into(),
744            &after_prefix[..ws_after_prefix_len],
745        );
746    }
747    builder.token(SyntaxKind::CHUNK_OPTION_VALUE.into(), continuation_value);
748    if !continuation_ws_suffix.is_empty() {
749        builder.token(SyntaxKind::WHITESPACE.into(), continuation_ws_suffix);
750    }
751    builder.finish_node();
752    true
753}
754
/// Report whether the line has the shape `<prefix> key: ...`.
///
/// Leading spaces/tabs before the prefix and between the prefix and the key
/// are ignored. The key is the text before the first `:` and must be
/// non-empty once trailing spaces/tabs are removed.
fn is_hashpipe_option_line(line_without_newline: &str, prefix: &str) -> bool {
    let after_prefix = match line_without_newline
        .trim_start_matches([' ', '\t'])
        .strip_prefix(prefix)
    {
        Some(tail) => tail,
        None => return false,
    };

    after_prefix
        .trim_start_matches([' ', '\t'])
        .split_once(':')
        .map_or(false, |(key, _)| {
            !key.trim_end_matches([' ', '\t']).is_empty()
        })
}
771
/// Report whether the line has the shape `<prefix><blank>...<text>`.
///
/// Leading spaces/tabs before the prefix are ignored. The prefix must be
/// followed directly by a space or tab, and something other than spaces/tabs
/// must come after that.
fn is_hashpipe_continuation_line(line_without_newline: &str, prefix: &str) -> bool {
    let after_prefix = match line_without_newline
        .trim_start_matches([' ', '\t'])
        .strip_prefix(prefix)
    {
        Some(tail) => tail,
        None => return false,
    };

    let text = after_prefix.trim_start_matches([' ', '\t']);
    // `text` being shorter than `after_prefix` means at least one blank was
    // trimmed, i.e. the prefix is followed by a space or tab.
    text.len() < after_prefix.len() && !text.is_empty()
}
786
787/// Check if a line is a valid closing fence for the given fence info.
788pub(crate) fn is_closing_fence(content: &str, fence: &FenceInfo) -> bool {
789    let trimmed = strip_leading_spaces(content);
790
791    if !trimmed.starts_with(fence.fence_char) {
792        return false;
793    }
794
795    let closing_count = trimmed
796        .chars()
797        .take_while(|&c| c == fence.fence_char)
798        .count();
799
800    if closing_count < fence.fence_count {
801        return false;
802    }
803
804    // Rest of line must be empty
805    trimmed[closing_count..].trim().is_empty()
806}
807
808/// Emit chunk options as structured CST nodes while preserving all bytes.
809/// This parses {r, echo=TRUE, fig.cap="text"} into CHUNK_OPTIONS with individual CHUNK_OPTION nodes.
810fn emit_chunk_options(builder: &mut GreenNodeBuilder<'static>, content: &str) {
811    if content.trim().is_empty() {
812        builder.token(SyntaxKind::TEXT.into(), content);
813        return;
814    }
815
816    builder.start_node(SyntaxKind::CHUNK_OPTIONS.into());
817
818    let mut pos = 0;
819    let bytes = content.as_bytes();
820
821    while pos < bytes.len() {
822        // Emit leading whitespace/commas as TEXT
823        let ws_start = pos;
824        while pos < bytes.len() {
825            let ch = bytes[pos] as char;
826            if ch != ' ' && ch != '\t' && ch != ',' {
827                break;
828            }
829            pos += 1;
830        }
831        if pos > ws_start {
832            builder.token(SyntaxKind::TEXT.into(), &content[ws_start..pos]);
833        }
834
835        if pos >= bytes.len() {
836            break;
837        }
838
839        // Check if this is a closing brace
840        if bytes[pos] as char == '}' {
841            builder.token(SyntaxKind::TEXT.into(), &content[pos..pos + 1]);
842            pos += 1;
843            if pos < bytes.len() {
844                builder.token(SyntaxKind::TEXT.into(), &content[pos..]);
845            }
846            break;
847        }
848
849        // Read key
850        let key_start = pos;
851        while pos < bytes.len() {
852            let ch = bytes[pos] as char;
853            if ch == '=' || ch == ' ' || ch == '\t' || ch == ',' || ch == '}' {
854                break;
855            }
856            pos += 1;
857        }
858
859        if pos == key_start {
860            // No key found, emit rest as TEXT
861            if pos < bytes.len() {
862                builder.token(SyntaxKind::TEXT.into(), &content[pos..]);
863            }
864            break;
865        }
866
867        let key = &content[key_start..pos];
868
869        // Check for whitespace before '='
870        let ws_before_eq_start = pos;
871        while pos < bytes.len() && matches!(bytes[pos] as char, ' ' | '\t') {
872            pos += 1;
873        }
874
875        // Check if there's a value (=)
876        if pos < bytes.len() && bytes[pos] as char == '=' {
877            // Has value - emit as CHUNK_OPTION
878            builder.start_node(SyntaxKind::CHUNK_OPTION.into());
879            builder.token(SyntaxKind::CHUNK_OPTION_KEY.into(), key);
880
881            // Emit whitespace before '=' if any
882            if pos > ws_before_eq_start {
883                builder.token(SyntaxKind::TEXT.into(), &content[ws_before_eq_start..pos]);
884            }
885
886            builder.token(SyntaxKind::TEXT.into(), "=");
887            pos += 1; // consume '='
888
889            // Emit whitespace after '='
890            let ws_after_eq_start = pos;
891            while pos < bytes.len() && matches!(bytes[pos] as char, ' ' | '\t') {
892                pos += 1;
893            }
894            if pos > ws_after_eq_start {
895                builder.token(SyntaxKind::TEXT.into(), &content[ws_after_eq_start..pos]);
896            }
897
898            // Parse value (might be quoted)
899            if pos < bytes.len() {
900                let quote_char = bytes[pos] as char;
901                if quote_char == '"' || quote_char == '\'' {
902                    // Quoted value
903                    builder.token(
904                        SyntaxKind::CHUNK_OPTION_QUOTE.into(),
905                        &content[pos..pos + 1],
906                    );
907                    pos += 1; // consume opening quote
908
909                    let val_start = pos;
910                    let mut escaped = false;
911                    while pos < bytes.len() {
912                        let ch = bytes[pos] as char;
913                        if !escaped && ch == quote_char {
914                            break;
915                        }
916                        escaped = !escaped && ch == '\\';
917                        pos += 1;
918                    }
919
920                    if pos > val_start {
921                        builder.token(
922                            SyntaxKind::CHUNK_OPTION_VALUE.into(),
923                            &content[val_start..pos],
924                        );
925                    }
926
927                    // Emit closing quote
928                    if pos < bytes.len() && bytes[pos] as char == quote_char {
929                        builder.token(
930                            SyntaxKind::CHUNK_OPTION_QUOTE.into(),
931                            &content[pos..pos + 1],
932                        );
933                        pos += 1;
934                    }
935                } else {
936                    // Unquoted value - read until comma, space, closing brace, or balanced delimiter
937                    let val_start = pos;
938                    let mut depth = 0;
939
940                    while pos < bytes.len() {
941                        let ch = bytes[pos] as char;
942                        match ch {
943                            '(' | '[' | '{' => depth += 1,
944                            ')' | ']' => {
945                                if depth > 0 {
946                                    depth -= 1;
947                                } else {
948                                    break;
949                                }
950                            }
951                            '}' => {
952                                if depth > 0 {
953                                    depth -= 1;
954                                } else {
955                                    break; // End of chunk options
956                                }
957                            }
958                            ',' if depth == 0 => {
959                                break; // Next option
960                            }
961                            ' ' | '\t' if depth == 0 => {
962                                break; // Space separator
963                            }
964                            _ => {}
965                        }
966                        pos += 1;
967                    }
968
969                    if pos > val_start {
970                        builder.token(
971                            SyntaxKind::CHUNK_OPTION_VALUE.into(),
972                            &content[val_start..pos],
973                        );
974                    }
975                }
976            }
977
978            builder.finish_node(); // CHUNK_OPTION
979        } else {
980            // No '=' - this is a label or bareword option
981            // Emit any whitespace we skipped as TEXT
982            if pos > ws_before_eq_start {
983                builder.start_node(SyntaxKind::CHUNK_LABEL.into());
984                builder.token(SyntaxKind::TEXT.into(), key);
985                builder.finish_node(); // CHUNK_LABEL
986                builder.token(SyntaxKind::TEXT.into(), &content[ws_before_eq_start..pos]);
987            } else {
988                builder.start_node(SyntaxKind::CHUNK_LABEL.into());
989                builder.token(SyntaxKind::TEXT.into(), key);
990                builder.finish_node(); // CHUNK_LABEL
991            }
992        }
993    }
994
995    builder.finish_node(); // CHUNK_OPTIONS
996}
997
998/// Helper to parse info string and emit CodeInfo node with parsed components.
999/// This breaks down the info string into its logical parts while preserving all bytes.
1000fn emit_code_info_node(builder: &mut GreenNodeBuilder<'static>, info_string: &str) {
1001    builder.start_node(SyntaxKind::CODE_INFO.into());
1002
1003    let info = InfoString::parse(info_string);
1004
1005    match &info.block_type {
1006        CodeBlockType::DisplayShortcut { language } => {
1007            // Simple case: python or python {.class}
1008            builder.token(SyntaxKind::CODE_LANGUAGE.into(), language);
1009
1010            // If there's more after the language, emit it as TEXT
1011            let after_lang = &info_string[language.len()..];
1012            if !after_lang.is_empty() {
1013                builder.token(SyntaxKind::TEXT.into(), after_lang);
1014            }
1015        }
1016        CodeBlockType::Executable { language } => {
1017            // Quarto: {r} or {r my-label, echo=FALSE}
1018            builder.token(SyntaxKind::TEXT.into(), "{");
1019            builder.token(SyntaxKind::CODE_LANGUAGE.into(), language);
1020
1021            // Parse and emit chunk options
1022            let start_offset = 1 + language.len(); // Skip "{r"
1023            if start_offset < info_string.len() {
1024                let rest = &info_string[start_offset..];
1025                emit_chunk_options(builder, rest);
1026            }
1027        }
1028        CodeBlockType::DisplayExplicit { classes } => {
1029            // Pandoc: {.python} or {#id .haskell .numberLines}
1030            // We need to find the first class in the raw string and emit everything around it
1031
1032            if let Some(lang) = classes.first() {
1033                // Find where ".lang" appears in the info string
1034                let needle = format!(".{}", lang);
1035                if let Some(lang_start) = info_string.find(&needle) {
1036                    // Emit everything before the language
1037                    if lang_start > 0 {
1038                        builder.token(SyntaxKind::TEXT.into(), &info_string[..lang_start]);
1039                    }
1040
1041                    // Emit the dot
1042                    builder.token(SyntaxKind::TEXT.into(), ".");
1043
1044                    // Emit the language
1045                    builder.token(SyntaxKind::CODE_LANGUAGE.into(), lang);
1046
1047                    // Emit everything after
1048                    let after_lang_start = lang_start + 1 + lang.len();
1049                    if after_lang_start < info_string.len() {
1050                        builder.token(SyntaxKind::TEXT.into(), &info_string[after_lang_start..]);
1051                    }
1052                } else {
1053                    // Couldn't find it, just emit as TEXT
1054                    builder.token(SyntaxKind::TEXT.into(), info_string);
1055                }
1056            } else {
1057                // No classes
1058                builder.token(SyntaxKind::TEXT.into(), info_string);
1059            }
1060        }
1061        CodeBlockType::Raw { .. } | CodeBlockType::Plain => {
1062            // No language, just emit as TEXT
1063            builder.token(SyntaxKind::TEXT.into(), info_string);
1064        }
1065    }
1066
1067    builder.finish_node(); // CodeInfo
1068}
1069
1070/// Parse a fenced code block, consuming lines from the parser.
1071/// Returns the new position after the code block.
1072/// Parse a fenced code block, consuming lines from the parser.
1073/// Returns the new position after the code block.
1074/// base_indent accounts for container indentation (e.g., footnotes) that should be stripped.
1075pub(crate) fn parse_fenced_code_block(
1076    builder: &mut GreenNodeBuilder<'static>,
1077    lines: &[&str],
1078    start_pos: usize,
1079    fence: FenceInfo,
1080    bq_depth: usize,
1081    base_indent: usize,
1082    first_line_override: Option<&str>,
1083) -> usize {
1084    // Start code block
1085    builder.start_node(SyntaxKind::CODE_BLOCK.into());
1086
1087    // Opening fence
1088    let (first_trimmed, _first_inner) = prepare_fence_open_line(
1089        builder,
1090        lines[start_pos],
1091        first_line_override,
1092        bq_depth,
1093        base_indent,
1094    );
1095
1096    builder.start_node(SyntaxKind::CODE_FENCE_OPEN.into());
1097    builder.token(
1098        SyntaxKind::CODE_FENCE_MARKER.into(),
1099        &first_trimmed[..fence.fence_count],
1100    );
1101
1102    // Emit any space between fence and info string (for losslessness)
1103    let after_fence = &first_trimmed[fence.fence_count..];
1104    if let Some(_space_stripped) = after_fence.strip_prefix(' ') {
1105        // There was a space - emit it as WHITESPACE
1106        builder.token(SyntaxKind::WHITESPACE.into(), " ");
1107        // Parse and emit the info string as a structured node
1108        if !fence.info_string.is_empty() {
1109            emit_code_info_node(builder, &fence.info_string);
1110        }
1111    } else if !fence.info_string.is_empty() {
1112        // No space - parse and emit info_string as a structured node
1113        emit_code_info_node(builder, &fence.info_string);
1114    }
1115
1116    // Extract and emit the actual newline from the opening fence line
1117    let (_, newline_str) = strip_newline(first_trimmed);
1118    if !newline_str.is_empty() {
1119        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1120    }
1121    builder.finish_node(); // CodeFenceOpen
1122
1123    let mut current_pos = start_pos + 1;
1124    let mut content_lines: Vec<&str> = Vec::new(); // Store original lines for lossless parsing
1125    let mut found_closing = false;
1126
1127    while current_pos < lines.len() {
1128        let line = lines[current_pos];
1129
1130        // Count blockquote markers to detect leaving the surrounding blockquote.
1131        let (line_bq_depth, _) = count_blockquote_markers(line);
1132
1133        // If blockquote depth decreases, code block ends (we've left the blockquote)
1134        if line_bq_depth < bq_depth {
1135            break;
1136        }
1137
1138        // Strip exactly the surrounding blockquote depth; preserve any additional `>` literally.
1139        let inner = if bq_depth > 0 {
1140            strip_n_blockquote_markers(line, bq_depth)
1141        } else {
1142            line
1143        };
1144
1145        // Strip base indent (footnote context) from content lines for fence detection
1146        let base_indent_bytes = byte_index_at_column(inner, base_indent);
1147        let inner_stripped = if base_indent > 0 && inner.len() >= base_indent_bytes {
1148            &inner[base_indent_bytes..]
1149        } else {
1150            inner
1151        };
1152
1153        // Check for closing fence
1154        if is_closing_fence(inner_stripped, &fence) {
1155            found_closing = true;
1156            current_pos += 1;
1157            break;
1158        }
1159
1160        // Store the original line for lossless parsing.
1161        content_lines.push(line);
1162        current_pos += 1;
1163    }
1164
1165    // Add content
1166    if !content_lines.is_empty() {
1167        builder.start_node(SyntaxKind::CODE_CONTENT.into());
1168        let hashpipe_prefix = match InfoString::parse(&fence.info_string).block_type {
1169            CodeBlockType::Executable { language } => hashpipe_comment_prefix(&language),
1170            _ => None,
1171        };
1172
1173        let mut line_idx = 0usize;
1174        if let Some(prefix) = hashpipe_prefix {
1175            let prepared_hashpipe_lines =
1176                compute_hashpipe_preamble_line_count(&content_lines, prefix, bq_depth, base_indent);
1177            if prepared_hashpipe_lines > 0 {
1178                builder.start_node(SyntaxKind::HASHPIPE_YAML_PREAMBLE.into());
1179                builder.start_node(SyntaxKind::HASHPIPE_YAML_CONTENT.into());
1180                while line_idx < prepared_hashpipe_lines {
1181                    let content_line = content_lines[line_idx];
1182                    let after_indent =
1183                        emit_content_line_prefixes(builder, content_line, bq_depth, base_indent);
1184                    let (line_without_newline, newline_str) = strip_newline(after_indent);
1185                    if !emit_hashpipe_option_line(builder, line_without_newline, prefix) {
1186                        let _ =
1187                            emit_hashpipe_continuation_line(builder, line_without_newline, prefix);
1188                    }
1189                    if !newline_str.is_empty() {
1190                        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1191                    }
1192                    line_idx += 1;
1193                }
1194                builder.finish_node(); // HASHPIPE_YAML_CONTENT
1195                builder.finish_node(); // HASHPIPE_YAML_PREAMBLE
1196            }
1197        }
1198
1199        for content_line in content_lines.iter().skip(line_idx) {
1200            let after_indent =
1201                emit_content_line_prefixes(builder, content_line, bq_depth, base_indent);
1202            let (line_without_newline, newline_str) = strip_newline(after_indent);
1203
1204            if !line_without_newline.is_empty() {
1205                builder.token(SyntaxKind::TEXT.into(), line_without_newline);
1206            }
1207
1208            if !newline_str.is_empty() {
1209                builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1210            }
1211        }
1212        builder.finish_node(); // CodeContent
1213    }
1214
1215    // Closing fence (if found)
1216    if found_closing {
1217        let closing_line = lines[current_pos - 1];
1218        let closing_after_blockquote = if bq_depth > 0 {
1219            let stripped = strip_n_blockquote_markers(closing_line, bq_depth);
1220            let prefix_len = closing_line.len().saturating_sub(stripped.len());
1221            if prefix_len > 0 {
1222                emit_blockquote_prefix_tokens(builder, &closing_line[..prefix_len]);
1223            }
1224            stripped
1225        } else {
1226            closing_line
1227        };
1228
1229        // Emit base indent for lossless parsing
1230        let base_indent_bytes = byte_index_at_column(closing_after_blockquote, base_indent);
1231        if base_indent > 0 && closing_after_blockquote.len() >= base_indent_bytes {
1232            let indent_str = &closing_after_blockquote[..base_indent_bytes];
1233            if !indent_str.is_empty() {
1234                builder.token(SyntaxKind::WHITESPACE.into(), indent_str);
1235            }
1236        }
1237
1238        // Strip base indent to get fence
1239        let closing_stripped =
1240            if base_indent > 0 && closing_after_blockquote.len() >= base_indent_bytes {
1241                &closing_after_blockquote[base_indent_bytes..]
1242            } else {
1243                closing_after_blockquote
1244            };
1245        let (closing_without_newline, newline_str) = strip_newline(closing_stripped);
1246        let closing_trimmed_start = strip_leading_spaces(closing_without_newline);
1247        let leading_ws_len = closing_without_newline.len() - closing_trimmed_start.len();
1248        let closing_count = closing_trimmed_start
1249            .chars()
1250            .take_while(|&c| c == fence.fence_char)
1251            .count();
1252        let trailing_after_marker = &closing_trimmed_start[closing_count..];
1253
1254        builder.start_node(SyntaxKind::CODE_FENCE_CLOSE.into());
1255        if leading_ws_len > 0 {
1256            builder.token(
1257                SyntaxKind::WHITESPACE.into(),
1258                &closing_without_newline[..leading_ws_len],
1259            );
1260        }
1261        builder.token(
1262            SyntaxKind::CODE_FENCE_MARKER.into(),
1263            &closing_trimmed_start[..closing_count],
1264        );
1265        if !trailing_after_marker.is_empty() {
1266            builder.token(SyntaxKind::WHITESPACE.into(), trailing_after_marker);
1267        }
1268        if !newline_str.is_empty() {
1269            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1270        }
1271        builder.finish_node(); // CodeFenceClose
1272    }
1273
1274    builder.finish_node(); // CodeBlock
1275
1276    current_pos
1277}
1278
1279/// Parse a GFM math fence (``` math ... ```) as DISPLAY_MATH while preserving bytes.
1280pub(crate) fn parse_fenced_math_block(
1281    builder: &mut GreenNodeBuilder<'static>,
1282    lines: &[&str],
1283    start_pos: usize,
1284    fence: FenceInfo,
1285    bq_depth: usize,
1286    base_indent: usize,
1287    first_line_override: Option<&str>,
1288) -> usize {
1289    builder.start_node(SyntaxKind::DISPLAY_MATH.into());
1290
1291    let (first_trimmed, _first_inner) = prepare_fence_open_line(
1292        builder,
1293        lines[start_pos],
1294        first_line_override,
1295        bq_depth,
1296        base_indent,
1297    );
1298    let (opening_without_newline, opening_newline) = strip_newline(first_trimmed);
1299    builder.token(
1300        SyntaxKind::DISPLAY_MATH_MARKER.into(),
1301        opening_without_newline,
1302    );
1303    if !opening_newline.is_empty() {
1304        builder.token(SyntaxKind::NEWLINE.into(), opening_newline);
1305    }
1306
1307    let mut current_pos = start_pos + 1;
1308    let mut content_lines: Vec<&str> = Vec::new();
1309    let mut found_closing = false;
1310
1311    while current_pos < lines.len() {
1312        let line = lines[current_pos];
1313        let (line_bq_depth, _) = count_blockquote_markers(line);
1314        if line_bq_depth < bq_depth {
1315            break;
1316        }
1317
1318        let inner = if bq_depth > 0 {
1319            strip_n_blockquote_markers(line, bq_depth)
1320        } else {
1321            line
1322        };
1323        let base_indent_bytes = byte_index_at_column(inner, base_indent);
1324        let inner_stripped = if base_indent > 0 && inner.len() >= base_indent_bytes {
1325            &inner[base_indent_bytes..]
1326        } else {
1327            inner
1328        };
1329
1330        if is_closing_fence(inner_stripped, &fence) {
1331            found_closing = true;
1332            current_pos += 1;
1333            break;
1334        }
1335
1336        content_lines.push(line);
1337        current_pos += 1;
1338    }
1339
1340    if !content_lines.is_empty() {
1341        let mut content = String::new();
1342        for content_line in content_lines {
1343            let after_indent =
1344                emit_content_line_prefixes(builder, content_line, bq_depth, base_indent);
1345            let (line_without_newline, newline_str) = strip_newline(after_indent);
1346            content.push_str(line_without_newline);
1347            content.push_str(newline_str);
1348        }
1349        builder.token(SyntaxKind::TEXT.into(), &content);
1350    }
1351
1352    if found_closing {
1353        let closing_line = lines[current_pos - 1];
1354        let closing_after_blockquote = if bq_depth > 0 {
1355            let stripped = strip_n_blockquote_markers(closing_line, bq_depth);
1356            let prefix_len = closing_line.len().saturating_sub(stripped.len());
1357            if prefix_len > 0 {
1358                emit_blockquote_prefix_tokens(builder, &closing_line[..prefix_len]);
1359            }
1360            stripped
1361        } else {
1362            closing_line
1363        };
1364
1365        let base_indent_bytes = byte_index_at_column(closing_after_blockquote, base_indent);
1366        if base_indent > 0 && closing_after_blockquote.len() >= base_indent_bytes {
1367            let indent_str = &closing_after_blockquote[..base_indent_bytes];
1368            if !indent_str.is_empty() {
1369                builder.token(SyntaxKind::WHITESPACE.into(), indent_str);
1370            }
1371        }
1372
1373        let closing_stripped =
1374            if base_indent > 0 && closing_after_blockquote.len() >= base_indent_bytes {
1375                &closing_after_blockquote[base_indent_bytes..]
1376            } else {
1377                closing_after_blockquote
1378            };
1379        let (closing_without_newline, newline_str) = strip_newline(closing_stripped);
1380        let closing_trimmed_start = strip_leading_spaces(closing_without_newline);
1381        let leading_ws_len = closing_without_newline.len() - closing_trimmed_start.len();
1382        let closing_count = closing_trimmed_start
1383            .chars()
1384            .take_while(|&c| c == fence.fence_char)
1385            .count();
1386        let trailing_after_marker = &closing_trimmed_start[closing_count..];
1387
1388        if leading_ws_len > 0 {
1389            builder.token(
1390                SyntaxKind::WHITESPACE.into(),
1391                &closing_without_newline[..leading_ws_len],
1392            );
1393        }
1394        builder.token(
1395            SyntaxKind::DISPLAY_MATH_MARKER.into(),
1396            &closing_trimmed_start[..closing_count],
1397        );
1398        if !trailing_after_marker.is_empty() {
1399            builder.token(SyntaxKind::WHITESPACE.into(), trailing_after_marker);
1400        }
1401        if !newline_str.is_empty() {
1402            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1403        }
1404    }
1405
1406    builder.finish_node(); // DisplayMath
1407    current_pos
1408}
1409
1410#[cfg(test)]
1411mod tests {
1412    use super::*;
1413
1414    #[test]
1415    fn test_backtick_fence() {
1416        let fence = try_parse_fence_open("```python").unwrap();
1417        assert_eq!(fence.fence_char, '`');
1418        assert_eq!(fence.fence_count, 3);
1419        assert_eq!(fence.info_string, "python");
1420    }
1421
1422    #[test]
1423    fn test_tilde_fence() {
1424        let fence = try_parse_fence_open("~~~").unwrap();
1425        assert_eq!(fence.fence_char, '~');
1426        assert_eq!(fence.fence_count, 3);
1427        assert_eq!(fence.info_string, "");
1428    }
1429
1430    #[test]
1431    fn test_long_fence() {
1432        let fence = try_parse_fence_open("`````").unwrap();
1433        assert_eq!(fence.fence_count, 5);
1434    }
1435
1436    #[test]
1437    fn test_two_backticks_invalid() {
1438        assert!(try_parse_fence_open("``").is_none());
1439    }
1440
1441    #[test]
1442    fn test_backtick_fence_with_backtick_in_info_is_invalid() {
1443        assert!(try_parse_fence_open("`````hi````there`````").is_none());
1444    }
1445
1446    #[test]
1447    fn test_closing_fence() {
1448        let fence = FenceInfo {
1449            fence_char: '`',
1450            fence_count: 3,
1451            info_string: String::new(),
1452        };
1453        assert!(is_closing_fence("```", &fence));
1454        assert!(is_closing_fence("````", &fence));
1455        assert!(!is_closing_fence("``", &fence));
1456        assert!(!is_closing_fence("~~~", &fence));
1457    }
1458
1459    #[test]
1460    fn test_fenced_code_preserves_leading_gt() {
1461        let input = "```\n> foo\n```\n";
1462        let tree = crate::parse(input, None);
1463        assert_eq!(tree.text().to_string(), input);
1464    }
1465
1466    #[test]
1467    fn test_fenced_code_in_blockquote_preserves_opening_fence_marker() {
1468        let input = "> ```\n> code\n> ```\n";
1469        let tree = crate::parse(input, None);
1470        assert_eq!(tree.text().to_string(), input);
1471    }
1472
1473    #[test]
1474    fn test_fenced_code_in_definition_list_with_unicode_content_does_not_panic() {
1475        let input = "Term\n: ```\nā”œā”€ā”€ pyproject.toml\n```\n";
1476        let tree = crate::parse(input, None);
1477        assert_eq!(tree.text().to_string(), input);
1478    }
1479
1480    #[test]
1481    fn test_info_string_plain() {
1482        let info = InfoString::parse("");
1483        assert_eq!(info.block_type, CodeBlockType::Plain);
1484        assert!(info.attributes.is_empty());
1485    }
1486
1487    #[test]
1488    fn test_info_string_shortcut() {
1489        let info = InfoString::parse("python");
1490        assert_eq!(
1491            info.block_type,
1492            CodeBlockType::DisplayShortcut {
1493                language: "python".to_string()
1494            }
1495        );
1496        assert!(info.attributes.is_empty());
1497    }
1498
1499    #[test]
1500    fn test_info_string_shortcut_with_trailing() {
1501        let info = InfoString::parse("python extra stuff");
1502        assert_eq!(
1503            info.block_type,
1504            CodeBlockType::DisplayShortcut {
1505                language: "python".to_string()
1506            }
1507        );
1508    }
1509
1510    #[test]
1511    fn test_info_string_display_explicit() {
1512        let info = InfoString::parse("{.python}");
1513        assert_eq!(
1514            info.block_type,
1515            CodeBlockType::DisplayExplicit {
1516                classes: vec!["python".to_string()]
1517            }
1518        );
1519    }
1520
1521    #[test]
1522    fn test_info_string_display_explicit_multiple() {
1523        let info = InfoString::parse("{.python .numberLines}");
1524        assert_eq!(
1525            info.block_type,
1526            CodeBlockType::DisplayExplicit {
1527                classes: vec!["python".to_string(), "numberLines".to_string()]
1528            }
1529        );
1530    }
1531
1532    #[test]
1533    fn test_info_string_executable() {
1534        let info = InfoString::parse("{python}");
1535        assert_eq!(
1536            info.block_type,
1537            CodeBlockType::Executable {
1538                language: "python".to_string()
1539            }
1540        );
1541    }
1542
1543    #[test]
1544    fn test_info_string_executable_with_options() {
1545        let info = InfoString::parse("{python echo=false warning=true}");
1546        assert_eq!(
1547            info.block_type,
1548            CodeBlockType::Executable {
1549                language: "python".to_string()
1550            }
1551        );
1552        assert_eq!(info.attributes.len(), 2);
1553        assert_eq!(
1554            info.attributes[0],
1555            ("echo".to_string(), Some("false".to_string()))
1556        );
1557        assert_eq!(
1558            info.attributes[1],
1559            ("warning".to_string(), Some("true".to_string()))
1560        );
1561    }
1562
1563    #[test]
1564    fn test_info_string_executable_with_commas() {
1565        let info = InfoString::parse("{r, echo=FALSE, warning=TRUE}");
1566        assert_eq!(
1567            info.block_type,
1568            CodeBlockType::Executable {
1569                language: "r".to_string()
1570            }
1571        );
1572        assert_eq!(info.attributes.len(), 2);
1573        assert_eq!(
1574            info.attributes[0],
1575            ("echo".to_string(), Some("FALSE".to_string()))
1576        );
1577        assert_eq!(
1578            info.attributes[1],
1579            ("warning".to_string(), Some("TRUE".to_string()))
1580        );
1581    }
1582
1583    #[test]
1584    fn test_info_string_executable_mixed_commas_spaces() {
1585        // R-style with commas and spaces
1586        let info = InfoString::parse("{r, echo=FALSE, label=\"my chunk\"}");
1587        assert_eq!(
1588            info.block_type,
1589            CodeBlockType::Executable {
1590                language: "r".to_string()
1591            }
1592        );
1593        assert_eq!(info.attributes.len(), 2);
1594        assert_eq!(
1595            info.attributes[0],
1596            ("echo".to_string(), Some("FALSE".to_string()))
1597        );
1598        assert_eq!(
1599            info.attributes[1],
1600            ("label".to_string(), Some("my chunk".to_string()))
1601        );
1602    }
1603
1604    #[test]
1605    fn test_info_string_mixed_shortcut_and_attrs() {
1606        let info = InfoString::parse("python {.numberLines}");
1607        assert_eq!(
1608            info.block_type,
1609            CodeBlockType::DisplayShortcut {
1610                language: "python".to_string()
1611            }
1612        );
1613        assert_eq!(info.attributes.len(), 1);
1614        assert_eq!(info.attributes[0], (".numberLines".to_string(), None));
1615    }
1616
1617    #[test]
1618    fn test_info_string_mixed_with_key_value() {
1619        let info = InfoString::parse("python {.numberLines startFrom=\"100\"}");
1620        assert_eq!(
1621            info.block_type,
1622            CodeBlockType::DisplayShortcut {
1623                language: "python".to_string()
1624            }
1625        );
1626        assert_eq!(info.attributes.len(), 2);
1627        assert_eq!(info.attributes[0], (".numberLines".to_string(), None));
1628        assert_eq!(
1629            info.attributes[1],
1630            ("startFrom".to_string(), Some("100".to_string()))
1631        );
1632    }
1633
1634    #[test]
1635    fn test_info_string_explicit_with_id_and_classes() {
1636        let info = InfoString::parse("{#mycode .haskell .numberLines startFrom=\"100\"}");
1637        assert_eq!(
1638            info.block_type,
1639            CodeBlockType::DisplayExplicit {
1640                classes: vec!["haskell".to_string(), "numberLines".to_string()]
1641            }
1642        );
1643        // Non-class attributes
1644        let has_id = info.attributes.iter().any(|(k, _)| k == "#mycode");
1645        let has_start = info
1646            .attributes
1647            .iter()
1648            .any(|(k, v)| k == "startFrom" && v == &Some("100".to_string()));
1649        assert!(has_id);
1650        assert!(has_start);
1651    }
1652
1653    #[test]
1654    fn test_info_string_raw_html() {
1655        let info = InfoString::parse("{=html}");
1656        assert_eq!(
1657            info.block_type,
1658            CodeBlockType::Raw {
1659                format: "html".to_string()
1660            }
1661        );
1662        assert!(info.attributes.is_empty());
1663    }
1664
1665    #[test]
1666    fn test_info_string_raw_latex() {
1667        let info = InfoString::parse("{=latex}");
1668        assert_eq!(
1669            info.block_type,
1670            CodeBlockType::Raw {
1671                format: "latex".to_string()
1672            }
1673        );
1674    }
1675
1676    #[test]
1677    fn test_info_string_raw_openxml() {
1678        let info = InfoString::parse("{=openxml}");
1679        assert_eq!(
1680            info.block_type,
1681            CodeBlockType::Raw {
1682                format: "openxml".to_string()
1683            }
1684        );
1685    }
1686
1687    #[test]
1688    fn test_info_string_raw_ms() {
1689        let info = InfoString::parse("{=ms}");
1690        assert_eq!(
1691            info.block_type,
1692            CodeBlockType::Raw {
1693                format: "ms".to_string()
1694            }
1695        );
1696    }
1697
1698    #[test]
1699    fn test_info_string_raw_html5() {
1700        let info = InfoString::parse("{=html5}");
1701        assert_eq!(
1702            info.block_type,
1703            CodeBlockType::Raw {
1704                format: "html5".to_string()
1705            }
1706        );
1707    }
1708
1709    #[test]
1710    fn test_info_string_raw_not_combined_with_attrs() {
1711        // If there are other attributes with =format, it should not be treated as raw
1712        let info = InfoString::parse("{=html .class}");
1713        // This should NOT be parsed as raw because there's more than one attribute
1714        assert_ne!(
1715            info.block_type,
1716            CodeBlockType::Raw {
1717                format: "html".to_string()
1718            }
1719        );
1720    }
1721
1722    #[test]
1723    fn test_parse_pandoc_attributes_spaces() {
1724        // Pandoc display blocks use spaces as delimiters
1725        let attrs = InfoString::parse_pandoc_attributes(".python .numberLines startFrom=\"10\"");
1726        assert_eq!(attrs.len(), 3);
1727        assert_eq!(attrs[0], (".python".to_string(), None));
1728        assert_eq!(attrs[1], (".numberLines".to_string(), None));
1729        assert_eq!(attrs[2], ("startFrom".to_string(), Some("10".to_string())));
1730    }
1731
1732    #[test]
1733    fn test_parse_pandoc_attributes_no_commas() {
1734        // Commas in Pandoc attributes should be treated as part of the value
1735        let attrs = InfoString::parse_pandoc_attributes("#id .class key=value");
1736        assert_eq!(attrs.len(), 3);
1737        assert_eq!(attrs[0], ("#id".to_string(), None));
1738        assert_eq!(attrs[1], (".class".to_string(), None));
1739        assert_eq!(attrs[2], ("key".to_string(), Some("value".to_string())));
1740    }
1741
1742    #[test]
1743    fn test_parse_chunk_options_commas() {
1744        // Quarto/RMarkdown chunks use commas as delimiters
1745        let attrs = InfoString::parse_chunk_options("r, echo=FALSE, warning=TRUE");
1746        assert_eq!(attrs.len(), 3);
1747        assert_eq!(attrs[0], ("r".to_string(), None));
1748        assert_eq!(attrs[1], ("echo".to_string(), Some("FALSE".to_string())));
1749        assert_eq!(attrs[2], ("warning".to_string(), Some("TRUE".to_string())));
1750    }
1751
1752    #[test]
1753    fn test_parse_chunk_options_no_spaces() {
1754        // Should handle comma-separated without spaces
1755        let attrs = InfoString::parse_chunk_options("r,echo=FALSE,warning=TRUE");
1756        assert_eq!(attrs.len(), 3);
1757        assert_eq!(attrs[0], ("r".to_string(), None));
1758        assert_eq!(attrs[1], ("echo".to_string(), Some("FALSE".to_string())));
1759        assert_eq!(attrs[2], ("warning".to_string(), Some("TRUE".to_string())));
1760    }
1761
1762    #[test]
1763    fn test_parse_chunk_options_mixed() {
1764        // Handle both commas and spaces
1765        let attrs = InfoString::parse_chunk_options("python echo=False, warning=True");
1766        assert_eq!(attrs.len(), 3);
1767        assert_eq!(attrs[0], ("python".to_string(), None));
1768        assert_eq!(attrs[1], ("echo".to_string(), Some("False".to_string())));
1769        assert_eq!(attrs[2], ("warning".to_string(), Some("True".to_string())));
1770    }
1771
1772    #[test]
1773    fn test_parse_chunk_options_nested_function_call() {
1774        // R function calls with nested commas should be treated as single value
1775        let attrs = InfoString::parse_chunk_options(r#"r pep-cg, dependson=c("foo", "bar")"#);
1776        assert_eq!(attrs.len(), 3);
1777        assert_eq!(attrs[0], ("r".to_string(), None));
1778        assert_eq!(attrs[1], ("pep-cg".to_string(), None));
1779        assert_eq!(
1780            attrs[2],
1781            (
1782                "dependson".to_string(),
1783                Some(r#"c("foo", "bar")"#.to_string())
1784            )
1785        );
1786    }
1787
1788    #[test]
1789    fn test_parse_chunk_options_nested_with_spaces() {
1790        // Function call with spaces inside
1791        let attrs = InfoString::parse_chunk_options(r#"r, cache.path=file.path("cache", "dir")"#);
1792        assert_eq!(attrs.len(), 2);
1793        assert_eq!(attrs[0], ("r".to_string(), None));
1794        assert_eq!(
1795            attrs[1],
1796            (
1797                "cache.path".to_string(),
1798                Some(r#"file.path("cache", "dir")"#.to_string())
1799            )
1800        );
1801    }
1802
1803    #[test]
1804    fn test_parse_chunk_options_deeply_nested() {
1805        // Multiple levels of nesting
1806        let attrs = InfoString::parse_chunk_options(r#"r, x=list(a=c(1,2), b=c(3,4))"#);
1807        assert_eq!(attrs.len(), 2);
1808        assert_eq!(attrs[0], ("r".to_string(), None));
1809        assert_eq!(
1810            attrs[1],
1811            (
1812                "x".to_string(),
1813                Some(r#"list(a=c(1,2), b=c(3,4))"#.to_string())
1814            )
1815        );
1816    }
1817
1818    #[test]
1819    fn test_parse_chunk_options_brackets_and_braces() {
1820        // Test all bracket types
1821        let attrs = InfoString::parse_chunk_options(r#"r, data=df[rows, cols], config={a:1, b:2}"#);
1822        assert_eq!(attrs.len(), 3);
1823        assert_eq!(attrs[0], ("r".to_string(), None));
1824        assert_eq!(
1825            attrs[1],
1826            ("data".to_string(), Some("df[rows, cols]".to_string()))
1827        );
1828        assert_eq!(
1829            attrs[2],
1830            ("config".to_string(), Some("{a:1, b:2}".to_string()))
1831        );
1832    }
1833
1834    #[test]
1835    fn test_parse_chunk_options_quotes_with_parens() {
1836        // Parentheses inside quoted strings shouldn't affect depth tracking
1837        // Note: The parser strips outer quotes from quoted values
1838        let attrs = InfoString::parse_chunk_options(r#"r, label="test (with parens)", echo=TRUE"#);
1839        assert_eq!(attrs.len(), 3);
1840        assert_eq!(attrs[0], ("r".to_string(), None));
1841        assert_eq!(
1842            attrs[1],
1843            ("label".to_string(), Some("test (with parens)".to_string()))
1844        );
1845        assert_eq!(attrs[2], ("echo".to_string(), Some("TRUE".to_string())));
1846    }
1847
1848    #[test]
1849    fn test_parse_chunk_options_escaped_quotes() {
1850        // Escaped quotes inside string values
1851        // Note: The parser strips outer quotes and processes escapes
1852        let attrs = InfoString::parse_chunk_options(r#"r, label="has \"quoted\" text""#);
1853        assert_eq!(attrs.len(), 2);
1854        assert_eq!(attrs[0], ("r".to_string(), None));
1855        assert_eq!(
1856            attrs[1],
1857            (
1858                "label".to_string(),
1859                Some(r#"has "quoted" text"#.to_string())
1860            )
1861        );
1862    }
1863
1864    #[test]
1865    fn test_display_vs_executable_parsing() {
1866        // Display block should use Pandoc parser (spaces)
1867        let info1 = InfoString::parse("{.python .numberLines startFrom=\"10\"}");
1868        assert!(matches!(
1869            info1.block_type,
1870            CodeBlockType::DisplayExplicit { .. }
1871        ));
1872
1873        // Executable chunk should use chunk options parser (commas)
1874        let info2 = InfoString::parse("{r, echo=FALSE, warning=TRUE}");
1875        assert!(matches!(info2.block_type, CodeBlockType::Executable { .. }));
1876        assert_eq!(info2.attributes.len(), 2);
1877    }
1878
1879    #[test]
1880    fn test_info_string_executable_implicit_label() {
1881        // {r mylabel} should parse as label=mylabel
1882        let info = InfoString::parse("{r mylabel}");
1883        assert!(matches!(
1884            info.block_type,
1885            CodeBlockType::Executable { ref language } if language == "r"
1886        ));
1887        assert_eq!(info.attributes.len(), 1);
1888        assert_eq!(
1889            info.attributes[0],
1890            ("label".to_string(), Some("mylabel".to_string()))
1891        );
1892    }
1893
1894    #[test]
1895    fn test_info_string_executable_implicit_label_with_options() {
1896        // {r mylabel, echo=FALSE} should parse as label=mylabel, echo=FALSE
1897        let info = InfoString::parse("{r mylabel, echo=FALSE}");
1898        assert!(matches!(
1899            info.block_type,
1900            CodeBlockType::Executable { ref language } if language == "r"
1901        ));
1902        assert_eq!(info.attributes.len(), 2);
1903        assert_eq!(
1904            info.attributes[0],
1905            ("label".to_string(), Some("mylabel".to_string()))
1906        );
1907        assert_eq!(
1908            info.attributes[1],
1909            ("echo".to_string(), Some("FALSE".to_string()))
1910        );
1911    }
1912
1913    #[test]
1914    fn test_compute_hashpipe_preamble_line_count_for_block_scalar() {
1915        let content_lines = vec![
1916            "#| fig-cap: |\n",
1917            "#|   A caption\n",
1918            "#|   spanning lines\n",
1919            "a <- 1\n",
1920        ];
1921        let count = compute_hashpipe_preamble_line_count(&content_lines, "#|", 0, 0);
1922        assert_eq!(count, 3);
1923    }
1924
1925    #[test]
1926    fn test_compute_hashpipe_preamble_line_count_stops_at_non_option() {
1927        let content_lines = vec!["#| label: fig-plot\n", "plot(1:10)\n", "#| echo: false\n"];
1928        let count = compute_hashpipe_preamble_line_count(&content_lines, "#|", 0, 0);
1929        assert_eq!(count, 1);
1930    }
1931}