1use crate::parser::utils::chunk_options::hashpipe_comment_prefix;
4use crate::syntax::SyntaxKind;
5use rowan::GreenNodeBuilder;
6
7use super::blockquotes::{count_blockquote_markers, strip_n_blockquote_markers};
8use super::container_prefix::{ContainerPrefix, StripOp, advance_columns};
9use crate::parser::utils::container_stack::byte_index_at_column;
10
11pub(crate) fn strip_list_indent(line: &str, list_content_col: usize) -> &str {
17 if list_content_col == 0 {
18 return line;
19 }
20 let idx = byte_index_at_column(line, list_content_col);
21 &line[idx..]
22}
23
24pub(crate) fn bq_outer_of_list(prefix: &ContainerPrefix) -> bool {
29 for op in prefix.ops() {
30 match op {
31 StripOp::BlockQuoteMarker => return true,
32 StripOp::ListAdvance(_) => return false,
33 StripOp::ContentIndent(_) => {}
34 }
35 }
36 false
37}
38
39use crate::parser::utils::helpers::{
40 strip_leading_spaces, strip_newline, trim_end_spaces_tabs, trim_start_spaces_tabs,
41};
42
/// Classification of a fenced code block, derived from its info string.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CodeBlockType {
    /// Bare language shortcut such as `python`, optionally followed by a
    /// brace group (`python {.numberLines}`).
    DisplayShortcut { language: String },
    /// Brace group whose first bare token is a `.class`; `classes` holds the
    /// class names without the leading dot.
    DisplayExplicit { classes: Vec<String> },
    /// Brace group whose first bare token names a language, e.g. `{r}`.
    Executable { language: String },
    /// Raw block of the form `{=format}` with an alphanumeric format name.
    Raw { format: String },
    /// Empty info string, or a brace group without any bare language/class token.
    Plain,
}

/// Parsed representation of a fenced code block info string.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InfoString {
    /// The info string exactly as it was passed to [`InfoString::parse`].
    pub raw: String,
    /// What kind of code block the info string describes.
    pub block_type: CodeBlockType,
    /// Remaining `key` / `key=value` attributes in source order; bare keys
    /// carry `None` as their value.
    pub attributes: Vec<(String, Option<String>)>,
}

impl InfoString {
    /// Parses a raw info string into its block type and attributes.
    ///
    /// Surrounding whitespace is ignored for classification but `raw` is kept
    /// verbatim. The recognised shapes are, in order:
    ///
    /// * empty -> [`CodeBlockType::Plain`]
    /// * `{...}` -> delegated to the brace-group parser
    /// * `lang {...}` -> [`CodeBlockType::DisplayShortcut`] with attributes
    /// * anything else -> [`CodeBlockType::DisplayShortcut`] using the first
    ///   whitespace-separated word as the language
    pub fn parse(raw: &str) -> Self {
        let trimmed = raw.trim();

        if trimmed.is_empty() {
            return InfoString {
                raw: raw.to_string(),
                block_type: CodeBlockType::Plain,
                attributes: Vec::new(),
            };
        }

        // The whole info string is a single brace group.
        if let Some(content) = trimmed
            .strip_prefix('{')
            .and_then(|rest| rest.strip_suffix('}'))
        {
            return Self::parse_explicit(raw, content);
        }

        // `lang {attrs}`: a single word followed by a brace group.
        if let Some(brace_start) = trimmed.find('{') {
            let language = trimmed[..brace_start].trim();
            if !language.is_empty() && !language.contains(char::is_whitespace) {
                let brace_group = trimmed[brace_start..]
                    .strip_prefix('{')
                    .and_then(|rest| rest.strip_suffix('}'));
                if let Some(content) = brace_group {
                    return InfoString {
                        raw: raw.to_string(),
                        block_type: CodeBlockType::DisplayShortcut {
                            language: language.to_string(),
                        },
                        attributes: Self::parse_attributes(content),
                    };
                }
            }
        }

        // Fallback: the first word is the language, the rest is ignored.
        let language = trimmed.split_whitespace().next().unwrap_or(trimmed);
        InfoString {
            raw: raw.to_string(),
            block_type: CodeBlockType::DisplayShortcut {
                language: language.to_string(),
            },
            attributes: Vec::new(),
        }
    }

    /// Parses the inside of a brace group (`content` excludes the braces).
    ///
    /// The first bare token (no value, not starting with `#`) decides the
    /// block type: a `.class` gives `DisplayExplicit`, any other token gives
    /// `Executable`, and no such token gives `Plain`.
    fn parse_explicit(raw: &str, content: &str) -> Self {
        // `{=format}` raw blocks.
        let raw_format = content
            .trim()
            .strip_prefix('=')
            .filter(|name| !name.is_empty() && name.chars().all(char::is_alphanumeric));
        if let Some(format_name) = raw_format {
            return InfoString {
                raw: raw.to_string(),
                block_type: CodeBlockType::Raw {
                    format: format_name.to_string(),
                },
                attributes: Vec::new(),
            };
        }

        let mut chunk_attrs = Self::parse_chunk_options(content);
        // Locate the language by position, not by key text: an earlier
        // `key=value` pair may share its key with the language token.
        let lang_index = chunk_attrs
            .iter()
            .position(|(key, value)| value.is_none() && !key.starts_with('#'));

        match lang_index {
            Some(idx) if chunk_attrs[idx].0.starts_with('.') => {
                // Pandoc-style attributes: bare `.class` tokens are classes,
                // everything else (including `.key=value`) stays an attribute.
                let (class_attrs, other_attrs): (Vec<_>, Vec<_>) =
                    Self::parse_pandoc_attributes(content)
                        .into_iter()
                        .partition(|(key, value)| key.starts_with('.') && value.is_none());
                let classes = class_attrs
                    .into_iter()
                    .map(|(key, _)| key[1..].to_string())
                    .collect();

                InfoString {
                    raw: raw.to_string(),
                    block_type: CodeBlockType::DisplayExplicit { classes },
                    attributes: other_attrs,
                }
            }
            Some(idx) => {
                let (language, _) = chunk_attrs.remove(idx);
                // After the removal, the token that followed the language sits
                // at `idx`; a bare token there is the implicit chunk label.
                if matches!(chunk_attrs.get(idx), Some((_, None))) {
                    let (label, _) = chunk_attrs.remove(idx);
                    chunk_attrs.insert(0, ("label".to_string(), Some(label)));
                }

                InfoString {
                    raw: raw.to_string(),
                    block_type: CodeBlockType::Executable { language },
                    attributes: chunk_attrs,
                }
            }
            None => InfoString {
                raw: raw.to_string(),
                block_type: CodeBlockType::Plain,
                attributes: Self::parse_pandoc_attributes(content),
            },
        }
    }

    /// Advances `chars` past every leading character accepted by `skip`.
    fn skip_chars(
        chars: &mut std::iter::Peekable<std::str::Chars<'_>>,
        skip: impl Fn(char) -> bool,
    ) {
        while chars.next_if(|&c| skip(c)).is_some() {}
    }

    /// Collects characters from `chars` until `stop` accepts the next one
    /// (which is left unconsumed) or the input ends.
    fn take_until(
        chars: &mut std::iter::Peekable<std::str::Chars<'_>>,
        stop: impl Fn(char) -> bool,
    ) -> String {
        let mut out = String::new();
        while let Some(c) = chars.next_if(|&c| !stop(c)) {
            out.push(c);
        }
        out
    }

    /// Reads a double-quoted value. The next character must be the opening
    /// quote; the closing quote is consumed but not returned. A backslash
    /// makes the following character literal (the backslash itself is dropped).
    fn read_double_quoted(chars: &mut std::iter::Peekable<std::str::Chars<'_>>) -> String {
        chars.next(); // opening quote
        let mut value = String::new();
        while let Some(ch) = chars.next() {
            match ch {
                '"' => break,
                '\\' => {
                    if let Some(escaped) = chars.next() {
                        value.push(escaped);
                    }
                }
                _ => value.push(ch),
            }
        }
        value
    }

    /// Reads an unquoted chunk-option value.
    ///
    /// The value ends at the first space, tab or comma that is outside of any
    /// bracket pair and outside of any quoted section. Quotes and backslashes
    /// are kept verbatim in the returned text. A closing bracket without a
    /// matching opener does not affect the nesting depth.
    fn read_bare_chunk_value(chars: &mut std::iter::Peekable<std::str::Chars<'_>>) -> String {
        let mut value = String::new();
        let mut depth = 0usize;
        let mut in_quote: Option<char> = None;
        let mut escaped = false;

        while let Some(&ch) = chars.peek() {
            if escaped {
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if let Some(quote) = in_quote {
                if ch == quote {
                    in_quote = None;
                }
            } else {
                match ch {
                    '"' | '\'' => in_quote = Some(ch),
                    '(' | '[' | '{' => depth += 1,
                    // Saturate so a stray closer cannot push the depth below
                    // zero and keep later separators from ending the value.
                    ')' | ']' | '}' => depth = depth.saturating_sub(1),
                    ' ' | '\t' | ',' if depth == 0 => break,
                    _ => {}
                }
            }
            value.push(ch);
            chars.next();
        }
        value
    }

    /// Parses whitespace-separated Pandoc attributes (`#id .class key=value`).
    ///
    /// Values may be double-quoted; unquoted values run up to the next space
    /// or tab. Parsing stops at the first empty key (e.g. a stray `=`).
    fn parse_pandoc_attributes(content: &str) -> Vec<(String, Option<String>)> {
        fn is_blank(c: char) -> bool {
            c == ' ' || c == '\t'
        }

        let mut attrs = Vec::new();
        let mut chars = content.chars().peekable();

        loop {
            Self::skip_chars(&mut chars, is_blank);
            let key = Self::take_until(&mut chars, |c| c == '=' || is_blank(c));
            if key.is_empty() {
                break;
            }
            Self::skip_chars(&mut chars, is_blank);

            if chars.next_if_eq(&'=').is_none() {
                attrs.push((key, None));
                continue;
            }

            Self::skip_chars(&mut chars, is_blank);
            let value = if chars.peek() == Some(&'"') {
                Self::read_double_quoted(&mut chars)
            } else {
                Self::take_until(&mut chars, is_blank)
            };
            attrs.push((key, Some(value)));
        }

        attrs
    }

    /// Parses chunk options separated by spaces, tabs and/or commas
    /// (`r, echo=FALSE, fig.dim=c(4, 3)`).
    ///
    /// Values may be double-quoted; unquoted values may contain bracketed or
    /// quoted sections that include separators. Parsing stops at the first
    /// empty key (e.g. a stray `=`).
    fn parse_chunk_options(content: &str) -> Vec<(String, Option<String>)> {
        fn is_separator(c: char) -> bool {
            matches!(c, ' ' | '\t' | ',')
        }

        let mut attrs = Vec::new();
        let mut chars = content.chars().peekable();

        loop {
            Self::skip_chars(&mut chars, is_separator);
            let key = Self::take_until(&mut chars, |c| c == '=' || is_separator(c));
            if key.is_empty() {
                break;
            }
            Self::skip_chars(&mut chars, is_separator);

            if chars.next_if_eq(&'=').is_none() {
                attrs.push((key, None));
                continue;
            }

            Self::skip_chars(&mut chars, is_separator);
            let value = if chars.peek() == Some(&'"') {
                Self::read_double_quoted(&mut chars)
            } else {
                Self::read_bare_chunk_value(&mut chars)
            };
            attrs.push((key, Some(value)));
        }

        attrs
    }

    /// Parses the brace group that follows a language shortcut; uses the
    /// chunk-option grammar.
    fn parse_attributes(content: &str) -> Vec<(String, Option<String>)> {
        Self::parse_chunk_options(content)
    }
}
468
/// Description of an opening code fence.
#[derive(Clone, Debug)]
pub(crate) struct FenceInfo {
    /// Character the fence is made of.
    pub fence_char: char,
    /// Number of consecutive fence characters in the opening fence.
    pub fence_count: usize,
    /// Text that follows the fence characters on the opening line.
    pub info_string: String,
}
476
477pub(crate) fn is_gfm_math_fence(fence: &FenceInfo) -> bool {
478 fence.info_string.trim() == "math"
479}
480
481pub(crate) fn try_parse_fence_open(content: &str) -> Option<FenceInfo> {
484 let trimmed = strip_leading_spaces(content);
485
486 let (fence_char, fence_count) = if trimmed.starts_with('`') {
488 let count = trimmed.chars().take_while(|&c| c == '`').count();
489 ('`', count)
490 } else if trimmed.starts_with('~') {
491 let count = trimmed.chars().take_while(|&c| c == '~').count();
492 ('~', count)
493 } else {
494 return None;
495 };
496
497 if fence_count < 3 {
498 return None;
499 }
500
501 let info_string_raw = &trimmed[fence_count..];
502 let (info_string_trimmed, _) = strip_newline(info_string_raw);
504 let info_string = if let Some(stripped) = info_string_trimmed.strip_prefix(' ') {
505 stripped.to_string()
506 } else {
507 info_string_trimmed.to_string()
508 };
509
510 if fence_char == '`' && info_string.contains('`') {
512 return None;
513 }
514
515 Some(FenceInfo {
516 fence_char,
517 fence_count,
518 info_string,
519 })
520}
521
522#[allow(clippy::too_many_arguments)]
523fn prepare_fence_open_line<'a>(
524 builder: &mut GreenNodeBuilder<'static>,
525 source_line: &'a str,
526 first_line_override: Option<&'a str>,
527 bq_depth: usize,
528 list_content_col: usize,
529 list_marker_consumed_on_line_0: bool,
530 bq_outer: bool,
531 content_indent: usize,
532) -> (&'a str, &'a str) {
533 if let Some(first_line) = first_line_override {
542 if bq_depth > 0 && source_line != first_line {
543 let stripped = strip_n_blockquote_markers(source_line, bq_depth);
544 let prefix_len = source_line.len().saturating_sub(stripped.len());
545 if prefix_len > 0 {
546 emit_blockquote_prefix_tokens(builder, &source_line[..prefix_len]);
547 }
548 }
549 let first_trimmed = strip_leading_spaces(first_line);
550 let leading_ws_len = first_line.len().saturating_sub(first_trimmed.len());
551 if leading_ws_len > 0 {
552 builder.token(SyntaxKind::WHITESPACE.into(), &first_line[..leading_ws_len]);
553 }
554 return (first_trimmed, first_line);
555 }
556
557 let mut s: &'a str = source_line;
558 let mut pending_ws_start: Option<usize> = None;
559 let suppress_list = list_marker_consumed_on_line_0;
560
561 let flush_ws = |builder: &mut GreenNodeBuilder<'static>,
562 pending: &mut Option<usize>,
563 current_offset: usize| {
564 if let Some(start) = *pending
565 && current_offset > start
566 {
567 builder.token(
568 SyntaxKind::WHITESPACE.into(),
569 &source_line[start..current_offset],
570 );
571 }
572 *pending = None;
573 };
574
575 let do_strip_list = |s: &mut &'a str, pending: &mut Option<usize>| {
576 if list_content_col == 0 {
577 return;
578 }
579 let stripped = if suppress_list {
587 advance_columns(s, list_content_col)
588 } else {
589 strip_list_indent(s, list_content_col)
590 };
591 let consumed = s.len() - stripped.len();
592 if consumed > 0 {
593 let start = source_line.len() - s.len();
594 if !suppress_list && pending.is_none() {
595 *pending = Some(start);
596 }
597 *s = stripped;
598 }
599 };
600
601 let do_strip_bq =
602 |builder: &mut GreenNodeBuilder<'static>, s: &mut &'a str, pending: &mut Option<usize>| {
603 if bq_depth == 0 {
604 return;
605 }
606 let current_offset = source_line.len() - s.len();
607 flush_ws(builder, pending, current_offset);
608 *s = strip_n_blockquote_markers(s, bq_depth);
609 };
610
611 if bq_outer {
612 do_strip_bq(builder, &mut s, &mut pending_ws_start);
613 do_strip_list(&mut s, &mut pending_ws_start);
614 } else {
615 do_strip_list(&mut s, &mut pending_ws_start);
616 do_strip_bq(builder, &mut s, &mut pending_ws_start);
617 }
618
619 if content_indent > 0 {
621 let indent_bytes = byte_index_at_column(s, content_indent);
622 if s.len() >= indent_bytes && indent_bytes > 0 {
623 let start = source_line.len() - s.len();
624 if pending_ws_start.is_none() {
625 pending_ws_start = Some(start);
626 }
627 s = &s[indent_bytes..];
628 }
629 }
630
631 let final_offset = source_line.len() - s.len();
632 flush_ws(builder, &mut pending_ws_start, final_offset);
633
634 let first_trimmed = strip_leading_spaces(s);
635 let leading_ws_len = s.len().saturating_sub(first_trimmed.len());
636 if leading_ws_len > 0 {
637 builder.token(SyntaxKind::WHITESPACE.into(), &s[..leading_ws_len]);
638 }
639 (first_trimmed, s)
640}
641
642pub(crate) fn emit_blockquote_prefix_tokens(builder: &mut GreenNodeBuilder<'static>, prefix: &str) {
643 for ch in prefix.chars() {
644 if ch == '>' {
645 builder.token(SyntaxKind::BLOCK_QUOTE_MARKER.into(), ">");
646 } else {
647 let mut buf = [0u8; 4];
648 builder.token(SyntaxKind::WHITESPACE.into(), ch.encode_utf8(&mut buf));
649 }
650 }
651}
652
653pub(crate) fn emit_content_line_prefixes<'a>(
654 builder: &mut GreenNodeBuilder<'static>,
655 content_line: &'a str,
656 bq_depth: usize,
657 list_content_col: usize,
658 bq_outer: bool,
659 content_indent: usize,
660) -> &'a str {
661 let mut s = content_line;
670 let mut pending_ws_start: Option<usize> = None;
671
672 let flush_ws = |builder: &mut GreenNodeBuilder<'static>,
673 pending: &mut Option<usize>,
674 current_offset: usize| {
675 if let Some(start) = *pending
676 && current_offset > start
677 {
678 builder.token(
679 SyntaxKind::WHITESPACE.into(),
680 &content_line[start..current_offset],
681 );
682 *pending = None;
683 }
684 };
685
686 let strip_and_remember_list =
687 |s: &mut &'a str, pending: &mut Option<usize>, list_content_col: usize| {
688 if list_content_col == 0 {
689 return;
690 }
691 let stripped = strip_list_indent(s, list_content_col);
692 let consumed = s.len() - stripped.len();
693 if consumed > 0 {
694 let start = content_line.len() - s.len();
695 if pending.is_none() {
696 *pending = Some(start);
697 }
698 *s = stripped;
699 }
700 };
701
702 let strip_and_emit_bq = |builder: &mut GreenNodeBuilder<'static>,
703 s: &mut &'a str,
704 pending: &mut Option<usize>,
705 bq_depth: usize| {
706 if bq_depth == 0 {
707 return;
708 }
709 let current_offset = content_line.len() - s.len();
710 flush_ws(builder, pending, current_offset);
711 let stripped = strip_n_blockquote_markers(s, bq_depth);
712 let prefix_len = s.len() - stripped.len();
713 if prefix_len > 0 {
714 emit_blockquote_prefix_tokens(builder, &s[..prefix_len]);
715 }
716 *s = stripped;
717 };
718
719 if bq_outer {
720 strip_and_emit_bq(builder, &mut s, &mut pending_ws_start, bq_depth);
721 strip_and_remember_list(&mut s, &mut pending_ws_start, list_content_col);
722 } else {
723 strip_and_remember_list(&mut s, &mut pending_ws_start, list_content_col);
724 strip_and_emit_bq(builder, &mut s, &mut pending_ws_start, bq_depth);
725 }
726
727 if content_indent > 0 {
728 let indent_bytes = byte_index_at_column(s, content_indent);
729 if s.len() >= indent_bytes && indent_bytes > 0 {
730 let start = content_line.len() - s.len();
731 if pending_ws_start.is_none() {
732 pending_ws_start = Some(start);
733 }
734 s = &s[indent_bytes..];
735 }
736 }
737
738 let final_offset = content_line.len() - s.len();
739 flush_ws(builder, &mut pending_ws_start, final_offset);
740 s
741}
742
743fn strip_content_line_prefixes(
744 content_line: &str,
745 bq_depth: usize,
746 list_content_col: usize,
747 bq_outer: bool,
748 content_indent: usize,
749) -> &str {
750 let after_bq_and_list = if bq_outer {
751 let after_bq = if bq_depth > 0 {
752 strip_n_blockquote_markers(content_line, bq_depth)
753 } else {
754 content_line
755 };
756 strip_list_indent(after_bq, list_content_col)
757 } else {
758 let after_list = strip_list_indent(content_line, list_content_col);
759 if bq_depth > 0 {
760 strip_n_blockquote_markers(after_list, bq_depth)
761 } else {
762 after_list
763 }
764 };
765
766 let indent_bytes = byte_index_at_column(after_bq_and_list, content_indent);
767 if content_indent > 0 && after_bq_and_list.len() >= indent_bytes {
768 &after_bq_and_list[indent_bytes..]
769 } else {
770 after_bq_and_list
771 }
772}
773
774pub(crate) fn compute_hashpipe_preamble_line_count(
775 content_lines: &[&str],
776 prefix: &str,
777 bq_depth: usize,
778 list_content_col: usize,
779 bq_outer: bool,
780 content_indent: usize,
781) -> usize {
782 let mut line_idx = 0usize;
783
784 while line_idx < content_lines.len() {
785 let preview_after_indent = strip_content_line_prefixes(
786 content_lines[line_idx],
787 bq_depth,
788 list_content_col,
789 bq_outer,
790 content_indent,
791 );
792 let (preview_without_newline, _) = strip_newline(preview_after_indent);
793 if !is_hashpipe_option_line(preview_without_newline, prefix)
794 && !is_hashpipe_continuation_line(preview_without_newline, prefix)
795 {
796 break;
797 }
798 line_idx += 1;
799 }
800
801 line_idx
802}
803
804fn emit_hashpipe_option_line(
805 builder: &mut GreenNodeBuilder<'static>,
806 line_without_newline: &str,
807 prefix: &str,
808) -> bool {
809 if !is_hashpipe_option_line(line_without_newline, prefix) {
810 return false;
811 }
812
813 let trimmed_start = trim_start_spaces_tabs(line_without_newline);
814 let leading_ws_len = line_without_newline
815 .len()
816 .saturating_sub(trimmed_start.len());
817 let after_prefix = &trimmed_start[prefix.len()..];
818 let ws_after_prefix_len = after_prefix
819 .len()
820 .saturating_sub(trim_start_spaces_tabs(after_prefix).len());
821 let rest = &after_prefix[ws_after_prefix_len..];
822 let Some(colon_idx) = rest.find(':') else {
823 return false;
824 };
825
826 let key_with_ws = &rest[..colon_idx];
827 let key = trim_end_spaces_tabs(key_with_ws);
828 if key.is_empty() {
829 return false;
830 }
831 let key_ws_suffix = &key_with_ws[key.len()..];
832
833 let after_colon = &rest[colon_idx + 1..];
834 let value_ws_prefix_len = after_colon
835 .len()
836 .saturating_sub(trim_start_spaces_tabs(after_colon).len());
837 let value_with_trailing = &after_colon[value_ws_prefix_len..];
838 let value = trim_end_spaces_tabs(value_with_trailing);
839 let value_ws_suffix = &value_with_trailing[value.len()..];
840
841 builder.start_node(SyntaxKind::CHUNK_OPTION.into());
842 if leading_ws_len > 0 {
843 builder.token(
844 SyntaxKind::WHITESPACE.into(),
845 &line_without_newline[..leading_ws_len],
846 );
847 }
848 builder.token(SyntaxKind::HASHPIPE_PREFIX.into(), prefix);
849 if ws_after_prefix_len > 0 {
850 builder.token(
851 SyntaxKind::WHITESPACE.into(),
852 &after_prefix[..ws_after_prefix_len],
853 );
854 }
855
856 builder.token(SyntaxKind::CHUNK_OPTION_KEY.into(), key);
857 if !key_ws_suffix.is_empty() {
858 builder.token(SyntaxKind::WHITESPACE.into(), key_ws_suffix);
859 }
860 builder.token(SyntaxKind::TEXT.into(), ":");
861 if value_ws_prefix_len > 0 {
862 builder.token(
863 SyntaxKind::WHITESPACE.into(),
864 &after_colon[..value_ws_prefix_len],
865 );
866 }
867
868 if !value.is_empty() {
869 if let Some(quote) = value.chars().next()
870 && (quote == '"' || quote == '\'')
871 && value.ends_with(quote)
872 && value.len() >= 2
873 {
874 builder.token(SyntaxKind::CHUNK_OPTION_QUOTE.into(), &value[..1]);
875 builder.token(
876 SyntaxKind::CHUNK_OPTION_VALUE.into(),
877 &value[1..value.len() - 1],
878 );
879 builder.token(
880 SyntaxKind::CHUNK_OPTION_QUOTE.into(),
881 &value[value.len() - 1..],
882 );
883 } else {
884 builder.token(SyntaxKind::CHUNK_OPTION_VALUE.into(), value);
885 }
886 }
887
888 if !value_ws_suffix.is_empty() {
889 builder.token(SyntaxKind::WHITESPACE.into(), value_ws_suffix);
890 }
891 builder.finish_node();
892 true
893}
894
895fn emit_hashpipe_continuation_line(
896 builder: &mut GreenNodeBuilder<'static>,
897 line_without_newline: &str,
898 prefix: &str,
899) -> bool {
900 if !is_hashpipe_continuation_line(line_without_newline, prefix) {
901 return false;
902 }
903 let trimmed_start = trim_start_spaces_tabs(line_without_newline);
904 let leading_ws_len = line_without_newline
905 .len()
906 .saturating_sub(trimmed_start.len());
907 let after_prefix = &trimmed_start[prefix.len()..];
908 let ws_after_prefix_len = after_prefix
909 .len()
910 .saturating_sub(trim_start_spaces_tabs(after_prefix).len());
911 let continuation_with_trailing = &after_prefix[ws_after_prefix_len..];
912 let continuation_value = trim_end_spaces_tabs(continuation_with_trailing);
913 if continuation_value.is_empty() {
914 return false;
915 }
916 let continuation_ws_suffix = &continuation_with_trailing[continuation_value.len()..];
917
918 builder.start_node(SyntaxKind::CHUNK_OPTION.into());
919 if leading_ws_len > 0 {
920 builder.token(
921 SyntaxKind::WHITESPACE.into(),
922 &line_without_newline[..leading_ws_len],
923 );
924 }
925 builder.token(SyntaxKind::HASHPIPE_PREFIX.into(), prefix);
926 if ws_after_prefix_len > 0 {
927 builder.token(
928 SyntaxKind::WHITESPACE.into(),
929 &after_prefix[..ws_after_prefix_len],
930 );
931 }
932 builder.token(SyntaxKind::CHUNK_OPTION_VALUE.into(), continuation_value);
933 if !continuation_ws_suffix.is_empty() {
934 builder.token(SyntaxKind::WHITESPACE.into(), continuation_ws_suffix);
935 }
936 builder.finish_node();
937 true
938}
939
940fn is_hashpipe_option_line(line_without_newline: &str, prefix: &str) -> bool {
941 let trimmed_start = trim_start_spaces_tabs(line_without_newline);
942 if !trimmed_start.starts_with(prefix) {
943 return false;
944 }
945 let after_prefix = &trimmed_start[prefix.len()..];
946 let rest = trim_start_spaces_tabs(after_prefix);
947 let Some(colon_idx) = rest.find(':') else {
948 return false;
949 };
950 let key = trim_end_spaces_tabs(&rest[..colon_idx]);
951 if key.is_empty() {
952 return false;
953 }
954 true
955}
956
957fn is_hashpipe_continuation_line(line_without_newline: &str, prefix: &str) -> bool {
958 let trimmed_start = trim_start_spaces_tabs(line_without_newline);
959 if !trimmed_start.starts_with(prefix) {
960 return false;
961 }
962 let after_prefix = &trimmed_start[prefix.len()..];
963 let Some(first) = after_prefix.chars().next() else {
964 return false;
965 };
966 if first != ' ' && first != '\t' {
967 return false;
968 }
969 !trim_start_spaces_tabs(after_prefix).is_empty()
970}
971
972pub(crate) fn is_closing_fence(content: &str, fence: &FenceInfo) -> bool {
974 let trimmed = strip_leading_spaces(content);
975
976 if !trimmed.starts_with(fence.fence_char) {
977 return false;
978 }
979
980 let closing_count = trimmed
981 .chars()
982 .take_while(|&c| c == fence.fence_char)
983 .count();
984
985 if closing_count < fence.fence_count {
986 return false;
987 }
988
989 trimmed[closing_count..].trim().is_empty()
991}
992
993fn emit_chunk_options(builder: &mut GreenNodeBuilder<'static>, content: &str) {
996 if content.trim().is_empty() {
997 builder.token(SyntaxKind::TEXT.into(), content);
998 return;
999 }
1000
1001 builder.start_node(SyntaxKind::CHUNK_OPTIONS.into());
1002
1003 let mut pos = 0;
1004 let bytes = content.as_bytes();
1005
1006 while pos < bytes.len() {
1007 let ws_start = pos;
1009 while pos < bytes.len() {
1010 let ch = bytes[pos] as char;
1011 if ch != ' ' && ch != '\t' && ch != ',' {
1012 break;
1013 }
1014 pos += 1;
1015 }
1016 if pos > ws_start {
1017 builder.token(SyntaxKind::TEXT.into(), &content[ws_start..pos]);
1018 }
1019
1020 if pos >= bytes.len() {
1021 break;
1022 }
1023
1024 if bytes[pos] as char == '}' {
1026 builder.token(SyntaxKind::TEXT.into(), &content[pos..pos + 1]);
1027 pos += 1;
1028 if pos < bytes.len() {
1029 builder.token(SyntaxKind::TEXT.into(), &content[pos..]);
1030 }
1031 break;
1032 }
1033
1034 let key_start = pos;
1036 while pos < bytes.len() {
1037 let ch = bytes[pos] as char;
1038 if ch == '=' || ch == ' ' || ch == '\t' || ch == ',' || ch == '}' {
1039 break;
1040 }
1041 pos += 1;
1042 }
1043
1044 if pos == key_start {
1045 if pos < bytes.len() {
1047 builder.token(SyntaxKind::TEXT.into(), &content[pos..]);
1048 }
1049 break;
1050 }
1051
1052 let key = &content[key_start..pos];
1053
1054 let ws_before_eq_start = pos;
1056 while pos < bytes.len() && matches!(bytes[pos] as char, ' ' | '\t') {
1057 pos += 1;
1058 }
1059
1060 if pos < bytes.len() && bytes[pos] as char == '=' {
1062 builder.start_node(SyntaxKind::CHUNK_OPTION.into());
1064 builder.token(SyntaxKind::CHUNK_OPTION_KEY.into(), key);
1065
1066 if pos > ws_before_eq_start {
1068 builder.token(SyntaxKind::TEXT.into(), &content[ws_before_eq_start..pos]);
1069 }
1070
1071 builder.token(SyntaxKind::TEXT.into(), "=");
1072 pos += 1; let ws_after_eq_start = pos;
1076 while pos < bytes.len() && matches!(bytes[pos] as char, ' ' | '\t') {
1077 pos += 1;
1078 }
1079 if pos > ws_after_eq_start {
1080 builder.token(SyntaxKind::TEXT.into(), &content[ws_after_eq_start..pos]);
1081 }
1082
1083 if pos < bytes.len() {
1085 let quote_char = bytes[pos] as char;
1086 if quote_char == '"' || quote_char == '\'' {
1087 builder.token(
1089 SyntaxKind::CHUNK_OPTION_QUOTE.into(),
1090 &content[pos..pos + 1],
1091 );
1092 pos += 1; let val_start = pos;
1095 let mut escaped = false;
1096 while pos < bytes.len() {
1097 let ch = bytes[pos] as char;
1098 if !escaped && ch == quote_char {
1099 break;
1100 }
1101 escaped = !escaped && ch == '\\';
1102 pos += 1;
1103 }
1104
1105 if pos > val_start {
1106 builder.token(
1107 SyntaxKind::CHUNK_OPTION_VALUE.into(),
1108 &content[val_start..pos],
1109 );
1110 }
1111
1112 if pos < bytes.len() && bytes[pos] as char == quote_char {
1114 builder.token(
1115 SyntaxKind::CHUNK_OPTION_QUOTE.into(),
1116 &content[pos..pos + 1],
1117 );
1118 pos += 1;
1119 }
1120 } else {
1121 let val_start = pos;
1123 let mut depth = 0;
1124
1125 while pos < bytes.len() {
1126 let ch = bytes[pos] as char;
1127 match ch {
1128 '(' | '[' | '{' => depth += 1,
1129 ')' | ']' => {
1130 if depth > 0 {
1131 depth -= 1;
1132 } else {
1133 break;
1134 }
1135 }
1136 '}' => {
1137 if depth > 0 {
1138 depth -= 1;
1139 } else {
1140 break; }
1142 }
1143 ',' if depth == 0 => {
1144 break; }
1146 ' ' | '\t' if depth == 0 => {
1147 break; }
1149 _ => {}
1150 }
1151 pos += 1;
1152 }
1153
1154 if pos > val_start {
1155 builder.token(
1156 SyntaxKind::CHUNK_OPTION_VALUE.into(),
1157 &content[val_start..pos],
1158 );
1159 }
1160 }
1161 }
1162
1163 builder.finish_node(); } else {
1165 if pos > ws_before_eq_start {
1168 builder.start_node(SyntaxKind::CHUNK_LABEL.into());
1169 builder.token(SyntaxKind::TEXT.into(), key);
1170 builder.finish_node(); builder.token(SyntaxKind::TEXT.into(), &content[ws_before_eq_start..pos]);
1172 } else {
1173 builder.start_node(SyntaxKind::CHUNK_LABEL.into());
1174 builder.token(SyntaxKind::TEXT.into(), key);
1175 builder.finish_node(); }
1177 }
1178 }
1179
1180 builder.finish_node(); }
1182
1183fn emit_code_info_node(builder: &mut GreenNodeBuilder<'static>, info_string: &str) {
1186 builder.start_node(SyntaxKind::CODE_INFO.into());
1187
1188 let info = InfoString::parse(info_string);
1189
1190 match &info.block_type {
1191 CodeBlockType::DisplayShortcut { language } => {
1192 builder.token(SyntaxKind::CODE_LANGUAGE.into(), language);
1194
1195 let after_lang = &info_string[language.len()..];
1197 if !after_lang.is_empty() {
1198 builder.token(SyntaxKind::TEXT.into(), after_lang);
1199 }
1200 }
1201 CodeBlockType::Executable { language } => {
1202 builder.token(SyntaxKind::TEXT.into(), "{");
1204 builder.token(SyntaxKind::CODE_LANGUAGE.into(), language);
1205
1206 let start_offset = 1 + language.len(); if start_offset < info_string.len() {
1209 let rest = &info_string[start_offset..];
1210 emit_chunk_options(builder, rest);
1211 }
1212 }
1213 CodeBlockType::DisplayExplicit { classes } => {
1214 if let Some(lang) = classes.first() {
1218 let needle = format!(".{}", lang);
1220 if let Some(lang_start) = info_string.find(&needle) {
1221 if lang_start > 0 {
1223 builder.token(SyntaxKind::TEXT.into(), &info_string[..lang_start]);
1224 }
1225
1226 builder.token(SyntaxKind::TEXT.into(), ".");
1228
1229 builder.token(SyntaxKind::CODE_LANGUAGE.into(), lang);
1231
1232 let after_lang_start = lang_start + 1 + lang.len();
1234 if after_lang_start < info_string.len() {
1235 builder.token(SyntaxKind::TEXT.into(), &info_string[after_lang_start..]);
1236 }
1237 } else {
1238 builder.token(SyntaxKind::TEXT.into(), info_string);
1240 }
1241 } else {
1242 builder.token(SyntaxKind::TEXT.into(), info_string);
1244 }
1245 }
1246 CodeBlockType::Raw { .. } | CodeBlockType::Plain => {
1247 builder.token(SyntaxKind::TEXT.into(), info_string);
1249 }
1250 }
1251
1252 builder.finish_node(); }
1254
1255#[allow(clippy::too_many_arguments)]
1264pub(crate) fn parse_fenced_code_block(
1265 builder: &mut GreenNodeBuilder<'static>,
1266 lines: &[&str],
1267 start_pos: usize,
1268 fence: FenceInfo,
1269 bq_depth: usize,
1270 list_content_col: usize,
1271 list_marker_consumed_on_line_0: bool,
1272 bq_outer: bool,
1273 content_indent: usize,
1274 first_line_override: Option<&str>,
1275) -> usize {
1276 builder.start_node(SyntaxKind::CODE_BLOCK.into());
1278
1279 let (first_trimmed, _first_inner) = prepare_fence_open_line(
1281 builder,
1282 lines[start_pos],
1283 first_line_override,
1284 bq_depth,
1285 list_content_col,
1286 list_marker_consumed_on_line_0,
1287 bq_outer,
1288 content_indent,
1289 );
1290
1291 builder.start_node(SyntaxKind::CODE_FENCE_OPEN.into());
1292 builder.token(
1293 SyntaxKind::CODE_FENCE_MARKER.into(),
1294 &first_trimmed[..fence.fence_count],
1295 );
1296
1297 let after_fence = &first_trimmed[fence.fence_count..];
1299 if let Some(_space_stripped) = after_fence.strip_prefix(' ') {
1300 builder.token(SyntaxKind::WHITESPACE.into(), " ");
1302 if !fence.info_string.is_empty() {
1304 emit_code_info_node(builder, &fence.info_string);
1305 }
1306 } else if !fence.info_string.is_empty() {
1307 emit_code_info_node(builder, &fence.info_string);
1309 }
1310
1311 let (_, newline_str) = strip_newline(first_trimmed);
1313 if !newline_str.is_empty() {
1314 builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1315 }
1316 builder.finish_node(); let mut current_pos = start_pos + 1;
1319 let mut content_lines: Vec<&str> = Vec::new(); let mut found_closing = false;
1321
1322 while current_pos < lines.len() {
1323 let line = lines[current_pos];
1324
1325 let after_bq_and_list = if bq_outer {
1328 let after_bq = if bq_depth > 0 {
1329 strip_n_blockquote_markers(line, bq_depth)
1330 } else {
1331 line
1332 };
1333 strip_list_indent(after_bq, list_content_col)
1334 } else {
1335 let after_list = strip_list_indent(line, list_content_col);
1336 if bq_depth > 0 {
1337 strip_n_blockquote_markers(after_list, bq_depth)
1338 } else {
1339 after_list
1340 }
1341 };
1342
1343 let probe = if bq_outer {
1349 line
1350 } else {
1351 strip_list_indent(line, list_content_col)
1352 };
1353 let (line_bq_depth, _) = count_blockquote_markers(probe);
1354 if line_bq_depth < bq_depth {
1355 break;
1356 }
1357
1358 let indent_bytes = byte_index_at_column(after_bq_and_list, content_indent);
1359 let inner_stripped = if content_indent > 0 && after_bq_and_list.len() >= indent_bytes {
1360 &after_bq_and_list[indent_bytes..]
1361 } else {
1362 after_bq_and_list
1363 };
1364
1365 if is_closing_fence(inner_stripped, &fence) {
1366 found_closing = true;
1367 current_pos += 1;
1368 break;
1369 }
1370
1371 content_lines.push(line);
1372 current_pos += 1;
1373 }
1374
1375 if !content_lines.is_empty() {
1377 builder.start_node(SyntaxKind::CODE_CONTENT.into());
1378 let hashpipe_prefix = match InfoString::parse(&fence.info_string).block_type {
1379 CodeBlockType::Executable { language } => hashpipe_comment_prefix(&language),
1380 _ => None,
1381 };
1382
1383 let mut line_idx = 0usize;
1384 if let Some(prefix) = hashpipe_prefix {
1385 let prepared_hashpipe_lines = compute_hashpipe_preamble_line_count(
1386 &content_lines,
1387 prefix,
1388 bq_depth,
1389 list_content_col,
1390 bq_outer,
1391 content_indent,
1392 );
1393 if prepared_hashpipe_lines > 0 {
1394 builder.start_node(SyntaxKind::HASHPIPE_YAML_PREAMBLE.into());
1395 builder.start_node(SyntaxKind::HASHPIPE_YAML_CONTENT.into());
1396 while line_idx < prepared_hashpipe_lines {
1397 let content_line = content_lines[line_idx];
1398 let after_indent = emit_content_line_prefixes(
1399 builder,
1400 content_line,
1401 bq_depth,
1402 list_content_col,
1403 bq_outer,
1404 content_indent,
1405 );
1406 let (line_without_newline, newline_str) = strip_newline(after_indent);
1407 if !emit_hashpipe_option_line(builder, line_without_newline, prefix) {
1408 let _ =
1409 emit_hashpipe_continuation_line(builder, line_without_newline, prefix);
1410 }
1411 if !newline_str.is_empty() {
1412 builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1413 }
1414 line_idx += 1;
1415 }
1416 builder.finish_node(); builder.finish_node(); }
1419 }
1420
1421 for content_line in content_lines.iter().skip(line_idx) {
1422 let after_indent = emit_content_line_prefixes(
1423 builder,
1424 content_line,
1425 bq_depth,
1426 list_content_col,
1427 bq_outer,
1428 content_indent,
1429 );
1430 let (line_without_newline, newline_str) = strip_newline(after_indent);
1431
1432 if !line_without_newline.is_empty() {
1433 builder.token(SyntaxKind::TEXT.into(), line_without_newline);
1434 }
1435
1436 if !newline_str.is_empty() {
1437 builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1438 }
1439 }
1440 builder.finish_node(); }
1442
1443 if found_closing {
1445 let closing_line = lines[current_pos - 1];
1446
1447 let closing_stripped = emit_content_line_prefixes(
1448 builder,
1449 closing_line,
1450 bq_depth,
1451 list_content_col,
1452 bq_outer,
1453 content_indent,
1454 );
1455 let (closing_without_newline, newline_str) = strip_newline(closing_stripped);
1456 let closing_trimmed_start = strip_leading_spaces(closing_without_newline);
1457 let leading_ws_len = closing_without_newline.len() - closing_trimmed_start.len();
1458 let closing_count = closing_trimmed_start
1459 .chars()
1460 .take_while(|&c| c == fence.fence_char)
1461 .count();
1462 let trailing_after_marker = &closing_trimmed_start[closing_count..];
1463
1464 builder.start_node(SyntaxKind::CODE_FENCE_CLOSE.into());
1465 if leading_ws_len > 0 {
1466 builder.token(
1467 SyntaxKind::WHITESPACE.into(),
1468 &closing_without_newline[..leading_ws_len],
1469 );
1470 }
1471 builder.token(
1472 SyntaxKind::CODE_FENCE_MARKER.into(),
1473 &closing_trimmed_start[..closing_count],
1474 );
1475 if !trailing_after_marker.is_empty() {
1476 builder.token(SyntaxKind::WHITESPACE.into(), trailing_after_marker);
1477 }
1478 if !newline_str.is_empty() {
1479 builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1480 }
1481 builder.finish_node(); }
1483
1484 builder.finish_node(); current_pos
1487}
1488
1489#[allow(clippy::too_many_arguments)]
1491pub(crate) fn parse_fenced_math_block(
1492 builder: &mut GreenNodeBuilder<'static>,
1493 lines: &[&str],
1494 start_pos: usize,
1495 fence: FenceInfo,
1496 bq_depth: usize,
1497 list_content_col: usize,
1498 list_marker_consumed_on_line_0: bool,
1499 bq_outer: bool,
1500 content_indent: usize,
1501 first_line_override: Option<&str>,
1502) -> usize {
1503 builder.start_node(SyntaxKind::DISPLAY_MATH.into());
1504
1505 let (first_trimmed, _first_inner) = prepare_fence_open_line(
1506 builder,
1507 lines[start_pos],
1508 first_line_override,
1509 bq_depth,
1510 list_content_col,
1511 list_marker_consumed_on_line_0,
1512 bq_outer,
1513 content_indent,
1514 );
1515 let (opening_without_newline, opening_newline) = strip_newline(first_trimmed);
1516 builder.token(
1517 SyntaxKind::DISPLAY_MATH_MARKER.into(),
1518 opening_without_newline,
1519 );
1520 if !opening_newline.is_empty() {
1521 builder.token(SyntaxKind::NEWLINE.into(), opening_newline);
1522 }
1523
1524 let mut current_pos = start_pos + 1;
1525 let mut content_lines: Vec<&str> = Vec::new();
1526 let mut found_closing = false;
1527
1528 while current_pos < lines.len() {
1529 let line = lines[current_pos];
1530
1531 let after_bq_and_list = if bq_outer {
1532 let after_bq = if bq_depth > 0 {
1533 strip_n_blockquote_markers(line, bq_depth)
1534 } else {
1535 line
1536 };
1537 strip_list_indent(after_bq, list_content_col)
1538 } else {
1539 let after_list = strip_list_indent(line, list_content_col);
1540 if bq_depth > 0 {
1541 strip_n_blockquote_markers(after_list, bq_depth)
1542 } else {
1543 after_list
1544 }
1545 };
1546
1547 let probe = if bq_outer {
1548 line
1549 } else {
1550 strip_list_indent(line, list_content_col)
1551 };
1552 let (line_bq_depth, _) = count_blockquote_markers(probe);
1553 if line_bq_depth < bq_depth {
1554 break;
1555 }
1556
1557 let indent_bytes = byte_index_at_column(after_bq_and_list, content_indent);
1558 let inner_stripped = if content_indent > 0 && after_bq_and_list.len() >= indent_bytes {
1559 &after_bq_and_list[indent_bytes..]
1560 } else {
1561 after_bq_and_list
1562 };
1563
1564 if is_closing_fence(inner_stripped, &fence) {
1565 found_closing = true;
1566 current_pos += 1;
1567 break;
1568 }
1569
1570 content_lines.push(line);
1571 current_pos += 1;
1572 }
1573
1574 if !content_lines.is_empty() {
1575 let mut content = String::new();
1576 for content_line in content_lines {
1577 let after_indent = emit_content_line_prefixes(
1578 builder,
1579 content_line,
1580 bq_depth,
1581 list_content_col,
1582 bq_outer,
1583 content_indent,
1584 );
1585 let (line_without_newline, newline_str) = strip_newline(after_indent);
1586 content.push_str(line_without_newline);
1587 content.push_str(newline_str);
1588 }
1589 builder.token(SyntaxKind::TEXT.into(), &content);
1590 }
1591
1592 if found_closing {
1593 let closing_line = lines[current_pos - 1];
1594
1595 let closing_stripped = emit_content_line_prefixes(
1596 builder,
1597 closing_line,
1598 bq_depth,
1599 list_content_col,
1600 bq_outer,
1601 content_indent,
1602 );
1603 let (closing_without_newline, newline_str) = strip_newline(closing_stripped);
1604 let closing_trimmed_start = strip_leading_spaces(closing_without_newline);
1605 let leading_ws_len = closing_without_newline.len() - closing_trimmed_start.len();
1606 let closing_count = closing_trimmed_start
1607 .chars()
1608 .take_while(|&c| c == fence.fence_char)
1609 .count();
1610 let trailing_after_marker = &closing_trimmed_start[closing_count..];
1611
1612 if leading_ws_len > 0 {
1613 builder.token(
1614 SyntaxKind::WHITESPACE.into(),
1615 &closing_without_newline[..leading_ws_len],
1616 );
1617 }
1618 builder.token(
1619 SyntaxKind::DISPLAY_MATH_MARKER.into(),
1620 &closing_trimmed_start[..closing_count],
1621 );
1622 if !trailing_after_marker.is_empty() {
1623 builder.token(SyntaxKind::WHITESPACE.into(), trailing_after_marker);
1624 }
1625 if !newline_str.is_empty() {
1626 builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1627 }
1628 }
1629
1630 builder.finish_node(); current_pos
1632}
1633
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for a `key=value` attribute entry.
    fn kv(key: &str, value: &str) -> (String, Option<String>) {
        (key.to_owned(), Some(value.to_owned()))
    }

    /// Shorthand for an attribute entry that carries no value.
    fn bare(key: &str) -> (String, Option<String>) {
        (key.to_owned(), None)
    }

    /// Parses `input` and asserts that the tree's text reproduces it exactly.
    fn assert_round_trips(input: &str) {
        let tree = crate::parse(input, None);
        assert_eq!(tree.text().to_string(), input);
    }

    /// Asserts that `info_string` is classified as a raw block of `format`.
    fn assert_raw_format(info_string: &str, format: &str) {
        let parsed = InfoString::parse(info_string);
        assert_eq!(
            parsed.block_type,
            CodeBlockType::Raw {
                format: format.to_owned()
            }
        );
    }

    /// Counts hashpipe preamble lines using the `#|` prefix and no container context.
    fn hashpipe_preamble_len(content_lines: Vec<&str>) -> usize {
        compute_hashpipe_preamble_line_count(&content_lines, "#|", 0, 0, false, 0)
    }

    /// A three-backtick fence with a language yields char, count and info string.
    #[test]
    fn test_backtick_fence() {
        let opened = try_parse_fence_open("```python").expect("backtick fence should parse");
        assert_eq!(opened.fence_char, '`');
        assert_eq!(opened.fence_count, 3);
        assert_eq!(opened.info_string, "python");
    }

    /// A bare tilde fence parses with an empty info string.
    #[test]
    fn test_tilde_fence() {
        let opened = try_parse_fence_open("~~~").expect("tilde fence should parse");
        assert_eq!(opened.fence_char, '~');
        assert_eq!(opened.fence_count, 3);
        assert_eq!(opened.info_string, "");
    }

    /// The fence count reflects the full run of fence characters.
    #[test]
    fn test_long_fence() {
        let opened = try_parse_fence_open("`````").expect("long fence should parse");
        assert_eq!(opened.fence_count, 5);
    }

    /// Two backticks are not a fence.
    #[test]
    fn test_two_backticks_invalid() {
        let opened = try_parse_fence_open("``");
        assert!(opened.is_none());
    }

    /// A backtick fence whose info string contains backticks is rejected.
    #[test]
    fn test_backtick_fence_with_backtick_in_info_is_invalid() {
        let opened = try_parse_fence_open("`````hi````there`````");
        assert!(opened.is_none());
    }

    /// Closing fences need the same character and at least as many of them.
    #[test]
    fn test_closing_fence() {
        let opening = FenceInfo {
            fence_char: '`',
            fence_count: 3,
            info_string: String::new(),
        };
        let cases = [("```", true), ("````", true), ("``", false), ("~~~", false)];
        for (candidate, expected) in cases {
            assert_eq!(
                is_closing_fence(candidate, &opening),
                expected,
                "candidate: {candidate:?}"
            );
        }
    }

    /// A `>` at the start of a code line survives a parse round trip.
    #[test]
    fn test_fenced_code_preserves_leading_gt() {
        assert_round_trips("```\n> foo\n```\n");
    }

    /// A fenced block nested in a blockquote survives a parse round trip.
    #[test]
    fn test_fenced_code_in_blockquote_preserves_opening_fence_marker() {
        assert_round_trips("> ```\n> code\n> ```\n");
    }

    /// Multi-byte content below a definition-list fence parses without panicking.
    #[test]
    fn test_fenced_code_in_definition_list_with_unicode_content_does_not_panic() {
        assert_round_trips("Term\n: ```\nāāā pyproject.toml\n```\n");
    }

    /// An empty info string is a plain block without attributes.
    #[test]
    fn test_info_string_plain() {
        let parsed = InfoString::parse("");
        assert_eq!(parsed.block_type, CodeBlockType::Plain);
        assert!(parsed.attributes.is_empty());
    }

    /// A bare word is a display shortcut without attributes.
    #[test]
    fn test_info_string_shortcut() {
        let parsed = InfoString::parse("python");
        assert_eq!(
            parsed.block_type,
            CodeBlockType::DisplayShortcut {
                language: String::from("python")
            }
        );
        assert!(parsed.attributes.is_empty());
    }

    /// Only the first word of a shortcut info string is the language.
    #[test]
    fn test_info_string_shortcut_with_trailing() {
        let parsed = InfoString::parse("python extra stuff");
        assert_eq!(
            parsed.block_type,
            CodeBlockType::DisplayShortcut {
                language: String::from("python")
            }
        );
    }

    /// `{.class}` is an explicit display block with one class.
    #[test]
    fn test_info_string_display_explicit() {
        let parsed = InfoString::parse("{.python}");
        assert_eq!(
            parsed.block_type,
            CodeBlockType::DisplayExplicit {
                classes: vec![String::from("python")]
            }
        );
    }

    /// Several dotted classes are collected in order.
    #[test]
    fn test_info_string_display_explicit_multiple() {
        let parsed = InfoString::parse("{.python .numberLines}");
        assert_eq!(
            parsed.block_type,
            CodeBlockType::DisplayExplicit {
                classes: vec![String::from("python"), String::from("numberLines")]
            }
        );
    }

    /// `{lang}` without a dot is an executable chunk.
    #[test]
    fn test_info_string_executable() {
        let parsed = InfoString::parse("{python}");
        assert_eq!(
            parsed.block_type,
            CodeBlockType::Executable {
                language: String::from("python")
            }
        );
    }

    /// Space-separated chunk options become attributes.
    #[test]
    fn test_info_string_executable_with_options() {
        let parsed = InfoString::parse("{python echo=false warning=true}");
        assert_eq!(
            parsed.block_type,
            CodeBlockType::Executable {
                language: String::from("python")
            }
        );
        assert_eq!(
            parsed.attributes,
            vec![kv("echo", "false"), kv("warning", "true")]
        );
    }

    /// Comma-separated chunk options become attributes.
    #[test]
    fn test_info_string_executable_with_commas() {
        let parsed = InfoString::parse("{r, echo=FALSE, warning=TRUE}");
        assert_eq!(
            parsed.block_type,
            CodeBlockType::Executable {
                language: String::from("r")
            }
        );
        assert_eq!(
            parsed.attributes,
            vec![kv("echo", "FALSE"), kv("warning", "TRUE")]
        );
    }

    /// Quoted values may contain spaces; the quotes are not part of the value.
    #[test]
    fn test_info_string_executable_mixed_commas_spaces() {
        let parsed = InfoString::parse("{r, echo=FALSE, label=\"my chunk\"}");
        assert_eq!(
            parsed.block_type,
            CodeBlockType::Executable {
                language: String::from("r")
            }
        );
        assert_eq!(
            parsed.attributes,
            vec![kv("echo", "FALSE"), kv("label", "my chunk")]
        );
    }

    /// A shortcut language followed by a brace group keeps the group as attributes.
    #[test]
    fn test_info_string_mixed_shortcut_and_attrs() {
        let parsed = InfoString::parse("python {.numberLines}");
        assert_eq!(
            parsed.block_type,
            CodeBlockType::DisplayShortcut {
                language: String::from("python")
            }
        );
        assert_eq!(parsed.attributes, vec![bare(".numberLines")]);
    }

    /// The brace group after a shortcut may mix classes and key/value pairs.
    #[test]
    fn test_info_string_mixed_with_key_value() {
        let parsed = InfoString::parse("python {.numberLines startFrom=\"100\"}");
        assert_eq!(
            parsed.block_type,
            CodeBlockType::DisplayShortcut {
                language: String::from("python")
            }
        );
        assert_eq!(
            parsed.attributes,
            vec![bare(".numberLines"), kv("startFrom", "100")]
        );
    }

    /// An explicit block keeps its id and key/value pair alongside the classes.
    #[test]
    fn test_info_string_explicit_with_id_and_classes() {
        let parsed = InfoString::parse("{#mycode .haskell .numberLines startFrom=\"100\"}");
        assert_eq!(
            parsed.block_type,
            CodeBlockType::DisplayExplicit {
                classes: vec![String::from("haskell"), String::from("numberLines")]
            }
        );
        let id_present = parsed.attributes.iter().any(|(key, _)| key == "#mycode");
        let start_present = parsed.attributes.contains(&kv("startFrom", "100"));
        assert!(id_present);
        assert!(start_present);
    }

    /// `{=html}` is a raw block without attributes.
    #[test]
    fn test_info_string_raw_html() {
        let parsed = InfoString::parse("{=html}");
        assert_eq!(
            parsed.block_type,
            CodeBlockType::Raw {
                format: String::from("html")
            }
        );
        assert!(parsed.attributes.is_empty());
    }

    /// `{=latex}` is a raw block.
    #[test]
    fn test_info_string_raw_latex() {
        assert_raw_format("{=latex}", "latex");
    }

    /// `{=openxml}` is a raw block.
    #[test]
    fn test_info_string_raw_openxml() {
        assert_raw_format("{=openxml}", "openxml");
    }

    /// `{=ms}` is a raw block.
    #[test]
    fn test_info_string_raw_ms() {
        assert_raw_format("{=ms}", "ms");
    }

    /// `{=html5}` is a raw block.
    #[test]
    fn test_info_string_raw_html5() {
        assert_raw_format("{=html5}", "html5");
    }

    /// A raw marker followed by further attributes is not a raw `html` block.
    #[test]
    fn test_info_string_raw_not_combined_with_attrs() {
        let parsed = InfoString::parse("{=html .class}");
        assert_ne!(
            parsed.block_type,
            CodeBlockType::Raw {
                format: String::from("html")
            }
        );
    }

    /// Pandoc attributes are split on spaces.
    #[test]
    fn test_parse_pandoc_attributes_spaces() {
        let parsed =
            InfoString::parse_pandoc_attributes(".python .numberLines startFrom=\"10\"");
        assert_eq!(parsed.len(), 3);
        assert_eq!(parsed[0], bare(".python"));
        assert_eq!(parsed[1], bare(".numberLines"));
        assert_eq!(parsed[2], kv("startFrom", "10"));
    }

    /// Ids, classes and unquoted key/value pairs are all recognised.
    #[test]
    fn test_parse_pandoc_attributes_no_commas() {
        let parsed = InfoString::parse_pandoc_attributes("#id .class key=value");
        assert_eq!(parsed.len(), 3);
        assert_eq!(parsed[0], bare("#id"));
        assert_eq!(parsed[1], bare(".class"));
        assert_eq!(parsed[2], kv("key", "value"));
    }

    /// Chunk options separated by comma plus space.
    #[test]
    fn test_parse_chunk_options_commas() {
        let parsed = InfoString::parse_chunk_options("r, echo=FALSE, warning=TRUE");
        assert_eq!(parsed.len(), 3);
        assert_eq!(parsed[0], bare("r"));
        assert_eq!(parsed[1], kv("echo", "FALSE"));
        assert_eq!(parsed[2], kv("warning", "TRUE"));
    }

    /// Chunk options separated by commas only.
    #[test]
    fn test_parse_chunk_options_no_spaces() {
        let parsed = InfoString::parse_chunk_options("r,echo=FALSE,warning=TRUE");
        assert_eq!(parsed.len(), 3);
        assert_eq!(parsed[0], bare("r"));
        assert_eq!(parsed[1], kv("echo", "FALSE"));
        assert_eq!(parsed[2], kv("warning", "TRUE"));
    }

    /// Space and comma separators may be mixed.
    #[test]
    fn test_parse_chunk_options_mixed() {
        let parsed = InfoString::parse_chunk_options("python echo=False, warning=True");
        assert_eq!(parsed.len(), 3);
        assert_eq!(parsed[0], bare("python"));
        assert_eq!(parsed[1], kv("echo", "False"));
        assert_eq!(parsed[2], kv("warning", "True"));
    }

    /// Commas inside a function call do not split the value.
    #[test]
    fn test_parse_chunk_options_nested_function_call() {
        let parsed = InfoString::parse_chunk_options(r#"r pep-cg, dependson=c("foo", "bar")"#);
        assert_eq!(parsed.len(), 3);
        assert_eq!(parsed[0], bare("r"));
        assert_eq!(parsed[1], bare("pep-cg"));
        assert_eq!(parsed[2], kv("dependson", r#"c("foo", "bar")"#));
    }

    /// Dotted keys and call values containing spaces stay intact.
    #[test]
    fn test_parse_chunk_options_nested_with_spaces() {
        let parsed =
            InfoString::parse_chunk_options(r#"r, cache.path=file.path("cache", "dir")"#);
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed[0], bare("r"));
        assert_eq!(parsed[1], kv("cache.path", r#"file.path("cache", "dir")"#));
    }

    /// Nested parentheses are kept together as one value.
    #[test]
    fn test_parse_chunk_options_deeply_nested() {
        let parsed = InfoString::parse_chunk_options(r#"r, x=list(a=c(1,2), b=c(3,4))"#);
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed[0], bare("r"));
        assert_eq!(parsed[1], kv("x", r#"list(a=c(1,2), b=c(3,4))"#));
    }

    /// Commas inside brackets and braces do not split the value.
    #[test]
    fn test_parse_chunk_options_brackets_and_braces() {
        let parsed =
            InfoString::parse_chunk_options(r#"r, data=df[rows, cols], config={a:1, b:2}"#);
        assert_eq!(parsed.len(), 3);
        assert_eq!(parsed[0], bare("r"));
        assert_eq!(parsed[1], kv("data", "df[rows, cols]"));
        assert_eq!(parsed[2], kv("config", "{a:1, b:2}"));
    }

    /// Parentheses inside a quoted value are ordinary characters.
    #[test]
    fn test_parse_chunk_options_quotes_with_parens() {
        let parsed =
            InfoString::parse_chunk_options(r#"r, label="test (with parens)", echo=TRUE"#);
        assert_eq!(parsed.len(), 3);
        assert_eq!(parsed[0], bare("r"));
        assert_eq!(parsed[1], kv("label", "test (with parens)"));
        assert_eq!(parsed[2], kv("echo", "TRUE"));
    }

    /// Backslash-escaped quotes inside a quoted value are unescaped.
    #[test]
    fn test_parse_chunk_options_escaped_quotes() {
        let parsed = InfoString::parse_chunk_options(r#"r, label="has \"quoted\" text""#);
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed[0], bare("r"));
        assert_eq!(parsed[1], kv("label", r#"has "quoted" text"#));
    }

    /// A leading dot selects a display block; a bare language selects an executable one.
    #[test]
    fn test_display_vs_executable_parsing() {
        let display = InfoString::parse("{.python .numberLines startFrom=\"10\"}");
        assert!(matches!(
            display.block_type,
            CodeBlockType::DisplayExplicit { .. }
        ));

        let executable = InfoString::parse("{r, echo=FALSE, warning=TRUE}");
        assert!(matches!(
            executable.block_type,
            CodeBlockType::Executable { .. }
        ));
        assert_eq!(executable.attributes.len(), 2);
    }

    /// A bare word after the language is reported as the `label` attribute.
    #[test]
    fn test_info_string_executable_implicit_label() {
        let parsed = InfoString::parse("{r mylabel}");
        assert!(matches!(
            parsed.block_type,
            CodeBlockType::Executable { ref language } if language == "r"
        ));
        assert_eq!(parsed.attributes, vec![kv("label", "mylabel")]);
    }

    /// The implicit label comes first, followed by the explicit options.
    #[test]
    fn test_info_string_executable_implicit_label_with_options() {
        let parsed = InfoString::parse("{r mylabel, echo=FALSE}");
        assert!(matches!(
            parsed.block_type,
            CodeBlockType::Executable { ref language } if language == "r"
        ));
        assert_eq!(
            parsed.attributes,
            vec![kv("label", "mylabel"), kv("echo", "FALSE")]
        );
    }

    /// Lines continuing a block scalar count towards the preamble.
    #[test]
    fn test_compute_hashpipe_preamble_line_count_for_block_scalar() {
        let preamble_len = hashpipe_preamble_len(vec![
            "#| fig-cap: |\n",
            "#| A caption\n",
            "#| spanning lines\n",
            "a <- 1\n",
        ]);
        assert_eq!(preamble_len, 3);
    }

    /// The preamble ends at the first code line, even if options follow later.
    #[test]
    fn test_compute_hashpipe_preamble_line_count_stops_at_non_option() {
        let preamble_len = hashpipe_preamble_len(vec![
            "#| label: fig-plot\n",
            "plot(1:10)\n",
            "#| echo: false\n",
        ]);
        assert_eq!(preamble_len, 1);
    }

    /// A line holding only the prefix ends the preamble.
    #[test]
    fn test_compute_hashpipe_preamble_line_count_stops_at_standalone_prefix() {
        let preamble_len =
            hashpipe_preamble_len(vec!["#| label: fig-plot\n", "#|\n", "plot(1:10)\n"]);
        assert_eq!(preamble_len, 1);
    }
}