1use crate::options::ParserOptions;
2use crate::syntax::SyntaxKind;
3use rowan::GreenNodeBuilder;
4
5use crate::parser::utils::container_stack::{
6 Container, ContainerStack, leading_indent, leading_indent_from,
7};
8use crate::parser::utils::helpers::{strip_newline, trim_end_newlines};
9use crate::parser::utils::list_item_buffer::ListItemBuffer;
10
/// The marker that introduces a list item.
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum ListMarker {
    /// A bullet marker; carries the bullet character that was written
    /// (`*`, `+` or `-` when produced by `try_parse_list_marker`).
    Bullet(char),
    /// An ordered-list marker; the payload describes its numbering flavour.
    Ordered(OrderedMarker),
}
16
/// The numbering flavour of an ordered-list marker, together with the text
/// that was written and (where applicable) the delimiter style around it.
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum OrderedMarker {
    /// ASCII decimal digits, e.g. `1.`, `2)` or `(3)`.
    Decimal {
        /// The digits exactly as written (kept as text, not parsed).
        number: String,
        /// Delimiter style surrounding the number.
        style: ListDelimiter,
    },
    /// The `#.` auto-numbering marker.
    Hash,
    /// A single ASCII lowercase letter, e.g. `a.`, `b)` or `(c)`.
    LowerAlpha {
        /// The letter as written.
        letter: char,
        /// Delimiter style surrounding the letter.
        style: ListDelimiter,
    },
    /// A single ASCII uppercase letter, e.g. `A.`, `B)` or `(C)`.
    UpperAlpha {
        /// The letter as written.
        letter: char,
        /// Delimiter style surrounding the letter.
        style: ListDelimiter,
    },
    /// A lowercase roman numeral, e.g. `iv.`.
    LowerRoman {
        /// The numeral text as written.
        numeral: String,
        /// Delimiter style surrounding the numeral.
        style: ListDelimiter,
    },
    /// An uppercase roman numeral, e.g. `IV.`.
    UpperRoman {
        /// The numeral text as written.
        numeral: String,
        /// Delimiter style surrounding the numeral.
        style: ListDelimiter,
    },
    /// An example-list marker, `(@)` or `(@label)`.
    Example {
        /// The label between `@` and `)`, or `None` when it is empty.
        label: Option<String>,
    },
}
44
/// How an ordered-list number, letter or numeral is delimited.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum ListDelimiter {
    /// Trailing period, as in `1.`.
    Period,
    /// Trailing right parenthesis, as in `1)`.
    RightParen,
    /// Enclosed in parentheses, as in `(1)`.
    Parens,
}
51
/// Result of recognising a list marker at the start of a line.
#[derive(Debug, Clone, PartialEq)]
pub(crate) struct ListMarkerMatch {
    /// The marker that was recognised.
    pub(crate) marker: ListMarker,
    /// Length of the marker text. It is used both as a byte length when the
    /// line is sliced and as a column width when the content column is
    /// computed.
    pub(crate) marker_len: usize,
    /// Columns of whitespace after the marker that belong to the marker.
    pub(crate) spaces_after_cols: usize,
    /// Bytes of whitespace after the marker that belong to the marker.
    pub(crate) spaces_after_bytes: usize,
    /// Set by `marker_spaces_after` when one column of separation is counted
    /// but no whitespace byte is consumed for it.
    pub(crate) virtual_marker_space: bool,
}
66
/// Everything needed to emit the opening tokens of one list item.
///
/// The byte fields are offsets/lengths into `content`; the column fields are
/// used to compute the column at which the item's content starts.
#[derive(Debug, Clone, Copy)]
pub(in crate::parser) struct ListItemEmissionInput<'a> {
    /// The line text holding indentation, marker, spacing and item content.
    pub content: &'a str,
    /// Length of the marker (byte length for slicing, also added to columns).
    pub marker_len: usize,
    /// Columns of whitespace after the marker.
    pub spaces_after_cols: usize,
    /// Bytes of whitespace after the marker.
    pub spaces_after_bytes: usize,
    /// Column width of the indentation before the marker.
    pub indent_cols: usize,
    /// Byte length of the indentation before the marker.
    pub indent_bytes: usize,
    /// Forwarded unchanged to the `Container::ListItem` pushed for this item.
    pub virtual_marker_space: bool,
}
77
/// Recognises a roman numeral at the start of `text`.
///
/// Only letters of the requested case are considered (`IVXLCDM` when
/// `uppercase` is true, `ivxlcdm` otherwise). Returns the byte length of the
/// numeral prefix, or `None` when the prefix is empty or not plausible:
///
/// * a single-letter numeral must be `I`, `V` or `X` (so that `c.`, `d.`,
///   `l.` and `m.` are left for alphabetic markers);
/// * `I`, `X` and `C` may repeat at most three times in a row, `V`, `L` and
///   `D` may not repeat at all;
/// * a smaller symbol directly before a larger one must form one of the
///   subtractive pairs `IV`, `IX`, `XL`, `XC`, `CD`, `CM`.
fn try_parse_roman_numeral(text: &str, uppercase: bool) -> Option<usize> {
    /// Numeric weight of an (uppercased) roman symbol; 0 for anything else.
    fn weight(symbol: u8) -> u32 {
        match symbol {
            b'I' => 1,
            b'V' => 5,
            b'X' => 10,
            b'L' => 50,
            b'C' => 100,
            b'D' => 500,
            b'M' => 1000,
            _ => 0,
        }
    }

    // Length of the leading run of roman letters in the requested case.
    let len = text
        .bytes()
        .take_while(|b| {
            let right_case = if uppercase {
                b.is_ascii_uppercase()
            } else {
                b.is_ascii_lowercase()
            };
            right_case
                && matches!(
                    b.to_ascii_uppercase(),
                    b'I' | b'V' | b'X' | b'L' | b'C' | b'D' | b'M'
                )
        })
        .count();
    let prefix = &text.as_bytes()[..len];

    match prefix {
        [] => return None,
        [only] if !matches!(only.to_ascii_uppercase(), b'I' | b'V' | b'X') => return None,
        _ => {}
    }

    // Reject over-long runs of the same symbol.
    let mut previous = 0u8;
    let mut repeats = 0usize;
    for symbol in prefix.iter().map(u8::to_ascii_uppercase) {
        repeats = if symbol == previous { repeats + 1 } else { 1 };
        previous = symbol;
        let too_many = match symbol {
            b'I' | b'X' | b'C' => repeats > 3,
            b'V' | b'L' | b'D' => repeats > 1,
            _ => false,
        };
        if too_many {
            return None;
        }
    }

    // Every ascending neighbour pair must be a legal subtractive pair.
    let pairs_ok = prefix.windows(2).all(|pair| {
        let first = pair[0].to_ascii_uppercase();
        let second = pair[1].to_ascii_uppercase();
        weight(first) >= weight(second)
            || matches!(
                (first, second),
                (b'I', b'V' | b'X') | (b'X', b'L' | b'C') | (b'C', b'D' | b'M')
            )
    });

    pairs_ok.then_some(len)
}
168
169fn marker_spaces_after(after_marker: &str, marker_end_col: usize) -> (usize, usize, bool) {
184 let (effective_cols, n_bytes) = leading_indent_from(after_marker, marker_end_col);
185 let after_ws = &after_marker[n_bytes..];
186 let has_content = !trim_end_newlines(after_ws).is_empty();
187 if has_content && effective_cols >= 5 {
188 let bytes = match after_marker.as_bytes().first() {
189 Some(b' ') => 1,
190 Some(b'\t') => {
191 let span = 4 - (marker_end_col % 4);
192 if span == 1 { 1 } else { 0 }
193 }
194 _ => 0,
195 };
196 (1, bytes, bytes == 0)
197 } else {
198 (effective_cols, n_bytes, false)
199 }
200}
201
202pub(crate) fn try_parse_list_marker(line: &str, config: &ParserOptions) -> Option<ListMarkerMatch> {
203 let line = trim_end_newlines(line);
206 let (_indent_cols, indent_bytes) = leading_indent(line);
207 let trimmed = &line[indent_bytes..];
208
209 if let Some(ch) = trimmed.chars().next()
211 && matches!(ch, '*' | '+' | '-')
212 {
213 let after_marker = &trimmed[1..];
214
215 let trimmed_after = after_marker.trim_start();
217 let is_task = trimmed_after.starts_with('[')
218 && trimmed_after.len() >= 3
219 && matches!(
220 trimmed_after.chars().nth(1),
221 Some(' ') | Some('x') | Some('X')
222 )
223 && trimmed_after.chars().nth(2) == Some(']');
224
225 if after_marker.starts_with(' ')
227 || after_marker.starts_with('\t')
228 || after_marker.is_empty()
229 || is_task
230 {
231 let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
232 marker_spaces_after(after_marker, _indent_cols + 1);
233 return Some(ListMarkerMatch {
234 marker: ListMarker::Bullet(ch),
235 marker_len: 1,
236 spaces_after_cols,
237 spaces_after_bytes,
238 virtual_marker_space,
239 });
240 }
241 }
242
243 if config.extensions.fancy_lists
245 && let Some(after_marker) = trimmed.strip_prefix("#.")
246 && (after_marker.starts_with(' ')
247 || after_marker.starts_with('\t')
248 || after_marker.is_empty())
249 {
250 let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
251 marker_spaces_after(after_marker, _indent_cols + 2);
252 return Some(ListMarkerMatch {
253 marker: ListMarker::Ordered(OrderedMarker::Hash),
254 marker_len: 2,
255 spaces_after_cols,
256 spaces_after_bytes,
257 virtual_marker_space,
258 });
259 }
260
261 if config.extensions.example_lists
263 && let Some(rest) = trimmed.strip_prefix("(@")
264 {
265 let label_end = rest
267 .chars()
268 .take_while(|c| c.is_alphanumeric() || *c == '_' || *c == '-')
269 .count();
270
271 if rest.len() > label_end && rest.chars().nth(label_end) == Some(')') {
273 let label = if label_end > 0 {
274 Some(rest[..label_end].to_string())
275 } else {
276 None
277 };
278
279 let after_marker = &rest[label_end + 1..];
280 if after_marker.starts_with(' ')
281 || after_marker.starts_with('\t')
282 || after_marker.is_empty()
283 {
284 let marker_len = 2 + label_end + 1; let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
286 marker_spaces_after(after_marker, _indent_cols + marker_len);
287 return Some(ListMarkerMatch {
288 marker: ListMarker::Ordered(OrderedMarker::Example { label }),
289 marker_len,
290 spaces_after_cols,
291 spaces_after_bytes,
292 virtual_marker_space,
293 });
294 }
295 }
296 }
297
298 if let Some(rest) = trimmed.strip_prefix('(') {
300 if config.extensions.fancy_lists {
301 let digit_count = rest.chars().take_while(|c| c.is_ascii_digit()).count();
303 if digit_count > 0
304 && rest.len() > digit_count
305 && rest.chars().nth(digit_count) == Some(')')
306 {
307 let number = &rest[..digit_count];
308 let after_marker = &rest[digit_count + 1..];
309 if after_marker.starts_with(' ')
310 || after_marker.starts_with('\t')
311 || after_marker.is_empty()
312 {
313 let marker_len = 2 + digit_count;
314 let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
315 marker_spaces_after(after_marker, _indent_cols + marker_len);
316 return Some(ListMarkerMatch {
317 marker: ListMarker::Ordered(OrderedMarker::Decimal {
318 number: number.to_string(),
319 style: ListDelimiter::Parens,
320 }),
321 marker_len,
322 spaces_after_cols,
323 spaces_after_bytes,
324 virtual_marker_space,
325 });
326 }
327 }
328 }
329
330 if config.extensions.fancy_lists {
332 if let Some(len) = try_parse_roman_numeral(rest, false)
336 && rest.len() > len
337 && rest.as_bytes()[len] == b')'
338 {
339 let after_marker = &rest[len + 1..];
340 if after_marker.starts_with(' ')
341 || after_marker.starts_with('\t')
342 || after_marker.is_empty()
343 {
344 let marker_len = len + 2;
345 let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
346 marker_spaces_after(after_marker, _indent_cols + marker_len);
347 return Some(ListMarkerMatch {
348 marker: ListMarker::Ordered(OrderedMarker::LowerRoman {
349 numeral: rest[..len].to_string(),
350 style: ListDelimiter::Parens,
351 }),
352 marker_len,
353 spaces_after_cols,
354 spaces_after_bytes,
355 virtual_marker_space,
356 });
357 }
358 }
359
360 if let Some(len) = try_parse_roman_numeral(rest, true)
362 && rest.len() > len
363 && rest.as_bytes()[len] == b')'
364 {
365 let after_marker = &rest[len + 1..];
366 if after_marker.starts_with(' ')
367 || after_marker.starts_with('\t')
368 || after_marker.is_empty()
369 {
370 let marker_len = len + 2;
371 let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
372 marker_spaces_after(after_marker, _indent_cols + marker_len);
373 return Some(ListMarkerMatch {
374 marker: ListMarker::Ordered(OrderedMarker::UpperRoman {
375 numeral: rest[..len].to_string(),
376 style: ListDelimiter::Parens,
377 }),
378 marker_len,
379 spaces_after_cols,
380 spaces_after_bytes,
381 virtual_marker_space,
382 });
383 }
384 }
385
386 if let Some(ch) = rest.chars().next()
388 && ch.is_ascii_lowercase()
389 && rest.len() > 1
390 && rest.chars().nth(1) == Some(')')
391 {
392 let after_marker = &rest[2..];
393 if after_marker.starts_with(' ')
394 || after_marker.starts_with('\t')
395 || after_marker.is_empty()
396 {
397 let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
398 marker_spaces_after(after_marker, _indent_cols + 3);
399 return Some(ListMarkerMatch {
400 marker: ListMarker::Ordered(OrderedMarker::LowerAlpha {
401 letter: ch,
402 style: ListDelimiter::Parens,
403 }),
404 marker_len: 3,
405 spaces_after_cols,
406 spaces_after_bytes,
407 virtual_marker_space,
408 });
409 }
410 }
411
412 if let Some(ch) = rest.chars().next()
414 && ch.is_ascii_uppercase()
415 && rest.len() > 1
416 && rest.chars().nth(1) == Some(')')
417 {
418 let after_marker = &rest[2..];
419 if after_marker.starts_with(' ')
420 || after_marker.starts_with('\t')
421 || after_marker.is_empty()
422 {
423 let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
424 marker_spaces_after(after_marker, _indent_cols + 3);
425 return Some(ListMarkerMatch {
426 marker: ListMarker::Ordered(OrderedMarker::UpperAlpha {
427 letter: ch,
428 style: ListDelimiter::Parens,
429 }),
430 marker_len: 3,
431 spaces_after_cols,
432 spaces_after_bytes,
433 virtual_marker_space,
434 });
435 }
436 }
437 }
438 }
439
440 let digit_count = trimmed.chars().take_while(|c| c.is_ascii_digit()).count();
442 if digit_count > 0 && trimmed.len() > digit_count {
443 if config.dialect == crate::Dialect::CommonMark && digit_count > 9 {
446 return None;
447 }
448
449 let number = &trimmed[..digit_count];
450 let delim = trimmed.chars().nth(digit_count);
451
452 let (style, marker_len) = match delim {
453 Some('.') => (ListDelimiter::Period, digit_count + 1),
454 Some(')') => (ListDelimiter::RightParen, digit_count + 1),
455 _ => return None,
456 };
457 if style == ListDelimiter::RightParen
460 && !config.extensions.fancy_lists
461 && config.dialect != crate::Dialect::CommonMark
462 {
463 return None;
464 }
465
466 let after_marker = &trimmed[marker_len..];
467 if after_marker.starts_with(' ')
468 || after_marker.starts_with('\t')
469 || after_marker.is_empty()
470 {
471 let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
472 marker_spaces_after(after_marker, _indent_cols + marker_len);
473 return Some(ListMarkerMatch {
474 marker: ListMarker::Ordered(OrderedMarker::Decimal {
475 number: number.to_string(),
476 style,
477 }),
478 marker_len,
479 spaces_after_cols,
480 spaces_after_bytes,
481 virtual_marker_space,
482 });
483 }
484 }
485
486 if config.extensions.fancy_lists {
488 if let Some(len) = try_parse_roman_numeral(trimmed, false)
492 && trimmed.len() > len
493 && let delim = trimmed.as_bytes()[len]
494 && (delim == b'.' || delim == b')')
495 {
496 let style = if delim == b'.' {
497 ListDelimiter::Period
498 } else {
499 ListDelimiter::RightParen
500 };
501 let marker_len = len + 1;
502
503 let after_marker = &trimmed[marker_len..];
504 if after_marker.starts_with(' ')
505 || after_marker.starts_with('\t')
506 || after_marker.is_empty()
507 {
508 let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
509 marker_spaces_after(after_marker, _indent_cols + marker_len);
510 return Some(ListMarkerMatch {
511 marker: ListMarker::Ordered(OrderedMarker::LowerRoman {
512 numeral: trimmed[..len].to_string(),
513 style,
514 }),
515 marker_len,
516 spaces_after_cols,
517 spaces_after_bytes,
518 virtual_marker_space,
519 });
520 }
521 }
522
523 if let Some(len) = try_parse_roman_numeral(trimmed, true)
525 && trimmed.len() > len
526 && let delim = trimmed.as_bytes()[len]
527 && (delim == b'.' || delim == b')')
528 {
529 let style = if delim == b'.' {
530 ListDelimiter::Period
531 } else {
532 ListDelimiter::RightParen
533 };
534 let marker_len = len + 1;
535
536 let after_marker = &trimmed[marker_len..];
537 if after_marker.starts_with(' ')
538 || after_marker.starts_with('\t')
539 || after_marker.is_empty()
540 {
541 let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
542 marker_spaces_after(after_marker, _indent_cols + marker_len);
543 return Some(ListMarkerMatch {
544 marker: ListMarker::Ordered(OrderedMarker::UpperRoman {
545 numeral: trimmed[..len].to_string(),
546 style,
547 }),
548 marker_len,
549 spaces_after_cols,
550 spaces_after_bytes,
551 virtual_marker_space,
552 });
553 }
554 }
555
556 if let Some(ch) = trimmed.chars().next()
558 && ch.is_ascii_lowercase()
559 && trimmed.len() > 1
560 && let Some(delim) = trimmed.chars().nth(1)
561 && (delim == '.' || delim == ')')
562 {
563 let style = if delim == '.' {
564 ListDelimiter::Period
565 } else {
566 ListDelimiter::RightParen
567 };
568 let marker_len = 2;
569
570 let after_marker = &trimmed[marker_len..];
571 if after_marker.starts_with(' ')
572 || after_marker.starts_with('\t')
573 || after_marker.is_empty()
574 {
575 let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
576 marker_spaces_after(after_marker, _indent_cols + marker_len);
577 return Some(ListMarkerMatch {
578 marker: ListMarker::Ordered(OrderedMarker::LowerAlpha { letter: ch, style }),
579 marker_len,
580 spaces_after_cols,
581 spaces_after_bytes,
582 virtual_marker_space,
583 });
584 }
585 }
586
587 if let Some(ch) = trimmed.chars().next()
589 && ch.is_ascii_uppercase()
590 && trimmed.len() > 1
591 && let Some(delim) = trimmed.chars().nth(1)
592 && (delim == '.' || delim == ')')
593 {
594 let style = if delim == '.' {
595 ListDelimiter::Period
596 } else {
597 ListDelimiter::RightParen
598 };
599 let marker_len = 2;
600
601 let after_marker = &trimmed[marker_len..];
602 let min_spaces = if delim == '.' { 2 } else { 1 };
604 let (effective_cols, _) = leading_indent_from(after_marker, _indent_cols + marker_len);
605
606 if (after_marker.starts_with(' ') || after_marker.starts_with('\t'))
607 && effective_cols >= min_spaces
608 {
609 let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
610 marker_spaces_after(after_marker, _indent_cols + marker_len);
611 return Some(ListMarkerMatch {
612 marker: ListMarker::Ordered(OrderedMarker::UpperAlpha { letter: ch, style }),
613 marker_len,
614 spaces_after_cols,
615 spaces_after_bytes,
616 virtual_marker_space,
617 });
618 }
619 }
620 }
621
622 None
623}
624
625pub(crate) fn markers_match(a: &ListMarker, b: &ListMarker, dialect: crate::Dialect) -> bool {
626 match (a, b) {
627 (ListMarker::Bullet(ca), ListMarker::Bullet(cb)) => match dialect {
633 crate::Dialect::CommonMark => ca == cb,
634 _ => true,
635 },
636 (ListMarker::Ordered(OrderedMarker::Hash), ListMarker::Ordered(OrderedMarker::Hash)) => {
637 true
638 }
639 (
640 ListMarker::Ordered(OrderedMarker::Decimal { style: s1, .. }),
641 ListMarker::Ordered(OrderedMarker::Decimal { style: s2, .. }),
642 ) => s1 == s2,
643 (
644 ListMarker::Ordered(OrderedMarker::LowerAlpha { style: s1, .. }),
645 ListMarker::Ordered(OrderedMarker::LowerAlpha { style: s2, .. }),
646 ) => s1 == s2,
647 (
648 ListMarker::Ordered(OrderedMarker::UpperAlpha { style: s1, .. }),
649 ListMarker::Ordered(OrderedMarker::UpperAlpha { style: s2, .. }),
650 ) => s1 == s2,
651 (
652 ListMarker::Ordered(OrderedMarker::LowerRoman { style: s1, .. }),
653 ListMarker::Ordered(OrderedMarker::LowerRoman { style: s2, .. }),
654 ) => s1 == s2,
655 (
656 ListMarker::Ordered(OrderedMarker::UpperRoman { style: s1, .. }),
657 ListMarker::Ordered(OrderedMarker::UpperRoman { style: s2, .. }),
658 ) => s1 == s2,
659 (
660 ListMarker::Ordered(OrderedMarker::Example { .. }),
661 ListMarker::Ordered(OrderedMarker::Example { .. }),
662 ) => true, _ => false,
664 }
665}
666
667pub(in crate::parser) fn emit_list_item(
671 builder: &mut GreenNodeBuilder<'static>,
672 item: &ListItemEmissionInput<'_>,
673) -> (usize, String) {
674 builder.start_node(SyntaxKind::LIST_ITEM.into());
675
676 if item.indent_bytes > 0 {
678 builder.token(
679 SyntaxKind::WHITESPACE.into(),
680 &item.content[..item.indent_bytes],
681 );
682 }
683
684 let marker_text = &item.content[item.indent_bytes..item.indent_bytes + item.marker_len];
685 builder.token(SyntaxKind::LIST_MARKER.into(), marker_text);
686
687 if item.spaces_after_bytes > 0 {
688 let space_start = item.indent_bytes + item.marker_len;
689 let space_end = space_start + item.spaces_after_bytes;
690 if space_end <= item.content.len() {
691 builder.token(
692 SyntaxKind::WHITESPACE.into(),
693 &item.content[space_start..space_end],
694 );
695 }
696 }
697
698 let content_col = item.indent_cols + item.marker_len + item.spaces_after_cols;
699 let content_start = item.indent_bytes + item.marker_len + item.spaces_after_bytes;
700
701 let text_to_buffer = if content_start < item.content.len() {
705 let rest = &item.content[content_start..];
706 if (rest.starts_with("[ ]") || rest.starts_with("[x]") || rest.starts_with("[X]"))
707 && rest
708 .as_bytes()
709 .get(3)
710 .is_some_and(|b| (*b as char).is_whitespace())
711 {
712 builder.token(SyntaxKind::TASK_CHECKBOX.into(), &rest[..3]);
713 rest[3..].to_string()
714 } else {
715 rest.to_string()
716 }
717 } else {
718 String::new()
719 };
720
721 (content_col, text_to_buffer)
722}
723
#[cfg(test)]
mod tests {
    use super::*;
    use crate::options::ParserOptions;

    /// Bullets are recognised whether a space or a tab follows them.
    #[test]
    fn detects_bullet_markers() {
        let config = ParserOptions::default();
        for line in ["* item", "*\titem"] {
            assert!(try_parse_list_marker(line, &config).is_some());
        }
    }

    /// With `fancy_lists` on, lowercase letters followed by `.` or `)` are
    /// list markers.
    #[test]
    fn detects_fancy_alpha_markers() {
        let mut config = ParserOptions::default();
        config.extensions.fancy_lists = true;

        let cases = [
            ("a. item", "a. should parse"),
            ("b. item", "b. should parse"),
            ("c. item", "c. should parse"),
            ("a) item", "a) should parse"),
            ("b) item", "b) should parse"),
        ];
        for (line, why) in cases {
            assert!(try_parse_list_marker(line, &config).is_some(), "{why}");
        }
    }
}
766
/// Markers of the same flavour and delimiter style match regardless of their
/// value; a different delimiter style does not.
#[test]
fn markers_match_fancy_lists() {
    use ListDelimiter::*;
    use ListMarker::*;
    use OrderedMarker::*;

    let lower_alpha = |letter, style| Ordered(LowerAlpha { letter, style });
    let lower_roman = |numeral: &str| {
        Ordered(LowerRoman {
            numeral: numeral.to_string(),
            style: Period,
        })
    };

    let a_period = lower_alpha('a', Period);
    let b_period = lower_alpha('b', Period);
    assert!(
        markers_match(&a_period, &b_period, crate::Dialect::Pandoc),
        "a. and b. should match"
    );

    let i_period = lower_roman("i");
    let ii_period = lower_roman("ii");
    assert!(
        markers_match(&i_period, &ii_period, crate::Dialect::Pandoc),
        "i. and ii. should match"
    );

    let a_paren = lower_alpha('a', RightParen);
    assert!(
        !markers_match(&a_period, &a_paren, crate::Dialect::Pandoc),
        "a. and a) should not match"
    );
}
810
/// Pandoc treats all bullets alike; CommonMark requires the same bullet
/// character.
#[test]
fn markers_match_bullet_dialect_split() {
    use ListMarker::*;

    let dash = Bullet('-');
    let plus = Bullet('+');
    let star = Bullet('*');

    assert!(markers_match(&dash, &plus, crate::Dialect::Pandoc));
    assert!(markers_match(&dash, &dash, crate::Dialect::CommonMark));
    assert!(!markers_match(&dash, &plus, crate::Dialect::CommonMark));
    assert!(!markers_match(&star, &dash, crate::Dialect::CommonMark));
}
837
/// With `fancy_lists` on, lowercase roman numerals from `iv` to `x` followed
/// by a period are list markers.
#[test]
fn detects_complex_roman_numerals() {
    let mut config = ParserOptions::default();
    config.extensions.fancy_lists = true;

    let cases = [
        ("iv. item", "iv. should parse"),
        ("v. item", "v. should parse"),
        ("vi. item", "vi. should parse"),
        ("vii. item", "vii. should parse"),
        ("viii. item", "viii. should parse"),
        ("ix. item", "ix. should parse"),
        ("x. item", "x. should parse"),
    ];
    for (line, why) in cases {
        assert!(try_parse_list_marker(line, &config).is_some(), "{why}");
    }
}
873
/// Example-list markers parse with and without a label when `example_lists`
/// is on, and are rejected when it is off.
#[test]
fn detects_example_list_markers() {
    let mut config = ParserOptions::default();
    config.extensions.example_lists = true;

    let accepted = [
        ("(@) item", "(@) should parse"),
        ("(@foo) item", "(@foo) should parse"),
        ("(@my_label) item", "(@my_label) should parse"),
        ("(@test-123) item", "(@test-123) should parse"),
    ];
    for (line, why) in accepted {
        assert!(try_parse_list_marker(line, &config).is_some(), "{why}");
    }

    let disabled_config = ParserOptions {
        extensions: crate::options::Extensions {
            example_lists: false,
            ..Default::default()
        },
        ..Default::default()
    };
    let rejected = try_parse_list_marker("(@) item", &disabled_config);
    assert!(
        rejected.is_none(),
        "(@) should not parse when extension disabled"
    );
}
912
/// For a deeply indented ordered marker sitting between two candidate lists
/// (base indents 8 and 6, marker at 7), the list whose base indent is at or
/// above the marker's indent wins over the equally distant one below it.
#[test]
fn deep_ordered_prefers_nearest_enclosing_indent_over_nearest_below() {
    use crate::parser::utils::container_stack::{Container, ContainerStack};

    let lower_roman = |numeral: &str| {
        ListMarker::Ordered(OrderedMarker::LowerRoman {
            numeral: numeral.to_string(),
            style: ListDelimiter::Period,
        })
    };
    let marker = lower_roman("ii");

    let mut containers = ContainerStack::new();
    containers.push(Container::List {
        marker: marker.clone(),
        base_indent_cols: 8,
        has_blank_between_items: false,
    });
    containers.push(Container::ListItem {
        content_col: 11,
        buffer: crate::parser::utils::list_item_buffer::ListItemBuffer::new(),
        marker_only: false,
        virtual_marker_space: false,
    });
    containers.push(Container::List {
        marker,
        base_indent_cols: 6,
        has_blank_between_items: false,
    });

    let found =
        find_matching_list_level(&containers, &lower_roman("iii"), 7, crate::Dialect::Pandoc);
    assert_eq!(found, Some(0));
}
955
/// When one of the candidate lists has exactly the marker's indent, that list
/// is chosen.
#[test]
fn deep_ordered_matches_exact_indent_when_available() {
    use crate::parser::utils::container_stack::{Container, ContainerStack};

    let lower_roman = |numeral: &str| {
        ListMarker::Ordered(OrderedMarker::LowerRoman {
            numeral: numeral.to_string(),
            style: ListDelimiter::Period,
        })
    };
    let marker = lower_roman("ii");

    let mut containers = ContainerStack::new();
    containers.push(Container::List {
        marker: marker.clone(),
        base_indent_cols: 8,
        has_blank_between_items: false,
    });
    containers.push(Container::List {
        marker,
        base_indent_cols: 6,
        has_blank_between_items: false,
    });

    let found =
        find_matching_list_level(&containers, &lower_roman("iii"), 6, crate::Dialect::Pandoc);
    assert_eq!(found, Some(1));
}
990
/// A line such as `- *` parses as a list item containing a nested list whose
/// single item is empty (no `PLAIN` child).
#[test]
fn parses_nested_bullet_list_from_single_marker() {
    use crate::parse;
    use crate::syntax::SyntaxKind;

    let config = ParserOptions::default();
    let cases = [("- *\n", "- *"), ("- +\n", "- +"), ("- -\n", "- -")];

    for (input, desc) in cases {
        let tree = parse(input, Some(config.clone()));
        assert_eq!(
            tree.kind(),
            SyntaxKind::DOCUMENT,
            "{desc}: root should be DOCUMENT"
        );

        // DOCUMENT > LIST
        let Some(outer_list) = tree.children().find(|n| n.kind() == SyntaxKind::LIST) else {
            panic!("{desc}: should have outer LIST node")
        };

        // LIST > LIST_ITEM
        let Some(outer_item) = outer_list
            .children()
            .find(|n| n.kind() == SyntaxKind::LIST_ITEM)
        else {
            panic!("{desc}: should have outer LIST_ITEM")
        };

        // LIST_ITEM > LIST (the nested list)
        let Some(nested_list) = outer_item
            .children()
            .find(|n| n.kind() == SyntaxKind::LIST)
        else {
            panic!(
                "{desc}: outer LIST_ITEM should contain nested LIST, got: {:?}",
                outer_item.children().map(|n| n.kind()).collect::<Vec<_>>()
            )
        };

        // nested LIST > LIST_ITEM
        let Some(nested_item) = nested_list
            .children()
            .find(|n| n.kind() == SyntaxKind::LIST_ITEM)
        else {
            panic!("{desc}: nested LIST should have LIST_ITEM")
        };

        let has_plain = nested_item
            .children()
            .any(|n| n.kind() == SyntaxKind::PLAIN);
        assert!(
            !has_plain,
            "{desc}: nested LIST_ITEM should not have PLAIN node (should be empty)"
        );
    }
}
1048
1049pub(in crate::parser) fn in_list(containers: &ContainerStack) -> bool {
1053 containers
1054 .stack
1055 .iter()
1056 .any(|c| matches!(c, Container::List { .. }))
1057}
1058
1059pub(in crate::parser) fn in_blockquote_list(containers: &ContainerStack) -> bool {
1061 let mut seen_blockquote = false;
1062 for c in &containers.stack {
1063 if matches!(c, Container::BlockQuote { .. }) {
1064 seen_blockquote = true;
1065 }
1066 if seen_blockquote && matches!(c, Container::List { .. }) {
1067 return true;
1068 }
1069 }
1070 false
1071}
1072
1073pub(in crate::parser) fn find_matching_list_level(
1075 containers: &ContainerStack,
1076 marker: &ListMarker,
1077 indent_cols: usize,
1078 dialect: crate::Dialect,
1079) -> Option<usize> {
1080 let mut best_match: Option<(usize, usize, bool)> = None; let is_deep_ordered = matches!(marker, ListMarker::Ordered(_)) && indent_cols >= 4;
1085 let mut best_above_match: Option<(usize, usize)> = None; for (i, c) in containers.stack.iter().enumerate().rev() {
1088 if let Container::List {
1089 marker: list_marker,
1090 base_indent_cols,
1091 ..
1092 } = c
1093 && markers_match(marker, list_marker, dialect)
1094 {
1095 let matches = if indent_cols >= 4 && *base_indent_cols >= 4 {
1096 match (marker, list_marker) {
1101 (ListMarker::Ordered(_), ListMarker::Ordered(_)) => {
1102 indent_cols.abs_diff(*base_indent_cols) <= 3
1103 }
1104 _ => indent_cols >= *base_indent_cols && indent_cols <= base_indent_cols + 3,
1105 }
1106 } else if indent_cols >= 4 || *base_indent_cols >= 4 {
1107 match (marker, list_marker) {
1112 (ListMarker::Ordered(_), ListMarker::Ordered(_)) => {
1113 indent_cols.abs_diff(*base_indent_cols) <= 3
1114 }
1115 _ => false,
1116 }
1117 } else {
1118 indent_cols.abs_diff(*base_indent_cols) <= 3
1121 };
1122
1123 if matches {
1124 let distance = indent_cols.abs_diff(*base_indent_cols);
1125 let base_leq_indent = *base_indent_cols <= indent_cols;
1126
1127 if is_deep_ordered
1131 && matches!(
1132 (marker, list_marker),
1133 (ListMarker::Ordered(_), ListMarker::Ordered(_))
1134 )
1135 && *base_indent_cols >= indent_cols
1136 {
1137 let delta = *base_indent_cols - indent_cols;
1138 if best_above_match.is_none_or(|(_, best_delta)| delta < best_delta) {
1139 best_above_match = Some((i, delta));
1140 }
1141 }
1142
1143 if let Some((_, best_dist, best_base_leq)) = best_match {
1144 if distance < best_dist
1145 || (distance == best_dist && base_leq_indent && !best_base_leq)
1146 {
1147 best_match = Some((i, distance, base_leq_indent));
1148 }
1149 } else {
1150 best_match = Some((i, distance, base_leq_indent));
1151 }
1152
1153 if distance == 0 {
1155 return Some(i);
1156 }
1157 }
1158 }
1159 }
1160
1161 if let Some((index, _)) = best_above_match {
1162 return Some(index);
1163 }
1164
1165 best_match.map(|(i, _, _)| i)
1166}
1167
1168pub(in crate::parser) fn start_nested_list(
1170 containers: &mut ContainerStack,
1171 builder: &mut GreenNodeBuilder<'static>,
1172 marker: &ListMarker,
1173 item: &ListItemEmissionInput<'_>,
1174 indent_to_emit: Option<&str>,
1175 config: &ParserOptions,
1176) {
1177 if let Some(indent_str) = indent_to_emit {
1179 builder.token(SyntaxKind::WHITESPACE.into(), indent_str);
1180 }
1181
1182 builder.start_node(SyntaxKind::LIST.into());
1184 containers.push(Container::List {
1185 marker: marker.clone(),
1186 base_indent_cols: item.indent_cols,
1187 has_blank_between_items: false,
1188 });
1189
1190 let (content_col, text_to_buffer) = emit_list_item(builder, item);
1192 finish_list_item_with_optional_nested(
1193 containers,
1194 builder,
1195 content_col,
1196 text_to_buffer,
1197 item.virtual_marker_space,
1198 config,
1199 );
1200}
1201
1202pub(in crate::parser) fn is_content_nested_bullet_marker(
1205 content: &str,
1206 marker_len: usize,
1207 spaces_after_bytes: usize,
1208) -> Option<char> {
1209 let (_, indent_bytes) = leading_indent(content);
1210 let content_start = indent_bytes + marker_len + spaces_after_bytes;
1211
1212 if content_start >= content.len() {
1213 return None;
1214 }
1215
1216 let remaining = &content[content_start..];
1217 let (text_part, _) = strip_newline(remaining);
1218 let trimmed = text_part.trim();
1219
1220 if trimmed.len() == 1 {
1222 let ch = trimmed.chars().next().unwrap();
1223 if matches!(ch, '*' | '+' | '-') {
1224 return Some(ch);
1225 }
1226 }
1227
1228 None
1229}
1230
1231pub(in crate::parser) fn add_list_item_with_nested_empty_list(
1234 containers: &mut ContainerStack,
1235 builder: &mut GreenNodeBuilder<'static>,
1236 item: &ListItemEmissionInput<'_>,
1237 nested_marker: char,
1238) {
1239 builder.start_node(SyntaxKind::LIST_ITEM.into());
1241
1242 if item.indent_bytes > 0 {
1244 builder.token(
1245 SyntaxKind::WHITESPACE.into(),
1246 &item.content[..item.indent_bytes],
1247 );
1248 }
1249
1250 let marker_text = &item.content[item.indent_bytes..item.indent_bytes + item.marker_len];
1251 builder.token(SyntaxKind::LIST_MARKER.into(), marker_text);
1252
1253 if item.spaces_after_bytes > 0 {
1254 let space_start = item.indent_bytes + item.marker_len;
1255 let space_end = space_start + item.spaces_after_bytes;
1256 if space_end <= item.content.len() {
1257 builder.token(
1258 SyntaxKind::WHITESPACE.into(),
1259 &item.content[space_start..space_end],
1260 );
1261 }
1262 }
1263
1264 builder.start_node(SyntaxKind::LIST.into());
1266
1267 builder.start_node(SyntaxKind::LIST_ITEM.into());
1269 builder.token(SyntaxKind::LIST_MARKER.into(), &nested_marker.to_string());
1270
1271 let content_start = item.indent_bytes + item.marker_len + item.spaces_after_bytes;
1273 if content_start < item.content.len() {
1274 let remaining = &item.content[content_start..];
1275 if remaining.len() > 1 {
1277 let (_, newline_str) = strip_newline(&remaining[1..]);
1278 if !newline_str.is_empty() {
1279 builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1280 }
1281 }
1282 }
1283
1284 builder.finish_node(); builder.finish_node(); let content_col = item.indent_cols + item.marker_len + item.spaces_after_cols;
1289 containers.push(Container::ListItem {
1290 content_col,
1291 buffer: ListItemBuffer::new(),
1292 marker_only: false, virtual_marker_space: item.virtual_marker_space,
1294 });
1295}
1296
1297pub(in crate::parser) fn add_list_item(
1299 containers: &mut ContainerStack,
1300 builder: &mut GreenNodeBuilder<'static>,
1301 item: &ListItemEmissionInput<'_>,
1302 config: &ParserOptions,
1303) {
1304 let (content_col, text_to_buffer) = emit_list_item(builder, item);
1305
1306 log::trace!(
1307 "add_list_item: content={:?}, text_to_buffer={:?}",
1308 item.content,
1309 text_to_buffer
1310 );
1311
1312 finish_list_item_with_optional_nested(
1313 containers,
1314 builder,
1315 content_col,
1316 text_to_buffer,
1317 item.virtual_marker_space,
1318 config,
1319 );
1320}
1321
1322fn finish_list_item_with_optional_nested(
1327 containers: &mut ContainerStack,
1328 builder: &mut GreenNodeBuilder<'static>,
1329 content_col: usize,
1330 text_to_buffer: String,
1331 virtual_marker_space: bool,
1332 config: &ParserOptions,
1333) {
1334 let buffered_is_thematic_break =
1339 super::horizontal_rules::try_parse_horizontal_rule(trim_end_newlines(&text_to_buffer))
1340 .is_some();
1341
1342 let dialect_allows_nested = config.dialect == crate::Dialect::CommonMark;
1349
1350 if dialect_allows_nested
1351 && !buffered_is_thematic_break
1352 && let Some(inner_match) = try_parse_list_marker(&text_to_buffer, config)
1353 {
1354 let inner_content_start = inner_match.marker_len + inner_match.spaces_after_bytes;
1355 let after_inner =
1356 trim_end_newlines(text_to_buffer.get(inner_content_start..).unwrap_or(""));
1357 if !after_inner.is_empty() {
1361 containers.push(Container::ListItem {
1363 content_col,
1364 buffer: ListItemBuffer::new(),
1365 marker_only: false, virtual_marker_space,
1367 });
1368 builder.start_node(SyntaxKind::LIST.into());
1370 containers.push(Container::List {
1371 marker: inner_match.marker.clone(),
1372 base_indent_cols: content_col,
1373 has_blank_between_items: false,
1374 });
1375 let inner_item = ListItemEmissionInput {
1378 content: text_to_buffer.as_str(),
1379 marker_len: inner_match.marker_len,
1380 spaces_after_cols: inner_match.spaces_after_cols,
1381 spaces_after_bytes: inner_match.spaces_after_bytes,
1382 indent_cols: content_col,
1383 indent_bytes: 0,
1384 virtual_marker_space: inner_match.virtual_marker_space,
1385 };
1386 let (inner_content_col, inner_text_to_buffer) = emit_list_item(builder, &inner_item);
1387 finish_list_item_with_optional_nested(
1388 containers,
1389 builder,
1390 inner_content_col,
1391 inner_text_to_buffer,
1392 inner_match.virtual_marker_space,
1393 config,
1394 );
1395 return;
1396 }
1397 }
1398
1399 if dialect_allows_nested
1410 && !buffered_is_thematic_break
1411 && text_to_buffer.starts_with('>')
1412 && !text_to_buffer.starts_with(">>")
1413 {
1414 let bytes = text_to_buffer.as_bytes();
1415 let has_trailing_space = bytes.get(1).copied() == Some(b' ');
1416 let content_offset = if has_trailing_space { 2 } else { 1 };
1417 let remaining = &text_to_buffer[content_offset..];
1418
1419 containers.push(Container::ListItem {
1422 content_col,
1423 buffer: ListItemBuffer::new(),
1424 marker_only: false,
1425 virtual_marker_space,
1426 });
1427
1428 builder.start_node(SyntaxKind::BLOCK_QUOTE.into());
1430 builder.token(SyntaxKind::BLOCK_QUOTE_MARKER.into(), ">");
1431 if has_trailing_space {
1432 builder.token(SyntaxKind::WHITESPACE.into(), " ");
1433 }
1434 containers.push(Container::BlockQuote {});
1435
1436 let trimmed = trim_end_newlines(remaining);
1441 if !trimmed.is_empty() {
1442 crate::parser::blocks::paragraphs::start_paragraph_if_needed(containers, builder);
1443 crate::parser::blocks::paragraphs::append_paragraph_line(
1444 containers, builder, remaining, config,
1445 );
1446 }
1447 return;
1448 }
1449
1450 let marker_only = text_to_buffer.trim().is_empty();
1451 let mut buffer = ListItemBuffer::new();
1452 if !text_to_buffer.is_empty() {
1453 buffer.push_text(text_to_buffer);
1454 }
1455 containers.push(Container::ListItem {
1456 content_col,
1457 buffer,
1458 marker_only,
1459 virtual_marker_space,
1460 });
1461}