1use crate::options::ParserOptions;
2use crate::syntax::SyntaxKind;
3use rowan::GreenNodeBuilder;
4
5use crate::parser::utils::container_stack::{
6 Container, ContainerStack, leading_indent, leading_indent_from,
7};
8use crate::parser::utils::helpers::{strip_newline, trim_end_newlines};
9use crate::parser::utils::list_item_buffer::ListItemBuffer;
10
/// The marker that introduces a list item.
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum ListMarker {
    /// Bullet marker; carries the marker character. `try_parse_list_marker`
    /// only produces `*`, `+` or `-` here.
    Bullet(char),
    /// Ordered-list marker; the numbering style lives in [`OrderedMarker`].
    Ordered(OrderedMarker),
}
16
/// Numbering style of an ordered-list marker, together with the literal
/// text that was written and (where applicable) its delimiter style.
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum OrderedMarker {
    /// Decimal number, e.g. `1.`, `1)` or `(1)`.
    Decimal {
        /// The digits exactly as written in the source.
        number: String,
        /// Delimiter surrounding/following the digits.
        style: ListDelimiter,
    },
    /// The `#.` marker (recognised only when `fancy_lists` is enabled).
    Hash,
    /// Single ASCII lowercase letter, e.g. `a.`, `a)` or `(a)`.
    LowerAlpha {
        /// The letter as written.
        letter: char,
        /// Delimiter surrounding/following the letter.
        style: ListDelimiter,
    },
    /// Single ASCII uppercase letter, e.g. `A.`, `A)` or `(A)`.
    UpperAlpha {
        /// The letter as written.
        letter: char,
        /// Delimiter surrounding/following the letter.
        style: ListDelimiter,
    },
    /// Lowercase roman numeral, e.g. `iv.`, `iv)` or `(iv)`.
    LowerRoman {
        /// The numeral text as written.
        numeral: String,
        /// Delimiter surrounding/following the numeral.
        style: ListDelimiter,
    },
    /// Uppercase roman numeral, e.g. `IV.`, `IV)` or `(IV)`.
    UpperRoman {
        /// The numeral text as written.
        numeral: String,
        /// Delimiter surrounding/following the numeral.
        style: ListDelimiter,
    },
    /// Example-list marker `(@)` or `(@label)` (recognised only when
    /// `example_lists` is enabled).
    Example {
        /// The text between `(@` and `)`, or `None` when it is empty.
        label: Option<String>,
    },
}
44
/// Punctuation that delimits an ordered-list marker.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum ListDelimiter {
    /// Trailing period, as in `1.`.
    Period,
    /// Trailing right parenthesis, as in `1)`.
    RightParen,
    /// Enclosing parentheses, as in `(1)`.
    Parens,
}
51
/// Result of successfully recognising a list marker at the start of a line
/// (see `try_parse_list_marker`).
#[derive(Debug, Clone, PartialEq)]
pub(crate) struct ListMarkerMatch {
    /// The recognised marker.
    pub(crate) marker: ListMarker,
    /// Length of the marker text in bytes, delimiters included. It is used
    /// as a byte offset when slicing the line and is also added to column
    /// counts, so the two agree only for ASCII markers.
    pub(crate) marker_len: usize,
    /// Width, in columns, of the whitespace attributed to the marker.
    pub(crate) spaces_after_cols: usize,
    /// Number of whitespace bytes after the marker attributed to the marker.
    pub(crate) spaces_after_bytes: usize,
    /// `true` when one column of marker space is reported although no
    /// whitespace byte was consumed (see `marker_spaces_after`).
    pub(crate) virtual_marker_space: bool,
}
66
/// Everything needed to emit one list item into the syntax tree: the line
/// text plus the offsets describing where indent, marker and content sit.
#[derive(Debug, Clone, Copy)]
pub(in crate::parser) struct ListItemEmissionInput<'a> {
    /// The line text; the byte offsets below index into this string.
    pub content: &'a str,
    /// Byte length of the marker text.
    pub marker_len: usize,
    /// Column width of the whitespace following the marker.
    pub spaces_after_cols: usize,
    /// Byte length of the whitespace following the marker.
    pub spaces_after_bytes: usize,
    /// Column width of the indentation before the marker.
    pub indent_cols: usize,
    /// Byte length of the indentation before the marker.
    pub indent_bytes: usize,
    /// Forwarded unchanged into the `Container::ListItem` that is pushed
    /// for this item.
    pub virtual_marker_space: bool,
}
77
/// Measures a plausible roman numeral at the start of `text`.
///
/// Only letters of the requested case are accepted (`uppercase` selects
/// `IVXLCDM`, otherwise `ivxlcdm`). Returns the number of leading bytes that
/// form the numeral, or `None` when:
///
/// * `text` does not start with a roman digit of the requested case,
/// * the numeral is a single letter other than `I`, `V` or `X`,
/// * `I`, `X` or `C` repeats more than three times in a row, or `V`, `L`
///   or `D` repeats at all,
/// * a smaller digit directly precedes a larger one in a combination other
///   than `IV`, `IX`, `XL`, `XC`, `CD` or `CM`.
///
/// The check is deliberately shallow: it looks only at runs and adjacent
/// pairs, not at the numeral as a whole.
fn try_parse_roman_numeral(text: &str, uppercase: bool) -> Option<usize> {
    /// Numeric value of an upper-cased roman digit (0 for anything else).
    fn digit_value(folded: u8) -> u32 {
        match folded {
            b'I' => 1,
            b'V' => 5,
            b'X' => 10,
            b'L' => 50,
            b'C' => 100,
            b'D' => 500,
            b'M' => 1000,
            _ => 0,
        }
    }

    // Clearing bit 0x20 upper-cases an ASCII letter.
    let fold = |b: u8| b & !0x20;
    let is_roman_digit = |b: u8| {
        if uppercase {
            matches!(b, b'I' | b'V' | b'X' | b'L' | b'C' | b'D' | b'M')
        } else {
            matches!(b, b'i' | b'v' | b'x' | b'l' | b'c' | b'd' | b'm')
        }
    };

    let len = text.bytes().take_while(|&b| is_roman_digit(b)).count();
    let digits = &text.as_bytes()[..len];

    match len {
        0 => return None,
        // A lone letter only counts as roman when it is I, V or X.
        1 if !matches!(fold(digits[0]), b'I' | b'V' | b'X') => return None,
        _ => {}
    }

    // Reject over-long runs of the same digit.
    let mut previous = 0u8;
    let mut streak = 0usize;
    for folded in digits.iter().map(|&b| fold(b)) {
        streak = if folded == previous { streak + 1 } else { 1 };
        previous = folded;
        let max_streak = match folded {
            b'I' | b'X' | b'C' => 3,
            b'V' | b'L' | b'D' => 1,
            _ => usize::MAX,
        };
        if streak > max_streak {
            return None;
        }
    }

    // Every ascending adjacent pair must be a legal subtractive form.
    let has_illegal_pair = digits.windows(2).any(|pair| {
        let (smaller, larger) = (fold(pair[0]), fold(pair[1]));
        digit_value(smaller) < digit_value(larger)
            && !matches!(
                (smaller, larger),
                (b'I', b'V' | b'X') | (b'X', b'L' | b'C') | (b'C', b'D' | b'M')
            )
    });

    if has_illegal_pair { None } else { Some(len) }
}
168
169fn marker_spaces_after(after_marker: &str, marker_end_col: usize) -> (usize, usize, bool) {
184 let (effective_cols, n_bytes) = leading_indent_from(after_marker, marker_end_col);
185 let after_ws = &after_marker[n_bytes..];
186 let has_content = !trim_end_newlines(after_ws).is_empty();
187 if has_content && effective_cols >= 5 {
188 let bytes = match after_marker.as_bytes().first() {
189 Some(b' ') => 1,
190 Some(b'\t') => {
191 let span = 4 - (marker_end_col % 4);
192 if span == 1 { 1 } else { 0 }
193 }
194 _ => 0,
195 };
196 (1, bytes, bytes == 0)
197 } else {
198 (effective_cols, n_bytes, false)
199 }
200}
201
202pub(crate) fn try_parse_list_marker(line: &str, config: &ParserOptions) -> Option<ListMarkerMatch> {
203 let line = trim_end_newlines(line);
206 let (_indent_cols, indent_bytes) = leading_indent(line);
207 let trimmed = &line[indent_bytes..];
208
209 if let Some(ch) = trimmed.chars().next()
211 && matches!(ch, '*' | '+' | '-')
212 {
213 let after_marker = &trimmed[1..];
214
215 let trimmed_after = after_marker.trim_start();
217 let is_task = trimmed_after.starts_with('[')
218 && trimmed_after.len() >= 3
219 && matches!(
220 trimmed_after.chars().nth(1),
221 Some(' ') | Some('x') | Some('X')
222 )
223 && trimmed_after.chars().nth(2) == Some(']');
224
225 if after_marker.starts_with(' ')
227 || after_marker.starts_with('\t')
228 || after_marker.is_empty()
229 || is_task
230 {
231 let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
232 marker_spaces_after(after_marker, _indent_cols + 1);
233 return Some(ListMarkerMatch {
234 marker: ListMarker::Bullet(ch),
235 marker_len: 1,
236 spaces_after_cols,
237 spaces_after_bytes,
238 virtual_marker_space,
239 });
240 }
241 }
242
243 if config.extensions.fancy_lists
245 && let Some(after_marker) = trimmed.strip_prefix("#.")
246 && (after_marker.starts_with(' ')
247 || after_marker.starts_with('\t')
248 || after_marker.is_empty())
249 {
250 let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
251 marker_spaces_after(after_marker, _indent_cols + 2);
252 return Some(ListMarkerMatch {
253 marker: ListMarker::Ordered(OrderedMarker::Hash),
254 marker_len: 2,
255 spaces_after_cols,
256 spaces_after_bytes,
257 virtual_marker_space,
258 });
259 }
260
261 if config.extensions.example_lists
263 && let Some(rest) = trimmed.strip_prefix("(@")
264 {
265 let label_end = rest
267 .chars()
268 .take_while(|c| c.is_alphanumeric() || *c == '_' || *c == '-')
269 .count();
270
271 if rest.len() > label_end && rest.chars().nth(label_end) == Some(')') {
273 let label = if label_end > 0 {
274 Some(rest[..label_end].to_string())
275 } else {
276 None
277 };
278
279 let after_marker = &rest[label_end + 1..];
280 if after_marker.starts_with(' ')
281 || after_marker.starts_with('\t')
282 || after_marker.is_empty()
283 {
284 let marker_len = 2 + label_end + 1; let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
286 marker_spaces_after(after_marker, _indent_cols + marker_len);
287 return Some(ListMarkerMatch {
288 marker: ListMarker::Ordered(OrderedMarker::Example { label }),
289 marker_len,
290 spaces_after_cols,
291 spaces_after_bytes,
292 virtual_marker_space,
293 });
294 }
295 }
296 }
297
298 if let Some(rest) = trimmed.strip_prefix('(') {
300 if config.extensions.fancy_lists {
301 let digit_count = rest.chars().take_while(|c| c.is_ascii_digit()).count();
303 if digit_count > 0
304 && rest.len() > digit_count
305 && rest.chars().nth(digit_count) == Some(')')
306 {
307 let number = &rest[..digit_count];
308 let after_marker = &rest[digit_count + 1..];
309 if after_marker.starts_with(' ')
310 || after_marker.starts_with('\t')
311 || after_marker.is_empty()
312 {
313 let marker_len = 2 + digit_count;
314 let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
315 marker_spaces_after(after_marker, _indent_cols + marker_len);
316 return Some(ListMarkerMatch {
317 marker: ListMarker::Ordered(OrderedMarker::Decimal {
318 number: number.to_string(),
319 style: ListDelimiter::Parens,
320 }),
321 marker_len,
322 spaces_after_cols,
323 spaces_after_bytes,
324 virtual_marker_space,
325 });
326 }
327 }
328 }
329
330 if config.extensions.fancy_lists {
332 if let Some(len) = try_parse_roman_numeral(rest, false)
336 && rest.len() > len
337 && rest.as_bytes()[len] == b')'
338 {
339 let after_marker = &rest[len + 1..];
340 if after_marker.starts_with(' ')
341 || after_marker.starts_with('\t')
342 || after_marker.is_empty()
343 {
344 let marker_len = len + 2;
345 let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
346 marker_spaces_after(after_marker, _indent_cols + marker_len);
347 return Some(ListMarkerMatch {
348 marker: ListMarker::Ordered(OrderedMarker::LowerRoman {
349 numeral: rest[..len].to_string(),
350 style: ListDelimiter::Parens,
351 }),
352 marker_len,
353 spaces_after_cols,
354 spaces_after_bytes,
355 virtual_marker_space,
356 });
357 }
358 }
359
360 if let Some(len) = try_parse_roman_numeral(rest, true)
362 && rest.len() > len
363 && rest.as_bytes()[len] == b')'
364 {
365 let after_marker = &rest[len + 1..];
366 if after_marker.starts_with(' ')
367 || after_marker.starts_with('\t')
368 || after_marker.is_empty()
369 {
370 let marker_len = len + 2;
371 let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
372 marker_spaces_after(after_marker, _indent_cols + marker_len);
373 return Some(ListMarkerMatch {
374 marker: ListMarker::Ordered(OrderedMarker::UpperRoman {
375 numeral: rest[..len].to_string(),
376 style: ListDelimiter::Parens,
377 }),
378 marker_len,
379 spaces_after_cols,
380 spaces_after_bytes,
381 virtual_marker_space,
382 });
383 }
384 }
385
386 if let Some(ch) = rest.chars().next()
388 && ch.is_ascii_lowercase()
389 && rest.len() > 1
390 && rest.chars().nth(1) == Some(')')
391 {
392 let after_marker = &rest[2..];
393 if after_marker.starts_with(' ')
394 || after_marker.starts_with('\t')
395 || after_marker.is_empty()
396 {
397 let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
398 marker_spaces_after(after_marker, _indent_cols + 3);
399 return Some(ListMarkerMatch {
400 marker: ListMarker::Ordered(OrderedMarker::LowerAlpha {
401 letter: ch,
402 style: ListDelimiter::Parens,
403 }),
404 marker_len: 3,
405 spaces_after_cols,
406 spaces_after_bytes,
407 virtual_marker_space,
408 });
409 }
410 }
411
412 if let Some(ch) = rest.chars().next()
414 && ch.is_ascii_uppercase()
415 && rest.len() > 1
416 && rest.chars().nth(1) == Some(')')
417 {
418 let after_marker = &rest[2..];
419 if after_marker.starts_with(' ')
420 || after_marker.starts_with('\t')
421 || after_marker.is_empty()
422 {
423 let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
424 marker_spaces_after(after_marker, _indent_cols + 3);
425 return Some(ListMarkerMatch {
426 marker: ListMarker::Ordered(OrderedMarker::UpperAlpha {
427 letter: ch,
428 style: ListDelimiter::Parens,
429 }),
430 marker_len: 3,
431 spaces_after_cols,
432 spaces_after_bytes,
433 virtual_marker_space,
434 });
435 }
436 }
437 }
438 }
439
440 let digit_count = trimmed.chars().take_while(|c| c.is_ascii_digit()).count();
442 if digit_count > 0 && trimmed.len() > digit_count {
443 if config.dialect == crate::Dialect::CommonMark && digit_count > 9 {
446 return None;
447 }
448
449 let number = &trimmed[..digit_count];
450 let delim = trimmed.chars().nth(digit_count);
451
452 let (style, marker_len) = match delim {
453 Some('.') => (ListDelimiter::Period, digit_count + 1),
454 Some(')') => (ListDelimiter::RightParen, digit_count + 1),
455 _ => return None,
456 };
457 if style == ListDelimiter::RightParen
460 && !config.extensions.fancy_lists
461 && config.dialect != crate::Dialect::CommonMark
462 {
463 return None;
464 }
465
466 let after_marker = &trimmed[marker_len..];
467 if after_marker.starts_with(' ')
468 || after_marker.starts_with('\t')
469 || after_marker.is_empty()
470 {
471 let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
472 marker_spaces_after(after_marker, _indent_cols + marker_len);
473 return Some(ListMarkerMatch {
474 marker: ListMarker::Ordered(OrderedMarker::Decimal {
475 number: number.to_string(),
476 style,
477 }),
478 marker_len,
479 spaces_after_cols,
480 spaces_after_bytes,
481 virtual_marker_space,
482 });
483 }
484 }
485
486 if config.extensions.fancy_lists {
488 if let Some(len) = try_parse_roman_numeral(trimmed, false)
492 && trimmed.len() > len
493 && let delim = trimmed.as_bytes()[len]
494 && (delim == b'.' || delim == b')')
495 {
496 let style = if delim == b'.' {
497 ListDelimiter::Period
498 } else {
499 ListDelimiter::RightParen
500 };
501 let marker_len = len + 1;
502
503 let after_marker = &trimmed[marker_len..];
504 if after_marker.starts_with(' ')
505 || after_marker.starts_with('\t')
506 || after_marker.is_empty()
507 {
508 let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
509 marker_spaces_after(after_marker, _indent_cols + marker_len);
510 return Some(ListMarkerMatch {
511 marker: ListMarker::Ordered(OrderedMarker::LowerRoman {
512 numeral: trimmed[..len].to_string(),
513 style,
514 }),
515 marker_len,
516 spaces_after_cols,
517 spaces_after_bytes,
518 virtual_marker_space,
519 });
520 }
521 }
522
523 if let Some(len) = try_parse_roman_numeral(trimmed, true)
525 && trimmed.len() > len
526 && let delim = trimmed.as_bytes()[len]
527 && (delim == b'.' || delim == b')')
528 {
529 let style = if delim == b'.' {
530 ListDelimiter::Period
531 } else {
532 ListDelimiter::RightParen
533 };
534 let marker_len = len + 1;
535
536 let after_marker = &trimmed[marker_len..];
537 let min_spaces = if delim == b'.' && len == 1 { 2 } else { 1 };
543 let (effective_cols, _) = leading_indent_from(after_marker, _indent_cols + marker_len);
544
545 if (after_marker.starts_with(' ')
546 || after_marker.starts_with('\t')
547 || after_marker.is_empty())
548 && (after_marker.is_empty() || effective_cols >= min_spaces)
549 {
550 let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
551 marker_spaces_after(after_marker, _indent_cols + marker_len);
552 return Some(ListMarkerMatch {
553 marker: ListMarker::Ordered(OrderedMarker::UpperRoman {
554 numeral: trimmed[..len].to_string(),
555 style,
556 }),
557 marker_len,
558 spaces_after_cols,
559 spaces_after_bytes,
560 virtual_marker_space,
561 });
562 }
563 }
564
565 if let Some(ch) = trimmed.chars().next()
567 && ch.is_ascii_lowercase()
568 && trimmed.len() > 1
569 && let Some(delim) = trimmed.chars().nth(1)
570 && (delim == '.' || delim == ')')
571 {
572 let style = if delim == '.' {
573 ListDelimiter::Period
574 } else {
575 ListDelimiter::RightParen
576 };
577 let marker_len = 2;
578
579 let after_marker = &trimmed[marker_len..];
580 if after_marker.starts_with(' ')
581 || after_marker.starts_with('\t')
582 || after_marker.is_empty()
583 {
584 let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
585 marker_spaces_after(after_marker, _indent_cols + marker_len);
586 return Some(ListMarkerMatch {
587 marker: ListMarker::Ordered(OrderedMarker::LowerAlpha { letter: ch, style }),
588 marker_len,
589 spaces_after_cols,
590 spaces_after_bytes,
591 virtual_marker_space,
592 });
593 }
594 }
595
596 if let Some(ch) = trimmed.chars().next()
598 && ch.is_ascii_uppercase()
599 && trimmed.len() > 1
600 && let Some(delim) = trimmed.chars().nth(1)
601 && (delim == '.' || delim == ')')
602 {
603 let style = if delim == '.' {
604 ListDelimiter::Period
605 } else {
606 ListDelimiter::RightParen
607 };
608 let marker_len = 2;
609
610 let after_marker = &trimmed[marker_len..];
611 let min_spaces = if delim == '.' { 2 } else { 1 };
613 let (effective_cols, _) = leading_indent_from(after_marker, _indent_cols + marker_len);
614
615 if (after_marker.starts_with(' ') || after_marker.starts_with('\t'))
616 && effective_cols >= min_spaces
617 {
618 let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
619 marker_spaces_after(after_marker, _indent_cols + marker_len);
620 return Some(ListMarkerMatch {
621 marker: ListMarker::Ordered(OrderedMarker::UpperAlpha { letter: ch, style }),
622 marker_len,
623 spaces_after_cols,
624 spaces_after_bytes,
625 virtual_marker_space,
626 });
627 }
628 }
629 }
630
631 None
632}
633
634pub(crate) fn markers_match(a: &ListMarker, b: &ListMarker, dialect: crate::Dialect) -> bool {
635 match (a, b) {
636 (ListMarker::Bullet(ca), ListMarker::Bullet(cb)) => match dialect {
642 crate::Dialect::CommonMark => ca == cb,
643 _ => true,
644 },
645 (ListMarker::Ordered(OrderedMarker::Hash), ListMarker::Ordered(OrderedMarker::Hash)) => {
646 true
647 }
648 (
649 ListMarker::Ordered(OrderedMarker::Decimal { style: s1, .. }),
650 ListMarker::Ordered(OrderedMarker::Decimal { style: s2, .. }),
651 ) => s1 == s2,
652 (
653 ListMarker::Ordered(OrderedMarker::LowerAlpha { style: s1, .. }),
654 ListMarker::Ordered(OrderedMarker::LowerAlpha { style: s2, .. }),
655 ) => s1 == s2,
656 (
657 ListMarker::Ordered(OrderedMarker::UpperAlpha { style: s1, .. }),
658 ListMarker::Ordered(OrderedMarker::UpperAlpha { style: s2, .. }),
659 ) => s1 == s2,
660 (
661 ListMarker::Ordered(OrderedMarker::LowerRoman { style: s1, .. }),
662 ListMarker::Ordered(OrderedMarker::LowerRoman { style: s2, .. }),
663 ) => s1 == s2,
664 (
665 ListMarker::Ordered(OrderedMarker::UpperRoman { style: s1, .. }),
666 ListMarker::Ordered(OrderedMarker::UpperRoman { style: s2, .. }),
667 ) => s1 == s2,
668 (
669 ListMarker::Ordered(OrderedMarker::Example { .. }),
670 ListMarker::Ordered(OrderedMarker::Example { .. }),
671 ) => true, _ => false,
673 }
674}
675
676pub(in crate::parser) fn emit_list_item(
680 builder: &mut GreenNodeBuilder<'static>,
681 item: &ListItemEmissionInput<'_>,
682) -> (usize, String) {
683 builder.start_node(SyntaxKind::LIST_ITEM.into());
684
685 if item.indent_bytes > 0 {
687 builder.token(
688 SyntaxKind::WHITESPACE.into(),
689 &item.content[..item.indent_bytes],
690 );
691 }
692
693 let marker_text = &item.content[item.indent_bytes..item.indent_bytes + item.marker_len];
694 builder.token(SyntaxKind::LIST_MARKER.into(), marker_text);
695
696 if item.spaces_after_bytes > 0 {
697 let space_start = item.indent_bytes + item.marker_len;
698 let space_end = space_start + item.spaces_after_bytes;
699 if space_end <= item.content.len() {
700 builder.token(
701 SyntaxKind::WHITESPACE.into(),
702 &item.content[space_start..space_end],
703 );
704 }
705 }
706
707 let content_col = item.indent_cols + item.marker_len + item.spaces_after_cols;
708 let content_start = item.indent_bytes + item.marker_len + item.spaces_after_bytes;
709
710 let text_to_buffer = if content_start < item.content.len() {
714 let rest = &item.content[content_start..];
715 if (rest.starts_with("[ ]") || rest.starts_with("[x]") || rest.starts_with("[X]"))
716 && rest
717 .as_bytes()
718 .get(3)
719 .is_some_and(|b| (*b as char).is_whitespace())
720 {
721 builder.token(SyntaxKind::TASK_CHECKBOX.into(), &rest[..3]);
722 rest[3..].to_string()
723 } else {
724 rest.to_string()
725 }
726 } else {
727 String::new()
728 };
729
730 (content_col, text_to_buffer)
731}
732
#[cfg(test)]
mod tests {
    use super::*;
    use crate::options::ParserOptions;

    /// Bullets are recognised whether followed by a space or a tab.
    #[test]
    fn detects_bullet_markers() {
        let config = ParserOptions::default();
        for line in ["* item", "*\titem"] {
            assert!(try_parse_list_marker(line, &config).is_some());
        }
    }

    /// With `fancy_lists` on, lowercase letters with `.` or `)` are markers.
    #[test]
    fn detects_fancy_alpha_markers() {
        let mut config = ParserOptions::default();
        config.extensions.fancy_lists = true;

        for marker in ["a.", "b.", "c.", "a)", "b)"] {
            let line = format!("{marker} item");
            assert!(
                try_parse_list_marker(&line, &config).is_some(),
                "{marker} should parse"
            );
        }
    }
}
775
/// Fancy ordered markers match on kind and delimiter, not on their value.
#[test]
fn markers_match_fancy_lists() {
    use ListDelimiter::{Period, RightParen};
    use ListMarker::Ordered;
    use OrderedMarker::{LowerAlpha, LowerRoman};

    let alpha = |letter: char, style: ListDelimiter| Ordered(LowerAlpha { letter, style });
    let roman = |numeral: &str| {
        Ordered(LowerRoman {
            numeral: numeral.to_string(),
            style: Period,
        })
    };
    let dialect = crate::Dialect::Pandoc;

    let a_period = alpha('a', Period);

    assert!(
        markers_match(&a_period, &alpha('b', Period), dialect),
        "a. and b. should match"
    );
    assert!(
        markers_match(&roman("i"), &roman("ii"), dialect),
        "i. and ii. should match"
    );
    assert!(
        !markers_match(&a_period, &alpha('a', RightParen), dialect),
        "a. and a) should not match"
    );
}
819
/// Pandoc treats all bullets alike; CommonMark requires the same character.
#[test]
fn markers_match_bullet_dialect_split() {
    use ListMarker::Bullet;

    let compatible = [
        ('-', '+', crate::Dialect::Pandoc),
        ('-', '-', crate::Dialect::CommonMark),
    ];
    for (left, right, dialect) in compatible {
        assert!(markers_match(&Bullet(left), &Bullet(right), dialect));
    }

    let incompatible = [
        ('-', '+', crate::Dialect::CommonMark),
        ('*', '-', crate::Dialect::CommonMark),
    ];
    for (left, right, dialect) in incompatible {
        assert!(!markers_match(&Bullet(left), &Bullet(right), dialect));
    }
}
846
/// Lowercase roman numerals four through ten parse with `fancy_lists` on.
#[test]
fn detects_complex_roman_numerals() {
    let mut config = ParserOptions::default();
    config.extensions.fancy_lists = true;

    for numeral in ["iv", "v", "vi", "vii", "viii", "ix", "x"] {
        let line = format!("{numeral}. item");
        assert!(
            try_parse_list_marker(&line, &config).is_some(),
            "{numeral}. should parse"
        );
    }
}
882
/// Example markers parse with and without labels, and only when the
/// `example_lists` extension is enabled.
#[test]
fn detects_example_list_markers() {
    let mut enabled = ParserOptions::default();
    enabled.extensions.example_lists = true;

    for marker in ["(@)", "(@foo)", "(@my_label)", "(@test-123)"] {
        let line = format!("{marker} item");
        assert!(
            try_parse_list_marker(&line, &enabled).is_some(),
            "{marker} should parse"
        );
    }

    let extensions = crate::options::Extensions {
        example_lists: false,
        ..Default::default()
    };
    let disabled = ParserOptions {
        extensions,
        ..Default::default()
    };
    assert!(
        try_parse_list_marker("(@) item", &disabled).is_none(),
        "(@) should not parse when extension disabled"
    );
}
921
/// With lists at indent 8 (outer) and 6 (inner), a deep ordered marker at
/// indent 7 resolves to the list at or above its indent (index 0) even
/// though the list below it is equally close.
#[test]
fn deep_ordered_prefers_nearest_enclosing_indent_over_nearest_below() {
    use crate::parser::utils::container_stack::{Container, ContainerStack};

    let lower_roman = |numeral: &str| {
        ListMarker::Ordered(OrderedMarker::LowerRoman {
            numeral: numeral.to_string(),
            style: ListDelimiter::Period,
        })
    };
    let list_at = |base_indent_cols: usize| Container::List {
        marker: lower_roman("ii"),
        base_indent_cols,
        has_blank_between_items: false,
    };

    let mut containers = ContainerStack::new();
    containers.push(list_at(8));
    containers.push(Container::ListItem {
        content_col: 11,
        buffer: crate::parser::utils::list_item_buffer::ListItemBuffer::new(),
        marker_only: false,
        virtual_marker_space: false,
    });
    containers.push(list_at(6));

    let level =
        find_matching_list_level(&containers, &lower_roman("iii"), 7, crate::Dialect::Pandoc);
    assert_eq!(level, Some(0));
}
964
/// When a list with exactly the marker's indent exists, it is chosen.
#[test]
fn deep_ordered_matches_exact_indent_when_available() {
    use crate::parser::utils::container_stack::{Container, ContainerStack};

    let lower_roman = |numeral: &str| {
        ListMarker::Ordered(OrderedMarker::LowerRoman {
            numeral: numeral.to_string(),
            style: ListDelimiter::Period,
        })
    };

    let mut containers = ContainerStack::new();
    for base_indent_cols in [8, 6] {
        containers.push(Container::List {
            marker: lower_roman("ii"),
            base_indent_cols,
            has_blank_between_items: false,
        });
    }

    let level =
        find_matching_list_level(&containers, &lower_roman("iii"), 6, crate::Dialect::Pandoc);
    assert_eq!(level, Some(1));
}
999
/// A bullet whose only content is another bullet character (`- *`) parses
/// as a list containing an item that holds a nested list with one empty
/// item.
#[test]
fn parses_nested_bullet_list_from_single_marker() {
    use crate::parse;
    use crate::syntax::SyntaxKind;

    let config = ParserOptions::default();
    let cases = [("- *\n", "- *"), ("- +\n", "- +"), ("- -\n", "- -")];

    for (input, desc) in cases {
        let tree = parse(input, Some(config.clone()));
        assert_eq!(
            tree.kind(),
            SyntaxKind::DOCUMENT,
            "{desc}: root should be DOCUMENT"
        );

        let Some(outer_list) = tree.children().find(|n| n.kind() == SyntaxKind::LIST) else {
            panic!("{desc}: should have outer LIST node");
        };

        let Some(outer_item) = outer_list
            .children()
            .find(|n| n.kind() == SyntaxKind::LIST_ITEM)
        else {
            panic!("{desc}: should have outer LIST_ITEM");
        };

        let Some(nested_list) = outer_item
            .children()
            .find(|n| n.kind() == SyntaxKind::LIST)
        else {
            panic!(
                "{desc}: outer LIST_ITEM should contain nested LIST, got: {:?}",
                outer_item.children().map(|n| n.kind()).collect::<Vec<_>>()
            );
        };

        let Some(nested_item) = nested_list
            .children()
            .find(|n| n.kind() == SyntaxKind::LIST_ITEM)
        else {
            panic!("{desc}: nested LIST should have LIST_ITEM");
        };

        let nested_has_plain = nested_item
            .children()
            .any(|n| n.kind() == SyntaxKind::PLAIN);
        assert!(
            !nested_has_plain,
            "{desc}: nested LIST_ITEM should not have PLAIN node (should be empty)"
        );
    }
}
1057
1058pub(in crate::parser) fn in_list(containers: &ContainerStack) -> bool {
1062 containers
1063 .stack
1064 .iter()
1065 .any(|c| matches!(c, Container::List { .. }))
1066}
1067
1068pub(in crate::parser) fn in_blockquote_list(containers: &ContainerStack) -> bool {
1070 let mut seen_blockquote = false;
1071 for c in &containers.stack {
1072 if matches!(c, Container::BlockQuote { .. }) {
1073 seen_blockquote = true;
1074 }
1075 if seen_blockquote && matches!(c, Container::List { .. }) {
1076 return true;
1077 }
1078 }
1079 false
1080}
1081
/// Finds the open list that a new item with `marker` at `indent_cols`
/// should be attached to.
///
/// The container stack is scanned from the innermost entry outwards and the
/// scan stops at the first block quote. Only lists whose marker is
/// compatible (per `markers_match`) and whose base indent is close enough
/// to `indent_cols` are candidates.
///
/// Returns the index into `containers.stack` of the chosen list, or `None`
/// when no candidate exists. Selection rules:
///
/// * a candidate with exactly the same indent is returned immediately;
/// * otherwise, for an ordered marker indented four or more columns, the
///   ordered candidate whose base indent is at or above `indent_cols` by the
///   smallest amount wins (innermost on ties);
/// * otherwise the candidate with the smallest indent distance wins, with
///   ties going to one whose base indent does not exceed `indent_cols`.
pub(in crate::parser) fn find_matching_list_level(
    containers: &ContainerStack,
    marker: &ListMarker,
    indent_cols: usize,
    dialect: crate::Dialect,
) -> Option<usize> {
    // (stack index, indent distance, base_indent <= indent_cols)
    let mut best_match: Option<(usize, usize, bool)> = None;
    let is_deep_ordered = matches!(marker, ListMarker::Ordered(_)) && indent_cols >= 4;
    // (stack index, base_indent - indent_cols); only used for deep ordered markers.
    let mut best_above_match: Option<(usize, usize)> = None;
    // Innermost container first.
    for (i, c) in containers.stack.iter().enumerate().rev() {
        // Never match lists on the far side of a block quote.
        if matches!(c, Container::BlockQuote { .. }) {
            break;
        }
        if let Container::List {
            marker: list_marker,
            base_indent_cols,
            ..
        } = c
            && markers_match(marker, list_marker, dialect)
        {
            // Is the indent close enough to this list's base indent?
            let matches = if indent_cols >= 4 && *base_indent_cols >= 4 {
                // Both deeply indented: ordered lists tolerate up to three
                // columns either way; others only up to three columns deeper.
                match (marker, list_marker) {
                    (ListMarker::Ordered(_), ListMarker::Ordered(_)) => {
                        indent_cols.abs_diff(*base_indent_cols) <= 3
                    }
                    _ => indent_cols >= *base_indent_cols && indent_cols <= base_indent_cols + 3,
                }
            } else if indent_cols >= 4 || *base_indent_cols >= 4 {
                // Exactly one side is deeply indented: only ordered lists
                // may still match, within three columns.
                match (marker, list_marker) {
                    (ListMarker::Ordered(_), ListMarker::Ordered(_)) => {
                        indent_cols.abs_diff(*base_indent_cols) <= 3
                    }
                    _ => false,
                }
            } else {
                // Both shallow: within three columns either way.
                indent_cols.abs_diff(*base_indent_cols) <= 3
            };

            if matches {
                let distance = indent_cols.abs_diff(*base_indent_cols);
                let base_leq_indent = *base_indent_cols <= indent_cols;

                // Track the closest ordered list at or above this indent.
                // The strict `<` keeps the innermost candidate on ties.
                if is_deep_ordered
                    && matches!(
                        (marker, list_marker),
                        (ListMarker::Ordered(_), ListMarker::Ordered(_))
                    )
                    && *base_indent_cols >= indent_cols
                {
                    let delta = *base_indent_cols - indent_cols;
                    if best_above_match.is_none_or(|(_, best_delta)| delta < best_delta) {
                        best_above_match = Some((i, delta));
                    }
                }

                // Track the closest list overall; on equal distance prefer a
                // list whose base indent does not exceed the marker's indent.
                if let Some((_, best_dist, best_base_leq)) = best_match {
                    if distance < best_dist
                        || (distance == best_dist && base_leq_indent && !best_base_leq)
                    {
                        best_match = Some((i, distance, base_leq_indent));
                    }
                } else {
                    best_match = Some((i, distance, base_leq_indent));
                }

                // An exact indent match cannot be beaten.
                if distance == 0 {
                    return Some(i);
                }
            }
        }
    }

    // The "at or above" candidate takes precedence over the nearest one.
    if let Some((index, _)) = best_above_match {
        return Some(index);
    }

    best_match.map(|(i, _, _)| i)
}
1185
1186pub(in crate::parser) fn start_nested_list(
1188 containers: &mut ContainerStack,
1189 builder: &mut GreenNodeBuilder<'static>,
1190 marker: &ListMarker,
1191 item: &ListItemEmissionInput<'_>,
1192 indent_to_emit: Option<&str>,
1193 config: &ParserOptions,
1194) {
1195 if let Some(indent_str) = indent_to_emit {
1197 builder.token(SyntaxKind::WHITESPACE.into(), indent_str);
1198 }
1199
1200 builder.start_node(SyntaxKind::LIST.into());
1202 containers.push(Container::List {
1203 marker: marker.clone(),
1204 base_indent_cols: item.indent_cols,
1205 has_blank_between_items: false,
1206 });
1207
1208 let (content_col, text_to_buffer) = emit_list_item(builder, item);
1210 finish_list_item_with_optional_nested(
1211 containers,
1212 builder,
1213 content_col,
1214 text_to_buffer,
1215 item.virtual_marker_space,
1216 config,
1217 );
1218}
1219
1220pub(in crate::parser) fn is_content_nested_bullet_marker(
1223 content: &str,
1224 marker_len: usize,
1225 spaces_after_bytes: usize,
1226) -> Option<char> {
1227 let (_, indent_bytes) = leading_indent(content);
1228 let content_start = indent_bytes + marker_len + spaces_after_bytes;
1229
1230 if content_start >= content.len() {
1231 return None;
1232 }
1233
1234 let remaining = &content[content_start..];
1235 let (text_part, _) = strip_newline(remaining);
1236 let trimmed = text_part.trim();
1237
1238 if trimmed.len() == 1 {
1240 let ch = trimmed.chars().next().unwrap();
1241 if matches!(ch, '*' | '+' | '-') {
1242 return Some(ch);
1243 }
1244 }
1245
1246 None
1247}
1248
1249pub(in crate::parser) fn add_list_item_with_nested_empty_list(
1252 containers: &mut ContainerStack,
1253 builder: &mut GreenNodeBuilder<'static>,
1254 item: &ListItemEmissionInput<'_>,
1255 nested_marker: char,
1256) {
1257 builder.start_node(SyntaxKind::LIST_ITEM.into());
1259
1260 if item.indent_bytes > 0 {
1262 builder.token(
1263 SyntaxKind::WHITESPACE.into(),
1264 &item.content[..item.indent_bytes],
1265 );
1266 }
1267
1268 let marker_text = &item.content[item.indent_bytes..item.indent_bytes + item.marker_len];
1269 builder.token(SyntaxKind::LIST_MARKER.into(), marker_text);
1270
1271 if item.spaces_after_bytes > 0 {
1272 let space_start = item.indent_bytes + item.marker_len;
1273 let space_end = space_start + item.spaces_after_bytes;
1274 if space_end <= item.content.len() {
1275 builder.token(
1276 SyntaxKind::WHITESPACE.into(),
1277 &item.content[space_start..space_end],
1278 );
1279 }
1280 }
1281
1282 builder.start_node(SyntaxKind::LIST.into());
1284
1285 builder.start_node(SyntaxKind::LIST_ITEM.into());
1287 builder.token(SyntaxKind::LIST_MARKER.into(), &nested_marker.to_string());
1288
1289 let content_start = item.indent_bytes + item.marker_len + item.spaces_after_bytes;
1291 if content_start < item.content.len() {
1292 let remaining = &item.content[content_start..];
1293 if remaining.len() > 1 {
1295 let (_, newline_str) = strip_newline(&remaining[1..]);
1296 if !newline_str.is_empty() {
1297 builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1298 }
1299 }
1300 }
1301
1302 builder.finish_node(); builder.finish_node(); let content_col = item.indent_cols + item.marker_len + item.spaces_after_cols;
1307 containers.push(Container::ListItem {
1308 content_col,
1309 buffer: ListItemBuffer::new(),
1310 marker_only: false, virtual_marker_space: item.virtual_marker_space,
1312 });
1313}
1314
1315pub(in crate::parser) fn add_list_item(
1317 containers: &mut ContainerStack,
1318 builder: &mut GreenNodeBuilder<'static>,
1319 item: &ListItemEmissionInput<'_>,
1320 config: &ParserOptions,
1321) {
1322 let (content_col, text_to_buffer) = emit_list_item(builder, item);
1323
1324 log::trace!(
1325 "add_list_item: content={:?}, text_to_buffer={:?}",
1326 item.content,
1327 text_to_buffer
1328 );
1329
1330 finish_list_item_with_optional_nested(
1331 containers,
1332 builder,
1333 content_col,
1334 text_to_buffer,
1335 item.virtual_marker_space,
1336 config,
1337 );
1338}
1339
1340fn finish_list_item_with_optional_nested(
1345 containers: &mut ContainerStack,
1346 builder: &mut GreenNodeBuilder<'static>,
1347 content_col: usize,
1348 text_to_buffer: String,
1349 virtual_marker_space: bool,
1350 config: &ParserOptions,
1351) {
1352 let buffered_is_thematic_break =
1357 super::horizontal_rules::try_parse_horizontal_rule(trim_end_newlines(&text_to_buffer))
1358 .is_some();
1359
1360 if !buffered_is_thematic_break
1368 && let Some(inner_match) = try_parse_list_marker(&text_to_buffer, config)
1369 {
1370 let inner_content_start = inner_match.marker_len + inner_match.spaces_after_bytes;
1371 let after_inner =
1372 trim_end_newlines(text_to_buffer.get(inner_content_start..).unwrap_or(""));
1373 if !after_inner.is_empty() {
1377 containers.push(Container::ListItem {
1379 content_col,
1380 buffer: ListItemBuffer::new(),
1381 marker_only: false, virtual_marker_space,
1383 });
1384 builder.start_node(SyntaxKind::LIST.into());
1386 containers.push(Container::List {
1387 marker: inner_match.marker.clone(),
1388 base_indent_cols: content_col,
1389 has_blank_between_items: false,
1390 });
1391 let inner_item = ListItemEmissionInput {
1394 content: text_to_buffer.as_str(),
1395 marker_len: inner_match.marker_len,
1396 spaces_after_cols: inner_match.spaces_after_cols,
1397 spaces_after_bytes: inner_match.spaces_after_bytes,
1398 indent_cols: content_col,
1399 indent_bytes: 0,
1400 virtual_marker_space: inner_match.virtual_marker_space,
1401 };
1402 let (inner_content_col, inner_text_to_buffer) = emit_list_item(builder, &inner_item);
1403 finish_list_item_with_optional_nested(
1404 containers,
1405 builder,
1406 inner_content_col,
1407 inner_text_to_buffer,
1408 inner_match.virtual_marker_space,
1409 config,
1410 );
1411 return;
1412 }
1413 }
1414
1415 if !buffered_is_thematic_break
1424 && text_to_buffer.starts_with('>')
1425 && !text_to_buffer.starts_with(">>")
1426 {
1427 let bytes = text_to_buffer.as_bytes();
1428 let has_trailing_space = bytes.get(1).copied() == Some(b' ');
1429 let content_offset = if has_trailing_space { 2 } else { 1 };
1430 let remaining = &text_to_buffer[content_offset..];
1431
1432 containers.push(Container::ListItem {
1435 content_col,
1436 buffer: ListItemBuffer::new(),
1437 marker_only: false,
1438 virtual_marker_space,
1439 });
1440
1441 builder.start_node(SyntaxKind::BLOCK_QUOTE.into());
1443 builder.token(SyntaxKind::BLOCK_QUOTE_MARKER.into(), ">");
1444 if has_trailing_space {
1445 builder.token(SyntaxKind::WHITESPACE.into(), " ");
1446 }
1447 containers.push(Container::BlockQuote {});
1448
1449 let trimmed = trim_end_newlines(remaining);
1450
1451 let inner_is_thematic_break =
1458 super::horizontal_rules::try_parse_horizontal_rule(trimmed).is_some();
1459 if !inner_is_thematic_break
1460 && let Some(inner_match) = try_parse_list_marker(remaining, config)
1461 {
1462 let inner_content_start = inner_match.marker_len + inner_match.spaces_after_bytes;
1463 let after_inner = trim_end_newlines(remaining.get(inner_content_start..).unwrap_or(""));
1464 if !after_inner.is_empty() {
1465 let bq_content_col = content_col + content_offset;
1466 builder.start_node(SyntaxKind::LIST.into());
1467 containers.push(Container::List {
1468 marker: inner_match.marker.clone(),
1469 base_indent_cols: bq_content_col,
1470 has_blank_between_items: false,
1471 });
1472 let inner_item = ListItemEmissionInput {
1473 content: remaining,
1474 marker_len: inner_match.marker_len,
1475 spaces_after_cols: inner_match.spaces_after_cols,
1476 spaces_after_bytes: inner_match.spaces_after_bytes,
1477 indent_cols: bq_content_col,
1478 indent_bytes: 0,
1479 virtual_marker_space: inner_match.virtual_marker_space,
1480 };
1481 let (inner_content_col, inner_text_to_buffer) =
1482 emit_list_item(builder, &inner_item);
1483 finish_list_item_with_optional_nested(
1484 containers,
1485 builder,
1486 inner_content_col,
1487 inner_text_to_buffer,
1488 inner_match.virtual_marker_space,
1489 config,
1490 );
1491 return;
1492 }
1493 }
1494
1495 if !trimmed.is_empty() {
1500 crate::parser::blocks::paragraphs::start_paragraph_if_needed(containers, builder);
1501 crate::parser::blocks::paragraphs::append_paragraph_line(
1502 containers, builder, remaining, config,
1503 );
1504 }
1505 return;
1506 }
1507
1508 let marker_only = text_to_buffer.trim().is_empty();
1509 let mut buffer = ListItemBuffer::new();
1510 if !text_to_buffer.is_empty() {
1511 buffer.push_text(text_to_buffer);
1512 }
1513 containers.push(Container::ListItem {
1514 content_col,
1515 buffer,
1516 marker_only,
1517 virtual_marker_space,
1518 });
1519}