/// One list entry as recognised by the line-based list parser in this file.
///
/// Equality, cloning and debug formatting are all derived, so two items are
/// equal only when every field matches.
#[derive(Debug, Clone, PartialEq, Eq)]
// NOTE(review): presumably kept because not every field is read by the rest
// of the crate yet — confirm before removing.
#[allow(dead_code)]
pub struct ListItem {
    /// Marker text without its trailing space: `-`, `*` or `+` for bullets,
    /// or just the digits of an ordered marker (the `.`/`)` is not stored).
    pub marker: String,
    /// Text that follows the marker (and the checkbox, for task items).
    pub content: String,
    /// Nesting level of the item. The parser in this file always stores `0`.
    pub level: usize,
    /// `true` when the item carries a `[ ]` / `[x]` / `[X]` checkbox.
    pub is_task: bool,
    /// `Some(true)` for a ticked task, `Some(false)` for an open task and
    /// `None` for items that are not tasks.
    pub checked: Option<bool>,
    /// Line index handed to the parser when the item was created.
    pub line_number: usize,
}
23
24#[derive(Debug, Clone, PartialEq, Eq)]
26#[allow(dead_code)] pub struct List {
28 pub items: Vec<ListItem>,
30 pub start_line: usize,
32 pub end_line: usize,
34 pub is_ordered: bool,
36}
37
38#[must_use]
54#[allow(dead_code)] pub fn detect_lists(content: &str) -> Vec<List> {
56 let mut lists = Vec::new();
57 let lines: Vec<&str> = content.lines().collect();
58
59 let code_line_ranges = get_code_block_line_ranges(content);
61
62 let mut i = 0;
63 while i < lines.len() {
64 let line = lines[i];
65
66 if is_in_code_region(i, &code_line_ranges) {
68 i += 1;
69 continue;
70 }
71
72 if let Some(item) = parse_list_item(line, i) {
74 let mut current_list_items = vec![item];
76 let start_line = i;
77 i += 1;
78
79 while i < lines.len() {
81 let next_line = lines[i];
82
83 if is_in_code_region(i, &code_line_ranges) {
85 break;
86 }
87
88 if next_line.trim().is_empty() {
90 if i + 1 < lines.len() {
92 if let Some(next_item) = parse_list_item(lines[i + 1], i + 1) {
93 if is_same_list(¤t_list_items, &next_item) {
95 i += 1; continue;
97 }
98 }
99 }
100 break;
101 }
102
103 if let Some(next_item) = parse_list_item(next_line, i) {
105 if is_same_list(¤t_list_items, &next_item)
107 || is_nested_list(¤t_list_items, &next_item)
108 {
109 current_list_items.push(next_item);
110 i += 1;
111 continue;
112 }
113 }
114
115 if is_continuation_line(next_line, ¤t_list_items) {
117 i += 1;
118 continue;
119 }
120
121 break;
122 }
123
124 if !current_list_items.is_empty() {
126 let is_ordered = current_list_items[0].marker.parse::<i32>().is_ok();
127 lists.push(List {
128 items: current_list_items,
129 start_line,
130 end_line: i - 1,
131 is_ordered,
132 });
133 }
134 } else {
135 i += 1;
136 }
137 }
138
139 lists
140}
141
/// Returns the inclusive, zero-based `(start, end)` line ranges of all fenced
/// code blocks in `content`, fence lines included.
///
/// A block opens at the first line that (after trimming) starts with at least
/// three backticks or three tildes. It closes only at a line that consists
/// solely of the *same* fence character, repeated at least as many times as in
/// the opening fence. Other fence-looking lines inside an open block (a `~~~`
/// inside a backtick block, or a backtick fence followed by an info string)
/// are treated as block content rather than ending the block early.
///
/// A block that is never closed extends to the last line of `content`.
fn get_code_block_line_ranges(content: &str) -> Vec<(usize, usize)> {
    let mut ranges = Vec::new();
    // (start line, fence character, fence length) of the block currently open.
    let mut open_fence: Option<(usize, char, usize)> = None;
    let mut last_index = 0;

    for (i, line) in content.lines().enumerate() {
        last_index = i;
        let trimmed = line.trim();

        let Some((fence_char, run)) = fence_run(trimmed) else {
            continue;
        };

        match open_fence {
            None => open_fence = Some((i, fence_char, run)),
            Some((start, open_char, open_len)) => {
                // Fence characters are ASCII, so `run` is also the byte length
                // of the fence; equal lengths mean nothing follows the fence.
                let closes =
                    fence_char == open_char && run >= open_len && trimmed.len() == run;
                if closes {
                    ranges.push((start, i));
                    open_fence = None;
                }
            }
        }
    }

    // An unterminated block swallows the rest of the document.
    if let Some((start, _, _)) = open_fence {
        ranges.push((start, last_index));
    }

    ranges
}

/// Returns the fence character and the length of the leading fence run when
/// `line` starts with at least three backticks or three tildes.
fn fence_run(line: &str) -> Option<(char, usize)> {
    let fence_char = line.chars().next().filter(|&c| c == '`' || c == '~')?;
    let run = line.chars().take_while(|&c| c == fence_char).count();
    (run >= 3).then_some((fence_char, run))
}

/// Reports whether `line` (expected to be already trimmed by the caller)
/// starts with a code fence: three backticks or three tildes.
fn is_fence_line(line: &str) -> bool {
    fence_run(line).is_some()
}
178
179fn parse_list_item(line: &str, line_number: usize) -> Option<ListItem> {
181 let trimmed = line.trim_start();
182
183 if let Some(rest) = trimmed.strip_prefix("- ") {
185 return parse_task_or_item("-", rest, line_number);
186 }
187 if let Some(rest) = trimmed.strip_prefix("* ") {
188 return Some(ListItem {
189 marker: "*".to_string(),
190 content: rest.to_string(),
191 level: 0, is_task: false,
193 checked: None,
194 line_number,
195 });
196 }
197 if let Some(rest) = trimmed.strip_prefix("+ ") {
198 return Some(ListItem {
199 marker: "+".to_string(),
200 content: rest.to_string(),
201 level: 0,
202 is_task: false,
203 checked: None,
204 line_number,
205 });
206 }
207
208 let chars: Vec<char> = trimmed.chars().collect();
211 if !chars.is_empty() && chars[0].is_ascii_digit() {
212 let mut num_end = 0;
214 while num_end < chars.len() && chars[num_end].is_ascii_digit() {
215 num_end += 1;
216 }
217
218 if num_end < chars.len() && (chars[num_end] == '.' || chars[num_end] == ')') {
219 let number = &trimmed[0..num_end];
220 let rest = trimmed[num_end + 1..].trim_start();
221 return Some(ListItem {
222 marker: number.to_string(),
223 content: rest.to_string(),
224 level: 0,
225 is_task: false,
226 checked: None,
227 line_number,
228 });
229 }
230 }
231
232 None
233}
234
235fn parse_task_or_item(marker: &str, content: &str, line_number: usize) -> Option<ListItem> {
237 let trimmed = content.trim_start();
238
239 if let Some(rest) = trimmed.strip_prefix("[ ] ") {
241 return Some(ListItem {
242 marker: marker.to_string(),
243 content: rest.to_string(),
244 level: 0,
245 is_task: true,
246 checked: Some(false),
247 line_number,
248 });
249 }
250 if let Some(rest) = trimmed.strip_prefix("[x] ") {
251 return Some(ListItem {
252 marker: marker.to_string(),
253 content: rest.to_string(),
254 level: 0,
255 is_task: true,
256 checked: Some(true),
257 line_number,
258 });
259 }
260 if let Some(rest) = trimmed.strip_prefix("[X] ") {
261 return Some(ListItem {
262 marker: marker.to_string(),
263 content: rest.to_string(),
264 level: 0,
265 is_task: true,
266 checked: Some(true),
267 line_number,
268 });
269 }
270
271 Some(ListItem {
273 marker: marker.to_string(),
274 content: content.to_string(),
275 level: 0,
276 is_task: false,
277 checked: None,
278 line_number,
279 })
280}
281
282#[allow(dead_code)] fn is_same_list(existing_items: &[ListItem], new_item: &ListItem) -> bool {
285 if existing_items.is_empty() {
286 return true;
287 }
288
289 let first = &existing_items[0];
291
292 if ["-", "*", "+"].contains(&first.marker.as_str())
294 && ["-", "*", "+"].contains(&new_item.marker.as_str())
295 {
296 return true;
297 }
298
299 if first.marker.parse::<i32>().is_ok() && new_item.marker.parse::<i32>().is_ok() {
301 return true;
302 }
303
304 false
305}
306
307#[allow(dead_code)] fn is_nested_list(existing_items: &[ListItem], new_item: &ListItem) -> bool {
310 if existing_items.is_empty() {
313 return false;
314 }
315
316 new_item.line_number > existing_items[0].line_number
319}
320
321#[allow(dead_code)] fn is_continuation_line(line: &str, current_items: &[ListItem]) -> bool {
324 if current_items.is_empty() {
325 return false;
326 }
327
328 if line.trim().is_empty() {
331 return false;
332 }
333
334 let leading_spaces = line.len() - line.trim_start().len();
335 leading_spaces >= 2 && parse_list_item(line, 0).is_none()
337}
338
/// Reports whether `line_num` lies inside any of `regions`, where each region
/// is an inclusive `(start, end)` pair of line indices.
fn is_in_code_region(line_num: usize, regions: &[(usize, usize)]) -> bool {
    regions
        .iter()
        .any(|&(start, end)| (start..=end).contains(&line_num))
}
348
349#[must_use]
364#[allow(dead_code)] pub fn normalize_list_indentation(content: &str) -> String {
366 let lines: Vec<&str> = content.lines().collect();
367 if lines.is_empty() {
368 return String::new();
369 }
370
371 let code_ranges = get_code_block_line_ranges(content);
373
374 let mut result = Vec::new();
375 let mut list_stack: Vec<usize> = Vec::new(); for (i, line) in lines.iter().enumerate() {
378 if is_in_code_region(i, &code_ranges) {
380 result.push(line.to_string());
381 continue;
382 }
383
384 if let Some(item) = parse_list_item(line, i) {
386 let current_indent = line.len() - line.trim_start().len();
387
388 let level = if list_stack.is_empty() {
390 list_stack.push(current_indent);
392 0
393 } else {
394 let mut level = list_stack.len();
396 for (idx, &indent) in list_stack.iter().enumerate() {
397 if current_indent <= indent {
398 level = idx;
399 break;
400 }
401 }
402
403 list_stack.truncate(level);
405
406 if level == list_stack.len() {
408 list_stack.push(current_indent);
409 }
410
411 level
412 };
413
414 let normalized_indent = " ".repeat(level);
416 let reconstructed = format!("{}{} {}", normalized_indent, item.marker, item.content);
417 result.push(reconstructed);
418 } else {
419 if line.trim().is_empty() {
421 list_stack.clear();
422 }
423 result.push(line.to_string());
424 }
425 }
426
427 result.join("\n")
428}
429
430#[must_use]
452#[allow(dead_code)] pub fn normalize_bullet_styles(content: &str, target_bullet: char) -> String {
454 let lines: Vec<&str> = content.lines().collect();
455 if lines.is_empty() {
456 return String::new();
457 }
458
459 let target = match target_bullet {
461 '-' | '*' | '+' => target_bullet,
462 _ => '-', };
464
465 let code_ranges = get_code_block_line_ranges(content);
467
468 let mut result = Vec::new();
469
470 for (i, line) in lines.iter().enumerate() {
471 if is_in_code_region(i, &code_ranges) {
473 result.push(line.to_string());
474 continue;
475 }
476
477 if let Some(item) = parse_list_item(line, i) {
479 if ["-", "*", "+"].contains(&item.marker.as_str()) {
481 let indent = line.len() - line.trim_start().len();
482 let indent_str = " ".repeat(indent);
483
484 let reconstructed = if item.is_task {
486 format!(
487 "{}{} [{}] {}",
488 indent_str,
489 target,
490 if item.checked.unwrap_or(false) {
491 "x"
492 } else {
493 " "
494 },
495 item.content
496 )
497 } else {
498 format!("{}{} {}", indent_str, target, item.content)
499 };
500 result.push(reconstructed);
501 } else {
502 result.push(line.to_string());
504 }
505 } else {
506 result.push(line.to_string());
507 }
508 }
509
510 result.join("\n")
511}
512
513#[must_use]
530pub fn normalize_lists(content: &str) -> String {
531 let lines: Vec<&str> = content.lines().collect();
532 if lines.is_empty() {
533 return String::new();
534 }
535
536 let code_ranges = get_code_block_line_ranges(content);
538
539 let mut result = Vec::new();
540 let mut list_stack: Vec<usize> = Vec::new(); let target_bullet = '-';
542
543 for (i, line) in lines.iter().enumerate() {
544 if is_in_code_region(i, &code_ranges) {
546 result.push(line.to_string());
547 list_stack.clear();
549 continue;
550 }
551
552 if let Some(item) = parse_list_item(line, i) {
554 let current_indent = line.len() - line.trim_start().len();
555
556 let level = if list_stack.is_empty() {
558 list_stack.push(current_indent);
560 0
561 } else {
562 let mut level = list_stack.len();
564 for (idx, &indent) in list_stack.iter().enumerate() {
565 if current_indent <= indent {
566 level = idx;
567 break;
568 }
569 }
570
571 list_stack.truncate(level);
573
574 if level == list_stack.len() {
576 list_stack.push(current_indent);
577 }
578
579 level
580 };
581
582 let normalized_indent = " ".repeat(level);
584
585 let reconstructed = if item.is_task && ["-", "*", "+"].contains(&item.marker.as_str()) {
587 format!(
588 "{}{} [{}] {}",
589 normalized_indent,
590 target_bullet,
591 if item.checked.unwrap_or(false) {
592 "x"
593 } else {
594 " "
595 },
596 item.content
597 )
598 } else if ["-", "*", "+"].contains(&item.marker.as_str()) {
599 format!("{}{} {}", normalized_indent, target_bullet, item.content)
600 } else {
601 format!("{}{}. {}", normalized_indent, item.marker, item.content)
603 };
604 result.push(reconstructed);
605 } else {
606 if line.trim().is_empty() {
608 list_stack.clear();
610 }
611 result.push(line.to_string());
612 }
613 }
614
615 result.join("\n")
616}
617
618#[must_use]
637#[allow(dead_code)] pub fn normalize_loose_lists(content: &str) -> String {
639 let lines: Vec<&str> = content.lines().collect();
640 if lines.is_empty() {
641 return String::new();
642 }
643
644 let code_line_ranges = get_code_block_line_ranges(content);
646
647 let mut result = Vec::new();
648
649 for (i, line) in lines.iter().enumerate() {
650 if is_in_code_region(i, &code_line_ranges) {
652 result.push(line.to_string());
653 continue;
654 }
655
656 let is_list_item = parse_list_item(line, i).is_some();
658
659 let prev_line = if i > 0 { lines.get(i - 1) } else { None };
661 let prev_trimmed = prev_line.map_or("", |l| l.trim());
662
663 let needs_blank_line = if is_list_item {
665 let prev_was_header = prev_trimmed.starts_with('#');
667 let prev_was_list_item = prev_line.is_some_and(|l| parse_list_item(l, i - 1).is_some());
672 let prev_was_paragraph =
673 !prev_trimmed.is_empty() && !prev_was_header && !prev_was_list_item;
674
675 (prev_was_header || prev_was_paragraph)
676 && !result.is_empty()
677 && result.last().is_none_or(|s: &String| !s.trim().is_empty())
678 } else {
679 false
680 };
681
682 if needs_blank_line {
683 result.push(String::new());
684 }
685
686 result.push(line.to_string());
687 }
688
689 result.join("\n")
690}
691
/// Unit tests for list detection and the list normalization helpers.
///
/// Indentation inside the fixture strings is significant: nested items in the
/// expected output use two spaces per level.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- detect_lists -----------------------------------------------------

    #[test]
    fn detect_simple_bullet_list() {
        let content = "- Item 1\n- Item 2\n- Item 3";
        let lists = detect_lists(content);
        assert_eq!(lists.len(), 1);
        assert_eq!(lists[0].items.len(), 3);
        assert_eq!(lists[0].items[0].content, "Item 1");
        assert_eq!(lists[0].items[1].content, "Item 2");
        assert_eq!(lists[0].items[2].content, "Item 3");
    }

    #[test]
    fn detect_list_with_mixed_bullets() {
        let content = "- Item 1\n* Item 2\n+ Item 3";
        let lists = detect_lists(content);
        assert_eq!(lists.len(), 1);
        assert_eq!(lists[0].items.len(), 3);
    }

    #[test]
    fn detect_ordered_list() {
        let content = "1. First item\n2. Second item\n3. Third item";
        let lists = detect_lists(content);
        assert_eq!(lists.len(), 1);
        assert_eq!(lists[0].items.len(), 3);
        assert!(lists[0].is_ordered);
    }

    #[test]
    fn detect_task_list() {
        let content = "- [ ] Todo item\n- [x] Done item\n- [X] Also done";
        let lists = detect_lists(content);
        assert_eq!(lists.len(), 1);
        assert_eq!(lists[0].items.len(), 3);
        assert!(lists[0].items[0].is_task);
        assert!(!lists[0].items[0].checked.unwrap());
        assert!(lists[0].items[1].is_task);
        assert!(lists[0].items[1].checked.unwrap());
    }

    #[test]
    fn ignore_lists_in_code_blocks() {
        let content =
            "```markdown\n- Item in code block\n- Another item\n```\n\n- Real item outside";
        let lists = detect_lists(content);
        assert_eq!(lists.len(), 1);
        assert_eq!(lists[0].items.len(), 1);
        assert_eq!(lists[0].items[0].content, "Real item outside");
    }

    #[test]
    fn detect_multiple_lists() {
        let content = "- First list item 1\n- First list item 2\n\nSome text\n\n* Second list item 1\n* Second list item 2";
        let lists = detect_lists(content);
        assert_eq!(lists.len(), 2);
        assert_eq!(lists[0].items.len(), 2);
        assert_eq!(lists[1].items.len(), 2);
    }

    #[test]
    fn no_lists_in_plain_text() {
        let content = "This is just a paragraph.\nNo lists here.\nJust text.";
        let lists = detect_lists(content);
        assert_eq!(lists.len(), 0);
    }

    // ---- normalize_list_indentation ----------------------------------------

    #[test]
    fn normalize_indentation_to_two_spaces() {
        // The nested item is indented with four spaces in the input.
        let content = "- Item 1\n    - Nested item\n- Item 2";
        let normalized = normalize_list_indentation(content);
        assert!(normalized.contains("- Item 1"));
        // Level 1 must come out with exactly two spaces...
        assert!(normalized.contains("  - Nested item"));
        // ...and the original four-space indent must be gone.
        assert!(!normalized.contains("    - Nested"));
    }

    #[test]
    fn normalize_deeply_nested_list() {
        let content = "- Level 1\n    - Level 2\n        - Level 3\n- Back to 1";
        let normalized = normalize_list_indentation(content);
        assert!(normalized.contains("- Level 1"));
        assert!(normalized.contains("  - Level 2"));
        assert!(normalized.contains("    - Level 3"));
    }

    #[test]
    fn preserve_content_when_normalizing() {
        let content = "- First item with text\n    - Second item with more text";
        let normalized = normalize_list_indentation(content);
        assert!(normalized.contains("First item with text"));
        assert!(normalized.contains("Second item with more text"));
    }

    #[test]
    fn no_change_to_already_normalized() {
        let content = "- Item 1\n  - Nested\n  - Another nested\n- Item 2";
        let normalized = normalize_list_indentation(content);
        assert_eq!(normalized, content);
    }

    #[test]
    fn normalize_mixed_indentation_styles() {
        let content = "- Item 1\n  - Two space\n    - Four space (should be 4)\n- Item 2";
        let normalized = normalize_list_indentation(content);
        assert!(normalized.contains("- Item 1"));
        assert!(normalized.contains("  - Two space"));
        // Third level of indentation in the source is nesting level 2.
        assert!(normalized.contains("    - Four space"));
    }

    // ---- normalize_bullet_styles --------------------------------------------

    #[test]
    fn normalize_bullet_styles_to_dash() {
        let content = "- Item 1\n* Item 2\n+ Item 3";
        let normalized = normalize_bullet_styles(content, '-');
        assert!(normalized.contains("- Item 1"));
        assert!(normalized.contains("- Item 2"));
        assert!(normalized.contains("- Item 3"));
        assert!(!normalized.contains("* Item"));
        assert!(!normalized.contains("+ Item"));
    }

    #[test]
    fn normalize_bullet_styles_to_asterisk() {
        let content = "- Item 1\n* Item 2\n+ Item 3";
        let normalized = normalize_bullet_styles(content, '*');
        assert!(normalized.contains("* Item 1"));
        assert!(normalized.contains("* Item 2"));
        assert!(normalized.contains("* Item 3"));
    }

    #[test]
    fn bullet_normalization_preserves_indentation() {
        let content = "- Item 1\n  * Nested\n    + Deep";
        let normalized = normalize_bullet_styles(content, '-');
        assert!(normalized.contains("- Item 1"));
        assert!(normalized.contains("  - Nested"));
        assert!(normalized.contains("    - Deep"));
    }

    #[test]
    fn bullet_normalization_preserves_task_lists() {
        let content = "- [ ] Todo\n* [x] Done\n+ [ ] Another";
        let normalized = normalize_bullet_styles(content, '-');
        assert!(normalized.contains("- [ ] Todo"));
        assert!(normalized.contains("- [x] Done"));
        assert!(normalized.contains("- [ ] Another"));
    }

    #[test]
    fn bullet_normalization_preserves_ordered_lists() {
        let content = "1. First\n2. Second\n- Unordered";
        let normalized = normalize_bullet_styles(content, '-');
        assert!(normalized.contains("1. First"));
        assert!(normalized.contains("2. Second"));
        assert!(normalized.contains("- Unordered"));
    }

    // ---- normalize_lists ------------------------------------------------------

    #[test]
    fn normalize_complex_nested_list() {
        let content = "- Level 1\n  * Level 2\n    + Level 3\n- Back to 1";
        let normalized = normalize_lists(content);
        assert!(normalized.contains("- Level 1"));
        assert!(normalized.contains("  - Level 2"));
        assert!(normalized.contains("    - Level 3"));
        assert!(normalized.contains("- Back to 1"));
    }

    #[test]
    fn normalize_deeply_nested_structure() {
        let content = "- A\n  - B\n    - C\n      - D\n- E";
        let normalized = normalize_lists(content);
        assert!(normalized.contains("- A"));
        assert!(normalized.contains("  - B"));
        assert!(normalized.contains("    - C"));
        assert!(normalized.contains("      - D"));
        assert!(normalized.contains("- E"));
    }

    #[test]
    fn normalize_nested_with_inconsistent_indentation() {
        let content = "- Item 1\n    - Nested with 4\n      - Deeper\n- Item 2";
        let normalized = normalize_lists(content);
        assert!(normalized.contains("- Item 1"));
        // Four source spaces collapse to nesting level 1 (two spaces).
        assert!(normalized.contains("  - Nested with 4"));
        // Six source spaces become nesting level 2 (four spaces).
        assert!(normalized.contains("    - Deeper"));
        assert!(normalized.contains("- Item 2"));
    }

    #[test]
    fn separate_adjacent_lists() {
        let content =
            "- First list A\n- First list B\n\nSome text\n\n* Second list A\n* Second list B";
        let normalized = normalize_lists(content);
        assert!(normalized.contains("- First list A"));
        assert!(normalized.contains("- First list B"));
        // The second list's `*` bullets are rewritten to `-`.
        assert!(normalized.contains("- Second list A"));
        assert!(normalized.contains("- Second list B"));
    }

    #[test]
    fn preserve_task_list_checkboxes() {
        let content = "- [ ] Unchecked todo\n- [x] Checked todo\n- [X] Also checked";
        let normalized = normalize_lists(content);
        assert!(normalized.contains("- [ ] Unchecked todo"));
        assert!(normalized.contains("- [x] Checked todo"));
        // An uppercase `[X]` is normalized to lowercase.
        assert!(normalized.contains("- [x] Also checked"));
    }

    #[test]
    fn normalize_mixed_task_and_regular() {
        let content = "- [ ] Todo item\n- Regular item\n- [x] Done item";
        let normalized = normalize_lists(content);
        assert!(normalized.contains("- [ ] Todo item"));
        assert!(normalized.contains("- Regular item"));
        assert!(normalized.contains("- [x] Done item"));
    }

    #[test]
    fn nested_task_lists() {
        let content =
            "- [ ] Parent task\n  - [ ] Subtask 1\n  - [x] Subtask 2\n- [ ] Another parent";
        let normalized = normalize_lists(content);
        assert!(normalized.contains("- [ ] Parent task"));
        assert!(normalized.contains("  - [ ] Subtask 1"));
        assert!(normalized.contains("  - [x] Subtask 2"));
        assert!(normalized.contains("- [ ] Another parent"));
    }

    #[test]
    fn task_list_with_bullet_normalization() {
        let content = "- [ ] Todo 1\n* [ ] Todo 2\n+ [x] Done";
        let normalized = normalize_lists(content);
        assert!(normalized.contains("- [ ] Todo 1"));
        assert!(normalized.contains("- [ ] Todo 2"));
        assert!(normalized.contains("- [x] Done"));
    }

    #[test]
    fn task_lists_in_code_blocks_preserved() {
        let content =
            "```markdown\n- [ ] In code block\n- [x] Also in block\n```\n\n- [ ] Real task outside";
        let normalized = normalize_lists(content);
        // Code-block content is copied verbatim.
        assert!(normalized.contains("- [ ] In code block"));
        assert!(normalized.contains("- [ ] Real task outside"));
    }

    // ---- normalize_loose_lists ----------------------------------------------

    #[test]
    fn add_blank_line_between_header_and_list() {
        let content = "# My Title\n- Item 1\n- Item 2";
        let normalized = normalize_loose_lists(content);
        assert!(normalized.contains("# My Title\n\n- Item 1"));
    }

    #[test]
    fn add_blank_line_between_paragraph_and_list() {
        let content = "Some paragraph text\n- Item 1\n- Item 2";
        let normalized = normalize_loose_lists(content);
        assert!(normalized.contains("Some paragraph text\n\n- Item 1"));
    }

    #[test]
    fn preserve_existing_blank_lines() {
        let content = "# Title\n\n- Item 1\n\nSome text\n\n- Item 2";
        let normalized = normalize_loose_lists(content);
        assert_eq!(normalized, content);
    }

    #[test]
    fn handle_loose_lists_with_paragraphs() {
        let content = "- Item 1\n\n  Paragraph text\n\n- Item 2";
        let normalized = normalize_loose_lists(content);
        assert!(normalized.contains("- Item 1\n\n  Paragraph text\n\n- Item 2"));
    }

    #[test]
    fn multiple_headers_need_spacing() {
        let content = "# Header 1\n- Item A\n# Header 2\n- Item B";
        let normalized = normalize_loose_lists(content);
        assert!(normalized.contains("# Header 1\n\n- Item A"));
        assert!(normalized.contains("# Header 2\n\n- Item B"));
    }
}