1use crate::config::MarkdownFlavor;
2use crate::utils::ast_utils::get_cached_ast;
3use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
4use lazy_static::lazy_static;
5use markdown::mdast::Node;
6use regex::Regex;
7
lazy_static! {
    // Inline links `[text](url)` and reference links `[text][id]`.
    // Compiled with the `s` and `x` flags: whitespace and the `# ...` notes inside
    // the raw string are part of the verbose pattern and ignored by the engine.
    static ref LINK_PATTERN: Regex = Regex::new(
        r"(?sx)
        \[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\] # Link text in group 1 (handles nested brackets)
        (?:
        \(([^)]*)\) # Inline URL in group 2 (can be empty)
        |
        \[([^\]]*)\] # Reference ID in group 3
        )"
    ).unwrap();

    // Same shape as LINK_PATTERN but anchored on a leading `!`, i.e. images
    // `![alt](url)` and `![alt][id]`.
    static ref IMAGE_PATTERN: Regex = Regex::new(
        r"(?sx)
        !\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\] # Alt text in group 1 (handles nested brackets)
        (?:
        \(([^)]*)\) # Inline URL in group 2 (can be empty)
        |
        \[([^\]]*)\] # Reference ID in group 3
        )"
    ).unwrap();

    // Reference definition `[id]: url "title"` with up to three leading spaces.
    // Group 1 = id, group 2 = url, group 3 / 4 = double- / single-quoted title.
    static ref REF_DEF_PATTERN: Regex = Regex::new(
        r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#
    ).unwrap();

    // A run of one or more backticks (a code span delimiter candidate).
    static ref CODE_SPAN_PATTERN: Regex = Regex::new(
        r"`+"
    ).unwrap();

    // Bare `http://`, `https://` or `ftp://` URL with optional port, path, query
    // and fragment parts.
    static ref BARE_URL_PATTERN: Regex = Regex::new(
        r#"(https?|ftp)://[^\s<>\[\]()\\'"`]+(?:\.[^\s<>\[\]()\\'"`]+)*(?::\d+)?(?:/[^\s<>\[\]()\\'"`]*)?(?:\?[^\s<>\[\]()\\'"`]*)?(?:#[^\s<>\[\]()\\'"`]*)?"#
    ).unwrap();

    // Bare e-mail address: local part, `@`, domain, and a TLD of two or more letters.
    static ref BARE_EMAIL_PATTERN: Regex = Regex::new(
        r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
    ).unwrap();

    // `<...>` autolink wrapping either a URL or an e-mail-like address (group 1).
    static ref ANGLE_BRACKET_PATTERN: Regex = Regex::new(
        r"<((?:https?|ftp)://[^>]+|[^@\s]+@[^@\s]+\.[^@\s>]+)>"
    ).unwrap();

    // Leading blockquote prefix: optional whitespace, one or more `>`, optional
    // whitespace (group 1).
    static ref BLOCKQUOTE_PREFIX_REGEX: Regex = Regex::new(r"^(\s*>+\s*)").unwrap();
}
62
/// Per-line facts computed once while a `LintContext` is being built.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Text of the line as produced by `str::lines` (no line terminator).
    pub content: String,
    /// Byte offset of the start of this line within the document.
    pub byte_offset: usize,
    /// Length in bytes of the line's leading whitespace.
    pub indent: usize,
    /// True when the line is whitespace-only, or when it starts with a `>`
    /// blockquote marker and only whitespace follows that marker.
    pub is_blank: bool,
    /// True when the line's start offset falls inside a detected code block.
    pub in_code_block: bool,
    /// True for lines belonging to the leading `---` front matter section.
    pub in_front_matter: bool,
    /// Initialised to `false` by `compute_line_info`; set later by
    /// `detect_html_blocks`.
    pub in_html_block: bool,
    /// List marker details when the line starts a list item.
    pub list_item: Option<ListItemInfo>,
    /// Heading details when the line is an ATX heading or the text line of a
    /// Setext heading.
    pub heading: Option<HeadingInfo>,
    /// Blockquote details when the line starts with one or more `>` markers.
    pub blockquote: Option<BlockquoteInfo>,
}
87
/// Marker information for a line that starts a list item.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker text: `-`, `*` or `+` for unordered items, or the number plus
    /// its delimiter (for example `1.` or `2)`) for ordered items.
    pub marker: String,
    /// True for numbered items, false for bullet items.
    pub is_ordered: bool,
    /// Parsed item number for ordered items; `None` for unordered items or when
    /// the digits do not fit in `usize`.
    pub number: Option<usize>,
    /// Byte column of the marker: blockquote prefix length plus leading whitespace.
    pub marker_column: usize,
    /// Byte column where the item text starts: `marker_column` plus the marker
    /// length plus the spaces/tabs that follow it.
    pub content_column: usize,
}
102
/// Syntax used to write a heading.
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    /// `#`-prefixed heading.
    ATX,
    /// Setext heading whose underline starts with `=` (level 1).
    Setext1,
    /// Setext heading whose underline does not start with `=`, i.e. `-` (level 2).
    Setext2,
}
113
/// A link found by `parse_links` (inline `[text](url)` or reference `[text][id]`).
#[derive(Debug, Clone)]
pub struct ParsedLink {
    /// 1-based number of the line on which the match starts.
    pub line: usize,
    /// 0-based byte offset of the match start, relative to the start of `line`.
    pub start_col: usize,
    /// Byte offset of the match end, relative to the start of the line on which
    /// the match ends.
    pub end_col: usize,
    /// Byte offset of the match start within the document.
    pub byte_offset: usize,
    /// Byte offset just past the end of the match within the document.
    pub byte_end: usize,
    /// Raw text between the first pair of square brackets.
    pub text: String,
    /// Raw text between the parentheses for inline links; empty for reference links.
    pub url: String,
    /// True for `[text][id]` / `[text][]` style links.
    pub is_reference: bool,
    /// Lowercased reference label; for `[text][]` this is the lowercased link text.
    /// `None` for inline links.
    pub reference_id: Option<String>,
}
136
/// An image found by `parse_images` (inline `![alt](url)` or reference `![alt][id]`).
#[derive(Debug, Clone)]
pub struct ParsedImage {
    /// 1-based number of the line on which the match starts.
    pub line: usize,
    /// 0-based byte offset of the match start, relative to the start of `line`.
    pub start_col: usize,
    /// Byte offset of the match end, relative to the start of the line on which
    /// the match ends.
    pub end_col: usize,
    /// Byte offset of the match start (the `!`) within the document.
    pub byte_offset: usize,
    /// Byte offset just past the end of the match within the document.
    pub byte_end: usize,
    /// Raw text between the first pair of square brackets.
    pub alt_text: String,
    /// Raw text between the parentheses for inline images; empty for reference images.
    pub url: String,
    /// True for `![alt][id]` / `![alt][]` style images.
    pub is_reference: bool,
    /// Lowercased reference label; for `![alt][]` this is the lowercased alt text.
    /// `None` for inline images.
    pub reference_id: Option<String>,
}
159
/// A reference definition line such as `[id]: url "title"`.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// 1-based line number of the definition.
    pub line: usize,
    /// Label between the square brackets, lowercased.
    pub id: String,
    /// The first whitespace-free token after the colon.
    pub url: String,
    /// Optional title given in double or single quotes (quotes removed).
    pub title: Option<String>,
}
172
/// An inline code span. Instances are produced by `parse_code_spans`, which is
/// outside this excerpt, so the notes below describe how the fields are consumed.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// Line number; compared against 1-based line numbers by `is_in_code_span`.
    pub line: usize,
    /// Start column; compared against a 0-based column by `is_in_code_span`.
    pub start_col: usize,
    /// End column, treated as exclusive by `is_in_code_span`.
    pub end_col: usize,
    /// Byte offset of the span start within the document.
    pub byte_offset: usize,
    /// Byte offset of the span end, treated as exclusive by callers in this file.
    pub byte_end: usize,
    /// Presumably the length of the delimiting backtick run — TODO confirm
    /// against `parse_code_spans`.
    pub backtick_count: usize,
    /// Presumably the text between the delimiters — TODO confirm.
    pub content: String,
}
191
/// Details of a heading, attached to the line that carries the heading text.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level: number of `#` characters for ATX, 1 (`=`) or 2 (`-`) for Setext.
    pub level: u8,
    /// Whether the heading is ATX or Setext (level 1 / level 2).
    pub style: HeadingStyle,
    /// The opening `#` run for ATX headings, or the trimmed underline for Setext.
    pub marker: String,
    /// Leading-whitespace length (bytes) of the heading line for ATX, or of the
    /// underline line for Setext.
    pub marker_column: usize,
    /// For ATX: `marker_column` plus the hashes plus the whitespace after them.
    /// For Setext: the indent of the heading text line.
    pub content_column: usize,
    /// Heading text as returned by `extract_header_id` (first tuple element).
    pub text: String,
    /// Custom id returned by `extract_header_id`, or taken from a standalone
    /// attribute-list line following the heading when the heading has none.
    pub custom_id: Option<String>,
    /// Trimmed heading text as passed to `extract_header_id`.
    pub raw_text: String,
    /// True when an ATX heading ends with a closing `#` run; always false for Setext.
    pub has_closing_sequence: bool,
    /// The closing `#` run, or an empty string when there is none.
    pub closing_sequence: String,
}
216
/// Details of a line that starts with one or more `>` blockquote markers.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Number of `>` characters in the leading contiguous marker run.
    pub nesting_level: usize,
    /// Whitespace preceding the first marker.
    pub indent: String,
    /// Byte length of `indent`, i.e. the column of the first marker.
    pub marker_column: usize,
    /// `indent`, the marker run and the whitespace after it, concatenated.
    pub prefix: String,
    /// Remainder of the line after `prefix`.
    pub content: String,
    /// True when text follows the markers with no whitespace in between.
    pub has_no_space_after_marker: bool,
    /// True when more than one whitespace byte, or a tab, follows the markers.
    pub has_multiple_spaces_after_marker: bool,
    /// True when the line has markers only: no whitespace and no text after them.
    pub needs_md028_fix: bool,
}
237
/// A group of list lines. Instances are produced by `parse_list_blocks`, which
/// is outside this excerpt; field notes other than the line range are assumptions.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// First line of the block; treated as inclusive by `is_in_list_block`.
    pub start_line: usize,
    /// Last line of the block; treated as inclusive by `is_in_list_block`.
    pub end_line: usize,
    /// Presumably whether the block is a numbered list — TODO confirm.
    pub is_ordered: bool,
    /// Presumably the shared marker of the block, if any — TODO confirm.
    pub marker: Option<String>,
    /// Presumably the blockquote prefix the block is nested under — TODO confirm.
    pub blockquote_prefix: String,
    /// Presumably the line numbers of the items in the block — TODO confirm.
    pub item_lines: Vec<usize>,
    /// Presumably the nesting depth of the block — TODO confirm.
    pub nesting_level: usize,
    /// Presumably the widest marker seen in the block — TODO confirm.
    pub max_marker_width: usize,
}
258
259use std::sync::{Arc, Mutex};
260
/// Counters for selected characters of the document. The character each
/// counter stands for is the one `has_char` / `char_count` map it to; the
/// counters are filled by `compute_char_frequency` (outside this excerpt).
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Counter reported for `#`.
    pub hash_count: usize,
    /// Counter reported for `*`.
    pub asterisk_count: usize,
    /// Counter reported for `_`.
    pub underscore_count: usize,
    /// Counter reported for `-`.
    pub hyphen_count: usize,
    /// Counter reported for `+`.
    pub plus_count: usize,
    /// Counter reported for `>`.
    pub gt_count: usize,
    /// Counter reported for `|`.
    pub pipe_count: usize,
    /// Counter reported for `[`.
    pub bracket_count: usize,
    /// Counter reported for a backtick.
    pub backtick_count: usize,
    /// Counter reported for `<`.
    pub lt_count: usize,
    /// Counter reported for `!`.
    pub exclamation_count: usize,
    /// Counter reported for `\n`.
    pub newline_count: usize,
}
289
/// An HTML tag occurrence. Instances are produced by `parse_html_tags`, which is
/// outside this excerpt; field notes are assumptions based on naming unless
/// stated otherwise.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number; `html_tags_on_line` filters on this field.
    pub line: usize,
    /// Start column within the line — indexing base not visible here, TODO confirm.
    pub start_col: usize,
    /// End column within the line — TODO confirm inclusivity.
    pub end_col: usize,
    /// Presumably the byte offset of the tag start in the document.
    pub byte_offset: usize,
    /// Presumably the byte offset of the tag end in the document.
    pub byte_end: usize,
    /// Presumably the element name — TODO confirm normalisation (case).
    pub tag_name: String,
    /// Presumably true for `</tag>` forms.
    pub is_closing: bool,
    /// Presumably true for `<tag />` forms.
    pub is_self_closing: bool,
    /// Presumably the tag text as written in the document.
    pub raw_content: String,
}
312
/// An emphasis span. Instances are produced by `parse_emphasis_spans`, which is
/// outside this excerpt; field notes are assumptions based on naming unless
/// stated otherwise.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number; `emphasis_spans_on_line` filters on this field.
    pub line: usize,
    /// Start column within the line — indexing base not visible here, TODO confirm.
    pub start_col: usize,
    /// End column within the line — TODO confirm inclusivity.
    pub end_col: usize,
    /// Presumably the byte offset of the span start in the document.
    pub byte_offset: usize,
    /// Presumably the byte offset of the span end in the document.
    pub byte_end: usize,
    /// Presumably the delimiter character (`*` or `_`) — TODO confirm.
    pub marker: char,
    /// Presumably the number of delimiter characters on each side — TODO confirm.
    pub marker_count: usize,
    /// Presumably the text between the delimiters.
    pub content: String,
}
333
/// A table row. Instances are produced by `parse_table_rows`, which is outside
/// this excerpt; field notes are assumptions based on naming unless stated
/// otherwise.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number; `table_rows_on_line` filters on this field.
    pub line: usize,
    /// Presumably true for the header/body delimiter row — TODO confirm.
    pub is_separator: bool,
    /// Presumably the number of cells in the row — TODO confirm.
    pub column_count: usize,
    /// Presumably one alignment label per column — value set not visible here.
    pub column_alignments: Vec<String>,
}
346
/// A bare URL or e-mail occurrence. Instances are produced by `parse_bare_urls`,
/// which is outside this excerpt; field notes are assumptions based on naming
/// unless stated otherwise.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number; `bare_urls_on_line` filters on this field.
    pub line: usize,
    /// Start column within the line — indexing base not visible here, TODO confirm.
    pub start_col: usize,
    /// End column within the line — TODO confirm inclusivity.
    pub end_col: usize,
    /// Presumably the byte offset of the match start in the document.
    pub byte_offset: usize,
    /// Presumably the byte offset of the match end in the document.
    pub byte_end: usize,
    /// Presumably the matched URL or address text.
    pub url: String,
    /// Presumably a label for the kind of match — value set not visible here.
    pub url_type: String,
}
365
/// Shared, pre-parsed view of one Markdown document. Cheap structures are
/// computed eagerly in `new`; the `*_cache` fields are filled on first use by
/// their accessor methods.
pub struct LintContext<'a> {
    /// The document text.
    pub content: &'a str,
    /// Byte offset of the start of every line; the first entry is always 0.
    pub line_offsets: Vec<usize>,
    /// Byte ranges reported by `CodeBlockUtils::detect_code_blocks`.
    pub code_blocks: Vec<(usize, usize)>,
    /// Per-line information, indexed by 0-based line index.
    pub lines: Vec<LineInfo>,
    /// Links found outside code blocks and code spans.
    pub links: Vec<ParsedLink>,
    /// Images found outside code blocks and code spans.
    pub images: Vec<ParsedImage>,
    /// Reference definitions found outside code blocks.
    pub reference_defs: Vec<ReferenceDef>,
    /// Code spans; populated in `new`, exposed through `code_spans()`.
    code_spans_cache: Mutex<Option<Arc<Vec<CodeSpan>>>>,
    /// List blocks returned by `parse_list_blocks`.
    pub list_blocks: Vec<ListBlock>,
    /// Character counters backing `has_char`, `char_count` and `likely_has_*`.
    pub char_frequency: CharFrequency,
    /// Lazily filled by `html_tags()`.
    html_tags_cache: Mutex<Option<Arc<Vec<HtmlTag>>>>,
    /// Lazily filled by `emphasis_spans()`.
    emphasis_spans_cache: Mutex<Option<Arc<Vec<EmphasisSpan>>>>,
    /// Lazily filled by `table_rows()`.
    table_rows_cache: Mutex<Option<Arc<Vec<TableRow>>>>,
    /// Lazily filled by `bare_urls()`.
    bare_urls_cache: Mutex<Option<Arc<Vec<BareUrl>>>>,
    /// Lazily filled by `get_ast()`.
    ast_cache: Mutex<Option<Arc<Node>>>,
    /// Markdown flavor the document is linted as.
    pub flavor: MarkdownFlavor,
}
384
385impl<'a> LintContext<'a> {
386 pub fn new(content: &'a str, flavor: MarkdownFlavor) -> Self {
387 let mut line_offsets = vec![0];
388 for (i, c) in content.char_indices() {
389 if c == '\n' {
390 line_offsets.push(i + 1);
391 }
392 }
393
394 let code_blocks = CodeBlockUtils::detect_code_blocks(content);
396
397 let mut lines = Self::compute_line_info(content, &line_offsets, &code_blocks, flavor);
399
400 let ast = get_cached_ast(content);
402 let code_spans = Self::parse_code_spans(content, &lines, &ast);
403
404 let links = Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor);
406 let images = Self::parse_images(content, &lines, &code_blocks, &code_spans);
407 let reference_defs = Self::parse_reference_defs(content, &lines);
408 let list_blocks = Self::parse_list_blocks(&lines);
411
412 Self::detect_html_blocks(&mut lines);
414
415 let char_frequency = Self::compute_char_frequency(content);
417
418 Self {
419 content,
420 line_offsets,
421 code_blocks,
422 lines,
423 links,
424 images,
425 reference_defs,
426 code_spans_cache: Mutex::new(Some(Arc::new(code_spans))),
427 list_blocks,
428 char_frequency,
429 html_tags_cache: Mutex::new(None),
430 emphasis_spans_cache: Mutex::new(None),
431 table_rows_cache: Mutex::new(None),
432 bare_urls_cache: Mutex::new(None),
433 ast_cache: Mutex::new(None),
434 flavor,
435 }
436 }
437
438 pub fn get_ast(&self) -> Arc<Node> {
440 let mut cache = self.ast_cache.lock().unwrap();
441
442 if cache.is_none() {
443 *cache = Some(get_cached_ast(self.content));
446 }
447
448 cache.as_ref().unwrap().clone()
449 }
450
451 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
453 let mut cache = self.code_spans_cache.lock().unwrap();
454
455 if cache.is_none() {
457 let ast = self.get_ast();
458 let code_spans = Self::parse_code_spans(self.content, &self.lines, &ast);
459 *cache = Some(Arc::new(code_spans));
460 }
461
462 cache.as_ref().unwrap().clone()
464 }
465
466 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
468 let mut cache = self.html_tags_cache.lock().unwrap();
469
470 if cache.is_none() {
471 let html_tags = Self::parse_html_tags(self.content, &self.lines, &self.code_blocks);
472 *cache = Some(Arc::new(html_tags));
473 }
474
475 cache.as_ref().unwrap().clone()
476 }
477
478 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
480 let mut cache = self.emphasis_spans_cache.lock().unwrap();
481
482 if cache.is_none() {
483 let emphasis_spans = Self::parse_emphasis_spans(self.content, &self.lines, &self.code_blocks);
484 *cache = Some(Arc::new(emphasis_spans));
485 }
486
487 cache.as_ref().unwrap().clone()
488 }
489
490 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
492 let mut cache = self.table_rows_cache.lock().unwrap();
493
494 if cache.is_none() {
495 let table_rows = Self::parse_table_rows(&self.lines);
496 *cache = Some(Arc::new(table_rows));
497 }
498
499 cache.as_ref().unwrap().clone()
500 }
501
502 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
504 let mut cache = self.bare_urls_cache.lock().unwrap();
505
506 if cache.is_none() {
507 let bare_urls = Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks);
508 *cache = Some(Arc::new(bare_urls));
509 }
510
511 cache.as_ref().unwrap().clone()
512 }
513
514 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
516 match self.line_offsets.binary_search(&offset) {
517 Ok(line) => (line + 1, 1),
518 Err(line) => {
519 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
520 (line, offset - line_start + 1)
521 }
522 }
523 }
524
525 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
527 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
529 return true;
530 }
531
532 self.code_spans()
534 .iter()
535 .any(|span| pos >= span.byte_offset && pos < span.byte_end)
536 }
537
538 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
540 if line_num > 0 {
541 self.lines.get(line_num - 1)
542 } else {
543 None
544 }
545 }
546
547 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
549 self.line_info(line_num).map(|info| info.byte_offset)
550 }
551
552 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
554 let normalized_id = ref_id.to_lowercase();
555 self.reference_defs
556 .iter()
557 .find(|def| def.id == normalized_id)
558 .map(|def| def.url.as_str())
559 }
560
561 pub fn links_on_line(&self, line_num: usize) -> Vec<&ParsedLink> {
563 self.links.iter().filter(|link| link.line == line_num).collect()
564 }
565
566 pub fn images_on_line(&self, line_num: usize) -> Vec<&ParsedImage> {
568 self.images.iter().filter(|img| img.line == line_num).collect()
569 }
570
571 pub fn is_in_list_block(&self, line_num: usize) -> bool {
573 self.list_blocks
574 .iter()
575 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
576 }
577
578 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
580 self.list_blocks
581 .iter()
582 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
583 }
584
585 pub fn is_in_code_block(&self, line_num: usize) -> bool {
589 if line_num == 0 || line_num > self.lines.len() {
590 return false;
591 }
592 self.lines[line_num - 1].in_code_block
593 }
594
595 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
597 if line_num == 0 || line_num > self.lines.len() {
598 return false;
599 }
600 self.lines[line_num - 1].in_front_matter
601 }
602
603 pub fn is_in_html_block(&self, line_num: usize) -> bool {
605 if line_num == 0 || line_num > self.lines.len() {
606 return false;
607 }
608 self.lines[line_num - 1].in_html_block
609 }
610
611 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
613 if line_num == 0 || line_num > self.lines.len() {
614 return false;
615 }
616
617 let col_0indexed = if col > 0 { col - 1 } else { 0 };
621 let code_spans = self.code_spans();
622 code_spans
623 .iter()
624 .any(|span| span.line == line_num && col_0indexed >= span.start_col && col_0indexed < span.end_col)
625 }
626
627 pub fn has_char(&self, ch: char) -> bool {
629 match ch {
630 '#' => self.char_frequency.hash_count > 0,
631 '*' => self.char_frequency.asterisk_count > 0,
632 '_' => self.char_frequency.underscore_count > 0,
633 '-' => self.char_frequency.hyphen_count > 0,
634 '+' => self.char_frequency.plus_count > 0,
635 '>' => self.char_frequency.gt_count > 0,
636 '|' => self.char_frequency.pipe_count > 0,
637 '[' => self.char_frequency.bracket_count > 0,
638 '`' => self.char_frequency.backtick_count > 0,
639 '<' => self.char_frequency.lt_count > 0,
640 '!' => self.char_frequency.exclamation_count > 0,
641 '\n' => self.char_frequency.newline_count > 0,
642 _ => self.content.contains(ch), }
644 }
645
646 pub fn char_count(&self, ch: char) -> usize {
648 match ch {
649 '#' => self.char_frequency.hash_count,
650 '*' => self.char_frequency.asterisk_count,
651 '_' => self.char_frequency.underscore_count,
652 '-' => self.char_frequency.hyphen_count,
653 '+' => self.char_frequency.plus_count,
654 '>' => self.char_frequency.gt_count,
655 '|' => self.char_frequency.pipe_count,
656 '[' => self.char_frequency.bracket_count,
657 '`' => self.char_frequency.backtick_count,
658 '<' => self.char_frequency.lt_count,
659 '!' => self.char_frequency.exclamation_count,
660 '\n' => self.char_frequency.newline_count,
661 _ => self.content.matches(ch).count(), }
663 }
664
665 pub fn likely_has_headings(&self) -> bool {
667 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
669
670 pub fn likely_has_lists(&self) -> bool {
672 self.char_frequency.asterisk_count > 0
673 || self.char_frequency.hyphen_count > 0
674 || self.char_frequency.plus_count > 0
675 }
676
677 pub fn likely_has_emphasis(&self) -> bool {
679 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
680 }
681
682 pub fn likely_has_tables(&self) -> bool {
684 self.char_frequency.pipe_count > 2
685 }
686
687 pub fn likely_has_blockquotes(&self) -> bool {
689 self.char_frequency.gt_count > 0
690 }
691
692 pub fn likely_has_code(&self) -> bool {
694 self.char_frequency.backtick_count > 0
695 }
696
697 pub fn likely_has_links_or_images(&self) -> bool {
699 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
700 }
701
702 pub fn likely_has_html(&self) -> bool {
704 self.char_frequency.lt_count > 0
705 }
706
707 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
709 self.html_tags()
710 .iter()
711 .filter(|tag| tag.line == line_num)
712 .cloned()
713 .collect()
714 }
715
716 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
718 self.emphasis_spans()
719 .iter()
720 .filter(|span| span.line == line_num)
721 .cloned()
722 .collect()
723 }
724
725 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
727 self.table_rows()
728 .iter()
729 .filter(|row| row.line == line_num)
730 .cloned()
731 .collect()
732 }
733
734 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
736 self.bare_urls()
737 .iter()
738 .filter(|url| url.line == line_num)
739 .cloned()
740 .collect()
741 }
742
743 fn parse_links(
745 content: &str,
746 lines: &[LineInfo],
747 code_blocks: &[(usize, usize)],
748 code_spans: &[CodeSpan],
749 flavor: MarkdownFlavor,
750 ) -> Vec<ParsedLink> {
751 use crate::utils::skip_context::is_mkdocs_snippet_line;
752
753 let mut links = Vec::with_capacity(content.len() / 500); for cap in LINK_PATTERN.captures_iter(content) {
758 let full_match = cap.get(0).unwrap();
759 let match_start = full_match.start();
760 let match_end = full_match.end();
761
762 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
764 continue;
765 }
766
767 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
769 continue;
770 }
771
772 if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
774 continue;
775 }
776
777 if code_spans
779 .iter()
780 .any(|span| match_start >= span.byte_offset && match_start < span.byte_end)
781 {
782 continue;
783 }
784
785 let line_idx = lines
788 .iter()
789 .position(|line| {
790 match_start >= line.byte_offset && (match_start < line.byte_offset + line.content.len() + 1)
791 })
792 .unwrap_or(0);
793
794 if is_mkdocs_snippet_line(&lines[line_idx].content, flavor) {
795 continue;
796 }
797
798 let mut line_num = 1;
800 let mut col_start = match_start;
801 for (idx, line_info) in lines.iter().enumerate() {
802 if match_start >= line_info.byte_offset {
803 line_num = idx + 1;
804 col_start = match_start - line_info.byte_offset;
805 } else {
806 break;
807 }
808 }
809
810 let mut end_line_num = 1;
812 let mut col_end = match_end;
813 for (idx, line_info) in lines.iter().enumerate() {
814 if match_end > line_info.byte_offset {
815 end_line_num = idx + 1;
816 col_end = match_end - line_info.byte_offset;
817 } else {
818 break;
819 }
820 }
821
822 if line_num == end_line_num {
824 } else {
826 }
829
830 let text = cap.get(1).map_or("", |m| m.as_str()).to_string();
831
832 if let Some(inline_url) = cap.get(2) {
833 links.push(ParsedLink {
835 line: line_num,
836 start_col: col_start,
837 end_col: col_end,
838 byte_offset: match_start,
839 byte_end: match_end,
840 text,
841 url: inline_url.as_str().to_string(),
842 is_reference: false,
843 reference_id: None,
844 });
845 } else if let Some(ref_id) = cap.get(3) {
846 let ref_id_str = ref_id.as_str();
848 let normalized_ref = if ref_id_str.is_empty() {
849 text.to_lowercase() } else {
851 ref_id_str.to_lowercase()
852 };
853
854 links.push(ParsedLink {
855 line: line_num,
856 start_col: col_start,
857 end_col: col_end,
858 byte_offset: match_start,
859 byte_end: match_end,
860 text,
861 url: String::new(), is_reference: true,
863 reference_id: Some(normalized_ref),
864 });
865 }
866 }
867
868 links
869 }
870
871 fn parse_images(
873 content: &str,
874 lines: &[LineInfo],
875 code_blocks: &[(usize, usize)],
876 code_spans: &[CodeSpan],
877 ) -> Vec<ParsedImage> {
878 let mut images = Vec::with_capacity(content.len() / 1000); for cap in IMAGE_PATTERN.captures_iter(content) {
883 let full_match = cap.get(0).unwrap();
884 let match_start = full_match.start();
885 let match_end = full_match.end();
886
887 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
889 continue;
890 }
891
892 if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
894 continue;
895 }
896
897 if code_spans
899 .iter()
900 .any(|span| match_start >= span.byte_offset && match_start < span.byte_end)
901 {
902 continue;
903 }
904
905 let mut line_num = 1;
907 let mut col_start = match_start;
908 for (idx, line_info) in lines.iter().enumerate() {
909 if match_start >= line_info.byte_offset {
910 line_num = idx + 1;
911 col_start = match_start - line_info.byte_offset;
912 } else {
913 break;
914 }
915 }
916
917 let mut end_line_num = 1;
919 let mut col_end = match_end;
920 for (idx, line_info) in lines.iter().enumerate() {
921 if match_end > line_info.byte_offset {
922 end_line_num = idx + 1;
923 col_end = match_end - line_info.byte_offset;
924 } else {
925 break;
926 }
927 }
928
929 if line_num == end_line_num {
931 } else {
933 }
936
937 let alt_text = cap.get(1).map_or("", |m| m.as_str()).to_string();
938
939 if let Some(inline_url) = cap.get(2) {
940 images.push(ParsedImage {
942 line: line_num,
943 start_col: col_start,
944 end_col: col_end,
945 byte_offset: match_start,
946 byte_end: match_end,
947 alt_text,
948 url: inline_url.as_str().to_string(),
949 is_reference: false,
950 reference_id: None,
951 });
952 } else if let Some(ref_id) = cap.get(3) {
953 let ref_id_str = ref_id.as_str();
955 let normalized_ref = if ref_id_str.is_empty() {
956 alt_text.to_lowercase() } else {
958 ref_id_str.to_lowercase()
959 };
960
961 images.push(ParsedImage {
962 line: line_num,
963 start_col: col_start,
964 end_col: col_end,
965 byte_offset: match_start,
966 byte_end: match_end,
967 alt_text,
968 url: String::new(), is_reference: true,
970 reference_id: Some(normalized_ref),
971 });
972 }
973 }
974
975 images
976 }
977
978 fn parse_reference_defs(_content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
980 let mut refs = Vec::with_capacity(lines.len() / 20); for (line_idx, line_info) in lines.iter().enumerate() {
984 if line_info.in_code_block {
986 continue;
987 }
988
989 let line = &line_info.content;
990 let line_num = line_idx + 1;
991
992 if let Some(cap) = REF_DEF_PATTERN.captures(line) {
993 let id = cap.get(1).unwrap().as_str().to_lowercase();
994 let url = cap.get(2).unwrap().as_str().to_string();
995 let title = cap.get(3).or_else(|| cap.get(4)).map(|m| m.as_str().to_string());
996
997 refs.push(ReferenceDef {
998 line: line_num,
999 id,
1000 url,
1001 title,
1002 });
1003 }
1004 }
1005
1006 refs
1007 }
1008
1009 fn compute_line_info(
1011 content: &str,
1012 line_offsets: &[usize],
1013 code_blocks: &[(usize, usize)],
1014 flavor: MarkdownFlavor,
1015 ) -> Vec<LineInfo> {
1016 lazy_static! {
1017 static ref UNORDERED_REGEX: regex::Regex = regex::Regex::new(r"^(\s*)([-*+])([ \t]*)(.*)").unwrap();
1019 static ref ORDERED_REGEX: regex::Regex = regex::Regex::new(r"^(\s*)(\d+)([.)])([ \t]*)(.*)").unwrap();
1020
1021 static ref BLOCKQUOTE_REGEX: regex::Regex = regex::Regex::new(r"^(\s*>\s*)(.*)").unwrap();
1023
1024 static ref ATX_HEADING_REGEX: regex::Regex = regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap();
1026 static ref SETEXT_UNDERLINE_REGEX: regex::Regex = regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap();
1027
1028 static ref BLOCKQUOTE_REGEX_FULL: regex::Regex = regex::Regex::new(r"^(\s*)(>+)(\s*)(.*)$").unwrap();
1030 }
1031
1032 let content_lines: Vec<&str> = content.lines().collect();
1033 let mut lines = Vec::with_capacity(content_lines.len());
1034
1035 let mut in_front_matter = false;
1037 let mut front_matter_end = 0;
1038 if content_lines.first().map(|l| l.trim()) == Some("---") {
1039 in_front_matter = true;
1040 for (idx, line) in content_lines.iter().enumerate().skip(1) {
1041 if line.trim() == "---" {
1042 front_matter_end = idx;
1043 break;
1044 }
1045 }
1046 }
1047
1048 for (i, line) in content_lines.iter().enumerate() {
1049 let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
1050 let indent = line.len() - line.trim_start().len();
1051 let is_blank = if let Some(caps) = BLOCKQUOTE_REGEX.captures(line) {
1053 let after_prefix = caps.get(2).map_or("", |m| m.as_str());
1055 after_prefix.trim().is_empty()
1056 } else {
1057 line.trim().is_empty()
1058 };
1059 let in_code_block = code_blocks.iter().any(|&(start, end)| {
1062 let safe_start = if start > 0 && !content.is_char_boundary(start) {
1067 let mut boundary = start;
1069 while boundary > 0 && !content.is_char_boundary(boundary) {
1070 boundary -= 1;
1071 }
1072 boundary
1073 } else {
1074 start
1075 };
1076
1077 let safe_end = if end < content.len() && !content.is_char_boundary(end) {
1078 let mut boundary = end;
1080 while boundary < content.len() && !content.is_char_boundary(boundary) {
1081 boundary += 1;
1082 }
1083 boundary
1084 } else {
1085 end.min(content.len())
1086 };
1087
1088 let block_content = &content[safe_start..safe_end];
1089 let is_multiline = block_content.contains('\n');
1090 let is_fenced = block_content.starts_with("```") || block_content.starts_with("~~~");
1091 let is_indented = !is_fenced
1092 && block_content
1093 .lines()
1094 .all(|l| l.starts_with(" ") || l.starts_with("\t") || l.trim().is_empty());
1095
1096 byte_offset >= start && byte_offset < end && (is_multiline || is_fenced || is_indented)
1097 });
1098
1099 let list_item = if !(in_code_block || is_blank || in_front_matter && i <= front_matter_end) {
1101 let (line_for_list_check, blockquote_prefix_len) = if let Some(caps) = BLOCKQUOTE_REGEX.captures(line) {
1103 let prefix = caps.get(1).unwrap().as_str();
1104 let content = caps.get(2).unwrap().as_str();
1105 (content, prefix.len())
1106 } else {
1107 (&**line, 0)
1108 };
1109
1110 if let Some(caps) = UNORDERED_REGEX.captures(line_for_list_check) {
1111 let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
1112 let marker = caps.get(2).map_or("", |m| m.as_str());
1113 let spacing = caps.get(3).map_or("", |m| m.as_str());
1114 let _content = caps.get(4).map_or("", |m| m.as_str());
1115 let marker_column = blockquote_prefix_len + leading_spaces.len();
1116 let content_column = marker_column + marker.len() + spacing.len();
1117
1118 if spacing.is_empty() {
1125 None
1126 } else {
1127 Some(ListItemInfo {
1128 marker: marker.to_string(),
1129 is_ordered: false,
1130 number: None,
1131 marker_column,
1132 content_column,
1133 })
1134 }
1135 } else if let Some(caps) = ORDERED_REGEX.captures(line_for_list_check) {
1136 let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
1137 let number_str = caps.get(2).map_or("", |m| m.as_str());
1138 let delimiter = caps.get(3).map_or("", |m| m.as_str());
1139 let spacing = caps.get(4).map_or("", |m| m.as_str());
1140 let _content = caps.get(5).map_or("", |m| m.as_str());
1141 let marker = format!("{number_str}{delimiter}");
1142 let marker_column = blockquote_prefix_len + leading_spaces.len();
1143 let content_column = marker_column + marker.len() + spacing.len();
1144
1145 if spacing.is_empty() {
1148 None
1149 } else {
1150 Some(ListItemInfo {
1151 marker,
1152 is_ordered: true,
1153 number: number_str.parse().ok(),
1154 marker_column,
1155 content_column,
1156 })
1157 }
1158 } else {
1159 None
1160 }
1161 } else {
1162 None
1163 };
1164
1165 lines.push(LineInfo {
1166 content: line.to_string(),
1167 byte_offset,
1168 indent,
1169 is_blank,
1170 in_code_block,
1171 in_front_matter: in_front_matter && i <= front_matter_end,
1172 in_html_block: false, list_item,
1174 heading: None, blockquote: None, });
1177 }
1178
1179 for i in 0..content_lines.len() {
1181 if lines[i].in_code_block {
1182 continue;
1183 }
1184
1185 if in_front_matter && i <= front_matter_end {
1187 continue;
1188 }
1189
1190 let line = content_lines[i];
1191
1192 if let Some(caps) = BLOCKQUOTE_REGEX_FULL.captures(line) {
1194 let indent_str = caps.get(1).map_or("", |m| m.as_str());
1195 let markers = caps.get(2).map_or("", |m| m.as_str());
1196 let spaces_after = caps.get(3).map_or("", |m| m.as_str());
1197 let content = caps.get(4).map_or("", |m| m.as_str());
1198
1199 let nesting_level = markers.chars().filter(|&c| c == '>').count();
1200 let marker_column = indent_str.len();
1201
1202 let prefix = format!("{indent_str}{markers}{spaces_after}");
1204
1205 let has_no_space = spaces_after.is_empty() && !content.is_empty();
1207 let has_multiple_spaces = spaces_after.len() > 1 || spaces_after.contains('\t');
1209
1210 let needs_md028_fix = content.is_empty() && spaces_after.is_empty();
1214
1215 lines[i].blockquote = Some(BlockquoteInfo {
1216 nesting_level,
1217 indent: indent_str.to_string(),
1218 marker_column,
1219 prefix,
1220 content: content.to_string(),
1221 has_no_space_after_marker: has_no_space,
1222 has_multiple_spaces_after_marker: has_multiple_spaces,
1223 needs_md028_fix,
1224 });
1225 }
1226
1227 if lines[i].is_blank {
1229 continue;
1230 }
1231
1232 let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
1235 crate::utils::mkdocs_snippets::is_snippet_section_start(line)
1236 || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
1237 } else {
1238 false
1239 };
1240
1241 if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
1242 if crate::utils::skip_context::is_in_html_comment(content, lines[i].byte_offset) {
1244 continue;
1245 }
1246 let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
1247 let hashes = caps.get(2).map_or("", |m| m.as_str());
1248 let spaces_after = caps.get(3).map_or("", |m| m.as_str());
1249 let rest = caps.get(4).map_or("", |m| m.as_str());
1250
1251 let level = hashes.len() as u8;
1252 let marker_column = leading_spaces.len();
1253
1254 let (text, has_closing, closing_seq) = {
1256 let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
1258 if rest[id_start..].trim_end().ends_with('}') {
1260 (&rest[..id_start], &rest[id_start..])
1262 } else {
1263 (rest, "")
1264 }
1265 } else {
1266 (rest, "")
1267 };
1268
1269 let trimmed_rest = rest_without_id.trim_end();
1271 if let Some(last_hash_pos) = trimmed_rest.rfind('#') {
1272 let mut start_of_hashes = last_hash_pos;
1274 while start_of_hashes > 0 && trimmed_rest.chars().nth(start_of_hashes - 1) == Some('#') {
1275 start_of_hashes -= 1;
1276 }
1277
1278 let has_space_before = start_of_hashes == 0
1280 || trimmed_rest
1281 .chars()
1282 .nth(start_of_hashes - 1)
1283 .is_some_and(|c| c.is_whitespace());
1284
1285 let potential_closing = &trimmed_rest[start_of_hashes..];
1287 let is_all_hashes = potential_closing.chars().all(|c| c == '#');
1288
1289 if is_all_hashes && has_space_before {
1290 let closing_hashes = potential_closing.to_string();
1292 let text_part = if !custom_id_part.is_empty() {
1295 format!("{}{}", rest_without_id[..start_of_hashes].trim_end(), custom_id_part)
1298 } else {
1299 rest_without_id[..start_of_hashes].trim_end().to_string()
1300 };
1301 (text_part, true, closing_hashes)
1302 } else {
1303 (rest.to_string(), false, String::new())
1305 }
1306 } else {
1307 (rest.to_string(), false, String::new())
1309 }
1310 };
1311
1312 let content_column = marker_column + hashes.len() + spaces_after.len();
1313
1314 let raw_text = text.trim().to_string();
1316 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
1317
1318 if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
1320 let next_line = content_lines[i + 1];
1321 if !lines[i + 1].in_code_block
1322 && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
1323 && let Some(next_line_id) =
1324 crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
1325 {
1326 custom_id = Some(next_line_id);
1327 }
1328 }
1329
1330 lines[i].heading = Some(HeadingInfo {
1331 level,
1332 style: HeadingStyle::ATX,
1333 marker: hashes.to_string(),
1334 marker_column,
1335 content_column,
1336 text: clean_text,
1337 custom_id,
1338 raw_text,
1339 has_closing_sequence: has_closing,
1340 closing_sequence: closing_seq,
1341 });
1342 }
1343 else if i + 1 < content_lines.len() {
1345 let next_line = content_lines[i + 1];
1346 if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
1347 if in_front_matter && i < front_matter_end {
1349 continue;
1350 }
1351
1352 if crate::utils::skip_context::is_in_html_comment(content, lines[i].byte_offset) {
1354 continue;
1355 }
1356
1357 let underline = next_line.trim();
1358
1359 if underline == "---" {
1362 continue;
1363 }
1364
1365 let current_line_trimmed = line.trim();
1367 if current_line_trimmed.contains(':')
1368 && !current_line_trimmed.starts_with('#')
1369 && !current_line_trimmed.contains('[')
1370 && !current_line_trimmed.contains("](")
1371 {
1372 continue;
1374 }
1375
1376 let level = if underline.starts_with('=') { 1 } else { 2 };
1377 let style = if level == 1 {
1378 HeadingStyle::Setext1
1379 } else {
1380 HeadingStyle::Setext2
1381 };
1382
1383 let raw_text = line.trim().to_string();
1385 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
1386
1387 if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
1389 let attr_line = content_lines[i + 2];
1390 if !lines[i + 2].in_code_block
1391 && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
1392 && let Some(attr_line_id) =
1393 crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
1394 {
1395 custom_id = Some(attr_line_id);
1396 }
1397 }
1398
1399 lines[i].heading = Some(HeadingInfo {
1400 level,
1401 style,
1402 marker: underline.to_string(),
1403 marker_column: next_line.len() - next_line.trim_start().len(),
1404 content_column: lines[i].indent,
1405 text: clean_text,
1406 custom_id,
1407 raw_text,
1408 has_closing_sequence: false,
1409 closing_sequence: String::new(),
1410 });
1411 }
1412 }
1413 }
1414
1415 lines
1416 }
1417
1418 fn detect_html_blocks(lines: &mut [LineInfo]) {
1420 const BLOCK_ELEMENTS: &[&str] = &[
1422 "address",
1423 "article",
1424 "aside",
1425 "blockquote",
1426 "details",
1427 "dialog",
1428 "dd",
1429 "div",
1430 "dl",
1431 "dt",
1432 "fieldset",
1433 "figcaption",
1434 "figure",
1435 "footer",
1436 "form",
1437 "h1",
1438 "h2",
1439 "h3",
1440 "h4",
1441 "h5",
1442 "h6",
1443 "header",
1444 "hr",
1445 "li",
1446 "main",
1447 "nav",
1448 "ol",
1449 "p",
1450 "pre",
1451 "section",
1452 "table",
1453 "tbody",
1454 "td",
1455 "tfoot",
1456 "th",
1457 "thead",
1458 "tr",
1459 "ul",
1460 ];
1461
1462 let mut i = 0;
1463 while i < lines.len() {
1464 if lines[i].in_code_block || lines[i].in_front_matter {
1466 i += 1;
1467 continue;
1468 }
1469
1470 let trimmed = lines[i].content.trim_start();
1471
1472 if trimmed.starts_with('<') && trimmed.len() > 1 {
1474 let after_bracket = &trimmed[1..];
1476 let is_closing = after_bracket.starts_with('/');
1477 let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
1478
1479 let tag_name = tag_start
1481 .chars()
1482 .take_while(|c| c.is_ascii_alphabetic() || *c == '-')
1483 .collect::<String>()
1484 .to_lowercase();
1485
1486 if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
1488 lines[i].in_html_block = true;
1490
1491 if !is_closing {
1494 let closing_tag = format!("</{tag_name}>");
1495 let mut j = i + 1;
1496 while j < lines.len() && j < i + 100 {
1497 if lines[j].is_blank {
1500 break;
1501 }
1502
1503 lines[j].in_html_block = true;
1504
1505 if lines[j].content.contains(&closing_tag) {
1507 break;
1508 }
1509 j += 1;
1510 }
1511 }
1512 }
1513 }
1514
1515 i += 1;
1516 }
1517 }
1518
1519 fn parse_code_spans(content: &str, lines: &[LineInfo], ast: &Node) -> Vec<CodeSpan> {
1521 let mut code_spans = Vec::new();
1522
1523 if !content.contains('`') {
1525 return code_spans;
1526 }
1527
1528 fn extract_code_spans(node: &Node, content: &str, lines: &[LineInfo], spans: &mut Vec<CodeSpan>) {
1530 match node {
1531 Node::InlineCode(inline_code) => {
1532 if let Some(pos) = &inline_code.position {
1533 let start_pos = pos.start.offset;
1534 let end_pos = pos.end.offset;
1535
1536 let full_span = &content[start_pos..end_pos];
1538 let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
1539
1540 let content_start = start_pos + backtick_count;
1542 let content_end = end_pos - backtick_count;
1543 let span_content = if content_start < content_end {
1544 content[content_start..content_end].to_string()
1545 } else {
1546 String::new()
1547 };
1548
1549 let mut line_num = 1;
1551 let mut col_start = start_pos;
1552 for (idx, line_info) in lines.iter().enumerate() {
1553 if start_pos >= line_info.byte_offset {
1554 line_num = idx + 1;
1555 col_start = start_pos - line_info.byte_offset;
1556 } else {
1557 break;
1558 }
1559 }
1560
1561 let mut col_end = end_pos;
1563 for line_info in lines.iter() {
1564 if end_pos > line_info.byte_offset {
1565 col_end = end_pos - line_info.byte_offset;
1566 } else {
1567 break;
1568 }
1569 }
1570
1571 spans.push(CodeSpan {
1572 line: line_num,
1573 start_col: col_start,
1574 end_col: col_end,
1575 byte_offset: start_pos,
1576 byte_end: end_pos,
1577 backtick_count,
1578 content: span_content,
1579 });
1580 }
1581 }
1582 Node::Root(root) => {
1584 for child in &root.children {
1585 extract_code_spans(child, content, lines, spans);
1586 }
1587 }
1588 Node::Paragraph(para) => {
1589 for child in ¶.children {
1590 extract_code_spans(child, content, lines, spans);
1591 }
1592 }
1593 Node::Heading(heading) => {
1594 for child in &heading.children {
1595 extract_code_spans(child, content, lines, spans);
1596 }
1597 }
1598 Node::List(list) => {
1599 for child in &list.children {
1600 extract_code_spans(child, content, lines, spans);
1601 }
1602 }
1603 Node::ListItem(item) => {
1604 for child in &item.children {
1605 extract_code_spans(child, content, lines, spans);
1606 }
1607 }
1608 Node::Blockquote(blockquote) => {
1609 for child in &blockquote.children {
1610 extract_code_spans(child, content, lines, spans);
1611 }
1612 }
1613 Node::Table(table) => {
1614 for child in &table.children {
1615 extract_code_spans(child, content, lines, spans);
1616 }
1617 }
1618 Node::TableRow(row) => {
1619 for child in &row.children {
1620 extract_code_spans(child, content, lines, spans);
1621 }
1622 }
1623 Node::TableCell(cell) => {
1624 for child in &cell.children {
1625 extract_code_spans(child, content, lines, spans);
1626 }
1627 }
1628 Node::Emphasis(emphasis) => {
1629 for child in &emphasis.children {
1630 extract_code_spans(child, content, lines, spans);
1631 }
1632 }
1633 Node::Strong(strong) => {
1634 for child in &strong.children {
1635 extract_code_spans(child, content, lines, spans);
1636 }
1637 }
1638 Node::Link(link) => {
1639 for child in &link.children {
1640 extract_code_spans(child, content, lines, spans);
1641 }
1642 }
1643 Node::LinkReference(link_ref) => {
1644 for child in &link_ref.children {
1645 extract_code_spans(child, content, lines, spans);
1646 }
1647 }
1648 Node::FootnoteDefinition(footnote) => {
1649 for child in &footnote.children {
1650 extract_code_spans(child, content, lines, spans);
1651 }
1652 }
1653 Node::Delete(delete) => {
1654 for child in &delete.children {
1655 extract_code_spans(child, content, lines, spans);
1656 }
1657 }
1658 Node::Code(_)
1660 | Node::Text(_)
1661 | Node::Html(_)
1662 | Node::Image(_)
1663 | Node::ImageReference(_)
1664 | Node::FootnoteReference(_)
1665 | Node::Break(_)
1666 | Node::ThematicBreak(_)
1667 | Node::Definition(_)
1668 | Node::Yaml(_)
1669 | Node::Toml(_)
1670 | Node::Math(_)
1671 | Node::InlineMath(_)
1672 | Node::MdxJsxFlowElement(_)
1673 | Node::MdxFlowExpression(_)
1674 | Node::MdxJsxTextElement(_)
1675 | Node::MdxTextExpression(_)
1676 | Node::MdxjsEsm(_) => {
1677 }
1679 }
1680 }
1681
1682 extract_code_spans(ast, content, lines, &mut code_spans);
1684
1685 code_spans.sort_by_key(|span| span.byte_offset);
1687
1688 code_spans
1689 }
1690
1691 fn parse_list_blocks(lines: &[LineInfo]) -> Vec<ListBlock> {
1693 let mut list_blocks = Vec::with_capacity(lines.len() / 10); let mut current_block: Option<ListBlock> = None;
1696 let mut last_list_item_line = 0;
1697 let mut current_indent_level = 0;
1698 let mut last_marker_width = 0;
1699
1700 for (line_idx, line_info) in lines.iter().enumerate() {
1701 let line_num = line_idx + 1;
1702
1703 if line_info.in_code_block {
1705 if let Some(ref mut block) = current_block {
1706 let min_continuation_indent = CodeBlockUtils::calculate_min_continuation_indent(lines, line_idx);
1708
1709 let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
1711
1712 match context {
1713 CodeBlockContext::Indented => {
1714 block.end_line = line_num;
1716 continue;
1717 }
1718 CodeBlockContext::Standalone => {
1719 let completed_block = current_block.take().unwrap();
1721 list_blocks.push(completed_block);
1722 continue;
1723 }
1724 CodeBlockContext::Adjacent => {
1725 block.end_line = line_num;
1727 continue;
1728 }
1729 }
1730 } else {
1731 continue;
1733 }
1734 }
1735
1736 let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(&line_info.content) {
1738 caps.get(0).unwrap().as_str().to_string()
1739 } else {
1740 String::new()
1741 };
1742
1743 if let Some(list_item) = &line_info.list_item {
1745 let item_indent = list_item.marker_column;
1747 let nesting = item_indent / 2; if let Some(ref mut block) = current_block {
1750 let is_nested = nesting > block.nesting_level;
1754 let same_type =
1755 (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
1756 let same_context = block.blockquote_prefix == blockquote_prefix;
1757 let reasonable_distance = line_num <= last_list_item_line + 2; let marker_compatible =
1761 block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
1762
1763 let has_non_list_content = {
1765 let mut found_non_list = false;
1766 let block_last_item_line = block.item_lines.last().copied().unwrap_or(block.end_line);
1768
1769 if block_last_item_line > 0 && block_last_item_line <= lines.len() {
1771 let last_line = &lines[block_last_item_line - 1];
1772 if last_line.content.contains(r"`sqlalchemy`") && last_line.content.contains(r"\`") {
1773 log::debug!(
1774 "After problematic line {}: checking lines {} to {} for non-list content",
1775 block_last_item_line,
1776 block_last_item_line + 1,
1777 line_num
1778 );
1779 if line_num == block_last_item_line + 1 {
1781 log::debug!("Lines are consecutive, no content between");
1782 }
1783 }
1784 }
1785
1786 for check_line in (block_last_item_line + 1)..line_num {
1787 let check_idx = check_line - 1;
1788 if check_idx < lines.len() {
1789 let check_info = &lines[check_idx];
1790 let is_list_breaking_content = if check_info.in_code_block {
1792 let last_item_marker_width =
1794 if block_last_item_line > 0 && block_last_item_line <= lines.len() {
1795 lines[block_last_item_line - 1]
1796 .list_item
1797 .as_ref()
1798 .map(|li| {
1799 if li.is_ordered {
1800 li.marker.len() + 1 } else {
1802 li.marker.len()
1803 }
1804 })
1805 .unwrap_or(3) } else {
1807 3 };
1809
1810 let min_continuation = if block.is_ordered { last_item_marker_width } else { 2 };
1811
1812 let context = CodeBlockUtils::analyze_code_block_context(
1814 lines,
1815 check_line - 1,
1816 min_continuation,
1817 );
1818
1819 matches!(context, CodeBlockContext::Standalone)
1821 } else if !check_info.is_blank && check_info.list_item.is_none() {
1822 let line_content = check_info.content.trim();
1824
1825 if check_info.heading.is_some()
1827 || line_content.starts_with("---")
1828 || line_content.starts_with("***")
1829 || line_content.starts_with("___")
1830 || (line_content.contains('|')
1831 && !line_content.contains("](")
1832 && !line_content.contains("http")
1833 && (line_content.matches('|').count() > 1
1834 || line_content.starts_with('|')
1835 || line_content.ends_with('|')))
1836 || line_content.starts_with(">")
1837 {
1838 true
1839 }
1840 else {
1842 let last_item_marker_width =
1843 if block_last_item_line > 0 && block_last_item_line <= lines.len() {
1844 lines[block_last_item_line - 1]
1845 .list_item
1846 .as_ref()
1847 .map(|li| {
1848 if li.is_ordered {
1849 li.marker.len() + 1 } else {
1851 li.marker.len()
1852 }
1853 })
1854 .unwrap_or(3) } else {
1856 3 };
1858
1859 let min_continuation =
1860 if block.is_ordered { last_item_marker_width } else { 2 };
1861 check_info.indent < min_continuation
1862 }
1863 } else {
1864 false
1865 };
1866
1867 if is_list_breaking_content {
1868 found_non_list = true;
1870 break;
1871 }
1872 }
1873 }
1874 found_non_list
1875 };
1876
1877 let mut continues_list = if is_nested {
1881 same_context && reasonable_distance && !has_non_list_content
1883 } else {
1884 let result = same_type
1886 && same_context
1887 && reasonable_distance
1888 && marker_compatible
1889 && !has_non_list_content;
1890
1891 if block.item_lines.last().is_some_and(|&last_line| {
1893 last_line > 0
1894 && last_line <= lines.len()
1895 && lines[last_line - 1].content.contains(r"`sqlalchemy`")
1896 && lines[last_line - 1].content.contains(r"\`")
1897 }) {
1898 log::debug!(
1899 "List continuation check after problematic line at line {line_num}: same_type={same_type}, same_context={same_context}, reasonable_distance={reasonable_distance}, marker_compatible={marker_compatible}, has_non_list_content={has_non_list_content}, continues={result}"
1900 );
1901 if line_num > 0 && line_num <= lines.len() {
1902 log::debug!("Current line content: {:?}", lines[line_num - 1].content);
1903 }
1904 }
1905
1906 result
1907 };
1908
1909 if !continues_list && reasonable_distance && line_num > 0 && block.end_line == line_num - 1 {
1912 if block.item_lines.contains(&(line_num - 1)) {
1914 continues_list = true;
1916 }
1917 }
1918
1919 if continues_list {
1920 block.end_line = line_num;
1922 block.item_lines.push(line_num);
1923
1924 block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
1926 list_item.marker.len() + 1
1927 } else {
1928 list_item.marker.len()
1929 });
1930
1931 if !block.is_ordered
1933 && block.marker.is_some()
1934 && block.marker.as_ref() != Some(&list_item.marker)
1935 {
1936 block.marker = None;
1938 }
1939 } else {
1940 list_blocks.push(block.clone());
1943
1944 *block = ListBlock {
1945 start_line: line_num,
1946 end_line: line_num,
1947 is_ordered: list_item.is_ordered,
1948 marker: if list_item.is_ordered {
1949 None
1950 } else {
1951 Some(list_item.marker.clone())
1952 },
1953 blockquote_prefix: blockquote_prefix.clone(),
1954 item_lines: vec![line_num],
1955 nesting_level: nesting,
1956 max_marker_width: if list_item.is_ordered {
1957 list_item.marker.len() + 1
1958 } else {
1959 list_item.marker.len()
1960 },
1961 };
1962 }
1963 } else {
1964 current_block = Some(ListBlock {
1966 start_line: line_num,
1967 end_line: line_num,
1968 is_ordered: list_item.is_ordered,
1969 marker: if list_item.is_ordered {
1970 None
1971 } else {
1972 Some(list_item.marker.clone())
1973 },
1974 blockquote_prefix,
1975 item_lines: vec![line_num],
1976 nesting_level: nesting,
1977 max_marker_width: list_item.marker.len(),
1978 });
1979 }
1980
1981 last_list_item_line = line_num;
1982 current_indent_level = item_indent;
1983 last_marker_width = if list_item.is_ordered {
1984 list_item.marker.len() + 1 } else {
1986 list_item.marker.len()
1987 };
1988 } else if let Some(ref mut block) = current_block {
1989 let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
1999 lines[block.end_line - 1].content.trim_end().ends_with('\\')
2000 } else {
2001 false
2002 };
2003
2004 let min_continuation_indent = if block.is_ordered {
2008 current_indent_level + last_marker_width
2009 } else {
2010 current_indent_level + 2 };
2012
2013 if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
2014 block.end_line = line_num;
2016 } else if line_info.is_blank {
2017 let mut check_idx = line_idx + 1;
2020 let mut found_continuation = false;
2021
2022 while check_idx < lines.len() && lines[check_idx].is_blank {
2024 check_idx += 1;
2025 }
2026
2027 if check_idx < lines.len() {
2028 let next_line = &lines[check_idx];
2029 if !next_line.in_code_block && next_line.indent >= min_continuation_indent {
2031 found_continuation = true;
2032 }
2033 else if !next_line.in_code_block
2035 && next_line.list_item.is_some()
2036 && let Some(item) = &next_line.list_item
2037 {
2038 let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
2039 .find(&next_line.content)
2040 .map_or(String::new(), |m| m.as_str().to_string());
2041 if item.marker_column == current_indent_level
2042 && item.is_ordered == block.is_ordered
2043 && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
2044 {
2045 let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
2048 if let Some(between_line) = lines.get(idx) {
2049 let trimmed = between_line.content.trim();
2050 if trimmed.is_empty() {
2052 return false;
2053 }
2054 let line_indent =
2056 between_line.content.len() - between_line.content.trim_start().len();
2057
2058 if trimmed.starts_with("```")
2060 || trimmed.starts_with("~~~")
2061 || trimmed.starts_with("---")
2062 || trimmed.starts_with("***")
2063 || trimmed.starts_with("___")
2064 || trimmed.starts_with(">")
2065 || trimmed.contains('|') || between_line.heading.is_some()
2067 {
2068 return true; }
2070
2071 line_indent >= min_continuation_indent
2073 } else {
2074 false
2075 }
2076 });
2077
2078 if block.is_ordered {
2079 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2082 if let Some(between_line) = lines.get(idx) {
2083 let trimmed = between_line.content.trim();
2084 if trimmed.is_empty() {
2085 return false;
2086 }
2087 trimmed.starts_with("```")
2089 || trimmed.starts_with("~~~")
2090 || trimmed.starts_with("---")
2091 || trimmed.starts_with("***")
2092 || trimmed.starts_with("___")
2093 || trimmed.starts_with(">")
2094 || trimmed.contains('|') || between_line.heading.is_some()
2096 } else {
2097 false
2098 }
2099 });
2100 found_continuation = !has_structural_separators;
2101 } else {
2102 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2104 if let Some(between_line) = lines.get(idx) {
2105 let trimmed = between_line.content.trim();
2106 if trimmed.is_empty() {
2107 return false;
2108 }
2109 trimmed.starts_with("```")
2111 || trimmed.starts_with("~~~")
2112 || trimmed.starts_with("---")
2113 || trimmed.starts_with("***")
2114 || trimmed.starts_with("___")
2115 || trimmed.starts_with(">")
2116 || trimmed.contains('|') || between_line.heading.is_some()
2118 } else {
2119 false
2120 }
2121 });
2122 found_continuation = !has_structural_separators;
2123 }
2124 }
2125 }
2126 }
2127
2128 if found_continuation {
2129 block.end_line = line_num;
2131 } else {
2132 list_blocks.push(block.clone());
2134 current_block = None;
2135 }
2136 } else {
2137 let min_required_indent = if block.is_ordered {
2140 current_indent_level + last_marker_width
2141 } else {
2142 current_indent_level + 2
2143 };
2144
2145 let line_content = line_info.content.trim();
2150 let is_structural_separator = line_info.heading.is_some()
2151 || line_content.starts_with("```")
2152 || line_content.starts_with("~~~")
2153 || line_content.starts_with("---")
2154 || line_content.starts_with("***")
2155 || line_content.starts_with("___")
2156 || line_content.starts_with(">")
2157 || (line_content.contains('|')
2158 && !line_content.contains("](")
2159 && !line_content.contains("http")
2160 && (line_content.matches('|').count() > 1
2161 || line_content.starts_with('|')
2162 || line_content.ends_with('|'))); let is_lazy_continuation = !is_structural_separator
2167 && !line_info.is_blank
2168 && (line_info.indent == 0 || line_info.indent >= min_required_indent);
2169
2170 if is_lazy_continuation {
2171 let content_to_check = if !blockquote_prefix.is_empty() {
2174 line_info
2176 .content
2177 .strip_prefix(&blockquote_prefix)
2178 .unwrap_or(&line_info.content)
2179 .trim()
2180 } else {
2181 line_info.content.trim()
2182 };
2183
2184 let starts_with_uppercase = content_to_check.chars().next().is_some_and(|c| c.is_uppercase());
2185
2186 if starts_with_uppercase && last_list_item_line > 0 {
2189 list_blocks.push(block.clone());
2191 current_block = None;
2192 } else {
2193 block.end_line = line_num;
2195 }
2196 } else {
2197 list_blocks.push(block.clone());
2199 current_block = None;
2200 }
2201 }
2202 }
2203 }
2204
2205 if let Some(block) = current_block {
2207 list_blocks.push(block);
2208 }
2209
2210 merge_adjacent_list_blocks(&mut list_blocks, lines);
2212
2213 list_blocks
2214 }
2215
2216 fn compute_char_frequency(content: &str) -> CharFrequency {
2218 let mut frequency = CharFrequency::default();
2219
2220 for ch in content.chars() {
2221 match ch {
2222 '#' => frequency.hash_count += 1,
2223 '*' => frequency.asterisk_count += 1,
2224 '_' => frequency.underscore_count += 1,
2225 '-' => frequency.hyphen_count += 1,
2226 '+' => frequency.plus_count += 1,
2227 '>' => frequency.gt_count += 1,
2228 '|' => frequency.pipe_count += 1,
2229 '[' => frequency.bracket_count += 1,
2230 '`' => frequency.backtick_count += 1,
2231 '<' => frequency.lt_count += 1,
2232 '!' => frequency.exclamation_count += 1,
2233 '\n' => frequency.newline_count += 1,
2234 _ => {}
2235 }
2236 }
2237
2238 frequency
2239 }
2240
2241 fn parse_html_tags(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<HtmlTag> {
2243 lazy_static! {
2244 static ref HTML_TAG_REGEX: regex::Regex =
2245 regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap();
2246 }
2247
2248 let mut html_tags = Vec::with_capacity(content.matches('<').count());
2249
2250 for cap in HTML_TAG_REGEX.captures_iter(content) {
2251 let full_match = cap.get(0).unwrap();
2252 let match_start = full_match.start();
2253 let match_end = full_match.end();
2254
2255 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2257 continue;
2258 }
2259
2260 let is_closing = !cap.get(1).unwrap().as_str().is_empty();
2261 let tag_name = cap.get(2).unwrap().as_str().to_lowercase();
2262 let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
2263
2264 let mut line_num = 1;
2266 let mut col_start = match_start;
2267 let mut col_end = match_end;
2268 for (idx, line_info) in lines.iter().enumerate() {
2269 if match_start >= line_info.byte_offset {
2270 line_num = idx + 1;
2271 col_start = match_start - line_info.byte_offset;
2272 col_end = match_end - line_info.byte_offset;
2273 } else {
2274 break;
2275 }
2276 }
2277
2278 html_tags.push(HtmlTag {
2279 line: line_num,
2280 start_col: col_start,
2281 end_col: col_end,
2282 byte_offset: match_start,
2283 byte_end: match_end,
2284 tag_name,
2285 is_closing,
2286 is_self_closing,
2287 raw_content: full_match.as_str().to_string(),
2288 });
2289 }
2290
2291 html_tags
2292 }
2293
2294 fn parse_emphasis_spans(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<EmphasisSpan> {
2296 lazy_static! {
2297 static ref EMPHASIS_REGEX: regex::Regex =
2298 regex::Regex::new(r"(\*{1,3}|_{1,3})([^*_\s][^*_]*?)(\*{1,3}|_{1,3})").unwrap();
2299 }
2300
2301 let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
2302
2303 for cap in EMPHASIS_REGEX.captures_iter(content) {
2304 let full_match = cap.get(0).unwrap();
2305 let match_start = full_match.start();
2306 let match_end = full_match.end();
2307
2308 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2310 continue;
2311 }
2312
2313 let opening_markers = cap.get(1).unwrap().as_str();
2314 let content_part = cap.get(2).unwrap().as_str();
2315 let closing_markers = cap.get(3).unwrap().as_str();
2316
2317 if opening_markers.chars().next() != closing_markers.chars().next()
2319 || opening_markers.len() != closing_markers.len()
2320 {
2321 continue;
2322 }
2323
2324 let marker = opening_markers.chars().next().unwrap();
2325 let marker_count = opening_markers.len();
2326
2327 let mut line_num = 1;
2329 let mut col_start = match_start;
2330 let mut col_end = match_end;
2331 for (idx, line_info) in lines.iter().enumerate() {
2332 if match_start >= line_info.byte_offset {
2333 line_num = idx + 1;
2334 col_start = match_start - line_info.byte_offset;
2335 col_end = match_end - line_info.byte_offset;
2336 } else {
2337 break;
2338 }
2339 }
2340
2341 emphasis_spans.push(EmphasisSpan {
2342 line: line_num,
2343 start_col: col_start,
2344 end_col: col_end,
2345 byte_offset: match_start,
2346 byte_end: match_end,
2347 marker,
2348 marker_count,
2349 content: content_part.to_string(),
2350 });
2351 }
2352
2353 emphasis_spans
2354 }
2355
2356 fn parse_table_rows(lines: &[LineInfo]) -> Vec<TableRow> {
2358 let mut table_rows = Vec::with_capacity(lines.len() / 20);
2359
2360 for (line_idx, line_info) in lines.iter().enumerate() {
2361 if line_info.in_code_block || line_info.is_blank {
2363 continue;
2364 }
2365
2366 let line = &line_info.content;
2367 let line_num = line_idx + 1;
2368
2369 if !line.contains('|') {
2371 continue;
2372 }
2373
2374 let parts: Vec<&str> = line.split('|').collect();
2376 let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
2377
2378 let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
2380 let mut column_alignments = Vec::new();
2381
2382 if is_separator {
2383 for part in &parts[1..parts.len() - 1] {
2384 let trimmed = part.trim();
2386 let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
2387 "center".to_string()
2388 } else if trimmed.ends_with(':') {
2389 "right".to_string()
2390 } else if trimmed.starts_with(':') {
2391 "left".to_string()
2392 } else {
2393 "none".to_string()
2394 };
2395 column_alignments.push(alignment);
2396 }
2397 }
2398
2399 table_rows.push(TableRow {
2400 line: line_num,
2401 is_separator,
2402 column_count,
2403 column_alignments,
2404 });
2405 }
2406
2407 table_rows
2408 }
2409
2410 fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
2412 let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
2413
2414 for cap in BARE_URL_PATTERN.captures_iter(content) {
2416 let full_match = cap.get(0).unwrap();
2417 let match_start = full_match.start();
2418 let match_end = full_match.end();
2419
2420 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2422 continue;
2423 }
2424
2425 let preceding_char = if match_start > 0 {
2427 content.chars().nth(match_start - 1)
2428 } else {
2429 None
2430 };
2431 let following_char = content.chars().nth(match_end);
2432
2433 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
2434 continue;
2435 }
2436 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
2437 continue;
2438 }
2439
2440 let url = full_match.as_str();
2441 let url_type = if url.starts_with("https://") {
2442 "https"
2443 } else if url.starts_with("http://") {
2444 "http"
2445 } else if url.starts_with("ftp://") {
2446 "ftp"
2447 } else {
2448 "other"
2449 };
2450
2451 let mut line_num = 1;
2453 let mut col_start = match_start;
2454 let mut col_end = match_end;
2455 for (idx, line_info) in lines.iter().enumerate() {
2456 if match_start >= line_info.byte_offset {
2457 line_num = idx + 1;
2458 col_start = match_start - line_info.byte_offset;
2459 col_end = match_end - line_info.byte_offset;
2460 } else {
2461 break;
2462 }
2463 }
2464
2465 bare_urls.push(BareUrl {
2466 line: line_num,
2467 start_col: col_start,
2468 end_col: col_end,
2469 byte_offset: match_start,
2470 byte_end: match_end,
2471 url: url.to_string(),
2472 url_type: url_type.to_string(),
2473 });
2474 }
2475
2476 for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
2478 let full_match = cap.get(0).unwrap();
2479 let match_start = full_match.start();
2480 let match_end = full_match.end();
2481
2482 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2484 continue;
2485 }
2486
2487 let preceding_char = if match_start > 0 {
2489 content.chars().nth(match_start - 1)
2490 } else {
2491 None
2492 };
2493 let following_char = content.chars().nth(match_end);
2494
2495 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
2496 continue;
2497 }
2498 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
2499 continue;
2500 }
2501
2502 let email = full_match.as_str();
2503
2504 let mut line_num = 1;
2506 let mut col_start = match_start;
2507 let mut col_end = match_end;
2508 for (idx, line_info) in lines.iter().enumerate() {
2509 if match_start >= line_info.byte_offset {
2510 line_num = idx + 1;
2511 col_start = match_start - line_info.byte_offset;
2512 col_end = match_end - line_info.byte_offset;
2513 } else {
2514 break;
2515 }
2516 }
2517
2518 bare_urls.push(BareUrl {
2519 line: line_num,
2520 start_col: col_start,
2521 end_col: col_end,
2522 byte_offset: match_start,
2523 byte_end: match_end,
2524 url: email.to_string(),
2525 url_type: "email".to_string(),
2526 });
2527 }
2528
2529 bare_urls
2530 }
2531}
2532
2533fn merge_adjacent_list_blocks(list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
2535 if list_blocks.len() < 2 {
2536 return;
2537 }
2538
2539 let mut merger = ListBlockMerger::new(lines);
2540 *list_blocks = merger.merge(list_blocks);
2541}
2542
2543struct ListBlockMerger<'a> {
2545 lines: &'a [LineInfo],
2546}
2547
2548impl<'a> ListBlockMerger<'a> {
2549 fn new(lines: &'a [LineInfo]) -> Self {
2550 Self { lines }
2551 }
2552
2553 fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
2554 let mut merged = Vec::with_capacity(list_blocks.len());
2555 let mut current = list_blocks[0].clone();
2556
2557 for next in list_blocks.iter().skip(1) {
2558 if self.should_merge_blocks(¤t, next) {
2559 current = self.merge_two_blocks(current, next);
2560 } else {
2561 merged.push(current);
2562 current = next.clone();
2563 }
2564 }
2565
2566 merged.push(current);
2567 merged
2568 }
2569
2570 fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
2572 if !self.blocks_are_compatible(current, next) {
2574 return false;
2575 }
2576
2577 let spacing = self.analyze_spacing_between(current, next);
2579 match spacing {
2580 BlockSpacing::Consecutive => true,
2581 BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
2582 BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
2583 self.can_merge_with_content_between(current, next)
2584 }
2585 }
2586 }
2587
2588 fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
2590 current.is_ordered == next.is_ordered
2591 && current.blockquote_prefix == next.blockquote_prefix
2592 && current.nesting_level == next.nesting_level
2593 }
2594
2595 fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
2597 let gap = next.start_line - current.end_line;
2598
2599 match gap {
2600 1 => BlockSpacing::Consecutive,
2601 2 => BlockSpacing::SingleBlank,
2602 _ if gap > 2 => {
2603 if self.has_only_blank_lines_between(current, next) {
2604 BlockSpacing::MultipleBlanks
2605 } else {
2606 BlockSpacing::ContentBetween
2607 }
2608 }
2609 _ => BlockSpacing::Consecutive, }
2611 }
2612
2613 fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
2615 if has_meaningful_content_between(current, next, self.lines) {
2618 return false; }
2620
2621 !current.is_ordered && current.marker == next.marker
2623 }
2624
2625 fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
2627 if has_meaningful_content_between(current, next, self.lines) {
2629 return false; }
2631
2632 current.is_ordered && next.is_ordered
2634 }
2635
2636 fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
2638 for line_num in (current.end_line + 1)..next.start_line {
2639 if let Some(line_info) = self.lines.get(line_num - 1)
2640 && !line_info.content.trim().is_empty()
2641 {
2642 return false;
2643 }
2644 }
2645 true
2646 }
2647
2648 fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
2650 current.end_line = next.end_line;
2651 current.item_lines.extend_from_slice(&next.item_lines);
2652
2653 current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
2655
2656 if !current.is_ordered && self.markers_differ(¤t, next) {
2658 current.marker = None; }
2660
2661 current
2662 }
2663
2664 fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
2666 current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
2667 }
2668}
2669
/// How two neighbouring list blocks are separated from each other.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// The second block starts on the line right after the first one ends.
    Consecutive,
    /// Exactly one line lies between the blocks.
    SingleBlank,
    /// Several lines lie between the blocks and all of them are blank.
    MultipleBlanks,
    /// Several lines lie between the blocks and at least one is not blank.
    ContentBetween,
}
2678
2679fn has_meaningful_content_between(current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
2681 for line_num in (current.end_line + 1)..next.start_line {
2683 if let Some(line_info) = lines.get(line_num - 1) {
2684 let trimmed = line_info.content.trim();
2686
2687 if trimmed.is_empty() {
2689 continue;
2690 }
2691
2692 if line_info.heading.is_some() {
2696 return true; }
2698
2699 if is_horizontal_rule(trimmed) {
2701 return true; }
2703
2704 if trimmed.contains('|') && trimmed.len() > 1 {
2707 if !trimmed.contains("](") && !trimmed.contains("http") {
2709 let pipe_count = trimmed.matches('|').count();
2711 if pipe_count > 1 || trimmed.starts_with('|') || trimmed.ends_with('|') {
2712 return true; }
2714 }
2715 }
2716
2717 if trimmed.starts_with('>') {
2719 return true; }
2721
2722 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
2724 let line_indent = line_info.content.len() - line_info.content.trim_start().len();
2725
2726 let min_continuation_indent = if current.is_ordered {
2728 current.nesting_level + current.max_marker_width + 1 } else {
2730 current.nesting_level + 2
2731 };
2732
2733 if line_indent < min_continuation_indent {
2734 return true; }
2737 }
2738
2739 let line_indent = line_info.content.len() - line_info.content.trim_start().len();
2741
2742 let min_indent = if current.is_ordered {
2744 current.nesting_level + current.max_marker_width
2745 } else {
2746 current.nesting_level + 2
2747 };
2748
2749 if line_indent < min_indent {
2751 return true; }
2753
2754 }
2757 }
2758
2759 false
2761}
2762
/// Checks whether an already-trimmed line is a horizontal rule.
///
/// The line qualifies when its first character is `-`, `*` or `_`, every
/// character is either that same marker, a space or a tab, and the marker
/// occurs at least three times.
fn is_horizontal_rule(trimmed: &str) -> bool {
    // Fewer than three bytes can never hold three marker characters.
    if trimmed.len() < 3 {
        return false;
    }

    let marker = match trimmed.chars().next() {
        Some(c) if matches!(c, '-' | '*' | '_') => c,
        _ => return false,
    };

    let mut markers_seen = 0usize;
    for c in trimmed.chars() {
        match c {
            _ if c == marker => markers_seen += 1,
            ' ' | '\t' => {}
            // Any other character disqualifies the line.
            _ => return false,
        }
    }

    markers_seen >= 3
}
2786
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty document has a single line offset (0) and no line entries.
    #[test]
    fn test_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard);
        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }

    /// A one-line document maps byte offsets to 1-based columns on line 1.
    #[test]
    fn test_single_line() {
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard);
        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
    }

    /// Line offsets and offset-to-position mapping across several lines,
    /// including an empty line.
    #[test]
    fn test_multi_line() {
        let content = "# Title\n\nSecond line\nThird line";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);
        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(8), (2, 1));
        assert_eq!(ctx.offset_to_line_col(9), (3, 1));
        assert_eq!(ctx.offset_to_line_col(15), (3, 7));
        assert_eq!(ctx.offset_to_line_col(21), (4, 1));
    }

    /// Per-line metadata: content, byte offset, indent and blank detection.
    #[test]
    fn test_line_info() {
        // The second line is indented by four spaces; the assertions below
        // (`indent == 4`, byte offset 8) depend on that exact indentation.
        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        assert_eq!(ctx.lines.len(), 7);

        let line1 = &ctx.lines[0];
        assert_eq!(line1.content, "# Title");
        assert_eq!(line1.byte_offset, 0);
        assert_eq!(line1.indent, 0);
        assert!(!line1.is_blank);
        assert!(!line1.in_code_block);
        assert!(line1.list_item.is_none());

        let line2 = &ctx.lines[1];
        assert_eq!(line2.content, "    indented");
        assert_eq!(line2.byte_offset, 8);
        assert_eq!(line2.indent, 4);
        assert!(!line2.is_blank);

        let line3 = &ctx.lines[2];
        assert_eq!(line3.content, "");
        assert!(line3.is_blank);

        // The 1-based line accessors must agree with the raw `lines` data.
        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
    }

    /// List markers are recognised for unordered, nested and ordered items,
    /// and plain paragraphs are not treated as list items.
    #[test]
    fn test_list_item_detection() {
        // The nested bullet is indented by two spaces so that its marker sits
        // at column 2, as asserted below.
        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        let line1 = &ctx.lines[0];
        assert!(line1.list_item.is_some());
        let list1 = line1.list_item.as_ref().unwrap();
        assert_eq!(list1.marker, "-");
        assert!(!list1.is_ordered);
        assert_eq!(list1.marker_column, 0);
        assert_eq!(list1.content_column, 2);

        let line2 = &ctx.lines[1];
        assert!(line2.list_item.is_some());
        let list2 = line2.list_item.as_ref().unwrap();
        assert_eq!(list2.marker, "*");
        assert_eq!(list2.marker_column, 2);

        let line3 = &ctx.lines[2];
        assert!(line3.list_item.is_some());
        let list3 = line3.list_item.as_ref().unwrap();
        assert_eq!(list3.marker, "1.");
        assert!(list3.is_ordered);
        assert_eq!(list3.number, Some(1));

        let line6 = &ctx.lines[5];
        assert!(line6.list_item.is_none());
    }

    /// Offsets that land on newline characters and on the end of the content.
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        let content = "a\nb\nc";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(1), (1, 2));
        assert_eq!(ctx.offset_to_line_col(2), (2, 1));
        assert_eq!(ctx.offset_to_line_col(3), (2, 2));
        assert_eq!(ctx.offset_to_line_col(4), (3, 1));
        assert_eq!(ctx.offset_to_line_col(5), (3, 2));
    }
}