1use crate::config::MarkdownFlavor;
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::ast_utils::get_cached_ast;
4use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
5use lazy_static::lazy_static;
6use markdown::mdast::Node;
7use regex::Regex;
8
lazy_static! {
    // Matches a link: bracketed text followed by either an inline destination
    // `(url "title")` / `(<url> 'title')` or a reference `[id]`.
    // Groups: 1 = link text, 2 = angle-bracketed URL, 3 = bare URL,
    // 4/5 = double-/single-quoted title, 6 = reference id.
    // `(?s)` lets `.` cross newlines; `(?x)` enables verbose mode with inline `#` comments.
    static ref LINK_PATTERN: Regex = Regex::new(
        r#"(?sx)
 \[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\] # Link text in group 1 (handles nested brackets)
 (?:
 \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
 |
 \[([^\]]*)\] # Reference ID in group 6
 )"#
    ).unwrap();

    // Same shape as LINK_PATTERN but requires a leading `!`, i.e. image syntax.
    // Groups: 1 = alt text, 2/3 = URL (angle/bare), 4/5 = title, 6 = reference id.
    static ref IMAGE_PATTERN: Regex = Regex::new(
        r#"(?sx)
 !\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\] # Alt text in group 1 (handles nested brackets)
 (?:
 \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
 |
 \[([^\]]*)\] # Reference ID in group 6
 )"#
    ).unwrap();

    // Matches a reference definition line: up to three leading spaces, `[id]:`,
    // a whitespace-free URL, and an optional quoted title.
    // Groups: 1 = id, 2 = URL, 3/4 = double-/single-quoted title.
    static ref REF_DEF_PATTERN: Regex = Regex::new(
        r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#
    ).unwrap();

    // Matches a run of one or more consecutive backticks.
    static ref CODE_SPAN_PATTERN: Regex = Regex::new(
        r"`+"
    ).unwrap();

    // Matches an `http`, `https` or `ftp` URL with optional port, path, query and fragment.
    // Whitespace, angle/square brackets, parentheses, backslash, quotes and backticks end the URL.
    static ref BARE_URL_PATTERN: Regex = Regex::new(
        r#"(https?|ftp)://[^\s<>\[\]()\\'"`]+(?:\.[^\s<>\[\]()\\'"`]+)*(?::\d+)?(?:/[^\s<>\[\]()\\'"`]*)?(?:\?[^\s<>\[\]()\\'"`]*)?(?:#[^\s<>\[\]()\\'"`]*)?"#
    ).unwrap();

    // Matches an email-like token: local part, `@`, domain, and a TLD of two or more letters.
    static ref BARE_EMAIL_PATTERN: Regex = Regex::new(
        r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
    ).unwrap();

    // Matches an angle-bracketed URL (`<https://...>`) or email (`<user@host.tld>`);
    // group 1 is the text between the brackets.
    static ref ANGLE_BRACKET_PATTERN: Regex = Regex::new(
        r"<((?:https?|ftp)://[^>]+|[^@\s]+@[^@\s]+\.[^@\s>]+)>"
    ).unwrap();

    // Matches a blockquote prefix at the start of a line: optional whitespace,
    // one or more `>`, then optional whitespace. Group 1 is the whole prefix.
    static ref BLOCKQUOTE_PREFIX_REGEX: Regex = Regex::new(r"^(\s*>+\s*)").unwrap();
}
63
/// Pre-computed metadata for a single line of the document.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Text of the line as yielded by `str::lines` (no line terminator).
    pub content: String,
    /// Byte offset of the start of this line within the document.
    pub byte_offset: usize,
    /// Number of leading whitespace bytes on the line.
    pub indent: usize,
    /// True when the line is whitespace-only, or whitespace-only after a blockquote prefix.
    pub is_blank: bool,
    /// True when the start of the line lies inside a detected code block.
    pub in_code_block: bool,
    /// True when the line index is before the front matter end line.
    pub in_front_matter: bool,
    /// Initialised to `false` by `compute_line_info`; filled in later by `detect_html_blocks`.
    pub in_html_block: bool,
    /// True when the start of the line lies inside an HTML comment.
    pub in_html_comment: bool,
    /// List marker details when the line starts a list item (marker followed by whitespace).
    pub list_item: Option<ListItemInfo>,
    /// Heading details; for Setext headings this is set on the text line, not the underline.
    pub heading: Option<HeadingInfo>,
    /// Blockquote details when the line begins with one or more `>` markers.
    pub blockquote: Option<BlockquoteInfo>,
    /// True for the MkDocs flavor when the line start is within an autodoc block.
    pub in_mkdocstrings: bool,
}
92
/// Marker information for a line that starts a list item.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker text: `-`, `*` or `+` for unordered items; digits plus `.` or `)` for ordered ones.
    pub marker: String,
    /// True for numbered items.
    pub is_ordered: bool,
    /// Parsed item number for ordered items; `None` for unordered items or if the digits fail to parse.
    pub number: Option<usize>,
    /// Byte column of the marker: blockquote prefix length plus leading whitespace.
    pub marker_column: usize,
    /// Byte column where the item content starts (after the marker and its trailing spacing).
    pub content_column: usize,
}
107
/// Syntactic form a heading was written in.
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    /// `#`-prefixed heading.
    ATX,
    /// Heading underlined with `=` (level 1).
    Setext1,
    /// Heading underlined with `-` (level 2).
    Setext2,
}
118
/// A link found in the document, either inline or reference-style.
#[derive(Debug, Clone)]
pub struct ParsedLink {
    /// 1-indexed line on which the link starts.
    pub line: usize,
    /// 0-based byte column of the link start, relative to its starting line.
    pub start_col: usize,
    /// Byte column of the link end, relative to the start of the line on which the link ends.
    pub end_col: usize,
    /// Byte offset of the link start within the document.
    pub byte_offset: usize,
    /// Byte offset just past the end of the link within the document.
    pub byte_end: usize,
    /// Text between the square brackets.
    pub text: String,
    /// Destination URL for inline links; empty for reference links.
    pub url: String,
    /// True for `[text][ref]`-style links.
    pub is_reference: bool,
    /// Lowercased reference id; falls back to the lowercased link text when the id is empty (`[text][]`).
    pub reference_id: Option<String>,
}
141
/// An image found in the document, either inline or reference-style.
#[derive(Debug, Clone)]
pub struct ParsedImage {
    /// 1-indexed line on which the image starts.
    pub line: usize,
    /// 0-based byte column of the image start (the `!`), relative to its starting line.
    pub start_col: usize,
    /// Byte column of the image end, relative to the start of the line on which the image ends.
    pub end_col: usize,
    /// Byte offset of the image start within the document.
    pub byte_offset: usize,
    /// Byte offset just past the end of the image within the document.
    pub byte_end: usize,
    /// Text between the square brackets.
    pub alt_text: String,
    /// Source URL for inline images; empty for reference images.
    pub url: String,
    /// True for `![alt][ref]`-style images.
    pub is_reference: bool,
    /// Lowercased reference id; falls back to the lowercased alt text when the id is empty.
    pub reference_id: Option<String>,
}
164
/// A reference definition line of the form `[id]: url "title"`.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// 1-indexed line of the definition.
    pub line: usize,
    /// Reference id, lowercased.
    pub id: String,
    /// Destination URL as written.
    pub url: String,
    /// Optional title (the text inside the quotes).
    pub title: Option<String>,
}
177
/// An inline code span. Produced by `parse_code_spans`, which is outside this view.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// Line of the span; compared against 1-indexed line numbers by `is_in_code_span`.
    pub line: usize,
    /// Start column; `is_in_code_span` compares it against a 0-based column (inclusive).
    pub start_col: usize,
    /// End column; treated as exclusive by `is_in_code_span`.
    pub end_col: usize,
    /// Byte offset of the span start in the document (inclusive in range checks).
    pub byte_offset: usize,
    /// Byte offset of the span end in the document (exclusive in range checks).
    pub byte_end: usize,
    /// Presumably the length of the delimiting backtick run — confirm against `parse_code_spans`.
    pub backtick_count: usize,
    /// Presumably the text between the delimiters — confirm against `parse_code_spans`.
    pub content: String,
}
196
/// Details of a heading, attached to the line that carries the heading text.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level: number of `#` for ATX; 1 for `=` underlines and 2 for `-` underlines.
    pub level: u8,
    /// Syntactic style the heading was written in.
    pub style: HeadingStyle,
    /// The `#` run for ATX headings, or the trimmed underline for Setext headings.
    pub marker: String,
    /// Byte column of the marker: on the heading line for ATX, on the underline line for Setext.
    pub marker_column: usize,
    /// Byte column where the heading text begins.
    pub content_column: usize,
    /// Heading text as returned by `extract_header_id` (the first element of its result).
    pub text: String,
    /// Custom id from the heading itself, or from a standalone attribute-list line that follows it.
    pub custom_id: Option<String>,
    /// Trimmed heading text before id extraction (ATX closing hashes already removed).
    pub raw_text: String,
    /// True when an ATX heading ends with a closing `#` sequence.
    pub has_closing_sequence: bool,
    /// The closing `#` run, or an empty string when there is none.
    pub closing_sequence: String,
}
221
/// Details of a blockquote prefix found at the start of a line.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Number of consecutive `>` markers.
    pub nesting_level: usize,
    /// Whitespace preceding the first `>`.
    pub indent: String,
    /// Byte column of the first `>` (equal to the indent length).
    pub marker_column: usize,
    /// Indent, markers and following whitespace concatenated.
    pub prefix: String,
    /// Remainder of the line after the prefix.
    pub content: String,
    /// True when non-empty content directly follows the markers with no whitespace.
    pub has_no_space_after_marker: bool,
    /// True when more than one whitespace byte, or any tab, follows the markers.
    pub has_multiple_spaces_after_marker: bool,
    /// True when the line has markers only: no content and no whitespace after them.
    pub needs_md028_fix: bool,
}
242
/// A contiguous list. Produced by `parse_list_blocks`, which is outside this view,
/// so the field notes below are limited to what callers in this file rely on.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// First line of the block (inclusive in `is_in_list_block` / `list_block_for_line`).
    pub start_line: usize,
    /// Last line of the block (inclusive in `is_in_list_block` / `list_block_for_line`).
    pub end_line: usize,
    /// Whether the block is an ordered list.
    pub is_ordered: bool,
    /// Marker associated with the block, if any — NOTE(review): semantics of `None` not visible here.
    pub marker: Option<String>,
    /// Blockquote prefix associated with the block.
    pub blockquote_prefix: String,
    /// Line numbers of the items in the block.
    pub item_lines: Vec<usize>,
    /// Nesting level of the block.
    pub nesting_level: usize,
    /// Presumably the widest marker among the block's items — confirm against `parse_list_blocks`.
    pub max_marker_width: usize,
}
263
264use std::sync::{Arc, Mutex};
265
/// Per-character counters used for cheap "does this document contain X" checks.
///
/// Populated by `compute_char_frequency` (outside this view). Each field note names the
/// character that `has_char` / `char_count` look up through that field.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Counter consulted for `#`.
    pub hash_count: usize,
    /// Counter consulted for `*`.
    pub asterisk_count: usize,
    /// Counter consulted for `_`.
    pub underscore_count: usize,
    /// Counter consulted for `-`.
    pub hyphen_count: usize,
    /// Counter consulted for `+`.
    pub plus_count: usize,
    /// Counter consulted for `>`.
    pub gt_count: usize,
    /// Counter consulted for `|`.
    pub pipe_count: usize,
    /// Counter consulted for `[`.
    pub bracket_count: usize,
    /// Counter consulted for a backtick.
    pub backtick_count: usize,
    /// Counter consulted for `<`.
    pub lt_count: usize,
    /// Counter consulted for `!`.
    pub exclamation_count: usize,
    /// Counter consulted for `\n`.
    pub newline_count: usize,
}
294
/// An HTML tag located in the document. Produced by `parse_html_tags` (outside this view).
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line of the tag; `html_tags_on_line` filters on this value.
    pub line: usize,
    /// Start column of the tag on its line.
    pub start_col: usize,
    /// End column of the tag on its line.
    pub end_col: usize,
    /// Byte offset of the tag start in the document.
    pub byte_offset: usize,
    /// Byte offset of the tag end in the document.
    pub byte_end: usize,
    /// Element name of the tag.
    pub tag_name: String,
    /// Whether this is a closing tag.
    pub is_closing: bool,
    /// Whether this tag is self-closing.
    pub is_self_closing: bool,
    /// Raw text of the tag — NOTE(review): exact extent not visible here.
    pub raw_content: String,
}
317
/// An emphasis span located in the document. Produced by `parse_emphasis_spans` (outside this view).
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line of the span; `emphasis_spans_on_line` filters on this value.
    pub line: usize,
    /// Start column of the span on its line.
    pub start_col: usize,
    /// End column of the span on its line.
    pub end_col: usize,
    /// Byte offset of the span start in the document.
    pub byte_offset: usize,
    /// Byte offset of the span end in the document.
    pub byte_end: usize,
    /// Emphasis marker character — presumably `*` or `_`; confirm against the parser.
    pub marker: char,
    /// Number of marker characters in the delimiter.
    pub marker_count: usize,
    /// Text of the span — NOTE(review): whether delimiters are included is not visible here.
    pub content: String,
}
338
/// A table row located in the document. Produced by `parse_table_rows` (outside this view).
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line of the row; `table_rows_on_line` filters on this value.
    pub line: usize,
    /// Whether this row is the header/body separator row.
    pub is_separator: bool,
    /// Number of columns in the row.
    pub column_count: usize,
    /// Column alignments as strings — NOTE(review): the set of values is defined by the parser.
    pub column_alignments: Vec<String>,
}
351
/// A bare URL or email located in the document. Produced by `parse_bare_urls` (outside this view).
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line of the URL; `bare_urls_on_line` filters on this value.
    pub line: usize,
    /// Start column of the URL on its line.
    pub start_col: usize,
    /// End column of the URL on its line.
    pub end_col: usize,
    /// Byte offset of the URL start in the document.
    pub byte_offset: usize,
    /// Byte offset of the URL end in the document.
    pub byte_end: usize,
    /// The matched URL text.
    pub url: String,
    /// Kind of URL as a string — NOTE(review): the set of values is defined by the parser.
    pub url_type: String,
}
370
/// Shared, pre-parsed view of a Markdown document handed to lint rules.
///
/// Line, link, image, reference and list data are computed eagerly in `new`;
/// HTML tags, emphasis spans, table rows and bare URLs are parsed on first use
/// and memoised behind a `Mutex`.
pub struct LintContext<'a> {
    /// The document text being linted.
    pub content: &'a str,
    /// Byte offset of the start of every line; the first entry is always 0.
    pub line_offsets: Vec<usize>,
    /// Byte ranges `(start, end)` of detected code blocks.
    pub code_blocks: Vec<(usize, usize)>,
    /// Per-line metadata, indexed by `line_number - 1`.
    pub lines: Vec<LineInfo>,
    /// Links found outside code blocks, code spans and HTML comments.
    pub links: Vec<ParsedLink>,
    /// Images found outside code blocks, code spans and HTML comments.
    pub images: Vec<ParsedImage>,
    /// Reference definitions found outside code blocks.
    pub reference_defs: Vec<ReferenceDef>,
    // Code spans; pre-populated by `new`, re-parsed on demand if empty.
    code_spans_cache: Mutex<Option<Arc<Vec<CodeSpan>>>>,
    /// List blocks computed from the line table.
    pub list_blocks: Vec<ListBlock>,
    /// Character counters backing `has_char`, `char_count` and the `likely_has_*` checks.
    pub char_frequency: CharFrequency,
    // Lazily parsed HTML tags.
    html_tags_cache: Mutex<Option<Arc<Vec<HtmlTag>>>>,
    // Lazily parsed emphasis spans.
    emphasis_spans_cache: Mutex<Option<Arc<Vec<EmphasisSpan>>>>,
    // Lazily parsed table rows.
    table_rows_cache: Mutex<Option<Arc<Vec<TableRow>>>>,
    // Lazily parsed bare URLs.
    bare_urls_cache: Mutex<Option<Arc<Vec<BareUrl>>>>,
    // Lazily fetched AST; left empty by `new`.
    ast_cache: Mutex<Option<Arc<Node>>>,
    /// Markdown flavor the document is linted as.
    pub flavor: MarkdownFlavor,
}
389
390impl<'a> LintContext<'a> {
391 pub fn new(content: &'a str, flavor: MarkdownFlavor) -> Self {
392 let mut line_offsets = vec![0];
393 for (i, c) in content.char_indices() {
394 if c == '\n' {
395 line_offsets.push(i + 1);
396 }
397 }
398
399 let code_blocks = CodeBlockUtils::detect_code_blocks(content);
401
402 let mut lines = Self::compute_line_info(content, &line_offsets, &code_blocks, flavor);
404
405 let ast = get_cached_ast(content);
407 let code_spans = Self::parse_code_spans(content, &lines, &ast);
408
409 let links = Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor);
411 let images = Self::parse_images(content, &lines, &code_blocks, &code_spans);
412 let reference_defs = Self::parse_reference_defs(content, &lines);
413 let list_blocks = Self::parse_list_blocks(&lines);
416
417 Self::detect_html_blocks(&mut lines);
419
420 let char_frequency = Self::compute_char_frequency(content);
422
423 Self {
424 content,
425 line_offsets,
426 code_blocks,
427 lines,
428 links,
429 images,
430 reference_defs,
431 code_spans_cache: Mutex::new(Some(Arc::new(code_spans))),
432 list_blocks,
433 char_frequency,
434 html_tags_cache: Mutex::new(None),
435 emphasis_spans_cache: Mutex::new(None),
436 table_rows_cache: Mutex::new(None),
437 bare_urls_cache: Mutex::new(None),
438 ast_cache: Mutex::new(None),
439 flavor,
440 }
441 }
442
443 pub fn get_ast(&self) -> Arc<Node> {
445 let mut cache = self.ast_cache.lock().unwrap();
446
447 if cache.is_none() {
448 *cache = Some(get_cached_ast(self.content));
451 }
452
453 cache.as_ref().unwrap().clone()
454 }
455
456 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
458 let mut cache = self.code_spans_cache.lock().unwrap();
459
460 if cache.is_none() {
462 let ast = self.get_ast();
463 let code_spans = Self::parse_code_spans(self.content, &self.lines, &ast);
464 *cache = Some(Arc::new(code_spans));
465 }
466
467 cache.as_ref().unwrap().clone()
469 }
470
471 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
473 let mut cache = self.html_tags_cache.lock().unwrap();
474
475 if cache.is_none() {
476 let html_tags = Self::parse_html_tags(self.content, &self.lines, &self.code_blocks);
477 *cache = Some(Arc::new(html_tags));
478 }
479
480 cache.as_ref().unwrap().clone()
481 }
482
483 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
485 let mut cache = self.emphasis_spans_cache.lock().unwrap();
486
487 if cache.is_none() {
488 let emphasis_spans = Self::parse_emphasis_spans(self.content, &self.lines, &self.code_blocks);
489 *cache = Some(Arc::new(emphasis_spans));
490 }
491
492 cache.as_ref().unwrap().clone()
493 }
494
495 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
497 let mut cache = self.table_rows_cache.lock().unwrap();
498
499 if cache.is_none() {
500 let table_rows = Self::parse_table_rows(&self.lines);
501 *cache = Some(Arc::new(table_rows));
502 }
503
504 cache.as_ref().unwrap().clone()
505 }
506
507 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
509 let mut cache = self.bare_urls_cache.lock().unwrap();
510
511 if cache.is_none() {
512 let bare_urls = Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks);
513 *cache = Some(Arc::new(bare_urls));
514 }
515
516 cache.as_ref().unwrap().clone()
517 }
518
519 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
521 match self.line_offsets.binary_search(&offset) {
522 Ok(line) => (line + 1, 1),
523 Err(line) => {
524 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
525 (line, offset - line_start + 1)
526 }
527 }
528 }
529
530 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
532 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
534 return true;
535 }
536
537 self.code_spans()
539 .iter()
540 .any(|span| pos >= span.byte_offset && pos < span.byte_end)
541 }
542
543 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
545 if line_num > 0 {
546 self.lines.get(line_num - 1)
547 } else {
548 None
549 }
550 }
551
552 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
554 self.line_info(line_num).map(|info| info.byte_offset)
555 }
556
557 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
559 let normalized_id = ref_id.to_lowercase();
560 self.reference_defs
561 .iter()
562 .find(|def| def.id == normalized_id)
563 .map(|def| def.url.as_str())
564 }
565
566 pub fn links_on_line(&self, line_num: usize) -> Vec<&ParsedLink> {
568 self.links.iter().filter(|link| link.line == line_num).collect()
569 }
570
571 pub fn images_on_line(&self, line_num: usize) -> Vec<&ParsedImage> {
573 self.images.iter().filter(|img| img.line == line_num).collect()
574 }
575
576 pub fn is_in_list_block(&self, line_num: usize) -> bool {
578 self.list_blocks
579 .iter()
580 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
581 }
582
583 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
585 self.list_blocks
586 .iter()
587 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
588 }
589
590 pub fn is_in_code_block(&self, line_num: usize) -> bool {
594 if line_num == 0 || line_num > self.lines.len() {
595 return false;
596 }
597 self.lines[line_num - 1].in_code_block
598 }
599
600 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
602 if line_num == 0 || line_num > self.lines.len() {
603 return false;
604 }
605 self.lines[line_num - 1].in_front_matter
606 }
607
608 pub fn is_in_html_block(&self, line_num: usize) -> bool {
610 if line_num == 0 || line_num > self.lines.len() {
611 return false;
612 }
613 self.lines[line_num - 1].in_html_block
614 }
615
616 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
618 if line_num == 0 || line_num > self.lines.len() {
619 return false;
620 }
621
622 let col_0indexed = if col > 0 { col - 1 } else { 0 };
626 let code_spans = self.code_spans();
627 code_spans
628 .iter()
629 .any(|span| span.line == line_num && col_0indexed >= span.start_col && col_0indexed < span.end_col)
630 }
631
632 pub fn has_char(&self, ch: char) -> bool {
634 match ch {
635 '#' => self.char_frequency.hash_count > 0,
636 '*' => self.char_frequency.asterisk_count > 0,
637 '_' => self.char_frequency.underscore_count > 0,
638 '-' => self.char_frequency.hyphen_count > 0,
639 '+' => self.char_frequency.plus_count > 0,
640 '>' => self.char_frequency.gt_count > 0,
641 '|' => self.char_frequency.pipe_count > 0,
642 '[' => self.char_frequency.bracket_count > 0,
643 '`' => self.char_frequency.backtick_count > 0,
644 '<' => self.char_frequency.lt_count > 0,
645 '!' => self.char_frequency.exclamation_count > 0,
646 '\n' => self.char_frequency.newline_count > 0,
647 _ => self.content.contains(ch), }
649 }
650
651 pub fn char_count(&self, ch: char) -> usize {
653 match ch {
654 '#' => self.char_frequency.hash_count,
655 '*' => self.char_frequency.asterisk_count,
656 '_' => self.char_frequency.underscore_count,
657 '-' => self.char_frequency.hyphen_count,
658 '+' => self.char_frequency.plus_count,
659 '>' => self.char_frequency.gt_count,
660 '|' => self.char_frequency.pipe_count,
661 '[' => self.char_frequency.bracket_count,
662 '`' => self.char_frequency.backtick_count,
663 '<' => self.char_frequency.lt_count,
664 '!' => self.char_frequency.exclamation_count,
665 '\n' => self.char_frequency.newline_count,
666 _ => self.content.matches(ch).count(), }
668 }
669
670 pub fn likely_has_headings(&self) -> bool {
672 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
674
675 pub fn likely_has_lists(&self) -> bool {
677 self.char_frequency.asterisk_count > 0
678 || self.char_frequency.hyphen_count > 0
679 || self.char_frequency.plus_count > 0
680 }
681
682 pub fn likely_has_emphasis(&self) -> bool {
684 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
685 }
686
687 pub fn likely_has_tables(&self) -> bool {
689 self.char_frequency.pipe_count > 2
690 }
691
692 pub fn likely_has_blockquotes(&self) -> bool {
694 self.char_frequency.gt_count > 0
695 }
696
697 pub fn likely_has_code(&self) -> bool {
699 self.char_frequency.backtick_count > 0
700 }
701
702 pub fn likely_has_links_or_images(&self) -> bool {
704 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
705 }
706
707 pub fn likely_has_html(&self) -> bool {
709 self.char_frequency.lt_count > 0
710 }
711
712 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
714 self.html_tags()
715 .iter()
716 .filter(|tag| tag.line == line_num)
717 .cloned()
718 .collect()
719 }
720
721 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
723 self.emphasis_spans()
724 .iter()
725 .filter(|span| span.line == line_num)
726 .cloned()
727 .collect()
728 }
729
730 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
732 self.table_rows()
733 .iter()
734 .filter(|row| row.line == line_num)
735 .cloned()
736 .collect()
737 }
738
739 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
741 self.bare_urls()
742 .iter()
743 .filter(|url| url.line == line_num)
744 .cloned()
745 .collect()
746 }
747
748 fn parse_links(
750 content: &str,
751 lines: &[LineInfo],
752 code_blocks: &[(usize, usize)],
753 code_spans: &[CodeSpan],
754 flavor: MarkdownFlavor,
755 ) -> Vec<ParsedLink> {
756 use crate::utils::skip_context::{is_in_html_comment, is_mkdocs_snippet_line};
757
758 let mut links = Vec::with_capacity(content.len() / 500); for cap in LINK_PATTERN.captures_iter(content) {
763 let full_match = cap.get(0).unwrap();
764 let match_start = full_match.start();
765 let match_end = full_match.end();
766
767 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
769 continue;
770 }
771
772 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
774 continue;
775 }
776
777 if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
779 continue;
780 }
781
782 if code_spans
784 .iter()
785 .any(|span| match_start >= span.byte_offset && match_start < span.byte_end)
786 {
787 continue;
788 }
789
790 if is_in_html_comment(content, match_start) {
792 continue;
793 }
794
795 let line_idx = lines
798 .iter()
799 .position(|line| {
800 match_start >= line.byte_offset && (match_start < line.byte_offset + line.content.len() + 1)
801 })
802 .unwrap_or(0);
803
804 if is_mkdocs_snippet_line(&lines[line_idx].content, flavor) {
805 continue;
806 }
807
808 let mut line_num = 1;
810 let mut col_start = match_start;
811 for (idx, line_info) in lines.iter().enumerate() {
812 if match_start >= line_info.byte_offset {
813 line_num = idx + 1;
814 col_start = match_start - line_info.byte_offset;
815 } else {
816 break;
817 }
818 }
819
820 let mut end_line_num = 1;
822 let mut col_end = match_end;
823 for (idx, line_info) in lines.iter().enumerate() {
824 if match_end > line_info.byte_offset {
825 end_line_num = idx + 1;
826 col_end = match_end - line_info.byte_offset;
827 } else {
828 break;
829 }
830 }
831
832 if line_num == end_line_num {
834 } else {
836 }
839
840 let text = cap.get(1).map_or("", |m| m.as_str()).to_string();
841
842 let inline_url = cap.get(2).or_else(|| cap.get(3));
844
845 if let Some(url_match) = inline_url {
846 links.push(ParsedLink {
848 line: line_num,
849 start_col: col_start,
850 end_col: col_end,
851 byte_offset: match_start,
852 byte_end: match_end,
853 text,
854 url: url_match.as_str().to_string(),
855 is_reference: false,
856 reference_id: None,
857 });
858 } else if let Some(ref_id) = cap.get(6) {
859 let ref_id_str = ref_id.as_str();
861 let normalized_ref = if ref_id_str.is_empty() {
862 text.to_lowercase() } else {
864 ref_id_str.to_lowercase()
865 };
866
867 links.push(ParsedLink {
868 line: line_num,
869 start_col: col_start,
870 end_col: col_end,
871 byte_offset: match_start,
872 byte_end: match_end,
873 text,
874 url: String::new(), is_reference: true,
876 reference_id: Some(normalized_ref),
877 });
878 }
879 }
880
881 links
882 }
883
884 fn parse_images(
886 content: &str,
887 lines: &[LineInfo],
888 code_blocks: &[(usize, usize)],
889 code_spans: &[CodeSpan],
890 ) -> Vec<ParsedImage> {
891 use crate::utils::skip_context::is_in_html_comment;
892
893 let mut images = Vec::with_capacity(content.len() / 1000); for cap in IMAGE_PATTERN.captures_iter(content) {
898 let full_match = cap.get(0).unwrap();
899 let match_start = full_match.start();
900 let match_end = full_match.end();
901
902 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
904 continue;
905 }
906
907 if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
909 continue;
910 }
911
912 if code_spans
914 .iter()
915 .any(|span| match_start >= span.byte_offset && match_start < span.byte_end)
916 {
917 continue;
918 }
919
920 if is_in_html_comment(content, match_start) {
922 continue;
923 }
924
925 let mut line_num = 1;
927 let mut col_start = match_start;
928 for (idx, line_info) in lines.iter().enumerate() {
929 if match_start >= line_info.byte_offset {
930 line_num = idx + 1;
931 col_start = match_start - line_info.byte_offset;
932 } else {
933 break;
934 }
935 }
936
937 let mut end_line_num = 1;
939 let mut col_end = match_end;
940 for (idx, line_info) in lines.iter().enumerate() {
941 if match_end > line_info.byte_offset {
942 end_line_num = idx + 1;
943 col_end = match_end - line_info.byte_offset;
944 } else {
945 break;
946 }
947 }
948
949 if line_num == end_line_num {
951 } else {
953 }
956
957 let alt_text = cap.get(1).map_or("", |m| m.as_str()).to_string();
958
959 let inline_url = cap.get(2).or_else(|| cap.get(3));
961
962 if let Some(url_match) = inline_url {
963 images.push(ParsedImage {
965 line: line_num,
966 start_col: col_start,
967 end_col: col_end,
968 byte_offset: match_start,
969 byte_end: match_end,
970 alt_text,
971 url: url_match.as_str().to_string(),
972 is_reference: false,
973 reference_id: None,
974 });
975 } else if let Some(ref_id) = cap.get(6) {
976 let ref_id_str = ref_id.as_str();
978 let normalized_ref = if ref_id_str.is_empty() {
979 alt_text.to_lowercase() } else {
981 ref_id_str.to_lowercase()
982 };
983
984 images.push(ParsedImage {
985 line: line_num,
986 start_col: col_start,
987 end_col: col_end,
988 byte_offset: match_start,
989 byte_end: match_end,
990 alt_text,
991 url: String::new(), is_reference: true,
993 reference_id: Some(normalized_ref),
994 });
995 }
996 }
997
998 images
999 }
1000
1001 fn parse_reference_defs(_content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1003 let mut refs = Vec::with_capacity(lines.len() / 20); for (line_idx, line_info) in lines.iter().enumerate() {
1007 if line_info.in_code_block {
1009 continue;
1010 }
1011
1012 let line = &line_info.content;
1013 let line_num = line_idx + 1;
1014
1015 if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1016 let id = cap.get(1).unwrap().as_str().to_lowercase();
1017 let url = cap.get(2).unwrap().as_str().to_string();
1018 let title = cap.get(3).or_else(|| cap.get(4)).map(|m| m.as_str().to_string());
1019
1020 refs.push(ReferenceDef {
1021 line: line_num,
1022 id,
1023 url,
1024 title,
1025 });
1026 }
1027 }
1028
1029 refs
1030 }
1031
1032 fn compute_line_info(
1034 content: &str,
1035 line_offsets: &[usize],
1036 code_blocks: &[(usize, usize)],
1037 flavor: MarkdownFlavor,
1038 ) -> Vec<LineInfo> {
1039 lazy_static! {
1040 static ref UNORDERED_REGEX: regex::Regex = regex::Regex::new(r"^(\s*)([-*+])([ \t]*)(.*)").unwrap();
1042 static ref ORDERED_REGEX: regex::Regex = regex::Regex::new(r"^(\s*)(\d+)([.)])([ \t]*)(.*)").unwrap();
1043
1044 static ref BLOCKQUOTE_REGEX: regex::Regex = regex::Regex::new(r"^(\s*>\s*)(.*)").unwrap();
1046
1047 static ref ATX_HEADING_REGEX: regex::Regex = regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap();
1049 static ref SETEXT_UNDERLINE_REGEX: regex::Regex = regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap();
1050
1051 static ref BLOCKQUOTE_REGEX_FULL: regex::Regex = regex::Regex::new(r"^(\s*)(>+)(\s*)(.*)$").unwrap();
1053 }
1054
1055 let content_lines: Vec<&str> = content.lines().collect();
1056 let mut lines = Vec::with_capacity(content_lines.len());
1057
1058 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
1061
1062 for (i, line) in content_lines.iter().enumerate() {
1063 let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
1064 let indent = line.len() - line.trim_start().len();
1065 let is_blank = if let Some(caps) = BLOCKQUOTE_REGEX.captures(line) {
1067 let after_prefix = caps.get(2).map_or("", |m| m.as_str());
1069 after_prefix.trim().is_empty()
1070 } else {
1071 line.trim().is_empty()
1072 };
1073 let in_code_block = code_blocks.iter().any(|&(start, end)| {
1076 let safe_start = if start > 0 && !content.is_char_boundary(start) {
1081 let mut boundary = start;
1083 while boundary > 0 && !content.is_char_boundary(boundary) {
1084 boundary -= 1;
1085 }
1086 boundary
1087 } else {
1088 start
1089 };
1090
1091 let safe_end = if end < content.len() && !content.is_char_boundary(end) {
1092 let mut boundary = end;
1094 while boundary < content.len() && !content.is_char_boundary(boundary) {
1095 boundary += 1;
1096 }
1097 boundary
1098 } else {
1099 end.min(content.len())
1100 };
1101
1102 let block_content = &content[safe_start..safe_end];
1103 let is_multiline = block_content.contains('\n');
1104 let is_fenced = block_content.starts_with("```") || block_content.starts_with("~~~");
1105 let is_indented = !is_fenced
1106 && block_content
1107 .lines()
1108 .all(|l| l.starts_with(" ") || l.starts_with("\t") || l.trim().is_empty());
1109
1110 byte_offset >= start && byte_offset < end && (is_multiline || is_fenced || is_indented)
1111 });
1112
1113 let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
1115 && crate::utils::mkdocstrings_refs::is_within_autodoc_block(content, byte_offset);
1116 let in_html_comment = crate::utils::skip_context::is_in_html_comment(content, byte_offset);
1117 let list_item = if !(in_code_block
1118 || is_blank
1119 || in_mkdocstrings
1120 || in_html_comment
1121 || (front_matter_end > 0 && i < front_matter_end))
1122 {
1123 let (line_for_list_check, blockquote_prefix_len) = if let Some(caps) = BLOCKQUOTE_REGEX.captures(line) {
1125 let prefix = caps.get(1).unwrap().as_str();
1126 let content = caps.get(2).unwrap().as_str();
1127 (content, prefix.len())
1128 } else {
1129 (&**line, 0)
1130 };
1131
1132 if let Some(caps) = UNORDERED_REGEX.captures(line_for_list_check) {
1133 let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
1134 let marker = caps.get(2).map_or("", |m| m.as_str());
1135 let spacing = caps.get(3).map_or("", |m| m.as_str());
1136 let _content = caps.get(4).map_or("", |m| m.as_str());
1137 let marker_column = blockquote_prefix_len + leading_spaces.len();
1138 let content_column = marker_column + marker.len() + spacing.len();
1139
1140 if spacing.is_empty() {
1147 None
1148 } else {
1149 Some(ListItemInfo {
1150 marker: marker.to_string(),
1151 is_ordered: false,
1152 number: None,
1153 marker_column,
1154 content_column,
1155 })
1156 }
1157 } else if let Some(caps) = ORDERED_REGEX.captures(line_for_list_check) {
1158 let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
1159 let number_str = caps.get(2).map_or("", |m| m.as_str());
1160 let delimiter = caps.get(3).map_or("", |m| m.as_str());
1161 let spacing = caps.get(4).map_or("", |m| m.as_str());
1162 let _content = caps.get(5).map_or("", |m| m.as_str());
1163 let marker = format!("{number_str}{delimiter}");
1164 let marker_column = blockquote_prefix_len + leading_spaces.len();
1165 let content_column = marker_column + marker.len() + spacing.len();
1166
1167 if spacing.is_empty() {
1170 None
1171 } else {
1172 Some(ListItemInfo {
1173 marker,
1174 is_ordered: true,
1175 number: number_str.parse().ok(),
1176 marker_column,
1177 content_column,
1178 })
1179 }
1180 } else {
1181 None
1182 }
1183 } else {
1184 None
1185 };
1186
1187 lines.push(LineInfo {
1188 content: line.to_string(),
1189 byte_offset,
1190 indent,
1191 is_blank,
1192 in_code_block,
1193 in_front_matter: front_matter_end > 0 && i < front_matter_end,
1194 in_html_block: false, in_html_comment,
1196 list_item,
1197 heading: None, blockquote: None, in_mkdocstrings,
1200 });
1201 }
1202
1203 for i in 0..content_lines.len() {
1205 if lines[i].in_code_block {
1206 continue;
1207 }
1208
1209 if front_matter_end > 0 && i < front_matter_end {
1211 continue;
1212 }
1213
1214 let line = content_lines[i];
1215
1216 if let Some(caps) = BLOCKQUOTE_REGEX_FULL.captures(line) {
1218 let indent_str = caps.get(1).map_or("", |m| m.as_str());
1219 let markers = caps.get(2).map_or("", |m| m.as_str());
1220 let spaces_after = caps.get(3).map_or("", |m| m.as_str());
1221 let content = caps.get(4).map_or("", |m| m.as_str());
1222
1223 let nesting_level = markers.chars().filter(|&c| c == '>').count();
1224 let marker_column = indent_str.len();
1225
1226 let prefix = format!("{indent_str}{markers}{spaces_after}");
1228
1229 let has_no_space = spaces_after.is_empty() && !content.is_empty();
1231 let has_multiple_spaces = spaces_after.len() > 1 || spaces_after.contains('\t');
1233
1234 let needs_md028_fix = content.is_empty() && spaces_after.is_empty();
1238
1239 lines[i].blockquote = Some(BlockquoteInfo {
1240 nesting_level,
1241 indent: indent_str.to_string(),
1242 marker_column,
1243 prefix,
1244 content: content.to_string(),
1245 has_no_space_after_marker: has_no_space,
1246 has_multiple_spaces_after_marker: has_multiple_spaces,
1247 needs_md028_fix,
1248 });
1249 }
1250
1251 if lines[i].is_blank {
1253 continue;
1254 }
1255
1256 let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
1259 crate::utils::mkdocs_snippets::is_snippet_section_start(line)
1260 || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
1261 } else {
1262 false
1263 };
1264
1265 if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
1266 if crate::utils::skip_context::is_in_html_comment(content, lines[i].byte_offset) {
1268 continue;
1269 }
1270 let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
1271 let hashes = caps.get(2).map_or("", |m| m.as_str());
1272 let spaces_after = caps.get(3).map_or("", |m| m.as_str());
1273 let rest = caps.get(4).map_or("", |m| m.as_str());
1274
1275 let level = hashes.len() as u8;
1276 let marker_column = leading_spaces.len();
1277
1278 let (text, has_closing, closing_seq) = {
1280 let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
1282 if rest[id_start..].trim_end().ends_with('}') {
1284 (&rest[..id_start], &rest[id_start..])
1286 } else {
1287 (rest, "")
1288 }
1289 } else {
1290 (rest, "")
1291 };
1292
1293 let trimmed_rest = rest_without_id.trim_end();
1295 if let Some(last_hash_pos) = trimmed_rest.rfind('#') {
1296 let mut start_of_hashes = last_hash_pos;
1298 while start_of_hashes > 0 && trimmed_rest.chars().nth(start_of_hashes - 1) == Some('#') {
1299 start_of_hashes -= 1;
1300 }
1301
1302 let has_space_before = start_of_hashes == 0
1304 || trimmed_rest
1305 .chars()
1306 .nth(start_of_hashes - 1)
1307 .is_some_and(|c| c.is_whitespace());
1308
1309 let potential_closing = &trimmed_rest[start_of_hashes..];
1311 let is_all_hashes = potential_closing.chars().all(|c| c == '#');
1312
1313 if is_all_hashes && has_space_before {
1314 let closing_hashes = potential_closing.to_string();
1316 let text_part = if !custom_id_part.is_empty() {
1319 format!("{}{}", rest_without_id[..start_of_hashes].trim_end(), custom_id_part)
1322 } else {
1323 rest_without_id[..start_of_hashes].trim_end().to_string()
1324 };
1325 (text_part, true, closing_hashes)
1326 } else {
1327 (rest.to_string(), false, String::new())
1329 }
1330 } else {
1331 (rest.to_string(), false, String::new())
1333 }
1334 };
1335
1336 let content_column = marker_column + hashes.len() + spaces_after.len();
1337
1338 let raw_text = text.trim().to_string();
1340 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
1341
1342 if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
1344 let next_line = content_lines[i + 1];
1345 if !lines[i + 1].in_code_block
1346 && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
1347 && let Some(next_line_id) =
1348 crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
1349 {
1350 custom_id = Some(next_line_id);
1351 }
1352 }
1353
1354 lines[i].heading = Some(HeadingInfo {
1355 level,
1356 style: HeadingStyle::ATX,
1357 marker: hashes.to_string(),
1358 marker_column,
1359 content_column,
1360 text: clean_text,
1361 custom_id,
1362 raw_text,
1363 has_closing_sequence: has_closing,
1364 closing_sequence: closing_seq,
1365 });
1366 }
1367 else if i + 1 < content_lines.len() {
1369 let next_line = content_lines[i + 1];
1370 if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
1371 if front_matter_end > 0 && i < front_matter_end {
1373 continue;
1374 }
1375
1376 if crate::utils::skip_context::is_in_html_comment(content, lines[i].byte_offset) {
1378 continue;
1379 }
1380
1381 let underline = next_line.trim();
1382
1383 if underline == "---" {
1386 continue;
1387 }
1388
1389 let current_line_trimmed = line.trim();
1391 if current_line_trimmed.contains(':')
1392 && !current_line_trimmed.starts_with('#')
1393 && !current_line_trimmed.contains('[')
1394 && !current_line_trimmed.contains("](")
1395 {
1396 continue;
1398 }
1399
1400 let level = if underline.starts_with('=') { 1 } else { 2 };
1401 let style = if level == 1 {
1402 HeadingStyle::Setext1
1403 } else {
1404 HeadingStyle::Setext2
1405 };
1406
1407 let raw_text = line.trim().to_string();
1409 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
1410
1411 if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
1413 let attr_line = content_lines[i + 2];
1414 if !lines[i + 2].in_code_block
1415 && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
1416 && let Some(attr_line_id) =
1417 crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
1418 {
1419 custom_id = Some(attr_line_id);
1420 }
1421 }
1422
1423 lines[i].heading = Some(HeadingInfo {
1424 level,
1425 style,
1426 marker: underline.to_string(),
1427 marker_column: next_line.len() - next_line.trim_start().len(),
1428 content_column: lines[i].indent,
1429 text: clean_text,
1430 custom_id,
1431 raw_text,
1432 has_closing_sequence: false,
1433 closing_sequence: String::new(),
1434 });
1435 }
1436 }
1437 }
1438
1439 lines
1440 }
1441
1442 fn detect_html_blocks(lines: &mut [LineInfo]) {
1444 const BLOCK_ELEMENTS: &[&str] = &[
1446 "address",
1447 "article",
1448 "aside",
1449 "blockquote",
1450 "details",
1451 "dialog",
1452 "dd",
1453 "div",
1454 "dl",
1455 "dt",
1456 "fieldset",
1457 "figcaption",
1458 "figure",
1459 "footer",
1460 "form",
1461 "h1",
1462 "h2",
1463 "h3",
1464 "h4",
1465 "h5",
1466 "h6",
1467 "header",
1468 "hr",
1469 "li",
1470 "main",
1471 "nav",
1472 "ol",
1473 "p",
1474 "pre",
1475 "section",
1476 "table",
1477 "tbody",
1478 "td",
1479 "tfoot",
1480 "th",
1481 "thead",
1482 "tr",
1483 "ul",
1484 ];
1485
1486 let mut i = 0;
1487 while i < lines.len() {
1488 if lines[i].in_code_block || lines[i].in_front_matter {
1490 i += 1;
1491 continue;
1492 }
1493
1494 let trimmed = lines[i].content.trim_start();
1495
1496 if trimmed.starts_with('<') && trimmed.len() > 1 {
1498 let after_bracket = &trimmed[1..];
1500 let is_closing = after_bracket.starts_with('/');
1501 let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
1502
1503 let tag_name = tag_start
1505 .chars()
1506 .take_while(|c| c.is_ascii_alphabetic() || *c == '-')
1507 .collect::<String>()
1508 .to_lowercase();
1509
1510 if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
1512 lines[i].in_html_block = true;
1514
1515 if !is_closing {
1518 let closing_tag = format!("</{tag_name}>");
1519 let mut j = i + 1;
1520 while j < lines.len() && j < i + 100 {
1521 if lines[j].is_blank {
1524 break;
1525 }
1526
1527 lines[j].in_html_block = true;
1528
1529 if lines[j].content.contains(&closing_tag) {
1531 break;
1532 }
1533 j += 1;
1534 }
1535 }
1536 }
1537 }
1538
1539 i += 1;
1540 }
1541 }
1542
1543 fn parse_code_spans(content: &str, lines: &[LineInfo], ast: &Node) -> Vec<CodeSpan> {
1545 let mut code_spans = Vec::new();
1546
1547 if !content.contains('`') {
1549 return code_spans;
1550 }
1551
1552 fn extract_code_spans(node: &Node, content: &str, lines: &[LineInfo], spans: &mut Vec<CodeSpan>) {
1554 match node {
1555 Node::InlineCode(inline_code) => {
1556 if let Some(pos) = &inline_code.position {
1557 let start_pos = pos.start.offset;
1558 let end_pos = pos.end.offset;
1559
1560 let full_span = &content[start_pos..end_pos];
1562 let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
1563
1564 let content_start = start_pos + backtick_count;
1566 let content_end = end_pos - backtick_count;
1567 let span_content = if content_start < content_end {
1568 content[content_start..content_end].to_string()
1569 } else {
1570 String::new()
1571 };
1572
1573 let mut line_num = 1;
1575 let mut col_start = start_pos;
1576 for (idx, line_info) in lines.iter().enumerate() {
1577 if start_pos >= line_info.byte_offset {
1578 line_num = idx + 1;
1579 col_start = start_pos - line_info.byte_offset;
1580 } else {
1581 break;
1582 }
1583 }
1584
1585 let mut col_end = end_pos;
1587 for line_info in lines.iter() {
1588 if end_pos > line_info.byte_offset {
1589 col_end = end_pos - line_info.byte_offset;
1590 } else {
1591 break;
1592 }
1593 }
1594
1595 spans.push(CodeSpan {
1596 line: line_num,
1597 start_col: col_start,
1598 end_col: col_end,
1599 byte_offset: start_pos,
1600 byte_end: end_pos,
1601 backtick_count,
1602 content: span_content,
1603 });
1604 }
1605 }
1606 Node::Root(root) => {
1608 for child in &root.children {
1609 extract_code_spans(child, content, lines, spans);
1610 }
1611 }
1612 Node::Paragraph(para) => {
1613 for child in ¶.children {
1614 extract_code_spans(child, content, lines, spans);
1615 }
1616 }
1617 Node::Heading(heading) => {
1618 for child in &heading.children {
1619 extract_code_spans(child, content, lines, spans);
1620 }
1621 }
1622 Node::List(list) => {
1623 for child in &list.children {
1624 extract_code_spans(child, content, lines, spans);
1625 }
1626 }
1627 Node::ListItem(item) => {
1628 for child in &item.children {
1629 extract_code_spans(child, content, lines, spans);
1630 }
1631 }
1632 Node::Blockquote(blockquote) => {
1633 for child in &blockquote.children {
1634 extract_code_spans(child, content, lines, spans);
1635 }
1636 }
1637 Node::Table(table) => {
1638 for child in &table.children {
1639 extract_code_spans(child, content, lines, spans);
1640 }
1641 }
1642 Node::TableRow(row) => {
1643 for child in &row.children {
1644 extract_code_spans(child, content, lines, spans);
1645 }
1646 }
1647 Node::TableCell(cell) => {
1648 for child in &cell.children {
1649 extract_code_spans(child, content, lines, spans);
1650 }
1651 }
1652 Node::Emphasis(emphasis) => {
1653 for child in &emphasis.children {
1654 extract_code_spans(child, content, lines, spans);
1655 }
1656 }
1657 Node::Strong(strong) => {
1658 for child in &strong.children {
1659 extract_code_spans(child, content, lines, spans);
1660 }
1661 }
1662 Node::Link(link) => {
1663 for child in &link.children {
1664 extract_code_spans(child, content, lines, spans);
1665 }
1666 }
1667 Node::LinkReference(link_ref) => {
1668 for child in &link_ref.children {
1669 extract_code_spans(child, content, lines, spans);
1670 }
1671 }
1672 Node::FootnoteDefinition(footnote) => {
1673 for child in &footnote.children {
1674 extract_code_spans(child, content, lines, spans);
1675 }
1676 }
1677 Node::Delete(delete) => {
1678 for child in &delete.children {
1679 extract_code_spans(child, content, lines, spans);
1680 }
1681 }
1682 Node::Code(_)
1684 | Node::Text(_)
1685 | Node::Html(_)
1686 | Node::Image(_)
1687 | Node::ImageReference(_)
1688 | Node::FootnoteReference(_)
1689 | Node::Break(_)
1690 | Node::ThematicBreak(_)
1691 | Node::Definition(_)
1692 | Node::Yaml(_)
1693 | Node::Toml(_)
1694 | Node::Math(_)
1695 | Node::InlineMath(_)
1696 | Node::MdxJsxFlowElement(_)
1697 | Node::MdxFlowExpression(_)
1698 | Node::MdxJsxTextElement(_)
1699 | Node::MdxTextExpression(_)
1700 | Node::MdxjsEsm(_) => {
1701 }
1703 }
1704 }
1705
1706 extract_code_spans(ast, content, lines, &mut code_spans);
1708
1709 code_spans.sort_by_key(|span| span.byte_offset);
1711
1712 code_spans
1713 }
1714
1715 fn parse_list_blocks(lines: &[LineInfo]) -> Vec<ListBlock> {
1717 let mut list_blocks = Vec::with_capacity(lines.len() / 10); let mut current_block: Option<ListBlock> = None;
1720 let mut last_list_item_line = 0;
1721 let mut current_indent_level = 0;
1722 let mut last_marker_width = 0;
1723
1724 for (line_idx, line_info) in lines.iter().enumerate() {
1725 let line_num = line_idx + 1;
1726
1727 if line_info.in_code_block {
1729 if let Some(ref mut block) = current_block {
1730 let min_continuation_indent = CodeBlockUtils::calculate_min_continuation_indent(lines, line_idx);
1732
1733 let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
1735
1736 match context {
1737 CodeBlockContext::Indented => {
1738 block.end_line = line_num;
1740 continue;
1741 }
1742 CodeBlockContext::Standalone => {
1743 let completed_block = current_block.take().unwrap();
1745 list_blocks.push(completed_block);
1746 continue;
1747 }
1748 CodeBlockContext::Adjacent => {
1749 block.end_line = line_num;
1751 continue;
1752 }
1753 }
1754 } else {
1755 continue;
1757 }
1758 }
1759
1760 let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(&line_info.content) {
1762 caps.get(0).unwrap().as_str().to_string()
1763 } else {
1764 String::new()
1765 };
1766
1767 if let Some(list_item) = &line_info.list_item {
1769 let item_indent = list_item.marker_column;
1771 let nesting = item_indent / 2; if let Some(ref mut block) = current_block {
1774 let is_nested = nesting > block.nesting_level;
1778 let same_type =
1779 (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
1780 let same_context = block.blockquote_prefix == blockquote_prefix;
1781 let reasonable_distance = line_num <= last_list_item_line + 2; let marker_compatible =
1785 block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
1786
1787 let has_non_list_content = {
1789 let mut found_non_list = false;
1790 let block_last_item_line = block.item_lines.last().copied().unwrap_or(block.end_line);
1792
1793 if block_last_item_line > 0 && block_last_item_line <= lines.len() {
1795 let last_line = &lines[block_last_item_line - 1];
1796 if last_line.content.contains(r"`sqlalchemy`") && last_line.content.contains(r"\`") {
1797 log::debug!(
1798 "After problematic line {}: checking lines {} to {} for non-list content",
1799 block_last_item_line,
1800 block_last_item_line + 1,
1801 line_num
1802 );
1803 if line_num == block_last_item_line + 1 {
1805 log::debug!("Lines are consecutive, no content between");
1806 }
1807 }
1808 }
1809
1810 for check_line in (block_last_item_line + 1)..line_num {
1811 let check_idx = check_line - 1;
1812 if check_idx < lines.len() {
1813 let check_info = &lines[check_idx];
1814 let is_list_breaking_content = if check_info.in_code_block {
1816 let last_item_marker_width =
1818 if block_last_item_line > 0 && block_last_item_line <= lines.len() {
1819 lines[block_last_item_line - 1]
1820 .list_item
1821 .as_ref()
1822 .map(|li| {
1823 if li.is_ordered {
1824 li.marker.len() + 1 } else {
1826 li.marker.len()
1827 }
1828 })
1829 .unwrap_or(3) } else {
1831 3 };
1833
1834 let min_continuation = if block.is_ordered { last_item_marker_width } else { 2 };
1835
1836 let context = CodeBlockUtils::analyze_code_block_context(
1838 lines,
1839 check_line - 1,
1840 min_continuation,
1841 );
1842
1843 matches!(context, CodeBlockContext::Standalone)
1845 } else if !check_info.is_blank && check_info.list_item.is_none() {
1846 let line_content = check_info.content.trim();
1848
1849 if check_info.heading.is_some()
1851 || line_content.starts_with("---")
1852 || line_content.starts_with("***")
1853 || line_content.starts_with("___")
1854 || (line_content.contains('|')
1855 && !line_content.contains("](")
1856 && !line_content.contains("http")
1857 && (line_content.matches('|').count() > 1
1858 || line_content.starts_with('|')
1859 || line_content.ends_with('|')))
1860 || line_content.starts_with(">")
1861 {
1862 true
1863 }
1864 else {
1866 let last_item_marker_width =
1867 if block_last_item_line > 0 && block_last_item_line <= lines.len() {
1868 lines[block_last_item_line - 1]
1869 .list_item
1870 .as_ref()
1871 .map(|li| {
1872 if li.is_ordered {
1873 li.marker.len() + 1 } else {
1875 li.marker.len()
1876 }
1877 })
1878 .unwrap_or(3) } else {
1880 3 };
1882
1883 let min_continuation =
1884 if block.is_ordered { last_item_marker_width } else { 2 };
1885 check_info.indent < min_continuation
1886 }
1887 } else {
1888 false
1889 };
1890
1891 if is_list_breaking_content {
1892 found_non_list = true;
1894 break;
1895 }
1896 }
1897 }
1898 found_non_list
1899 };
1900
1901 let mut continues_list = if is_nested {
1905 same_context && reasonable_distance && !has_non_list_content
1907 } else {
1908 let result = same_type
1910 && same_context
1911 && reasonable_distance
1912 && marker_compatible
1913 && !has_non_list_content;
1914
1915 if block.item_lines.last().is_some_and(|&last_line| {
1917 last_line > 0
1918 && last_line <= lines.len()
1919 && lines[last_line - 1].content.contains(r"`sqlalchemy`")
1920 && lines[last_line - 1].content.contains(r"\`")
1921 }) {
1922 log::debug!(
1923 "List continuation check after problematic line at line {line_num}: same_type={same_type}, same_context={same_context}, reasonable_distance={reasonable_distance}, marker_compatible={marker_compatible}, has_non_list_content={has_non_list_content}, continues={result}"
1924 );
1925 if line_num > 0 && line_num <= lines.len() {
1926 log::debug!("Current line content: {:?}", lines[line_num - 1].content);
1927 }
1928 }
1929
1930 result
1931 };
1932
1933 if !continues_list && reasonable_distance && line_num > 0 && block.end_line == line_num - 1 {
1936 if block.item_lines.contains(&(line_num - 1)) {
1938 continues_list = true;
1940 }
1941 }
1942
1943 if continues_list {
1944 block.end_line = line_num;
1946 block.item_lines.push(line_num);
1947
1948 block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
1950 list_item.marker.len() + 1
1951 } else {
1952 list_item.marker.len()
1953 });
1954
1955 if !block.is_ordered
1957 && block.marker.is_some()
1958 && block.marker.as_ref() != Some(&list_item.marker)
1959 {
1960 block.marker = None;
1962 }
1963 } else {
1964 list_blocks.push(block.clone());
1967
1968 *block = ListBlock {
1969 start_line: line_num,
1970 end_line: line_num,
1971 is_ordered: list_item.is_ordered,
1972 marker: if list_item.is_ordered {
1973 None
1974 } else {
1975 Some(list_item.marker.clone())
1976 },
1977 blockquote_prefix: blockquote_prefix.clone(),
1978 item_lines: vec![line_num],
1979 nesting_level: nesting,
1980 max_marker_width: if list_item.is_ordered {
1981 list_item.marker.len() + 1
1982 } else {
1983 list_item.marker.len()
1984 },
1985 };
1986 }
1987 } else {
1988 current_block = Some(ListBlock {
1990 start_line: line_num,
1991 end_line: line_num,
1992 is_ordered: list_item.is_ordered,
1993 marker: if list_item.is_ordered {
1994 None
1995 } else {
1996 Some(list_item.marker.clone())
1997 },
1998 blockquote_prefix,
1999 item_lines: vec![line_num],
2000 nesting_level: nesting,
2001 max_marker_width: list_item.marker.len(),
2002 });
2003 }
2004
2005 last_list_item_line = line_num;
2006 current_indent_level = item_indent;
2007 last_marker_width = if list_item.is_ordered {
2008 list_item.marker.len() + 1 } else {
2010 list_item.marker.len()
2011 };
2012 } else if let Some(ref mut block) = current_block {
2013 let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
2023 lines[block.end_line - 1].content.trim_end().ends_with('\\')
2024 } else {
2025 false
2026 };
2027
2028 let min_continuation_indent = if block.is_ordered {
2032 current_indent_level + last_marker_width
2033 } else {
2034 current_indent_level + 2 };
2036
2037 if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
2038 block.end_line = line_num;
2040 } else if line_info.is_blank {
2041 let mut check_idx = line_idx + 1;
2044 let mut found_continuation = false;
2045
2046 while check_idx < lines.len() && lines[check_idx].is_blank {
2048 check_idx += 1;
2049 }
2050
2051 if check_idx < lines.len() {
2052 let next_line = &lines[check_idx];
2053 if !next_line.in_code_block && next_line.indent >= min_continuation_indent {
2055 found_continuation = true;
2056 }
2057 else if !next_line.in_code_block
2059 && next_line.list_item.is_some()
2060 && let Some(item) = &next_line.list_item
2061 {
2062 let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
2063 .find(&next_line.content)
2064 .map_or(String::new(), |m| m.as_str().to_string());
2065 if item.marker_column == current_indent_level
2066 && item.is_ordered == block.is_ordered
2067 && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
2068 {
2069 let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
2072 if let Some(between_line) = lines.get(idx) {
2073 let trimmed = between_line.content.trim();
2074 if trimmed.is_empty() {
2076 return false;
2077 }
2078 let line_indent =
2080 between_line.content.len() - between_line.content.trim_start().len();
2081
2082 if trimmed.starts_with("```")
2084 || trimmed.starts_with("~~~")
2085 || trimmed.starts_with("---")
2086 || trimmed.starts_with("***")
2087 || trimmed.starts_with("___")
2088 || trimmed.starts_with(">")
2089 || trimmed.contains('|') || between_line.heading.is_some()
2091 {
2092 return true; }
2094
2095 line_indent >= min_continuation_indent
2097 } else {
2098 false
2099 }
2100 });
2101
2102 if block.is_ordered {
2103 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2106 if let Some(between_line) = lines.get(idx) {
2107 let trimmed = between_line.content.trim();
2108 if trimmed.is_empty() {
2109 return false;
2110 }
2111 trimmed.starts_with("```")
2113 || trimmed.starts_with("~~~")
2114 || trimmed.starts_with("---")
2115 || trimmed.starts_with("***")
2116 || trimmed.starts_with("___")
2117 || trimmed.starts_with(">")
2118 || trimmed.contains('|') || between_line.heading.is_some()
2120 } else {
2121 false
2122 }
2123 });
2124 found_continuation = !has_structural_separators;
2125 } else {
2126 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2128 if let Some(between_line) = lines.get(idx) {
2129 let trimmed = between_line.content.trim();
2130 if trimmed.is_empty() {
2131 return false;
2132 }
2133 trimmed.starts_with("```")
2135 || trimmed.starts_with("~~~")
2136 || trimmed.starts_with("---")
2137 || trimmed.starts_with("***")
2138 || trimmed.starts_with("___")
2139 || trimmed.starts_with(">")
2140 || trimmed.contains('|') || between_line.heading.is_some()
2142 } else {
2143 false
2144 }
2145 });
2146 found_continuation = !has_structural_separators;
2147 }
2148 }
2149 }
2150 }
2151
2152 if found_continuation {
2153 block.end_line = line_num;
2155 } else {
2156 list_blocks.push(block.clone());
2158 current_block = None;
2159 }
2160 } else {
2161 let min_required_indent = if block.is_ordered {
2164 current_indent_level + last_marker_width
2165 } else {
2166 current_indent_level + 2
2167 };
2168
2169 let line_content = line_info.content.trim();
2174 let is_structural_separator = line_info.heading.is_some()
2175 || line_content.starts_with("```")
2176 || line_content.starts_with("~~~")
2177 || line_content.starts_with("---")
2178 || line_content.starts_with("***")
2179 || line_content.starts_with("___")
2180 || line_content.starts_with(">")
2181 || (line_content.contains('|')
2182 && !line_content.contains("](")
2183 && !line_content.contains("http")
2184 && (line_content.matches('|').count() > 1
2185 || line_content.starts_with('|')
2186 || line_content.ends_with('|'))); let is_lazy_continuation = !is_structural_separator
2191 && !line_info.is_blank
2192 && (line_info.indent == 0 || line_info.indent >= min_required_indent);
2193
2194 if is_lazy_continuation {
2195 let content_to_check = if !blockquote_prefix.is_empty() {
2198 line_info
2200 .content
2201 .strip_prefix(&blockquote_prefix)
2202 .unwrap_or(&line_info.content)
2203 .trim()
2204 } else {
2205 line_info.content.trim()
2206 };
2207
2208 let starts_with_uppercase = content_to_check.chars().next().is_some_and(|c| c.is_uppercase());
2209
2210 if starts_with_uppercase && last_list_item_line > 0 {
2213 list_blocks.push(block.clone());
2215 current_block = None;
2216 } else {
2217 block.end_line = line_num;
2219 }
2220 } else {
2221 list_blocks.push(block.clone());
2223 current_block = None;
2224 }
2225 }
2226 }
2227 }
2228
2229 if let Some(block) = current_block {
2231 list_blocks.push(block);
2232 }
2233
2234 merge_adjacent_list_blocks(&mut list_blocks, lines);
2236
2237 list_blocks
2238 }
2239
2240 fn compute_char_frequency(content: &str) -> CharFrequency {
2242 let mut frequency = CharFrequency::default();
2243
2244 for ch in content.chars() {
2245 match ch {
2246 '#' => frequency.hash_count += 1,
2247 '*' => frequency.asterisk_count += 1,
2248 '_' => frequency.underscore_count += 1,
2249 '-' => frequency.hyphen_count += 1,
2250 '+' => frequency.plus_count += 1,
2251 '>' => frequency.gt_count += 1,
2252 '|' => frequency.pipe_count += 1,
2253 '[' => frequency.bracket_count += 1,
2254 '`' => frequency.backtick_count += 1,
2255 '<' => frequency.lt_count += 1,
2256 '!' => frequency.exclamation_count += 1,
2257 '\n' => frequency.newline_count += 1,
2258 _ => {}
2259 }
2260 }
2261
2262 frequency
2263 }
2264
2265 fn parse_html_tags(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<HtmlTag> {
2267 lazy_static! {
2268 static ref HTML_TAG_REGEX: regex::Regex =
2269 regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap();
2270 }
2271
2272 let mut html_tags = Vec::with_capacity(content.matches('<').count());
2273
2274 for cap in HTML_TAG_REGEX.captures_iter(content) {
2275 let full_match = cap.get(0).unwrap();
2276 let match_start = full_match.start();
2277 let match_end = full_match.end();
2278
2279 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2281 continue;
2282 }
2283
2284 let is_closing = !cap.get(1).unwrap().as_str().is_empty();
2285 let tag_name = cap.get(2).unwrap().as_str().to_lowercase();
2286 let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
2287
2288 let mut line_num = 1;
2290 let mut col_start = match_start;
2291 let mut col_end = match_end;
2292 for (idx, line_info) in lines.iter().enumerate() {
2293 if match_start >= line_info.byte_offset {
2294 line_num = idx + 1;
2295 col_start = match_start - line_info.byte_offset;
2296 col_end = match_end - line_info.byte_offset;
2297 } else {
2298 break;
2299 }
2300 }
2301
2302 html_tags.push(HtmlTag {
2303 line: line_num,
2304 start_col: col_start,
2305 end_col: col_end,
2306 byte_offset: match_start,
2307 byte_end: match_end,
2308 tag_name,
2309 is_closing,
2310 is_self_closing,
2311 raw_content: full_match.as_str().to_string(),
2312 });
2313 }
2314
2315 html_tags
2316 }
2317
2318 fn parse_emphasis_spans(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<EmphasisSpan> {
2320 lazy_static! {
2321 static ref EMPHASIS_REGEX: regex::Regex =
2322 regex::Regex::new(r"(\*{1,3}|_{1,3})([^*_\s][^*_]*?)(\*{1,3}|_{1,3})").unwrap();
2323 }
2324
2325 let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
2326
2327 for cap in EMPHASIS_REGEX.captures_iter(content) {
2328 let full_match = cap.get(0).unwrap();
2329 let match_start = full_match.start();
2330 let match_end = full_match.end();
2331
2332 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2334 continue;
2335 }
2336
2337 let opening_markers = cap.get(1).unwrap().as_str();
2338 let content_part = cap.get(2).unwrap().as_str();
2339 let closing_markers = cap.get(3).unwrap().as_str();
2340
2341 if opening_markers.chars().next() != closing_markers.chars().next()
2343 || opening_markers.len() != closing_markers.len()
2344 {
2345 continue;
2346 }
2347
2348 let marker = opening_markers.chars().next().unwrap();
2349 let marker_count = opening_markers.len();
2350
2351 let mut line_num = 1;
2353 let mut col_start = match_start;
2354 let mut col_end = match_end;
2355 for (idx, line_info) in lines.iter().enumerate() {
2356 if match_start >= line_info.byte_offset {
2357 line_num = idx + 1;
2358 col_start = match_start - line_info.byte_offset;
2359 col_end = match_end - line_info.byte_offset;
2360 } else {
2361 break;
2362 }
2363 }
2364
2365 emphasis_spans.push(EmphasisSpan {
2366 line: line_num,
2367 start_col: col_start,
2368 end_col: col_end,
2369 byte_offset: match_start,
2370 byte_end: match_end,
2371 marker,
2372 marker_count,
2373 content: content_part.to_string(),
2374 });
2375 }
2376
2377 emphasis_spans
2378 }
2379
2380 fn parse_table_rows(lines: &[LineInfo]) -> Vec<TableRow> {
2382 let mut table_rows = Vec::with_capacity(lines.len() / 20);
2383
2384 for (line_idx, line_info) in lines.iter().enumerate() {
2385 if line_info.in_code_block || line_info.is_blank {
2387 continue;
2388 }
2389
2390 let line = &line_info.content;
2391 let line_num = line_idx + 1;
2392
2393 if !line.contains('|') {
2395 continue;
2396 }
2397
2398 let parts: Vec<&str> = line.split('|').collect();
2400 let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
2401
2402 let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
2404 let mut column_alignments = Vec::new();
2405
2406 if is_separator {
2407 for part in &parts[1..parts.len() - 1] {
2408 let trimmed = part.trim();
2410 let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
2411 "center".to_string()
2412 } else if trimmed.ends_with(':') {
2413 "right".to_string()
2414 } else if trimmed.starts_with(':') {
2415 "left".to_string()
2416 } else {
2417 "none".to_string()
2418 };
2419 column_alignments.push(alignment);
2420 }
2421 }
2422
2423 table_rows.push(TableRow {
2424 line: line_num,
2425 is_separator,
2426 column_count,
2427 column_alignments,
2428 });
2429 }
2430
2431 table_rows
2432 }
2433
2434 fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
2436 let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
2437
2438 for cap in BARE_URL_PATTERN.captures_iter(content) {
2440 let full_match = cap.get(0).unwrap();
2441 let match_start = full_match.start();
2442 let match_end = full_match.end();
2443
2444 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2446 continue;
2447 }
2448
2449 let preceding_char = if match_start > 0 {
2451 content.chars().nth(match_start - 1)
2452 } else {
2453 None
2454 };
2455 let following_char = content.chars().nth(match_end);
2456
2457 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
2458 continue;
2459 }
2460 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
2461 continue;
2462 }
2463
2464 let url = full_match.as_str();
2465 let url_type = if url.starts_with("https://") {
2466 "https"
2467 } else if url.starts_with("http://") {
2468 "http"
2469 } else if url.starts_with("ftp://") {
2470 "ftp"
2471 } else {
2472 "other"
2473 };
2474
2475 let mut line_num = 1;
2477 let mut col_start = match_start;
2478 let mut col_end = match_end;
2479 for (idx, line_info) in lines.iter().enumerate() {
2480 if match_start >= line_info.byte_offset {
2481 line_num = idx + 1;
2482 col_start = match_start - line_info.byte_offset;
2483 col_end = match_end - line_info.byte_offset;
2484 } else {
2485 break;
2486 }
2487 }
2488
2489 bare_urls.push(BareUrl {
2490 line: line_num,
2491 start_col: col_start,
2492 end_col: col_end,
2493 byte_offset: match_start,
2494 byte_end: match_end,
2495 url: url.to_string(),
2496 url_type: url_type.to_string(),
2497 });
2498 }
2499
2500 for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
2502 let full_match = cap.get(0).unwrap();
2503 let match_start = full_match.start();
2504 let match_end = full_match.end();
2505
2506 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2508 continue;
2509 }
2510
2511 let preceding_char = if match_start > 0 {
2513 content.chars().nth(match_start - 1)
2514 } else {
2515 None
2516 };
2517 let following_char = content.chars().nth(match_end);
2518
2519 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
2520 continue;
2521 }
2522 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
2523 continue;
2524 }
2525
2526 let email = full_match.as_str();
2527
2528 let mut line_num = 1;
2530 let mut col_start = match_start;
2531 let mut col_end = match_end;
2532 for (idx, line_info) in lines.iter().enumerate() {
2533 if match_start >= line_info.byte_offset {
2534 line_num = idx + 1;
2535 col_start = match_start - line_info.byte_offset;
2536 col_end = match_end - line_info.byte_offset;
2537 } else {
2538 break;
2539 }
2540 }
2541
2542 bare_urls.push(BareUrl {
2543 line: line_num,
2544 start_col: col_start,
2545 end_col: col_end,
2546 byte_offset: match_start,
2547 byte_end: match_end,
2548 url: email.to_string(),
2549 url_type: "email".to_string(),
2550 });
2551 }
2552
2553 bare_urls
2554 }
2555}
2556
2557fn merge_adjacent_list_blocks(list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
2559 if list_blocks.len() < 2 {
2560 return;
2561 }
2562
2563 let mut merger = ListBlockMerger::new(lines);
2564 *list_blocks = merger.merge(list_blocks);
2565}
2566
/// Helper that decides which neighbouring list blocks can be merged and
/// performs the merge (see its `impl`).
struct ListBlockMerger<'a> {
    /// Per-line metadata used to inspect the lines lying between two blocks.
    lines: &'a [LineInfo],
}
2571
2572impl<'a> ListBlockMerger<'a> {
2573 fn new(lines: &'a [LineInfo]) -> Self {
2574 Self { lines }
2575 }
2576
2577 fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
2578 let mut merged = Vec::with_capacity(list_blocks.len());
2579 let mut current = list_blocks[0].clone();
2580
2581 for next in list_blocks.iter().skip(1) {
2582 if self.should_merge_blocks(¤t, next) {
2583 current = self.merge_two_blocks(current, next);
2584 } else {
2585 merged.push(current);
2586 current = next.clone();
2587 }
2588 }
2589
2590 merged.push(current);
2591 merged
2592 }
2593
2594 fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
2596 if !self.blocks_are_compatible(current, next) {
2598 return false;
2599 }
2600
2601 let spacing = self.analyze_spacing_between(current, next);
2603 match spacing {
2604 BlockSpacing::Consecutive => true,
2605 BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
2606 BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
2607 self.can_merge_with_content_between(current, next)
2608 }
2609 }
2610 }
2611
2612 fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
2614 current.is_ordered == next.is_ordered
2615 && current.blockquote_prefix == next.blockquote_prefix
2616 && current.nesting_level == next.nesting_level
2617 }
2618
2619 fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
2621 let gap = next.start_line - current.end_line;
2622
2623 match gap {
2624 1 => BlockSpacing::Consecutive,
2625 2 => BlockSpacing::SingleBlank,
2626 _ if gap > 2 => {
2627 if self.has_only_blank_lines_between(current, next) {
2628 BlockSpacing::MultipleBlanks
2629 } else {
2630 BlockSpacing::ContentBetween
2631 }
2632 }
2633 _ => BlockSpacing::Consecutive, }
2635 }
2636
2637 fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
2639 if has_meaningful_content_between(current, next, self.lines) {
2642 return false; }
2644
2645 !current.is_ordered && current.marker == next.marker
2647 }
2648
2649 fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
2651 if has_meaningful_content_between(current, next, self.lines) {
2653 return false; }
2655
2656 current.is_ordered && next.is_ordered
2658 }
2659
2660 fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
2662 for line_num in (current.end_line + 1)..next.start_line {
2663 if let Some(line_info) = self.lines.get(line_num - 1)
2664 && !line_info.content.trim().is_empty()
2665 {
2666 return false;
2667 }
2668 }
2669 true
2670 }
2671
2672 fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
2674 current.end_line = next.end_line;
2675 current.item_lines.extend_from_slice(&next.item_lines);
2676
2677 current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
2679
2680 if !current.is_ordered && self.markers_differ(¤t, next) {
2682 current.marker = None; }
2684
2685 current
2686 }
2687
2688 fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
2690 current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
2691 }
2692}
2693
/// How two neighbouring list blocks are separated vertically.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// The next block starts on the line right after the current one ends.
    Consecutive,
    /// Exactly one line lies between the two blocks.
    SingleBlank,
    /// More than one line lies between the blocks and all of them are blank.
    MultipleBlanks,
    /// More than one line lies between the blocks and at least one is not blank.
    ContentBetween,
}
2702
2703fn has_meaningful_content_between(current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
2705 for line_num in (current.end_line + 1)..next.start_line {
2707 if let Some(line_info) = lines.get(line_num - 1) {
2708 let trimmed = line_info.content.trim();
2710
2711 if trimmed.is_empty() {
2713 continue;
2714 }
2715
2716 if line_info.heading.is_some() {
2720 return true; }
2722
2723 if is_horizontal_rule(trimmed) {
2725 return true; }
2727
2728 if trimmed.contains('|') && trimmed.len() > 1 {
2731 if !trimmed.contains("](") && !trimmed.contains("http") {
2733 let pipe_count = trimmed.matches('|').count();
2735 if pipe_count > 1 || trimmed.starts_with('|') || trimmed.ends_with('|') {
2736 return true; }
2738 }
2739 }
2740
2741 if trimmed.starts_with('>') {
2743 return true; }
2745
2746 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
2748 let line_indent = line_info.content.len() - line_info.content.trim_start().len();
2749
2750 let min_continuation_indent = if current.is_ordered {
2752 current.nesting_level + current.max_marker_width + 1 } else {
2754 current.nesting_level + 2
2755 };
2756
2757 if line_indent < min_continuation_indent {
2758 return true; }
2761 }
2762
2763 let line_indent = line_info.content.len() - line_info.content.trim_start().len();
2765
2766 let min_indent = if current.is_ordered {
2768 current.nesting_level + current.max_marker_width
2769 } else {
2770 current.nesting_level + 2
2771 };
2772
2773 if line_indent < min_indent {
2775 return true; }
2777
2778 }
2781 }
2782
2783 false
2785}
2786
/// Returns `true` when `trimmed` looks like a Markdown horizontal rule.
///
/// The line must be at least three bytes long, start with `-`, `*` or `_`,
/// contain nothing but that same character plus spaces/tabs, and repeat the
/// marker at least three times. The argument is expected to be trimmed
/// already: a leading space makes the first character a non-marker, so the
/// function returns `false`.
fn is_horizontal_rule(trimmed: &str) -> bool {
    if trimmed.len() < 3 {
        return false;
    }

    let marker = match trimmed.chars().next() {
        Some(c @ ('-' | '*' | '_')) => c,
        _ => return false,
    };

    // Single pass over the characters; no intermediate buffer is needed.
    let mut marker_count = 0usize;
    for ch in trimmed.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' && ch != '\t' {
            // Any foreign character disqualifies the line.
            return false;
        }
    }

    marker_count >= 3
}
2810
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty document still has one line offset but no line records.
    #[test]
    fn test_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard);

        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }

    /// A single line without a trailing newline maps offsets to 1-based columns.
    #[test]
    fn test_single_line() {
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard);

        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
    }

    /// Line offsets and offset -> (line, column) mapping across several lines.
    #[test]
    fn test_multi_line() {
        let content = "# Title\n\nSecond line\nThird line";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);

        // (byte offset, expected 1-based (line, column))
        let expectations = [
            (0, (1, 1)),
            (8, (2, 1)),
            (9, (3, 1)),
            (15, (3, 7)),
            (21, (4, 1)),
        ];
        for (offset, line_col) in expectations {
            assert_eq!(ctx.offset_to_line_col(offset), line_col);
        }
    }

    /// Per-line metadata: content, byte offset, indentation and blank detection.
    #[test]
    fn test_line_info() {
        // The second line is indented by four spaces, matching the `indent == 4` assertions.
        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        assert_eq!(ctx.lines.len(), 7);

        let heading_line = &ctx.lines[0];
        assert_eq!(heading_line.content, "# Title");
        assert_eq!(heading_line.byte_offset, 0);
        assert_eq!(heading_line.indent, 0);
        assert!(!heading_line.is_blank);
        assert!(!heading_line.in_code_block);
        assert!(heading_line.list_item.is_none());

        let indented_line = &ctx.lines[1];
        assert_eq!(indented_line.content, "    indented");
        assert_eq!(indented_line.byte_offset, 8);
        assert_eq!(indented_line.indent, 4);
        assert!(!indented_line.is_blank);

        let blank_line = &ctx.lines[2];
        assert_eq!(blank_line.content, "");
        assert!(blank_line.is_blank);

        // Accessor helpers take 1-based line numbers.
        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
    }

    /// List markers, their columns and ordered/unordered classification.
    #[test]
    fn test_list_item_detection() {
        // The nested `*` item is indented by two spaces, matching `marker_column == 2`.
        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n 2) Nested ordered\n\nNot a list";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        let dash_line = &ctx.lines[0];
        assert!(dash_line.list_item.is_some());
        let dash_item = dash_line.list_item.as_ref().unwrap();
        assert_eq!(dash_item.marker, "-");
        assert!(!dash_item.is_ordered);
        assert_eq!(dash_item.marker_column, 0);
        assert_eq!(dash_item.content_column, 2);

        let star_line = &ctx.lines[1];
        assert!(star_line.list_item.is_some());
        let star_item = star_line.list_item.as_ref().unwrap();
        assert_eq!(star_item.marker, "*");
        assert_eq!(star_item.marker_column, 2);

        let ordered_line = &ctx.lines[2];
        assert!(ordered_line.list_item.is_some());
        let ordered_item = ordered_line.list_item.as_ref().unwrap();
        assert_eq!(ordered_item.marker, "1.");
        assert!(ordered_item.is_ordered);
        assert_eq!(ordered_item.number, Some(1));

        let plain_line = &ctx.lines[5];
        assert!(plain_line.list_item.is_none());
    }

    /// Offsets at and around newline characters.
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        let content = "a\nb\nc";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        // (byte offset, expected 1-based (line, column))
        let expectations = [
            (0, (1, 1)),
            (1, (1, 2)),
            (2, (2, 1)),
            (3, (2, 2)),
            (4, (3, 1)),
            (5, (3, 2)),
        ];
        for (offset, line_col) in expectations {
            assert_eq!(ctx.offset_to_line_col(offset), line_col);
        }
    }
}