1use crate::config::MarkdownFlavor;
2use crate::utils::ast_utils::get_cached_ast;
3use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
4use lazy_static::lazy_static;
5use markdown::mdast::Node;
6use regex::Regex;
7
// Shared regular expressions, compiled once on first use.
lazy_static! {
    // Inline link `[text](url)` or reference link `[text][id]`.
    // Written in verbose (`x`) mode, so whitespace and `#` comments inside the
    // pattern are ignored by the regex engine; `s` lets `.` match newlines.
    static ref LINK_PATTERN: Regex = Regex::new(
        r"(?sx)
 \[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\] # Link text in group 1 (handles nested brackets)
 (?:
 \(([^)]*)\) # Inline URL in group 2 (can be empty)
 |
 \[([^\]]*)\] # Reference ID in group 3
 )"
    ).unwrap();

    // Same shape as LINK_PATTERN but requires the leading `!` of an image.
    static ref IMAGE_PATTERN: Regex = Regex::new(
        r"(?sx)
 !\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\] # Alt text in group 1 (handles nested brackets)
 (?:
 \(([^)]*)\) # Inline URL in group 2 (can be empty)
 |
 \[([^\]]*)\] # Reference ID in group 3
 )"
    ).unwrap();

    // Reference definition `[id]: url "title"` with up to three leading spaces.
    // Group 1 = id, group 2 = URL, group 3 = double-quoted title,
    // group 4 = single-quoted title.
    static ref REF_DEF_PATTERN: Regex = Regex::new(
        r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#
    ).unwrap();

    // A run of one or more backticks.
    static ref CODE_SPAN_PATTERN: Regex = Regex::new(
        r"`+"
    ).unwrap();

    // An `http`, `https` or `ftp` URL with optional port, path, query and fragment.
    static ref BARE_URL_PATTERN: Regex = Regex::new(
        r#"(https?|ftp)://[^\s<>\[\]()\\'"`]+(?:\.[^\s<>\[\]()\\'"`]+)*(?::\d+)?(?:/[^\s<>\[\]()\\'"`]*)?(?:\?[^\s<>\[\]()\\'"`]*)?(?:#[^\s<>\[\]()\\'"`]*)?"#
    ).unwrap();

    // An e-mail-like token: local part, `@`, domain, and a TLD of two or more letters.
    static ref BARE_EMAIL_PATTERN: Regex = Regex::new(
        r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
    ).unwrap();

    // `<...>` wrapping either a URL or an e-mail-like token; group 1 is the
    // text between the angle brackets.
    static ref ANGLE_BRACKET_PATTERN: Regex = Regex::new(
        r"<((?:https?|ftp)://[^>]+|[^@\s]+@[^@\s]+\.[^@\s>]+)>"
    ).unwrap();

    // Leading whitespace, one or more `>` markers and the whitespace after them (group 1).
    static ref BLOCKQUOTE_PREFIX_REGEX: Regex = Regex::new(r"^(\s*>+\s*)").unwrap();
}
62
/// Pre-computed facts about one line of the document.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Text of the line as yielded by `str::lines` (line terminator removed).
    pub content: String,
    /// Byte offset of the start of this line within the document.
    pub byte_offset: usize,
    /// Number of leading whitespace bytes on the line.
    pub indent: usize,
    /// True when the line is whitespace-only, or when only whitespace follows
    /// its first blockquote `>` marker.
    pub is_blank: bool,
    /// True when the line starts inside a detected code block range.
    pub in_code_block: bool,
    /// True when the line belongs to the leading `---` front matter section.
    pub in_front_matter: bool,
    /// List marker details when the line starts a list item.
    pub list_item: Option<ListItemInfo>,
    /// Heading details when the line is an ATX heading or the text line of a
    /// setext heading.
    pub heading: Option<HeadingInfo>,
    /// Blockquote details when the line starts with `>` markers.
    pub blockquote: Option<BlockquoteInfo>,
}
85
/// Marker details for a line that starts a list item.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker text: `-`, `*` or `+` for unordered items, or the digits
    /// plus delimiter (e.g. `1.` or `2)`) for ordered items.
    pub marker: String,
    /// True for numbered items.
    pub is_ordered: bool,
    /// Parsed item number for ordered items; `None` for unordered items or
    /// when the digits do not fit in `usize`.
    pub number: Option<usize>,
    /// Byte column of the marker (includes any blockquote prefix length).
    pub marker_column: usize,
    /// Byte column where the item text starts (after marker and spacing).
    pub content_column: usize,
}
100
/// Syntactic form of a heading.
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    /// `#`-prefixed heading.
    ATX,
    /// Setext heading underlined with `=` (level 1).
    Setext1,
    /// Setext heading underlined with `-` (level 2).
    Setext2,
}
111
/// A link found in the document, either inline (`[text](url)`) or
/// reference-style (`[text][id]`).
#[derive(Debug, Clone)]
pub struct ParsedLink {
    /// 1-based number of the line on which the link starts.
    pub line: usize,
    /// Byte offset of the link start relative to the start of its line.
    pub start_col: usize,
    /// Byte offset of the link end relative to the start of the line on which
    /// the link ends.
    pub end_col: usize,
    /// Byte offset of the link start within the document.
    pub byte_offset: usize,
    /// Byte offset just past the link within the document.
    pub byte_end: usize,
    /// Text between the square brackets.
    pub text: String,
    /// Inline URL; empty for reference-style links.
    pub url: String,
    /// True for reference-style links.
    pub is_reference: bool,
    /// Lower-cased reference id for reference-style links; falls back to the
    /// lower-cased link text when the id is empty (`[text][]`).
    pub reference_id: Option<String>,
}
134
/// An image found in the document, either inline (`![alt](url)`) or
/// reference-style (`![alt][id]`).
#[derive(Debug, Clone)]
pub struct ParsedImage {
    /// 1-based number of the line on which the image starts.
    pub line: usize,
    /// Byte offset of the image start relative to the start of its line.
    pub start_col: usize,
    /// Byte offset of the image end relative to the start of the line on
    /// which the image ends.
    pub end_col: usize,
    /// Byte offset of the image start within the document.
    pub byte_offset: usize,
    /// Byte offset just past the image within the document.
    pub byte_end: usize,
    /// Text between the square brackets.
    pub alt_text: String,
    /// Inline URL; empty for reference-style images.
    pub url: String,
    /// True for reference-style images.
    pub is_reference: bool,
    /// Lower-cased reference id for reference-style images; falls back to the
    /// lower-cased alt text when the id is empty (`![alt][]`).
    pub reference_id: Option<String>,
}
157
/// A reference definition line such as `[id]: https://example.com "Title"`.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// 1-based line number of the definition.
    pub line: usize,
    /// Reference id, lower-cased.
    pub id: String,
    /// Destination URL as written.
    pub url: String,
    /// Optional title (the text inside the double or single quotes).
    pub title: Option<String>,
}
170
/// An inline code span taken from the parsed AST.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// 1-based number of the line on which the span starts.
    pub line: usize,
    /// Byte offset of the span start relative to the start of its line.
    pub start_col: usize,
    /// Byte offset of the span end relative to the start of the line on which
    /// the span ends.
    pub end_col: usize,
    /// Byte offset of the opening backticks within the document.
    pub byte_offset: usize,
    /// Byte offset just past the closing backticks within the document.
    pub byte_end: usize,
    /// Number of backticks at the start of the span.
    pub backtick_count: usize,
    /// Text between the opening and closing backtick runs.
    pub content: String,
}
189
/// Details of a heading attached to the line that carries its text.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level: the number of `#` for ATX, 1 (`=`) or 2 (`-`) for setext.
    pub level: u8,
    /// Syntactic form of the heading.
    pub style: HeadingStyle,
    /// The `#` run for ATX headings, or the trimmed underline for setext.
    pub marker: String,
    /// Leading-whitespace width (bytes) in front of the marker; for setext
    /// headings this is measured on the underline line.
    pub marker_column: usize,
    /// Byte column where the heading text starts.
    pub content_column: usize,
    /// Heading text as returned by `extract_header_id` (custom id removed).
    pub text: String,
    /// Custom id taken from the heading text or, failing that, from a
    /// standalone attribute-list line directly after the heading.
    pub custom_id: Option<String>,
    /// Trimmed heading text before id extraction (ATX closing sequence removed).
    pub raw_text: String,
    /// True when an ATX heading ends with a closing `#` sequence.
    pub has_closing_sequence: bool,
    /// The closing `#` sequence; empty when there is none.
    pub closing_sequence: String,
}
214
/// Details of a line that begins with blockquote markers.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Number of `>` characters in the leading, contiguous marker run.
    pub nesting_level: usize,
    /// Whitespace in front of the first `>`.
    pub indent: String,
    /// Byte length of `indent`, i.e. the column of the first `>`.
    pub marker_column: usize,
    /// `indent`, the `>` run and the whitespace that follows it.
    pub prefix: String,
    /// Remainder of the line after `prefix`.
    pub content: String,
    /// True when text follows the markers with no whitespace in between.
    pub has_no_space_after_marker: bool,
    /// True when more than one whitespace byte, or a tab, follows the markers.
    pub has_multiple_spaces_after_marker: bool,
    /// True for a bare marker line: no whitespace and no text after the `>` run.
    pub needs_md028_fix: bool,
}
235
/// A group of list lines treated as one list.
///
/// NOTE(review): instances are built by `parse_list_blocks`, which is not
/// visible here; field notes marked "presumably" are inferred from names and
/// should be confirmed against that function.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// First line of the block (compared inclusively by `is_in_list_block`).
    pub start_line: usize,
    /// Last line of the block (compared inclusively by `is_in_list_block`).
    pub end_line: usize,
    /// Presumably true when the block is an ordered list.
    pub is_ordered: bool,
    /// Presumably the shared marker of the block, when there is one.
    pub marker: Option<String>,
    /// Presumably the blockquote prefix shared by the block's lines.
    pub blockquote_prefix: String,
    /// Presumably the line numbers of the individual items.
    pub item_lines: Vec<usize>,
    /// Presumably the nesting depth of the block.
    pub nesting_level: usize,
    /// Presumably the widest marker found in the block.
    pub max_marker_width: usize,
}
256
257use std::sync::{Arc, Mutex};
258
/// Pre-computed character counts used for cheap "does the document contain
/// X at all" checks. Each field backs the answer `has_char` / `char_count`
/// give for the character named below.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count reported for `#`.
    pub hash_count: usize,
    /// Count reported for `*`.
    pub asterisk_count: usize,
    /// Count reported for `_`.
    pub underscore_count: usize,
    /// Count reported for `-`.
    pub hyphen_count: usize,
    /// Count reported for `+`.
    pub plus_count: usize,
    /// Count reported for `>`.
    pub gt_count: usize,
    /// Count reported for `|`.
    pub pipe_count: usize,
    /// Count reported for `[`.
    pub bracket_count: usize,
    /// Count reported for a backtick.
    pub backtick_count: usize,
    /// Count reported for `<`.
    pub lt_count: usize,
    /// Count reported for `!`.
    pub exclamation_count: usize,
    /// Count reported for the newline character.
    pub newline_count: usize,
}
287
/// An HTML tag found in the document.
///
/// NOTE(review): instances are built by `parse_html_tags`, which is not
/// visible here; the field notes are inferred from names and from the
/// parallel span structs in this file. Confirm against that function.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number of the tag (compared against `line_num` in `html_tags_on_line`).
    pub line: usize,
    /// Presumably the start column within the line.
    pub start_col: usize,
    /// Presumably the end column within the line.
    pub end_col: usize,
    /// Presumably the byte offset of the tag start within the document.
    pub byte_offset: usize,
    /// Presumably the byte offset just past the tag.
    pub byte_end: usize,
    /// Presumably the element name.
    pub tag_name: String,
    /// Presumably true for `</tag>`.
    pub is_closing: bool,
    /// Presumably true for `<tag />`.
    pub is_self_closing: bool,
    /// Presumably the tag text as written.
    pub raw_content: String,
}
310
/// An emphasis span found in the document.
///
/// NOTE(review): instances are built by `parse_emphasis_spans`, which is not
/// visible here; the field notes are inferred from names. Confirm against
/// that function.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number of the span (compared against `line_num` in
    /// `emphasis_spans_on_line`).
    pub line: usize,
    /// Presumably the start column within the line.
    pub start_col: usize,
    /// Presumably the end column within the line.
    pub end_col: usize,
    /// Presumably the byte offset of the span start within the document.
    pub byte_offset: usize,
    /// Presumably the byte offset just past the span.
    pub byte_end: usize,
    /// Presumably the emphasis character (`*` or `_`).
    pub marker: char,
    /// Presumably how many marker characters delimit the span.
    pub marker_count: usize,
    /// Presumably the text between the markers.
    pub content: String,
}
331
/// A table row found in the document.
///
/// NOTE(review): instances are built by `parse_table_rows`, which is not
/// visible here; the field notes are inferred from names. Confirm against
/// that function.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number of the row (compared against `line_num` in `table_rows_on_line`).
    pub line: usize,
    /// Presumably true for the header separator row.
    pub is_separator: bool,
    /// Presumably the number of columns in the row.
    pub column_count: usize,
    /// Presumably one alignment label per column.
    pub column_alignments: Vec<String>,
}
344
/// A URL or e-mail address that appears outside link syntax.
///
/// NOTE(review): instances are built by `parse_bare_urls`, which is not
/// visible here; the field notes are inferred from names. Confirm against
/// that function.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number of the URL (compared against `line_num` in `bare_urls_on_line`).
    pub line: usize,
    /// Presumably the start column within the line.
    pub start_col: usize,
    /// Presumably the end column within the line.
    pub end_col: usize,
    /// Presumably the byte offset of the URL start within the document.
    pub byte_offset: usize,
    /// Presumably the byte offset just past the URL.
    pub byte_end: usize,
    /// Presumably the matched text.
    pub url: String,
    /// Presumably a label for the kind of match (URL scheme or e-mail).
    pub url_type: String,
}
363
/// Everything the lint rules need to know about one document: the raw text
/// plus structures derived from it. Some are computed eagerly in `new`,
/// others lazily behind a `Mutex`-guarded cache.
pub struct LintContext<'a> {
    /// The document text.
    pub content: &'a str,
    /// Byte offset at which each line starts; the first entry is 0 and one
    /// entry is added after every `\n`.
    pub line_offsets: Vec<usize>,
    /// `(start, end)` byte ranges returned by `CodeBlockUtils::detect_code_blocks`.
    pub code_blocks: Vec<(usize, usize)>,
    /// Per-line information, one entry per line of `content`.
    pub lines: Vec<LineInfo>,
    /// Links found outside code blocks and code spans.
    pub links: Vec<ParsedLink>,
    /// Images found outside code blocks and code spans.
    pub images: Vec<ParsedImage>,
    /// Reference definitions found outside code blocks.
    pub reference_defs: Vec<ReferenceDef>,
    /// Inline code spans; filled in by `new`, read through `code_spans()`.
    code_spans_cache: Mutex<Option<Arc<Vec<CodeSpan>>>>,
    /// List blocks computed by `parse_list_blocks`.
    pub list_blocks: Vec<ListBlock>,
    /// Character counts used by the `has_char` / `likely_has_*` helpers.
    pub char_frequency: CharFrequency,
    /// Lazily filled by `html_tags()`.
    html_tags_cache: Mutex<Option<Arc<Vec<HtmlTag>>>>,
    /// Lazily filled by `emphasis_spans()`.
    emphasis_spans_cache: Mutex<Option<Arc<Vec<EmphasisSpan>>>>,
    /// Lazily filled by `table_rows()`.
    table_rows_cache: Mutex<Option<Arc<Vec<TableRow>>>>,
    /// Lazily filled by `bare_urls()`.
    bare_urls_cache: Mutex<Option<Arc<Vec<BareUrl>>>>,
    /// Lazily filled by `get_ast()`.
    ast_cache: Mutex<Option<Arc<Node>>>,
    /// Markdown flavor the document is linted as.
    pub flavor: MarkdownFlavor,
}
382
383impl<'a> LintContext<'a> {
384 pub fn new(content: &'a str, flavor: MarkdownFlavor) -> Self {
385 let mut line_offsets = vec![0];
386 for (i, c) in content.char_indices() {
387 if c == '\n' {
388 line_offsets.push(i + 1);
389 }
390 }
391
392 let code_blocks = CodeBlockUtils::detect_code_blocks(content);
394
395 let lines = Self::compute_line_info(content, &line_offsets, &code_blocks, flavor);
397
398 let ast = get_cached_ast(content);
400 let code_spans = Self::parse_code_spans(content, &lines, &ast);
401
402 let links = Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor);
404 let images = Self::parse_images(content, &lines, &code_blocks, &code_spans);
405 let reference_defs = Self::parse_reference_defs(content, &lines);
406 let list_blocks = Self::parse_list_blocks(&lines);
407
408 let char_frequency = Self::compute_char_frequency(content);
410
411 Self {
412 content,
413 line_offsets,
414 code_blocks,
415 lines,
416 links,
417 images,
418 reference_defs,
419 code_spans_cache: Mutex::new(Some(Arc::new(code_spans))),
420 list_blocks,
421 char_frequency,
422 html_tags_cache: Mutex::new(None),
423 emphasis_spans_cache: Mutex::new(None),
424 table_rows_cache: Mutex::new(None),
425 bare_urls_cache: Mutex::new(None),
426 ast_cache: Mutex::new(None),
427 flavor,
428 }
429 }
430
431 pub fn get_ast(&self) -> Arc<Node> {
433 let mut cache = self.ast_cache.lock().unwrap();
434
435 if cache.is_none() {
436 *cache = Some(get_cached_ast(self.content));
439 }
440
441 cache.as_ref().unwrap().clone()
442 }
443
444 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
446 let mut cache = self.code_spans_cache.lock().unwrap();
447
448 if cache.is_none() {
450 let ast = self.get_ast();
451 let code_spans = Self::parse_code_spans(self.content, &self.lines, &ast);
452 *cache = Some(Arc::new(code_spans));
453 }
454
455 cache.as_ref().unwrap().clone()
457 }
458
459 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
461 let mut cache = self.html_tags_cache.lock().unwrap();
462
463 if cache.is_none() {
464 let html_tags = Self::parse_html_tags(self.content, &self.lines, &self.code_blocks);
465 *cache = Some(Arc::new(html_tags));
466 }
467
468 cache.as_ref().unwrap().clone()
469 }
470
471 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
473 let mut cache = self.emphasis_spans_cache.lock().unwrap();
474
475 if cache.is_none() {
476 let emphasis_spans = Self::parse_emphasis_spans(self.content, &self.lines, &self.code_blocks);
477 *cache = Some(Arc::new(emphasis_spans));
478 }
479
480 cache.as_ref().unwrap().clone()
481 }
482
483 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
485 let mut cache = self.table_rows_cache.lock().unwrap();
486
487 if cache.is_none() {
488 let table_rows = Self::parse_table_rows(&self.lines);
489 *cache = Some(Arc::new(table_rows));
490 }
491
492 cache.as_ref().unwrap().clone()
493 }
494
495 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
497 let mut cache = self.bare_urls_cache.lock().unwrap();
498
499 if cache.is_none() {
500 let bare_urls = Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks);
501 *cache = Some(Arc::new(bare_urls));
502 }
503
504 cache.as_ref().unwrap().clone()
505 }
506
507 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
509 match self.line_offsets.binary_search(&offset) {
510 Ok(line) => (line + 1, 1),
511 Err(line) => {
512 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
513 (line, offset - line_start + 1)
514 }
515 }
516 }
517
518 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
520 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
522 return true;
523 }
524
525 self.code_spans()
527 .iter()
528 .any(|span| pos >= span.byte_offset && pos < span.byte_end)
529 }
530
531 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
533 if line_num > 0 {
534 self.lines.get(line_num - 1)
535 } else {
536 None
537 }
538 }
539
540 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
542 self.line_info(line_num).map(|info| info.byte_offset)
543 }
544
545 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
547 let normalized_id = ref_id.to_lowercase();
548 self.reference_defs
549 .iter()
550 .find(|def| def.id == normalized_id)
551 .map(|def| def.url.as_str())
552 }
553
554 pub fn links_on_line(&self, line_num: usize) -> Vec<&ParsedLink> {
556 self.links.iter().filter(|link| link.line == line_num).collect()
557 }
558
559 pub fn images_on_line(&self, line_num: usize) -> Vec<&ParsedImage> {
561 self.images.iter().filter(|img| img.line == line_num).collect()
562 }
563
564 pub fn is_in_list_block(&self, line_num: usize) -> bool {
566 self.list_blocks
567 .iter()
568 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
569 }
570
571 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
573 self.list_blocks
574 .iter()
575 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
576 }
577
578 pub fn has_char(&self, ch: char) -> bool {
580 match ch {
581 '#' => self.char_frequency.hash_count > 0,
582 '*' => self.char_frequency.asterisk_count > 0,
583 '_' => self.char_frequency.underscore_count > 0,
584 '-' => self.char_frequency.hyphen_count > 0,
585 '+' => self.char_frequency.plus_count > 0,
586 '>' => self.char_frequency.gt_count > 0,
587 '|' => self.char_frequency.pipe_count > 0,
588 '[' => self.char_frequency.bracket_count > 0,
589 '`' => self.char_frequency.backtick_count > 0,
590 '<' => self.char_frequency.lt_count > 0,
591 '!' => self.char_frequency.exclamation_count > 0,
592 '\n' => self.char_frequency.newline_count > 0,
593 _ => self.content.contains(ch), }
595 }
596
597 pub fn char_count(&self, ch: char) -> usize {
599 match ch {
600 '#' => self.char_frequency.hash_count,
601 '*' => self.char_frequency.asterisk_count,
602 '_' => self.char_frequency.underscore_count,
603 '-' => self.char_frequency.hyphen_count,
604 '+' => self.char_frequency.plus_count,
605 '>' => self.char_frequency.gt_count,
606 '|' => self.char_frequency.pipe_count,
607 '[' => self.char_frequency.bracket_count,
608 '`' => self.char_frequency.backtick_count,
609 '<' => self.char_frequency.lt_count,
610 '!' => self.char_frequency.exclamation_count,
611 '\n' => self.char_frequency.newline_count,
612 _ => self.content.matches(ch).count(), }
614 }
615
616 pub fn likely_has_headings(&self) -> bool {
618 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
620
621 pub fn likely_has_lists(&self) -> bool {
623 self.char_frequency.asterisk_count > 0
624 || self.char_frequency.hyphen_count > 0
625 || self.char_frequency.plus_count > 0
626 }
627
628 pub fn likely_has_emphasis(&self) -> bool {
630 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
631 }
632
633 pub fn likely_has_tables(&self) -> bool {
635 self.char_frequency.pipe_count > 2
636 }
637
638 pub fn likely_has_blockquotes(&self) -> bool {
640 self.char_frequency.gt_count > 0
641 }
642
643 pub fn likely_has_code(&self) -> bool {
645 self.char_frequency.backtick_count > 0
646 }
647
648 pub fn likely_has_links_or_images(&self) -> bool {
650 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
651 }
652
653 pub fn likely_has_html(&self) -> bool {
655 self.char_frequency.lt_count > 0
656 }
657
658 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
660 self.html_tags()
661 .iter()
662 .filter(|tag| tag.line == line_num)
663 .cloned()
664 .collect()
665 }
666
667 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
669 self.emphasis_spans()
670 .iter()
671 .filter(|span| span.line == line_num)
672 .cloned()
673 .collect()
674 }
675
676 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
678 self.table_rows()
679 .iter()
680 .filter(|row| row.line == line_num)
681 .cloned()
682 .collect()
683 }
684
685 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
687 self.bare_urls()
688 .iter()
689 .filter(|url| url.line == line_num)
690 .cloned()
691 .collect()
692 }
693
694 fn parse_links(
696 content: &str,
697 lines: &[LineInfo],
698 code_blocks: &[(usize, usize)],
699 code_spans: &[CodeSpan],
700 flavor: MarkdownFlavor,
701 ) -> Vec<ParsedLink> {
702 use crate::utils::skip_context::is_mkdocs_snippet_line;
703
704 let mut links = Vec::with_capacity(content.len() / 500); for cap in LINK_PATTERN.captures_iter(content) {
709 let full_match = cap.get(0).unwrap();
710 let match_start = full_match.start();
711 let match_end = full_match.end();
712
713 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
715 continue;
716 }
717
718 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
720 continue;
721 }
722
723 if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
725 continue;
726 }
727
728 if code_spans
730 .iter()
731 .any(|span| match_start >= span.byte_offset && match_start < span.byte_end)
732 {
733 continue;
734 }
735
736 let line_idx = lines
739 .iter()
740 .position(|line| {
741 match_start >= line.byte_offset && (match_start < line.byte_offset + line.content.len() + 1)
742 })
743 .unwrap_or(0);
744
745 if is_mkdocs_snippet_line(&lines[line_idx].content, flavor) {
746 continue;
747 }
748
749 let mut line_num = 1;
751 let mut col_start = match_start;
752 for (idx, line_info) in lines.iter().enumerate() {
753 if match_start >= line_info.byte_offset {
754 line_num = idx + 1;
755 col_start = match_start - line_info.byte_offset;
756 } else {
757 break;
758 }
759 }
760
761 let mut end_line_num = 1;
763 let mut col_end = match_end;
764 for (idx, line_info) in lines.iter().enumerate() {
765 if match_end > line_info.byte_offset {
766 end_line_num = idx + 1;
767 col_end = match_end - line_info.byte_offset;
768 } else {
769 break;
770 }
771 }
772
773 if line_num == end_line_num {
775 } else {
777 }
780
781 let text = cap.get(1).map_or("", |m| m.as_str()).to_string();
782
783 if let Some(inline_url) = cap.get(2) {
784 links.push(ParsedLink {
786 line: line_num,
787 start_col: col_start,
788 end_col: col_end,
789 byte_offset: match_start,
790 byte_end: match_end,
791 text,
792 url: inline_url.as_str().to_string(),
793 is_reference: false,
794 reference_id: None,
795 });
796 } else if let Some(ref_id) = cap.get(3) {
797 let ref_id_str = ref_id.as_str();
799 let normalized_ref = if ref_id_str.is_empty() {
800 text.to_lowercase() } else {
802 ref_id_str.to_lowercase()
803 };
804
805 links.push(ParsedLink {
806 line: line_num,
807 start_col: col_start,
808 end_col: col_end,
809 byte_offset: match_start,
810 byte_end: match_end,
811 text,
812 url: String::new(), is_reference: true,
814 reference_id: Some(normalized_ref),
815 });
816 }
817 }
818
819 links
820 }
821
822 fn parse_images(
824 content: &str,
825 lines: &[LineInfo],
826 code_blocks: &[(usize, usize)],
827 code_spans: &[CodeSpan],
828 ) -> Vec<ParsedImage> {
829 let mut images = Vec::with_capacity(content.len() / 1000); for cap in IMAGE_PATTERN.captures_iter(content) {
834 let full_match = cap.get(0).unwrap();
835 let match_start = full_match.start();
836 let match_end = full_match.end();
837
838 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
840 continue;
841 }
842
843 if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
845 continue;
846 }
847
848 if code_spans
850 .iter()
851 .any(|span| match_start >= span.byte_offset && match_start < span.byte_end)
852 {
853 continue;
854 }
855
856 let mut line_num = 1;
858 let mut col_start = match_start;
859 for (idx, line_info) in lines.iter().enumerate() {
860 if match_start >= line_info.byte_offset {
861 line_num = idx + 1;
862 col_start = match_start - line_info.byte_offset;
863 } else {
864 break;
865 }
866 }
867
868 let mut end_line_num = 1;
870 let mut col_end = match_end;
871 for (idx, line_info) in lines.iter().enumerate() {
872 if match_end > line_info.byte_offset {
873 end_line_num = idx + 1;
874 col_end = match_end - line_info.byte_offset;
875 } else {
876 break;
877 }
878 }
879
880 if line_num == end_line_num {
882 } else {
884 }
887
888 let alt_text = cap.get(1).map_or("", |m| m.as_str()).to_string();
889
890 if let Some(inline_url) = cap.get(2) {
891 images.push(ParsedImage {
893 line: line_num,
894 start_col: col_start,
895 end_col: col_end,
896 byte_offset: match_start,
897 byte_end: match_end,
898 alt_text,
899 url: inline_url.as_str().to_string(),
900 is_reference: false,
901 reference_id: None,
902 });
903 } else if let Some(ref_id) = cap.get(3) {
904 let ref_id_str = ref_id.as_str();
906 let normalized_ref = if ref_id_str.is_empty() {
907 alt_text.to_lowercase() } else {
909 ref_id_str.to_lowercase()
910 };
911
912 images.push(ParsedImage {
913 line: line_num,
914 start_col: col_start,
915 end_col: col_end,
916 byte_offset: match_start,
917 byte_end: match_end,
918 alt_text,
919 url: String::new(), is_reference: true,
921 reference_id: Some(normalized_ref),
922 });
923 }
924 }
925
926 images
927 }
928
929 fn parse_reference_defs(_content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
931 let mut refs = Vec::with_capacity(lines.len() / 20); for (line_idx, line_info) in lines.iter().enumerate() {
935 if line_info.in_code_block {
937 continue;
938 }
939
940 let line = &line_info.content;
941 let line_num = line_idx + 1;
942
943 if let Some(cap) = REF_DEF_PATTERN.captures(line) {
944 let id = cap.get(1).unwrap().as_str().to_lowercase();
945 let url = cap.get(2).unwrap().as_str().to_string();
946 let title = cap.get(3).or_else(|| cap.get(4)).map(|m| m.as_str().to_string());
947
948 refs.push(ReferenceDef {
949 line: line_num,
950 id,
951 url,
952 title,
953 });
954 }
955 }
956
957 refs
958 }
959
960 fn compute_line_info(
962 content: &str,
963 line_offsets: &[usize],
964 code_blocks: &[(usize, usize)],
965 flavor: MarkdownFlavor,
966 ) -> Vec<LineInfo> {
967 lazy_static! {
968 static ref UNORDERED_REGEX: regex::Regex = regex::Regex::new(r"^(\s*)([-*+])([ \t]*)(.*)").unwrap();
970 static ref ORDERED_REGEX: regex::Regex = regex::Regex::new(r"^(\s*)(\d+)([.)])([ \t]*)(.*)").unwrap();
971
972 static ref BLOCKQUOTE_REGEX: regex::Regex = regex::Regex::new(r"^(\s*>\s*)(.*)").unwrap();
974
975 static ref ATX_HEADING_REGEX: regex::Regex = regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap();
977 static ref SETEXT_UNDERLINE_REGEX: regex::Regex = regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap();
978
979 static ref BLOCKQUOTE_REGEX_FULL: regex::Regex = regex::Regex::new(r"^(\s*)(>+)(\s*)(.*)$").unwrap();
981 }
982
983 let content_lines: Vec<&str> = content.lines().collect();
984 let mut lines = Vec::with_capacity(content_lines.len());
985
986 let mut in_front_matter = false;
988 let mut front_matter_end = 0;
989 if content_lines.first().map(|l| l.trim()) == Some("---") {
990 in_front_matter = true;
991 for (idx, line) in content_lines.iter().enumerate().skip(1) {
992 if line.trim() == "---" {
993 front_matter_end = idx;
994 break;
995 }
996 }
997 }
998
999 for (i, line) in content_lines.iter().enumerate() {
1000 let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
1001 let indent = line.len() - line.trim_start().len();
1002 let is_blank = if let Some(caps) = BLOCKQUOTE_REGEX.captures(line) {
1004 let after_prefix = caps.get(2).map_or("", |m| m.as_str());
1006 after_prefix.trim().is_empty()
1007 } else {
1008 line.trim().is_empty()
1009 };
1010 let in_code_block = code_blocks.iter().any(|&(start, end)| {
1013 let safe_start = if start > 0 && !content.is_char_boundary(start) {
1018 let mut boundary = start;
1020 while boundary > 0 && !content.is_char_boundary(boundary) {
1021 boundary -= 1;
1022 }
1023 boundary
1024 } else {
1025 start
1026 };
1027
1028 let safe_end = if end < content.len() && !content.is_char_boundary(end) {
1029 let mut boundary = end;
1031 while boundary < content.len() && !content.is_char_boundary(boundary) {
1032 boundary += 1;
1033 }
1034 boundary
1035 } else {
1036 end.min(content.len())
1037 };
1038
1039 let block_content = &content[safe_start..safe_end];
1040 let is_multiline = block_content.contains('\n');
1041 let is_fenced = block_content.starts_with("```") || block_content.starts_with("~~~");
1042 let is_indented = !is_fenced
1043 && block_content
1044 .lines()
1045 .all(|l| l.starts_with(" ") || l.starts_with("\t") || l.trim().is_empty());
1046
1047 byte_offset >= start && byte_offset < end && (is_multiline || is_fenced || is_indented)
1048 });
1049
1050 let list_item = if !(in_code_block || is_blank || in_front_matter && i <= front_matter_end) {
1052 let (line_for_list_check, blockquote_prefix_len) = if let Some(caps) = BLOCKQUOTE_REGEX.captures(line) {
1054 let prefix = caps.get(1).unwrap().as_str();
1055 let content = caps.get(2).unwrap().as_str();
1056 (content, prefix.len())
1057 } else {
1058 (&**line, 0)
1059 };
1060
1061 if let Some(caps) = UNORDERED_REGEX.captures(line_for_list_check) {
1062 let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
1063 let marker = caps.get(2).map_or("", |m| m.as_str());
1064 let spacing = caps.get(3).map_or("", |m| m.as_str());
1065 let _content = caps.get(4).map_or("", |m| m.as_str());
1066 let marker_column = blockquote_prefix_len + leading_spaces.len();
1067 let content_column = marker_column + marker.len() + spacing.len();
1068
1069 if spacing.is_empty() {
1076 None
1077 } else {
1078 Some(ListItemInfo {
1079 marker: marker.to_string(),
1080 is_ordered: false,
1081 number: None,
1082 marker_column,
1083 content_column,
1084 })
1085 }
1086 } else if let Some(caps) = ORDERED_REGEX.captures(line_for_list_check) {
1087 let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
1088 let number_str = caps.get(2).map_or("", |m| m.as_str());
1089 let delimiter = caps.get(3).map_or("", |m| m.as_str());
1090 let spacing = caps.get(4).map_or("", |m| m.as_str());
1091 let _content = caps.get(5).map_or("", |m| m.as_str());
1092 let marker = format!("{number_str}{delimiter}");
1093 let marker_column = blockquote_prefix_len + leading_spaces.len();
1094 let content_column = marker_column + marker.len() + spacing.len();
1095
1096 if spacing.is_empty() {
1099 None
1100 } else {
1101 Some(ListItemInfo {
1102 marker,
1103 is_ordered: true,
1104 number: number_str.parse().ok(),
1105 marker_column,
1106 content_column,
1107 })
1108 }
1109 } else {
1110 None
1111 }
1112 } else {
1113 None
1114 };
1115
1116 lines.push(LineInfo {
1117 content: line.to_string(),
1118 byte_offset,
1119 indent,
1120 is_blank,
1121 in_code_block,
1122 in_front_matter: in_front_matter && i <= front_matter_end,
1123 list_item,
1124 heading: None, blockquote: None, });
1127 }
1128
1129 for i in 0..content_lines.len() {
1131 if lines[i].in_code_block {
1132 continue;
1133 }
1134
1135 if in_front_matter && i <= front_matter_end {
1137 continue;
1138 }
1139
1140 let line = content_lines[i];
1141
1142 if let Some(caps) = BLOCKQUOTE_REGEX_FULL.captures(line) {
1144 let indent_str = caps.get(1).map_or("", |m| m.as_str());
1145 let markers = caps.get(2).map_or("", |m| m.as_str());
1146 let spaces_after = caps.get(3).map_or("", |m| m.as_str());
1147 let content = caps.get(4).map_or("", |m| m.as_str());
1148
1149 let nesting_level = markers.chars().filter(|&c| c == '>').count();
1150 let marker_column = indent_str.len();
1151
1152 let prefix = format!("{indent_str}{markers}{spaces_after}");
1154
1155 let has_no_space = spaces_after.is_empty() && !content.is_empty();
1157 let has_multiple_spaces = spaces_after.len() > 1 || spaces_after.contains('\t');
1159
1160 let needs_md028_fix = content.is_empty() && spaces_after.is_empty();
1164
1165 lines[i].blockquote = Some(BlockquoteInfo {
1166 nesting_level,
1167 indent: indent_str.to_string(),
1168 marker_column,
1169 prefix,
1170 content: content.to_string(),
1171 has_no_space_after_marker: has_no_space,
1172 has_multiple_spaces_after_marker: has_multiple_spaces,
1173 needs_md028_fix,
1174 });
1175 }
1176
1177 if lines[i].is_blank {
1179 continue;
1180 }
1181
1182 let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
1185 crate::utils::mkdocs_snippets::is_snippet_section_start(line)
1186 || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
1187 } else {
1188 false
1189 };
1190
1191 if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
1192 if crate::utils::skip_context::is_in_html_comment(content, lines[i].byte_offset) {
1194 continue;
1195 }
1196 let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
1197 let hashes = caps.get(2).map_or("", |m| m.as_str());
1198 let spaces_after = caps.get(3).map_or("", |m| m.as_str());
1199 let rest = caps.get(4).map_or("", |m| m.as_str());
1200
1201 let level = hashes.len() as u8;
1202 let marker_column = leading_spaces.len();
1203
1204 let (text, has_closing, closing_seq) = {
1206 let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
1208 if rest[id_start..].trim_end().ends_with('}') {
1210 (&rest[..id_start], &rest[id_start..])
1212 } else {
1213 (rest, "")
1214 }
1215 } else {
1216 (rest, "")
1217 };
1218
1219 let trimmed_rest = rest_without_id.trim_end();
1221 if let Some(last_hash_pos) = trimmed_rest.rfind('#') {
1222 let mut start_of_hashes = last_hash_pos;
1224 while start_of_hashes > 0 && trimmed_rest.chars().nth(start_of_hashes - 1) == Some('#') {
1225 start_of_hashes -= 1;
1226 }
1227
1228 let has_space_before = start_of_hashes == 0
1230 || trimmed_rest
1231 .chars()
1232 .nth(start_of_hashes - 1)
1233 .is_some_and(|c| c.is_whitespace());
1234
1235 let potential_closing = &trimmed_rest[start_of_hashes..];
1237 let is_all_hashes = potential_closing.chars().all(|c| c == '#');
1238
1239 if is_all_hashes && has_space_before {
1240 let closing_hashes = potential_closing.to_string();
1242 let text_part = if !custom_id_part.is_empty() {
1245 format!("{}{}", rest_without_id[..start_of_hashes].trim_end(), custom_id_part)
1248 } else {
1249 rest_without_id[..start_of_hashes].trim_end().to_string()
1250 };
1251 (text_part, true, closing_hashes)
1252 } else {
1253 (rest.to_string(), false, String::new())
1255 }
1256 } else {
1257 (rest.to_string(), false, String::new())
1259 }
1260 };
1261
1262 let content_column = marker_column + hashes.len() + spaces_after.len();
1263
1264 let raw_text = text.trim().to_string();
1266 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
1267
1268 if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
1270 let next_line = content_lines[i + 1];
1271 if !lines[i + 1].in_code_block
1272 && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
1273 && let Some(next_line_id) =
1274 crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
1275 {
1276 custom_id = Some(next_line_id);
1277 }
1278 }
1279
1280 lines[i].heading = Some(HeadingInfo {
1281 level,
1282 style: HeadingStyle::ATX,
1283 marker: hashes.to_string(),
1284 marker_column,
1285 content_column,
1286 text: clean_text,
1287 custom_id,
1288 raw_text,
1289 has_closing_sequence: has_closing,
1290 closing_sequence: closing_seq,
1291 });
1292 }
1293 else if i + 1 < content_lines.len() {
1295 let next_line = content_lines[i + 1];
1296 if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
1297 if in_front_matter && i < front_matter_end {
1299 continue;
1300 }
1301
1302 if crate::utils::skip_context::is_in_html_comment(content, lines[i].byte_offset) {
1304 continue;
1305 }
1306
1307 let underline = next_line.trim();
1308
1309 if underline == "---" {
1312 continue;
1313 }
1314
1315 let current_line_trimmed = line.trim();
1317 if current_line_trimmed.contains(':')
1318 && !current_line_trimmed.starts_with('#')
1319 && !current_line_trimmed.contains('[')
1320 && !current_line_trimmed.contains("](")
1321 {
1322 continue;
1324 }
1325
1326 let level = if underline.starts_with('=') { 1 } else { 2 };
1327 let style = if level == 1 {
1328 HeadingStyle::Setext1
1329 } else {
1330 HeadingStyle::Setext2
1331 };
1332
1333 let raw_text = line.trim().to_string();
1335 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
1336
1337 if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
1339 let attr_line = content_lines[i + 2];
1340 if !lines[i + 2].in_code_block
1341 && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
1342 && let Some(attr_line_id) =
1343 crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
1344 {
1345 custom_id = Some(attr_line_id);
1346 }
1347 }
1348
1349 lines[i].heading = Some(HeadingInfo {
1350 level,
1351 style,
1352 marker: underline.to_string(),
1353 marker_column: next_line.len() - next_line.trim_start().len(),
1354 content_column: lines[i].indent,
1355 text: clean_text,
1356 custom_id,
1357 raw_text,
1358 has_closing_sequence: false,
1359 closing_sequence: String::new(),
1360 });
1361 }
1362 }
1363 }
1364
1365 lines
1366 }
1367
1368 fn parse_code_spans(content: &str, lines: &[LineInfo], ast: &Node) -> Vec<CodeSpan> {
1370 let mut code_spans = Vec::new();
1371
1372 if !content.contains('`') {
1374 return code_spans;
1375 }
1376
1377 fn extract_code_spans(node: &Node, content: &str, lines: &[LineInfo], spans: &mut Vec<CodeSpan>) {
1379 match node {
1380 Node::InlineCode(inline_code) => {
1381 if let Some(pos) = &inline_code.position {
1382 let start_pos = pos.start.offset;
1383 let end_pos = pos.end.offset;
1384
1385 let full_span = &content[start_pos..end_pos];
1387 let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
1388
1389 let content_start = start_pos + backtick_count;
1391 let content_end = end_pos - backtick_count;
1392 let span_content = if content_start < content_end {
1393 content[content_start..content_end].to_string()
1394 } else {
1395 String::new()
1396 };
1397
1398 let mut line_num = 1;
1400 let mut col_start = start_pos;
1401 for (idx, line_info) in lines.iter().enumerate() {
1402 if start_pos >= line_info.byte_offset {
1403 line_num = idx + 1;
1404 col_start = start_pos - line_info.byte_offset;
1405 } else {
1406 break;
1407 }
1408 }
1409
1410 let mut col_end = end_pos;
1412 for line_info in lines.iter() {
1413 if end_pos > line_info.byte_offset {
1414 col_end = end_pos - line_info.byte_offset;
1415 } else {
1416 break;
1417 }
1418 }
1419
1420 spans.push(CodeSpan {
1421 line: line_num,
1422 start_col: col_start,
1423 end_col: col_end,
1424 byte_offset: start_pos,
1425 byte_end: end_pos,
1426 backtick_count,
1427 content: span_content,
1428 });
1429 }
1430 }
1431 Node::Root(root) => {
1433 for child in &root.children {
1434 extract_code_spans(child, content, lines, spans);
1435 }
1436 }
1437 Node::Paragraph(para) => {
1438 for child in ¶.children {
1439 extract_code_spans(child, content, lines, spans);
1440 }
1441 }
1442 Node::Heading(heading) => {
1443 for child in &heading.children {
1444 extract_code_spans(child, content, lines, spans);
1445 }
1446 }
1447 Node::List(list) => {
1448 for child in &list.children {
1449 extract_code_spans(child, content, lines, spans);
1450 }
1451 }
1452 Node::ListItem(item) => {
1453 for child in &item.children {
1454 extract_code_spans(child, content, lines, spans);
1455 }
1456 }
1457 Node::Blockquote(blockquote) => {
1458 for child in &blockquote.children {
1459 extract_code_spans(child, content, lines, spans);
1460 }
1461 }
1462 Node::Table(table) => {
1463 for child in &table.children {
1464 extract_code_spans(child, content, lines, spans);
1465 }
1466 }
1467 Node::TableRow(row) => {
1468 for child in &row.children {
1469 extract_code_spans(child, content, lines, spans);
1470 }
1471 }
1472 Node::TableCell(cell) => {
1473 for child in &cell.children {
1474 extract_code_spans(child, content, lines, spans);
1475 }
1476 }
1477 Node::Emphasis(emphasis) => {
1478 for child in &emphasis.children {
1479 extract_code_spans(child, content, lines, spans);
1480 }
1481 }
1482 Node::Strong(strong) => {
1483 for child in &strong.children {
1484 extract_code_spans(child, content, lines, spans);
1485 }
1486 }
1487 Node::Link(link) => {
1488 for child in &link.children {
1489 extract_code_spans(child, content, lines, spans);
1490 }
1491 }
1492 Node::LinkReference(link_ref) => {
1493 for child in &link_ref.children {
1494 extract_code_spans(child, content, lines, spans);
1495 }
1496 }
1497 Node::FootnoteDefinition(footnote) => {
1498 for child in &footnote.children {
1499 extract_code_spans(child, content, lines, spans);
1500 }
1501 }
1502 Node::Delete(delete) => {
1503 for child in &delete.children {
1504 extract_code_spans(child, content, lines, spans);
1505 }
1506 }
1507 Node::Code(_)
1509 | Node::Text(_)
1510 | Node::Html(_)
1511 | Node::Image(_)
1512 | Node::ImageReference(_)
1513 | Node::FootnoteReference(_)
1514 | Node::Break(_)
1515 | Node::ThematicBreak(_)
1516 | Node::Definition(_)
1517 | Node::Yaml(_)
1518 | Node::Toml(_)
1519 | Node::Math(_)
1520 | Node::InlineMath(_)
1521 | Node::MdxJsxFlowElement(_)
1522 | Node::MdxFlowExpression(_)
1523 | Node::MdxJsxTextElement(_)
1524 | Node::MdxTextExpression(_)
1525 | Node::MdxjsEsm(_) => {
1526 }
1528 }
1529 }
1530
1531 extract_code_spans(ast, content, lines, &mut code_spans);
1533
1534 code_spans.sort_by_key(|span| span.byte_offset);
1536
1537 code_spans
1538 }
1539
1540 fn parse_list_blocks(lines: &[LineInfo]) -> Vec<ListBlock> {
1542 let mut list_blocks = Vec::with_capacity(lines.len() / 10); let mut current_block: Option<ListBlock> = None;
1545 let mut last_list_item_line = 0;
1546 let mut current_indent_level = 0;
1547 let mut last_marker_width = 0;
1548
1549 for (line_idx, line_info) in lines.iter().enumerate() {
1550 let line_num = line_idx + 1;
1551
1552 if line_info.in_code_block {
1554 if let Some(ref mut block) = current_block {
1555 let min_continuation_indent = CodeBlockUtils::calculate_min_continuation_indent(lines, line_idx);
1557
1558 let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
1560
1561 match context {
1562 CodeBlockContext::Indented => {
1563 block.end_line = line_num;
1565 continue;
1566 }
1567 CodeBlockContext::Standalone => {
1568 let completed_block = current_block.take().unwrap();
1570 list_blocks.push(completed_block);
1571 continue;
1572 }
1573 CodeBlockContext::Adjacent => {
1574 block.end_line = line_num;
1576 continue;
1577 }
1578 }
1579 } else {
1580 continue;
1582 }
1583 }
1584
1585 let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(&line_info.content) {
1587 caps.get(0).unwrap().as_str().to_string()
1588 } else {
1589 String::new()
1590 };
1591
1592 if let Some(list_item) = &line_info.list_item {
1594 let item_indent = list_item.marker_column;
1596 let nesting = item_indent / 2; if let Some(ref mut block) = current_block {
1599 let is_nested = nesting > block.nesting_level;
1603 let same_type =
1604 (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
1605 let same_context = block.blockquote_prefix == blockquote_prefix;
1606 let reasonable_distance = line_num <= last_list_item_line + 2; let marker_compatible =
1610 block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
1611
1612 let has_non_list_content = {
1614 let mut found_non_list = false;
1615 let block_last_item_line = block.item_lines.last().copied().unwrap_or(block.end_line);
1617
1618 if block_last_item_line > 0 && block_last_item_line <= lines.len() {
1620 let last_line = &lines[block_last_item_line - 1];
1621 if last_line.content.contains(r"`sqlalchemy`") && last_line.content.contains(r"\`") {
1622 log::debug!(
1623 "After problematic line {}: checking lines {} to {} for non-list content",
1624 block_last_item_line,
1625 block_last_item_line + 1,
1626 line_num
1627 );
1628 if line_num == block_last_item_line + 1 {
1630 log::debug!("Lines are consecutive, no content between");
1631 }
1632 }
1633 }
1634
1635 for check_line in (block_last_item_line + 1)..line_num {
1636 let check_idx = check_line - 1;
1637 if check_idx < lines.len() {
1638 let check_info = &lines[check_idx];
1639 let is_list_breaking_content = if check_info.in_code_block {
1641 let last_item_marker_width =
1643 if block_last_item_line > 0 && block_last_item_line <= lines.len() {
1644 lines[block_last_item_line - 1]
1645 .list_item
1646 .as_ref()
1647 .map(|li| {
1648 if li.is_ordered {
1649 li.marker.len() + 1 } else {
1651 li.marker.len()
1652 }
1653 })
1654 .unwrap_or(3) } else {
1656 3 };
1658
1659 let min_continuation = if block.is_ordered { last_item_marker_width } else { 2 };
1660
1661 let context = CodeBlockUtils::analyze_code_block_context(
1663 lines,
1664 check_line - 1,
1665 min_continuation,
1666 );
1667
1668 matches!(context, CodeBlockContext::Standalone)
1670 } else if !check_info.is_blank && check_info.list_item.is_none() {
1671 let line_content = check_info.content.trim();
1673
1674 if check_info.heading.is_some()
1676 || line_content.starts_with("---")
1677 || line_content.starts_with("***")
1678 || line_content.starts_with("___")
1679 || (line_content.contains('|')
1680 && !line_content.contains("](")
1681 && !line_content.contains("http")
1682 && (line_content.matches('|').count() > 1
1683 || line_content.starts_with('|')
1684 || line_content.ends_with('|')))
1685 || line_content.starts_with(">")
1686 {
1687 true
1688 }
1689 else {
1691 let last_item_marker_width =
1692 if block_last_item_line > 0 && block_last_item_line <= lines.len() {
1693 lines[block_last_item_line - 1]
1694 .list_item
1695 .as_ref()
1696 .map(|li| {
1697 if li.is_ordered {
1698 li.marker.len() + 1 } else {
1700 li.marker.len()
1701 }
1702 })
1703 .unwrap_or(3) } else {
1705 3 };
1707
1708 let min_continuation =
1709 if block.is_ordered { last_item_marker_width } else { 2 };
1710 check_info.indent < min_continuation
1711 }
1712 } else {
1713 false
1714 };
1715
1716 if is_list_breaking_content {
1717 found_non_list = true;
1719 break;
1720 }
1721 }
1722 }
1723 found_non_list
1724 };
1725
1726 let mut continues_list = if is_nested {
1730 same_context && reasonable_distance && !has_non_list_content
1732 } else {
1733 let result = same_type
1735 && same_context
1736 && reasonable_distance
1737 && marker_compatible
1738 && !has_non_list_content;
1739
1740 if block.item_lines.last().is_some_and(|&last_line| {
1742 last_line > 0
1743 && last_line <= lines.len()
1744 && lines[last_line - 1].content.contains(r"`sqlalchemy`")
1745 && lines[last_line - 1].content.contains(r"\`")
1746 }) {
1747 log::debug!(
1748 "List continuation check after problematic line at line {line_num}: same_type={same_type}, same_context={same_context}, reasonable_distance={reasonable_distance}, marker_compatible={marker_compatible}, has_non_list_content={has_non_list_content}, continues={result}"
1749 );
1750 if line_num > 0 && line_num <= lines.len() {
1751 log::debug!("Current line content: {:?}", lines[line_num - 1].content);
1752 }
1753 }
1754
1755 result
1756 };
1757
1758 if !continues_list && reasonable_distance && line_num > 0 && block.end_line == line_num - 1 {
1761 if block.item_lines.contains(&(line_num - 1)) {
1763 continues_list = true;
1765 }
1766 }
1767
1768 if continues_list {
1769 block.end_line = line_num;
1771 block.item_lines.push(line_num);
1772
1773 block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
1775 list_item.marker.len() + 1
1776 } else {
1777 list_item.marker.len()
1778 });
1779
1780 if !block.is_ordered
1782 && block.marker.is_some()
1783 && block.marker.as_ref() != Some(&list_item.marker)
1784 {
1785 block.marker = None;
1787 }
1788 } else {
1789 list_blocks.push(block.clone());
1792
1793 *block = ListBlock {
1794 start_line: line_num,
1795 end_line: line_num,
1796 is_ordered: list_item.is_ordered,
1797 marker: if list_item.is_ordered {
1798 None
1799 } else {
1800 Some(list_item.marker.clone())
1801 },
1802 blockquote_prefix: blockquote_prefix.clone(),
1803 item_lines: vec![line_num],
1804 nesting_level: nesting,
1805 max_marker_width: if list_item.is_ordered {
1806 list_item.marker.len() + 1
1807 } else {
1808 list_item.marker.len()
1809 },
1810 };
1811 }
1812 } else {
1813 current_block = Some(ListBlock {
1815 start_line: line_num,
1816 end_line: line_num,
1817 is_ordered: list_item.is_ordered,
1818 marker: if list_item.is_ordered {
1819 None
1820 } else {
1821 Some(list_item.marker.clone())
1822 },
1823 blockquote_prefix,
1824 item_lines: vec![line_num],
1825 nesting_level: nesting,
1826 max_marker_width: list_item.marker.len(),
1827 });
1828 }
1829
1830 last_list_item_line = line_num;
1831 current_indent_level = item_indent;
1832 last_marker_width = if list_item.is_ordered {
1833 list_item.marker.len() + 1 } else {
1835 list_item.marker.len()
1836 };
1837 } else if let Some(ref mut block) = current_block {
1838 let min_continuation_indent = if block.is_ordered {
1849 current_indent_level + last_marker_width
1850 } else {
1851 current_indent_level + 2 };
1853
1854 if line_info.indent >= min_continuation_indent {
1855 block.end_line = line_num;
1857 } else if line_info.is_blank {
1858 let mut check_idx = line_idx + 1;
1861 let mut found_continuation = false;
1862
1863 while check_idx < lines.len() && lines[check_idx].is_blank {
1865 check_idx += 1;
1866 }
1867
1868 if check_idx < lines.len() {
1869 let next_line = &lines[check_idx];
1870 if !next_line.in_code_block && next_line.indent >= min_continuation_indent {
1872 found_continuation = true;
1873 }
1874 else if !next_line.in_code_block
1876 && next_line.list_item.is_some()
1877 && let Some(item) = &next_line.list_item
1878 {
1879 let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
1880 .find(&next_line.content)
1881 .map_or(String::new(), |m| m.as_str().to_string());
1882 if item.marker_column == current_indent_level
1883 && item.is_ordered == block.is_ordered
1884 && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
1885 {
1886 let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
1889 if let Some(between_line) = lines.get(idx) {
1890 let trimmed = between_line.content.trim();
1891 if trimmed.is_empty() {
1893 return false;
1894 }
1895 let line_indent =
1897 between_line.content.len() - between_line.content.trim_start().len();
1898
1899 if trimmed.starts_with("```")
1901 || trimmed.starts_with("~~~")
1902 || trimmed.starts_with("---")
1903 || trimmed.starts_with("***")
1904 || trimmed.starts_with("___")
1905 || trimmed.starts_with(">")
1906 || trimmed.contains('|') || between_line.heading.is_some()
1908 {
1909 return true; }
1911
1912 line_indent >= min_continuation_indent
1914 } else {
1915 false
1916 }
1917 });
1918
1919 if block.is_ordered {
1920 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
1923 if let Some(between_line) = lines.get(idx) {
1924 let trimmed = between_line.content.trim();
1925 if trimmed.is_empty() {
1926 return false;
1927 }
1928 trimmed.starts_with("```")
1930 || trimmed.starts_with("~~~")
1931 || trimmed.starts_with("---")
1932 || trimmed.starts_with("***")
1933 || trimmed.starts_with("___")
1934 || trimmed.starts_with(">")
1935 || trimmed.contains('|') || between_line.heading.is_some()
1937 } else {
1938 false
1939 }
1940 });
1941 found_continuation = !has_structural_separators;
1942 } else {
1943 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
1945 if let Some(between_line) = lines.get(idx) {
1946 let trimmed = between_line.content.trim();
1947 if trimmed.is_empty() {
1948 return false;
1949 }
1950 trimmed.starts_with("```")
1952 || trimmed.starts_with("~~~")
1953 || trimmed.starts_with("---")
1954 || trimmed.starts_with("***")
1955 || trimmed.starts_with("___")
1956 || trimmed.starts_with(">")
1957 || trimmed.contains('|') || between_line.heading.is_some()
1959 } else {
1960 false
1961 }
1962 });
1963 found_continuation = !has_structural_separators;
1964 }
1965 }
1966 }
1967 }
1968
1969 if found_continuation {
1970 block.end_line = line_num;
1972 } else {
1973 list_blocks.push(block.clone());
1975 current_block = None;
1976 }
1977 } else {
1978 let min_required_indent = if block.is_ordered {
1981 current_indent_level + last_marker_width
1982 } else {
1983 current_indent_level + 2
1984 };
1985
1986 let line_content = line_info.content.trim();
1991 let is_structural_separator = line_info.heading.is_some()
1992 || line_content.starts_with("```")
1993 || line_content.starts_with("~~~")
1994 || line_content.starts_with("---")
1995 || line_content.starts_with("***")
1996 || line_content.starts_with("___")
1997 || line_content.starts_with(">")
1998 || (line_content.contains('|')
1999 && !line_content.contains("](")
2000 && !line_content.contains("http")
2001 && (line_content.matches('|').count() > 1
2002 || line_content.starts_with('|')
2003 || line_content.ends_with('|'))); let is_lazy_continuation = !is_structural_separator
2008 && !line_info.is_blank
2009 && (line_info.indent == 0 || line_info.indent >= min_required_indent);
2010
2011 if is_lazy_continuation {
2012 let content_to_check = if !blockquote_prefix.is_empty() {
2015 line_info
2017 .content
2018 .strip_prefix(&blockquote_prefix)
2019 .unwrap_or(&line_info.content)
2020 .trim()
2021 } else {
2022 line_info.content.trim()
2023 };
2024
2025 let starts_with_uppercase = content_to_check.chars().next().is_some_and(|c| c.is_uppercase());
2026
2027 if starts_with_uppercase && last_list_item_line > 0 {
2030 list_blocks.push(block.clone());
2032 current_block = None;
2033 } else {
2034 block.end_line = line_num;
2036 }
2037 } else {
2038 list_blocks.push(block.clone());
2040 current_block = None;
2041 }
2042 }
2043 }
2044 }
2045
2046 if let Some(block) = current_block {
2048 list_blocks.push(block);
2049 }
2050
2051 merge_adjacent_list_blocks(&mut list_blocks, lines);
2053
2054 list_blocks
2055 }
2056
2057 fn compute_char_frequency(content: &str) -> CharFrequency {
2059 let mut frequency = CharFrequency::default();
2060
2061 for ch in content.chars() {
2062 match ch {
2063 '#' => frequency.hash_count += 1,
2064 '*' => frequency.asterisk_count += 1,
2065 '_' => frequency.underscore_count += 1,
2066 '-' => frequency.hyphen_count += 1,
2067 '+' => frequency.plus_count += 1,
2068 '>' => frequency.gt_count += 1,
2069 '|' => frequency.pipe_count += 1,
2070 '[' => frequency.bracket_count += 1,
2071 '`' => frequency.backtick_count += 1,
2072 '<' => frequency.lt_count += 1,
2073 '!' => frequency.exclamation_count += 1,
2074 '\n' => frequency.newline_count += 1,
2075 _ => {}
2076 }
2077 }
2078
2079 frequency
2080 }
2081
2082 fn parse_html_tags(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<HtmlTag> {
2084 lazy_static! {
2085 static ref HTML_TAG_REGEX: regex::Regex =
2086 regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap();
2087 }
2088
2089 let mut html_tags = Vec::with_capacity(content.matches('<').count());
2090
2091 for cap in HTML_TAG_REGEX.captures_iter(content) {
2092 let full_match = cap.get(0).unwrap();
2093 let match_start = full_match.start();
2094 let match_end = full_match.end();
2095
2096 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2098 continue;
2099 }
2100
2101 let is_closing = !cap.get(1).unwrap().as_str().is_empty();
2102 let tag_name = cap.get(2).unwrap().as_str().to_lowercase();
2103 let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
2104
2105 let mut line_num = 1;
2107 let mut col_start = match_start;
2108 let mut col_end = match_end;
2109 for (idx, line_info) in lines.iter().enumerate() {
2110 if match_start >= line_info.byte_offset {
2111 line_num = idx + 1;
2112 col_start = match_start - line_info.byte_offset;
2113 col_end = match_end - line_info.byte_offset;
2114 } else {
2115 break;
2116 }
2117 }
2118
2119 html_tags.push(HtmlTag {
2120 line: line_num,
2121 start_col: col_start,
2122 end_col: col_end,
2123 byte_offset: match_start,
2124 byte_end: match_end,
2125 tag_name,
2126 is_closing,
2127 is_self_closing,
2128 raw_content: full_match.as_str().to_string(),
2129 });
2130 }
2131
2132 html_tags
2133 }
2134
2135 fn parse_emphasis_spans(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<EmphasisSpan> {
2137 lazy_static! {
2138 static ref EMPHASIS_REGEX: regex::Regex =
2139 regex::Regex::new(r"(\*{1,3}|_{1,3})([^*_\s][^*_]*?)(\*{1,3}|_{1,3})").unwrap();
2140 }
2141
2142 let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
2143
2144 for cap in EMPHASIS_REGEX.captures_iter(content) {
2145 let full_match = cap.get(0).unwrap();
2146 let match_start = full_match.start();
2147 let match_end = full_match.end();
2148
2149 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2151 continue;
2152 }
2153
2154 let opening_markers = cap.get(1).unwrap().as_str();
2155 let content_part = cap.get(2).unwrap().as_str();
2156 let closing_markers = cap.get(3).unwrap().as_str();
2157
2158 if opening_markers.chars().next() != closing_markers.chars().next()
2160 || opening_markers.len() != closing_markers.len()
2161 {
2162 continue;
2163 }
2164
2165 let marker = opening_markers.chars().next().unwrap();
2166 let marker_count = opening_markers.len();
2167
2168 let mut line_num = 1;
2170 let mut col_start = match_start;
2171 let mut col_end = match_end;
2172 for (idx, line_info) in lines.iter().enumerate() {
2173 if match_start >= line_info.byte_offset {
2174 line_num = idx + 1;
2175 col_start = match_start - line_info.byte_offset;
2176 col_end = match_end - line_info.byte_offset;
2177 } else {
2178 break;
2179 }
2180 }
2181
2182 emphasis_spans.push(EmphasisSpan {
2183 line: line_num,
2184 start_col: col_start,
2185 end_col: col_end,
2186 byte_offset: match_start,
2187 byte_end: match_end,
2188 marker,
2189 marker_count,
2190 content: content_part.to_string(),
2191 });
2192 }
2193
2194 emphasis_spans
2195 }
2196
2197 fn parse_table_rows(lines: &[LineInfo]) -> Vec<TableRow> {
2199 let mut table_rows = Vec::with_capacity(lines.len() / 20);
2200
2201 for (line_idx, line_info) in lines.iter().enumerate() {
2202 if line_info.in_code_block || line_info.is_blank {
2204 continue;
2205 }
2206
2207 let line = &line_info.content;
2208 let line_num = line_idx + 1;
2209
2210 if !line.contains('|') {
2212 continue;
2213 }
2214
2215 let parts: Vec<&str> = line.split('|').collect();
2217 let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
2218
2219 let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
2221 let mut column_alignments = Vec::new();
2222
2223 if is_separator {
2224 for part in &parts[1..parts.len() - 1] {
2225 let trimmed = part.trim();
2227 let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
2228 "center".to_string()
2229 } else if trimmed.ends_with(':') {
2230 "right".to_string()
2231 } else if trimmed.starts_with(':') {
2232 "left".to_string()
2233 } else {
2234 "none".to_string()
2235 };
2236 column_alignments.push(alignment);
2237 }
2238 }
2239
2240 table_rows.push(TableRow {
2241 line: line_num,
2242 is_separator,
2243 column_count,
2244 column_alignments,
2245 });
2246 }
2247
2248 table_rows
2249 }
2250
2251 fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
2253 let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
2254
2255 for cap in BARE_URL_PATTERN.captures_iter(content) {
2257 let full_match = cap.get(0).unwrap();
2258 let match_start = full_match.start();
2259 let match_end = full_match.end();
2260
2261 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2263 continue;
2264 }
2265
2266 let preceding_char = if match_start > 0 {
2268 content.chars().nth(match_start - 1)
2269 } else {
2270 None
2271 };
2272 let following_char = content.chars().nth(match_end);
2273
2274 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
2275 continue;
2276 }
2277 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
2278 continue;
2279 }
2280
2281 let url = full_match.as_str();
2282 let url_type = if url.starts_with("https://") {
2283 "https"
2284 } else if url.starts_with("http://") {
2285 "http"
2286 } else if url.starts_with("ftp://") {
2287 "ftp"
2288 } else {
2289 "other"
2290 };
2291
2292 let mut line_num = 1;
2294 let mut col_start = match_start;
2295 let mut col_end = match_end;
2296 for (idx, line_info) in lines.iter().enumerate() {
2297 if match_start >= line_info.byte_offset {
2298 line_num = idx + 1;
2299 col_start = match_start - line_info.byte_offset;
2300 col_end = match_end - line_info.byte_offset;
2301 } else {
2302 break;
2303 }
2304 }
2305
2306 bare_urls.push(BareUrl {
2307 line: line_num,
2308 start_col: col_start,
2309 end_col: col_end,
2310 byte_offset: match_start,
2311 byte_end: match_end,
2312 url: url.to_string(),
2313 url_type: url_type.to_string(),
2314 });
2315 }
2316
2317 for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
2319 let full_match = cap.get(0).unwrap();
2320 let match_start = full_match.start();
2321 let match_end = full_match.end();
2322
2323 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2325 continue;
2326 }
2327
2328 let preceding_char = if match_start > 0 {
2330 content.chars().nth(match_start - 1)
2331 } else {
2332 None
2333 };
2334 let following_char = content.chars().nth(match_end);
2335
2336 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
2337 continue;
2338 }
2339 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
2340 continue;
2341 }
2342
2343 let email = full_match.as_str();
2344
2345 let mut line_num = 1;
2347 let mut col_start = match_start;
2348 let mut col_end = match_end;
2349 for (idx, line_info) in lines.iter().enumerate() {
2350 if match_start >= line_info.byte_offset {
2351 line_num = idx + 1;
2352 col_start = match_start - line_info.byte_offset;
2353 col_end = match_end - line_info.byte_offset;
2354 } else {
2355 break;
2356 }
2357 }
2358
2359 bare_urls.push(BareUrl {
2360 line: line_num,
2361 start_col: col_start,
2362 end_col: col_end,
2363 byte_offset: match_start,
2364 byte_end: match_end,
2365 url: email.to_string(),
2366 url_type: "email".to_string(),
2367 });
2368 }
2369
2370 bare_urls
2371 }
2372}
2373
2374fn merge_adjacent_list_blocks(list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
2376 if list_blocks.len() < 2 {
2377 return;
2378 }
2379
2380 let mut merger = ListBlockMerger::new(lines);
2381 *list_blocks = merger.merge(list_blocks);
2382}
2383
/// Helper that decides which neighbouring [`ListBlock`]s belong together and merges them.
struct ListBlockMerger<'a> {
    /// Line metadata of the document; consulted to inspect the lines that lie
    /// between two candidate blocks.
    lines: &'a [LineInfo],
}
2388
2389impl<'a> ListBlockMerger<'a> {
2390 fn new(lines: &'a [LineInfo]) -> Self {
2391 Self { lines }
2392 }
2393
2394 fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
2395 let mut merged = Vec::with_capacity(list_blocks.len());
2396 let mut current = list_blocks[0].clone();
2397
2398 for next in list_blocks.iter().skip(1) {
2399 if self.should_merge_blocks(¤t, next) {
2400 current = self.merge_two_blocks(current, next);
2401 } else {
2402 merged.push(current);
2403 current = next.clone();
2404 }
2405 }
2406
2407 merged.push(current);
2408 merged
2409 }
2410
2411 fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
2413 if !self.blocks_are_compatible(current, next) {
2415 return false;
2416 }
2417
2418 let spacing = self.analyze_spacing_between(current, next);
2420 match spacing {
2421 BlockSpacing::Consecutive => true,
2422 BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
2423 BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
2424 self.can_merge_with_content_between(current, next)
2425 }
2426 }
2427 }
2428
2429 fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
2431 current.is_ordered == next.is_ordered
2432 && current.blockquote_prefix == next.blockquote_prefix
2433 && current.nesting_level == next.nesting_level
2434 }
2435
2436 fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
2438 let gap = next.start_line - current.end_line;
2439
2440 match gap {
2441 1 => BlockSpacing::Consecutive,
2442 2 => BlockSpacing::SingleBlank,
2443 _ if gap > 2 => {
2444 if self.has_only_blank_lines_between(current, next) {
2445 BlockSpacing::MultipleBlanks
2446 } else {
2447 BlockSpacing::ContentBetween
2448 }
2449 }
2450 _ => BlockSpacing::Consecutive, }
2452 }
2453
2454 fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
2456 if has_meaningful_content_between(current, next, self.lines) {
2459 return false; }
2461
2462 !current.is_ordered && current.marker == next.marker
2464 }
2465
2466 fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
2468 if has_meaningful_content_between(current, next, self.lines) {
2470 return false; }
2472
2473 current.is_ordered && next.is_ordered
2475 }
2476
2477 fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
2479 for line_num in (current.end_line + 1)..next.start_line {
2480 if let Some(line_info) = self.lines.get(line_num - 1)
2481 && !line_info.content.trim().is_empty()
2482 {
2483 return false;
2484 }
2485 }
2486 true
2487 }
2488
2489 fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
2491 current.end_line = next.end_line;
2492 current.item_lines.extend_from_slice(&next.item_lines);
2493
2494 current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
2496
2497 if !current.is_ordered && self.markers_differ(¤t, next) {
2499 current.marker = None; }
2501
2502 current
2503 }
2504
2505 fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
2507 current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
2508 }
2509}
2510
/// How far apart two neighbouring list blocks are, measured as
/// `next.start_line - current.end_line`.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// The gap is 1 (the blocks touch); a gap of 0 is classified the same way.
    Consecutive,
    /// The gap is 2: exactly one line lies between the blocks.
    SingleBlank,
    /// The gap is larger than 2 and every line in between is blank.
    MultipleBlanks,
    /// The gap is larger than 2 and at least one line in between is not blank.
    ContentBetween,
}
2519
2520fn has_meaningful_content_between(current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
2522 for line_num in (current.end_line + 1)..next.start_line {
2524 if let Some(line_info) = lines.get(line_num - 1) {
2525 let trimmed = line_info.content.trim();
2527
2528 if trimmed.is_empty() {
2530 continue;
2531 }
2532
2533 if line_info.heading.is_some() {
2537 return true; }
2539
2540 if is_horizontal_rule(trimmed) {
2542 return true; }
2544
2545 if trimmed.contains('|') && trimmed.len() > 1 {
2548 if !trimmed.contains("](") && !trimmed.contains("http") {
2550 let pipe_count = trimmed.matches('|').count();
2552 if pipe_count > 1 || trimmed.starts_with('|') || trimmed.ends_with('|') {
2553 return true; }
2555 }
2556 }
2557
2558 if trimmed.starts_with('>') {
2560 return true; }
2562
2563 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
2565 let line_indent = line_info.content.len() - line_info.content.trim_start().len();
2566
2567 let min_continuation_indent = if current.is_ordered {
2569 current.nesting_level + current.max_marker_width + 1 } else {
2571 current.nesting_level + 2
2572 };
2573
2574 if line_indent < min_continuation_indent {
2575 return true; }
2578 }
2579
2580 let line_indent = line_info.content.len() - line_info.content.trim_start().len();
2582
2583 let min_indent = if current.is_ordered {
2585 current.nesting_level + current.max_marker_width
2586 } else {
2587 current.nesting_level + 2
2588 };
2589
2590 if line_indent < min_indent {
2592 return true; }
2594
2595 }
2598 }
2599
2600 false
2602}
2603
/// Returns `true` when `trimmed` looks like a Markdown thematic break.
///
/// The text must start with `-`, `*` or `_`, contain at least three of that same
/// character, and otherwise consist only of spaces and tabs. Surrounding whitespace
/// is expected to have been removed already: a leading space makes the first
/// character a non-marker, and the function then returns `false`.
fn is_horizontal_rule(trimmed: &str) -> bool {
    let marker = match trimmed.chars().next() {
        Some(c @ ('-' | '*' | '_')) => c,
        _ => return false,
    };

    // Single pass, no intermediate buffer: count the markers and bail out on the
    // first character that is neither the marker nor horizontal whitespace.
    let mut marker_count = 0usize;
    for ch in trimmed.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' && ch != '\t' {
            return false;
        }
    }

    marker_count >= 3
}
2627
#[cfg(test)]
/// Unit tests for `LintContext` construction, offset mapping and per-line metadata.
mod tests {
    use super::*;

    /// Empty input still produces a single line offset but no line records.
    #[test]
    fn test_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard);

        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.lines.len(), 0);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
    }

    /// A document without a newline maps every offset onto line 1.
    #[test]
    fn test_single_line() {
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard);

        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        for (offset, expected) in [(0, (1, 1)), (3, (1, 4))] {
            assert_eq!(ctx.offset_to_line_col(offset), expected, "offset {offset}");
        }
    }

    /// Line offsets and (line, column) lookups across several lines, including a blank one.
    #[test]
    fn test_multi_line() {
        let ctx = LintContext::new("# Title\n\nSecond line\nThird line", MarkdownFlavor::Standard);

        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);

        // (byte offset, expected 1-based (line, column))
        let cases = [
            (0, (1, 1)),
            (8, (2, 1)),
            (9, (3, 1)),
            (15, (3, 7)),
            (21, (4, 1)),
        ];
        for (offset, expected) in cases {
            assert_eq!(ctx.offset_to_line_col(offset), expected, "offset {offset}");
        }
    }

    /// Per-line metadata: content, byte offset, indentation and blank detection.
    #[test]
    fn test_line_info() {
        let ctx = LintContext::new(
            "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```",
            MarkdownFlavor::Standard,
        );
        let lines = &ctx.lines;

        assert_eq!(lines.len(), 7);

        // Line 1: the heading.
        let heading_line = &lines[0];
        assert_eq!(heading_line.content, "# Title");
        assert_eq!(heading_line.byte_offset, 0);
        assert_eq!(heading_line.indent, 0);
        assert!(!heading_line.is_blank);
        assert!(!heading_line.in_code_block);
        assert!(heading_line.list_item.is_none());

        // Line 2: indented by four spaces.
        let indented_line = &lines[1];
        assert_eq!(indented_line.content, "    indented");
        assert_eq!(indented_line.byte_offset, 8);
        assert_eq!(indented_line.indent, 4);
        assert!(!indented_line.is_blank);

        // Line 3: blank.
        let blank_line = &lines[2];
        assert_eq!(blank_line.content, "");
        assert!(blank_line.is_blank);

        // Accessors take 1-based line numbers.
        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
        assert_eq!(ctx.line_info(1).map(|info| info.indent), Some(0));
        assert_eq!(ctx.line_info(2).map(|info| info.indent), Some(4));
    }

    /// Unordered and ordered list markers are detected; plain text is not.
    #[test]
    fn test_list_item_detection() {
        let ctx = LintContext::new(
            "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list",
            MarkdownFlavor::Standard,
        );

        // Line 1: top-level dash item.
        let dash_item = ctx.lines[0]
            .list_item
            .as_ref()
            .expect("line 1 should be a list item");
        assert_eq!(dash_item.marker, "-");
        assert!(!dash_item.is_ordered);
        assert_eq!(dash_item.marker_column, 0);
        assert_eq!(dash_item.content_column, 2);

        // Line 2: nested asterisk item.
        let star_item = ctx.lines[1]
            .list_item
            .as_ref()
            .expect("line 2 should be a list item");
        assert_eq!(star_item.marker, "*");
        assert_eq!(star_item.marker_column, 2);

        // Line 3: ordered item.
        let ordered_item = ctx.lines[2]
            .list_item
            .as_ref()
            .expect("line 3 should be a list item");
        assert_eq!(ordered_item.marker, "1.");
        assert!(ordered_item.is_ordered);
        assert_eq!(ordered_item.number, Some(1));

        // Line 6: ordinary paragraph text.
        assert!(ctx.lines[5].list_item.is_none());
    }

    /// Offsets on newline bytes and at end-of-content resolve to the expected columns.
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        let ctx = LintContext::new("a\nb\nc", MarkdownFlavor::Standard);

        // (byte offset, expected 1-based (line, column))
        let cases = [
            (0, (1, 1)),
            (1, (1, 2)),
            (2, (2, 1)),
            (3, (2, 2)),
            (4, (3, 1)),
            (5, (3, 2)),
        ];
        for (offset, expected) in cases {
            assert_eq!(ctx.offset_to_line_col(offset), expected, "offset {offset}");
        }
    }
}