1use crate::config::MarkdownFlavor;
2use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
3use lazy_static::lazy_static;
4use regex::Regex;
5
lazy_static! {
    // Inline links `[text](url)` and reference links `[text][id]`.
    // `(?sx)`: `s` lets `.` match newlines and `x` enables free-spacing mode,
    // so the whitespace and `#` comments inside the pattern are not matched.
    static ref LINK_PATTERN: Regex = Regex::new(
        r"(?sx)
 \[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\] # Link text in group 1 (handles nested brackets)
 (?:
 \(([^)]*)\) # Inline URL in group 2 (can be empty)
 |
 \[([^\]]*)\] # Reference ID in group 3
 )"
    ).unwrap();

    // Same shape as LINK_PATTERN but requires the leading `!` of an image.
    static ref IMAGE_PATTERN: Regex = Regex::new(
        r"(?sx)
 !\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\] # Alt text in group 1 (handles nested brackets)
 (?:
 \(([^)]*)\) # Inline URL in group 2 (can be empty)
 |
 \[([^\]]*)\] # Reference ID in group 3
 )"
    ).unwrap();

    // Reference definition `[id]: url "title"` indented by at most three spaces.
    // Group 1 = id, group 2 = URL, group 3 / 4 = optional title in double / single quotes.
    static ref REF_DEF_PATTERN: Regex = Regex::new(
        r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#
    ).unwrap();

    // A run of one or more backticks (a potential code span delimiter).
    static ref CODE_SPAN_PATTERN: Regex = Regex::new(
        r"`+"
    ).unwrap();

    // Bare `http`, `https` or `ftp` URL with optional port, path, query and fragment.
    // Group 1 captures the scheme.
    static ref BARE_URL_PATTERN: Regex = Regex::new(
        r#"(https?|ftp)://[^\s<>\[\]()\\'"`]+(?:\.[^\s<>\[\]()\\'"`]+)*(?::\d+)?(?:/[^\s<>\[\]()\\'"`]*)?(?:\?[^\s<>\[\]()\\'"`]*)?(?:#[^\s<>\[\]()\\'"`]*)?"#
    ).unwrap();

    // Bare e-mail address: local part, `@`, domain, and a TLD of at least two letters.
    static ref BARE_EMAIL_PATTERN: Regex = Regex::new(
        r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
    ).unwrap();

    // Autolink in angle brackets: `<scheme://...>` or `<user@host.tld>`.
    // Group 1 is the text between the brackets.
    static ref ANGLE_BRACKET_PATTERN: Regex = Regex::new(
        r"<((?:https?|ftp)://[^>]+|[^@\s]+@[^@\s]+\.[^@\s>]+)>"
    ).unwrap();

    // Leading blockquote prefix: optional whitespace, one or more `>`, trailing whitespace.
    static ref BLOCKQUOTE_PREFIX_REGEX: Regex = Regex::new(r"^(\s*>+\s*)").unwrap();
}
60
61#[derive(Debug, Clone)]
63pub struct LineInfo {
64 pub content: String,
66 pub byte_offset: usize,
68 pub indent: usize,
70 pub is_blank: bool,
72 pub in_code_block: bool,
74 pub in_front_matter: bool,
76 pub list_item: Option<ListItemInfo>,
78 pub heading: Option<HeadingInfo>,
80 pub blockquote: Option<BlockquoteInfo>,
82}
83
/// Marker information for a line that starts a list item.
#[derive(Clone, Debug)]
pub struct ListItemInfo {
    /// The marker as written: `-`, `*`, `+`, or a number plus delimiter such as `1.`.
    pub marker: String,
    /// True for numbered items, false for bullet items.
    pub is_ordered: bool,
    /// Parsed item number for ordered items (`None` for bullets or unparsable numbers).
    pub number: Option<usize>,
    /// Byte column at which the marker starts (any blockquote prefix included).
    pub marker_column: usize,
    /// Byte column at which the item text starts, after marker and spacing.
    pub content_column: usize,
}
98
/// Syntax used to write a heading.
///
/// `Eq` is derived alongside `PartialEq` because equality on a field-less
/// enum is total; this lets the type be used wherever `Eq` is required.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HeadingStyle {
    /// `#`-prefixed heading.
    ATX,
    /// Heading underlined with `=` (level 1).
    Setext1,
    /// Heading underlined with `-` (level 2).
    Setext2,
}
109
/// A link found in the document, either inline `[text](url)` or by reference `[text][id]`.
#[derive(Clone, Debug)]
pub struct ParsedLink {
    /// 1-indexed line on which the link starts.
    pub line: usize,
    /// Byte column of the link start, relative to the start of its line.
    pub start_col: usize,
    /// Byte column of the link end, relative to the start of the line it ends on.
    pub end_col: usize,
    /// Byte offset of the link start within the document.
    pub byte_offset: usize,
    /// Byte offset just past the end of the link.
    pub byte_end: usize,
    /// Link text between the first pair of brackets.
    pub text: String,
    /// Destination for inline links; empty for reference links.
    pub url: String,
    /// True when the link uses reference syntax.
    pub is_reference: bool,
    /// Lower-cased reference id (falls back to the link text when the id is empty).
    pub reference_id: Option<String>,
}
132
/// An image found in the document, either inline `![alt](url)` or by reference `![alt][id]`.
#[derive(Clone, Debug)]
pub struct ParsedImage {
    /// 1-indexed line on which the image starts.
    pub line: usize,
    /// Byte column of the image start, relative to the start of its line.
    pub start_col: usize,
    /// Byte column of the image end, relative to the start of the line it ends on.
    pub end_col: usize,
    /// Byte offset of the leading `!` within the document.
    pub byte_offset: usize,
    /// Byte offset just past the end of the image.
    pub byte_end: usize,
    /// Alternative text between the first pair of brackets.
    pub alt_text: String,
    /// Source for inline images; empty for reference images.
    pub url: String,
    /// True when the image uses reference syntax.
    pub is_reference: bool,
    /// Lower-cased reference id (falls back to the alt text when the id is empty).
    pub reference_id: Option<String>,
}
155
/// A reference definition of the form `[id]: url "title"`.
#[derive(Clone, Debug)]
pub struct ReferenceDef {
    /// 1-indexed line holding the definition.
    pub line: usize,
    /// Lower-cased reference id.
    pub id: String,
    /// Destination the id resolves to.
    pub url: String,
    /// Optional quoted title.
    pub title: Option<String>,
}
168
/// An inline code span delimited by matching backtick runs.
#[derive(Clone, Debug)]
pub struct CodeSpan {
    /// 1-indexed line on which the opening backticks sit.
    pub line: usize,
    /// Byte column of the opening backticks, relative to the start of that line.
    pub start_col: usize,
    /// Byte column just past the closing backticks, relative to the line they sit on.
    pub end_col: usize,
    /// Byte offset of the opening backticks within the document.
    pub byte_offset: usize,
    /// Byte offset just past the closing backticks.
    pub byte_end: usize,
    /// Number of backticks in each delimiter.
    pub backtick_count: usize,
    /// Text between the opening and closing delimiters.
    pub content: String,
}
187
188#[derive(Debug, Clone)]
190pub struct HeadingInfo {
191 pub level: u8,
193 pub style: HeadingStyle,
195 pub marker: String,
197 pub marker_column: usize,
199 pub content_column: usize,
201 pub text: String,
203 pub custom_id: Option<String>,
205 pub raw_text: String,
207 pub has_closing_sequence: bool,
209 pub closing_sequence: String,
211}
212
/// Details of the blockquote prefix found at the start of a line.
#[derive(Clone, Debug)]
pub struct BlockquoteInfo {
    /// Number of `>` markers in the prefix.
    pub nesting_level: usize,
    /// Whitespace preceding the first `>`.
    pub indent: String,
    /// Byte column of the first `>` (the length of `indent`).
    pub marker_column: usize,
    /// Indent, markers and the whitespace following them, concatenated.
    pub prefix: String,
    /// Text after the prefix.
    pub content: String,
    /// True when text follows the markers with no whitespace in between.
    pub has_no_space_after_marker: bool,
    /// True when more than one whitespace byte, or a tab, follows the markers.
    pub has_multiple_spaces_after_marker: bool,
    /// True when the line is just markers: no whitespace and no text after them.
    pub needs_md028_fix: bool,
}
233
/// A contiguous group of list lines.
///
/// NOTE(review): the code that builds these is outside this excerpt; field
/// notes marked "presumably" should be confirmed against `parse_list_blocks`.
#[derive(Clone, Debug)]
pub struct ListBlock {
    /// First line of the block (inclusive).
    pub start_line: usize,
    /// Last line of the block (inclusive).
    pub end_line: usize,
    /// True for ordered lists.
    pub is_ordered: bool,
    /// Marker shared by the items, presumably `None` when markers differ.
    pub marker: Option<String>,
    /// Blockquote prefix the block is nested in, presumably empty outside blockquotes.
    pub blockquote_prefix: String,
    /// Line numbers of the list items in the block.
    pub item_lines: Vec<usize>,
    /// Nesting depth of the block.
    pub nesting_level: usize,
    /// Width of the widest item marker in the block.
    pub max_marker_width: usize,
}
254
255use std::sync::{Arc, Mutex};
256
/// Occurrence counts of characters that matter to Markdown syntax.
///
/// The counts let callers cheaply rule out whole syntax families before
/// running a more expensive scan.
#[derive(Clone, Debug, Default)]
pub struct CharFrequency {
    /// Count reported for `#`.
    pub hash_count: usize,
    /// Count reported for `*`.
    pub asterisk_count: usize,
    /// Count reported for `_`.
    pub underscore_count: usize,
    /// Count reported for `-`.
    pub hyphen_count: usize,
    /// Count reported for `+`.
    pub plus_count: usize,
    /// Count reported for `>`.
    pub gt_count: usize,
    /// Count reported for `|`.
    pub pipe_count: usize,
    /// Count reported for `[` (NOTE(review): may also include `]` — confirm in the counting code).
    pub bracket_count: usize,
    /// Count reported for `` ` ``.
    pub backtick_count: usize,
    /// Count reported for `<`.
    pub lt_count: usize,
    /// Count reported for `!`.
    pub exclamation_count: usize,
    /// Count reported for line feeds.
    pub newline_count: usize,
}
285
/// An HTML tag located in the document.
///
/// NOTE(review): the parser that fills this in is outside this excerpt;
/// positions presumably follow the same conventions as links and code spans.
#[derive(Clone, Debug)]
pub struct HtmlTag {
    /// Line the tag is on.
    pub line: usize,
    /// Column at which the tag starts.
    pub start_col: usize,
    /// Column at which the tag ends.
    pub end_col: usize,
    /// Byte offset of the tag start within the document.
    pub byte_offset: usize,
    /// Byte offset of the tag end within the document.
    pub byte_end: usize,
    /// Name of the tag.
    pub tag_name: String,
    /// True for closing tags.
    pub is_closing: bool,
    /// True for self-closing tags.
    pub is_self_closing: bool,
    /// The tag text as written.
    pub raw_content: String,
}
308
/// An emphasis span located in the document.
///
/// NOTE(review): the parser that fills this in is outside this excerpt;
/// positions presumably follow the same conventions as links and code spans.
#[derive(Clone, Debug)]
pub struct EmphasisSpan {
    /// Line the span is on.
    pub line: usize,
    /// Column at which the span starts.
    pub start_col: usize,
    /// Column at which the span ends.
    pub end_col: usize,
    /// Byte offset of the span start within the document.
    pub byte_offset: usize,
    /// Byte offset of the span end within the document.
    pub byte_end: usize,
    /// Delimiter character of the span.
    pub marker: char,
    /// Number of delimiter characters on each side.
    pub marker_count: usize,
    /// Text enclosed by the delimiters.
    pub content: String,
}
329
/// A table row located in the document.
///
/// NOTE(review): the parser that fills this in is outside this excerpt.
#[derive(Clone, Debug)]
pub struct TableRow {
    /// Line the row is on.
    pub line: usize,
    /// True when the row is a header separator row.
    pub is_separator: bool,
    /// Number of columns in the row.
    pub column_count: usize,
    /// Alignment of each column, as strings (exact values defined by the parser).
    pub column_alignments: Vec<String>,
}
342
/// A URL or e-mail address written in plain text, without link syntax.
///
/// NOTE(review): the parser that fills this in is outside this excerpt.
#[derive(Clone, Debug)]
pub struct BareUrl {
    /// Line the URL is on.
    pub line: usize,
    /// Column at which the URL starts.
    pub start_col: usize,
    /// Column at which the URL ends.
    pub end_col: usize,
    /// Byte offset of the URL start within the document.
    pub byte_offset: usize,
    /// Byte offset of the URL end within the document.
    pub byte_end: usize,
    /// The URL text.
    pub url: String,
    /// Kind of URL, as a string (exact values defined by the parser).
    pub url_type: String,
}
361
362pub struct LintContext<'a> {
363 pub content: &'a str,
364 pub line_offsets: Vec<usize>,
365 pub code_blocks: Vec<(usize, usize)>, pub lines: Vec<LineInfo>, pub links: Vec<ParsedLink>, pub images: Vec<ParsedImage>, pub reference_defs: Vec<ReferenceDef>, code_spans_cache: Mutex<Option<Arc<Vec<CodeSpan>>>>, pub list_blocks: Vec<ListBlock>, pub char_frequency: CharFrequency, html_tags_cache: Mutex<Option<Arc<Vec<HtmlTag>>>>, emphasis_spans_cache: Mutex<Option<Arc<Vec<EmphasisSpan>>>>, table_rows_cache: Mutex<Option<Arc<Vec<TableRow>>>>, bare_urls_cache: Mutex<Option<Arc<Vec<BareUrl>>>>, pub flavor: MarkdownFlavor, }
379
380impl<'a> LintContext<'a> {
381 pub fn new(content: &'a str, flavor: MarkdownFlavor) -> Self {
382 let mut line_offsets = vec![0];
383 for (i, c) in content.char_indices() {
384 if c == '\n' {
385 line_offsets.push(i + 1);
386 }
387 }
388
389 let code_blocks = CodeBlockUtils::detect_code_blocks(content);
391
392 let lines = Self::compute_line_info(content, &line_offsets, &code_blocks, flavor);
394
395 let links = Self::parse_links(content, &lines, &code_blocks, flavor);
398 let images = Self::parse_images(content, &lines, &code_blocks);
399 let reference_defs = Self::parse_reference_defs(content, &lines);
400 let list_blocks = Self::parse_list_blocks(&lines);
401
402 let char_frequency = Self::compute_char_frequency(content);
404
405 Self {
406 content,
407 line_offsets,
408 code_blocks,
409 lines,
410 links,
411 images,
412 reference_defs,
413 code_spans_cache: Mutex::new(None),
414 list_blocks,
415 char_frequency,
416 html_tags_cache: Mutex::new(None),
417 emphasis_spans_cache: Mutex::new(None),
418 table_rows_cache: Mutex::new(None),
419 bare_urls_cache: Mutex::new(None),
420 flavor,
421 }
422 }
423
424 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
426 let mut cache = self.code_spans_cache.lock().unwrap();
427
428 if cache.is_none() {
430 let code_spans = Self::parse_code_spans(self.content, &self.lines);
431 *cache = Some(Arc::new(code_spans));
432 }
433
434 cache.as_ref().unwrap().clone()
436 }
437
438 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
440 let mut cache = self.html_tags_cache.lock().unwrap();
441
442 if cache.is_none() {
443 let html_tags = Self::parse_html_tags(self.content, &self.lines, &self.code_blocks);
444 *cache = Some(Arc::new(html_tags));
445 }
446
447 cache.as_ref().unwrap().clone()
448 }
449
450 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
452 let mut cache = self.emphasis_spans_cache.lock().unwrap();
453
454 if cache.is_none() {
455 let emphasis_spans = Self::parse_emphasis_spans(self.content, &self.lines, &self.code_blocks);
456 *cache = Some(Arc::new(emphasis_spans));
457 }
458
459 cache.as_ref().unwrap().clone()
460 }
461
462 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
464 let mut cache = self.table_rows_cache.lock().unwrap();
465
466 if cache.is_none() {
467 let table_rows = Self::parse_table_rows(&self.lines);
468 *cache = Some(Arc::new(table_rows));
469 }
470
471 cache.as_ref().unwrap().clone()
472 }
473
474 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
476 let mut cache = self.bare_urls_cache.lock().unwrap();
477
478 if cache.is_none() {
479 let bare_urls = Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks);
480 *cache = Some(Arc::new(bare_urls));
481 }
482
483 cache.as_ref().unwrap().clone()
484 }
485
486 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
488 match self.line_offsets.binary_search(&offset) {
489 Ok(line) => (line + 1, 1),
490 Err(line) => {
491 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
492 (line, offset - line_start + 1)
493 }
494 }
495 }
496
497 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
499 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
501 return true;
502 }
503
504 self.code_spans()
506 .iter()
507 .any(|span| pos >= span.byte_offset && pos < span.byte_end)
508 }
509
510 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
512 if line_num > 0 {
513 self.lines.get(line_num - 1)
514 } else {
515 None
516 }
517 }
518
519 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
521 self.line_info(line_num).map(|info| info.byte_offset)
522 }
523
524 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
526 let normalized_id = ref_id.to_lowercase();
527 self.reference_defs
528 .iter()
529 .find(|def| def.id == normalized_id)
530 .map(|def| def.url.as_str())
531 }
532
533 pub fn links_on_line(&self, line_num: usize) -> Vec<&ParsedLink> {
535 self.links.iter().filter(|link| link.line == line_num).collect()
536 }
537
538 pub fn images_on_line(&self, line_num: usize) -> Vec<&ParsedImage> {
540 self.images.iter().filter(|img| img.line == line_num).collect()
541 }
542
543 pub fn is_in_list_block(&self, line_num: usize) -> bool {
545 self.list_blocks
546 .iter()
547 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
548 }
549
550 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
552 self.list_blocks
553 .iter()
554 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
555 }
556
557 pub fn has_char(&self, ch: char) -> bool {
559 match ch {
560 '#' => self.char_frequency.hash_count > 0,
561 '*' => self.char_frequency.asterisk_count > 0,
562 '_' => self.char_frequency.underscore_count > 0,
563 '-' => self.char_frequency.hyphen_count > 0,
564 '+' => self.char_frequency.plus_count > 0,
565 '>' => self.char_frequency.gt_count > 0,
566 '|' => self.char_frequency.pipe_count > 0,
567 '[' => self.char_frequency.bracket_count > 0,
568 '`' => self.char_frequency.backtick_count > 0,
569 '<' => self.char_frequency.lt_count > 0,
570 '!' => self.char_frequency.exclamation_count > 0,
571 '\n' => self.char_frequency.newline_count > 0,
572 _ => self.content.contains(ch), }
574 }
575
576 pub fn char_count(&self, ch: char) -> usize {
578 match ch {
579 '#' => self.char_frequency.hash_count,
580 '*' => self.char_frequency.asterisk_count,
581 '_' => self.char_frequency.underscore_count,
582 '-' => self.char_frequency.hyphen_count,
583 '+' => self.char_frequency.plus_count,
584 '>' => self.char_frequency.gt_count,
585 '|' => self.char_frequency.pipe_count,
586 '[' => self.char_frequency.bracket_count,
587 '`' => self.char_frequency.backtick_count,
588 '<' => self.char_frequency.lt_count,
589 '!' => self.char_frequency.exclamation_count,
590 '\n' => self.char_frequency.newline_count,
591 _ => self.content.matches(ch).count(), }
593 }
594
595 pub fn likely_has_headings(&self) -> bool {
597 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
599
600 pub fn likely_has_lists(&self) -> bool {
602 self.char_frequency.asterisk_count > 0
603 || self.char_frequency.hyphen_count > 0
604 || self.char_frequency.plus_count > 0
605 }
606
607 pub fn likely_has_emphasis(&self) -> bool {
609 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
610 }
611
612 pub fn likely_has_tables(&self) -> bool {
614 self.char_frequency.pipe_count > 2
615 }
616
617 pub fn likely_has_blockquotes(&self) -> bool {
619 self.char_frequency.gt_count > 0
620 }
621
622 pub fn likely_has_code(&self) -> bool {
624 self.char_frequency.backtick_count > 0
625 }
626
627 pub fn likely_has_links_or_images(&self) -> bool {
629 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
630 }
631
632 pub fn likely_has_html(&self) -> bool {
634 self.char_frequency.lt_count > 0
635 }
636
637 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
639 self.html_tags()
640 .iter()
641 .filter(|tag| tag.line == line_num)
642 .cloned()
643 .collect()
644 }
645
646 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
648 self.emphasis_spans()
649 .iter()
650 .filter(|span| span.line == line_num)
651 .cloned()
652 .collect()
653 }
654
655 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
657 self.table_rows()
658 .iter()
659 .filter(|row| row.line == line_num)
660 .cloned()
661 .collect()
662 }
663
664 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
666 self.bare_urls()
667 .iter()
668 .filter(|url| url.line == line_num)
669 .cloned()
670 .collect()
671 }
672
673 fn parse_links(
675 content: &str,
676 lines: &[LineInfo],
677 code_blocks: &[(usize, usize)],
678 flavor: MarkdownFlavor,
679 ) -> Vec<ParsedLink> {
680 use crate::utils::skip_context::is_mkdocs_snippet_line;
681
682 let mut links = Vec::with_capacity(content.len() / 500); for cap in LINK_PATTERN.captures_iter(content) {
687 let full_match = cap.get(0).unwrap();
688 let match_start = full_match.start();
689 let match_end = full_match.end();
690
691 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
693 continue;
694 }
695
696 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
698 continue;
699 }
700
701 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
703 continue;
704 }
705
706 let line_idx = lines
709 .iter()
710 .position(|line| {
711 match_start >= line.byte_offset && (match_start < line.byte_offset + line.content.len() + 1)
712 })
713 .unwrap_or(0);
714
715 if is_mkdocs_snippet_line(&lines[line_idx].content, flavor) {
716 continue;
717 }
718
719 let mut line_num = 1;
721 let mut col_start = match_start;
722 for (idx, line_info) in lines.iter().enumerate() {
723 if match_start >= line_info.byte_offset {
724 line_num = idx + 1;
725 col_start = match_start - line_info.byte_offset;
726 } else {
727 break;
728 }
729 }
730
731 let mut end_line_num = 1;
733 let mut col_end = match_end;
734 for (idx, line_info) in lines.iter().enumerate() {
735 if match_end > line_info.byte_offset {
736 end_line_num = idx + 1;
737 col_end = match_end - line_info.byte_offset;
738 } else {
739 break;
740 }
741 }
742
743 if line_num == end_line_num {
745 } else {
747 }
750
751 let text = cap.get(1).map_or("", |m| m.as_str()).to_string();
752
753 if let Some(inline_url) = cap.get(2) {
754 links.push(ParsedLink {
756 line: line_num,
757 start_col: col_start,
758 end_col: col_end,
759 byte_offset: match_start,
760 byte_end: match_end,
761 text,
762 url: inline_url.as_str().to_string(),
763 is_reference: false,
764 reference_id: None,
765 });
766 } else if let Some(ref_id) = cap.get(3) {
767 let ref_id_str = ref_id.as_str();
769 let normalized_ref = if ref_id_str.is_empty() {
770 text.to_lowercase() } else {
772 ref_id_str.to_lowercase()
773 };
774
775 links.push(ParsedLink {
776 line: line_num,
777 start_col: col_start,
778 end_col: col_end,
779 byte_offset: match_start,
780 byte_end: match_end,
781 text,
782 url: String::new(), is_reference: true,
784 reference_id: Some(normalized_ref),
785 });
786 }
787 }
788
789 links
790 }
791
792 fn parse_images(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<ParsedImage> {
794 let mut images = Vec::with_capacity(content.len() / 1000); for cap in IMAGE_PATTERN.captures_iter(content) {
799 let full_match = cap.get(0).unwrap();
800 let match_start = full_match.start();
801 let match_end = full_match.end();
802
803 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
805 continue;
806 }
807
808 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
810 continue;
811 }
812
813 let mut line_num = 1;
815 let mut col_start = match_start;
816 for (idx, line_info) in lines.iter().enumerate() {
817 if match_start >= line_info.byte_offset {
818 line_num = idx + 1;
819 col_start = match_start - line_info.byte_offset;
820 } else {
821 break;
822 }
823 }
824
825 let mut end_line_num = 1;
827 let mut col_end = match_end;
828 for (idx, line_info) in lines.iter().enumerate() {
829 if match_end > line_info.byte_offset {
830 end_line_num = idx + 1;
831 col_end = match_end - line_info.byte_offset;
832 } else {
833 break;
834 }
835 }
836
837 if line_num == end_line_num {
839 } else {
841 }
844
845 let alt_text = cap.get(1).map_or("", |m| m.as_str()).to_string();
846
847 if let Some(inline_url) = cap.get(2) {
848 images.push(ParsedImage {
850 line: line_num,
851 start_col: col_start,
852 end_col: col_end,
853 byte_offset: match_start,
854 byte_end: match_end,
855 alt_text,
856 url: inline_url.as_str().to_string(),
857 is_reference: false,
858 reference_id: None,
859 });
860 } else if let Some(ref_id) = cap.get(3) {
861 let ref_id_str = ref_id.as_str();
863 let normalized_ref = if ref_id_str.is_empty() {
864 alt_text.to_lowercase() } else {
866 ref_id_str.to_lowercase()
867 };
868
869 images.push(ParsedImage {
870 line: line_num,
871 start_col: col_start,
872 end_col: col_end,
873 byte_offset: match_start,
874 byte_end: match_end,
875 alt_text,
876 url: String::new(), is_reference: true,
878 reference_id: Some(normalized_ref),
879 });
880 }
881 }
882
883 images
884 }
885
886 fn parse_reference_defs(_content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
888 let mut refs = Vec::with_capacity(lines.len() / 20); for (line_idx, line_info) in lines.iter().enumerate() {
892 if line_info.in_code_block {
894 continue;
895 }
896
897 let line = &line_info.content;
898 let line_num = line_idx + 1;
899
900 if let Some(cap) = REF_DEF_PATTERN.captures(line) {
901 let id = cap.get(1).unwrap().as_str().to_lowercase();
902 let url = cap.get(2).unwrap().as_str().to_string();
903 let title = cap.get(3).or_else(|| cap.get(4)).map(|m| m.as_str().to_string());
904
905 refs.push(ReferenceDef {
906 line: line_num,
907 id,
908 url,
909 title,
910 });
911 }
912 }
913
914 refs
915 }
916
917 fn compute_line_info(
919 content: &str,
920 line_offsets: &[usize],
921 code_blocks: &[(usize, usize)],
922 flavor: MarkdownFlavor,
923 ) -> Vec<LineInfo> {
924 lazy_static! {
925 static ref UNORDERED_REGEX: regex::Regex = regex::Regex::new(r"^(\s*)([-*+])([ \t]*)(.*)").unwrap();
927 static ref ORDERED_REGEX: regex::Regex = regex::Regex::new(r"^(\s*)(\d+)([.)])([ \t]*)(.*)").unwrap();
928
929 static ref BLOCKQUOTE_REGEX: regex::Regex = regex::Regex::new(r"^(\s*>\s*)(.*)").unwrap();
931
932 static ref ATX_HEADING_REGEX: regex::Regex = regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap();
934 static ref SETEXT_UNDERLINE_REGEX: regex::Regex = regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap();
935
936 static ref BLOCKQUOTE_REGEX_FULL: regex::Regex = regex::Regex::new(r"^(\s*)(>+)(\s*)(.*)$").unwrap();
938 }
939
940 let content_lines: Vec<&str> = content.lines().collect();
941 let mut lines = Vec::with_capacity(content_lines.len());
942
943 let mut in_front_matter = false;
945 let mut front_matter_end = 0;
946 if content_lines.first().map(|l| l.trim()) == Some("---") {
947 in_front_matter = true;
948 for (idx, line) in content_lines.iter().enumerate().skip(1) {
949 if line.trim() == "---" {
950 front_matter_end = idx;
951 break;
952 }
953 }
954 }
955
956 for (i, line) in content_lines.iter().enumerate() {
957 let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
958 let indent = line.len() - line.trim_start().len();
959 let is_blank = if let Some(caps) = BLOCKQUOTE_REGEX.captures(line) {
961 let after_prefix = caps.get(2).map_or("", |m| m.as_str());
963 after_prefix.trim().is_empty()
964 } else {
965 line.trim().is_empty()
966 };
967 let in_code_block = code_blocks.iter().any(|&(start, end)| {
970 let safe_start = if start > 0 && !content.is_char_boundary(start) {
975 let mut boundary = start;
977 while boundary > 0 && !content.is_char_boundary(boundary) {
978 boundary -= 1;
979 }
980 boundary
981 } else {
982 start
983 };
984
985 let safe_end = if end < content.len() && !content.is_char_boundary(end) {
986 let mut boundary = end;
988 while boundary < content.len() && !content.is_char_boundary(boundary) {
989 boundary += 1;
990 }
991 boundary
992 } else {
993 end.min(content.len())
994 };
995
996 let block_content = &content[safe_start..safe_end];
997 let is_multiline = block_content.contains('\n');
998 let is_fenced = block_content.starts_with("```") || block_content.starts_with("~~~");
999 let is_indented = !is_fenced
1000 && block_content
1001 .lines()
1002 .all(|l| l.starts_with(" ") || l.starts_with("\t") || l.trim().is_empty());
1003
1004 byte_offset >= start && byte_offset < end && (is_multiline || is_fenced || is_indented)
1005 });
1006
1007 let list_item = if !(in_code_block || is_blank || in_front_matter && i <= front_matter_end) {
1009 let (line_for_list_check, blockquote_prefix_len) = if let Some(caps) = BLOCKQUOTE_REGEX.captures(line) {
1011 let prefix = caps.get(1).unwrap().as_str();
1012 let content = caps.get(2).unwrap().as_str();
1013 (content, prefix.len())
1014 } else {
1015 (&**line, 0)
1016 };
1017
1018 if let Some(caps) = UNORDERED_REGEX.captures(line_for_list_check) {
1019 let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
1020 let marker = caps.get(2).map_or("", |m| m.as_str());
1021 let spacing = caps.get(3).map_or("", |m| m.as_str());
1022 let _content = caps.get(4).map_or("", |m| m.as_str());
1023 let marker_column = blockquote_prefix_len + leading_spaces.len();
1024 let content_column = marker_column + marker.len() + spacing.len();
1025
1026 if spacing.is_empty() {
1033 None
1034 } else {
1035 Some(ListItemInfo {
1036 marker: marker.to_string(),
1037 is_ordered: false,
1038 number: None,
1039 marker_column,
1040 content_column,
1041 })
1042 }
1043 } else if let Some(caps) = ORDERED_REGEX.captures(line_for_list_check) {
1044 let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
1045 let number_str = caps.get(2).map_or("", |m| m.as_str());
1046 let delimiter = caps.get(3).map_or("", |m| m.as_str());
1047 let spacing = caps.get(4).map_or("", |m| m.as_str());
1048 let _content = caps.get(5).map_or("", |m| m.as_str());
1049 let marker = format!("{number_str}{delimiter}");
1050 let marker_column = blockquote_prefix_len + leading_spaces.len();
1051 let content_column = marker_column + marker.len() + spacing.len();
1052
1053 if spacing.is_empty() {
1056 None
1057 } else {
1058 Some(ListItemInfo {
1059 marker,
1060 is_ordered: true,
1061 number: number_str.parse().ok(),
1062 marker_column,
1063 content_column,
1064 })
1065 }
1066 } else {
1067 None
1068 }
1069 } else {
1070 None
1071 };
1072
1073 lines.push(LineInfo {
1074 content: line.to_string(),
1075 byte_offset,
1076 indent,
1077 is_blank,
1078 in_code_block,
1079 in_front_matter: in_front_matter && i <= front_matter_end,
1080 list_item,
1081 heading: None, blockquote: None, });
1084 }
1085
1086 for i in 0..content_lines.len() {
1088 if lines[i].in_code_block {
1089 continue;
1090 }
1091
1092 if in_front_matter && i <= front_matter_end {
1094 continue;
1095 }
1096
1097 let line = content_lines[i];
1098
1099 if let Some(caps) = BLOCKQUOTE_REGEX_FULL.captures(line) {
1101 let indent_str = caps.get(1).map_or("", |m| m.as_str());
1102 let markers = caps.get(2).map_or("", |m| m.as_str());
1103 let spaces_after = caps.get(3).map_or("", |m| m.as_str());
1104 let content = caps.get(4).map_or("", |m| m.as_str());
1105
1106 let nesting_level = markers.chars().filter(|&c| c == '>').count();
1107 let marker_column = indent_str.len();
1108
1109 let prefix = format!("{indent_str}{markers}{spaces_after}");
1111
1112 let has_no_space = spaces_after.is_empty() && !content.is_empty();
1114 let has_multiple_spaces = spaces_after.len() > 1 || spaces_after.contains('\t');
1116
1117 let needs_md028_fix = content.is_empty() && spaces_after.is_empty();
1121
1122 lines[i].blockquote = Some(BlockquoteInfo {
1123 nesting_level,
1124 indent: indent_str.to_string(),
1125 marker_column,
1126 prefix,
1127 content: content.to_string(),
1128 has_no_space_after_marker: has_no_space,
1129 has_multiple_spaces_after_marker: has_multiple_spaces,
1130 needs_md028_fix,
1131 });
1132 }
1133
1134 if lines[i].is_blank {
1136 continue;
1137 }
1138
1139 let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
1142 crate::utils::mkdocs_snippets::is_snippet_section_start(line)
1143 || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
1144 } else {
1145 false
1146 };
1147
1148 if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
1149 if crate::utils::skip_context::is_in_html_comment(content, lines[i].byte_offset) {
1151 continue;
1152 }
1153 let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
1154 let hashes = caps.get(2).map_or("", |m| m.as_str());
1155 let spaces_after = caps.get(3).map_or("", |m| m.as_str());
1156 let rest = caps.get(4).map_or("", |m| m.as_str());
1157
1158 let level = hashes.len() as u8;
1159 let marker_column = leading_spaces.len();
1160
1161 let (text, has_closing, closing_seq) = {
1163 let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
1165 if rest[id_start..].trim_end().ends_with('}') {
1167 (&rest[..id_start], &rest[id_start..])
1169 } else {
1170 (rest, "")
1171 }
1172 } else {
1173 (rest, "")
1174 };
1175
1176 let trimmed_rest = rest_without_id.trim_end();
1178 if let Some(last_hash_pos) = trimmed_rest.rfind('#') {
1179 let mut start_of_hashes = last_hash_pos;
1181 while start_of_hashes > 0 && trimmed_rest.chars().nth(start_of_hashes - 1) == Some('#') {
1182 start_of_hashes -= 1;
1183 }
1184
1185 let has_space_before = start_of_hashes == 0
1187 || trimmed_rest
1188 .chars()
1189 .nth(start_of_hashes - 1)
1190 .is_some_and(|c| c.is_whitespace());
1191
1192 let potential_closing = &trimmed_rest[start_of_hashes..];
1194 let is_all_hashes = potential_closing.chars().all(|c| c == '#');
1195
1196 if is_all_hashes && has_space_before {
1197 let closing_hashes = potential_closing.to_string();
1199 let text_part = if !custom_id_part.is_empty() {
1202 format!("{}{}", rest_without_id[..start_of_hashes].trim_end(), custom_id_part)
1205 } else {
1206 rest_without_id[..start_of_hashes].trim_end().to_string()
1207 };
1208 (text_part, true, closing_hashes)
1209 } else {
1210 (rest.to_string(), false, String::new())
1212 }
1213 } else {
1214 (rest.to_string(), false, String::new())
1216 }
1217 };
1218
1219 let content_column = marker_column + hashes.len() + spaces_after.len();
1220
1221 let raw_text = text.trim().to_string();
1223 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
1224
1225 if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
1227 let next_line = content_lines[i + 1];
1228 if !lines[i + 1].in_code_block
1229 && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
1230 && let Some(next_line_id) =
1231 crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
1232 {
1233 custom_id = Some(next_line_id);
1234 }
1235 }
1236
1237 lines[i].heading = Some(HeadingInfo {
1238 level,
1239 style: HeadingStyle::ATX,
1240 marker: hashes.to_string(),
1241 marker_column,
1242 content_column,
1243 text: clean_text,
1244 custom_id,
1245 raw_text,
1246 has_closing_sequence: has_closing,
1247 closing_sequence: closing_seq,
1248 });
1249 }
1250 else if i + 1 < content_lines.len() {
1252 let next_line = content_lines[i + 1];
1253 if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
1254 if in_front_matter && i < front_matter_end {
1256 continue;
1257 }
1258
1259 if crate::utils::skip_context::is_in_html_comment(content, lines[i].byte_offset) {
1261 continue;
1262 }
1263
1264 let underline = next_line.trim();
1265
1266 if underline == "---" {
1269 continue;
1270 }
1271
1272 let current_line_trimmed = line.trim();
1274 if current_line_trimmed.contains(':')
1275 && !current_line_trimmed.starts_with('#')
1276 && !current_line_trimmed.contains('[')
1277 && !current_line_trimmed.contains("](")
1278 {
1279 continue;
1281 }
1282
1283 let level = if underline.starts_with('=') { 1 } else { 2 };
1284 let style = if level == 1 {
1285 HeadingStyle::Setext1
1286 } else {
1287 HeadingStyle::Setext2
1288 };
1289
1290 let raw_text = line.trim().to_string();
1292 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
1293
1294 if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
1296 let attr_line = content_lines[i + 2];
1297 if !lines[i + 2].in_code_block
1298 && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
1299 && let Some(attr_line_id) =
1300 crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
1301 {
1302 custom_id = Some(attr_line_id);
1303 }
1304 }
1305
1306 lines[i].heading = Some(HeadingInfo {
1307 level,
1308 style,
1309 marker: underline.to_string(),
1310 marker_column: next_line.len() - next_line.trim_start().len(),
1311 content_column: lines[i].indent,
1312 text: clean_text,
1313 custom_id,
1314 raw_text,
1315 has_closing_sequence: false,
1316 closing_sequence: String::new(),
1317 });
1318 }
1319 }
1320 }
1321
1322 lines
1323 }
1324
1325 fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
1327 let mut code_spans = Vec::with_capacity(content.matches('`').count() / 2);
1329
1330 if !content.contains('`') {
1332 return code_spans;
1333 }
1334
1335 let bytes = content.as_bytes();
1336
1337 let mut escaped_positions = Vec::new();
1339 for i in 0..bytes.len() {
1340 if i > 0 && bytes[i - 1] == b'\\' && bytes[i] == b'`' {
1341 escaped_positions.push(i);
1342 }
1343 }
1344
1345 let mut pos = 0;
1346 while pos < bytes.len() {
1347 if let Some(backtick_start) = content[pos..].find('`') {
1349 let start_pos = pos + backtick_start;
1350
1351 if escaped_positions.contains(&start_pos) {
1353 pos = start_pos + 1;
1354 continue;
1355 }
1356
1357 let mut in_code_block = false;
1359 for (line_idx, line_info) in lines.iter().enumerate() {
1360 if start_pos >= line_info.byte_offset
1361 && (line_idx + 1 >= lines.len() || start_pos < lines[line_idx + 1].byte_offset)
1362 {
1363 in_code_block = line_info.in_code_block;
1364 break;
1365 }
1366 }
1367
1368 if in_code_block {
1369 pos = start_pos + 1;
1370 continue;
1371 }
1372
1373 let mut backtick_count = 0;
1375 let mut i = start_pos;
1376 while i < bytes.len() && bytes[i] == b'`' && !escaped_positions.contains(&i) {
1377 backtick_count += 1;
1378 i += 1;
1379 }
1380
1381 let search_start = start_pos + backtick_count;
1383 let mut found_closing = false;
1384 let mut closing_end = 0;
1385
1386 let mut search_pos = search_start;
1388 while search_pos < bytes.len() {
1389 if let Some(rel_pos) = content[search_pos..].find('`') {
1391 let backtick_pos = search_pos + rel_pos;
1392
1393 if escaped_positions.contains(&backtick_pos) {
1395 search_pos = backtick_pos + 1;
1396 continue;
1397 }
1398
1399 let mut count = 0;
1401 let mut j = backtick_pos;
1402 while j < bytes.len() && bytes[j] == b'`' && !escaped_positions.contains(&j) {
1403 count += 1;
1404 j += 1;
1405 }
1406
1407 if count == backtick_count {
1409 let before_ok = backtick_pos == 0
1411 || bytes[backtick_pos - 1] != b'`'
1412 || escaped_positions.contains(&(backtick_pos - 1));
1413 let after_ok = j >= bytes.len() || bytes[j] != b'`' || escaped_positions.contains(&j);
1414
1415 if before_ok && after_ok {
1416 found_closing = true;
1417 closing_end = j;
1418 break;
1419 }
1420 }
1421
1422 search_pos = backtick_pos + 1;
1423 } else {
1424 break;
1425 }
1426 }
1427
1428 if found_closing {
1429 let content_start = start_pos + backtick_count;
1431 let content_end = closing_end - backtick_count;
1432 let span_content = content[content_start..content_end].to_string();
1433
1434 let mut line_num = 1;
1436 let mut col_start = start_pos;
1437 for (idx, line_info) in lines.iter().enumerate() {
1438 if start_pos >= line_info.byte_offset {
1439 line_num = idx + 1;
1440 col_start = start_pos - line_info.byte_offset;
1441 } else {
1442 break;
1443 }
1444 }
1445
1446 let mut col_end = closing_end;
1448 for line_info in lines.iter() {
1449 if closing_end > line_info.byte_offset {
1450 col_end = closing_end - line_info.byte_offset;
1451 } else {
1452 break;
1453 }
1454 }
1455
1456 code_spans.push(CodeSpan {
1457 line: line_num,
1458 start_col: col_start,
1459 end_col: col_end,
1460 byte_offset: start_pos,
1461 byte_end: closing_end,
1462 backtick_count,
1463 content: span_content,
1464 });
1465
1466 pos = closing_end;
1468 } else {
1469 pos = start_pos + backtick_count;
1471 }
1472 } else {
1473 break;
1475 }
1476 }
1477
1478 code_spans
1479 }
1480
1481 fn parse_list_blocks(lines: &[LineInfo]) -> Vec<ListBlock> {
1483 let mut list_blocks = Vec::with_capacity(lines.len() / 10); let mut current_block: Option<ListBlock> = None;
1486 let mut last_list_item_line = 0;
1487 let mut current_indent_level = 0;
1488 let mut last_marker_width = 0;
1489
1490 for (line_idx, line_info) in lines.iter().enumerate() {
1491 let line_num = line_idx + 1;
1492
1493 if line_info.in_code_block {
1495 if let Some(ref mut block) = current_block {
1496 let min_continuation_indent = CodeBlockUtils::calculate_min_continuation_indent(lines, line_idx);
1498
1499 let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
1501
1502 match context {
1503 CodeBlockContext::Indented => {
1504 block.end_line = line_num;
1506 continue;
1507 }
1508 CodeBlockContext::Standalone => {
1509 let completed_block = current_block.take().unwrap();
1511 list_blocks.push(completed_block);
1512 continue;
1513 }
1514 CodeBlockContext::Adjacent => {
1515 block.end_line = line_num;
1517 continue;
1518 }
1519 }
1520 } else {
1521 continue;
1523 }
1524 }
1525
1526 let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(&line_info.content) {
1528 caps.get(0).unwrap().as_str().to_string()
1529 } else {
1530 String::new()
1531 };
1532
1533 if let Some(list_item) = &line_info.list_item {
1535 let item_indent = list_item.marker_column;
1537 let nesting = item_indent / 2; if let Some(ref mut block) = current_block {
1540 let is_nested = nesting > block.nesting_level;
1544 let same_type =
1545 (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
1546 let same_context = block.blockquote_prefix == blockquote_prefix;
1547 let reasonable_distance = line_num <= last_list_item_line + 2; let marker_compatible =
1551 block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
1552
1553 let has_non_list_content = {
1555 let mut found_non_list = false;
1556 let block_last_item_line = block.item_lines.last().copied().unwrap_or(block.end_line);
1558
1559 if block_last_item_line > 0 && block_last_item_line <= lines.len() {
1561 let last_line = &lines[block_last_item_line - 1];
1562 if last_line.content.contains(r"`sqlalchemy`") && last_line.content.contains(r"\`") {
1563 log::debug!(
1564 "After problematic line {}: checking lines {} to {} for non-list content",
1565 block_last_item_line,
1566 block_last_item_line + 1,
1567 line_num
1568 );
1569 if line_num == block_last_item_line + 1 {
1571 log::debug!("Lines are consecutive, no content between");
1572 }
1573 }
1574 }
1575
1576 for check_line in (block_last_item_line + 1)..line_num {
1577 let check_idx = check_line - 1;
1578 if check_idx < lines.len() {
1579 let check_info = &lines[check_idx];
1580 let is_list_breaking_content = if check_info.in_code_block {
1582 let last_item_marker_width =
1584 if block_last_item_line > 0 && block_last_item_line <= lines.len() {
1585 lines[block_last_item_line - 1]
1586 .list_item
1587 .as_ref()
1588 .map(|li| {
1589 if li.is_ordered {
1590 li.marker.len() + 1 } else {
1592 li.marker.len()
1593 }
1594 })
1595 .unwrap_or(3) } else {
1597 3 };
1599
1600 let min_continuation = if block.is_ordered { last_item_marker_width } else { 2 };
1601
1602 let context = CodeBlockUtils::analyze_code_block_context(
1604 lines,
1605 check_line - 1,
1606 min_continuation,
1607 );
1608
1609 matches!(context, CodeBlockContext::Standalone)
1611 } else if !check_info.is_blank && check_info.list_item.is_none() {
1612 let line_content = check_info.content.trim();
1614
1615 if check_info.heading.is_some()
1617 || line_content.starts_with("---")
1618 || line_content.starts_with("***")
1619 || line_content.starts_with("___")
1620 || (line_content.contains('|')
1621 && !line_content.contains("](")
1622 && !line_content.contains("http")
1623 && (line_content.matches('|').count() > 1
1624 || line_content.starts_with('|')
1625 || line_content.ends_with('|')))
1626 || line_content.starts_with(">")
1627 {
1628 true
1629 }
1630 else {
1632 let last_item_marker_width =
1633 if block_last_item_line > 0 && block_last_item_line <= lines.len() {
1634 lines[block_last_item_line - 1]
1635 .list_item
1636 .as_ref()
1637 .map(|li| {
1638 if li.is_ordered {
1639 li.marker.len() + 1 } else {
1641 li.marker.len()
1642 }
1643 })
1644 .unwrap_or(3) } else {
1646 3 };
1648
1649 let min_continuation =
1650 if block.is_ordered { last_item_marker_width } else { 2 };
1651 check_info.indent < min_continuation
1652 }
1653 } else {
1654 false
1655 };
1656
1657 if is_list_breaking_content {
1658 found_non_list = true;
1660 break;
1661 }
1662 }
1663 }
1664 found_non_list
1665 };
1666
1667 let mut continues_list = if is_nested {
1671 same_context && reasonable_distance && !has_non_list_content
1673 } else {
1674 let result = same_type
1676 && same_context
1677 && reasonable_distance
1678 && marker_compatible
1679 && !has_non_list_content;
1680
1681 if block.item_lines.last().is_some_and(|&last_line| {
1683 last_line > 0
1684 && last_line <= lines.len()
1685 && lines[last_line - 1].content.contains(r"`sqlalchemy`")
1686 && lines[last_line - 1].content.contains(r"\`")
1687 }) {
1688 log::debug!(
1689 "List continuation check after problematic line at line {line_num}: same_type={same_type}, same_context={same_context}, reasonable_distance={reasonable_distance}, marker_compatible={marker_compatible}, has_non_list_content={has_non_list_content}, continues={result}"
1690 );
1691 if line_num > 0 && line_num <= lines.len() {
1692 log::debug!("Current line content: {:?}", lines[line_num - 1].content);
1693 }
1694 }
1695
1696 result
1697 };
1698
1699 if !continues_list && reasonable_distance && line_num > 0 && block.end_line == line_num - 1 {
1702 if block.item_lines.contains(&(line_num - 1)) {
1704 continues_list = true;
1706 }
1707 }
1708
1709 if continues_list {
1710 block.end_line = line_num;
1712 block.item_lines.push(line_num);
1713
1714 block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
1716 list_item.marker.len() + 1
1717 } else {
1718 list_item.marker.len()
1719 });
1720
1721 if !block.is_ordered
1723 && block.marker.is_some()
1724 && block.marker.as_ref() != Some(&list_item.marker)
1725 {
1726 block.marker = None;
1728 }
1729 } else {
1730 list_blocks.push(block.clone());
1733
1734 *block = ListBlock {
1735 start_line: line_num,
1736 end_line: line_num,
1737 is_ordered: list_item.is_ordered,
1738 marker: if list_item.is_ordered {
1739 None
1740 } else {
1741 Some(list_item.marker.clone())
1742 },
1743 blockquote_prefix: blockquote_prefix.clone(),
1744 item_lines: vec![line_num],
1745 nesting_level: nesting,
1746 max_marker_width: if list_item.is_ordered {
1747 list_item.marker.len() + 1
1748 } else {
1749 list_item.marker.len()
1750 },
1751 };
1752 }
1753 } else {
1754 current_block = Some(ListBlock {
1756 start_line: line_num,
1757 end_line: line_num,
1758 is_ordered: list_item.is_ordered,
1759 marker: if list_item.is_ordered {
1760 None
1761 } else {
1762 Some(list_item.marker.clone())
1763 },
1764 blockquote_prefix,
1765 item_lines: vec![line_num],
1766 nesting_level: nesting,
1767 max_marker_width: list_item.marker.len(),
1768 });
1769 }
1770
1771 last_list_item_line = line_num;
1772 current_indent_level = item_indent;
1773 last_marker_width = if list_item.is_ordered {
1774 list_item.marker.len() + 1 } else {
1776 list_item.marker.len()
1777 };
1778 } else if let Some(ref mut block) = current_block {
1779 let min_continuation_indent = if block.is_ordered {
1790 current_indent_level + last_marker_width
1791 } else {
1792 current_indent_level + 2 };
1794
1795 if line_info.indent >= min_continuation_indent {
1796 block.end_line = line_num;
1798 } else if line_info.is_blank {
1799 let mut check_idx = line_idx + 1;
1802 let mut found_continuation = false;
1803
1804 while check_idx < lines.len() && lines[check_idx].is_blank {
1806 check_idx += 1;
1807 }
1808
1809 if check_idx < lines.len() {
1810 let next_line = &lines[check_idx];
1811 if !next_line.in_code_block && next_line.indent >= min_continuation_indent {
1813 found_continuation = true;
1814 }
1815 else if !next_line.in_code_block
1817 && next_line.list_item.is_some()
1818 && let Some(item) = &next_line.list_item
1819 {
1820 let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
1821 .find(&next_line.content)
1822 .map_or(String::new(), |m| m.as_str().to_string());
1823 if item.marker_column == current_indent_level
1824 && item.is_ordered == block.is_ordered
1825 && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
1826 {
1827 let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
1830 if let Some(between_line) = lines.get(idx) {
1831 let trimmed = between_line.content.trim();
1832 if trimmed.is_empty() {
1834 return false;
1835 }
1836 let line_indent =
1838 between_line.content.len() - between_line.content.trim_start().len();
1839
1840 if trimmed.starts_with("```")
1842 || trimmed.starts_with("~~~")
1843 || trimmed.starts_with("---")
1844 || trimmed.starts_with("***")
1845 || trimmed.starts_with("___")
1846 || trimmed.starts_with(">")
1847 || trimmed.contains('|') || between_line.heading.is_some()
1849 {
1850 return true; }
1852
1853 line_indent >= min_continuation_indent
1855 } else {
1856 false
1857 }
1858 });
1859
1860 if block.is_ordered {
1861 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
1864 if let Some(between_line) = lines.get(idx) {
1865 let trimmed = between_line.content.trim();
1866 if trimmed.is_empty() {
1867 return false;
1868 }
1869 trimmed.starts_with("```")
1871 || trimmed.starts_with("~~~")
1872 || trimmed.starts_with("---")
1873 || trimmed.starts_with("***")
1874 || trimmed.starts_with("___")
1875 || trimmed.starts_with(">")
1876 || trimmed.contains('|') || between_line.heading.is_some()
1878 } else {
1879 false
1880 }
1881 });
1882 found_continuation = !has_structural_separators;
1883 } else {
1884 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
1886 if let Some(between_line) = lines.get(idx) {
1887 let trimmed = between_line.content.trim();
1888 if trimmed.is_empty() {
1889 return false;
1890 }
1891 trimmed.starts_with("```")
1893 || trimmed.starts_with("~~~")
1894 || trimmed.starts_with("---")
1895 || trimmed.starts_with("***")
1896 || trimmed.starts_with("___")
1897 || trimmed.starts_with(">")
1898 || trimmed.contains('|') || between_line.heading.is_some()
1900 } else {
1901 false
1902 }
1903 });
1904 found_continuation = !has_structural_separators;
1905 }
1906 }
1907 }
1908 }
1909
1910 if found_continuation {
1911 block.end_line = line_num;
1913 } else {
1914 list_blocks.push(block.clone());
1916 current_block = None;
1917 }
1918 } else {
1919 let min_required_indent = if block.is_ordered {
1922 current_indent_level + last_marker_width
1923 } else {
1924 current_indent_level + 2
1925 };
1926
1927 let line_content = line_info.content.trim();
1932 let is_structural_separator = line_info.heading.is_some()
1933 || line_content.starts_with("```")
1934 || line_content.starts_with("~~~")
1935 || line_content.starts_with("---")
1936 || line_content.starts_with("***")
1937 || line_content.starts_with("___")
1938 || line_content.starts_with(">")
1939 || (line_content.contains('|')
1940 && !line_content.contains("](")
1941 && !line_content.contains("http")
1942 && (line_content.matches('|').count() > 1
1943 || line_content.starts_with('|')
1944 || line_content.ends_with('|'))); let is_lazy_continuation = !is_structural_separator
1949 && !line_info.is_blank
1950 && (line_info.indent == 0 || line_info.indent >= min_required_indent);
1951
1952 if is_lazy_continuation {
1953 let content_to_check = if !blockquote_prefix.is_empty() {
1956 line_info
1958 .content
1959 .strip_prefix(&blockquote_prefix)
1960 .unwrap_or(&line_info.content)
1961 .trim()
1962 } else {
1963 line_info.content.trim()
1964 };
1965
1966 let starts_with_uppercase = content_to_check.chars().next().is_some_and(|c| c.is_uppercase());
1967
1968 if starts_with_uppercase && last_list_item_line > 0 {
1971 list_blocks.push(block.clone());
1973 current_block = None;
1974 } else {
1975 block.end_line = line_num;
1977 }
1978 } else {
1979 list_blocks.push(block.clone());
1981 current_block = None;
1982 }
1983 }
1984 }
1985 }
1986
1987 if let Some(block) = current_block {
1989 list_blocks.push(block);
1990 }
1991
1992 merge_adjacent_list_blocks(&mut list_blocks, lines);
1994
1995 list_blocks
1996 }
1997
1998 fn compute_char_frequency(content: &str) -> CharFrequency {
2000 let mut frequency = CharFrequency::default();
2001
2002 for ch in content.chars() {
2003 match ch {
2004 '#' => frequency.hash_count += 1,
2005 '*' => frequency.asterisk_count += 1,
2006 '_' => frequency.underscore_count += 1,
2007 '-' => frequency.hyphen_count += 1,
2008 '+' => frequency.plus_count += 1,
2009 '>' => frequency.gt_count += 1,
2010 '|' => frequency.pipe_count += 1,
2011 '[' => frequency.bracket_count += 1,
2012 '`' => frequency.backtick_count += 1,
2013 '<' => frequency.lt_count += 1,
2014 '!' => frequency.exclamation_count += 1,
2015 '\n' => frequency.newline_count += 1,
2016 _ => {}
2017 }
2018 }
2019
2020 frequency
2021 }
2022
2023 fn parse_html_tags(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<HtmlTag> {
2025 lazy_static! {
2026 static ref HTML_TAG_REGEX: regex::Regex =
2027 regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap();
2028 }
2029
2030 let mut html_tags = Vec::with_capacity(content.matches('<').count());
2031
2032 for cap in HTML_TAG_REGEX.captures_iter(content) {
2033 let full_match = cap.get(0).unwrap();
2034 let match_start = full_match.start();
2035 let match_end = full_match.end();
2036
2037 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2039 continue;
2040 }
2041
2042 let is_closing = !cap.get(1).unwrap().as_str().is_empty();
2043 let tag_name = cap.get(2).unwrap().as_str().to_lowercase();
2044 let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
2045
2046 let mut line_num = 1;
2048 let mut col_start = match_start;
2049 let mut col_end = match_end;
2050 for (idx, line_info) in lines.iter().enumerate() {
2051 if match_start >= line_info.byte_offset {
2052 line_num = idx + 1;
2053 col_start = match_start - line_info.byte_offset;
2054 col_end = match_end - line_info.byte_offset;
2055 } else {
2056 break;
2057 }
2058 }
2059
2060 html_tags.push(HtmlTag {
2061 line: line_num,
2062 start_col: col_start,
2063 end_col: col_end,
2064 byte_offset: match_start,
2065 byte_end: match_end,
2066 tag_name,
2067 is_closing,
2068 is_self_closing,
2069 raw_content: full_match.as_str().to_string(),
2070 });
2071 }
2072
2073 html_tags
2074 }
2075
2076 fn parse_emphasis_spans(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<EmphasisSpan> {
2078 lazy_static! {
2079 static ref EMPHASIS_REGEX: regex::Regex =
2080 regex::Regex::new(r"(\*{1,3}|_{1,3})([^*_\s][^*_]*?)(\*{1,3}|_{1,3})").unwrap();
2081 }
2082
2083 let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
2084
2085 for cap in EMPHASIS_REGEX.captures_iter(content) {
2086 let full_match = cap.get(0).unwrap();
2087 let match_start = full_match.start();
2088 let match_end = full_match.end();
2089
2090 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2092 continue;
2093 }
2094
2095 let opening_markers = cap.get(1).unwrap().as_str();
2096 let content_part = cap.get(2).unwrap().as_str();
2097 let closing_markers = cap.get(3).unwrap().as_str();
2098
2099 if opening_markers.chars().next() != closing_markers.chars().next()
2101 || opening_markers.len() != closing_markers.len()
2102 {
2103 continue;
2104 }
2105
2106 let marker = opening_markers.chars().next().unwrap();
2107 let marker_count = opening_markers.len();
2108
2109 let mut line_num = 1;
2111 let mut col_start = match_start;
2112 let mut col_end = match_end;
2113 for (idx, line_info) in lines.iter().enumerate() {
2114 if match_start >= line_info.byte_offset {
2115 line_num = idx + 1;
2116 col_start = match_start - line_info.byte_offset;
2117 col_end = match_end - line_info.byte_offset;
2118 } else {
2119 break;
2120 }
2121 }
2122
2123 emphasis_spans.push(EmphasisSpan {
2124 line: line_num,
2125 start_col: col_start,
2126 end_col: col_end,
2127 byte_offset: match_start,
2128 byte_end: match_end,
2129 marker,
2130 marker_count,
2131 content: content_part.to_string(),
2132 });
2133 }
2134
2135 emphasis_spans
2136 }
2137
2138 fn parse_table_rows(lines: &[LineInfo]) -> Vec<TableRow> {
2140 let mut table_rows = Vec::with_capacity(lines.len() / 20);
2141
2142 for (line_idx, line_info) in lines.iter().enumerate() {
2143 if line_info.in_code_block || line_info.is_blank {
2145 continue;
2146 }
2147
2148 let line = &line_info.content;
2149 let line_num = line_idx + 1;
2150
2151 if !line.contains('|') {
2153 continue;
2154 }
2155
2156 let parts: Vec<&str> = line.split('|').collect();
2158 let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
2159
2160 let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
2162 let mut column_alignments = Vec::new();
2163
2164 if is_separator {
2165 for part in &parts[1..parts.len() - 1] {
2166 let trimmed = part.trim();
2168 let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
2169 "center".to_string()
2170 } else if trimmed.ends_with(':') {
2171 "right".to_string()
2172 } else if trimmed.starts_with(':') {
2173 "left".to_string()
2174 } else {
2175 "none".to_string()
2176 };
2177 column_alignments.push(alignment);
2178 }
2179 }
2180
2181 table_rows.push(TableRow {
2182 line: line_num,
2183 is_separator,
2184 column_count,
2185 column_alignments,
2186 });
2187 }
2188
2189 table_rows
2190 }
2191
2192 fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
2194 let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
2195
2196 for cap in BARE_URL_PATTERN.captures_iter(content) {
2198 let full_match = cap.get(0).unwrap();
2199 let match_start = full_match.start();
2200 let match_end = full_match.end();
2201
2202 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2204 continue;
2205 }
2206
2207 let preceding_char = if match_start > 0 {
2209 content.chars().nth(match_start - 1)
2210 } else {
2211 None
2212 };
2213 let following_char = content.chars().nth(match_end);
2214
2215 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
2216 continue;
2217 }
2218 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
2219 continue;
2220 }
2221
2222 let url = full_match.as_str();
2223 let url_type = if url.starts_with("https://") {
2224 "https"
2225 } else if url.starts_with("http://") {
2226 "http"
2227 } else if url.starts_with("ftp://") {
2228 "ftp"
2229 } else {
2230 "other"
2231 };
2232
2233 let mut line_num = 1;
2235 let mut col_start = match_start;
2236 let mut col_end = match_end;
2237 for (idx, line_info) in lines.iter().enumerate() {
2238 if match_start >= line_info.byte_offset {
2239 line_num = idx + 1;
2240 col_start = match_start - line_info.byte_offset;
2241 col_end = match_end - line_info.byte_offset;
2242 } else {
2243 break;
2244 }
2245 }
2246
2247 bare_urls.push(BareUrl {
2248 line: line_num,
2249 start_col: col_start,
2250 end_col: col_end,
2251 byte_offset: match_start,
2252 byte_end: match_end,
2253 url: url.to_string(),
2254 url_type: url_type.to_string(),
2255 });
2256 }
2257
2258 for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
2260 let full_match = cap.get(0).unwrap();
2261 let match_start = full_match.start();
2262 let match_end = full_match.end();
2263
2264 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2266 continue;
2267 }
2268
2269 let preceding_char = if match_start > 0 {
2271 content.chars().nth(match_start - 1)
2272 } else {
2273 None
2274 };
2275 let following_char = content.chars().nth(match_end);
2276
2277 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
2278 continue;
2279 }
2280 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
2281 continue;
2282 }
2283
2284 let email = full_match.as_str();
2285
2286 let mut line_num = 1;
2288 let mut col_start = match_start;
2289 let mut col_end = match_end;
2290 for (idx, line_info) in lines.iter().enumerate() {
2291 if match_start >= line_info.byte_offset {
2292 line_num = idx + 1;
2293 col_start = match_start - line_info.byte_offset;
2294 col_end = match_end - line_info.byte_offset;
2295 } else {
2296 break;
2297 }
2298 }
2299
2300 bare_urls.push(BareUrl {
2301 line: line_num,
2302 start_col: col_start,
2303 end_col: col_end,
2304 byte_offset: match_start,
2305 byte_end: match_end,
2306 url: email.to_string(),
2307 url_type: "email".to_string(),
2308 });
2309 }
2310
2311 bare_urls
2312 }
2313}
2314
2315fn merge_adjacent_list_blocks(list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
2317 if list_blocks.len() < 2 {
2318 return;
2319 }
2320
2321 let mut merger = ListBlockMerger::new(lines);
2322 *list_blocks = merger.merge(list_blocks);
2323}
2324
2325struct ListBlockMerger<'a> {
2327 lines: &'a [LineInfo],
2328}
2329
2330impl<'a> ListBlockMerger<'a> {
2331 fn new(lines: &'a [LineInfo]) -> Self {
2332 Self { lines }
2333 }
2334
2335 fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
2336 let mut merged = Vec::with_capacity(list_blocks.len());
2337 let mut current = list_blocks[0].clone();
2338
2339 for next in list_blocks.iter().skip(1) {
2340 if self.should_merge_blocks(¤t, next) {
2341 current = self.merge_two_blocks(current, next);
2342 } else {
2343 merged.push(current);
2344 current = next.clone();
2345 }
2346 }
2347
2348 merged.push(current);
2349 merged
2350 }
2351
2352 fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
2354 if !self.blocks_are_compatible(current, next) {
2356 return false;
2357 }
2358
2359 let spacing = self.analyze_spacing_between(current, next);
2361 match spacing {
2362 BlockSpacing::Consecutive => true,
2363 BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
2364 BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
2365 self.can_merge_with_content_between(current, next)
2366 }
2367 }
2368 }
2369
2370 fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
2372 current.is_ordered == next.is_ordered
2373 && current.blockquote_prefix == next.blockquote_prefix
2374 && current.nesting_level == next.nesting_level
2375 }
2376
2377 fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
2379 let gap = next.start_line - current.end_line;
2380
2381 match gap {
2382 1 => BlockSpacing::Consecutive,
2383 2 => BlockSpacing::SingleBlank,
2384 _ if gap > 2 => {
2385 if self.has_only_blank_lines_between(current, next) {
2386 BlockSpacing::MultipleBlanks
2387 } else {
2388 BlockSpacing::ContentBetween
2389 }
2390 }
2391 _ => BlockSpacing::Consecutive, }
2393 }
2394
2395 fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
2397 if has_meaningful_content_between(current, next, self.lines) {
2400 return false; }
2402
2403 !current.is_ordered && current.marker == next.marker
2405 }
2406
2407 fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
2409 if has_meaningful_content_between(current, next, self.lines) {
2411 return false; }
2413
2414 current.is_ordered && next.is_ordered
2416 }
2417
2418 fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
2420 for line_num in (current.end_line + 1)..next.start_line {
2421 if let Some(line_info) = self.lines.get(line_num - 1)
2422 && !line_info.content.trim().is_empty()
2423 {
2424 return false;
2425 }
2426 }
2427 true
2428 }
2429
2430 fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
2432 current.end_line = next.end_line;
2433 current.item_lines.extend_from_slice(&next.item_lines);
2434
2435 current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
2437
2438 if !current.is_ordered && self.markers_differ(¤t, next) {
2440 current.marker = None; }
2442
2443 current
2444 }
2445
2446 fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
2448 current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
2449 }
2450}
2451
/// How far apart two neighbouring list blocks are.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// The second block starts on the line right after the first one ends.
    Consecutive,
    /// Exactly one line separates the two blocks.
    SingleBlank,
    /// Several lines separate the blocks and all of them are blank.
    MultipleBlanks,
    /// Several lines separate the blocks and at least one is not blank.
    ContentBetween,
}
2460
2461fn has_meaningful_content_between(current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
2463 for line_num in (current.end_line + 1)..next.start_line {
2465 if let Some(line_info) = lines.get(line_num - 1) {
2466 let trimmed = line_info.content.trim();
2468
2469 if trimmed.is_empty() {
2471 continue;
2472 }
2473
2474 if line_info.heading.is_some() {
2478 return true; }
2480
2481 if is_horizontal_rule(trimmed) {
2483 return true; }
2485
2486 if trimmed.contains('|') && trimmed.len() > 1 {
2489 if !trimmed.contains("](") && !trimmed.contains("http") {
2491 let pipe_count = trimmed.matches('|').count();
2493 if pipe_count > 1 || trimmed.starts_with('|') || trimmed.ends_with('|') {
2494 return true; }
2496 }
2497 }
2498
2499 if trimmed.starts_with('>') {
2501 return true; }
2503
2504 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
2506 let line_indent = line_info.content.len() - line_info.content.trim_start().len();
2507
2508 let min_continuation_indent = if current.is_ordered {
2510 current.nesting_level + current.max_marker_width + 1 } else {
2512 current.nesting_level + 2
2513 };
2514
2515 if line_indent < min_continuation_indent {
2516 return true; }
2519 }
2520
2521 let line_indent = line_info.content.len() - line_info.content.trim_start().len();
2523
2524 let min_indent = if current.is_ordered {
2526 current.nesting_level + current.max_marker_width
2527 } else {
2528 current.nesting_level + 2
2529 };
2530
2531 if line_indent < min_indent {
2533 return true; }
2535
2536 }
2539 }
2540
2541 false
2543}
2544
/// Returns `true` when `trimmed` is a horizontal rule: at least three of the
/// same character (`-`, `*` or `_`), optionally interleaved with spaces or
/// tabs, and nothing else.
///
/// The caller is expected to pass text with surrounding whitespace removed;
/// input that starts with any other character is rejected.
fn is_horizontal_rule(trimmed: &str) -> bool {
    if trimmed.len() < 3 {
        return false;
    }

    let rule_char = match trimmed.chars().next() {
        Some(c @ ('-' | '*' | '_')) => c,
        _ => return false,
    };

    // Single pass without an intermediate buffer: count rule characters and
    // bail out on anything that is neither the rule character nor blank.
    let mut count = 0usize;
    for ch in trimmed.chars() {
        if ch == rule_char {
            count += 1;
        } else if ch != ' ' && ch != '\t' {
            return false;
        }
    }

    count >= 3
}
2568
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty document has a single line offset and no line records.
    #[test]
    fn test_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard);
        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }

    /// Offsets within a single line map to 1-based columns on line 1.
    #[test]
    fn test_single_line() {
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard);
        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
    }

    /// Line offsets follow each newline, and offsets resolve to the right line.
    #[test]
    fn test_multi_line() {
        let content = "# Title\n\nSecond line\nThird line";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);
        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);

        let expectations = [
            (0, (1, 1)),
            (8, (2, 1)),
            (9, (3, 1)),
            (15, (3, 7)),
            (21, (4, 1)),
        ];
        for (offset, expected) in expectations {
            assert_eq!(ctx.offset_to_line_col(offset), expected, "offset {offset}");
        }
    }

    /// Per-line metadata: content, byte offset, indent, blank and code flags.
    #[test]
    fn test_line_info() {
        // The second line carries four spaces of indentation, matching the
        // `indent == 4` assertion below.
        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        assert_eq!(ctx.lines.len(), 7);

        let title = &ctx.lines[0];
        assert_eq!(title.content, "# Title");
        assert_eq!(title.byte_offset, 0);
        assert_eq!(title.indent, 0);
        assert!(!title.is_blank);
        assert!(!title.in_code_block);
        assert!(title.list_item.is_none());

        let indented = &ctx.lines[1];
        assert_eq!(indented.content, "    indented");
        assert_eq!(indented.byte_offset, 8);
        assert_eq!(indented.indent, 4);
        assert!(!indented.is_blank);

        let blank = &ctx.lines[2];
        assert_eq!(blank.content, "");
        assert!(blank.is_blank);

        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
    }

    /// List markers, ordering and marker columns are detected per line.
    #[test]
    fn test_list_item_detection() {
        // The nested bullet is indented by two spaces, matching the
        // `marker_column == 2` assertion below.
        // NOTE(review): the indentation before `2)` is not covered by any
        // assertion and is kept exactly as found; confirm the intended depth.
        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n 2) Nested ordered\n\nNot a list";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        let unordered = ctx.lines[0]
            .list_item
            .as_ref()
            .expect("line 1 should be a list item");
        assert_eq!(unordered.marker, "-");
        assert!(!unordered.is_ordered);
        assert_eq!(unordered.marker_column, 0);
        assert_eq!(unordered.content_column, 2);

        let nested = ctx.lines[1]
            .list_item
            .as_ref()
            .expect("line 2 should be a list item");
        assert_eq!(nested.marker, "*");
        assert_eq!(nested.marker_column, 2);

        let ordered = ctx.lines[2]
            .list_item
            .as_ref()
            .expect("line 3 should be a list item");
        assert_eq!(ordered.marker, "1.");
        assert!(ordered.is_ordered);
        assert_eq!(ordered.number, Some(1));

        assert!(ctx.lines[5].list_item.is_none());
    }

    /// Offsets on and right after newlines resolve to the expected positions.
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        let content = "a\nb\nc";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        let expectations = [
            (0, (1, 1)),
            (1, (1, 2)),
            (2, (2, 1)),
            (3, (2, 2)),
            (4, (3, 1)),
            (5, (3, 2)),
        ];
        for (offset, expected) in expectations {
            assert_eq!(ctx.offset_to_line_col(offset), expected, "offset {offset}");
        }
    }
}