1use crate::config::MarkdownFlavor;
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
4use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
5use regex::Regex;
6use std::borrow::Cow;
7use std::sync::LazyLock;
8
/// Matches inline links (`[text](url "title")`) and bracketed reference
/// links (`[text][id]`, `[text][]`).
///
/// Capture groups:
/// 1. link text (backslash escapes and one level of nested `[...]` allowed)
/// 2. URL written inside angle brackets
/// 3. bare URL
/// 4. double-quoted title
/// 5. single-quoted title
/// 6. reference id (may be empty)
///
/// The pattern is compiled with the `s` and `x` flags, so the `#` comments and
/// whitespace inside the literal are not part of the match.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\] # Link text in group 1 (handles nested brackets)
        (?:
        \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
        |
        \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
22
/// Matches inline images (`![alt](url "title")`) and bracketed reference
/// images (`![alt][id]`, `![alt][]`).
///
/// Capture groups mirror `LINK_PATTERN`:
/// 1. alt text (backslash escapes and one level of nested `[...]` allowed)
/// 2. URL written inside angle brackets
/// 3. bare URL
/// 4. double-quoted title
/// 5. single-quoted title
/// 6. reference id (may be empty)
///
/// The pattern is compiled with the `s` and `x` flags, so the `#` comments and
/// whitespace inside the literal are not part of the match.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\] # Alt text in group 1 (handles nested brackets)
        (?:
        \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
        |
        \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
36
/// Matches a single-line reference definition such as `[id]: url "title"`,
/// indented by at most three spaces.
///
/// Capture groups: 1 = id, 2 = URL (a run of non-whitespace), 3 = double-quoted
/// title, 4 = single-quoted title. `(?m)` makes `^`/`$` anchor at line
/// boundaries.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
40
/// Matches `http`, `https` and `ftp` URLs that appear as plain text.
///
/// Group 1 captures the scheme. The URL body stops at whitespace, angle
/// brackets, square brackets, parentheses, backslashes, quotes and backticks;
/// an optional port, path, query string and fragment are accepted.
static BARE_URL_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(https?|ftp)://[^\s<>\[\]()\\'"`]+(?:\.[^\s<>\[\]()\\'"`]+)*(?::\d+)?(?:/[^\s<>\[\]()\\'"`]*)?(?:\?[^\s<>\[\]()\\'"`]*)?(?:#[^\s<>\[\]()\\'"`]*)?"#
    ).unwrap()
});
47
/// Matches plain-text email addresses: an ASCII local part, `@`, a domain and
/// a final alphabetic label of at least two letters. No capture groups.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
51
/// Captures (group 1) a blockquote prefix at the start of a string: optional
/// leading whitespace, one or more `>` characters, and any whitespace that
/// follows them.
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
54
/// Pre-computed facts about a single line of the document.
///
/// Entries live in `LintContext::lines` and are looked up by 1-based line
/// number through `LintContext::line_info`. The passes that fill these fields
/// are defined outside this chunk, so descriptions marked `NOTE(review)` are
/// inferred from the field names and should be confirmed there.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset of the line's first byte within the source text.
    pub byte_offset: usize,
    /// Number of bytes that `content()` slices for this line.
    pub byte_len: usize,
    /// NOTE(review): presumably the amount of leading whitespace — confirm the unit.
    pub indent: usize,
    /// NOTE(review): presumably true for empty / whitespace-only lines — confirm.
    pub is_blank: bool,
    /// Flag returned by `LintContext::is_in_code_block` for this line.
    pub in_code_block: bool,
    /// Flag returned by `LintContext::is_in_front_matter` for this line.
    pub in_front_matter: bool,
    /// Flag returned by `LintContext::is_in_html_block` for this line.
    pub in_html_block: bool,
    /// NOTE(review): presumably set for lines inside an HTML comment — confirm.
    pub in_html_comment: bool,
    /// List item details when the line starts a list item, otherwise `None`
    /// (NOTE(review): inferred from the type — confirm).
    pub list_item: Option<ListItemInfo>,
    /// Heading details when the line is a heading, otherwise `None`
    /// (NOTE(review): inferred from the type — confirm).
    pub heading: Option<HeadingInfo>,
    /// Blockquote details when the line is a blockquote line, otherwise `None`
    /// (NOTE(review): inferred from the type — confirm).
    pub blockquote: Option<BlockquoteInfo>,
    /// NOTE(review): presumably set for lines inside an mkdocstrings block — confirm.
    pub in_mkdocstrings: bool,
    /// NOTE(review): presumably set for lines inside an ESM import/export block — confirm.
    pub in_esm_block: bool,
}
85
86impl LineInfo {
87 pub fn content<'a>(&self, source: &'a str) -> &'a str {
89 &source[self.byte_offset..self.byte_offset + self.byte_len]
90 }
91}
92
/// Details about the list marker found on a line.
///
/// NOTE(review): the code that fills this struct is outside this chunk; the
/// field descriptions below are inferred from names and need confirming.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker text as written (presumably e.g. `-`, `*`, `1.`).
    pub marker: String,
    /// Whether the item belongs to an ordered list.
    pub is_ordered: bool,
    /// The item's number for ordered items; presumably `None` for unordered ones.
    pub number: Option<usize>,
    /// Column at which the marker starts (TODO confirm 0- vs 1-based).
    pub marker_column: usize,
    /// Column at which the item's content starts (TODO confirm 0- vs 1-based).
    pub content_column: usize,
}
107
/// Syntax used to write a heading.
///
/// NOTE(review): the code that assigns these variants is outside this chunk;
/// the descriptions follow the usual Markdown meaning of the names.
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    /// Heading introduced by leading `#` characters (presumably).
    ATX,
    /// Setext heading, first variant (presumably the `=` underline, level 1).
    Setext1,
    /// Setext heading, second variant (presumably the `-` underline, level 2).
    Setext2,
}
118
/// A link found in the document by `LintContext::parse_links`.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// 1-based number of the line on which the link starts.
    pub line: usize,
    /// Byte distance from the start of that line to the start of the link.
    pub start_col: usize,
    /// Byte distance from the start of the line containing the link's end to that end.
    pub end_col: usize,
    /// Byte offset of the link's first byte within the document.
    pub byte_offset: usize,
    /// Byte offset just past the link within the document.
    pub byte_end: usize,
    /// Raw source text between the link's opening `[` and its matching `]`.
    pub text: Cow<'a, str>,
    /// Destination reported by the Markdown parser; empty for links that were
    /// only found by the regex fallback.
    pub url: Cow<'a, str>,
    /// True for reference, collapsed and shortcut links.
    pub is_reference: bool,
    /// Lower-cased reference id for reference-style links (the lower-cased link
    /// text when the id is empty); `None` for other links.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link classification from `pulldown_cmark`.
    pub link_type: LinkType,
}
143
/// A reference that the Markdown parser reported through its broken-link
/// callback while `LintContext::parse_links` was running.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text supplied to the callback.
    pub reference: String,
    /// Byte range in the document supplied to the callback.
    pub span: std::ops::Range<usize>,
}
152
/// An image found in the document by `LintContext::parse_images`.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// 1-based number of the line on which the image starts.
    pub line: usize,
    /// Byte distance from the start of that line to the start of the image.
    pub start_col: usize,
    /// Byte distance from the start of the line containing the image's end to that end.
    pub end_col: usize,
    /// Byte offset of the image's first byte (the `!`) within the document.
    pub byte_offset: usize,
    /// Byte offset just past the image within the document.
    pub byte_end: usize,
    /// Raw source text between `![` and its matching `]`.
    pub alt_text: Cow<'a, str>,
    /// Destination reported by the Markdown parser; empty for images that were
    /// only found by the regex fallback.
    pub url: Cow<'a, str>,
    /// True for reference, collapsed and shortcut images.
    pub is_reference: bool,
    /// Lower-cased reference id for reference-style images (the lower-cased alt
    /// text when the id is empty); `None` for other images.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link classification from `pulldown_cmark`.
    pub link_type: LinkType,
}
177
/// A reference definition (`[id]: url "title"`) found by
/// `LintContext::parse_reference_defs`.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// 1-based number of the line holding the definition.
    pub line: usize,
    /// Reference id, lower-cased.
    pub id: String,
    /// Destination URL exactly as written.
    pub url: String,
    /// Title without its surrounding quotes, when one is present.
    pub title: Option<String>,
    /// Byte offset in the document where the definition match starts.
    pub byte_offset: usize,
    /// Byte offset in the document just past the definition match.
    pub byte_end: usize,
}
194
/// An inline code span.
///
/// NOTE(review): spans are produced by `parse_code_spans`, which is outside
/// this chunk; descriptions are based on how the fields are read here.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// Line number compared against 1-based line numbers in `is_in_code_span`.
    pub line: usize,
    /// Start column, compared against a 0-based column in `is_in_code_span`.
    pub start_col: usize,
    /// End column (exclusive in the `is_in_code_span` comparison).
    pub end_col: usize,
    /// Byte offset in the document where the span starts.
    pub byte_offset: usize,
    /// Byte offset in the document where the span ends (treated as exclusive).
    pub byte_end: usize,
    /// NOTE(review): presumably the number of backticks in the delimiter — confirm.
    pub backtick_count: usize,
    /// NOTE(review): presumably the text between the delimiters — confirm.
    pub content: String,
}
213
/// Details about a heading line.
///
/// NOTE(review): the code that fills this struct is outside this chunk; the
/// field descriptions below are inferred from names and need confirming.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level (presumably 1 through 6).
    pub level: u8,
    /// Syntax the heading was written in.
    pub style: HeadingStyle,
    /// The marker text (presumably the `#` run or the setext underline).
    pub marker: String,
    /// Column at which the marker starts (TODO confirm 0- vs 1-based).
    pub marker_column: usize,
    /// Column at which the heading text starts (TODO confirm 0- vs 1-based).
    pub content_column: usize,
    /// Heading text (presumably with any custom id removed).
    pub text: String,
    /// Custom id attached to the heading, if any.
    pub custom_id: Option<String>,
    /// Heading text as written (presumably before custom-id stripping).
    pub raw_text: String,
    /// Whether the heading ends with a closing marker sequence.
    pub has_closing_sequence: bool,
    /// The closing marker sequence (presumably empty when there is none).
    pub closing_sequence: String,
}
238
/// Details about a blockquote line.
///
/// NOTE(review): the code that fills this struct is outside this chunk; the
/// field descriptions below are inferred from names and need confirming.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting depth (presumably the number of `>` markers).
    pub nesting_level: usize,
    /// Whitespace preceding the first marker.
    pub indent: String,
    /// Column at which the first marker starts (TODO confirm 0- vs 1-based).
    pub marker_column: usize,
    /// The blockquote prefix (presumably indent, markers and following space).
    pub prefix: String,
    /// Text following the prefix.
    pub content: String,
    /// Whether the marker is immediately followed by content.
    pub has_no_space_after_marker: bool,
    /// Whether more than one space follows the marker.
    pub has_multiple_spaces_after_marker: bool,
    /// Presumably marks lines the MD028 rule should fix — confirm against that rule.
    pub needs_md028_fix: bool,
}
259
/// A contiguous list in the document.
///
/// NOTE(review): blocks are produced by `parse_list_blocks`, which is outside
/// this chunk; fields not read here are described from their names only.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// First line of the block (inclusive in `is_in_list_block`).
    pub start_line: usize,
    /// Last line of the block (inclusive in `is_in_list_block`).
    pub end_line: usize,
    /// Whether the list is ordered.
    pub is_ordered: bool,
    /// Marker used by the list (presumably `None` when markers are mixed — confirm).
    pub marker: Option<String>,
    /// Blockquote prefix the list is nested under (presumably empty when none).
    pub blockquote_prefix: String,
    /// Line numbers of the list items in this block.
    pub item_lines: Vec<usize>,
    /// Nesting depth of the block (TODO confirm whether top level is 0 or 1).
    pub nesting_level: usize,
    /// Width of the widest marker in the block (TODO confirm the unit).
    pub max_marker_width: usize,
}
280
281use std::sync::{Arc, Mutex};
282
/// Character counters for the document, consulted by
/// `LintContext::has_char`, `LintContext::char_count` and the `likely_has_*`
/// heuristics.
///
/// The counters are filled by `compute_char_frequency`, which is outside this
/// chunk; each comment names the character that `has_char` / `char_count`
/// answer from that counter.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Counter used for `#`.
    pub hash_count: usize,
    /// Counter used for `*`.
    pub asterisk_count: usize,
    /// Counter used for `_`.
    pub underscore_count: usize,
    /// Counter used for `-`.
    pub hyphen_count: usize,
    /// Counter used for `+`.
    pub plus_count: usize,
    /// Counter used for `>`.
    pub gt_count: usize,
    /// Counter used for `|`.
    pub pipe_count: usize,
    /// Counter used for `[` (NOTE(review): confirm whether `]` is also counted here).
    pub bracket_count: usize,
    /// Counter used for `` ` ``.
    pub backtick_count: usize,
    /// Counter used for `<`.
    pub lt_count: usize,
    /// Counter used for `!`.
    pub exclamation_count: usize,
    /// Counter used for the newline character.
    pub newline_count: usize,
}
311
/// An HTML tag found in the document.
///
/// NOTE(review): tags are produced by `parse_html_tags`, which is outside this
/// chunk; the field descriptions below are inferred from names.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number used by `html_tags_on_line` to select tags.
    pub line: usize,
    /// Column at which the tag starts (TODO confirm the base and unit).
    pub start_col: usize,
    /// Column at which the tag ends (TODO confirm inclusive vs exclusive).
    pub end_col: usize,
    /// Byte offset in the document where the tag starts.
    pub byte_offset: usize,
    /// Byte offset in the document where the tag ends.
    pub byte_end: usize,
    /// Tag name (TODO confirm whether it is case-normalised).
    pub tag_name: String,
    /// Whether this is a closing tag such as `</div>`.
    pub is_closing: bool,
    /// Whether this is a self-closing tag such as `<br />`.
    pub is_self_closing: bool,
    /// The tag text as written in the source.
    pub raw_content: String,
}
334
/// An emphasis or strong-emphasis span.
///
/// NOTE(review): spans are produced by `parse_emphasis_spans`, which is outside
/// this chunk; the field descriptions below are inferred from names.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number used by `emphasis_spans_on_line` to select spans.
    pub line: usize,
    /// Column at which the span starts (TODO confirm the base and unit).
    pub start_col: usize,
    /// Column at which the span ends (TODO confirm inclusive vs exclusive).
    pub end_col: usize,
    /// Byte offset in the document where the span starts.
    pub byte_offset: usize,
    /// Byte offset in the document where the span ends.
    pub byte_end: usize,
    /// Delimiter character (presumably `*` or `_`).
    pub marker: char,
    /// Number of delimiter characters on each side (presumably).
    pub marker_count: usize,
    /// Text between the delimiters.
    pub content: String,
}
355
/// A table row.
///
/// NOTE(review): rows are produced by `parse_table_rows`, which is outside this
/// chunk; the field descriptions below are inferred from names.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number used by `table_rows_on_line` to select rows.
    pub line: usize,
    /// Whether the row is the delimiter row between header and body.
    pub is_separator: bool,
    /// Number of columns in the row.
    pub column_count: usize,
    /// Column alignments (TODO confirm the string values and when this is non-empty).
    pub column_alignments: Vec<String>,
}
368
/// A URL or email address that appears as plain text.
///
/// NOTE(review): entries are produced by `parse_bare_urls`, which is outside
/// this chunk; the field descriptions below are inferred from names.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number used by `bare_urls_on_line` to select entries.
    pub line: usize,
    /// Column at which the URL starts (TODO confirm the base and unit).
    pub start_col: usize,
    /// Column at which the URL ends (TODO confirm inclusive vs exclusive).
    pub end_col: usize,
    /// Byte offset in the document where the URL starts.
    pub byte_offset: usize,
    /// Byte offset in the document where the URL ends.
    pub byte_end: usize,
    /// The matched text.
    pub url: String,
    /// Kind of match (TODO confirm the set of values, e.g. scheme name vs "email").
    pub url_type: String,
}
387
/// Everything the lint rules need to know about one document, computed once
/// by [`LintContext::new`].
///
/// Public fields are filled eagerly in `new`; the private `*_cache` fields hold
/// results behind a mutex and are exposed through the accessor methods of the
/// same name.
pub struct LintContext<'a> {
    /// The document text.
    pub content: &'a str,
    /// Byte offsets at which lines start: `0`, then the byte after every `\n`.
    pub line_offsets: Vec<usize>,
    /// Code block ranges returned by `CodeBlockUtils::detect_code_blocks`
    /// (NOTE(review): presumably `(start, end)` byte offsets — confirm).
    pub code_blocks: Vec<(usize, usize)>,
    /// Per-line metadata; index `n - 1` describes line `n`.
    pub lines: Vec<LineInfo>,
    /// Links found by `parse_links`.
    pub links: Vec<ParsedLink<'a>>,
    /// Images found by `parse_images`.
    pub images: Vec<ParsedImage<'a>>,
    /// References the Markdown parser reported as broken while parsing links.
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Reference definitions found by `parse_reference_defs`.
    pub reference_defs: Vec<ReferenceDef>,
    /// Code spans; already populated by `new` and served by `code_spans()`.
    code_spans_cache: Mutex<Option<Arc<Vec<CodeSpan>>>>,
    /// List blocks found by `parse_list_blocks`.
    pub list_blocks: Vec<ListBlock>,
    /// Character counters backing `has_char`, `char_count` and `likely_has_*`.
    pub char_frequency: CharFrequency,
    /// HTML tags, parsed on the first call to `html_tags()`.
    html_tags_cache: Mutex<Option<Arc<Vec<HtmlTag>>>>,
    /// Emphasis spans, parsed on the first call to `emphasis_spans()`.
    emphasis_spans_cache: Mutex<Option<Arc<Vec<EmphasisSpan>>>>,
    /// Table rows, parsed on the first call to `table_rows()`.
    table_rows_cache: Mutex<Option<Arc<Vec<TableRow>>>>,
    /// Bare URLs, parsed on the first call to `bare_urls()`.
    bare_urls_cache: Mutex<Option<Arc<Vec<BareUrl>>>>,
    /// Byte ranges of HTML comments (end exclusive in `is_in_html_comment`).
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Table blocks returned by `TableUtils::find_table_blocks_with_code_info`.
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Line index built from `content` by `LineIndex::new`.
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    /// `(start, end)` byte ranges from `find_jinja_ranges` (end exclusive in
    /// `is_in_jinja_range`).
    jinja_ranges: Vec<(usize, usize)>,
    /// Markdown flavor the document is linted as.
    pub flavor: MarkdownFlavor,
}
410
/// The four consecutive pieces of a blockquote line, all borrowed from the
/// input. Concatenating them in field order reproduces the original line.
struct BlockquoteComponents<'a> {
    /// Leading spaces and tabs before the first `>`.
    indent: &'a str,
    /// The uninterrupted run of `>` characters.
    markers: &'a str,
    /// Spaces and tabs directly after the markers.
    spaces_after: &'a str,
    /// Everything that follows, unmodified.
    content: &'a str,
}

/// Splits `line` into indent, `>` markers, trailing whitespace and content.
///
/// Returns `None` when the first character after any leading spaces/tabs is
/// not `>` (including empty and whitespace-only lines). Only an unbroken run
/// of `>` counts as markers, so in `"> > x"` the second `>` is part of the
/// content.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    /// Length of the leading run of bytes in `bytes` accepted by `keep`.
    fn run_len(bytes: &[u8], keep: impl Fn(u8) -> bool) -> usize {
        bytes.iter().take_while(|&&b| keep(b)).count()
    }
    let is_space_or_tab = |b: u8| b == b' ' || b == b'\t';

    let raw = line.as_bytes();
    let indent_len = run_len(raw, is_space_or_tab);

    let marker_len = run_len(&raw[indent_len..], |b| b == b'>');
    if marker_len == 0 {
        return None;
    }

    let gap_len = run_len(&raw[indent_len + marker_len..], is_space_or_tab);

    // Every cut lands next to an ASCII byte, so each split is on a char boundary.
    let (indent, rest) = line.split_at(indent_len);
    let (markers, rest) = rest.split_at(marker_len);
    let (spaces_after, content) = rest.split_at(gap_len);

    Some(BlockquoteComponents {
        indent,
        markers,
        spaces_after,
        content,
    })
}
455
456impl<'a> LintContext<'a> {
    /// Builds the lint context for `content`, interpreted as `flavor`.
    ///
    /// The analysis runs as a fixed sequence of passes; later passes read the
    /// results of earlier ones (for example `lines` is created by the basic
    /// line pass and then updated in place by the HTML, ESM and
    /// heading/blockquote passes), so the order matters.
    ///
    /// When the environment variable `RUMDL_PROFILE_QUADRATIC` is set, the
    /// elapsed time of every pass is printed to stderr.
    pub fn new(content: &'a str, flavor: MarkdownFlavor) -> Self {
        use std::time::Instant;
        // Profiling is opt-in through the environment.
        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();

        // Line starts: offset 0, then the byte following every '\n'.
        let start = Instant::now();
        let mut line_offsets = vec![0];
        for (i, c) in content.char_indices() {
            if c == '\n' {
                line_offsets.push(i + 1);
            }
        }
        if profile {
            eprintln!("[PROFILE] Line offsets: {:?}", start.elapsed());
        }

        // Code block ranges, needed by most later passes.
        let start = Instant::now();
        let code_blocks = CodeBlockUtils::detect_code_blocks(content);
        if profile {
            eprintln!("[PROFILE] Code blocks: {:?}", start.elapsed());
        }

        // HTML comment ranges, computed once and shared with later passes.
        let start = Instant::now();
        let html_comment_ranges = crate::utils::skip_context::compute_html_comment_ranges(content);
        if profile {
            eprintln!("[PROFILE] HTML comment ranges: {:?}", start.elapsed());
        }

        // Autodoc block detection is only performed for the MkDocs flavor.
        let start = Instant::now();
        let autodoc_ranges = if flavor == MarkdownFlavor::MkDocs {
            crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
        } else {
            Vec::new()
        };
        if profile {
            eprintln!("[PROFILE] Autodoc block ranges: {:?}", start.elapsed());
        }

        // Per-line metadata; the next three passes refine it in place.
        let start = Instant::now();
        let mut lines = Self::compute_basic_line_info(
            content,
            &line_offsets,
            &code_blocks,
            flavor,
            &html_comment_ranges,
            &autodoc_ranges,
        );
        if profile {
            eprintln!("[PROFILE] Basic line info: {:?}", start.elapsed());
        }

        let start = Instant::now();
        Self::detect_html_blocks(content, &mut lines);
        if profile {
            eprintln!("[PROFILE] HTML blocks: {:?}", start.elapsed());
        }

        let start = Instant::now();
        Self::detect_esm_blocks(content, &mut lines, flavor);
        if profile {
            eprintln!("[PROFILE] ESM blocks: {:?}", start.elapsed());
        }

        let start = Instant::now();
        Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges);
        if profile {
            eprintln!("[PROFILE] Headings & blockquotes: {:?}", start.elapsed());
        }

        // Code spans are parsed eagerly because link, image and table parsing need them.
        let start = Instant::now();
        let code_spans = Self::parse_code_spans(content, &lines);
        if profile {
            eprintln!("[PROFILE] Code spans: {:?}", start.elapsed());
        }

        let start = Instant::now();
        let (links, broken_links) =
            Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges);
        if profile {
            eprintln!("[PROFILE] Links: {:?}", start.elapsed());
        }

        let start = Instant::now();
        let images = Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges);
        if profile {
            eprintln!("[PROFILE] Images: {:?}", start.elapsed());
        }

        let start = Instant::now();
        let reference_defs = Self::parse_reference_defs(content, &lines);
        if profile {
            eprintln!("[PROFILE] Reference defs: {:?}", start.elapsed());
        }

        let start = Instant::now();
        let list_blocks = Self::parse_list_blocks(content, &lines);
        if profile {
            eprintln!("[PROFILE] List blocks: {:?}", start.elapsed());
        }

        // Character counters that back the cheap `likely_has_*` checks.
        let start = Instant::now();
        let char_frequency = Self::compute_char_frequency(content);
        if profile {
            eprintln!("[PROFILE] Char frequency: {:?}", start.elapsed());
        }

        // Table detection reuses the code block, code span and comment ranges computed above.
        let start = Instant::now();
        let table_blocks = crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
            content,
            &code_blocks,
            &code_spans,
            &html_comment_ranges,
        );
        if profile {
            eprintln!("[PROFILE] Table blocks: {:?}", start.elapsed());
        }

        let start = Instant::now();
        let line_index = crate::utils::range_utils::LineIndex::new(content);
        if profile {
            eprintln!("[PROFILE] Line index: {:?}", start.elapsed());
        }

        let start = Instant::now();
        let jinja_ranges = crate::utils::jinja_utils::find_jinja_ranges(content);
        if profile {
            eprintln!("[PROFILE] Jinja ranges: {:?}", start.elapsed());
        }

        Self {
            content,
            line_offsets,
            code_blocks,
            lines,
            links,
            images,
            broken_links,
            reference_defs,
            // Already computed above, so the cache starts out filled.
            code_spans_cache: Mutex::new(Some(Arc::new(code_spans))),
            list_blocks,
            char_frequency,
            // The remaining caches are filled on first access.
            html_tags_cache: Mutex::new(None),
            emphasis_spans_cache: Mutex::new(None),
            table_rows_cache: Mutex::new(None),
            bare_urls_cache: Mutex::new(None),
            html_comment_ranges,
            table_blocks,
            line_index,
            jinja_ranges,
            flavor,
        }
    }
621
622 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
624 let mut cache = self.code_spans_cache.lock().expect("Code spans cache mutex poisoned");
625
626 Arc::clone(cache.get_or_insert_with(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))))
627 }
628
629 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
631 &self.html_comment_ranges
632 }
633
634 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
636 let mut cache = self.html_tags_cache.lock().expect("HTML tags cache mutex poisoned");
637
638 Arc::clone(cache.get_or_insert_with(|| {
639 Arc::new(Self::parse_html_tags(
640 self.content,
641 &self.lines,
642 &self.code_blocks,
643 self.flavor,
644 ))
645 }))
646 }
647
648 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
650 let mut cache = self
651 .emphasis_spans_cache
652 .lock()
653 .expect("Emphasis spans cache mutex poisoned");
654
655 Arc::clone(
656 cache.get_or_insert_with(|| {
657 Arc::new(Self::parse_emphasis_spans(self.content, &self.lines, &self.code_blocks))
658 }),
659 )
660 }
661
662 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
664 let mut cache = self.table_rows_cache.lock().expect("Table rows cache mutex poisoned");
665
666 Arc::clone(cache.get_or_insert_with(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))))
667 }
668
669 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
671 let mut cache = self.bare_urls_cache.lock().expect("Bare URLs cache mutex poisoned");
672
673 Arc::clone(
674 cache.get_or_insert_with(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
675 )
676 }
677
678 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
680 match self.line_offsets.binary_search(&offset) {
681 Ok(line) => (line + 1, 1),
682 Err(line) => {
683 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
684 (line, offset - line_start + 1)
685 }
686 }
687 }
688
689 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
691 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
693 return true;
694 }
695
696 self.code_spans()
698 .iter()
699 .any(|span| pos >= span.byte_offset && pos < span.byte_end)
700 }
701
702 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
704 if line_num > 0 {
705 self.lines.get(line_num - 1)
706 } else {
707 None
708 }
709 }
710
711 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
713 self.line_info(line_num).map(|info| info.byte_offset)
714 }
715
716 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
718 let normalized_id = ref_id.to_lowercase();
719 self.reference_defs
720 .iter()
721 .find(|def| def.id == normalized_id)
722 .map(|def| def.url.as_str())
723 }
724
725 pub fn links_on_line(&self, line_num: usize) -> Vec<&ParsedLink<'_>> {
727 self.links.iter().filter(|link| link.line == line_num).collect()
728 }
729
730 pub fn images_on_line(&self, line_num: usize) -> Vec<&ParsedImage<'_>> {
732 self.images.iter().filter(|img| img.line == line_num).collect()
733 }
734
735 pub fn is_in_list_block(&self, line_num: usize) -> bool {
737 self.list_blocks
738 .iter()
739 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
740 }
741
742 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
744 self.list_blocks
745 .iter()
746 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
747 }
748
749 pub fn is_in_code_block(&self, line_num: usize) -> bool {
753 if line_num == 0 || line_num > self.lines.len() {
754 return false;
755 }
756 self.lines[line_num - 1].in_code_block
757 }
758
759 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
761 if line_num == 0 || line_num > self.lines.len() {
762 return false;
763 }
764 self.lines[line_num - 1].in_front_matter
765 }
766
767 pub fn is_in_html_block(&self, line_num: usize) -> bool {
769 if line_num == 0 || line_num > self.lines.len() {
770 return false;
771 }
772 self.lines[line_num - 1].in_html_block
773 }
774
775 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
777 if line_num == 0 || line_num > self.lines.len() {
778 return false;
779 }
780
781 let col_0indexed = if col > 0 { col - 1 } else { 0 };
785 let code_spans = self.code_spans();
786 code_spans
787 .iter()
788 .any(|span| span.line == line_num && col_0indexed >= span.start_col && col_0indexed < span.end_col)
789 }
790
791 #[inline]
794 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
795 self.reference_defs
796 .iter()
797 .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
798 }
799
800 #[inline]
804 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
805 self.html_comment_ranges
806 .iter()
807 .any(|range| byte_pos >= range.start && byte_pos < range.end)
808 }
809
810 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
812 self.jinja_ranges
813 .iter()
814 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
815 }
816
817 pub fn has_char(&self, ch: char) -> bool {
819 match ch {
820 '#' => self.char_frequency.hash_count > 0,
821 '*' => self.char_frequency.asterisk_count > 0,
822 '_' => self.char_frequency.underscore_count > 0,
823 '-' => self.char_frequency.hyphen_count > 0,
824 '+' => self.char_frequency.plus_count > 0,
825 '>' => self.char_frequency.gt_count > 0,
826 '|' => self.char_frequency.pipe_count > 0,
827 '[' => self.char_frequency.bracket_count > 0,
828 '`' => self.char_frequency.backtick_count > 0,
829 '<' => self.char_frequency.lt_count > 0,
830 '!' => self.char_frequency.exclamation_count > 0,
831 '\n' => self.char_frequency.newline_count > 0,
832 _ => self.content.contains(ch), }
834 }
835
836 pub fn char_count(&self, ch: char) -> usize {
838 match ch {
839 '#' => self.char_frequency.hash_count,
840 '*' => self.char_frequency.asterisk_count,
841 '_' => self.char_frequency.underscore_count,
842 '-' => self.char_frequency.hyphen_count,
843 '+' => self.char_frequency.plus_count,
844 '>' => self.char_frequency.gt_count,
845 '|' => self.char_frequency.pipe_count,
846 '[' => self.char_frequency.bracket_count,
847 '`' => self.char_frequency.backtick_count,
848 '<' => self.char_frequency.lt_count,
849 '!' => self.char_frequency.exclamation_count,
850 '\n' => self.char_frequency.newline_count,
851 _ => self.content.matches(ch).count(), }
853 }
854
855 pub fn likely_has_headings(&self) -> bool {
857 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
859
860 pub fn likely_has_lists(&self) -> bool {
862 self.char_frequency.asterisk_count > 0
863 || self.char_frequency.hyphen_count > 0
864 || self.char_frequency.plus_count > 0
865 }
866
867 pub fn likely_has_emphasis(&self) -> bool {
869 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
870 }
871
872 pub fn likely_has_tables(&self) -> bool {
874 self.char_frequency.pipe_count > 2
875 }
876
877 pub fn likely_has_blockquotes(&self) -> bool {
879 self.char_frequency.gt_count > 0
880 }
881
882 pub fn likely_has_code(&self) -> bool {
884 self.char_frequency.backtick_count > 0
885 }
886
887 pub fn likely_has_links_or_images(&self) -> bool {
889 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
890 }
891
892 pub fn likely_has_html(&self) -> bool {
894 self.char_frequency.lt_count > 0
895 }
896
897 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
899 self.html_tags()
900 .iter()
901 .filter(|tag| tag.line == line_num)
902 .cloned()
903 .collect()
904 }
905
906 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
908 self.emphasis_spans()
909 .iter()
910 .filter(|span| span.line == line_num)
911 .cloned()
912 .collect()
913 }
914
915 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
917 self.table_rows()
918 .iter()
919 .filter(|row| row.line == line_num)
920 .cloned()
921 .collect()
922 }
923
924 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
926 self.bare_urls()
927 .iter()
928 .filter(|url| url.line == line_num)
929 .cloned()
930 .collect()
931 }
932
933 #[inline]
939 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
940 let idx = match lines.binary_search_by(|line| {
942 if byte_offset < line.byte_offset {
943 std::cmp::Ordering::Greater
944 } else if byte_offset > line.byte_offset + line.byte_len {
945 std::cmp::Ordering::Less
946 } else {
947 std::cmp::Ordering::Equal
948 }
949 }) {
950 Ok(idx) => idx,
951 Err(idx) => idx.saturating_sub(1),
952 };
953
954 let line = &lines[idx];
955 let line_num = idx + 1;
956 let col = byte_offset.saturating_sub(line.byte_offset);
957
958 (idx, line_num, col)
959 }
960
961 #[inline]
963 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
964 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
966
967 if idx > 0 {
969 let span = &code_spans[idx - 1];
970 if offset >= span.byte_offset && offset < span.byte_end {
971 return true;
972 }
973 }
974
975 false
976 }
977
    /// Finds the links in `content` and collects the references the Markdown
    /// parser reports as broken.
    ///
    /// Two passes are made:
    /// 1. `pulldown_cmark` (with wikilinks enabled) is run over the document
    ///    and every link it emits is recorded, unless it starts inside an HTML
    ///    comment, sits on an MkDocs snippet line, or is directly preceded by
    ///    `\` or `\!`.
    /// 2. `LINK_PATTERN` is run over the raw text. Matches at positions not
    ///    recorded in pass 1 are added only when they have the bracketed
    ///    reference form (capture group 6), and only outside code blocks, code
    ///    spans, HTML comments and snippet lines.
    ///
    /// Returns `(links, broken_links)`.
    fn parse_links(
        content: &'a str,
        lines: &[LineInfo],
        code_blocks: &[(usize, usize)],
        code_spans: &[CodeSpan],
        flavor: MarkdownFlavor,
        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
    ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>) {
        use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
        use std::collections::HashSet;

        // Rough capacity guess to limit reallocation.
        let mut links = Vec::with_capacity(content.len() / 500);
        let mut broken_links = Vec::new();

        // Start offsets of links recorded in pass 1, so pass 2 does not add them again.
        let mut found_positions = HashSet::new();

        let mut options = Options::empty();
        options.insert(Options::ENABLE_WIKILINKS);

        // The callback records each unresolved reference and returns `None`,
        // i.e. it supplies no replacement destination.
        let parser = Parser::new_with_broken_link_callback(
            content,
            options,
            Some(|link: BrokenLink<'_>| {
                broken_links.push(BrokenLinkInfo {
                    reference: link.reference.to_string(),
                    span: link.span.clone(),
                });
                None
            }),
        )
        .into_offset_iter();

        // Open links: (start offset, end offset, destination URL, link type, reference id).
        let mut link_stack: Vec<(
            usize,
            usize,
            pulldown_cmark::CowStr<'a>,
            LinkType,
            pulldown_cmark::CowStr<'a>,
        )> = Vec::new();
        // NOTE(review): `text_chunks` is filled and cleared below but never read
        // in this function; the link text is taken from the source bytes instead.
        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new();

        for (event, range) in parser {
            match event {
                Event::Start(Tag::Link {
                    link_type,
                    dest_url,
                    id,
                    ..
                }) => {
                    link_stack.push((range.start, range.end, dest_url, link_type, id));
                    text_chunks.clear();
                }
                Event::Text(text) if !link_stack.is_empty() => {
                    text_chunks.push((text.to_string(), range.start, range.end));
                }
                Event::Code(code) if !link_stack.is_empty() => {
                    let code_text = format!("`{code}`");
                    text_chunks.push((code_text, range.start, range.end));
                }
                Event::End(TagEnd::Link) => {
                    if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
                            text_chunks.clear();
                            continue;
                        }

                        let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);

                        if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
                            text_chunks.clear();
                            continue;
                        }

                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);

                        let is_reference = matches!(
                            link_type,
                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
                        );

                        // Recover the raw link text from the source: scan from the byte
                        // after the opening `[` to the `]` that closes it.
                        let link_text = if start_pos < content.len() {
                            let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];

                            let mut close_pos = None;
                            let mut depth = 0;
                            let mut in_code_span = false;

                            for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
                                // A byte is escaped when an odd number of backslashes precedes it.
                                let mut backslash_count = 0;
                                let mut j = i;
                                while j > 0 && link_bytes[j - 1] == b'\\' {
                                    backslash_count += 1;
                                    j -= 1;
                                }
                                let is_escaped = backslash_count % 2 != 0;

                                // Every unescaped backtick flips the in-code state.
                                if byte == b'`' && !is_escaped {
                                    in_code_span = !in_code_span;
                                }

                                // Brackets only count outside code and when unescaped.
                                if !is_escaped && !in_code_span {
                                    if byte == b'[' {
                                        depth += 1;
                                    } else if byte == b']' {
                                        if depth == 0 {
                                            close_pos = Some(i);
                                            break;
                                        } else {
                                            depth -= 1;
                                        }
                                    }
                                }
                            }

                            if let Some(pos) = close_pos {
                                Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
                            } else {
                                Cow::Borrowed("")
                            }
                        } else {
                            Cow::Borrowed("")
                        };

                        // Reference links without an explicit id fall back to their text.
                        let reference_id = if is_reference && !ref_id.is_empty() {
                            Some(Cow::Owned(ref_id.to_lowercase()))
                        } else if is_reference {
                            Some(Cow::Owned(link_text.to_lowercase()))
                        } else {
                            None
                        };

                        // Link preceded by `\!` in the source.
                        let has_escaped_bang = start_pos >= 2
                            && content.as_bytes().get(start_pos - 2) == Some(&b'\\')
                            && content.as_bytes().get(start_pos - 1) == Some(&b'!');

                        // Link preceded by a single `\` in the source.
                        let has_escaped_bracket =
                            start_pos >= 1 && content.as_bytes().get(start_pos - 1) == Some(&b'\\');

                        if has_escaped_bang || has_escaped_bracket {
                            text_chunks.clear();
                            continue;
                        }

                        found_positions.insert(start_pos);

                        links.push(ParsedLink {
                            line: line_num,
                            start_col: col_start,
                            end_col: col_end,
                            byte_offset: start_pos,
                            byte_end: range.end,
                            text: link_text,
                            url: Cow::Owned(url.to_string()),
                            is_reference,
                            reference_id,
                            link_type,
                        });

                        text_chunks.clear();
                    }
                }
                _ => {}
            }
        }

        // Pass 2: regex fallback over the raw text.
        for cap in LINK_PATTERN.captures_iter(content) {
            let full_match = cap.get(0).unwrap();
            let match_start = full_match.start();
            let match_end = full_match.end();

            // Already recorded by the parser pass.
            if found_positions.contains(&match_start) {
                continue;
            }

            // Preceded by a backslash.
            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
                continue;
            }

            // Preceded by `!`: image syntax, not handled here.
            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
                continue;
            }

            if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
                continue;
            }

            if Self::is_offset_in_code_span(code_spans, match_start) {
                continue;
            }

            if is_in_html_comment_ranges(html_comment_ranges, match_start) {
                continue;
            }

            let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);

            if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
                continue;
            }

            let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);

            let text = cap.get(1).map_or("", |m| m.as_str());

            // Only the bracketed reference form is added here; inline matches are ignored.
            if let Some(ref_id) = cap.get(6) {
                let ref_id_str = ref_id.as_str();
                // `[text][]` uses the text itself as the reference id.
                let normalized_ref = if ref_id_str.is_empty() {
                    Cow::Owned(text.to_lowercase())
                } else {
                    Cow::Owned(ref_id_str.to_lowercase())
                };

                links.push(ParsedLink {
                    line: line_num,
                    start_col: col_start,
                    end_col: col_end,
                    byte_offset: match_start,
                    byte_end: match_end,
                    text: Cow::Borrowed(text),
                    // No destination is known for a link found only by the regex.
                    url: Cow::Borrowed(""),
                    is_reference: true,
                    reference_id: Some(normalized_ref),
                    link_type: LinkType::Reference,
                });
            }
        }

        (links, broken_links)
    }
1254
    /// Finds the images in `content`.
    ///
    /// Two passes are made:
    /// 1. `pulldown_cmark` (default options) is run over the document and
    ///    every image it emits is recorded, unless it starts inside a code
    ///    block, code span or HTML comment.
    /// 2. `IMAGE_PATTERN` is run over the raw text. Matches at positions not
    ///    recorded in pass 1 are added only when they have the bracketed
    ///    reference form (capture group 6), subject to the same exclusions and
    ///    to not being preceded by a backslash.
    fn parse_images(
        content: &'a str,
        lines: &[LineInfo],
        code_blocks: &[(usize, usize)],
        code_spans: &[CodeSpan],
        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
    ) -> Vec<ParsedImage<'a>> {
        use crate::utils::skip_context::is_in_html_comment_ranges;
        use std::collections::HashSet;

        // Rough capacity guess to limit reallocation.
        let mut images = Vec::with_capacity(content.len() / 1000);
        // Start offsets of images recorded in pass 1, so pass 2 does not add them again.
        let mut found_positions = HashSet::new();

        let parser = Parser::new(content).into_offset_iter();
        // Open images: (start offset, destination URL, link type, reference id).
        let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
            Vec::new();
        // NOTE(review): `text_chunks` is filled and cleared below but never read
        // in this function; the alt text is taken from the source bytes instead.
        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new();

        for (event, range) in parser {
            match event {
                Event::Start(Tag::Image {
                    link_type,
                    dest_url,
                    id,
                    ..
                }) => {
                    image_stack.push((range.start, dest_url, link_type, id));
                    text_chunks.clear();
                }
                Event::Text(text) if !image_stack.is_empty() => {
                    text_chunks.push((text.to_string(), range.start, range.end));
                }
                Event::Code(code) if !image_stack.is_empty() => {
                    let code_text = format!("`{code}`");
                    text_chunks.push((code_text, range.start, range.end));
                }
                Event::End(TagEnd::Image) => {
                    if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
                        if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
                            continue;
                        }

                        if Self::is_offset_in_code_span(code_spans, start_pos) {
                            continue;
                        }

                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
                            continue;
                        }

                        let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);

                        let is_reference = matches!(
                            link_type,
                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
                        );

                        // Recover the raw alt text from the source: scan from the byte
                        // after `![` to the `]` that closes it.
                        let alt_text = if start_pos < content.len() {
                            let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];

                            let mut close_pos = None;
                            let mut depth = 0;

                            if image_bytes.len() > 2 {
                                for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
                                    // A byte is escaped when an odd number of backslashes precedes it.
                                    let mut backslash_count = 0;
                                    let mut j = i;
                                    while j > 0 && image_bytes[j - 1] == b'\\' {
                                        backslash_count += 1;
                                        j -= 1;
                                    }
                                    let is_escaped = backslash_count % 2 != 0;

                                    if !is_escaped {
                                        if byte == b'[' {
                                            depth += 1;
                                        } else if byte == b']' {
                                            if depth == 0 {
                                                close_pos = Some(i);
                                                break;
                                            } else {
                                                depth -= 1;
                                            }
                                        }
                                    }
                                }
                            }

                            if let Some(pos) = close_pos {
                                Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
                            } else {
                                Cow::Borrowed("")
                            }
                        } else {
                            Cow::Borrowed("")
                        };

                        // Reference images without an explicit id fall back to their alt text.
                        let reference_id = if is_reference && !ref_id.is_empty() {
                            Some(Cow::Owned(ref_id.to_lowercase()))
                        } else if is_reference {
                            Some(Cow::Owned(alt_text.to_lowercase()))
                        } else {
                            None
                        };

                        found_positions.insert(start_pos);
                        images.push(ParsedImage {
                            line: line_num,
                            start_col: col_start,
                            end_col: col_end,
                            byte_offset: start_pos,
                            byte_end: range.end,
                            alt_text,
                            url: Cow::Owned(url.to_string()),
                            is_reference,
                            reference_id,
                            link_type,
                        });
                    }
                }
                _ => {}
            }
        }

        // Pass 2: regex fallback over the raw text.
        for cap in IMAGE_PATTERN.captures_iter(content) {
            let full_match = cap.get(0).unwrap();
            let match_start = full_match.start();
            let match_end = full_match.end();

            // Already recorded by the parser pass.
            if found_positions.contains(&match_start) {
                continue;
            }

            // Preceded by a backslash.
            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
                continue;
            }

            if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
                || Self::is_offset_in_code_span(code_spans, match_start)
                || is_in_html_comment_ranges(html_comment_ranges, match_start)
            {
                continue;
            }

            // Only the bracketed reference form is added here; inline matches are ignored.
            if let Some(ref_id) = cap.get(6) {
                let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
                let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
                let alt_text = cap.get(1).map_or("", |m| m.as_str());
                let ref_id_str = ref_id.as_str();
                // `![alt][]` uses the alt text itself as the reference id.
                let normalized_ref = if ref_id_str.is_empty() {
                    Cow::Owned(alt_text.to_lowercase())
                } else {
                    Cow::Owned(ref_id_str.to_lowercase())
                };

                images.push(ParsedImage {
                    line: line_num,
                    start_col: col_start,
                    end_col: col_end,
                    byte_offset: match_start,
                    byte_end: match_end,
                    alt_text: Cow::Borrowed(alt_text),
                    // No destination is known for an image found only by the regex.
                    url: Cow::Borrowed(""),
                    is_reference: true,
                    reference_id: Some(normalized_ref),
                    link_type: LinkType::Reference,
                });
            }
        }

        images
    }
1446
1447 fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1449 let mut refs = Vec::with_capacity(lines.len() / 20); for (line_idx, line_info) in lines.iter().enumerate() {
1453 if line_info.in_code_block {
1455 continue;
1456 }
1457
1458 let line = line_info.content(content);
1459 let line_num = line_idx + 1;
1460
1461 if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1462 let id = cap.get(1).unwrap().as_str().to_lowercase();
1463 let url = cap.get(2).unwrap().as_str().to_string();
1464 let title = cap.get(3).or_else(|| cap.get(4)).map(|m| m.as_str().to_string());
1465
1466 let match_obj = cap.get(0).unwrap();
1469 let byte_offset = line_info.byte_offset + match_obj.start();
1470 let byte_end = line_info.byte_offset + match_obj.end();
1471
1472 refs.push(ReferenceDef {
1473 line: line_num,
1474 id,
1475 url,
1476 title,
1477 byte_offset,
1478 byte_end,
1479 });
1480 }
1481 }
1482
1483 refs
1484 }
1485
/// Splits a line at its first blockquote marker.
///
/// Returns `Some((prefix, rest))` where `prefix` is the leading whitespace,
/// one `>` and any whitespace after it, and `rest` is everything that
/// follows. Only a single `>` is consumed, so for `>> x` the remainder still
/// starts with `>`. Returns `None` when the first non-whitespace character is
/// not `>`.
#[inline]
fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
    let after_marker = line.trim_start().strip_prefix('>')?;
    let rest = after_marker.trim_start();

    // `rest` is a suffix of `line`, so the prefix is whatever precedes it.
    let (prefix, _) = line.split_at(line.len() - rest.len());
    Some((prefix, rest))
}
1504
/// Recognises an unordered list marker at the start of a line.
///
/// Returns `Some((indent, marker, spacing, rest))` where `indent` is the
/// leading run of spaces/tabs, `marker` is one of `-`, `*`, `+`, `spacing` is
/// the run of spaces/tabs right after the marker (possibly empty), and `rest`
/// is the remainder of the line. Returns `None` when the first character
/// after the indent is not one of the three markers.
#[inline]
fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
    // Only spaces and tabs count as padding here, not all Unicode whitespace.
    const PAD: &[char] = &[' ', '\t'];

    let after_indent = line.trim_start_matches(PAD);
    let indent = &line[..line.len() - after_indent.len()];

    let marker = after_indent
        .chars()
        .next()
        .filter(|&c| matches!(c, '-' | '*' | '+'))?;

    // The marker is ASCII, so skipping one byte stays on a char boundary.
    let after_marker = &after_indent[1..];
    let rest = after_marker.trim_start_matches(PAD);
    let spacing = &after_marker[..after_marker.len() - rest.len()];

    Some((indent, marker, spacing, rest))
}
1537
/// Recognises an ordered list marker at the start of a line.
///
/// Returns `Some((indent, number, delimiter, spacing, rest))` where `indent`
/// is the leading run of spaces/tabs, `number` is the run of ASCII digits,
/// `delimiter` is `.` or `)`, `spacing` is the run of spaces/tabs after the
/// delimiter (possibly empty), and `rest` is the remainder of the line.
/// Returns `None` when there are no digits or the digits are not followed by
/// one of the two delimiters.
#[inline]
fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
    // Only spaces and tabs count as padding here, not all Unicode whitespace.
    const PAD: &[char] = &[' ', '\t'];

    let after_indent = line.trim_start_matches(PAD);
    let indent = &line[..line.len() - after_indent.len()];

    let after_digits = after_indent.trim_start_matches(|c: char| c.is_ascii_digit());
    let number = &after_indent[..after_indent.len() - after_digits.len()];
    if number.is_empty() {
        return None;
    }

    let delimiter = after_digits
        .chars()
        .next()
        .filter(|&c| matches!(c, '.' | ')'))?;

    // The delimiter is ASCII, so skipping one byte stays on a char boundary.
    let after_delimiter = &after_digits[1..];
    let rest = after_delimiter.trim_start_matches(PAD);
    let spacing = &after_delimiter[..after_delimiter.len() - rest.len()];

    Some((indent, number, delimiter, spacing, rest))
}
1585
/// Builds a per-line flag vector telling which lines start inside a code block.
///
/// `line_offsets[i]` is the byte offset at which line `i` begins and
/// `code_blocks` holds `(start, end)` byte ranges. A line is flagged when its
/// starting offset lies in `[start, end)` after both bounds have been snapped
/// to UTF-8 character boundaries (start moves backwards, end moves forwards
/// and is capped at `content.len()`). The result has one entry per element of
/// `line_offsets`.
fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
    let total_len = content.len();
    let mut flags = vec![false; line_offsets.len()];

    for &(block_start, block_end) in code_blocks {
        // Snap the start down to the nearest char boundary (offset 0 always is one).
        let mut lower = block_start;
        while lower > 0 && !content.is_char_boundary(lower) {
            lower -= 1;
        }

        // Snap the end up to the nearest char boundary, never past the content.
        let mut upper = block_end.min(total_len);
        while upper < total_len && !content.is_char_boundary(upper) {
            upper += 1;
        }

        // Binary-search the half-open range of lines whose start offset is in
        // [lower, upper).
        let first = line_offsets.partition_point(|&offset| offset < lower);
        let past_last = line_offsets.partition_point(|&offset| offset < upper);

        if first < past_last {
            flags[first..past_last].fill(true);
        }
    }

    flags
}
1640
1641 fn compute_basic_line_info(
1643 content: &str,
1644 line_offsets: &[usize],
1645 code_blocks: &[(usize, usize)],
1646 flavor: MarkdownFlavor,
1647 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1648 autodoc_ranges: &[crate::utils::skip_context::ByteRange],
1649 ) -> Vec<LineInfo> {
1650 let content_lines: Vec<&str> = content.lines().collect();
1651 let mut lines = Vec::with_capacity(content_lines.len());
1652
1653 let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
1655
1656 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
1659
1660 for (i, line) in content_lines.iter().enumerate() {
1661 let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
1662 let indent = line.len() - line.trim_start().len();
1663
1664 let blockquote_parse = Self::parse_blockquote_prefix(line);
1666
1667 let is_blank = if let Some((_, content)) = blockquote_parse {
1669 content.trim().is_empty()
1671 } else {
1672 line.trim().is_empty()
1673 };
1674
1675 let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
1677
1678 let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
1680 && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
1681 let in_html_comment =
1683 crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, byte_offset);
1684 let list_item = if !(in_code_block
1685 || is_blank
1686 || in_mkdocstrings
1687 || in_html_comment
1688 || (front_matter_end > 0 && i < front_matter_end))
1689 {
1690 let (line_for_list_check, blockquote_prefix_len) = if let Some((prefix, content)) = blockquote_parse {
1692 (content, prefix.len())
1693 } else {
1694 (&**line, 0)
1695 };
1696
1697 if let Some((leading_spaces, marker, spacing, _content)) =
1698 Self::parse_unordered_list(line_for_list_check)
1699 {
1700 let marker_column = blockquote_prefix_len + leading_spaces.len();
1701 let content_column = marker_column + 1 + spacing.len();
1702
1703 if spacing.is_empty() {
1710 None
1711 } else {
1712 Some(ListItemInfo {
1713 marker: marker.to_string(),
1714 is_ordered: false,
1715 number: None,
1716 marker_column,
1717 content_column,
1718 })
1719 }
1720 } else if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
1721 Self::parse_ordered_list(line_for_list_check)
1722 {
1723 let marker = format!("{number_str}{delimiter}");
1724 let marker_column = blockquote_prefix_len + leading_spaces.len();
1725 let content_column = marker_column + marker.len() + spacing.len();
1726
1727 if spacing.is_empty() {
1730 None
1731 } else {
1732 Some(ListItemInfo {
1733 marker,
1734 is_ordered: true,
1735 number: number_str.parse().ok(),
1736 marker_column,
1737 content_column,
1738 })
1739 }
1740 } else {
1741 None
1742 }
1743 } else {
1744 None
1745 };
1746
1747 lines.push(LineInfo {
1748 byte_offset,
1749 byte_len: line.len(),
1750 indent,
1751 is_blank,
1752 in_code_block,
1753 in_front_matter: front_matter_end > 0 && i < front_matter_end,
1754 in_html_block: false, in_html_comment,
1756 list_item,
1757 heading: None, blockquote: None, in_mkdocstrings,
1760 in_esm_block: false, });
1762 }
1763
1764 lines
1765 }
1766
1767 fn detect_headings_and_blockquotes(
1769 content: &str,
1770 lines: &mut [LineInfo],
1771 flavor: MarkdownFlavor,
1772 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1773 ) {
1774 static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
1776 LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
1777 static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
1778 LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
1779
1780 let content_lines: Vec<&str> = content.lines().collect();
1781
1782 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
1784
1785 for i in 0..lines.len() {
1787 if lines[i].in_code_block {
1788 continue;
1789 }
1790
1791 if front_matter_end > 0 && i < front_matter_end {
1793 continue;
1794 }
1795
1796 if lines[i].in_html_block {
1798 continue;
1799 }
1800
1801 let line = content_lines[i];
1802
1803 if let Some(bq) = parse_blockquote_detailed(line) {
1805 let nesting_level = bq.markers.len(); let marker_column = bq.indent.len();
1807
1808 let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
1810
1811 let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
1813 let has_multiple_spaces = bq.spaces_after.len() > 1 || bq.spaces_after.contains('\t');
1815
1816 let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
1820
1821 lines[i].blockquote = Some(BlockquoteInfo {
1822 nesting_level,
1823 indent: bq.indent.to_string(),
1824 marker_column,
1825 prefix,
1826 content: bq.content.to_string(),
1827 has_no_space_after_marker: has_no_space,
1828 has_multiple_spaces_after_marker: has_multiple_spaces,
1829 needs_md028_fix,
1830 });
1831 }
1832
1833 if lines[i].is_blank {
1835 continue;
1836 }
1837
1838 let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
1841 crate::utils::mkdocs_snippets::is_snippet_section_start(line)
1842 || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
1843 } else {
1844 false
1845 };
1846
1847 if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
1848 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
1850 continue;
1851 }
1852 let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
1853 let hashes = caps.get(2).map_or("", |m| m.as_str());
1854 let spaces_after = caps.get(3).map_or("", |m| m.as_str());
1855 let rest = caps.get(4).map_or("", |m| m.as_str());
1856
1857 let level = hashes.len() as u8;
1858 let marker_column = leading_spaces.len();
1859
1860 let (text, has_closing, closing_seq) = {
1862 let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
1864 if rest[id_start..].trim_end().ends_with('}') {
1866 (&rest[..id_start], &rest[id_start..])
1868 } else {
1869 (rest, "")
1870 }
1871 } else {
1872 (rest, "")
1873 };
1874
1875 let trimmed_rest = rest_without_id.trim_end();
1877 if let Some(last_hash_pos) = trimmed_rest.rfind('#') {
1878 let mut start_of_hashes = last_hash_pos;
1880 while start_of_hashes > 0 && trimmed_rest.chars().nth(start_of_hashes - 1) == Some('#') {
1881 start_of_hashes -= 1;
1882 }
1883
1884 let has_space_before = start_of_hashes == 0
1886 || trimmed_rest
1887 .chars()
1888 .nth(start_of_hashes - 1)
1889 .is_some_and(|c| c.is_whitespace());
1890
1891 let potential_closing = &trimmed_rest[start_of_hashes..];
1893 let is_all_hashes = potential_closing.chars().all(|c| c == '#');
1894
1895 if is_all_hashes && has_space_before {
1896 let closing_hashes = potential_closing.to_string();
1898 let text_part = if !custom_id_part.is_empty() {
1901 format!("{}{}", rest_without_id[..start_of_hashes].trim_end(), custom_id_part)
1904 } else {
1905 rest_without_id[..start_of_hashes].trim_end().to_string()
1906 };
1907 (text_part, true, closing_hashes)
1908 } else {
1909 (rest.to_string(), false, String::new())
1911 }
1912 } else {
1913 (rest.to_string(), false, String::new())
1915 }
1916 };
1917
1918 let content_column = marker_column + hashes.len() + spaces_after.len();
1919
1920 let raw_text = text.trim().to_string();
1922 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
1923
1924 if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
1926 let next_line = content_lines[i + 1];
1927 if !lines[i + 1].in_code_block
1928 && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
1929 && let Some(next_line_id) =
1930 crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
1931 {
1932 custom_id = Some(next_line_id);
1933 }
1934 }
1935
1936 lines[i].heading = Some(HeadingInfo {
1937 level,
1938 style: HeadingStyle::ATX,
1939 marker: hashes.to_string(),
1940 marker_column,
1941 content_column,
1942 text: clean_text,
1943 custom_id,
1944 raw_text,
1945 has_closing_sequence: has_closing,
1946 closing_sequence: closing_seq,
1947 });
1948 }
1949 else if i + 1 < content_lines.len() && i + 1 < lines.len() {
1951 let next_line = content_lines[i + 1];
1952 if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
1953 if front_matter_end > 0 && i < front_matter_end {
1955 continue;
1956 }
1957
1958 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
1960 {
1961 continue;
1962 }
1963
1964 let underline = next_line.trim();
1965
1966 if underline == "---" {
1969 continue;
1970 }
1971
1972 let current_line_trimmed = line.trim();
1974 if current_line_trimmed.contains(':')
1975 && !current_line_trimmed.starts_with('#')
1976 && !current_line_trimmed.contains('[')
1977 && !current_line_trimmed.contains("](")
1978 {
1979 continue;
1981 }
1982
1983 let level = if underline.starts_with('=') { 1 } else { 2 };
1984 let style = if level == 1 {
1985 HeadingStyle::Setext1
1986 } else {
1987 HeadingStyle::Setext2
1988 };
1989
1990 let raw_text = line.trim().to_string();
1992 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
1993
1994 if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
1996 let attr_line = content_lines[i + 2];
1997 if !lines[i + 2].in_code_block
1998 && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
1999 && let Some(attr_line_id) =
2000 crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2001 {
2002 custom_id = Some(attr_line_id);
2003 }
2004 }
2005
2006 lines[i].heading = Some(HeadingInfo {
2007 level,
2008 style,
2009 marker: underline.to_string(),
2010 marker_column: next_line.len() - next_line.trim_start().len(),
2011 content_column: lines[i].indent,
2012 text: clean_text,
2013 custom_id,
2014 raw_text,
2015 has_closing_sequence: false,
2016 closing_sequence: String::new(),
2017 });
2018 }
2019 }
2020 }
2021 }
2022
2023 fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2025 const BLOCK_ELEMENTS: &[&str] = &[
2027 "address",
2028 "article",
2029 "aside",
2030 "blockquote",
2031 "details",
2032 "dialog",
2033 "dd",
2034 "div",
2035 "dl",
2036 "dt",
2037 "fieldset",
2038 "figcaption",
2039 "figure",
2040 "footer",
2041 "form",
2042 "h1",
2043 "h2",
2044 "h3",
2045 "h4",
2046 "h5",
2047 "h6",
2048 "header",
2049 "hr",
2050 "li",
2051 "main",
2052 "nav",
2053 "ol",
2054 "p",
2055 "pre",
2056 "script",
2057 "section",
2058 "style",
2059 "table",
2060 "tbody",
2061 "td",
2062 "tfoot",
2063 "th",
2064 "thead",
2065 "tr",
2066 "ul",
2067 ];
2068
2069 let mut i = 0;
2070 while i < lines.len() {
2071 if lines[i].in_code_block || lines[i].in_front_matter {
2073 i += 1;
2074 continue;
2075 }
2076
2077 let trimmed = lines[i].content(content).trim_start();
2078
2079 if trimmed.starts_with('<') && trimmed.len() > 1 {
2081 let after_bracket = &trimmed[1..];
2083 let is_closing = after_bracket.starts_with('/');
2084 let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2085
2086 let tag_name = tag_start
2088 .chars()
2089 .take_while(|c| c.is_ascii_alphabetic() || *c == '-')
2090 .collect::<String>()
2091 .to_lowercase();
2092
2093 if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2095 lines[i].in_html_block = true;
2097
2098 if !is_closing {
2101 let closing_tag = format!("</{tag_name}>");
2102 let allow_blank_lines = tag_name == "style" || tag_name == "script";
2104 let mut j = i + 1;
2105 while j < lines.len() && j < i + 100 {
2106 if !allow_blank_lines && lines[j].is_blank {
2109 break;
2110 }
2111
2112 lines[j].in_html_block = true;
2113
2114 if lines[j].content(content).contains(&closing_tag) {
2116 break;
2117 }
2118 j += 1;
2119 }
2120 }
2121 }
2122 }
2123
2124 i += 1;
2125 }
2126 }
2127
2128 fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2131 if !flavor.supports_esm_blocks() {
2133 return;
2134 }
2135
2136 for line in lines.iter_mut() {
2137 if line.is_blank || line.in_html_comment {
2139 continue;
2140 }
2141
2142 let trimmed = line.content(content).trim_start();
2144 if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
2145 line.in_esm_block = true;
2146 } else {
2147 break;
2149 }
2150 }
2151 }
2152
2153 fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
2155 let mut code_spans = Vec::new();
2156
2157 if !content.contains('`') {
2159 return code_spans;
2160 }
2161
2162 let parser = Parser::new(content).into_offset_iter();
2164
2165 for (event, range) in parser {
2166 if let Event::Code(_) = event {
2167 let start_pos = range.start;
2168 let end_pos = range.end;
2169
2170 let full_span = &content[start_pos..end_pos];
2172 let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
2173
2174 let content_start = start_pos + backtick_count;
2176 let content_end = end_pos - backtick_count;
2177 let span_content = if content_start < content_end {
2178 content[content_start..content_end].to_string()
2179 } else {
2180 String::new()
2181 };
2182
2183 let line_idx = lines
2186 .partition_point(|line| line.byte_offset <= start_pos)
2187 .saturating_sub(1);
2188 let line_num = line_idx + 1;
2189 let col_start = start_pos - lines[line_idx].byte_offset;
2190
2191 let end_line_idx = lines
2193 .partition_point(|line| line.byte_offset <= end_pos)
2194 .saturating_sub(1);
2195 let col_end = end_pos - lines[end_line_idx].byte_offset;
2196
2197 code_spans.push(CodeSpan {
2198 line: line_num,
2199 start_col: col_start,
2200 end_col: col_end,
2201 byte_offset: start_pos,
2202 byte_end: end_pos,
2203 backtick_count,
2204 content: span_content,
2205 });
2206 }
2207 }
2208
2209 code_spans.sort_by_key(|span| span.byte_offset);
2211
2212 code_spans
2213 }
2214
/// Groups list-item lines into `ListBlock`s.
///
/// Walks `lines` once, keeping at most one open block (`current_block`).
/// A list-item line either extends the open block or closes it and opens a
/// new one; a non-item line either extends the open block (continuation
/// text, blank line followed by more list content) or closes it. Line numbers
/// stored in blocks are 1-based. After the walk the collected blocks are
/// passed to `merge_adjacent_list_blocks`.
fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
    // Rough pre-size: one block per ten lines.
    let mut list_blocks = Vec::with_capacity(lines.len() / 10);
    let mut current_block: Option<ListBlock> = None;
    // State about the most recent list item seen (1-based line, marker
    // column, and marker width including one trailing space for ordered items).
    let mut last_list_item_line = 0;
    let mut current_indent_level = 0;
    let mut last_marker_width = 0;

    for (line_idx, line_info) in lines.iter().enumerate() {
        let line_num = line_idx + 1;

        // Code-block lines never start items; they only extend or end the
        // open block depending on how the code block relates to the list.
        if line_info.in_code_block {
            if let Some(ref mut block) = current_block {
                let min_continuation_indent =
                    CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);

                let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);

                match context {
                    CodeBlockContext::Indented => {
                        block.end_line = line_num;
                        continue;
                    }
                    CodeBlockContext::Standalone => {
                        // The code block is not part of the list: close the block.
                        let completed_block = current_block.take().unwrap();
                        list_blocks.push(completed_block);
                        continue;
                    }
                    CodeBlockContext::Adjacent => {
                        block.end_line = line_num;
                        continue;
                    }
                }
            } else {
                continue;
            }
        }

        // Leading `>` markers (with surrounding whitespace), or empty.
        let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
            caps.get(0).unwrap().as_str().to_string()
        } else {
            String::new()
        };

        if let Some(list_item) = &line_info.list_item {
            let item_indent = list_item.marker_column;
            // Nesting is approximated as two columns per level.
            let nesting = item_indent / 2;
            if let Some(ref mut block) = current_block {
                let is_nested = nesting > block.nesting_level;
                let same_type =
                    (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
                let same_context = block.blockquote_prefix == blockquote_prefix;
                // At most one line may sit between two consecutive items.
                let reasonable_distance = line_num <= last_list_item_line + 2;
                // Ordered blocks and blocks with mixed markers (`None`)
                // accept any marker; otherwise the marker must match.
                let marker_compatible =
                    block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);

                // Scan the lines between the block's last item and this item
                // for anything that breaks the list.
                let has_non_list_content = {
                    let mut found_non_list = false;
                    let block_last_item_line = block.item_lines.last().copied().unwrap_or(block.end_line);

                    // NOTE(review): the `sqlalchemy` checks below only emit
                    // debug logs; they look like leftover diagnostics for one
                    // specific input - confirm whether they are still needed.
                    if block_last_item_line > 0 && block_last_item_line <= lines.len() {
                        let last_line = &lines[block_last_item_line - 1];
                        let last_line_content = last_line.content(content);
                        if last_line_content.contains(r"`sqlalchemy`") && last_line_content.contains(r"\`") {
                            log::debug!(
                                "After problematic line {}: checking lines {} to {} for non-list content",
                                block_last_item_line,
                                block_last_item_line + 1,
                                line_num
                            );
                            if line_num == block_last_item_line + 1 {
                                log::debug!("Lines are consecutive, no content between");
                            }
                        }
                    }

                    for check_line in (block_last_item_line + 1)..line_num {
                        let check_idx = check_line - 1;
                        if check_idx < lines.len() {
                            let check_info = &lines[check_idx];
                            let is_list_breaking_content = if check_info.in_code_block {
                                // Marker width of the last item (ordered
                                // markers get +1), defaulting to 3.
                                let last_item_marker_width =
                                    if block_last_item_line > 0 && block_last_item_line <= lines.len() {
                                        lines[block_last_item_line - 1]
                                            .list_item
                                            .as_ref()
                                            .map(|li| {
                                                if li.is_ordered {
                                                    li.marker.len() + 1
                                                } else {
                                                    li.marker.len()
                                                }
                                            })
                                            .unwrap_or(3)
                                    } else {
                                        3
                                    };

                                let min_continuation = if block.is_ordered { last_item_marker_width } else { 2 };

                                let context = CodeBlockUtils::analyze_code_block_context(
                                    lines,
                                    check_line - 1,
                                    min_continuation,
                                );

                                // Only a standalone code block breaks the list.
                                matches!(context, CodeBlockContext::Standalone)
                            } else if !check_info.is_blank && check_info.list_item.is_none() {
                                let line_content = check_info.content(content).trim();

                                // Headings, rule-like lines, table-looking
                                // rows (pipes, but not inside links/URLs) and
                                // blockquotes always break the list.
                                if check_info.heading.is_some()
                                    || line_content.starts_with("---")
                                    || line_content.starts_with("***")
                                    || line_content.starts_with("___")
                                    || (line_content.contains('|')
                                        && !line_content.contains("](")
                                        && !line_content.contains("http")
                                        && (line_content.matches('|').count() > 1
                                            || line_content.starts_with('|')
                                            || line_content.ends_with('|')))
                                    || line_content.starts_with(">")
                                {
                                    true
                                }
                                else {
                                    // Other text breaks the list only when it
                                    // is indented less than a continuation.
                                    let last_item_marker_width =
                                        if block_last_item_line > 0 && block_last_item_line <= lines.len() {
                                            lines[block_last_item_line - 1]
                                                .list_item
                                                .as_ref()
                                                .map(|li| {
                                                    if li.is_ordered {
                                                        li.marker.len() + 1
                                                    } else {
                                                        li.marker.len()
                                                    }
                                                })
                                                .unwrap_or(3)
                                        } else {
                                            3
                                        };

                                    let min_continuation =
                                        if block.is_ordered { last_item_marker_width } else { 2 };
                                    check_info.indent < min_continuation
                                }
                            } else {
                                false
                            };

                            if is_list_breaking_content {
                                found_non_list = true;
                                break;
                            }
                        }
                    }
                    found_non_list
                };

                // Nested items may differ in type and marker from the block;
                // same-level items must match on both.
                let mut continues_list = if is_nested {
                    same_context && reasonable_distance && !has_non_list_content
                } else {
                    let result = same_type
                        && same_context
                        && reasonable_distance
                        && marker_compatible
                        && !has_non_list_content;

                    // NOTE(review): debug-only diagnostics, see note above.
                    if block.item_lines.last().is_some_and(|&last_line| {
                        last_line > 0
                            && last_line <= lines.len()
                            && lines[last_line - 1].content(content).contains(r"`sqlalchemy`")
                            && lines[last_line - 1].content(content).contains(r"\`")
                    }) {
                        log::debug!(
                            "List continuation check after problematic line at line {line_num}: same_type={same_type}, same_context={same_context}, reasonable_distance={reasonable_distance}, marker_compatible={marker_compatible}, has_non_list_content={has_non_list_content}, continues={result}"
                        );
                        if line_num > 0 && line_num <= lines.len() {
                            log::debug!("Current line content: {:?}", lines[line_num - 1].content(content));
                        }
                    }

                    result
                };

                // Override: an item directly following another item of the
                // block (block ends on the previous line, and that line is an
                // item) always stays in the block.
                if !continues_list && reasonable_distance && line_num > 0 && block.end_line == line_num - 1 {
                    if block.item_lines.contains(&(line_num - 1)) {
                        continues_list = true;
                    }
                }

                if continues_list {
                    block.end_line = line_num;
                    block.item_lines.push(line_num);

                    block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
                        list_item.marker.len() + 1
                    } else {
                        list_item.marker.len()
                    });

                    // A different marker in an unordered block turns the
                    // block's marker into `None` (mixed).
                    if !block.is_ordered
                        && block.marker.is_some()
                        && block.marker.as_ref() != Some(&list_item.marker)
                    {
                        block.marker = None;
                    }
                } else {
                    // Close the open block and start a new one in place.
                    list_blocks.push(block.clone());

                    *block = ListBlock {
                        start_line: line_num,
                        end_line: line_num,
                        is_ordered: list_item.is_ordered,
                        marker: if list_item.is_ordered {
                            None
                        } else {
                            Some(list_item.marker.clone())
                        },
                        blockquote_prefix: blockquote_prefix.clone(),
                        item_lines: vec![line_num],
                        nesting_level: nesting,
                        max_marker_width: if list_item.is_ordered {
                            list_item.marker.len() + 1
                        } else {
                            list_item.marker.len()
                        },
                    };
                }
            } else {
                // No open block: this item starts one.
                current_block = Some(ListBlock {
                    start_line: line_num,
                    end_line: line_num,
                    is_ordered: list_item.is_ordered,
                    marker: if list_item.is_ordered {
                        None
                    } else {
                        Some(list_item.marker.clone())
                    },
                    blockquote_prefix,
                    item_lines: vec![line_num],
                    nesting_level: nesting,
                    // NOTE(review): unlike the replacement block above, this
                    // does not add 1 for ordered markers - confirm whether
                    // the difference is intended.
                    max_marker_width: list_item.marker.len(),
                });
            }

            last_list_item_line = line_num;
            current_indent_level = item_indent;
            last_marker_width = if list_item.is_ordered {
                list_item.marker.len() + 1
            } else {
                list_item.marker.len()
            };
        } else if let Some(ref mut block) = current_block {
            // Non-item line while a block is open.

            // A trailing backslash on the block's last line makes this line
            // part of the block regardless of indentation.
            let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
                lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
            } else {
                false
            };

            // Indentation needed to count as continuation of the last item.
            let min_continuation_indent = if block.is_ordered {
                current_indent_level + last_marker_width
            } else {
                current_indent_level + 2
            };

            if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
                block.end_line = line_num;
            } else if line_info.is_blank {
                // Look past the run of blank lines to decide whether the list
                // goes on afterwards.
                let mut check_idx = line_idx + 1;
                let mut found_continuation = false;

                while check_idx < lines.len() && lines[check_idx].is_blank {
                    check_idx += 1;
                }

                if check_idx < lines.len() {
                    let next_line = &lines[check_idx];
                    if !next_line.in_code_block && next_line.indent >= min_continuation_indent {
                        found_continuation = true;
                    }
                    else if !next_line.in_code_block
                        && next_line.list_item.is_some()
                        && let Some(item) = &next_line.list_item
                    {
                        let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
                            .find(next_line.content(content))
                            .map_or(String::new(), |m| m.as_str().to_string());
                        // The next item must sit at the same column, be of
                        // the same kind, and share the blockquote context.
                        if item.marker_column == current_indent_level
                            && item.is_ordered == block.is_ordered
                            && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
                        {
                            // NOTE(review): this value is computed but never
                            // read (underscore-prefixed) - candidate for removal.
                            let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
                                if let Some(between_line) = lines.get(idx) {
                                    let between_content = between_line.content(content);
                                    let trimmed = between_content.trim();
                                    if trimmed.is_empty() {
                                        return false;
                                    }
                                    let line_indent = between_content.len() - between_content.trim_start().len();

                                    if trimmed.starts_with("```")
                                        || trimmed.starts_with("~~~")
                                        || trimmed.starts_with("---")
                                        || trimmed.starts_with("***")
                                        || trimmed.starts_with("___")
                                        || trimmed.starts_with(">")
                                        || trimmed.contains('|')
                                        || between_line.heading.is_some()
                                    {
                                        return true;
                                    }

                                    line_indent >= min_continuation_indent
                                } else {
                                    false
                                }
                            });

                            // NOTE(review): both branches below are textually
                            // identical; the `is_ordered` split currently has
                            // no effect.
                            if block.is_ordered {
                                let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
                                    if let Some(between_line) = lines.get(idx) {
                                        let trimmed = between_line.content(content).trim();
                                        if trimmed.is_empty() {
                                            return false;
                                        }
                                        trimmed.starts_with("```")
                                            || trimmed.starts_with("~~~")
                                            || trimmed.starts_with("---")
                                            || trimmed.starts_with("***")
                                            || trimmed.starts_with("___")
                                            || trimmed.starts_with(">")
                                            || trimmed.contains('|')
                                            || between_line.heading.is_some()
                                    } else {
                                        false
                                    }
                                });
                                found_continuation = !has_structural_separators;
                            } else {
                                let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
                                    if let Some(between_line) = lines.get(idx) {
                                        let trimmed = between_line.content(content).trim();
                                        if trimmed.is_empty() {
                                            return false;
                                        }
                                        trimmed.starts_with("```")
                                            || trimmed.starts_with("~~~")
                                            || trimmed.starts_with("---")
                                            || trimmed.starts_with("***")
                                            || trimmed.starts_with("___")
                                            || trimmed.starts_with(">")
                                            || trimmed.contains('|')
                                            || between_line.heading.is_some()
                                    } else {
                                        false
                                    }
                                });
                                found_continuation = !has_structural_separators;
                            }
                        }
                    }
                }

                if found_continuation {
                    block.end_line = line_num;
                } else {
                    list_blocks.push(block.clone());
                    current_block = None;
                }
            } else {
                // Non-blank, under-indented text: possibly a lazy continuation.
                let min_required_indent = if block.is_ordered {
                    current_indent_level + last_marker_width
                } else {
                    current_indent_level + 2
                };

                let line_content = line_info.content(content).trim();
                // Headings, fences, rule-like lines, blockquotes and
                // table-looking rows can never be lazy continuation text.
                let is_structural_separator = line_info.heading.is_some()
                    || line_content.starts_with("```")
                    || line_content.starts_with("~~~")
                    || line_content.starts_with("---")
                    || line_content.starts_with("***")
                    || line_content.starts_with("___")
                    || line_content.starts_with(">")
                    || (line_content.contains('|')
                        && !line_content.contains("](")
                        && !line_content.contains("http")
                        && (line_content.matches('|').count() > 1
                            || line_content.starts_with('|')
                            || line_content.ends_with('|')));
                let is_lazy_continuation = !is_structural_separator
                    && !line_info.is_blank
                    && (line_info.indent == 0 || line_info.indent >= min_required_indent);

                if is_lazy_continuation {
                    // Judge the text after any blockquote prefix.
                    let content_to_check = if !blockquote_prefix.is_empty() {
                        line_info
                            .content(content)
                            .strip_prefix(&blockquote_prefix)
                            .unwrap_or(line_info.content(content))
                            .trim()
                    } else {
                        line_info.content(content).trim()
                    };

                    let starts_with_uppercase = content_to_check.chars().next().is_some_and(|c| c.is_uppercase());

                    // Heuristic: text starting with an uppercase letter is
                    // taken as a new paragraph and ends the block.
                    if starts_with_uppercase && last_list_item_line > 0 {
                        list_blocks.push(block.clone());
                        current_block = None;
                    } else {
                        block.end_line = line_num;
                    }
                } else {
                    list_blocks.push(block.clone());
                    current_block = None;
                }
            }
        }
    }

    // Flush the block that is still open at end of input.
    if let Some(block) = current_block {
        list_blocks.push(block);
    }

    merge_adjacent_list_blocks(content, &mut list_blocks, lines);

    list_blocks
}
2741
2742 fn compute_char_frequency(content: &str) -> CharFrequency {
2744 let mut frequency = CharFrequency::default();
2745
2746 for ch in content.chars() {
2747 match ch {
2748 '#' => frequency.hash_count += 1,
2749 '*' => frequency.asterisk_count += 1,
2750 '_' => frequency.underscore_count += 1,
2751 '-' => frequency.hyphen_count += 1,
2752 '+' => frequency.plus_count += 1,
2753 '>' => frequency.gt_count += 1,
2754 '|' => frequency.pipe_count += 1,
2755 '[' => frequency.bracket_count += 1,
2756 '`' => frequency.backtick_count += 1,
2757 '<' => frequency.lt_count += 1,
2758 '!' => frequency.exclamation_count += 1,
2759 '\n' => frequency.newline_count += 1,
2760 _ => {}
2761 }
2762 }
2763
2764 frequency
2765 }
2766
2767 fn parse_html_tags(
2769 content: &str,
2770 lines: &[LineInfo],
2771 code_blocks: &[(usize, usize)],
2772 flavor: MarkdownFlavor,
2773 ) -> Vec<HtmlTag> {
2774 static HTML_TAG_REGEX: LazyLock<regex::Regex> =
2775 LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
2776
2777 let mut html_tags = Vec::with_capacity(content.matches('<').count());
2778
2779 for cap in HTML_TAG_REGEX.captures_iter(content) {
2780 let full_match = cap.get(0).unwrap();
2781 let match_start = full_match.start();
2782 let match_end = full_match.end();
2783
2784 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2786 continue;
2787 }
2788
2789 let is_closing = !cap.get(1).unwrap().as_str().is_empty();
2790 let tag_name_original = cap.get(2).unwrap().as_str();
2791 let tag_name = tag_name_original.to_lowercase();
2792 let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
2793
2794 if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
2797 continue;
2798 }
2799
2800 let mut line_num = 1;
2802 let mut col_start = match_start;
2803 let mut col_end = match_end;
2804 for (idx, line_info) in lines.iter().enumerate() {
2805 if match_start >= line_info.byte_offset {
2806 line_num = idx + 1;
2807 col_start = match_start - line_info.byte_offset;
2808 col_end = match_end - line_info.byte_offset;
2809 } else {
2810 break;
2811 }
2812 }
2813
2814 html_tags.push(HtmlTag {
2815 line: line_num,
2816 start_col: col_start,
2817 end_col: col_end,
2818 byte_offset: match_start,
2819 byte_end: match_end,
2820 tag_name,
2821 is_closing,
2822 is_self_closing,
2823 raw_content: full_match.as_str().to_string(),
2824 });
2825 }
2826
2827 html_tags
2828 }
2829
2830 fn parse_emphasis_spans(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<EmphasisSpan> {
2832 static EMPHASIS_REGEX: LazyLock<regex::Regex> =
2833 LazyLock::new(|| regex::Regex::new(r"(\*{1,3}|_{1,3})([^*_\s][^*_]*?)(\*{1,3}|_{1,3})").unwrap());
2834
2835 let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
2836
2837 for cap in EMPHASIS_REGEX.captures_iter(content) {
2838 let full_match = cap.get(0).unwrap();
2839 let match_start = full_match.start();
2840 let match_end = full_match.end();
2841
2842 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2844 continue;
2845 }
2846
2847 let opening_markers = cap.get(1).unwrap().as_str();
2848 let content_part = cap.get(2).unwrap().as_str();
2849 let closing_markers = cap.get(3).unwrap().as_str();
2850
2851 if opening_markers.chars().next() != closing_markers.chars().next()
2853 || opening_markers.len() != closing_markers.len()
2854 {
2855 continue;
2856 }
2857
2858 let marker = opening_markers.chars().next().unwrap();
2859 let marker_count = opening_markers.len();
2860
2861 let mut line_num = 1;
2863 let mut col_start = match_start;
2864 let mut col_end = match_end;
2865 for (idx, line_info) in lines.iter().enumerate() {
2866 if match_start >= line_info.byte_offset {
2867 line_num = idx + 1;
2868 col_start = match_start - line_info.byte_offset;
2869 col_end = match_end - line_info.byte_offset;
2870 } else {
2871 break;
2872 }
2873 }
2874
2875 emphasis_spans.push(EmphasisSpan {
2876 line: line_num,
2877 start_col: col_start,
2878 end_col: col_end,
2879 byte_offset: match_start,
2880 byte_end: match_end,
2881 marker,
2882 marker_count,
2883 content: content_part.to_string(),
2884 });
2885 }
2886
2887 emphasis_spans
2888 }
2889
2890 fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
2892 let mut table_rows = Vec::with_capacity(lines.len() / 20);
2893
2894 for (line_idx, line_info) in lines.iter().enumerate() {
2895 if line_info.in_code_block || line_info.is_blank {
2897 continue;
2898 }
2899
2900 let line = line_info.content(content);
2901 let line_num = line_idx + 1;
2902
2903 if !line.contains('|') {
2905 continue;
2906 }
2907
2908 let parts: Vec<&str> = line.split('|').collect();
2910 let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
2911
2912 let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
2914 let mut column_alignments = Vec::new();
2915
2916 if is_separator {
2917 for part in &parts[1..parts.len() - 1] {
2918 let trimmed = part.trim();
2920 let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
2921 "center".to_string()
2922 } else if trimmed.ends_with(':') {
2923 "right".to_string()
2924 } else if trimmed.starts_with(':') {
2925 "left".to_string()
2926 } else {
2927 "none".to_string()
2928 };
2929 column_alignments.push(alignment);
2930 }
2931 }
2932
2933 table_rows.push(TableRow {
2934 line: line_num,
2935 is_separator,
2936 column_count,
2937 column_alignments,
2938 });
2939 }
2940
2941 table_rows
2942 }
2943
2944 fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
2946 let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
2947
2948 for cap in BARE_URL_PATTERN.captures_iter(content) {
2950 let full_match = cap.get(0).unwrap();
2951 let match_start = full_match.start();
2952 let match_end = full_match.end();
2953
2954 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2956 continue;
2957 }
2958
2959 let preceding_char = if match_start > 0 {
2961 content.chars().nth(match_start - 1)
2962 } else {
2963 None
2964 };
2965 let following_char = content.chars().nth(match_end);
2966
2967 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
2968 continue;
2969 }
2970 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
2971 continue;
2972 }
2973
2974 let url = full_match.as_str();
2975 let url_type = if url.starts_with("https://") {
2976 "https"
2977 } else if url.starts_with("http://") {
2978 "http"
2979 } else if url.starts_with("ftp://") {
2980 "ftp"
2981 } else {
2982 "other"
2983 };
2984
2985 let mut line_num = 1;
2987 let mut col_start = match_start;
2988 let mut col_end = match_end;
2989 for (idx, line_info) in lines.iter().enumerate() {
2990 if match_start >= line_info.byte_offset {
2991 line_num = idx + 1;
2992 col_start = match_start - line_info.byte_offset;
2993 col_end = match_end - line_info.byte_offset;
2994 } else {
2995 break;
2996 }
2997 }
2998
2999 bare_urls.push(BareUrl {
3000 line: line_num,
3001 start_col: col_start,
3002 end_col: col_end,
3003 byte_offset: match_start,
3004 byte_end: match_end,
3005 url: url.to_string(),
3006 url_type: url_type.to_string(),
3007 });
3008 }
3009
3010 for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
3012 let full_match = cap.get(0).unwrap();
3013 let match_start = full_match.start();
3014 let match_end = full_match.end();
3015
3016 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3018 continue;
3019 }
3020
3021 let preceding_char = if match_start > 0 {
3023 content.chars().nth(match_start - 1)
3024 } else {
3025 None
3026 };
3027 let following_char = content.chars().nth(match_end);
3028
3029 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3030 continue;
3031 }
3032 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3033 continue;
3034 }
3035
3036 let email = full_match.as_str();
3037
3038 let mut line_num = 1;
3040 let mut col_start = match_start;
3041 let mut col_end = match_end;
3042 for (idx, line_info) in lines.iter().enumerate() {
3043 if match_start >= line_info.byte_offset {
3044 line_num = idx + 1;
3045 col_start = match_start - line_info.byte_offset;
3046 col_end = match_end - line_info.byte_offset;
3047 } else {
3048 break;
3049 }
3050 }
3051
3052 bare_urls.push(BareUrl {
3053 line: line_num,
3054 start_col: col_start,
3055 end_col: col_end,
3056 byte_offset: match_start,
3057 byte_end: match_end,
3058 url: email.to_string(),
3059 url_type: "email".to_string(),
3060 });
3061 }
3062
3063 bare_urls
3064 }
3065}
3066
3067fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
3069 if list_blocks.len() < 2 {
3070 return;
3071 }
3072
3073 let mut merger = ListBlockMerger::new(content, lines);
3074 *list_blocks = merger.merge(list_blocks);
3075}
3076
3077struct ListBlockMerger<'a> {
3079 content: &'a str,
3080 lines: &'a [LineInfo],
3081}
3082
3083impl<'a> ListBlockMerger<'a> {
3084 fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
3085 Self { content, lines }
3086 }
3087
3088 fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
3089 let mut merged = Vec::with_capacity(list_blocks.len());
3090 let mut current = list_blocks[0].clone();
3091
3092 for next in list_blocks.iter().skip(1) {
3093 if self.should_merge_blocks(¤t, next) {
3094 current = self.merge_two_blocks(current, next);
3095 } else {
3096 merged.push(current);
3097 current = next.clone();
3098 }
3099 }
3100
3101 merged.push(current);
3102 merged
3103 }
3104
3105 fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
3107 if !self.blocks_are_compatible(current, next) {
3109 return false;
3110 }
3111
3112 let spacing = self.analyze_spacing_between(current, next);
3114 match spacing {
3115 BlockSpacing::Consecutive => true,
3116 BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
3117 BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
3118 self.can_merge_with_content_between(current, next)
3119 }
3120 }
3121 }
3122
3123 fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
3125 current.is_ordered == next.is_ordered
3126 && current.blockquote_prefix == next.blockquote_prefix
3127 && current.nesting_level == next.nesting_level
3128 }
3129
3130 fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
3132 let gap = next.start_line - current.end_line;
3133
3134 match gap {
3135 1 => BlockSpacing::Consecutive,
3136 2 => BlockSpacing::SingleBlank,
3137 _ if gap > 2 => {
3138 if self.has_only_blank_lines_between(current, next) {
3139 BlockSpacing::MultipleBlanks
3140 } else {
3141 BlockSpacing::ContentBetween
3142 }
3143 }
3144 _ => BlockSpacing::Consecutive, }
3146 }
3147
3148 fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3150 if has_meaningful_content_between(self.content, current, next, self.lines) {
3153 return false; }
3155
3156 !current.is_ordered && current.marker == next.marker
3158 }
3159
3160 fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3162 if has_meaningful_content_between(self.content, current, next, self.lines) {
3164 return false; }
3166
3167 current.is_ordered && next.is_ordered
3169 }
3170
3171 fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3173 for line_num in (current.end_line + 1)..next.start_line {
3174 if let Some(line_info) = self.lines.get(line_num - 1)
3175 && !line_info.content(self.content).trim().is_empty()
3176 {
3177 return false;
3178 }
3179 }
3180 true
3181 }
3182
3183 fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
3185 current.end_line = next.end_line;
3186 current.item_lines.extend_from_slice(&next.item_lines);
3187
3188 current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
3190
3191 if !current.is_ordered && self.markers_differ(¤t, next) {
3193 current.marker = None; }
3195
3196 current
3197 }
3198
3199 fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
3201 current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
3202 }
3203}
3204
/// How two neighbouring list blocks are separated, derived from the
/// difference between the second block's start line and the first block's
/// end line.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// The second block starts on the line right after the first ends.
    Consecutive,
    /// Exactly one line lies between the blocks.
    SingleBlank,
    /// Several lines lie between the blocks and all of them are blank.
    MultipleBlanks,
    /// Several lines lie between the blocks and at least one is not blank.
    ContentBetween,
}
3213
3214fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
3216 for line_num in (current.end_line + 1)..next.start_line {
3218 if let Some(line_info) = lines.get(line_num - 1) {
3219 let trimmed = line_info.content(content).trim();
3221
3222 if trimmed.is_empty() {
3224 continue;
3225 }
3226
3227 if line_info.heading.is_some() {
3231 return true; }
3233
3234 if is_horizontal_rule(trimmed) {
3236 return true; }
3238
3239 if trimmed.contains('|') && trimmed.len() > 1 {
3242 if !trimmed.contains("](") && !trimmed.contains("http") {
3244 let pipe_count = trimmed.matches('|').count();
3246 if pipe_count > 1 || trimmed.starts_with('|') || trimmed.ends_with('|') {
3247 return true; }
3249 }
3250 }
3251
3252 if trimmed.starts_with('>') {
3254 return true; }
3256
3257 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
3259 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3260
3261 let min_continuation_indent = if current.is_ordered {
3263 current.nesting_level + current.max_marker_width + 1 } else {
3265 current.nesting_level + 2
3266 };
3267
3268 if line_indent < min_continuation_indent {
3269 return true; }
3272 }
3273
3274 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3276
3277 let min_indent = if current.is_ordered {
3279 current.nesting_level + current.max_marker_width
3280 } else {
3281 current.nesting_level + 2
3282 };
3283
3284 if line_indent < min_indent {
3286 return true; }
3288
3289 }
3292 }
3293
3294 false
3296}
3297
/// Returns `true` when `trimmed` is a horizontal rule: it starts with `-`,
/// `*` or `_`, contains at least three of that character, and contains
/// nothing else except spaces and tabs.
///
/// The input is expected to be trimmed already; a leading space makes the
/// function return `false`.
fn is_horizontal_rule(trimmed: &str) -> bool {
    if trimmed.len() < 3 {
        return false;
    }

    let mut symbols = trimmed.chars();
    let rule_char = match symbols.next() {
        Some(c @ ('-' | '*' | '_')) => c,
        _ => return false,
    };

    // The first character has already been consumed and counts as one.
    let mut run = 1;
    for symbol in symbols {
        if symbol == rule_char {
            run += 1;
        } else if !matches!(symbol, ' ' | '\t') {
            return false;
        }
    }

    run >= 3
}
3321
#[cfg(test)]
mod tests {
    use super::*;

    /// Empty input: a single line offset at 0 and no `LineInfo` entries.
    #[test]
    fn test_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard);
        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }

    /// A single line without a trailing newline; line and column are 1-based.
    #[test]
    fn test_single_line() {
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard);
        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
    }

    /// Line offsets and offset-to-position mapping across several lines,
    /// including an empty one.
    #[test]
    fn test_multi_line() {
        let content = "# Title\n\nSecond line\nThird line";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);
        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(8), (2, 1));
        assert_eq!(ctx.offset_to_line_col(9), (3, 1));
        assert_eq!(ctx.offset_to_line_col(15), (3, 7));
        assert_eq!(ctx.offset_to_line_col(21), (4, 1));
    }

    /// Per-line metadata: content, byte offset, indent and blank detection.
    #[test]
    fn test_line_info() {
        // The second line carries four leading spaces, matching the
        // `indent == 4` assertions below.
        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        assert_eq!(ctx.lines.len(), 7);

        let line1 = &ctx.lines[0];
        assert_eq!(line1.content(ctx.content), "# Title");
        assert_eq!(line1.byte_offset, 0);
        assert_eq!(line1.indent, 0);
        assert!(!line1.is_blank);
        assert!(!line1.in_code_block);
        assert!(line1.list_item.is_none());

        let line2 = &ctx.lines[1];
        assert_eq!(line2.content(ctx.content), "    indented");
        assert_eq!(line2.byte_offset, 8);
        assert_eq!(line2.indent, 4);
        assert!(!line2.is_blank);

        let line3 = &ctx.lines[2];
        assert_eq!(line3.content(ctx.content), "");
        assert!(line3.is_blank);

        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
    }

    /// Unordered and ordered list markers are detected along with their
    /// column positions; plain text is not a list item.
    #[test]
    fn test_list_item_detection() {
        // The nested `*` item is indented by two spaces, matching the
        // `marker_column == 2` assertion below.
        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n 2) Nested ordered\n\nNot a list";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        let line1 = &ctx.lines[0];
        assert!(line1.list_item.is_some());
        let list1 = line1.list_item.as_ref().unwrap();
        assert_eq!(list1.marker, "-");
        assert!(!list1.is_ordered);
        assert_eq!(list1.marker_column, 0);
        assert_eq!(list1.content_column, 2);

        let line2 = &ctx.lines[1];
        assert!(line2.list_item.is_some());
        let list2 = line2.list_item.as_ref().unwrap();
        assert_eq!(list2.marker, "*");
        assert_eq!(list2.marker_column, 2);

        let line3 = &ctx.lines[2];
        assert!(line3.list_item.is_some());
        let list3 = line3.list_item.as_ref().unwrap();
        assert_eq!(list3.marker, "1.");
        assert!(list3.is_ordered);
        assert_eq!(list3.number, Some(1));

        let line6 = &ctx.lines[5];
        assert!(line6.list_item.is_none());
    }

    /// Offsets that land on newline characters and on the end of the input.
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        let content = "a\nb\nc";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(1), (1, 2));
        assert_eq!(ctx.offset_to_line_col(2), (2, 1));
        assert_eq!(ctx.offset_to_line_col(3), (2, 2));
        assert_eq!(ctx.offset_to_line_col(4), (3, 1));
        assert_eq!(ctx.offset_to_line_col(5), (3, 2));
    }

    /// In the MDX flavor, leading `import` / `export` lines are flagged as
    /// ESM block lines; the lines after them are not.
    #[test]
    fn test_mdx_esm_blocks() {
        let content = r##"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall

In {year}, the snowfall was above average.
It was followed by a warm spring which caused
flood conditions in many of the nearby rivers.

<Chart color="#fcb32c" year={year} />
"##;

        let ctx = LintContext::new(content, MarkdownFlavor::MDX);

        assert_eq!(ctx.lines.len(), 10);
        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
        assert!(
            !ctx.lines[3].in_esm_block,
            "Line 4 (heading) should NOT be in_esm_block"
        );
        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
    }

    /// The same `import` / `export` lines are not flagged in the Standard
    /// flavor.
    #[test]
    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
        let content = r#"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        assert!(
            !ctx.lines[0].in_esm_block,
            "Line 1 should NOT be in_esm_block in Standard flavor"
        );
        assert!(
            !ctx.lines[1].in_esm_block,
            "Line 2 should NOT be in_esm_block in Standard flavor"
        );
    }
}