1use crate::config::MarkdownFlavor;
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
4use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
5use regex::Regex;
6use std::borrow::Cow;
7use std::sync::LazyLock;
8
/// Evaluates `$code` and yields its value, timing the evaluation.
///
/// The elapsed time is written to stderr as `[PROFILE] <name>: <duration>`
/// only when `$profile` is true; the section itself always runs.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let timer = std::time::Instant::now();
        let outcome = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, timer.elapsed());
        }
        outcome
    }};
}

/// wasm32 variant of `profile_section!`: evaluates `$code` with no timing and
/// ignores `$name` and `$profile`.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        $code
    }};
}
26
/// Regex for inline links `[text](url "title")` and reference links `[text][id]`.
///
/// Capture groups: 1 = link text, 2 = URL written in angle brackets,
/// 3 = bare URL, 4 / 5 = double- / single-quoted title, 6 = reference ID.
/// Compiled with `s` (dot matches newline) and `x` (verbose) flags, so the
/// whitespace and `#` comments inside the literal are not part of the match.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\] # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
        \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
        |
        \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
40
/// Regex for inline images `![alt](url "title")` and reference images `![alt][id]`.
///
/// Capture groups: 1 = alt text, 2 = URL written in angle brackets,
/// 3 = bare URL, 4 / 5 = double- / single-quoted title, 6 = reference ID.
/// Compiled with `s` (dot matches newline) and `x` (verbose) flags, so the
/// whitespace and `#` comments inside the literal are not part of the match.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\] # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
        \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
        |
        \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
54
/// Regex for a reference definition line such as `[id]: url "title"`.
///
/// Allows up to three leading spaces. Capture groups: 1 = reference ID,
/// 2 = URL (a run of non-whitespace), 3 / 4 = double- / single-quoted title.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
58
/// Regex for bare `http`, `https` and `ftp` URLs.
///
/// The URL body stops at whitespace or at any of `< > [ ] ( ) \ ' "` and the
/// backtick; an optional port, path, query string and fragment may follow.
static BARE_URL_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(https?|ftp)://[^\s<>\[\]()\\'"`]+(?:\.[^\s<>\[\]()\\'"`]+)*(?::\d+)?(?:/[^\s<>\[\]()\\'"`]*)?(?:\?[^\s<>\[\]()\\'"`]*)?(?:#[^\s<>\[\]()\\'"`]*)?"#
    ).unwrap()
});
65
/// Regex for bare e-mail addresses: a local part, `@`, a domain, and a final
/// alphabetic label of at least two letters.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
69
/// Regex capturing a blockquote prefix at the start of a line: optional
/// whitespace, one or more `>` characters, then optional whitespace (group 1).
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
72
/// Per-line facts stored in `LintContext::lines`, one entry per source line.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset in the source at which this line starts.
    pub byte_offset: usize,
    /// Length in bytes of the slice returned by `content`.
    pub byte_len: usize,
    /// Indentation of the line — presumably a width in columns; TODO confirm the unit.
    pub indent: usize,
    /// Whether the line is blank.
    pub is_blank: bool,
    /// Value reported by `LintContext::is_in_code_block` for this line.
    pub in_code_block: bool,
    /// Value reported by `LintContext::is_in_front_matter` for this line.
    pub in_front_matter: bool,
    /// Value reported by `LintContext::is_in_html_block` for this line.
    pub in_html_block: bool,
    /// Whether the line lies inside an HTML comment.
    pub in_html_comment: bool,
    /// List item details when the line starts a list item.
    pub list_item: Option<ListItemInfo>,
    /// Heading details when the line is a heading.
    pub heading: Option<HeadingInfo>,
    /// Blockquote details when the line is part of a blockquote.
    pub blockquote: Option<BlockquoteInfo>,
    /// Whether the line belongs to a mkdocstrings block (NOTE(review): populated outside this excerpt).
    pub in_mkdocstrings: bool,
    /// Whether the line belongs to an ESM block (NOTE(review): populated outside this excerpt).
    pub in_esm_block: bool,
    /// Set by `LintContext::new` on every line after the first line of a
    /// code span that covers several lines.
    pub in_code_span_continuation: bool,
}
105
106impl LineInfo {
107 pub fn content<'a>(&self, source: &'a str) -> &'a str {
109 &source[self.byte_offset..self.byte_offset + self.byte_len]
110 }
111}
112
/// Details of a list item marker found on a line (see `LineInfo::list_item`).
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker text — presumably e.g. `-`, `*`, `+` or `1.`; TODO confirm.
    pub marker: String,
    /// Whether the item belongs to an ordered list.
    pub is_ordered: bool,
    /// The item's number — presumably only set for ordered items; TODO confirm.
    pub number: Option<usize>,
    /// Column at which the marker starts.
    pub marker_column: usize,
    /// Column at which the item's content starts.
    pub content_column: usize,
}
127
/// Syntactic style of a heading (see `HeadingInfo::style`).
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    /// ATX heading — presumably the `# Heading` form; TODO confirm.
    ATX,
    /// Setext heading, variant 1 — presumably underlined with `=`; TODO confirm.
    Setext1,
    /// Setext heading, variant 2 — presumably underlined with `-`; TODO confirm.
    Setext2,
}
138
/// A link found in the document by `LintContext::parse_links`.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// 1-based number of the line on which the link starts.
    pub line: usize,
    /// Byte offset of the link start relative to the start of its line.
    pub start_col: usize,
    /// Byte offset of the link end relative to the start of the line that contains the end.
    pub end_col: usize,
    /// Byte offset of the link start within the document.
    pub byte_offset: usize,
    /// Byte offset just past the end of the link within the document.
    pub byte_end: usize,
    /// The text between the link's square brackets.
    pub text: Cow<'a, str>,
    /// Destination URL; empty for reference links found only by the regex fallback.
    pub url: Cow<'a, str>,
    /// True for reference, collapsed and shortcut links.
    pub is_reference: bool,
    /// Lowercased reference ID for reference-style links, `None` otherwise.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link kind as classified by pulldown-cmark.
    pub link_type: LinkType,
}
163
/// A reference reported through pulldown-cmark's broken-link callback.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text passed to the callback.
    pub reference: String,
    /// Byte range of the broken link as reported by the parser.
    pub span: std::ops::Range<usize>,
}
172
/// A footnote reference emitted by the Markdown parser.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// The footnote identifier.
    pub id: String,
    /// 1-based number of the line containing the reference.
    pub line: usize,
    /// Byte offset of the start of the reference within the document.
    pub byte_offset: usize,
    /// Byte offset just past the end of the reference within the document.
    pub byte_end: usize,
}
185
/// An image found in the document by `LintContext::parse_images`.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// 1-based number of the line on which the image starts.
    pub line: usize,
    /// Byte offset of the image start relative to the start of its line.
    pub start_col: usize,
    /// Byte offset of the image end relative to the start of the line that contains the end.
    pub end_col: usize,
    /// Byte offset of the image start (the `!`) within the document.
    pub byte_offset: usize,
    /// Byte offset just past the end of the image within the document.
    pub byte_end: usize,
    /// The text between the image's square brackets.
    pub alt_text: Cow<'a, str>,
    /// Image URL; empty for reference images found only by the regex fallback.
    pub url: Cow<'a, str>,
    /// True for reference, collapsed and shortcut images.
    pub is_reference: bool,
    /// Lowercased reference ID for reference-style images, `None` otherwise.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link kind as classified by pulldown-cmark.
    pub link_type: LinkType,
}
210
/// A reference definition line such as `[id]: url "title"`.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// 1-based number of the line holding the definition.
    pub line: usize,
    /// Reference ID, stored lowercased.
    pub id: String,
    /// The URL the ID resolves to.
    pub url: String,
    /// Optional title given after the URL.
    pub title: Option<String>,
    /// Start of the definition's byte range (inclusive).
    pub byte_offset: usize,
    /// End of the definition's byte range (exclusive, see `is_in_reference_def`).
    pub byte_end: usize,
}
227
/// An inline code span, possibly covering several lines.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// 1-based number of the line on which the span starts.
    pub line: usize,
    /// 1-based number of the line on which the span ends.
    pub end_line: usize,
    /// Start column; compared against a 0-based column in `is_in_code_span`.
    pub start_col: usize,
    /// End column (exclusive); compared against a 0-based column in `is_in_code_span`.
    pub end_col: usize,
    /// Start of the span's byte range (inclusive).
    pub byte_offset: usize,
    /// End of the span's byte range (exclusive).
    pub byte_end: usize,
    /// Number of backticks in the delimiter.
    pub backtick_count: usize,
    /// The span's content — presumably without the delimiters; TODO confirm.
    pub content: String,
}
248
/// Details of a heading found on a line (see `LineInfo::heading`).
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level — presumably 1 through 6; TODO confirm.
    pub level: u8,
    /// Syntactic style of the heading.
    pub style: HeadingStyle,
    /// The heading marker text.
    pub marker: String,
    /// Column at which the marker starts.
    pub marker_column: usize,
    /// Column at which the heading text starts.
    pub content_column: usize,
    /// Heading text — presumably with any custom ID removed; TODO confirm.
    pub text: String,
    /// Custom ID attached to the heading, if any.
    pub custom_id: Option<String>,
    /// Heading text as written — NOTE(review): exact difference from `text` is decided outside this excerpt.
    pub raw_text: String,
    /// Whether the heading ends with a closing sequence.
    pub has_closing_sequence: bool,
    /// The closing sequence text.
    pub closing_sequence: String,
}
273
/// Details of a blockquote line (see `LineInfo::blockquote`).
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Blockquote nesting depth — presumably the number of `>` markers; TODO confirm.
    pub nesting_level: usize,
    /// Whitespace preceding the first marker.
    pub indent: String,
    /// Column at which the first marker starts.
    pub marker_column: usize,
    /// The blockquote prefix text.
    pub prefix: String,
    /// Line content following the prefix.
    pub content: String,
    /// Whether the marker is followed directly by content with no space.
    pub has_no_space_after_marker: bool,
    /// Whether the marker is followed by more than one space.
    pub has_multiple_spaces_after_marker: bool,
    /// Whether the line needs the MD028 fix (NOTE(review): rule semantics live outside this excerpt).
    pub needs_md028_fix: bool,
}
294
/// A contiguous list spanning a range of lines.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// First line of the block (inclusive, see `is_in_list_block`).
    pub start_line: usize,
    /// Last line of the block (inclusive, see `is_in_list_block`).
    pub end_line: usize,
    /// Whether the list is ordered.
    pub is_ordered: bool,
    /// Marker used by the list — presumably `None` when items disagree; TODO confirm.
    pub marker: Option<String>,
    /// Blockquote prefix shared by the block's lines.
    pub blockquote_prefix: String,
    /// Line numbers of the list items in this block.
    pub item_lines: Vec<usize>,
    /// Nesting depth of the block.
    pub nesting_level: usize,
    /// Widest marker among the block's items.
    pub max_marker_width: usize,
}
315
316use std::sync::{Arc, Mutex};
317
/// Pre-computed counts of characters in the document; they back
/// `LintContext::has_char`, `char_count` and the `likely_has_*` filters.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count reported for `#`.
    pub hash_count: usize,
    /// Count reported for `*`.
    pub asterisk_count: usize,
    /// Count reported for `_`.
    pub underscore_count: usize,
    /// Count reported for `-`.
    pub hyphen_count: usize,
    /// Count reported for `+`.
    pub plus_count: usize,
    /// Count reported for `>`.
    pub gt_count: usize,
    /// Count reported for `|`.
    pub pipe_count: usize,
    /// Count reported for `[` — NOTE(review): whether `]` is included is decided where the counts are computed.
    pub bracket_count: usize,
    /// Count reported for the backtick.
    pub backtick_count: usize,
    /// Count reported for `<`.
    pub lt_count: usize,
    /// Count reported for `!`.
    pub exclamation_count: usize,
    /// Count reported for the newline character.
    pub newline_count: usize,
}
346
/// An HTML tag found in the document (see `LintContext::html_tags`).
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line on which the tag appears; matched against `line_num` in `html_tags_on_line`.
    pub line: usize,
    /// Column at which the tag starts.
    pub start_col: usize,
    /// Column at which the tag ends.
    pub end_col: usize,
    /// Byte offset of the start of the tag within the document.
    pub byte_offset: usize,
    /// Byte offset of the end of the tag within the document.
    pub byte_end: usize,
    /// The tag's name.
    pub tag_name: String,
    /// Whether this is a closing tag.
    pub is_closing: bool,
    /// Whether the tag is self-closing.
    pub is_self_closing: bool,
    /// The tag text as written in the document.
    pub raw_content: String,
}
369
/// An emphasis span found in the document (see `LintContext::emphasis_spans`).
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line on which the span appears; matched against `line_num` in `emphasis_spans_on_line`.
    pub line: usize,
    /// Column at which the span starts.
    pub start_col: usize,
    /// Column at which the span ends.
    pub end_col: usize,
    /// Byte offset of the start of the span within the document.
    pub byte_offset: usize,
    /// Byte offset of the end of the span within the document.
    pub byte_end: usize,
    /// The emphasis marker character — presumably `*` or `_`; TODO confirm.
    pub marker: char,
    /// Number of marker characters in the delimiter.
    pub marker_count: usize,
    /// The emphasized content.
    pub content: String,
}
390
/// A table row found in the document (see `LintContext::table_rows`).
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line on which the row appears; matched against `line_num` in `table_rows_on_line`.
    pub line: usize,
    /// Whether this row is the header separator row.
    pub is_separator: bool,
    /// Number of columns in the row.
    pub column_count: usize,
    /// Column alignments — NOTE(review): the string values used are defined where rows are parsed.
    pub column_alignments: Vec<String>,
}
403
/// A bare URL or e-mail address found in the document (see `LintContext::bare_urls`).
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line on which the URL appears; matched against `line_num` in `bare_urls_on_line`.
    pub line: usize,
    /// Column at which the URL starts.
    pub start_col: usize,
    /// Column at which the URL ends.
    pub end_col: usize,
    /// Byte offset of the start of the URL within the document.
    pub byte_offset: usize,
    /// Byte offset of the end of the URL within the document.
    pub byte_end: usize,
    /// The URL text.
    pub url: String,
    /// Kind of URL — NOTE(review): the string values used are defined where URLs are parsed.
    pub url_type: String,
}
422
/// Pre-computed view of one Markdown document, built by `LintContext::new`.
///
/// Most data is computed eagerly in `new`; HTML tags, emphasis spans, table
/// rows and bare URLs are parsed on first access and cached behind mutexes.
pub struct LintContext<'a> {
    /// The raw document text.
    pub content: &'a str,
    /// Byte offset of the start of every line; the first entry is always 0.
    pub line_offsets: Vec<usize>,
    /// Code block ranges from `CodeBlockUtils::detect_code_blocks` — presumably (start, end) byte offsets; TODO confirm.
    pub code_blocks: Vec<(usize, usize)>,
    /// Per-line information; index 0 holds line 1.
    pub lines: Vec<LineInfo>,
    /// Links found by `parse_links`.
    pub links: Vec<ParsedLink<'a>>,
    /// Images found by `parse_images`.
    pub images: Vec<ParsedImage<'a>>,
    /// References reported through the parser's broken-link callback.
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Footnote references emitted by the parser.
    pub footnote_refs: Vec<FootnoteRef>,
    /// Reference definitions found by `parse_reference_defs`.
    pub reference_defs: Vec<ReferenceDef>,
    /// Code spans; filled in `new` and exposed through `code_spans()`.
    code_spans_cache: Mutex<Option<Arc<Vec<CodeSpan>>>>,
    /// List blocks found by `parse_list_blocks`.
    pub list_blocks: Vec<ListBlock>,
    /// Character counts backing `has_char`, `char_count` and the `likely_has_*` filters.
    pub char_frequency: CharFrequency,
    /// Lazily filled cache behind `html_tags()`.
    html_tags_cache: Mutex<Option<Arc<Vec<HtmlTag>>>>,
    /// Lazily filled cache behind `emphasis_spans()`.
    emphasis_spans_cache: Mutex<Option<Arc<Vec<EmphasisSpan>>>>,
    /// Lazily filled cache behind `table_rows()`.
    table_rows_cache: Mutex<Option<Arc<Vec<TableRow>>>>,
    /// Lazily filled cache behind `bare_urls()`.
    bare_urls_cache: Mutex<Option<Arc<Vec<BareUrl>>>>,
    /// Byte ranges of HTML comments; exposed through `html_comment_ranges()`.
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Table blocks found by `TableUtils::find_table_blocks_with_code_info`.
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Line index built over `content`.
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    /// Half-open byte ranges of Jinja constructs (see `is_in_jinja_range`).
    jinja_ranges: Vec<(usize, usize)>,
    /// The Markdown flavor this context was built for.
    pub flavor: MarkdownFlavor,
}
446
/// The four consecutive pieces of a blockquote line, each borrowed from the
/// original line.
struct BlockquoteComponents<'a> {
    /// Spaces and tabs before the first `>`.
    indent: &'a str,
    /// The unbroken run of `>` characters.
    markers: &'a str,
    /// Spaces and tabs directly after the markers.
    spaces_after: &'a str,
    /// Everything after `spaces_after`.
    content: &'a str,
}

/// Splits `line` into indent, `>` markers, following whitespace and content.
///
/// Returns `None` when the first character after any leading spaces/tabs is
/// not `>`. Only an unbroken run of `>` counts as markers, so in `> > x` the
/// second `>` is part of the content.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    const BLANKS: &[char] = &[' ', '\t'];

    let after_indent = line.trim_start_matches(BLANKS);
    if !after_indent.starts_with('>') {
        return None;
    }
    let after_markers = after_indent.trim_start_matches('>');
    let content = after_markers.trim_start_matches(BLANKS);

    // Convert the remaining-suffix lengths back into offsets into `line`.
    let indent_end = line.len() - after_indent.len();
    let markers_end = line.len() - after_markers.len();
    let spaces_end = line.len() - content.len();

    Some(BlockquoteComponents {
        indent: &line[..indent_end],
        markers: &line[indent_end..markers_end],
        spaces_after: &line[markers_end..spaces_end],
        content,
    })
}
491
492impl<'a> LintContext<'a> {
493 pub fn new(content: &'a str, flavor: MarkdownFlavor) -> Self {
494 #[cfg(not(target_arch = "wasm32"))]
495 let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
496 #[cfg(target_arch = "wasm32")]
497 let profile = false;
498
499 let line_offsets = profile_section!("Line offsets", profile, {
500 let mut offsets = vec![0];
501 for (i, c) in content.char_indices() {
502 if c == '\n' {
503 offsets.push(i + 1);
504 }
505 }
506 offsets
507 });
508
509 let code_blocks = profile_section!("Code blocks", profile, CodeBlockUtils::detect_code_blocks(content));
511
512 let html_comment_ranges = profile_section!(
514 "HTML comment ranges",
515 profile,
516 crate::utils::skip_context::compute_html_comment_ranges(content)
517 );
518
519 let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
521 if flavor == MarkdownFlavor::MkDocs {
522 crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
523 } else {
524 Vec::new()
525 }
526 });
527
528 let mut lines = profile_section!(
530 "Basic line info",
531 profile,
532 Self::compute_basic_line_info(
533 content,
534 &line_offsets,
535 &code_blocks,
536 flavor,
537 &html_comment_ranges,
538 &autodoc_ranges,
539 )
540 );
541
542 profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));
544
545 profile_section!(
547 "ESM blocks",
548 profile,
549 Self::detect_esm_blocks(content, &mut lines, flavor)
550 );
551
552 profile_section!(
554 "Headings & blockquotes",
555 profile,
556 Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges)
557 );
558
559 let code_spans = profile_section!("Code spans", profile, Self::parse_code_spans(content, &lines));
561
562 for span in &code_spans {
565 if span.end_line > span.line {
566 for line_num in (span.line + 1)..=span.end_line {
568 if let Some(line_info) = lines.get_mut(line_num - 1) {
569 line_info.in_code_span_continuation = true;
570 }
571 }
572 }
573 }
574
575 let (links, broken_links, footnote_refs) = profile_section!(
577 "Links",
578 profile,
579 Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
580 );
581
582 let images = profile_section!(
583 "Images",
584 profile,
585 Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
586 );
587
588 let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));
589
590 let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));
591
592 let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));
594
595 let table_blocks = profile_section!(
597 "Table blocks",
598 profile,
599 crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
600 content,
601 &code_blocks,
602 &code_spans,
603 &html_comment_ranges,
604 )
605 );
606
607 let line_index = profile_section!(
609 "Line index",
610 profile,
611 crate::utils::range_utils::LineIndex::new(content)
612 );
613
614 let jinja_ranges = profile_section!(
616 "Jinja ranges",
617 profile,
618 crate::utils::jinja_utils::find_jinja_ranges(content)
619 );
620
621 Self {
622 content,
623 line_offsets,
624 code_blocks,
625 lines,
626 links,
627 images,
628 broken_links,
629 footnote_refs,
630 reference_defs,
631 code_spans_cache: Mutex::new(Some(Arc::new(code_spans))),
632 list_blocks,
633 char_frequency,
634 html_tags_cache: Mutex::new(None),
635 emphasis_spans_cache: Mutex::new(None),
636 table_rows_cache: Mutex::new(None),
637 bare_urls_cache: Mutex::new(None),
638 html_comment_ranges,
639 table_blocks,
640 line_index,
641 jinja_ranges,
642 flavor,
643 }
644 }
645
646 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
648 let mut cache = self.code_spans_cache.lock().expect("Code spans cache mutex poisoned");
649
650 Arc::clone(cache.get_or_insert_with(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))))
651 }
652
653 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
655 &self.html_comment_ranges
656 }
657
658 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
660 let mut cache = self.html_tags_cache.lock().expect("HTML tags cache mutex poisoned");
661
662 Arc::clone(cache.get_or_insert_with(|| {
663 Arc::new(Self::parse_html_tags(
664 self.content,
665 &self.lines,
666 &self.code_blocks,
667 self.flavor,
668 ))
669 }))
670 }
671
672 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
674 let mut cache = self
675 .emphasis_spans_cache
676 .lock()
677 .expect("Emphasis spans cache mutex poisoned");
678
679 Arc::clone(
680 cache.get_or_insert_with(|| {
681 Arc::new(Self::parse_emphasis_spans(self.content, &self.lines, &self.code_blocks))
682 }),
683 )
684 }
685
686 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
688 let mut cache = self.table_rows_cache.lock().expect("Table rows cache mutex poisoned");
689
690 Arc::clone(cache.get_or_insert_with(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))))
691 }
692
693 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
695 let mut cache = self.bare_urls_cache.lock().expect("Bare URLs cache mutex poisoned");
696
697 Arc::clone(
698 cache.get_or_insert_with(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
699 )
700 }
701
702 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
704 match self.line_offsets.binary_search(&offset) {
705 Ok(line) => (line + 1, 1),
706 Err(line) => {
707 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
708 (line, offset - line_start + 1)
709 }
710 }
711 }
712
713 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
715 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
717 return true;
718 }
719
720 self.code_spans()
722 .iter()
723 .any(|span| pos >= span.byte_offset && pos < span.byte_end)
724 }
725
726 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
728 if line_num > 0 {
729 self.lines.get(line_num - 1)
730 } else {
731 None
732 }
733 }
734
735 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
737 self.line_info(line_num).map(|info| info.byte_offset)
738 }
739
740 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
742 let normalized_id = ref_id.to_lowercase();
743 self.reference_defs
744 .iter()
745 .find(|def| def.id == normalized_id)
746 .map(|def| def.url.as_str())
747 }
748
749 pub fn is_in_list_block(&self, line_num: usize) -> bool {
751 self.list_blocks
752 .iter()
753 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
754 }
755
756 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
758 self.list_blocks
759 .iter()
760 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
761 }
762
763 pub fn is_in_code_block(&self, line_num: usize) -> bool {
767 if line_num == 0 || line_num > self.lines.len() {
768 return false;
769 }
770 self.lines[line_num - 1].in_code_block
771 }
772
773 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
775 if line_num == 0 || line_num > self.lines.len() {
776 return false;
777 }
778 self.lines[line_num - 1].in_front_matter
779 }
780
781 pub fn is_in_html_block(&self, line_num: usize) -> bool {
783 if line_num == 0 || line_num > self.lines.len() {
784 return false;
785 }
786 self.lines[line_num - 1].in_html_block
787 }
788
789 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
791 if line_num == 0 || line_num > self.lines.len() {
792 return false;
793 }
794
795 let col_0indexed = if col > 0 { col - 1 } else { 0 };
799 let code_spans = self.code_spans();
800 code_spans.iter().any(|span| {
801 if line_num < span.line || line_num > span.end_line {
803 return false;
804 }
805
806 if span.line == span.end_line {
807 col_0indexed >= span.start_col && col_0indexed < span.end_col
809 } else if line_num == span.line {
810 col_0indexed >= span.start_col
812 } else if line_num == span.end_line {
813 col_0indexed < span.end_col
815 } else {
816 true
818 }
819 })
820 }
821
822 #[inline]
824 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
825 let code_spans = self.code_spans();
826 code_spans
827 .iter()
828 .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
829 }
830
831 #[inline]
834 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
835 self.reference_defs
836 .iter()
837 .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
838 }
839
840 #[inline]
844 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
845 self.html_comment_ranges
846 .iter()
847 .any(|range| byte_pos >= range.start && byte_pos < range.end)
848 }
849
850 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
852 self.jinja_ranges
853 .iter()
854 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
855 }
856
857 pub fn has_char(&self, ch: char) -> bool {
859 match ch {
860 '#' => self.char_frequency.hash_count > 0,
861 '*' => self.char_frequency.asterisk_count > 0,
862 '_' => self.char_frequency.underscore_count > 0,
863 '-' => self.char_frequency.hyphen_count > 0,
864 '+' => self.char_frequency.plus_count > 0,
865 '>' => self.char_frequency.gt_count > 0,
866 '|' => self.char_frequency.pipe_count > 0,
867 '[' => self.char_frequency.bracket_count > 0,
868 '`' => self.char_frequency.backtick_count > 0,
869 '<' => self.char_frequency.lt_count > 0,
870 '!' => self.char_frequency.exclamation_count > 0,
871 '\n' => self.char_frequency.newline_count > 0,
872 _ => self.content.contains(ch), }
874 }
875
876 pub fn char_count(&self, ch: char) -> usize {
878 match ch {
879 '#' => self.char_frequency.hash_count,
880 '*' => self.char_frequency.asterisk_count,
881 '_' => self.char_frequency.underscore_count,
882 '-' => self.char_frequency.hyphen_count,
883 '+' => self.char_frequency.plus_count,
884 '>' => self.char_frequency.gt_count,
885 '|' => self.char_frequency.pipe_count,
886 '[' => self.char_frequency.bracket_count,
887 '`' => self.char_frequency.backtick_count,
888 '<' => self.char_frequency.lt_count,
889 '!' => self.char_frequency.exclamation_count,
890 '\n' => self.char_frequency.newline_count,
891 _ => self.content.matches(ch).count(), }
893 }
894
895 pub fn likely_has_headings(&self) -> bool {
897 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
899
900 pub fn likely_has_lists(&self) -> bool {
902 self.char_frequency.asterisk_count > 0
903 || self.char_frequency.hyphen_count > 0
904 || self.char_frequency.plus_count > 0
905 }
906
907 pub fn likely_has_emphasis(&self) -> bool {
909 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
910 }
911
912 pub fn likely_has_tables(&self) -> bool {
914 self.char_frequency.pipe_count > 2
915 }
916
917 pub fn likely_has_blockquotes(&self) -> bool {
919 self.char_frequency.gt_count > 0
920 }
921
922 pub fn likely_has_code(&self) -> bool {
924 self.char_frequency.backtick_count > 0
925 }
926
927 pub fn likely_has_links_or_images(&self) -> bool {
929 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
930 }
931
932 pub fn likely_has_html(&self) -> bool {
934 self.char_frequency.lt_count > 0
935 }
936
937 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
939 self.html_tags()
940 .iter()
941 .filter(|tag| tag.line == line_num)
942 .cloned()
943 .collect()
944 }
945
946 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
948 self.emphasis_spans()
949 .iter()
950 .filter(|span| span.line == line_num)
951 .cloned()
952 .collect()
953 }
954
955 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
957 self.table_rows()
958 .iter()
959 .filter(|row| row.line == line_num)
960 .cloned()
961 .collect()
962 }
963
964 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
966 self.bare_urls()
967 .iter()
968 .filter(|url| url.line == line_num)
969 .cloned()
970 .collect()
971 }
972
973 #[inline]
979 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
980 let idx = match lines.binary_search_by(|line| {
982 if byte_offset < line.byte_offset {
983 std::cmp::Ordering::Greater
984 } else if byte_offset > line.byte_offset + line.byte_len {
985 std::cmp::Ordering::Less
986 } else {
987 std::cmp::Ordering::Equal
988 }
989 }) {
990 Ok(idx) => idx,
991 Err(idx) => idx.saturating_sub(1),
992 };
993
994 let line = &lines[idx];
995 let line_num = idx + 1;
996 let col = byte_offset.saturating_sub(line.byte_offset);
997
998 (idx, line_num, col)
999 }
1000
1001 #[inline]
1003 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1004 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1006
1007 if idx > 0 {
1009 let span = &code_spans[idx - 1];
1010 if offset >= span.byte_offset && offset < span.byte_end {
1011 return true;
1012 }
1013 }
1014
1015 false
1016 }
1017
1018 fn parse_links(
1020 content: &'a str,
1021 lines: &[LineInfo],
1022 code_blocks: &[(usize, usize)],
1023 code_spans: &[CodeSpan],
1024 flavor: MarkdownFlavor,
1025 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1026 ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1027 use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1028 use std::collections::HashSet;
1029
1030 let mut links = Vec::with_capacity(content.len() / 500);
1031 let mut broken_links = Vec::new();
1032 let mut footnote_refs = Vec::new();
1033
1034 let mut found_positions = HashSet::new();
1036
1037 let mut options = Options::empty();
1047 options.insert(Options::ENABLE_WIKILINKS);
1048 options.insert(Options::ENABLE_FOOTNOTES);
1049
1050 let parser = Parser::new_with_broken_link_callback(
1051 content,
1052 options,
1053 Some(|link: BrokenLink<'_>| {
1054 broken_links.push(BrokenLinkInfo {
1055 reference: link.reference.to_string(),
1056 span: link.span.clone(),
1057 });
1058 None
1059 }),
1060 )
1061 .into_offset_iter();
1062
1063 let mut link_stack: Vec<(
1064 usize,
1065 usize,
1066 pulldown_cmark::CowStr<'a>,
1067 LinkType,
1068 pulldown_cmark::CowStr<'a>,
1069 )> = Vec::new();
1070 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1073 match event {
1074 Event::Start(Tag::Link {
1075 link_type,
1076 dest_url,
1077 id,
1078 ..
1079 }) => {
1080 link_stack.push((range.start, range.end, dest_url, link_type, id));
1082 text_chunks.clear();
1083 }
1084 Event::Text(text) if !link_stack.is_empty() => {
1085 text_chunks.push((text.to_string(), range.start, range.end));
1087 }
1088 Event::Code(code) if !link_stack.is_empty() => {
1089 let code_text = format!("`{code}`");
1091 text_chunks.push((code_text, range.start, range.end));
1092 }
1093 Event::End(TagEnd::Link) => {
1094 if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1095 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1097 text_chunks.clear();
1098 continue;
1099 }
1100
1101 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1103
1104 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1106 text_chunks.clear();
1107 continue;
1108 }
1109
1110 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1111
1112 let is_reference = matches!(
1113 link_type,
1114 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1115 );
1116
1117 let link_text = if start_pos < content.len() {
1120 let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1121
1122 let mut close_pos = None;
1126 let mut depth = 0;
1127 let mut in_code_span = false;
1128
1129 for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1130 let mut backslash_count = 0;
1132 let mut j = i;
1133 while j > 0 && link_bytes[j - 1] == b'\\' {
1134 backslash_count += 1;
1135 j -= 1;
1136 }
1137 let is_escaped = backslash_count % 2 != 0;
1138
1139 if byte == b'`' && !is_escaped {
1141 in_code_span = !in_code_span;
1142 }
1143
1144 if !is_escaped && !in_code_span {
1146 if byte == b'[' {
1147 depth += 1;
1148 } else if byte == b']' {
1149 if depth == 0 {
1150 close_pos = Some(i);
1152 break;
1153 } else {
1154 depth -= 1;
1155 }
1156 }
1157 }
1158 }
1159
1160 if let Some(pos) = close_pos {
1161 Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1162 } else {
1163 Cow::Borrowed("")
1164 }
1165 } else {
1166 Cow::Borrowed("")
1167 };
1168
1169 let reference_id = if is_reference && !ref_id.is_empty() {
1171 Some(Cow::Owned(ref_id.to_lowercase()))
1172 } else if is_reference {
1173 Some(Cow::Owned(link_text.to_lowercase()))
1175 } else {
1176 None
1177 };
1178
1179 let has_escaped_bang = start_pos >= 2
1183 && content.as_bytes().get(start_pos - 2) == Some(&b'\\')
1184 && content.as_bytes().get(start_pos - 1) == Some(&b'!');
1185
1186 let has_escaped_bracket =
1189 start_pos >= 1 && content.as_bytes().get(start_pos - 1) == Some(&b'\\');
1190
1191 if has_escaped_bang || has_escaped_bracket {
1192 text_chunks.clear();
1193 continue; }
1195
1196 found_positions.insert(start_pos);
1198
1199 links.push(ParsedLink {
1200 line: line_num,
1201 start_col: col_start,
1202 end_col: col_end,
1203 byte_offset: start_pos,
1204 byte_end: range.end,
1205 text: link_text,
1206 url: Cow::Owned(url.to_string()),
1207 is_reference,
1208 reference_id,
1209 link_type,
1210 });
1211
1212 text_chunks.clear();
1213 }
1214 }
1215 Event::FootnoteReference(footnote_id) => {
1216 if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1219 continue;
1220 }
1221
1222 let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1223 footnote_refs.push(FootnoteRef {
1224 id: footnote_id.to_string(),
1225 line: line_num,
1226 byte_offset: range.start,
1227 byte_end: range.end,
1228 });
1229 }
1230 _ => {}
1231 }
1232 }
1233
1234 for cap in LINK_PATTERN.captures_iter(content) {
1238 let full_match = cap.get(0).unwrap();
1239 let match_start = full_match.start();
1240 let match_end = full_match.end();
1241
1242 if found_positions.contains(&match_start) {
1244 continue;
1245 }
1246
1247 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1249 continue;
1250 }
1251
1252 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1254 continue;
1255 }
1256
1257 if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1259 continue;
1260 }
1261
1262 if Self::is_offset_in_code_span(code_spans, match_start) {
1264 continue;
1265 }
1266
1267 if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1269 continue;
1270 }
1271
1272 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1274
1275 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1277 continue;
1278 }
1279
1280 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1281
1282 let text = cap.get(1).map_or("", |m| m.as_str());
1283
1284 if let Some(ref_id) = cap.get(6) {
1286 let ref_id_str = ref_id.as_str();
1287 let normalized_ref = if ref_id_str.is_empty() {
1288 Cow::Owned(text.to_lowercase()) } else {
1290 Cow::Owned(ref_id_str.to_lowercase())
1291 };
1292
1293 links.push(ParsedLink {
1295 line: line_num,
1296 start_col: col_start,
1297 end_col: col_end,
1298 byte_offset: match_start,
1299 byte_end: match_end,
1300 text: Cow::Borrowed(text),
1301 url: Cow::Borrowed(""), is_reference: true,
1303 reference_id: Some(normalized_ref),
1304 link_type: LinkType::Reference, });
1306 }
1307 }
1308
1309 (links, broken_links, footnote_refs)
1310 }
1311
1312 fn parse_images(
1314 content: &'a str,
1315 lines: &[LineInfo],
1316 code_blocks: &[(usize, usize)],
1317 code_spans: &[CodeSpan],
1318 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1319 ) -> Vec<ParsedImage<'a>> {
1320 use crate::utils::skip_context::is_in_html_comment_ranges;
1321 use std::collections::HashSet;
1322
1323 let mut images = Vec::with_capacity(content.len() / 1000);
1325 let mut found_positions = HashSet::new();
1326
1327 let parser = Parser::new(content).into_offset_iter();
1329 let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1330 Vec::new();
1331 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1334 match event {
1335 Event::Start(Tag::Image {
1336 link_type,
1337 dest_url,
1338 id,
1339 ..
1340 }) => {
1341 image_stack.push((range.start, dest_url, link_type, id));
1342 text_chunks.clear();
1343 }
1344 Event::Text(text) if !image_stack.is_empty() => {
1345 text_chunks.push((text.to_string(), range.start, range.end));
1346 }
1347 Event::Code(code) if !image_stack.is_empty() => {
1348 let code_text = format!("`{code}`");
1349 text_chunks.push((code_text, range.start, range.end));
1350 }
1351 Event::End(TagEnd::Image) => {
1352 if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1353 if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1355 continue;
1356 }
1357
1358 if Self::is_offset_in_code_span(code_spans, start_pos) {
1360 continue;
1361 }
1362
1363 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1365 continue;
1366 }
1367
1368 let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1370 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1371
1372 let is_reference = matches!(
1373 link_type,
1374 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1375 );
1376
1377 let alt_text = if start_pos < content.len() {
1380 let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1381
1382 let mut close_pos = None;
1385 let mut depth = 0;
1386
1387 if image_bytes.len() > 2 {
1388 for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1389 let mut backslash_count = 0;
1391 let mut j = i;
1392 while j > 0 && image_bytes[j - 1] == b'\\' {
1393 backslash_count += 1;
1394 j -= 1;
1395 }
1396 let is_escaped = backslash_count % 2 != 0;
1397
1398 if !is_escaped {
1399 if byte == b'[' {
1400 depth += 1;
1401 } else if byte == b']' {
1402 if depth == 0 {
1403 close_pos = Some(i);
1405 break;
1406 } else {
1407 depth -= 1;
1408 }
1409 }
1410 }
1411 }
1412 }
1413
1414 if let Some(pos) = close_pos {
1415 Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1416 } else {
1417 Cow::Borrowed("")
1418 }
1419 } else {
1420 Cow::Borrowed("")
1421 };
1422
1423 let reference_id = if is_reference && !ref_id.is_empty() {
1424 Some(Cow::Owned(ref_id.to_lowercase()))
1425 } else if is_reference {
1426 Some(Cow::Owned(alt_text.to_lowercase())) } else {
1428 None
1429 };
1430
1431 found_positions.insert(start_pos);
1432 images.push(ParsedImage {
1433 line: line_num,
1434 start_col: col_start,
1435 end_col: col_end,
1436 byte_offset: start_pos,
1437 byte_end: range.end,
1438 alt_text,
1439 url: Cow::Owned(url.to_string()),
1440 is_reference,
1441 reference_id,
1442 link_type,
1443 });
1444 }
1445 }
1446 _ => {}
1447 }
1448 }
1449
1450 for cap in IMAGE_PATTERN.captures_iter(content) {
1452 let full_match = cap.get(0).unwrap();
1453 let match_start = full_match.start();
1454 let match_end = full_match.end();
1455
1456 if found_positions.contains(&match_start) {
1458 continue;
1459 }
1460
1461 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1463 continue;
1464 }
1465
1466 if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1468 || Self::is_offset_in_code_span(code_spans, match_start)
1469 || is_in_html_comment_ranges(html_comment_ranges, match_start)
1470 {
1471 continue;
1472 }
1473
1474 if let Some(ref_id) = cap.get(6) {
1476 let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1477 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1478 let alt_text = cap.get(1).map_or("", |m| m.as_str());
1479 let ref_id_str = ref_id.as_str();
1480 let normalized_ref = if ref_id_str.is_empty() {
1481 Cow::Owned(alt_text.to_lowercase())
1482 } else {
1483 Cow::Owned(ref_id_str.to_lowercase())
1484 };
1485
1486 images.push(ParsedImage {
1487 line: line_num,
1488 start_col: col_start,
1489 end_col: col_end,
1490 byte_offset: match_start,
1491 byte_end: match_end,
1492 alt_text: Cow::Borrowed(alt_text),
1493 url: Cow::Borrowed(""),
1494 is_reference: true,
1495 reference_id: Some(normalized_ref),
1496 link_type: LinkType::Reference, });
1498 }
1499 }
1500
1501 images
1502 }
1503
1504 fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1506 let mut refs = Vec::with_capacity(lines.len() / 20); for (line_idx, line_info) in lines.iter().enumerate() {
1510 if line_info.in_code_block {
1512 continue;
1513 }
1514
1515 let line = line_info.content(content);
1516 let line_num = line_idx + 1;
1517
1518 if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1519 let id = cap.get(1).unwrap().as_str().to_lowercase();
1520 let url = cap.get(2).unwrap().as_str().to_string();
1521 let title = cap.get(3).or_else(|| cap.get(4)).map(|m| m.as_str().to_string());
1522
1523 let match_obj = cap.get(0).unwrap();
1526 let byte_offset = line_info.byte_offset + match_obj.start();
1527 let byte_end = line_info.byte_offset + match_obj.end();
1528
1529 refs.push(ReferenceDef {
1530 line: line_num,
1531 id,
1532 url,
1533 title,
1534 byte_offset,
1535 byte_end,
1536 });
1537 }
1538 }
1539
1540 refs
1541 }
1542
/// Splits a line into its blockquote prefix and the text that follows it.
///
/// The prefix is the leading whitespace, the first `>` marker and any whitespace
/// directly after that marker. Returns `None` when the first non-whitespace
/// character is not `>`. Only one marker is consumed, so `>> x` yields
/// `(">", "> x")`.
#[inline]
fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
    let after_marker = line.trim_start().strip_prefix('>')?;
    let body = after_marker.trim_start();

    // Everything that is not body text belongs to the prefix.
    let prefix_len = line.len() - body.len();
    Some((&line[..prefix_len], body))
}
1561
/// Parses an unordered list marker at the start of `line`.
///
/// On success returns `(leading_whitespace, marker, spacing_after_marker, content)`
/// where the marker is one of `-`, `*` or `+`. Only spaces and tabs count as
/// whitespace here. The spacing may be empty; callers decide whether that still
/// qualifies as a list item. Returns `None` if no marker follows the indentation.
#[inline]
fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
    fn is_blank(byte: u8) -> bool {
        matches!(byte, b' ' | b'\t')
    }

    let raw = line.as_bytes();
    let marker_pos = raw.iter().take_while(|&&b| is_blank(b)).count();

    let marker = match raw.get(marker_pos)? {
        b'-' => '-',
        b'*' => '*',
        b'+' => '+',
        _ => return None,
    };

    // The marker is a single ASCII byte, so all slice points below are char boundaries.
    let spacing_start = marker_pos + 1;
    let spacing_len = raw[spacing_start..].iter().take_while(|&&b| is_blank(b)).count();
    let content_start = spacing_start + spacing_len;

    Some((
        &line[..marker_pos],
        marker,
        &line[spacing_start..content_start],
        &line[content_start..],
    ))
}
1594
/// Parses an ordered list marker at the start of `line`.
///
/// On success returns
/// `(leading_whitespace, number, delimiter, spacing_after_marker, content)` where
/// `number` is a non-empty run of ASCII digits and `delimiter` is `.` or `)`.
/// Only spaces and tabs count as whitespace. The spacing may be empty; callers
/// decide whether that still qualifies as a list item.
#[inline]
fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
    fn is_blank(byte: u8) -> bool {
        matches!(byte, b' ' | b'\t')
    }

    let raw = line.as_bytes();

    let number_start = raw.iter().take_while(|&&b| is_blank(b)).count();
    let digit_count = raw[number_start..].iter().take_while(|b| b.is_ascii_digit()).count();
    if digit_count == 0 {
        return None;
    }

    let delimiter_pos = number_start + digit_count;
    let delimiter = match raw.get(delimiter_pos)? {
        b'.' => '.',
        b')' => ')',
        _ => return None,
    };

    // Digits and delimiter are ASCII, so every slice point is a char boundary.
    let spacing_start = delimiter_pos + 1;
    let spacing_len = raw[spacing_start..].iter().take_while(|&&b| is_blank(b)).count();
    let content_start = spacing_start + spacing_len;

    Some((
        &line[..number_start],
        &line[number_start..delimiter_pos],
        delimiter,
        &line[spacing_start..content_start],
        &line[content_start..],
    ))
}
1642
/// Builds a per-line flag vector telling whether each line overlaps a code block.
///
/// `line_offsets` holds the starting byte offset of every line (ascending) and
/// `code_blocks` holds `(start, end)` byte ranges. The result has one entry per
/// element of `line_offsets`.
fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
    // Move an offset backwards until it sits on a UTF-8 char boundary.
    let floor_to_boundary = |mut pos: usize| {
        while pos > 0 && !content.is_char_boundary(pos) {
            pos -= 1;
        }
        pos
    };
    // Clamp an offset to the content length, then move it forwards to a boundary.
    let ceil_to_boundary = |pos: usize| {
        let mut pos = pos.min(content.len());
        while pos < content.len() && !content.is_char_boundary(pos) {
            pos += 1;
        }
        pos
    };

    let mut flags = vec![false; line_offsets.len()];

    for &(block_start, block_end) in code_blocks {
        let range_start = floor_to_boundary(block_start);
        let range_end = ceil_to_boundary(block_end);

        // First flagged line: the last line starting at or before the range start.
        let first_line = line_offsets
            .partition_point(|&offset| offset <= range_start)
            .saturating_sub(1);
        // One past the last flagged line: the first line starting at or after the range end.
        let last_line = line_offsets.partition_point(|&offset| offset < range_end);

        // `get_mut` yields `None` for an inverted range, which then marks nothing.
        if let Some(covered) = flags.get_mut(first_line..last_line) {
            covered.fill(true);
        }
    }

    flags
}
1702
1703 fn compute_basic_line_info(
1705 content: &str,
1706 line_offsets: &[usize],
1707 code_blocks: &[(usize, usize)],
1708 flavor: MarkdownFlavor,
1709 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1710 autodoc_ranges: &[crate::utils::skip_context::ByteRange],
1711 ) -> Vec<LineInfo> {
1712 let content_lines: Vec<&str> = content.lines().collect();
1713 let mut lines = Vec::with_capacity(content_lines.len());
1714
1715 let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
1717
1718 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
1721
1722 for (i, line) in content_lines.iter().enumerate() {
1723 let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
1724 let indent = line.len() - line.trim_start().len();
1725
1726 let blockquote_parse = Self::parse_blockquote_prefix(line);
1728
1729 let is_blank = if let Some((_, content)) = blockquote_parse {
1731 content.trim().is_empty()
1733 } else {
1734 line.trim().is_empty()
1735 };
1736
1737 let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
1739
1740 let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
1742 && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
1743 let in_html_comment =
1745 crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, byte_offset);
1746 let list_item = if !(in_code_block
1747 || is_blank
1748 || in_mkdocstrings
1749 || in_html_comment
1750 || (front_matter_end > 0 && i < front_matter_end))
1751 {
1752 let (line_for_list_check, blockquote_prefix_len) = if let Some((prefix, content)) = blockquote_parse {
1754 (content, prefix.len())
1755 } else {
1756 (&**line, 0)
1757 };
1758
1759 if let Some((leading_spaces, marker, spacing, _content)) =
1760 Self::parse_unordered_list(line_for_list_check)
1761 {
1762 let marker_column = blockquote_prefix_len + leading_spaces.len();
1763 let content_column = marker_column + 1 + spacing.len();
1764
1765 if spacing.is_empty() {
1772 None
1773 } else {
1774 Some(ListItemInfo {
1775 marker: marker.to_string(),
1776 is_ordered: false,
1777 number: None,
1778 marker_column,
1779 content_column,
1780 })
1781 }
1782 } else if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
1783 Self::parse_ordered_list(line_for_list_check)
1784 {
1785 let marker = format!("{number_str}{delimiter}");
1786 let marker_column = blockquote_prefix_len + leading_spaces.len();
1787 let content_column = marker_column + marker.len() + spacing.len();
1788
1789 if spacing.is_empty() {
1792 None
1793 } else {
1794 Some(ListItemInfo {
1795 marker,
1796 is_ordered: true,
1797 number: number_str.parse().ok(),
1798 marker_column,
1799 content_column,
1800 })
1801 }
1802 } else {
1803 None
1804 }
1805 } else {
1806 None
1807 };
1808
1809 lines.push(LineInfo {
1810 byte_offset,
1811 byte_len: line.len(),
1812 indent,
1813 is_blank,
1814 in_code_block,
1815 in_front_matter: front_matter_end > 0 && i < front_matter_end,
1816 in_html_block: false, in_html_comment,
1818 list_item,
1819 heading: None, blockquote: None, in_mkdocstrings,
1822 in_esm_block: false, in_code_span_continuation: false, });
1825 }
1826
1827 lines
1828 }
1829
1830 fn detect_headings_and_blockquotes(
1832 content: &str,
1833 lines: &mut [LineInfo],
1834 flavor: MarkdownFlavor,
1835 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1836 ) {
1837 static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
1839 LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
1840 static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
1841 LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
1842
1843 let content_lines: Vec<&str> = content.lines().collect();
1844
1845 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
1847
1848 for i in 0..lines.len() {
1850 if lines[i].in_code_block {
1851 continue;
1852 }
1853
1854 if front_matter_end > 0 && i < front_matter_end {
1856 continue;
1857 }
1858
1859 if lines[i].in_html_block {
1861 continue;
1862 }
1863
1864 let line = content_lines[i];
1865
1866 if let Some(bq) = parse_blockquote_detailed(line) {
1868 let nesting_level = bq.markers.len(); let marker_column = bq.indent.len();
1870
1871 let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
1873
1874 let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
1876 let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
1879
1880 let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
1884
1885 lines[i].blockquote = Some(BlockquoteInfo {
1886 nesting_level,
1887 indent: bq.indent.to_string(),
1888 marker_column,
1889 prefix,
1890 content: bq.content.to_string(),
1891 has_no_space_after_marker: has_no_space,
1892 has_multiple_spaces_after_marker: has_multiple_spaces,
1893 needs_md028_fix,
1894 });
1895 }
1896
1897 if lines[i].is_blank {
1899 continue;
1900 }
1901
1902 let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
1905 crate::utils::mkdocs_snippets::is_snippet_section_start(line)
1906 || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
1907 } else {
1908 false
1909 };
1910
1911 if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
1912 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
1914 continue;
1915 }
1916 let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
1917 let hashes = caps.get(2).map_or("", |m| m.as_str());
1918 let spaces_after = caps.get(3).map_or("", |m| m.as_str());
1919 let rest = caps.get(4).map_or("", |m| m.as_str());
1920
1921 let level = hashes.len() as u8;
1922 let marker_column = leading_spaces.len();
1923
1924 let (text, has_closing, closing_seq) = {
1926 let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
1928 if rest[id_start..].trim_end().ends_with('}') {
1930 (&rest[..id_start], &rest[id_start..])
1932 } else {
1933 (rest, "")
1934 }
1935 } else {
1936 (rest, "")
1937 };
1938
1939 let trimmed_rest = rest_without_id.trim_end();
1941 if let Some(last_hash_pos) = trimmed_rest.rfind('#') {
1942 let mut start_of_hashes = last_hash_pos;
1944 while start_of_hashes > 0 && trimmed_rest.chars().nth(start_of_hashes - 1) == Some('#') {
1945 start_of_hashes -= 1;
1946 }
1947
1948 let has_space_before = start_of_hashes == 0
1950 || trimmed_rest
1951 .chars()
1952 .nth(start_of_hashes - 1)
1953 .is_some_and(|c| c.is_whitespace());
1954
1955 let potential_closing = &trimmed_rest[start_of_hashes..];
1957 let is_all_hashes = potential_closing.chars().all(|c| c == '#');
1958
1959 if is_all_hashes && has_space_before {
1960 let closing_hashes = potential_closing.to_string();
1962 let text_part = if !custom_id_part.is_empty() {
1965 format!("{}{}", rest_without_id[..start_of_hashes].trim_end(), custom_id_part)
1968 } else {
1969 rest_without_id[..start_of_hashes].trim_end().to_string()
1970 };
1971 (text_part, true, closing_hashes)
1972 } else {
1973 (rest.to_string(), false, String::new())
1975 }
1976 } else {
1977 (rest.to_string(), false, String::new())
1979 }
1980 };
1981
1982 let content_column = marker_column + hashes.len() + spaces_after.len();
1983
1984 let raw_text = text.trim().to_string();
1986 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
1987
1988 if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
1990 let next_line = content_lines[i + 1];
1991 if !lines[i + 1].in_code_block
1992 && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
1993 && let Some(next_line_id) =
1994 crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
1995 {
1996 custom_id = Some(next_line_id);
1997 }
1998 }
1999
2000 lines[i].heading = Some(HeadingInfo {
2001 level,
2002 style: HeadingStyle::ATX,
2003 marker: hashes.to_string(),
2004 marker_column,
2005 content_column,
2006 text: clean_text,
2007 custom_id,
2008 raw_text,
2009 has_closing_sequence: has_closing,
2010 closing_sequence: closing_seq,
2011 });
2012 }
2013 else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2015 let next_line = content_lines[i + 1];
2016 if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2017 if front_matter_end > 0 && i < front_matter_end {
2019 continue;
2020 }
2021
2022 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2024 {
2025 continue;
2026 }
2027
2028 let underline = next_line.trim();
2029
2030 if underline == "---" {
2033 continue;
2034 }
2035
2036 let current_line_trimmed = line.trim();
2038 if current_line_trimmed.contains(':')
2039 && !current_line_trimmed.starts_with('#')
2040 && !current_line_trimmed.contains('[')
2041 && !current_line_trimmed.contains("](")
2042 {
2043 continue;
2045 }
2046
2047 let level = if underline.starts_with('=') { 1 } else { 2 };
2048 let style = if level == 1 {
2049 HeadingStyle::Setext1
2050 } else {
2051 HeadingStyle::Setext2
2052 };
2053
2054 let raw_text = line.trim().to_string();
2056 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2057
2058 if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2060 let attr_line = content_lines[i + 2];
2061 if !lines[i + 2].in_code_block
2062 && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2063 && let Some(attr_line_id) =
2064 crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2065 {
2066 custom_id = Some(attr_line_id);
2067 }
2068 }
2069
2070 lines[i].heading = Some(HeadingInfo {
2071 level,
2072 style,
2073 marker: underline.to_string(),
2074 marker_column: next_line.len() - next_line.trim_start().len(),
2075 content_column: lines[i].indent,
2076 text: clean_text,
2077 custom_id,
2078 raw_text,
2079 has_closing_sequence: false,
2080 closing_sequence: String::new(),
2081 });
2082 }
2083 }
2084 }
2085 }
2086
2087 fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2089 const BLOCK_ELEMENTS: &[&str] = &[
2091 "address",
2092 "article",
2093 "aside",
2094 "blockquote",
2095 "details",
2096 "dialog",
2097 "dd",
2098 "div",
2099 "dl",
2100 "dt",
2101 "fieldset",
2102 "figcaption",
2103 "figure",
2104 "footer",
2105 "form",
2106 "h1",
2107 "h2",
2108 "h3",
2109 "h4",
2110 "h5",
2111 "h6",
2112 "header",
2113 "hr",
2114 "li",
2115 "main",
2116 "nav",
2117 "ol",
2118 "p",
2119 "picture",
2120 "pre",
2121 "script",
2122 "section",
2123 "style",
2124 "table",
2125 "tbody",
2126 "td",
2127 "textarea",
2128 "tfoot",
2129 "th",
2130 "thead",
2131 "tr",
2132 "ul",
2133 ];
2134
2135 let mut i = 0;
2136 while i < lines.len() {
2137 if lines[i].in_code_block || lines[i].in_front_matter {
2139 i += 1;
2140 continue;
2141 }
2142
2143 let trimmed = lines[i].content(content).trim_start();
2144
2145 if trimmed.starts_with('<') && trimmed.len() > 1 {
2147 let after_bracket = &trimmed[1..];
2149 let is_closing = after_bracket.starts_with('/');
2150 let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2151
2152 let tag_name = tag_start
2154 .chars()
2155 .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2156 .collect::<String>()
2157 .to_lowercase();
2158
2159 if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2161 lines[i].in_html_block = true;
2163
2164 if !is_closing {
2167 let closing_tag = format!("</{tag_name}>");
2168 let allow_blank_lines = tag_name == "style" || tag_name == "script";
2170 let mut j = i + 1;
2171 while j < lines.len() && j < i + 100 {
2172 if !allow_blank_lines && lines[j].is_blank {
2175 break;
2176 }
2177
2178 lines[j].in_html_block = true;
2179
2180 if lines[j].content(content).contains(&closing_tag) {
2182 break;
2183 }
2184 j += 1;
2185 }
2186 }
2187 }
2188 }
2189
2190 i += 1;
2191 }
2192 }
2193
2194 fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2197 if !flavor.supports_esm_blocks() {
2199 return;
2200 }
2201
2202 for line in lines.iter_mut() {
2203 if line.is_blank || line.in_html_comment {
2205 continue;
2206 }
2207
2208 let trimmed = line.content(content).trim_start();
2210 if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
2211 line.in_esm_block = true;
2212 } else {
2213 break;
2215 }
2216 }
2217 }
2218
2219 fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
2221 let mut code_spans = Vec::new();
2222
2223 if !content.contains('`') {
2225 return code_spans;
2226 }
2227
2228 let parser = Parser::new(content).into_offset_iter();
2230
2231 for (event, range) in parser {
2232 if let Event::Code(_) = event {
2233 let start_pos = range.start;
2234 let end_pos = range.end;
2235
2236 let full_span = &content[start_pos..end_pos];
2238 let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
2239
2240 let content_start = start_pos + backtick_count;
2242 let content_end = end_pos - backtick_count;
2243 let span_content = if content_start < content_end {
2244 content[content_start..content_end].to_string()
2245 } else {
2246 String::new()
2247 };
2248
2249 let line_idx = lines
2252 .partition_point(|line| line.byte_offset <= start_pos)
2253 .saturating_sub(1);
2254 let line_num = line_idx + 1;
2255 let byte_col_start = start_pos - lines[line_idx].byte_offset;
2256
2257 let end_line_idx = lines
2259 .partition_point(|line| line.byte_offset <= end_pos)
2260 .saturating_sub(1);
2261 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
2262
2263 let line_content = lines[line_idx].content(content);
2266 let col_start = if byte_col_start <= line_content.len() {
2267 line_content[..byte_col_start].chars().count()
2268 } else {
2269 line_content.chars().count()
2270 };
2271
2272 let end_line_content = lines[end_line_idx].content(content);
2273 let col_end = if byte_col_end <= end_line_content.len() {
2274 end_line_content[..byte_col_end].chars().count()
2275 } else {
2276 end_line_content.chars().count()
2277 };
2278
2279 code_spans.push(CodeSpan {
2280 line: line_num,
2281 end_line: end_line_idx + 1,
2282 start_col: col_start,
2283 end_col: col_end,
2284 byte_offset: start_pos,
2285 byte_end: end_pos,
2286 backtick_count,
2287 content: span_content,
2288 });
2289 }
2290 }
2291
2292 code_spans.sort_by_key(|span| span.byte_offset);
2294
2295 code_spans
2296 }
2297
2298 fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
2309 const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
2311
2312 #[inline]
2315 fn reset_tracking_state(
2316 list_item: &ListItemInfo,
2317 has_list_breaking_content: &mut bool,
2318 min_continuation: &mut usize,
2319 ) {
2320 *has_list_breaking_content = false;
2321 let marker_width = if list_item.is_ordered {
2322 list_item.marker.len() + 1 } else {
2324 list_item.marker.len()
2325 };
2326 *min_continuation = if list_item.is_ordered {
2327 marker_width
2328 } else {
2329 UNORDERED_LIST_MIN_CONTINUATION_INDENT
2330 };
2331 }
2332
2333 let mut list_blocks = Vec::with_capacity(lines.len() / 10); let mut current_block: Option<ListBlock> = None;
2336 let mut last_list_item_line = 0;
2337 let mut current_indent_level = 0;
2338 let mut last_marker_width = 0;
2339
2340 let mut has_list_breaking_content_since_last_item = false;
2342 let mut min_continuation_for_tracking = 0;
2343
2344 for (line_idx, line_info) in lines.iter().enumerate() {
2345 let line_num = line_idx + 1;
2346
2347 if line_info.in_code_block {
2349 if let Some(ref mut block) = current_block {
2350 let min_continuation_indent =
2352 CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
2353
2354 let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
2356
2357 match context {
2358 CodeBlockContext::Indented => {
2359 block.end_line = line_num;
2361 continue;
2362 }
2363 CodeBlockContext::Standalone => {
2364 let completed_block = current_block.take().unwrap();
2366 list_blocks.push(completed_block);
2367 continue;
2368 }
2369 CodeBlockContext::Adjacent => {
2370 block.end_line = line_num;
2372 continue;
2373 }
2374 }
2375 } else {
2376 continue;
2378 }
2379 }
2380
2381 let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
2383 caps.get(0).unwrap().as_str().to_string()
2384 } else {
2385 String::new()
2386 };
2387
2388 if current_block.is_some()
2391 && line_info.list_item.is_none()
2392 && !line_info.is_blank
2393 && !line_info.in_code_span_continuation
2394 {
2395 let line_content = line_info.content(content).trim();
2396
2397 let breaks_list = line_info.heading.is_some()
2399 || line_content.starts_with("---")
2400 || line_content.starts_with("***")
2401 || line_content.starts_with("___")
2402 || (line_content.contains('|')
2403 && !line_content.contains("](")
2404 && !line_content.contains("http")
2405 && (line_content.matches('|').count() > 1
2406 || line_content.starts_with('|')
2407 || line_content.ends_with('|')))
2408 || line_content.starts_with(">")
2409 || (line_info.indent < min_continuation_for_tracking);
2410
2411 if breaks_list {
2412 has_list_breaking_content_since_last_item = true;
2413 }
2414 }
2415
2416 if line_info.in_code_span_continuation
2419 && line_info.list_item.is_none()
2420 && let Some(ref mut block) = current_block
2421 {
2422 block.end_line = line_num;
2423 }
2424
2425 if let Some(list_item) = &line_info.list_item {
2427 let item_indent = list_item.marker_column;
2429 let nesting = item_indent / 2; if let Some(ref mut block) = current_block {
2432 let is_nested = nesting > block.nesting_level;
2436 let same_type =
2437 (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
2438 let same_context = block.blockquote_prefix == blockquote_prefix;
2439 let reasonable_distance = line_num <= last_list_item_line + 2; let marker_compatible =
2443 block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
2444
2445 let has_non_list_content = has_list_breaking_content_since_last_item;
2448
2449 let mut continues_list = if is_nested {
2453 same_context && reasonable_distance && !has_non_list_content
2455 } else {
2456 same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
2458 };
2459
2460 if !continues_list && reasonable_distance && line_num > 0 && block.end_line == line_num - 1 {
2463 if block.item_lines.contains(&(line_num - 1)) {
2465 continues_list = true;
2467 }
2468 }
2469
2470 if continues_list {
2471 block.end_line = line_num;
2473 block.item_lines.push(line_num);
2474
2475 block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
2477 list_item.marker.len() + 1
2478 } else {
2479 list_item.marker.len()
2480 });
2481
2482 if !block.is_ordered
2484 && block.marker.is_some()
2485 && block.marker.as_ref() != Some(&list_item.marker)
2486 {
2487 block.marker = None;
2489 }
2490
2491 reset_tracking_state(
2493 list_item,
2494 &mut has_list_breaking_content_since_last_item,
2495 &mut min_continuation_for_tracking,
2496 );
2497 } else {
2498 list_blocks.push(block.clone());
2501
2502 *block = ListBlock {
2503 start_line: line_num,
2504 end_line: line_num,
2505 is_ordered: list_item.is_ordered,
2506 marker: if list_item.is_ordered {
2507 None
2508 } else {
2509 Some(list_item.marker.clone())
2510 },
2511 blockquote_prefix: blockquote_prefix.clone(),
2512 item_lines: vec![line_num],
2513 nesting_level: nesting,
2514 max_marker_width: if list_item.is_ordered {
2515 list_item.marker.len() + 1
2516 } else {
2517 list_item.marker.len()
2518 },
2519 };
2520
2521 reset_tracking_state(
2523 list_item,
2524 &mut has_list_breaking_content_since_last_item,
2525 &mut min_continuation_for_tracking,
2526 );
2527 }
2528 } else {
2529 current_block = Some(ListBlock {
2531 start_line: line_num,
2532 end_line: line_num,
2533 is_ordered: list_item.is_ordered,
2534 marker: if list_item.is_ordered {
2535 None
2536 } else {
2537 Some(list_item.marker.clone())
2538 },
2539 blockquote_prefix,
2540 item_lines: vec![line_num],
2541 nesting_level: nesting,
2542 max_marker_width: list_item.marker.len(),
2543 });
2544
2545 reset_tracking_state(
2547 list_item,
2548 &mut has_list_breaking_content_since_last_item,
2549 &mut min_continuation_for_tracking,
2550 );
2551 }
2552
2553 last_list_item_line = line_num;
2554 current_indent_level = item_indent;
2555 last_marker_width = if list_item.is_ordered {
2556 list_item.marker.len() + 1 } else {
2558 list_item.marker.len()
2559 };
2560 } else if let Some(ref mut block) = current_block {
2561 let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
2571 lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
2572 } else {
2573 false
2574 };
2575
2576 let min_continuation_indent = if block.is_ordered {
2580 current_indent_level + last_marker_width
2581 } else {
2582 current_indent_level + 2 };
2584
2585 if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
2586 block.end_line = line_num;
2588 } else if line_info.is_blank {
2589 let mut check_idx = line_idx + 1;
2592 let mut found_continuation = false;
2593
2594 while check_idx < lines.len() && lines[check_idx].is_blank {
2596 check_idx += 1;
2597 }
2598
2599 if check_idx < lines.len() {
2600 let next_line = &lines[check_idx];
2601 if !next_line.in_code_block && next_line.indent >= min_continuation_indent {
2603 found_continuation = true;
2604 }
2605 else if !next_line.in_code_block
2607 && next_line.list_item.is_some()
2608 && let Some(item) = &next_line.list_item
2609 {
2610 let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
2611 .find(next_line.content(content))
2612 .map_or(String::new(), |m| m.as_str().to_string());
2613 if item.marker_column == current_indent_level
2614 && item.is_ordered == block.is_ordered
2615 && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
2616 {
2617 let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
2620 if let Some(between_line) = lines.get(idx) {
2621 let between_content = between_line.content(content);
2622 let trimmed = between_content.trim();
2623 if trimmed.is_empty() {
2625 return false;
2626 }
2627 let line_indent = between_content.len() - between_content.trim_start().len();
2629
2630 if trimmed.starts_with("```")
2632 || trimmed.starts_with("~~~")
2633 || trimmed.starts_with("---")
2634 || trimmed.starts_with("***")
2635 || trimmed.starts_with("___")
2636 || trimmed.starts_with(">")
2637 || trimmed.contains('|') || between_line.heading.is_some()
2639 {
2640 return true; }
2642
2643 line_indent >= min_continuation_indent
2645 } else {
2646 false
2647 }
2648 });
2649
2650 if block.is_ordered {
2651 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2654 if let Some(between_line) = lines.get(idx) {
2655 let trimmed = between_line.content(content).trim();
2656 if trimmed.is_empty() {
2657 return false;
2658 }
2659 trimmed.starts_with("```")
2661 || trimmed.starts_with("~~~")
2662 || trimmed.starts_with("---")
2663 || trimmed.starts_with("***")
2664 || trimmed.starts_with("___")
2665 || trimmed.starts_with(">")
2666 || trimmed.contains('|') || between_line.heading.is_some()
2668 } else {
2669 false
2670 }
2671 });
2672 found_continuation = !has_structural_separators;
2673 } else {
2674 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2676 if let Some(between_line) = lines.get(idx) {
2677 let trimmed = between_line.content(content).trim();
2678 if trimmed.is_empty() {
2679 return false;
2680 }
2681 trimmed.starts_with("```")
2683 || trimmed.starts_with("~~~")
2684 || trimmed.starts_with("---")
2685 || trimmed.starts_with("***")
2686 || trimmed.starts_with("___")
2687 || trimmed.starts_with(">")
2688 || trimmed.contains('|') || between_line.heading.is_some()
2690 } else {
2691 false
2692 }
2693 });
2694 found_continuation = !has_structural_separators;
2695 }
2696 }
2697 }
2698 }
2699
2700 if found_continuation {
2701 block.end_line = line_num;
2703 } else {
2704 list_blocks.push(block.clone());
2706 current_block = None;
2707 }
2708 } else {
2709 let min_required_indent = if block.is_ordered {
2712 current_indent_level + last_marker_width
2713 } else {
2714 current_indent_level + 2
2715 };
2716
2717 let line_content = line_info.content(content).trim();
2722 let is_structural_separator = line_info.heading.is_some()
2723 || line_content.starts_with("```")
2724 || line_content.starts_with("~~~")
2725 || line_content.starts_with("---")
2726 || line_content.starts_with("***")
2727 || line_content.starts_with("___")
2728 || line_content.starts_with(">")
2729 || (line_content.contains('|')
2730 && !line_content.contains("](")
2731 && !line_content.contains("http")
2732 && (line_content.matches('|').count() > 1
2733 || line_content.starts_with('|')
2734 || line_content.ends_with('|'))); let is_lazy_continuation = !is_structural_separator
2739 && !line_info.is_blank
2740 && (line_info.indent == 0 || line_info.indent >= min_required_indent);
2741
2742 if is_lazy_continuation {
2743 let content_to_check = if !blockquote_prefix.is_empty() {
2746 line_info
2748 .content(content)
2749 .strip_prefix(&blockquote_prefix)
2750 .unwrap_or(line_info.content(content))
2751 .trim()
2752 } else {
2753 line_info.content(content).trim()
2754 };
2755
2756 let starts_with_uppercase = content_to_check.chars().next().is_some_and(|c| c.is_uppercase());
2757
2758 if starts_with_uppercase && last_list_item_line > 0 {
2761 list_blocks.push(block.clone());
2763 current_block = None;
2764 } else {
2765 block.end_line = line_num;
2767 }
2768 } else {
2769 list_blocks.push(block.clone());
2771 current_block = None;
2772 }
2773 }
2774 }
2775 }
2776
2777 if let Some(block) = current_block {
2779 list_blocks.push(block);
2780 }
2781
2782 merge_adjacent_list_blocks(content, &mut list_blocks, lines);
2784
2785 list_blocks
2786 }
2787
2788 fn compute_char_frequency(content: &str) -> CharFrequency {
2790 let mut frequency = CharFrequency::default();
2791
2792 for ch in content.chars() {
2793 match ch {
2794 '#' => frequency.hash_count += 1,
2795 '*' => frequency.asterisk_count += 1,
2796 '_' => frequency.underscore_count += 1,
2797 '-' => frequency.hyphen_count += 1,
2798 '+' => frequency.plus_count += 1,
2799 '>' => frequency.gt_count += 1,
2800 '|' => frequency.pipe_count += 1,
2801 '[' => frequency.bracket_count += 1,
2802 '`' => frequency.backtick_count += 1,
2803 '<' => frequency.lt_count += 1,
2804 '!' => frequency.exclamation_count += 1,
2805 '\n' => frequency.newline_count += 1,
2806 _ => {}
2807 }
2808 }
2809
2810 frequency
2811 }
2812
2813 fn parse_html_tags(
2815 content: &str,
2816 lines: &[LineInfo],
2817 code_blocks: &[(usize, usize)],
2818 flavor: MarkdownFlavor,
2819 ) -> Vec<HtmlTag> {
2820 static HTML_TAG_REGEX: LazyLock<regex::Regex> =
2821 LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
2822
2823 let mut html_tags = Vec::with_capacity(content.matches('<').count());
2824
2825 for cap in HTML_TAG_REGEX.captures_iter(content) {
2826 let full_match = cap.get(0).unwrap();
2827 let match_start = full_match.start();
2828 let match_end = full_match.end();
2829
2830 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2832 continue;
2833 }
2834
2835 let is_closing = !cap.get(1).unwrap().as_str().is_empty();
2836 let tag_name_original = cap.get(2).unwrap().as_str();
2837 let tag_name = tag_name_original.to_lowercase();
2838 let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
2839
2840 if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
2843 continue;
2844 }
2845
2846 let mut line_num = 1;
2848 let mut col_start = match_start;
2849 let mut col_end = match_end;
2850 for (idx, line_info) in lines.iter().enumerate() {
2851 if match_start >= line_info.byte_offset {
2852 line_num = idx + 1;
2853 col_start = match_start - line_info.byte_offset;
2854 col_end = match_end - line_info.byte_offset;
2855 } else {
2856 break;
2857 }
2858 }
2859
2860 html_tags.push(HtmlTag {
2861 line: line_num,
2862 start_col: col_start,
2863 end_col: col_end,
2864 byte_offset: match_start,
2865 byte_end: match_end,
2866 tag_name,
2867 is_closing,
2868 is_self_closing,
2869 raw_content: full_match.as_str().to_string(),
2870 });
2871 }
2872
2873 html_tags
2874 }
2875
2876 fn parse_emphasis_spans(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<EmphasisSpan> {
2878 static EMPHASIS_REGEX: LazyLock<regex::Regex> =
2879 LazyLock::new(|| regex::Regex::new(r"(\*{1,3}|_{1,3})([^*_\s][^*_]*?)(\*{1,3}|_{1,3})").unwrap());
2880
2881 let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
2882
2883 for cap in EMPHASIS_REGEX.captures_iter(content) {
2884 let full_match = cap.get(0).unwrap();
2885 let match_start = full_match.start();
2886 let match_end = full_match.end();
2887
2888 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2890 continue;
2891 }
2892
2893 let opening_markers = cap.get(1).unwrap().as_str();
2894 let content_part = cap.get(2).unwrap().as_str();
2895 let closing_markers = cap.get(3).unwrap().as_str();
2896
2897 if opening_markers.chars().next() != closing_markers.chars().next()
2899 || opening_markers.len() != closing_markers.len()
2900 {
2901 continue;
2902 }
2903
2904 let marker = opening_markers.chars().next().unwrap();
2905 let marker_count = opening_markers.len();
2906
2907 let mut line_num = 1;
2909 let mut col_start = match_start;
2910 let mut col_end = match_end;
2911 for (idx, line_info) in lines.iter().enumerate() {
2912 if match_start >= line_info.byte_offset {
2913 line_num = idx + 1;
2914 col_start = match_start - line_info.byte_offset;
2915 col_end = match_end - line_info.byte_offset;
2916 } else {
2917 break;
2918 }
2919 }
2920
2921 emphasis_spans.push(EmphasisSpan {
2922 line: line_num,
2923 start_col: col_start,
2924 end_col: col_end,
2925 byte_offset: match_start,
2926 byte_end: match_end,
2927 marker,
2928 marker_count,
2929 content: content_part.to_string(),
2930 });
2931 }
2932
2933 emphasis_spans
2934 }
2935
2936 fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
2938 let mut table_rows = Vec::with_capacity(lines.len() / 20);
2939
2940 for (line_idx, line_info) in lines.iter().enumerate() {
2941 if line_info.in_code_block || line_info.is_blank {
2943 continue;
2944 }
2945
2946 let line = line_info.content(content);
2947 let line_num = line_idx + 1;
2948
2949 if !line.contains('|') {
2951 continue;
2952 }
2953
2954 let parts: Vec<&str> = line.split('|').collect();
2956 let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
2957
2958 let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
2960 let mut column_alignments = Vec::new();
2961
2962 if is_separator {
2963 for part in &parts[1..parts.len() - 1] {
2964 let trimmed = part.trim();
2966 let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
2967 "center".to_string()
2968 } else if trimmed.ends_with(':') {
2969 "right".to_string()
2970 } else if trimmed.starts_with(':') {
2971 "left".to_string()
2972 } else {
2973 "none".to_string()
2974 };
2975 column_alignments.push(alignment);
2976 }
2977 }
2978
2979 table_rows.push(TableRow {
2980 line: line_num,
2981 is_separator,
2982 column_count,
2983 column_alignments,
2984 });
2985 }
2986
2987 table_rows
2988 }
2989
2990 fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
2992 let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
2993
2994 for cap in BARE_URL_PATTERN.captures_iter(content) {
2996 let full_match = cap.get(0).unwrap();
2997 let match_start = full_match.start();
2998 let match_end = full_match.end();
2999
3000 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3002 continue;
3003 }
3004
3005 let preceding_char = if match_start > 0 {
3007 content.chars().nth(match_start - 1)
3008 } else {
3009 None
3010 };
3011 let following_char = content.chars().nth(match_end);
3012
3013 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3014 continue;
3015 }
3016 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3017 continue;
3018 }
3019
3020 let url = full_match.as_str();
3021 let url_type = if url.starts_with("https://") {
3022 "https"
3023 } else if url.starts_with("http://") {
3024 "http"
3025 } else if url.starts_with("ftp://") {
3026 "ftp"
3027 } else {
3028 "other"
3029 };
3030
3031 let mut line_num = 1;
3033 let mut col_start = match_start;
3034 let mut col_end = match_end;
3035 for (idx, line_info) in lines.iter().enumerate() {
3036 if match_start >= line_info.byte_offset {
3037 line_num = idx + 1;
3038 col_start = match_start - line_info.byte_offset;
3039 col_end = match_end - line_info.byte_offset;
3040 } else {
3041 break;
3042 }
3043 }
3044
3045 bare_urls.push(BareUrl {
3046 line: line_num,
3047 start_col: col_start,
3048 end_col: col_end,
3049 byte_offset: match_start,
3050 byte_end: match_end,
3051 url: url.to_string(),
3052 url_type: url_type.to_string(),
3053 });
3054 }
3055
3056 for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
3058 let full_match = cap.get(0).unwrap();
3059 let match_start = full_match.start();
3060 let match_end = full_match.end();
3061
3062 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3064 continue;
3065 }
3066
3067 let preceding_char = if match_start > 0 {
3069 content.chars().nth(match_start - 1)
3070 } else {
3071 None
3072 };
3073 let following_char = content.chars().nth(match_end);
3074
3075 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3076 continue;
3077 }
3078 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3079 continue;
3080 }
3081
3082 let email = full_match.as_str();
3083
3084 let mut line_num = 1;
3086 let mut col_start = match_start;
3087 let mut col_end = match_end;
3088 for (idx, line_info) in lines.iter().enumerate() {
3089 if match_start >= line_info.byte_offset {
3090 line_num = idx + 1;
3091 col_start = match_start - line_info.byte_offset;
3092 col_end = match_end - line_info.byte_offset;
3093 } else {
3094 break;
3095 }
3096 }
3097
3098 bare_urls.push(BareUrl {
3099 line: line_num,
3100 start_col: col_start,
3101 end_col: col_end,
3102 byte_offset: match_start,
3103 byte_end: match_end,
3104 url: email.to_string(),
3105 url_type: "email".to_string(),
3106 });
3107 }
3108
3109 bare_urls
3110 }
3111}
3112
3113fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
3115 if list_blocks.len() < 2 {
3116 return;
3117 }
3118
3119 let mut merger = ListBlockMerger::new(content, lines);
3120 *list_blocks = merger.merge(list_blocks);
3121}
3122
3123struct ListBlockMerger<'a> {
3125 content: &'a str,
3126 lines: &'a [LineInfo],
3127}
3128
3129impl<'a> ListBlockMerger<'a> {
3130 fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
3131 Self { content, lines }
3132 }
3133
3134 fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
3135 let mut merged = Vec::with_capacity(list_blocks.len());
3136 let mut current = list_blocks[0].clone();
3137
3138 for next in list_blocks.iter().skip(1) {
3139 if self.should_merge_blocks(¤t, next) {
3140 current = self.merge_two_blocks(current, next);
3141 } else {
3142 merged.push(current);
3143 current = next.clone();
3144 }
3145 }
3146
3147 merged.push(current);
3148 merged
3149 }
3150
3151 fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
3153 if !self.blocks_are_compatible(current, next) {
3155 return false;
3156 }
3157
3158 let spacing = self.analyze_spacing_between(current, next);
3160 match spacing {
3161 BlockSpacing::Consecutive => true,
3162 BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
3163 BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
3164 self.can_merge_with_content_between(current, next)
3165 }
3166 }
3167 }
3168
3169 fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
3171 current.is_ordered == next.is_ordered
3172 && current.blockquote_prefix == next.blockquote_prefix
3173 && current.nesting_level == next.nesting_level
3174 }
3175
3176 fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
3178 let gap = next.start_line - current.end_line;
3179
3180 match gap {
3181 1 => BlockSpacing::Consecutive,
3182 2 => BlockSpacing::SingleBlank,
3183 _ if gap > 2 => {
3184 if self.has_only_blank_lines_between(current, next) {
3185 BlockSpacing::MultipleBlanks
3186 } else {
3187 BlockSpacing::ContentBetween
3188 }
3189 }
3190 _ => BlockSpacing::Consecutive, }
3192 }
3193
3194 fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3196 if has_meaningful_content_between(self.content, current, next, self.lines) {
3199 return false; }
3201
3202 !current.is_ordered && current.marker == next.marker
3204 }
3205
3206 fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3208 if has_meaningful_content_between(self.content, current, next, self.lines) {
3210 return false; }
3212
3213 current.is_ordered && next.is_ordered
3215 }
3216
3217 fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3219 for line_num in (current.end_line + 1)..next.start_line {
3220 if let Some(line_info) = self.lines.get(line_num - 1)
3221 && !line_info.content(self.content).trim().is_empty()
3222 {
3223 return false;
3224 }
3225 }
3226 true
3227 }
3228
3229 fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
3231 current.end_line = next.end_line;
3232 current.item_lines.extend_from_slice(&next.item_lines);
3233
3234 current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
3236
3237 if !current.is_ordered && self.markers_differ(¤t, next) {
3239 current.marker = None; }
3241
3242 current
3243 }
3244
3245 fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
3247 current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
3248 }
3249}
3250
/// How two successive list blocks are separated, as classified by
/// `ListBlockMerger::analyze_spacing_between` from the difference between the next
/// block's start line and the current block's end line.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BlockSpacing {
    /// The line difference is at most 1: nothing lies between the blocks.
    Consecutive,
    /// The line difference is exactly 2: one line lies between the blocks.
    SingleBlank,
    /// The line difference exceeds 2 and every line in between is blank.
    MultipleBlanks,
    /// The line difference exceeds 2 and at least one line in between is not blank.
    ContentBetween,
}
3259
3260fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
3262 for line_num in (current.end_line + 1)..next.start_line {
3264 if let Some(line_info) = lines.get(line_num - 1) {
3265 let trimmed = line_info.content(content).trim();
3267
3268 if trimmed.is_empty() {
3270 continue;
3271 }
3272
3273 if line_info.heading.is_some() {
3277 return true; }
3279
3280 if is_horizontal_rule(trimmed) {
3282 return true; }
3284
3285 if trimmed.contains('|') && trimmed.len() > 1 {
3288 if !trimmed.contains("](") && !trimmed.contains("http") {
3290 let pipe_count = trimmed.matches('|').count();
3292 if pipe_count > 1 || trimmed.starts_with('|') || trimmed.ends_with('|') {
3293 return true; }
3295 }
3296 }
3297
3298 if trimmed.starts_with('>') {
3300 return true; }
3302
3303 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
3305 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3306
3307 let min_continuation_indent = if current.is_ordered {
3309 current.nesting_level + current.max_marker_width + 1 } else {
3311 current.nesting_level + 2
3312 };
3313
3314 if line_indent < min_continuation_indent {
3315 return true; }
3318 }
3319
3320 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3322
3323 let min_indent = if current.is_ordered {
3325 current.nesting_level + current.max_marker_width
3326 } else {
3327 current.nesting_level + 2
3328 };
3329
3330 if line_indent < min_indent {
3332 return true; }
3334
3335 }
3338 }
3339
3340 false
3342}
3343
/// Returns true when `trimmed` is a horizontal rule: it starts with `-`, `*` or
/// `_`, contains at least three of that same character, and holds nothing else
/// except spaces and tabs.
///
/// The caller is expected to pass an already-trimmed line.
fn is_horizontal_rule(trimmed: &str) -> bool {
    // The first character fixes which marker the whole line must use.
    let marker = match trimmed.chars().next() {
        Some(c @ ('-' | '*' | '_')) => c,
        _ => return false,
    };

    // Scan the characters directly; no intermediate `Vec<char>` is needed.
    let mut marker_count = 0usize;
    for ch in trimmed.chars() {
        match ch {
            c if c == marker => marker_count += 1,
            ' ' | '\t' => {}
            _ => return false,
        }
    }

    marker_count >= 3
}
3367
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty document has a single line offset and no line records.
    #[test]
    fn test_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard);
        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }

    /// Offsets within a single line map to 1-based (line, column) pairs.
    #[test]
    fn test_single_line() {
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard);
        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
    }

    /// Line offsets and offset-to-position mapping across several lines.
    #[test]
    fn test_multi_line() {
        let content = "# Title\n\nSecond line\nThird line";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);
        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(8), (2, 1));
        assert_eq!(ctx.offset_to_line_col(9), (3, 1));
        assert_eq!(ctx.offset_to_line_col(15), (3, 7));
        assert_eq!(ctx.offset_to_line_col(21), (4, 1));
    }

    /// Per-line records expose content, byte offset, indentation and flags.
    #[test]
    fn test_line_info() {
        // The second line is indented by four spaces, matching the `indent == 4`
        // assertions below.
        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        assert_eq!(ctx.lines.len(), 7);

        let line1 = &ctx.lines[0];
        assert_eq!(line1.content(ctx.content), "# Title");
        assert_eq!(line1.byte_offset, 0);
        assert_eq!(line1.indent, 0);
        assert!(!line1.is_blank);
        assert!(!line1.in_code_block);
        assert!(line1.list_item.is_none());

        let line2 = &ctx.lines[1];
        assert_eq!(line2.content(ctx.content), "    indented");
        assert_eq!(line2.byte_offset, 8);
        assert_eq!(line2.indent, 4);
        assert!(!line2.is_blank);

        let line3 = &ctx.lines[2];
        assert_eq!(line3.content(ctx.content), "");
        assert!(line3.is_blank);

        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
    }

    /// List items are detected with their marker, kind and columns.
    #[test]
    fn test_list_item_detection() {
        // The nested `*` item sits at column 2 (see `marker_column` below). No
        // assertion pins the indentation of the `2)` line; three spaces place it
        // under the content column of `1. `.
        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        let line1 = &ctx.lines[0];
        assert!(line1.list_item.is_some());
        let list1 = line1.list_item.as_ref().unwrap();
        assert_eq!(list1.marker, "-");
        assert!(!list1.is_ordered);
        assert_eq!(list1.marker_column, 0);
        assert_eq!(list1.content_column, 2);

        let line2 = &ctx.lines[1];
        assert!(line2.list_item.is_some());
        let list2 = line2.list_item.as_ref().unwrap();
        assert_eq!(list2.marker, "*");
        assert_eq!(list2.marker_column, 2);

        let line3 = &ctx.lines[2];
        assert!(line3.list_item.is_some());
        let list3 = line3.list_item.as_ref().unwrap();
        assert_eq!(list3.marker, "1.");
        assert!(list3.is_ordered);
        assert_eq!(list3.number, Some(1));

        let line6 = &ctx.lines[5];
        assert!(line6.list_item.is_none());
    }

    /// Offsets at line starts, line ends and the end of the document.
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        let content = "a\nb\nc";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(1), (1, 2));
        assert_eq!(ctx.offset_to_line_col(2), (2, 1));
        assert_eq!(ctx.offset_to_line_col(3), (2, 2));
        assert_eq!(ctx.offset_to_line_col(4), (3, 1));
        assert_eq!(ctx.offset_to_line_col(5), (3, 2));
    }

    /// In the MDX flavor, leading `import`/`export` lines are flagged as ESM.
    #[test]
    fn test_mdx_esm_blocks() {
        let content = r##"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall

In {year}, the snowfall was above average.
It was followed by a warm spring which caused
flood conditions in many of the nearby rivers.

<Chart color="#fcb32c" year={year} />
"##;

        let ctx = LintContext::new(content, MarkdownFlavor::MDX);

        assert_eq!(ctx.lines.len(), 10);
        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
        assert!(
            !ctx.lines[3].in_esm_block,
            "Line 4 (heading) should NOT be in_esm_block"
        );
        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
    }

    /// Outside the MDX flavor the same lines are not flagged as ESM.
    #[test]
    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
        let content = r#"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        assert!(
            !ctx.lines[0].in_esm_block,
            "Line 1 should NOT be in_esm_block in Standard flavor"
        );
        assert!(
            !ctx.lines[1].in_esm_block,
            "Line 2 should NOT be in_esm_block in Standard flavor"
        );
    }
}