1use crate::config::MarkdownFlavor;
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
4use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
5use regex::Regex;
6use std::borrow::Cow;
7use std::path::PathBuf;
8use std::sync::LazyLock;
9
/// Evaluates `$code` and, when `$profile` is true, reports the elapsed
/// wall-clock time on stderr as `[PROFILE] <name>: <duration>`.
///
/// The timer is always started; only the report is conditional. The macro
/// expands to the value produced by `$code`. This variant is compiled for
/// every target except `wasm32`.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let start = std::time::Instant::now();
        let result = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, start.elapsed());
        }
        result
    }};
}
22
/// `wasm32` variant of `profile_section!`: expands to `$code` alone.
///
/// `$name` and `$profile` are accepted so call sites are identical on every
/// target, but no timing is taken and nothing is printed.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
27
/// Lazily compiled regex for inline links (`[text](url "title")`) and
/// full/collapsed reference links (`[text][id]`).
///
/// Capture groups: 1 = link text, 2 = URL in angle brackets, 3 = bare URL,
/// 4/5 = double-/single-quoted title, 6 = reference ID. The pattern is written
/// in verbose mode (`x`), so the `#` comments below are part of the regex
/// source, not Rust comments.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\] # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
41
/// Lazily compiled regex for inline images (`![alt](url "title")`) and
/// reference images (`![alt][id]`).
///
/// Capture groups: 1 = alt text, 2 = URL in angle brackets, 3 = bare URL,
/// 4/5 = double-/single-quoted title, 6 = reference ID. The pattern is written
/// in verbose mode (`x`), so the `#` comments below are part of the regex
/// source, not Rust comments.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\] # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
55
/// Lazily compiled multi-line regex for a reference definition line such as
/// `[id]: url "title"`.
///
/// Allows up to three leading spaces. Capture groups: 1 = ID, 2 = URL (a run
/// of non-whitespace), 3/4 = optional double-/single-quoted title.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
59
/// Lazily compiled regex for bare `http://`, `https://` and `ftp://` URLs.
///
/// Group 1 is the scheme. The URL body excludes whitespace, angle brackets,
/// square brackets, parentheses, backslashes, quotes and backticks, and may be
/// followed by an optional port, path, query string and fragment.
static BARE_URL_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(https?|ftp)://[^\s<>\[\]()\\'"`]+(?:\.[^\s<>\[\]()\\'"`]+)*(?::\d+)?(?:/[^\s<>\[\]()\\'"`]*)?(?:\?[^\s<>\[\]()\\'"`]*)?(?:#[^\s<>\[\]()\\'"`]*)?"#
    ).unwrap()
});
66
/// Lazily compiled regex for bare e-mail addresses: a local part, `@`, a
/// domain, and a final alphabetic label of at least two characters.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
70
/// Lazily compiled regex capturing (group 1) a blockquote prefix at the start
/// of a string: optional whitespace, one or more `>`, then optional whitespace.
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
73
/// Per-line facts computed once while a `LintContext` is built.
///
/// The line text itself is not stored; it is sliced out of the source on
/// demand via `byte_offset` and `byte_len` (see `LineInfo::content`).
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset in the source at which this line starts.
    pub byte_offset: usize,
    /// Length of the line in bytes, as used to slice it out of the source.
    pub byte_len: usize,
    /// Indentation of the line (unit not visible in this chunk — TODO confirm bytes vs. columns).
    pub indent: usize,
    /// Whether the line is blank.
    pub is_blank: bool,
    /// Whether the line lies inside a code block.
    pub in_code_block: bool,
    /// Whether the line lies inside front matter.
    pub in_front_matter: bool,
    /// Whether the line lies inside an HTML block.
    pub in_html_block: bool,
    /// Whether the line lies inside an HTML comment.
    pub in_html_comment: bool,
    /// List item details when the line starts a list item.
    pub list_item: Option<ListItemInfo>,
    /// Heading details when the line is a heading.
    pub heading: Option<HeadingInfo>,
    /// Blockquote details when the line is a blockquote line.
    pub blockquote: Option<BlockquoteInfo>,
    /// Whether the line lies inside a mkdocstrings block (presumably MkDocs flavor only — verify).
    pub in_mkdocstrings: bool,
    /// Whether the line lies inside an ESM block (presumably MDX import/export — verify).
    pub in_esm_block: bool,
    /// Set for every line after the first line of a multi-line code span,
    /// up to and including the span's last line.
    pub in_code_span_continuation: bool,
}
106
107impl LineInfo {
108 pub fn content<'a>(&self, source: &'a str) -> &'a str {
110 &source[self.byte_offset..self.byte_offset + self.byte_len]
111 }
112}
113
/// Details of a list item found on a line.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The list marker text as written in the source.
    pub marker: String,
    /// `true` for ordered (numbered) items, `false` for bullet items.
    pub is_ordered: bool,
    /// The item number for ordered items; `None` otherwise.
    pub number: Option<usize>,
    /// Column at which the marker starts (indexing base not visible here — TODO confirm).
    pub marker_column: usize,
    /// Column at which the item content starts (indexing base not visible here — TODO confirm).
    pub content_column: usize,
}
128
/// Syntactic form of a Markdown heading.
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    /// ATX heading, written with leading `#` characters.
    ATX,
    /// Setext heading variant 1 (presumably the `=` underline, level 1 — verify against the detector).
    Setext1,
    /// Setext heading variant 2 (presumably the `-` underline, level 2 — verify against the detector).
    Setext2,
}
139
/// A link found in the document by `LintContext::parse_links`.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// 1-based line number on which the link starts.
    pub line: usize,
    /// Byte offset of the link start relative to the start of its line (0-based).
    pub start_col: usize,
    /// Byte offset of the link end relative to the start of the line containing the end.
    pub end_col: usize,
    /// Byte offset in the document at which the link starts.
    pub byte_offset: usize,
    /// Byte offset in the document just past the end of the link.
    pub byte_end: usize,
    /// Link text: the source between the opening `[` and its matching `]`.
    pub text: Cow<'a, str>,
    /// Destination URL; empty for reference links recovered by the regex fallback.
    pub url: Cow<'a, str>,
    /// `true` for reference, collapsed and shortcut links.
    pub is_reference: bool,
    /// Lowercased reference ID (falls back to the lowercased link text when the
    /// ID is empty); `None` for non-reference links.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link kind as classified by pulldown-cmark (`LinkType::Reference` for regex-fallback entries).
    pub link_type: LinkType,
}
164
/// A reference pulldown-cmark reported through its broken-link callback.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text as reported by the parser.
    pub reference: String,
    /// Byte range of the broken link in the document.
    pub span: std::ops::Range<usize>,
}
173
/// A footnote reference event emitted by pulldown-cmark.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// Footnote identifier as reported by the parser.
    pub id: String,
    /// 1-based line number on which the reference starts.
    pub line: usize,
    /// Byte offset in the document at which the reference starts.
    pub byte_offset: usize,
    /// Byte offset in the document just past the end of the reference.
    pub byte_end: usize,
}
186
/// An image found in the document by `LintContext::parse_images`.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// 1-based line number on which the image starts.
    pub line: usize,
    /// Byte offset of the image start relative to the start of its line (0-based).
    pub start_col: usize,
    /// Byte offset of the image end relative to the start of the line containing the end.
    pub end_col: usize,
    /// Byte offset in the document at which the image starts (the `!`).
    pub byte_offset: usize,
    /// Byte offset in the document just past the end of the image.
    pub byte_end: usize,
    /// Alt text: the source between `![` and its matching `]`.
    pub alt_text: Cow<'a, str>,
    /// Image URL; empty for reference images recovered by the regex fallback.
    pub url: Cow<'a, str>,
    /// `true` for reference, collapsed and shortcut images.
    pub is_reference: bool,
    /// Lowercased reference ID (falls back to the lowercased alt text when the
    /// ID is empty); `None` for non-reference images.
    pub reference_id: Option<Cow<'a, str>>,
    /// Image kind as classified by pulldown-cmark (`LinkType::Reference` for regex-fallback entries).
    pub link_type: LinkType,
}
211
/// A link reference definition (`[id]: url "title"`).
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line number of the definition (presumably 1-based like other line fields — verify).
    pub line: usize,
    /// Reference ID. Lookups compare it against a lowercased key, so it is
    /// presumably stored lowercased — verify in `parse_reference_defs`.
    pub id: String,
    /// Destination URL.
    pub url: String,
    /// Optional title.
    pub title: Option<String>,
    /// Byte offset in the document at which the definition starts.
    pub byte_offset: usize,
    /// Byte offset just past the end of the definition (treated as exclusive by `is_in_reference_def`).
    pub byte_end: usize,
}
228
/// An inline code span, possibly extending over several lines.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// 1-based line number on which the span starts.
    pub line: usize,
    /// 1-based line number on which the span ends (equal to `line` for single-line spans).
    pub end_line: usize,
    /// Start column on `line`; compared against 0-based columns by `is_in_code_span`.
    pub start_col: usize,
    /// End column on `end_line`; treated as exclusive by `is_in_code_span`.
    pub end_col: usize,
    /// Byte offset in the document at which the span starts.
    pub byte_offset: usize,
    /// Byte offset just past the end of the span (exclusive).
    pub byte_end: usize,
    /// Number of backticks in the delimiter.
    pub backtick_count: usize,
    /// Content of the span (presumably without the delimiters — verify in `parse_code_spans`).
    pub content: String,
}
249
/// Details of a heading found on a line.
///
/// The fields are filled in by heading detection, which is outside this chunk;
/// the notes below follow the field names and should be confirmed there.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level.
    pub level: u8,
    /// Syntactic form of the heading.
    pub style: HeadingStyle,
    /// Marker text as written in the source.
    pub marker: String,
    /// Column at which the marker starts.
    pub marker_column: usize,
    /// Column at which the heading text starts.
    pub content_column: usize,
    /// Heading text (presumably with any custom ID and closing sequence removed — verify).
    pub text: String,
    /// Custom ID attached to the heading, if any.
    pub custom_id: Option<String>,
    /// Heading text as written (presumably before cleanup — verify).
    pub raw_text: String,
    /// Whether the heading has a closing sequence.
    pub has_closing_sequence: bool,
    /// The closing sequence text.
    pub closing_sequence: String,
}
274
/// Details of a blockquote line.
///
/// The fields are filled in by blockquote detection, which is outside this
/// chunk; the notes below follow the field names and should be confirmed there.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Blockquote nesting depth.
    pub nesting_level: usize,
    /// Whitespace preceding the first `>`.
    pub indent: String,
    /// Column at which the first `>` appears.
    pub marker_column: usize,
    /// The blockquote prefix text.
    pub prefix: String,
    /// Line content following the prefix.
    pub content: String,
    /// Whether no space follows the marker.
    pub has_no_space_after_marker: bool,
    /// Whether more than one space follows the marker.
    pub has_multiple_spaces_after_marker: bool,
    /// Flag consumed by rule MD028 (exact meaning not visible here — TODO confirm).
    pub needs_md028_fix: bool,
}
295
/// A contiguous block of list items.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// First line of the block (inclusive; see `is_in_list_block`).
    pub start_line: usize,
    /// Last line of the block (inclusive; see `is_in_list_block`).
    pub end_line: usize,
    /// Whether the block is an ordered list.
    pub is_ordered: bool,
    /// Marker associated with the block, if any (exact rule for `None` not visible here — TODO confirm).
    pub marker: Option<String>,
    /// Blockquote prefix associated with the block.
    pub blockquote_prefix: String,
    /// Line numbers of the items in the block.
    pub item_lines: Vec<usize>,
    /// Nesting depth of the block.
    pub nesting_level: usize,
    /// Width of the widest marker in the block.
    pub max_marker_width: usize,
}
316
317use std::sync::{Arc, Mutex};
318
/// Occurrence counts of Markdown-significant characters in the document.
///
/// `LintContext::has_char` and `LintContext::char_count` answer from these
/// counters for the characters noted on each field. The counting itself is
/// done in `compute_char_frequency`, which is outside this chunk.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count reported for `#`.
    pub hash_count: usize,
    /// Count reported for `*`.
    pub asterisk_count: usize,
    /// Count reported for `_`.
    pub underscore_count: usize,
    /// Count reported for `-`.
    pub hyphen_count: usize,
    /// Count reported for `+`.
    pub plus_count: usize,
    /// Count reported for `>`.
    pub gt_count: usize,
    /// Count reported for `|`.
    pub pipe_count: usize,
    /// Count reported for `[` (whether `]` is also counted is not visible here — TODO confirm).
    pub bracket_count: usize,
    /// Count reported for a backtick.
    pub backtick_count: usize,
    /// Count reported for `<`.
    pub lt_count: usize,
    /// Count reported for `!`.
    pub exclamation_count: usize,
    /// Count reported for `\n`.
    pub newline_count: usize,
}
347
/// An HTML tag found in the document.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number of the tag (matched against `line_num` by `html_tags_on_line`).
    pub line: usize,
    /// Column at which the tag starts.
    pub start_col: usize,
    /// Column at which the tag ends.
    pub end_col: usize,
    /// Byte offset in the document at which the tag starts.
    pub byte_offset: usize,
    /// Byte offset in the document at which the tag ends.
    pub byte_end: usize,
    /// Tag name.
    pub tag_name: String,
    /// Whether this is a closing tag.
    pub is_closing: bool,
    /// Whether this is a self-closing tag.
    pub is_self_closing: bool,
    /// The tag text as written in the source.
    pub raw_content: String,
}
370
/// An emphasis span found in the document.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number of the span (matched against `line_num` by `emphasis_spans_on_line`).
    pub line: usize,
    /// Column at which the span starts.
    pub start_col: usize,
    /// Column at which the span ends.
    pub end_col: usize,
    /// Byte offset in the document at which the span starts.
    pub byte_offset: usize,
    /// Byte offset in the document at which the span ends.
    pub byte_end: usize,
    /// Emphasis marker character (presumably `*` or `_` — verify).
    pub marker: char,
    /// Number of marker characters in the delimiter.
    pub marker_count: usize,
    /// Text enclosed by the markers.
    pub content: String,
}
391
/// A table row found in the document.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number of the row (matched against `line_num` by `table_rows_on_line`).
    pub line: usize,
    /// Whether the row is a header separator row.
    pub is_separator: bool,
    /// Number of columns in the row.
    pub column_count: usize,
    /// Column alignments as strings (value vocabulary not visible here — TODO confirm).
    pub column_alignments: Vec<String>,
}
404
/// A bare URL or e-mail address found in the document.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number of the match (matched against `line_num` by `bare_urls_on_line`).
    pub line: usize,
    /// Column at which the match starts.
    pub start_col: usize,
    /// Column at which the match ends.
    pub end_col: usize,
    /// Byte offset in the document at which the match starts.
    pub byte_offset: usize,
    /// Byte offset in the document at which the match ends.
    pub byte_end: usize,
    /// The matched text.
    pub url: String,
    /// Kind of match as a string (value vocabulary not visible here — TODO confirm).
    pub url_type: String,
}
423
/// Pre-computed view of a Markdown document.
///
/// `LintContext::new` fills the public fields eagerly. The `*_cache` fields
/// hold results computed on first access (code spans are the exception: they
/// are seeded in `new`).
pub struct LintContext<'a> {
    /// The document text.
    pub content: &'a str,
    /// Byte offset of the start of each line: `0`, then one past every `\n`.
    pub line_offsets: Vec<usize>,
    /// Code block ranges from `CodeBlockUtils::detect_code_blocks` (presumably byte `(start, end)` pairs — verify).
    pub code_blocks: Vec<(usize, usize)>,
    /// Per-line information; index `n - 1` describes line `n`.
    pub lines: Vec<LineInfo>,
    /// Links found by `parse_links`.
    pub links: Vec<ParsedLink<'a>>,
    /// Images found by `parse_images`.
    pub images: Vec<ParsedImage<'a>>,
    /// References reported through pulldown-cmark's broken-link callback.
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Footnote references emitted by pulldown-cmark.
    pub footnote_refs: Vec<FootnoteRef>,
    /// Link reference definitions found by `parse_reference_defs`.
    pub reference_defs: Vec<ReferenceDef>,
    /// Cached code spans; populated in `new`.
    code_spans_cache: Mutex<Option<Arc<Vec<CodeSpan>>>>,
    /// List blocks found by `parse_list_blocks`.
    pub list_blocks: Vec<ListBlock>,
    /// Character counts backing `has_char`, `char_count` and the `likely_has_*` heuristics.
    pub char_frequency: CharFrequency,
    /// Cached HTML tags; computed on first call to `html_tags`.
    html_tags_cache: Mutex<Option<Arc<Vec<HtmlTag>>>>,
    /// Cached emphasis spans; computed on first call to `emphasis_spans`.
    emphasis_spans_cache: Mutex<Option<Arc<Vec<EmphasisSpan>>>>,
    /// Cached table rows; computed on first call to `table_rows`.
    table_rows_cache: Mutex<Option<Arc<Vec<TableRow>>>>,
    /// Cached bare URLs; computed on first call to `bare_urls`.
    bare_urls_cache: Mutex<Option<Arc<Vec<BareUrl>>>>,
    /// Byte ranges of HTML comments (half-open, as used by `is_in_html_comment`).
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Table blocks found by `TableUtils::find_table_blocks_with_code_info`.
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Line index built over `content`.
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    /// Byte ranges of Jinja constructs (half-open, as used by `is_in_jinja_range`).
    jinja_ranges: Vec<(usize, usize)>,
    /// Markdown flavor the document is analysed under.
    pub flavor: MarkdownFlavor,
    /// Path of the source file, when known.
    pub source_file: Option<PathBuf>,
}
448
/// The four consecutive pieces of a blockquote line, each borrowed from the
/// original line. Concatenated in field order they reproduce the input.
struct BlockquoteComponents<'a> {
    /// Leading spaces/tabs before the first `>`.
    indent: &'a str,
    /// The uninterrupted run of `>` characters.
    markers: &'a str,
    /// Spaces/tabs immediately following the markers.
    spaces_after: &'a str,
    /// Everything after `spaces_after`.
    content: &'a str,
}

/// Splits `line` into indent, `>` markers, trailing blanks and content.
///
/// Only ASCII space and tab count as blanks. Returns `None` when the first
/// non-blank character is not `>` (including empty and all-blank lines).
/// Only the first uninterrupted run of `>` is treated as markers, so for
/// `"> > x"` the content is `"> x"`.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    let is_blank = |c: char| c == ' ' || c == '\t';

    // Peel the pieces off the front one at a time; each remainder's length
    // tells us where the previous piece ended.
    let after_indent = line.trim_start_matches(is_blank);
    let after_markers = after_indent.trim_start_matches('>');
    if after_markers.len() == after_indent.len() {
        // No `>` directly after the indentation.
        return None;
    }
    let content = after_markers.trim_start_matches(is_blank);

    let (indent, _) = line.split_at(line.len() - after_indent.len());
    let (markers, _) = after_indent.split_at(after_indent.len() - after_markers.len());
    let (spaces_after, _) = after_markers.split_at(after_markers.len() - content.len());

    Some(BlockquoteComponents {
        indent,
        markers,
        spaces_after,
        content,
    })
}
493
494impl<'a> LintContext<'a> {
    /// Builds a `LintContext` for `content`.
    ///
    /// Runs every eager analysis pass in a fixed order; later passes consume
    /// the results of earlier ones, and `lines` is mutated in place by several
    /// of them, so the order must be preserved. Each pass is wrapped in
    /// `profile_section!`; on non-wasm targets timings are printed to stderr
    /// when the `RUMDL_PROFILE_QUADRATIC` environment variable is set to a
    /// valid Unicode value.
    ///
    /// * `content` - the Markdown document.
    /// * `flavor` - Markdown flavor; autodoc ranges are only detected for `MarkdownFlavor::MkDocs`.
    /// * `source_file` - stored as-is on the returned context.
    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
        #[cfg(not(target_arch = "wasm32"))]
        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
        #[cfg(target_arch = "wasm32")]
        let profile = false;

        // Start offset of every line: 0, then the byte after each '\n'.
        let line_offsets = profile_section!("Line offsets", profile, {
            let mut offsets = vec![0];
            for (i, c) in content.char_indices() {
                if c == '\n' {
                    offsets.push(i + 1);
                }
            }
            offsets
        });

        let code_blocks = profile_section!("Code blocks", profile, CodeBlockUtils::detect_code_blocks(content));

        let html_comment_ranges = profile_section!(
            "HTML comment ranges",
            profile,
            crate::utils::skip_context::compute_html_comment_ranges(content)
        );

        // Autodoc blocks are only looked for under the MkDocs flavor.
        let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
            if flavor == MarkdownFlavor::MkDocs {
                crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
            } else {
                Vec::new()
            }
        });

        let mut lines = profile_section!(
            "Basic line info",
            profile,
            Self::compute_basic_line_info(
                content,
                &line_offsets,
                &code_blocks,
                flavor,
                &html_comment_ranges,
                &autodoc_ranges,
            )
        );

        // The next three passes update `lines` in place.
        profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));

        profile_section!(
            "ESM blocks",
            profile,
            Self::detect_esm_blocks(content, &mut lines, flavor)
        );

        profile_section!(
            "Headings & blockquotes",
            profile,
            Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges)
        );

        let code_spans = profile_section!("Code spans", profile, Self::parse_code_spans(content, &lines));

        // Flag every line after the first line of a multi-line code span
        // (span line numbers are 1-based, `lines` is 0-based).
        for span in &code_spans {
            if span.end_line > span.line {
                for line_num in (span.line + 1)..=span.end_line {
                    if let Some(line_info) = lines.get_mut(line_num - 1) {
                        line_info.in_code_span_continuation = true;
                    }
                }
            }
        }

        let (links, broken_links, footnote_refs) = profile_section!(
            "Links",
            profile,
            Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
        );

        let images = profile_section!(
            "Images",
            profile,
            Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
        );

        let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));

        let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));

        let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));

        let table_blocks = profile_section!(
            "Table blocks",
            profile,
            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
                content,
                &code_blocks,
                &code_spans,
                &html_comment_ranges,
            )
        );

        let line_index = profile_section!(
            "Line index",
            profile,
            crate::utils::range_utils::LineIndex::new(content)
        );

        let jinja_ranges = profile_section!(
            "Jinja ranges",
            profile,
            crate::utils::jinja_utils::find_jinja_ranges(content)
        );

        Self {
            content,
            line_offsets,
            code_blocks,
            lines,
            links,
            images,
            broken_links,
            footnote_refs,
            reference_defs,
            // Code spans were needed above, so the cache starts out populated.
            code_spans_cache: Mutex::new(Some(Arc::new(code_spans))),
            list_blocks,
            char_frequency,
            // The remaining caches are filled on first access.
            html_tags_cache: Mutex::new(None),
            emphasis_spans_cache: Mutex::new(None),
            table_rows_cache: Mutex::new(None),
            bare_urls_cache: Mutex::new(None),
            html_comment_ranges,
            table_blocks,
            line_index,
            jinja_ranges,
            flavor,
            source_file,
        }
    }
648
649 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
651 let mut cache = self.code_spans_cache.lock().expect("Code spans cache mutex poisoned");
652
653 Arc::clone(cache.get_or_insert_with(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))))
654 }
655
656 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
658 &self.html_comment_ranges
659 }
660
661 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
663 let mut cache = self.html_tags_cache.lock().expect("HTML tags cache mutex poisoned");
664
665 Arc::clone(cache.get_or_insert_with(|| {
666 Arc::new(Self::parse_html_tags(
667 self.content,
668 &self.lines,
669 &self.code_blocks,
670 self.flavor,
671 ))
672 }))
673 }
674
675 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
677 let mut cache = self
678 .emphasis_spans_cache
679 .lock()
680 .expect("Emphasis spans cache mutex poisoned");
681
682 Arc::clone(
683 cache.get_or_insert_with(|| {
684 Arc::new(Self::parse_emphasis_spans(self.content, &self.lines, &self.code_blocks))
685 }),
686 )
687 }
688
689 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
691 let mut cache = self.table_rows_cache.lock().expect("Table rows cache mutex poisoned");
692
693 Arc::clone(cache.get_or_insert_with(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))))
694 }
695
696 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
698 let mut cache = self.bare_urls_cache.lock().expect("Bare URLs cache mutex poisoned");
699
700 Arc::clone(
701 cache.get_or_insert_with(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
702 )
703 }
704
705 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
707 match self.line_offsets.binary_search(&offset) {
708 Ok(line) => (line + 1, 1),
709 Err(line) => {
710 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
711 (line, offset - line_start + 1)
712 }
713 }
714 }
715
716 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
718 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
720 return true;
721 }
722
723 self.code_spans()
725 .iter()
726 .any(|span| pos >= span.byte_offset && pos < span.byte_end)
727 }
728
729 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
731 if line_num > 0 {
732 self.lines.get(line_num - 1)
733 } else {
734 None
735 }
736 }
737
738 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
740 self.line_info(line_num).map(|info| info.byte_offset)
741 }
742
743 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
745 let normalized_id = ref_id.to_lowercase();
746 self.reference_defs
747 .iter()
748 .find(|def| def.id == normalized_id)
749 .map(|def| def.url.as_str())
750 }
751
752 pub fn is_in_list_block(&self, line_num: usize) -> bool {
754 self.list_blocks
755 .iter()
756 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
757 }
758
759 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
761 self.list_blocks
762 .iter()
763 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
764 }
765
766 pub fn is_in_code_block(&self, line_num: usize) -> bool {
770 if line_num == 0 || line_num > self.lines.len() {
771 return false;
772 }
773 self.lines[line_num - 1].in_code_block
774 }
775
776 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
778 if line_num == 0 || line_num > self.lines.len() {
779 return false;
780 }
781 self.lines[line_num - 1].in_front_matter
782 }
783
784 pub fn is_in_html_block(&self, line_num: usize) -> bool {
786 if line_num == 0 || line_num > self.lines.len() {
787 return false;
788 }
789 self.lines[line_num - 1].in_html_block
790 }
791
792 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
794 if line_num == 0 || line_num > self.lines.len() {
795 return false;
796 }
797
798 let col_0indexed = if col > 0 { col - 1 } else { 0 };
802 let code_spans = self.code_spans();
803 code_spans.iter().any(|span| {
804 if line_num < span.line || line_num > span.end_line {
806 return false;
807 }
808
809 if span.line == span.end_line {
810 col_0indexed >= span.start_col && col_0indexed < span.end_col
812 } else if line_num == span.line {
813 col_0indexed >= span.start_col
815 } else if line_num == span.end_line {
816 col_0indexed < span.end_col
818 } else {
819 true
821 }
822 })
823 }
824
825 #[inline]
827 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
828 let code_spans = self.code_spans();
829 code_spans
830 .iter()
831 .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
832 }
833
834 #[inline]
837 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
838 self.reference_defs
839 .iter()
840 .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
841 }
842
843 #[inline]
847 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
848 self.html_comment_ranges
849 .iter()
850 .any(|range| byte_pos >= range.start && byte_pos < range.end)
851 }
852
853 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
855 self.jinja_ranges
856 .iter()
857 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
858 }
859
860 pub fn has_char(&self, ch: char) -> bool {
862 match ch {
863 '#' => self.char_frequency.hash_count > 0,
864 '*' => self.char_frequency.asterisk_count > 0,
865 '_' => self.char_frequency.underscore_count > 0,
866 '-' => self.char_frequency.hyphen_count > 0,
867 '+' => self.char_frequency.plus_count > 0,
868 '>' => self.char_frequency.gt_count > 0,
869 '|' => self.char_frequency.pipe_count > 0,
870 '[' => self.char_frequency.bracket_count > 0,
871 '`' => self.char_frequency.backtick_count > 0,
872 '<' => self.char_frequency.lt_count > 0,
873 '!' => self.char_frequency.exclamation_count > 0,
874 '\n' => self.char_frequency.newline_count > 0,
875 _ => self.content.contains(ch), }
877 }
878
879 pub fn char_count(&self, ch: char) -> usize {
881 match ch {
882 '#' => self.char_frequency.hash_count,
883 '*' => self.char_frequency.asterisk_count,
884 '_' => self.char_frequency.underscore_count,
885 '-' => self.char_frequency.hyphen_count,
886 '+' => self.char_frequency.plus_count,
887 '>' => self.char_frequency.gt_count,
888 '|' => self.char_frequency.pipe_count,
889 '[' => self.char_frequency.bracket_count,
890 '`' => self.char_frequency.backtick_count,
891 '<' => self.char_frequency.lt_count,
892 '!' => self.char_frequency.exclamation_count,
893 '\n' => self.char_frequency.newline_count,
894 _ => self.content.matches(ch).count(), }
896 }
897
898 pub fn likely_has_headings(&self) -> bool {
900 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
902
903 pub fn likely_has_lists(&self) -> bool {
905 self.char_frequency.asterisk_count > 0
906 || self.char_frequency.hyphen_count > 0
907 || self.char_frequency.plus_count > 0
908 }
909
910 pub fn likely_has_emphasis(&self) -> bool {
912 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
913 }
914
915 pub fn likely_has_tables(&self) -> bool {
917 self.char_frequency.pipe_count > 2
918 }
919
920 pub fn likely_has_blockquotes(&self) -> bool {
922 self.char_frequency.gt_count > 0
923 }
924
925 pub fn likely_has_code(&self) -> bool {
927 self.char_frequency.backtick_count > 0
928 }
929
930 pub fn likely_has_links_or_images(&self) -> bool {
932 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
933 }
934
935 pub fn likely_has_html(&self) -> bool {
937 self.char_frequency.lt_count > 0
938 }
939
940 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
942 self.html_tags()
943 .iter()
944 .filter(|tag| tag.line == line_num)
945 .cloned()
946 .collect()
947 }
948
949 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
951 self.emphasis_spans()
952 .iter()
953 .filter(|span| span.line == line_num)
954 .cloned()
955 .collect()
956 }
957
958 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
960 self.table_rows()
961 .iter()
962 .filter(|row| row.line == line_num)
963 .cloned()
964 .collect()
965 }
966
967 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
969 self.bare_urls()
970 .iter()
971 .filter(|url| url.line == line_num)
972 .cloned()
973 .collect()
974 }
975
976 #[inline]
982 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
983 let idx = match lines.binary_search_by(|line| {
985 if byte_offset < line.byte_offset {
986 std::cmp::Ordering::Greater
987 } else if byte_offset > line.byte_offset + line.byte_len {
988 std::cmp::Ordering::Less
989 } else {
990 std::cmp::Ordering::Equal
991 }
992 }) {
993 Ok(idx) => idx,
994 Err(idx) => idx.saturating_sub(1),
995 };
996
997 let line = &lines[idx];
998 let line_num = idx + 1;
999 let col = byte_offset.saturating_sub(line.byte_offset);
1000
1001 (idx, line_num, col)
1002 }
1003
1004 #[inline]
1006 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1007 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1009
1010 if idx > 0 {
1012 let span = &code_spans[idx - 1];
1013 if offset >= span.byte_offset && offset < span.byte_end {
1014 return true;
1015 }
1016 }
1017
1018 false
1019 }
1020
1021 fn parse_links(
1023 content: &'a str,
1024 lines: &[LineInfo],
1025 code_blocks: &[(usize, usize)],
1026 code_spans: &[CodeSpan],
1027 flavor: MarkdownFlavor,
1028 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1029 ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1030 use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1031 use std::collections::HashSet;
1032
1033 let mut links = Vec::with_capacity(content.len() / 500);
1034 let mut broken_links = Vec::new();
1035 let mut footnote_refs = Vec::new();
1036
1037 let mut found_positions = HashSet::new();
1039
1040 let mut options = Options::empty();
1050 options.insert(Options::ENABLE_WIKILINKS);
1051 options.insert(Options::ENABLE_FOOTNOTES);
1052
1053 let parser = Parser::new_with_broken_link_callback(
1054 content,
1055 options,
1056 Some(|link: BrokenLink<'_>| {
1057 broken_links.push(BrokenLinkInfo {
1058 reference: link.reference.to_string(),
1059 span: link.span.clone(),
1060 });
1061 None
1062 }),
1063 )
1064 .into_offset_iter();
1065
1066 let mut link_stack: Vec<(
1067 usize,
1068 usize,
1069 pulldown_cmark::CowStr<'a>,
1070 LinkType,
1071 pulldown_cmark::CowStr<'a>,
1072 )> = Vec::new();
1073 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1076 match event {
1077 Event::Start(Tag::Link {
1078 link_type,
1079 dest_url,
1080 id,
1081 ..
1082 }) => {
1083 link_stack.push((range.start, range.end, dest_url, link_type, id));
1085 text_chunks.clear();
1086 }
1087 Event::Text(text) if !link_stack.is_empty() => {
1088 text_chunks.push((text.to_string(), range.start, range.end));
1090 }
1091 Event::Code(code) if !link_stack.is_empty() => {
1092 let code_text = format!("`{code}`");
1094 text_chunks.push((code_text, range.start, range.end));
1095 }
1096 Event::End(TagEnd::Link) => {
1097 if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1098 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1100 text_chunks.clear();
1101 continue;
1102 }
1103
1104 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1106
1107 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1109 text_chunks.clear();
1110 continue;
1111 }
1112
1113 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1114
1115 let is_reference = matches!(
1116 link_type,
1117 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1118 );
1119
1120 let link_text = if start_pos < content.len() {
1123 let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1124
1125 let mut close_pos = None;
1129 let mut depth = 0;
1130 let mut in_code_span = false;
1131
1132 for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1133 let mut backslash_count = 0;
1135 let mut j = i;
1136 while j > 0 && link_bytes[j - 1] == b'\\' {
1137 backslash_count += 1;
1138 j -= 1;
1139 }
1140 let is_escaped = backslash_count % 2 != 0;
1141
1142 if byte == b'`' && !is_escaped {
1144 in_code_span = !in_code_span;
1145 }
1146
1147 if !is_escaped && !in_code_span {
1149 if byte == b'[' {
1150 depth += 1;
1151 } else if byte == b']' {
1152 if depth == 0 {
1153 close_pos = Some(i);
1155 break;
1156 } else {
1157 depth -= 1;
1158 }
1159 }
1160 }
1161 }
1162
1163 if let Some(pos) = close_pos {
1164 Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1165 } else {
1166 Cow::Borrowed("")
1167 }
1168 } else {
1169 Cow::Borrowed("")
1170 };
1171
1172 let reference_id = if is_reference && !ref_id.is_empty() {
1174 Some(Cow::Owned(ref_id.to_lowercase()))
1175 } else if is_reference {
1176 Some(Cow::Owned(link_text.to_lowercase()))
1178 } else {
1179 None
1180 };
1181
1182 let has_escaped_bang = start_pos >= 2
1186 && content.as_bytes().get(start_pos - 2) == Some(&b'\\')
1187 && content.as_bytes().get(start_pos - 1) == Some(&b'!');
1188
1189 let has_escaped_bracket =
1192 start_pos >= 1 && content.as_bytes().get(start_pos - 1) == Some(&b'\\');
1193
1194 if has_escaped_bang || has_escaped_bracket {
1195 text_chunks.clear();
1196 continue; }
1198
1199 found_positions.insert(start_pos);
1201
1202 links.push(ParsedLink {
1203 line: line_num,
1204 start_col: col_start,
1205 end_col: col_end,
1206 byte_offset: start_pos,
1207 byte_end: range.end,
1208 text: link_text,
1209 url: Cow::Owned(url.to_string()),
1210 is_reference,
1211 reference_id,
1212 link_type,
1213 });
1214
1215 text_chunks.clear();
1216 }
1217 }
1218 Event::FootnoteReference(footnote_id) => {
1219 if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1222 continue;
1223 }
1224
1225 let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1226 footnote_refs.push(FootnoteRef {
1227 id: footnote_id.to_string(),
1228 line: line_num,
1229 byte_offset: range.start,
1230 byte_end: range.end,
1231 });
1232 }
1233 _ => {}
1234 }
1235 }
1236
1237 for cap in LINK_PATTERN.captures_iter(content) {
1241 let full_match = cap.get(0).unwrap();
1242 let match_start = full_match.start();
1243 let match_end = full_match.end();
1244
1245 if found_positions.contains(&match_start) {
1247 continue;
1248 }
1249
1250 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1252 continue;
1253 }
1254
1255 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1257 continue;
1258 }
1259
1260 if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1262 continue;
1263 }
1264
1265 if Self::is_offset_in_code_span(code_spans, match_start) {
1267 continue;
1268 }
1269
1270 if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1272 continue;
1273 }
1274
1275 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1277
1278 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1280 continue;
1281 }
1282
1283 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1284
1285 let text = cap.get(1).map_or("", |m| m.as_str());
1286
1287 if let Some(ref_id) = cap.get(6) {
1289 let ref_id_str = ref_id.as_str();
1290 let normalized_ref = if ref_id_str.is_empty() {
1291 Cow::Owned(text.to_lowercase()) } else {
1293 Cow::Owned(ref_id_str.to_lowercase())
1294 };
1295
1296 links.push(ParsedLink {
1298 line: line_num,
1299 start_col: col_start,
1300 end_col: col_end,
1301 byte_offset: match_start,
1302 byte_end: match_end,
1303 text: Cow::Borrowed(text),
1304 url: Cow::Borrowed(""), is_reference: true,
1306 reference_id: Some(normalized_ref),
1307 link_type: LinkType::Reference, });
1309 }
1310 }
1311
1312 (links, broken_links, footnote_refs)
1313 }
1314
1315 fn parse_images(
1317 content: &'a str,
1318 lines: &[LineInfo],
1319 code_blocks: &[(usize, usize)],
1320 code_spans: &[CodeSpan],
1321 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1322 ) -> Vec<ParsedImage<'a>> {
1323 use crate::utils::skip_context::is_in_html_comment_ranges;
1324 use std::collections::HashSet;
1325
1326 let mut images = Vec::with_capacity(content.len() / 1000);
1328 let mut found_positions = HashSet::new();
1329
1330 let parser = Parser::new(content).into_offset_iter();
1332 let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1333 Vec::new();
1334 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1337 match event {
1338 Event::Start(Tag::Image {
1339 link_type,
1340 dest_url,
1341 id,
1342 ..
1343 }) => {
1344 image_stack.push((range.start, dest_url, link_type, id));
1345 text_chunks.clear();
1346 }
1347 Event::Text(text) if !image_stack.is_empty() => {
1348 text_chunks.push((text.to_string(), range.start, range.end));
1349 }
1350 Event::Code(code) if !image_stack.is_empty() => {
1351 let code_text = format!("`{code}`");
1352 text_chunks.push((code_text, range.start, range.end));
1353 }
1354 Event::End(TagEnd::Image) => {
1355 if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1356 if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1358 continue;
1359 }
1360
1361 if Self::is_offset_in_code_span(code_spans, start_pos) {
1363 continue;
1364 }
1365
1366 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1368 continue;
1369 }
1370
1371 let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1373 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1374
1375 let is_reference = matches!(
1376 link_type,
1377 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1378 );
1379
1380 let alt_text = if start_pos < content.len() {
1383 let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1384
1385 let mut close_pos = None;
1388 let mut depth = 0;
1389
1390 if image_bytes.len() > 2 {
1391 for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1392 let mut backslash_count = 0;
1394 let mut j = i;
1395 while j > 0 && image_bytes[j - 1] == b'\\' {
1396 backslash_count += 1;
1397 j -= 1;
1398 }
1399 let is_escaped = backslash_count % 2 != 0;
1400
1401 if !is_escaped {
1402 if byte == b'[' {
1403 depth += 1;
1404 } else if byte == b']' {
1405 if depth == 0 {
1406 close_pos = Some(i);
1408 break;
1409 } else {
1410 depth -= 1;
1411 }
1412 }
1413 }
1414 }
1415 }
1416
1417 if let Some(pos) = close_pos {
1418 Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1419 } else {
1420 Cow::Borrowed("")
1421 }
1422 } else {
1423 Cow::Borrowed("")
1424 };
1425
1426 let reference_id = if is_reference && !ref_id.is_empty() {
1427 Some(Cow::Owned(ref_id.to_lowercase()))
1428 } else if is_reference {
1429 Some(Cow::Owned(alt_text.to_lowercase())) } else {
1431 None
1432 };
1433
1434 found_positions.insert(start_pos);
1435 images.push(ParsedImage {
1436 line: line_num,
1437 start_col: col_start,
1438 end_col: col_end,
1439 byte_offset: start_pos,
1440 byte_end: range.end,
1441 alt_text,
1442 url: Cow::Owned(url.to_string()),
1443 is_reference,
1444 reference_id,
1445 link_type,
1446 });
1447 }
1448 }
1449 _ => {}
1450 }
1451 }
1452
1453 for cap in IMAGE_PATTERN.captures_iter(content) {
1455 let full_match = cap.get(0).unwrap();
1456 let match_start = full_match.start();
1457 let match_end = full_match.end();
1458
1459 if found_positions.contains(&match_start) {
1461 continue;
1462 }
1463
1464 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1466 continue;
1467 }
1468
1469 if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1471 || Self::is_offset_in_code_span(code_spans, match_start)
1472 || is_in_html_comment_ranges(html_comment_ranges, match_start)
1473 {
1474 continue;
1475 }
1476
1477 if let Some(ref_id) = cap.get(6) {
1479 let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1480 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1481 let alt_text = cap.get(1).map_or("", |m| m.as_str());
1482 let ref_id_str = ref_id.as_str();
1483 let normalized_ref = if ref_id_str.is_empty() {
1484 Cow::Owned(alt_text.to_lowercase())
1485 } else {
1486 Cow::Owned(ref_id_str.to_lowercase())
1487 };
1488
1489 images.push(ParsedImage {
1490 line: line_num,
1491 start_col: col_start,
1492 end_col: col_end,
1493 byte_offset: match_start,
1494 byte_end: match_end,
1495 alt_text: Cow::Borrowed(alt_text),
1496 url: Cow::Borrowed(""),
1497 is_reference: true,
1498 reference_id: Some(normalized_ref),
1499 link_type: LinkType::Reference, });
1501 }
1502 }
1503
1504 images
1505 }
1506
1507 fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1509 let mut refs = Vec::with_capacity(lines.len() / 20); for (line_idx, line_info) in lines.iter().enumerate() {
1513 if line_info.in_code_block {
1515 continue;
1516 }
1517
1518 let line = line_info.content(content);
1519 let line_num = line_idx + 1;
1520
1521 if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1522 let id = cap.get(1).unwrap().as_str().to_lowercase();
1523 let url = cap.get(2).unwrap().as_str().to_string();
1524 let title = cap.get(3).or_else(|| cap.get(4)).map(|m| m.as_str().to_string());
1525
1526 let match_obj = cap.get(0).unwrap();
1529 let byte_offset = line_info.byte_offset + match_obj.start();
1530 let byte_end = line_info.byte_offset + match_obj.end();
1531
1532 refs.push(ReferenceDef {
1533 line: line_num,
1534 id,
1535 url,
1536 title,
1537 byte_offset,
1538 byte_end,
1539 });
1540 }
1541 }
1542
1543 refs
1544 }
1545
#[inline]
/// Splits a blockquote line into its prefix and the quoted content.
///
/// Returns `None` unless the first non-whitespace character of `line` is `>`.
/// Otherwise returns `(prefix, content)`, where `prefix` is everything up to and
/// including the whitespace that follows the first `>` and `content` is the remainder.
/// Only one level of `>` is consumed; nested markers stay in `content`.
fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
    let after_marker = line.trim_start().strip_prefix('>')?;
    let content = after_marker.trim_start();

    // `content` is a suffix of `line`, so the prefix is whatever precedes it.
    let (prefix, _) = line.split_at(line.len() - content.len());
    Some((prefix, content))
}
1564
#[inline]
/// Recognises an unordered list marker at the start of `line`.
///
/// Leading spaces/tabs are skipped, then a single `-`, `*` or `+` is required.
/// Returns `(leading_whitespace, marker, spacing_after_marker, rest_of_line)`,
/// or `None` when the line is empty/whitespace-only or the first other character
/// is not one of the three markers. The spacing may be empty; callers decide
/// whether that still counts as a list item.
fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
    // Only plain spaces and tabs count as indentation or marker spacing here.
    fn is_space_or_tab(c: char) -> bool {
        c == ' ' || c == '\t'
    }

    let after_indent = line.trim_start_matches(is_space_or_tab);
    let (indent, _) = line.split_at(line.len() - after_indent.len());

    let mut remaining = after_indent.chars();
    let marker = remaining.next().filter(|c| matches!(c, '-' | '*' | '+'))?;

    let after_marker = remaining.as_str();
    let rest = after_marker.trim_start_matches(is_space_or_tab);
    let (spacing, _) = after_marker.split_at(after_marker.len() - rest.len());

    Some((indent, marker, spacing, rest))
}
1597
#[inline]
/// Recognises an ordered list marker (`1.` / `1)`) at the start of `line`.
///
/// Leading spaces/tabs are skipped, then one or more ASCII digits followed by `.`
/// or `)` are required. Returns
/// `(leading_whitespace, digits, delimiter, spacing_after_delimiter, rest_of_line)`,
/// or `None` when there are no digits, the line ends right after them, or the
/// following character is not a valid delimiter. The spacing may be empty.
fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
    // Only plain spaces and tabs count as indentation or marker spacing here.
    fn is_space_or_tab(c: char) -> bool {
        c == ' ' || c == '\t'
    }

    let after_indent = line.trim_start_matches(is_space_or_tab);
    let (indent, _) = line.split_at(line.len() - after_indent.len());

    let digit_count = after_indent.bytes().take_while(u8::is_ascii_digit).count();
    if digit_count == 0 {
        return None;
    }
    // ASCII digits are one byte each, so the count is a valid split point.
    let (number, after_number) = after_indent.split_at(digit_count);

    let mut remaining = after_number.chars();
    let delimiter = remaining.next().filter(|c| matches!(c, '.' | ')'))?;

    let after_delimiter = remaining.as_str();
    let rest = after_delimiter.trim_start_matches(is_space_or_tab);
    let (spacing, _) = after_delimiter.split_at(after_delimiter.len() - rest.len());

    Some((indent, number, delimiter, spacing, rest))
}
1645
/// Builds a per-line flag vector telling which lines overlap a code block byte range.
///
/// `line_offsets` holds the starting byte offset of each line (ascending); the result has
/// one entry per offset. For every `(start, end)` range, `start` is moved back and `end`
/// is moved forward to the nearest UTF-8 character boundary of `content` (both clamped to
/// the content length), and every line from the one containing the adjusted start up to
/// (but excluding) the first line starting at or after the adjusted end is marked `true`.
/// Ranges that end before they start mark nothing.
fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
    let content_len = content.len();
    let mut in_code_block = vec![false; line_offsets.len()];

    for &(start, end) in code_blocks {
        // Nearest char boundary at or before `start`; offset 0 always qualifies.
        let safe_start = (0..=start)
            .rev()
            .find(|&pos| content.is_char_boundary(pos))
            .unwrap_or(0);

        // Nearest char boundary at or after `end`; the content length always qualifies.
        let safe_end = (end.min(content_len)..=content_len)
            .find(|&pos| content.is_char_boundary(pos))
            .unwrap_or(content_len);

        // Line containing `safe_start`: the last line whose offset is <= safe_start.
        let first_line = line_offsets
            .partition_point(|&offset| offset <= safe_start)
            .saturating_sub(1);
        // One past the last line that begins before `safe_end`.
        let last_line = line_offsets.partition_point(|&offset| offset < safe_end);

        // `get_mut` yields `None` for an inverted range, which then marks nothing.
        if let Some(covered) = in_code_block.get_mut(first_line..last_line) {
            covered.fill(true);
        }
    }

    in_code_block
}
1705
1706 fn compute_basic_line_info(
1708 content: &str,
1709 line_offsets: &[usize],
1710 code_blocks: &[(usize, usize)],
1711 flavor: MarkdownFlavor,
1712 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1713 autodoc_ranges: &[crate::utils::skip_context::ByteRange],
1714 ) -> Vec<LineInfo> {
1715 let content_lines: Vec<&str> = content.lines().collect();
1716 let mut lines = Vec::with_capacity(content_lines.len());
1717
1718 let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
1720
1721 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
1724
1725 for (i, line) in content_lines.iter().enumerate() {
1726 let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
1727 let indent = line.len() - line.trim_start().len();
1728
1729 let blockquote_parse = Self::parse_blockquote_prefix(line);
1731
1732 let is_blank = if let Some((_, content)) = blockquote_parse {
1734 content.trim().is_empty()
1736 } else {
1737 line.trim().is_empty()
1738 };
1739
1740 let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
1742
1743 let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
1745 && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
1746 let in_html_comment =
1748 crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, byte_offset);
1749 let list_item = if !(in_code_block
1750 || is_blank
1751 || in_mkdocstrings
1752 || in_html_comment
1753 || (front_matter_end > 0 && i < front_matter_end))
1754 {
1755 let (line_for_list_check, blockquote_prefix_len) = if let Some((prefix, content)) = blockquote_parse {
1757 (content, prefix.len())
1758 } else {
1759 (&**line, 0)
1760 };
1761
1762 if let Some((leading_spaces, marker, spacing, _content)) =
1763 Self::parse_unordered_list(line_for_list_check)
1764 {
1765 let marker_column = blockquote_prefix_len + leading_spaces.len();
1766 let content_column = marker_column + 1 + spacing.len();
1767
1768 if spacing.is_empty() {
1775 None
1776 } else {
1777 Some(ListItemInfo {
1778 marker: marker.to_string(),
1779 is_ordered: false,
1780 number: None,
1781 marker_column,
1782 content_column,
1783 })
1784 }
1785 } else if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
1786 Self::parse_ordered_list(line_for_list_check)
1787 {
1788 let marker = format!("{number_str}{delimiter}");
1789 let marker_column = blockquote_prefix_len + leading_spaces.len();
1790 let content_column = marker_column + marker.len() + spacing.len();
1791
1792 if spacing.is_empty() {
1795 None
1796 } else {
1797 Some(ListItemInfo {
1798 marker,
1799 is_ordered: true,
1800 number: number_str.parse().ok(),
1801 marker_column,
1802 content_column,
1803 })
1804 }
1805 } else {
1806 None
1807 }
1808 } else {
1809 None
1810 };
1811
1812 lines.push(LineInfo {
1813 byte_offset,
1814 byte_len: line.len(),
1815 indent,
1816 is_blank,
1817 in_code_block,
1818 in_front_matter: front_matter_end > 0 && i < front_matter_end,
1819 in_html_block: false, in_html_comment,
1821 list_item,
1822 heading: None, blockquote: None, in_mkdocstrings,
1825 in_esm_block: false, in_code_span_continuation: false, });
1828 }
1829
1830 lines
1831 }
1832
1833 fn detect_headings_and_blockquotes(
1835 content: &str,
1836 lines: &mut [LineInfo],
1837 flavor: MarkdownFlavor,
1838 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1839 ) {
1840 static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
1842 LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
1843 static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
1844 LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
1845
1846 let content_lines: Vec<&str> = content.lines().collect();
1847
1848 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
1850
1851 for i in 0..lines.len() {
1853 if lines[i].in_code_block {
1854 continue;
1855 }
1856
1857 if front_matter_end > 0 && i < front_matter_end {
1859 continue;
1860 }
1861
1862 if lines[i].in_html_block {
1864 continue;
1865 }
1866
1867 let line = content_lines[i];
1868
1869 if let Some(bq) = parse_blockquote_detailed(line) {
1871 let nesting_level = bq.markers.len(); let marker_column = bq.indent.len();
1873
1874 let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
1876
1877 let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
1879 let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
1882
1883 let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
1887
1888 lines[i].blockquote = Some(BlockquoteInfo {
1889 nesting_level,
1890 indent: bq.indent.to_string(),
1891 marker_column,
1892 prefix,
1893 content: bq.content.to_string(),
1894 has_no_space_after_marker: has_no_space,
1895 has_multiple_spaces_after_marker: has_multiple_spaces,
1896 needs_md028_fix,
1897 });
1898 }
1899
1900 if lines[i].is_blank {
1902 continue;
1903 }
1904
1905 let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
1908 crate::utils::mkdocs_snippets::is_snippet_section_start(line)
1909 || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
1910 } else {
1911 false
1912 };
1913
1914 if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
1915 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
1917 continue;
1918 }
1919 let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
1920 let hashes = caps.get(2).map_or("", |m| m.as_str());
1921 let spaces_after = caps.get(3).map_or("", |m| m.as_str());
1922 let rest = caps.get(4).map_or("", |m| m.as_str());
1923
1924 let level = hashes.len() as u8;
1925 let marker_column = leading_spaces.len();
1926
1927 let (text, has_closing, closing_seq) = {
1929 let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
1931 if rest[id_start..].trim_end().ends_with('}') {
1933 (&rest[..id_start], &rest[id_start..])
1935 } else {
1936 (rest, "")
1937 }
1938 } else {
1939 (rest, "")
1940 };
1941
1942 let trimmed_rest = rest_without_id.trim_end();
1944 if let Some(last_hash_pos) = trimmed_rest.rfind('#') {
1945 let mut start_of_hashes = last_hash_pos;
1947 while start_of_hashes > 0 && trimmed_rest.chars().nth(start_of_hashes - 1) == Some('#') {
1948 start_of_hashes -= 1;
1949 }
1950
1951 let has_space_before = start_of_hashes == 0
1953 || trimmed_rest
1954 .chars()
1955 .nth(start_of_hashes - 1)
1956 .is_some_and(|c| c.is_whitespace());
1957
1958 let potential_closing = &trimmed_rest[start_of_hashes..];
1960 let is_all_hashes = potential_closing.chars().all(|c| c == '#');
1961
1962 if is_all_hashes && has_space_before {
1963 let closing_hashes = potential_closing.to_string();
1965 let text_part = if !custom_id_part.is_empty() {
1968 format!("{}{}", rest_without_id[..start_of_hashes].trim_end(), custom_id_part)
1971 } else {
1972 rest_without_id[..start_of_hashes].trim_end().to_string()
1973 };
1974 (text_part, true, closing_hashes)
1975 } else {
1976 (rest.to_string(), false, String::new())
1978 }
1979 } else {
1980 (rest.to_string(), false, String::new())
1982 }
1983 };
1984
1985 let content_column = marker_column + hashes.len() + spaces_after.len();
1986
1987 let raw_text = text.trim().to_string();
1989 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
1990
1991 if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
1993 let next_line = content_lines[i + 1];
1994 if !lines[i + 1].in_code_block
1995 && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
1996 && let Some(next_line_id) =
1997 crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
1998 {
1999 custom_id = Some(next_line_id);
2000 }
2001 }
2002
2003 lines[i].heading = Some(HeadingInfo {
2004 level,
2005 style: HeadingStyle::ATX,
2006 marker: hashes.to_string(),
2007 marker_column,
2008 content_column,
2009 text: clean_text,
2010 custom_id,
2011 raw_text,
2012 has_closing_sequence: has_closing,
2013 closing_sequence: closing_seq,
2014 });
2015 }
2016 else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2018 let next_line = content_lines[i + 1];
2019 if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2020 if front_matter_end > 0 && i < front_matter_end {
2022 continue;
2023 }
2024
2025 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2027 {
2028 continue;
2029 }
2030
2031 let underline = next_line.trim();
2032
2033 let level = if underline.starts_with('=') { 1 } else { 2 };
2034 let style = if level == 1 {
2035 HeadingStyle::Setext1
2036 } else {
2037 HeadingStyle::Setext2
2038 };
2039
2040 let raw_text = line.trim().to_string();
2042 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2043
2044 if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2046 let attr_line = content_lines[i + 2];
2047 if !lines[i + 2].in_code_block
2048 && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2049 && let Some(attr_line_id) =
2050 crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2051 {
2052 custom_id = Some(attr_line_id);
2053 }
2054 }
2055
2056 lines[i].heading = Some(HeadingInfo {
2057 level,
2058 style,
2059 marker: underline.to_string(),
2060 marker_column: next_line.len() - next_line.trim_start().len(),
2061 content_column: lines[i].indent,
2062 text: clean_text,
2063 custom_id,
2064 raw_text,
2065 has_closing_sequence: false,
2066 closing_sequence: String::new(),
2067 });
2068 }
2069 }
2070 }
2071 }
2072
2073 fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2075 const BLOCK_ELEMENTS: &[&str] = &[
2077 "address",
2078 "article",
2079 "aside",
2080 "blockquote",
2081 "details",
2082 "dialog",
2083 "dd",
2084 "div",
2085 "dl",
2086 "dt",
2087 "fieldset",
2088 "figcaption",
2089 "figure",
2090 "footer",
2091 "form",
2092 "h1",
2093 "h2",
2094 "h3",
2095 "h4",
2096 "h5",
2097 "h6",
2098 "header",
2099 "hr",
2100 "li",
2101 "main",
2102 "nav",
2103 "ol",
2104 "p",
2105 "picture",
2106 "pre",
2107 "script",
2108 "section",
2109 "style",
2110 "table",
2111 "tbody",
2112 "td",
2113 "textarea",
2114 "tfoot",
2115 "th",
2116 "thead",
2117 "tr",
2118 "ul",
2119 ];
2120
2121 let mut i = 0;
2122 while i < lines.len() {
2123 if lines[i].in_code_block || lines[i].in_front_matter {
2125 i += 1;
2126 continue;
2127 }
2128
2129 let trimmed = lines[i].content(content).trim_start();
2130
2131 if trimmed.starts_with('<') && trimmed.len() > 1 {
2133 let after_bracket = &trimmed[1..];
2135 let is_closing = after_bracket.starts_with('/');
2136 let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2137
2138 let tag_name = tag_start
2140 .chars()
2141 .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2142 .collect::<String>()
2143 .to_lowercase();
2144
2145 if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2147 lines[i].in_html_block = true;
2149
2150 if !is_closing {
2153 let closing_tag = format!("</{tag_name}>");
2154 let allow_blank_lines = tag_name == "style" || tag_name == "script";
2156 let mut j = i + 1;
2157 while j < lines.len() && j < i + 100 {
2158 if !allow_blank_lines && lines[j].is_blank {
2161 break;
2162 }
2163
2164 lines[j].in_html_block = true;
2165
2166 if lines[j].content(content).contains(&closing_tag) {
2168 break;
2169 }
2170 j += 1;
2171 }
2172 }
2173 }
2174 }
2175
2176 i += 1;
2177 }
2178 }
2179
2180 fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2183 if !flavor.supports_esm_blocks() {
2185 return;
2186 }
2187
2188 for line in lines.iter_mut() {
2189 if line.is_blank || line.in_html_comment {
2191 continue;
2192 }
2193
2194 let trimmed = line.content(content).trim_start();
2196 if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
2197 line.in_esm_block = true;
2198 } else {
2199 break;
2201 }
2202 }
2203 }
2204
2205 fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
2207 let mut code_spans = Vec::new();
2208
2209 if !content.contains('`') {
2211 return code_spans;
2212 }
2213
2214 let parser = Parser::new(content).into_offset_iter();
2216
2217 for (event, range) in parser {
2218 if let Event::Code(_) = event {
2219 let start_pos = range.start;
2220 let end_pos = range.end;
2221
2222 let full_span = &content[start_pos..end_pos];
2224 let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
2225
2226 let content_start = start_pos + backtick_count;
2228 let content_end = end_pos - backtick_count;
2229 let span_content = if content_start < content_end {
2230 content[content_start..content_end].to_string()
2231 } else {
2232 String::new()
2233 };
2234
2235 let line_idx = lines
2238 .partition_point(|line| line.byte_offset <= start_pos)
2239 .saturating_sub(1);
2240 let line_num = line_idx + 1;
2241 let byte_col_start = start_pos - lines[line_idx].byte_offset;
2242
2243 let end_line_idx = lines
2245 .partition_point(|line| line.byte_offset <= end_pos)
2246 .saturating_sub(1);
2247 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
2248
2249 let line_content = lines[line_idx].content(content);
2252 let col_start = if byte_col_start <= line_content.len() {
2253 line_content[..byte_col_start].chars().count()
2254 } else {
2255 line_content.chars().count()
2256 };
2257
2258 let end_line_content = lines[end_line_idx].content(content);
2259 let col_end = if byte_col_end <= end_line_content.len() {
2260 end_line_content[..byte_col_end].chars().count()
2261 } else {
2262 end_line_content.chars().count()
2263 };
2264
2265 code_spans.push(CodeSpan {
2266 line: line_num,
2267 end_line: end_line_idx + 1,
2268 start_col: col_start,
2269 end_col: col_end,
2270 byte_offset: start_pos,
2271 byte_end: end_pos,
2272 backtick_count,
2273 content: span_content,
2274 });
2275 }
2276 }
2277
2278 code_spans.sort_by_key(|span| span.byte_offset);
2280
2281 code_spans
2282 }
2283
2284 fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
2295 const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
2297
2298 #[inline]
2301 fn reset_tracking_state(
2302 list_item: &ListItemInfo,
2303 has_list_breaking_content: &mut bool,
2304 min_continuation: &mut usize,
2305 ) {
2306 *has_list_breaking_content = false;
2307 let marker_width = if list_item.is_ordered {
2308 list_item.marker.len() + 1 } else {
2310 list_item.marker.len()
2311 };
2312 *min_continuation = if list_item.is_ordered {
2313 marker_width
2314 } else {
2315 UNORDERED_LIST_MIN_CONTINUATION_INDENT
2316 };
2317 }
2318
2319 let mut list_blocks = Vec::with_capacity(lines.len() / 10); let mut current_block: Option<ListBlock> = None;
2322 let mut last_list_item_line = 0;
2323 let mut current_indent_level = 0;
2324 let mut last_marker_width = 0;
2325
2326 let mut has_list_breaking_content_since_last_item = false;
2328 let mut min_continuation_for_tracking = 0;
2329
2330 for (line_idx, line_info) in lines.iter().enumerate() {
2331 let line_num = line_idx + 1;
2332
2333 if line_info.in_code_block {
2335 if let Some(ref mut block) = current_block {
2336 let min_continuation_indent =
2338 CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
2339
2340 let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
2342
2343 match context {
2344 CodeBlockContext::Indented => {
2345 block.end_line = line_num;
2347 continue;
2348 }
2349 CodeBlockContext::Standalone => {
2350 let completed_block = current_block.take().unwrap();
2352 list_blocks.push(completed_block);
2353 continue;
2354 }
2355 CodeBlockContext::Adjacent => {
2356 block.end_line = line_num;
2358 continue;
2359 }
2360 }
2361 } else {
2362 continue;
2364 }
2365 }
2366
2367 let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
2369 caps.get(0).unwrap().as_str().to_string()
2370 } else {
2371 String::new()
2372 };
2373
2374 if current_block.is_some()
2377 && line_info.list_item.is_none()
2378 && !line_info.is_blank
2379 && !line_info.in_code_span_continuation
2380 {
2381 let line_content = line_info.content(content).trim();
2382
2383 let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
2388 let breaks_list = line_info.heading.is_some()
2389 || line_content.starts_with("---")
2390 || line_content.starts_with("***")
2391 || line_content.starts_with("___")
2392 || crate::utils::skip_context::is_table_line(line_content)
2393 || line_content.starts_with(">")
2394 || (line_info.indent > 0
2395 && line_info.indent < min_continuation_for_tracking
2396 && !is_lazy_continuation);
2397
2398 if breaks_list {
2399 has_list_breaking_content_since_last_item = true;
2400 }
2401 }
2402
2403 if line_info.in_code_span_continuation
2406 && line_info.list_item.is_none()
2407 && let Some(ref mut block) = current_block
2408 {
2409 block.end_line = line_num;
2410 }
2411
2412 let is_valid_continuation =
2417 line_info.indent >= min_continuation_for_tracking || (line_info.indent == 0 && !line_info.is_blank); if !line_info.in_code_span_continuation
2419 && line_info.list_item.is_none()
2420 && !line_info.is_blank
2421 && !line_info.in_code_block
2422 && is_valid_continuation
2423 && let Some(ref mut block) = current_block
2424 {
2425 block.end_line = line_num;
2426 }
2427
2428 if let Some(list_item) = &line_info.list_item {
2430 let item_indent = list_item.marker_column;
2432 let nesting = item_indent / 2; if let Some(ref mut block) = current_block {
2435 let is_nested = nesting > block.nesting_level;
2439 let same_type =
2440 (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
2441 let same_context = block.blockquote_prefix == blockquote_prefix;
2442 let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
2444
2445 let marker_compatible =
2447 block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
2448
2449 let has_non_list_content = has_list_breaking_content_since_last_item;
2452
2453 let mut continues_list = if is_nested {
2457 same_context && reasonable_distance && !has_non_list_content
2459 } else {
2460 same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
2462 };
2463
2464 if !continues_list && reasonable_distance && line_num > 0 && block.end_line == line_num - 1 {
2467 if block.item_lines.contains(&(line_num - 1)) {
2470 continues_list = true;
2472 } else {
2473 continues_list = true;
2477 }
2478 }
2479
2480 if continues_list {
2481 block.end_line = line_num;
2483 block.item_lines.push(line_num);
2484
2485 block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
2487 list_item.marker.len() + 1
2488 } else {
2489 list_item.marker.len()
2490 });
2491
2492 if !block.is_ordered
2494 && block.marker.is_some()
2495 && block.marker.as_ref() != Some(&list_item.marker)
2496 {
2497 block.marker = None;
2499 }
2500
2501 reset_tracking_state(
2503 list_item,
2504 &mut has_list_breaking_content_since_last_item,
2505 &mut min_continuation_for_tracking,
2506 );
2507 } else {
2508 list_blocks.push(block.clone());
2511
2512 *block = ListBlock {
2513 start_line: line_num,
2514 end_line: line_num,
2515 is_ordered: list_item.is_ordered,
2516 marker: if list_item.is_ordered {
2517 None
2518 } else {
2519 Some(list_item.marker.clone())
2520 },
2521 blockquote_prefix: blockquote_prefix.clone(),
2522 item_lines: vec![line_num],
2523 nesting_level: nesting,
2524 max_marker_width: if list_item.is_ordered {
2525 list_item.marker.len() + 1
2526 } else {
2527 list_item.marker.len()
2528 },
2529 };
2530
2531 reset_tracking_state(
2533 list_item,
2534 &mut has_list_breaking_content_since_last_item,
2535 &mut min_continuation_for_tracking,
2536 );
2537 }
2538 } else {
2539 current_block = Some(ListBlock {
2541 start_line: line_num,
2542 end_line: line_num,
2543 is_ordered: list_item.is_ordered,
2544 marker: if list_item.is_ordered {
2545 None
2546 } else {
2547 Some(list_item.marker.clone())
2548 },
2549 blockquote_prefix,
2550 item_lines: vec![line_num],
2551 nesting_level: nesting,
2552 max_marker_width: list_item.marker.len(),
2553 });
2554
2555 reset_tracking_state(
2557 list_item,
2558 &mut has_list_breaking_content_since_last_item,
2559 &mut min_continuation_for_tracking,
2560 );
2561 }
2562
2563 last_list_item_line = line_num;
2564 current_indent_level = item_indent;
2565 last_marker_width = if list_item.is_ordered {
2566 list_item.marker.len() + 1 } else {
2568 list_item.marker.len()
2569 };
2570 } else if let Some(ref mut block) = current_block {
2571 let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
2581 lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
2582 } else {
2583 false
2584 };
2585
2586 let min_continuation_indent = if block.is_ordered {
2590 current_indent_level + last_marker_width
2591 } else {
2592 current_indent_level + 2 };
2594
2595 if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
2596 block.end_line = line_num;
2598 } else if line_info.is_blank {
2599 let mut check_idx = line_idx + 1;
2602 let mut found_continuation = false;
2603
2604 while check_idx < lines.len() && lines[check_idx].is_blank {
2606 check_idx += 1;
2607 }
2608
2609 if check_idx < lines.len() {
2610 let next_line = &lines[check_idx];
2611 if !next_line.in_code_block && next_line.indent >= min_continuation_indent {
2613 found_continuation = true;
2614 }
2615 else if !next_line.in_code_block
2617 && next_line.list_item.is_some()
2618 && let Some(item) = &next_line.list_item
2619 {
2620 let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
2621 .find(next_line.content(content))
2622 .map_or(String::new(), |m| m.as_str().to_string());
2623 if item.marker_column == current_indent_level
2624 && item.is_ordered == block.is_ordered
2625 && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
2626 {
2627 let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
2630 if let Some(between_line) = lines.get(idx) {
2631 let between_content = between_line.content(content);
2632 let trimmed = between_content.trim();
2633 if trimmed.is_empty() {
2635 return false;
2636 }
2637 let line_indent = between_content.len() - between_content.trim_start().len();
2639
2640 if trimmed.starts_with("```")
2642 || trimmed.starts_with("~~~")
2643 || trimmed.starts_with("---")
2644 || trimmed.starts_with("***")
2645 || trimmed.starts_with("___")
2646 || trimmed.starts_with(">")
2647 || crate::utils::skip_context::is_table_line(trimmed)
2648 || between_line.heading.is_some()
2649 {
2650 return true; }
2652
2653 line_indent >= min_continuation_indent
2655 } else {
2656 false
2657 }
2658 });
2659
2660 if block.is_ordered {
2661 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2664 if let Some(between_line) = lines.get(idx) {
2665 let trimmed = between_line.content(content).trim();
2666 if trimmed.is_empty() {
2667 return false;
2668 }
2669 trimmed.starts_with("```")
2671 || trimmed.starts_with("~~~")
2672 || trimmed.starts_with("---")
2673 || trimmed.starts_with("***")
2674 || trimmed.starts_with("___")
2675 || trimmed.starts_with(">")
2676 || crate::utils::skip_context::is_table_line(trimmed)
2677 || between_line.heading.is_some()
2678 } else {
2679 false
2680 }
2681 });
2682 found_continuation = !has_structural_separators;
2683 } else {
2684 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2686 if let Some(between_line) = lines.get(idx) {
2687 let trimmed = between_line.content(content).trim();
2688 if trimmed.is_empty() {
2689 return false;
2690 }
2691 trimmed.starts_with("```")
2693 || trimmed.starts_with("~~~")
2694 || trimmed.starts_with("---")
2695 || trimmed.starts_with("***")
2696 || trimmed.starts_with("___")
2697 || trimmed.starts_with(">")
2698 || crate::utils::skip_context::is_table_line(trimmed)
2699 || between_line.heading.is_some()
2700 } else {
2701 false
2702 }
2703 });
2704 found_continuation = !has_structural_separators;
2705 }
2706 }
2707 }
2708 }
2709
2710 if found_continuation {
2711 block.end_line = line_num;
2713 } else {
2714 list_blocks.push(block.clone());
2716 current_block = None;
2717 }
2718 } else {
2719 let min_required_indent = if block.is_ordered {
2722 current_indent_level + last_marker_width
2723 } else {
2724 current_indent_level + 2
2725 };
2726
2727 let line_content = line_info.content(content).trim();
2732
2733 let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
2735
2736 let is_structural_separator = line_info.heading.is_some()
2737 || line_content.starts_with("```")
2738 || line_content.starts_with("~~~")
2739 || line_content.starts_with("---")
2740 || line_content.starts_with("***")
2741 || line_content.starts_with("___")
2742 || line_content.starts_with(">")
2743 || looks_like_table;
2744
2745 let is_lazy_continuation = !is_structural_separator
2748 && !line_info.is_blank
2749 && (line_info.indent == 0 || line_info.indent >= min_required_indent);
2750
2751 if is_lazy_continuation {
2752 let content_to_check = if !blockquote_prefix.is_empty() {
2755 line_info
2757 .content(content)
2758 .strip_prefix(&blockquote_prefix)
2759 .unwrap_or(line_info.content(content))
2760 .trim()
2761 } else {
2762 line_info.content(content).trim()
2763 };
2764
2765 let starts_with_uppercase = content_to_check.chars().next().is_some_and(|c| c.is_uppercase());
2766
2767 if starts_with_uppercase && last_list_item_line > 0 {
2770 list_blocks.push(block.clone());
2772 current_block = None;
2773 } else {
2774 block.end_line = line_num;
2776 }
2777 } else {
2778 list_blocks.push(block.clone());
2780 current_block = None;
2781 }
2782 }
2783 }
2784 }
2785
2786 if let Some(block) = current_block {
2788 list_blocks.push(block);
2789 }
2790
2791 merge_adjacent_list_blocks(content, &mut list_blocks, lines);
2793
2794 list_blocks
2795 }
2796
2797 fn compute_char_frequency(content: &str) -> CharFrequency {
2799 let mut frequency = CharFrequency::default();
2800
2801 for ch in content.chars() {
2802 match ch {
2803 '#' => frequency.hash_count += 1,
2804 '*' => frequency.asterisk_count += 1,
2805 '_' => frequency.underscore_count += 1,
2806 '-' => frequency.hyphen_count += 1,
2807 '+' => frequency.plus_count += 1,
2808 '>' => frequency.gt_count += 1,
2809 '|' => frequency.pipe_count += 1,
2810 '[' => frequency.bracket_count += 1,
2811 '`' => frequency.backtick_count += 1,
2812 '<' => frequency.lt_count += 1,
2813 '!' => frequency.exclamation_count += 1,
2814 '\n' => frequency.newline_count += 1,
2815 _ => {}
2816 }
2817 }
2818
2819 frequency
2820 }
2821
2822 fn parse_html_tags(
2824 content: &str,
2825 lines: &[LineInfo],
2826 code_blocks: &[(usize, usize)],
2827 flavor: MarkdownFlavor,
2828 ) -> Vec<HtmlTag> {
2829 static HTML_TAG_REGEX: LazyLock<regex::Regex> =
2830 LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
2831
2832 let mut html_tags = Vec::with_capacity(content.matches('<').count());
2833
2834 for cap in HTML_TAG_REGEX.captures_iter(content) {
2835 let full_match = cap.get(0).unwrap();
2836 let match_start = full_match.start();
2837 let match_end = full_match.end();
2838
2839 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2841 continue;
2842 }
2843
2844 let is_closing = !cap.get(1).unwrap().as_str().is_empty();
2845 let tag_name_original = cap.get(2).unwrap().as_str();
2846 let tag_name = tag_name_original.to_lowercase();
2847 let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
2848
2849 if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
2852 continue;
2853 }
2854
2855 let mut line_num = 1;
2857 let mut col_start = match_start;
2858 let mut col_end = match_end;
2859 for (idx, line_info) in lines.iter().enumerate() {
2860 if match_start >= line_info.byte_offset {
2861 line_num = idx + 1;
2862 col_start = match_start - line_info.byte_offset;
2863 col_end = match_end - line_info.byte_offset;
2864 } else {
2865 break;
2866 }
2867 }
2868
2869 html_tags.push(HtmlTag {
2870 line: line_num,
2871 start_col: col_start,
2872 end_col: col_end,
2873 byte_offset: match_start,
2874 byte_end: match_end,
2875 tag_name,
2876 is_closing,
2877 is_self_closing,
2878 raw_content: full_match.as_str().to_string(),
2879 });
2880 }
2881
2882 html_tags
2883 }
2884
2885 fn parse_emphasis_spans(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<EmphasisSpan> {
2887 static EMPHASIS_REGEX: LazyLock<regex::Regex> =
2888 LazyLock::new(|| regex::Regex::new(r"(\*{1,3}|_{1,3})([^*_\s][^*_]*?)(\*{1,3}|_{1,3})").unwrap());
2889
2890 let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
2891
2892 for cap in EMPHASIS_REGEX.captures_iter(content) {
2893 let full_match = cap.get(0).unwrap();
2894 let match_start = full_match.start();
2895 let match_end = full_match.end();
2896
2897 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2899 continue;
2900 }
2901
2902 let opening_markers = cap.get(1).unwrap().as_str();
2903 let content_part = cap.get(2).unwrap().as_str();
2904 let closing_markers = cap.get(3).unwrap().as_str();
2905
2906 if opening_markers.chars().next() != closing_markers.chars().next()
2908 || opening_markers.len() != closing_markers.len()
2909 {
2910 continue;
2911 }
2912
2913 let marker = opening_markers.chars().next().unwrap();
2914 let marker_count = opening_markers.len();
2915
2916 let mut line_num = 1;
2918 let mut col_start = match_start;
2919 let mut col_end = match_end;
2920 for (idx, line_info) in lines.iter().enumerate() {
2921 if match_start >= line_info.byte_offset {
2922 line_num = idx + 1;
2923 col_start = match_start - line_info.byte_offset;
2924 col_end = match_end - line_info.byte_offset;
2925 } else {
2926 break;
2927 }
2928 }
2929
2930 emphasis_spans.push(EmphasisSpan {
2931 line: line_num,
2932 start_col: col_start,
2933 end_col: col_end,
2934 byte_offset: match_start,
2935 byte_end: match_end,
2936 marker,
2937 marker_count,
2938 content: content_part.to_string(),
2939 });
2940 }
2941
2942 emphasis_spans
2943 }
2944
2945 fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
2947 let mut table_rows = Vec::with_capacity(lines.len() / 20);
2948
2949 for (line_idx, line_info) in lines.iter().enumerate() {
2950 if line_info.in_code_block || line_info.is_blank {
2952 continue;
2953 }
2954
2955 let line = line_info.content(content);
2956 let line_num = line_idx + 1;
2957
2958 if !line.contains('|') {
2960 continue;
2961 }
2962
2963 let parts: Vec<&str> = line.split('|').collect();
2965 let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
2966
2967 let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
2969 let mut column_alignments = Vec::new();
2970
2971 if is_separator {
2972 for part in &parts[1..parts.len() - 1] {
2973 let trimmed = part.trim();
2975 let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
2976 "center".to_string()
2977 } else if trimmed.ends_with(':') {
2978 "right".to_string()
2979 } else if trimmed.starts_with(':') {
2980 "left".to_string()
2981 } else {
2982 "none".to_string()
2983 };
2984 column_alignments.push(alignment);
2985 }
2986 }
2987
2988 table_rows.push(TableRow {
2989 line: line_num,
2990 is_separator,
2991 column_count,
2992 column_alignments,
2993 });
2994 }
2995
2996 table_rows
2997 }
2998
2999 fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
3001 let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
3002
3003 for cap in BARE_URL_PATTERN.captures_iter(content) {
3005 let full_match = cap.get(0).unwrap();
3006 let match_start = full_match.start();
3007 let match_end = full_match.end();
3008
3009 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3011 continue;
3012 }
3013
3014 let preceding_char = if match_start > 0 {
3016 content.chars().nth(match_start - 1)
3017 } else {
3018 None
3019 };
3020 let following_char = content.chars().nth(match_end);
3021
3022 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3023 continue;
3024 }
3025 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3026 continue;
3027 }
3028
3029 let url = full_match.as_str();
3030 let url_type = if url.starts_with("https://") {
3031 "https"
3032 } else if url.starts_with("http://") {
3033 "http"
3034 } else if url.starts_with("ftp://") {
3035 "ftp"
3036 } else {
3037 "other"
3038 };
3039
3040 let mut line_num = 1;
3042 let mut col_start = match_start;
3043 let mut col_end = match_end;
3044 for (idx, line_info) in lines.iter().enumerate() {
3045 if match_start >= line_info.byte_offset {
3046 line_num = idx + 1;
3047 col_start = match_start - line_info.byte_offset;
3048 col_end = match_end - line_info.byte_offset;
3049 } else {
3050 break;
3051 }
3052 }
3053
3054 bare_urls.push(BareUrl {
3055 line: line_num,
3056 start_col: col_start,
3057 end_col: col_end,
3058 byte_offset: match_start,
3059 byte_end: match_end,
3060 url: url.to_string(),
3061 url_type: url_type.to_string(),
3062 });
3063 }
3064
3065 for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
3067 let full_match = cap.get(0).unwrap();
3068 let match_start = full_match.start();
3069 let match_end = full_match.end();
3070
3071 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3073 continue;
3074 }
3075
3076 let preceding_char = if match_start > 0 {
3078 content.chars().nth(match_start - 1)
3079 } else {
3080 None
3081 };
3082 let following_char = content.chars().nth(match_end);
3083
3084 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3085 continue;
3086 }
3087 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3088 continue;
3089 }
3090
3091 let email = full_match.as_str();
3092
3093 let mut line_num = 1;
3095 let mut col_start = match_start;
3096 let mut col_end = match_end;
3097 for (idx, line_info) in lines.iter().enumerate() {
3098 if match_start >= line_info.byte_offset {
3099 line_num = idx + 1;
3100 col_start = match_start - line_info.byte_offset;
3101 col_end = match_end - line_info.byte_offset;
3102 } else {
3103 break;
3104 }
3105 }
3106
3107 bare_urls.push(BareUrl {
3108 line: line_num,
3109 start_col: col_start,
3110 end_col: col_end,
3111 byte_offset: match_start,
3112 byte_end: match_end,
3113 url: email.to_string(),
3114 url_type: "email".to_string(),
3115 });
3116 }
3117
3118 bare_urls
3119 }
3120}
3121
3122fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
3124 if list_blocks.len() < 2 {
3125 return;
3126 }
3127
3128 let mut merger = ListBlockMerger::new(content, lines);
3129 *list_blocks = merger.merge(list_blocks);
3130}
3131
3132struct ListBlockMerger<'a> {
3134 content: &'a str,
3135 lines: &'a [LineInfo],
3136}
3137
3138impl<'a> ListBlockMerger<'a> {
3139 fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
3140 Self { content, lines }
3141 }
3142
3143 fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
3144 let mut merged = Vec::with_capacity(list_blocks.len());
3145 let mut current = list_blocks[0].clone();
3146
3147 for next in list_blocks.iter().skip(1) {
3148 if self.should_merge_blocks(¤t, next) {
3149 current = self.merge_two_blocks(current, next);
3150 } else {
3151 merged.push(current);
3152 current = next.clone();
3153 }
3154 }
3155
3156 merged.push(current);
3157 merged
3158 }
3159
3160 fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
3162 if !self.blocks_are_compatible(current, next) {
3164 return false;
3165 }
3166
3167 let spacing = self.analyze_spacing_between(current, next);
3169 match spacing {
3170 BlockSpacing::Consecutive => true,
3171 BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
3172 BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
3173 self.can_merge_with_content_between(current, next)
3174 }
3175 }
3176 }
3177
3178 fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
3180 current.is_ordered == next.is_ordered
3181 && current.blockquote_prefix == next.blockquote_prefix
3182 && current.nesting_level == next.nesting_level
3183 }
3184
3185 fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
3187 let gap = next.start_line - current.end_line;
3188
3189 match gap {
3190 1 => BlockSpacing::Consecutive,
3191 2 => BlockSpacing::SingleBlank,
3192 _ if gap > 2 => {
3193 if self.has_only_blank_lines_between(current, next) {
3194 BlockSpacing::MultipleBlanks
3195 } else {
3196 BlockSpacing::ContentBetween
3197 }
3198 }
3199 _ => BlockSpacing::Consecutive, }
3201 }
3202
3203 fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3205 if has_meaningful_content_between(self.content, current, next, self.lines) {
3208 return false; }
3210
3211 !current.is_ordered && current.marker == next.marker
3213 }
3214
3215 fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3217 if has_meaningful_content_between(self.content, current, next, self.lines) {
3219 return false; }
3221
3222 current.is_ordered && next.is_ordered
3224 }
3225
3226 fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3228 for line_num in (current.end_line + 1)..next.start_line {
3229 if let Some(line_info) = self.lines.get(line_num - 1)
3230 && !line_info.content(self.content).trim().is_empty()
3231 {
3232 return false;
3233 }
3234 }
3235 true
3236 }
3237
3238 fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
3240 current.end_line = next.end_line;
3241 current.item_lines.extend_from_slice(&next.item_lines);
3242
3243 current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
3245
3246 if !current.is_ordered && self.markers_differ(¤t, next) {
3248 current.marker = None; }
3250
3251 current
3252 }
3253
3254 fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
3256 current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
3257 }
3258}
3259
/// How two list blocks are separated, as classified by
/// `ListBlockMerger::analyze_spacing_between`.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// The next block starts on the line directly after the current block ends
    /// (also the fallback for a zero gap).
    Consecutive,
    /// Exactly one line lies between the two blocks.
    SingleBlank,
    /// Two or more lines lie between the blocks and all of them are blank.
    MultipleBlanks,
    /// Two or more lines lie between the blocks and at least one of them is not blank.
    ContentBetween,
}
3268
3269fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
3271 for line_num in (current.end_line + 1)..next.start_line {
3273 if let Some(line_info) = lines.get(line_num - 1) {
3274 let trimmed = line_info.content(content).trim();
3276
3277 if trimmed.is_empty() {
3279 continue;
3280 }
3281
3282 if line_info.heading.is_some() {
3286 return true; }
3288
3289 if is_horizontal_rule(trimmed) {
3291 return true; }
3293
3294 if crate::utils::skip_context::is_table_line(trimmed) {
3296 return true; }
3298
3299 if trimmed.starts_with('>') {
3301 return true; }
3303
3304 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
3306 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3307
3308 let min_continuation_indent = if current.is_ordered {
3310 current.nesting_level + current.max_marker_width + 1 } else {
3312 current.nesting_level + 2
3313 };
3314
3315 if line_indent < min_continuation_indent {
3316 return true; }
3319 }
3320
3321 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3323
3324 let min_indent = if current.is_ordered {
3326 current.nesting_level + current.max_marker_width
3327 } else {
3328 current.nesting_level + 2
3329 };
3330
3331 if line_indent < min_indent {
3333 return true; }
3335
3336 }
3339 }
3340
3341 false
3343}
3344
/// Checks whether `trimmed` is a horizontal rule.
///
/// The text must be at least three bytes long, start with `-`, `*` or `_`, contain that same
/// character at least three times, and otherwise consist only of spaces and tabs.
fn is_horizontal_rule(trimmed: &str) -> bool {
    let marker = match trimmed.chars().next() {
        Some(first @ ('-' | '*' | '_')) if trimmed.len() >= 3 => first,
        _ => return false,
    };

    let mut marker_count: usize = 0;
    for ch in trimmed.chars() {
        match ch {
            c if c == marker => marker_count += 1,
            ' ' | '\t' => {}
            // Anything besides the marker and inline whitespace disqualifies the line.
            _ => return false,
        }
    }

    marker_count >= 3
}
3368
#[cfg(test)]
mod tests {
    use super::*;

    /// Empty input still has one line offset but no line entries.
    #[test]
    fn test_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }

    /// A single line without a trailing newline maps offsets to 1-based columns on line 1.
    #[test]
    fn test_single_line() {
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
    }

    /// Line offsets and offset-to-position mapping across several lines.
    #[test]
    fn test_multi_line() {
        let content = "# Title\n\nSecond line\nThird line";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(8), (2, 1));
        assert_eq!(ctx.offset_to_line_col(9), (3, 1));
        assert_eq!(ctx.offset_to_line_col(15), (3, 7));
        assert_eq!(ctx.offset_to_line_col(21), (4, 1));
    }

    /// Per-line metadata: content, byte offset, indent, blank and code-block flags.
    #[test]
    fn test_line_info() {
        // The second line is indented by four spaces; the assertions below depend on it.
        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.lines.len(), 7);

        let line1 = &ctx.lines[0];
        assert_eq!(line1.content(ctx.content), "# Title");
        assert_eq!(line1.byte_offset, 0);
        assert_eq!(line1.indent, 0);
        assert!(!line1.is_blank);
        assert!(!line1.in_code_block);
        assert!(line1.list_item.is_none());

        let line2 = &ctx.lines[1];
        assert_eq!(line2.content(ctx.content), "    indented");
        assert_eq!(line2.byte_offset, 8);
        assert_eq!(line2.indent, 4);
        assert!(!line2.is_blank);

        let line3 = &ctx.lines[2];
        assert_eq!(line3.content(ctx.content), "");
        assert!(line3.is_blank);

        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
    }

    /// List markers, their columns and ordered-list numbers are detected per line.
    #[test]
    fn test_list_item_detection() {
        // The nested bullet sits at column 2 (asserted below); the nested ordered item is
        // indented to the content column of `1. `.
        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let line1 = &ctx.lines[0];
        assert!(line1.list_item.is_some());
        let list1 = line1.list_item.as_ref().unwrap();
        assert_eq!(list1.marker, "-");
        assert!(!list1.is_ordered);
        assert_eq!(list1.marker_column, 0);
        assert_eq!(list1.content_column, 2);

        let line2 = &ctx.lines[1];
        assert!(line2.list_item.is_some());
        let list2 = line2.list_item.as_ref().unwrap();
        assert_eq!(list2.marker, "*");
        assert_eq!(list2.marker_column, 2);

        let line3 = &ctx.lines[2];
        assert!(line3.list_item.is_some());
        let list3 = line3.list_item.as_ref().unwrap();
        assert_eq!(list3.marker, "1.");
        assert!(list3.is_ordered);
        assert_eq!(list3.number, Some(1));

        let line6 = &ctx.lines[5];
        assert!(line6.list_item.is_none());
    }

    /// Offsets on newline characters and at the very end of the content.
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        let content = "a\nb\nc";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(1), (1, 2));
        assert_eq!(ctx.offset_to_line_col(2), (2, 1));
        assert_eq!(ctx.offset_to_line_col(3), (2, 2));
        assert_eq!(ctx.offset_to_line_col(4), (3, 1));
        assert_eq!(ctx.offset_to_line_col(5), (3, 2));
    }

    /// In the MDX flavor, leading `import`/`export` lines are flagged as ESM blocks.
    #[test]
    fn test_mdx_esm_blocks() {
        let content = r##"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall

In {year}, the snowfall was above average.
It was followed by a warm spring which caused
flood conditions in many of the nearby rivers.

<Chart color="#fcb32c" year={year} />
"##;

        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);

        assert_eq!(ctx.lines.len(), 10);
        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
        assert!(
            !ctx.lines[3].in_esm_block,
            "Line 4 (heading) should NOT be in_esm_block"
        );
        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
    }

    /// The Standard flavor never flags `import`/`export` lines as ESM blocks.
    #[test]
    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
        let content = r#"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(
            !ctx.lines[0].in_esm_block,
            "Line 1 should NOT be in_esm_block in Standard flavor"
        );
        assert!(
            !ctx.lines[1].in_esm_block,
            "Line 2 should NOT be in_esm_block in Standard flavor"
        );
    }
}