1use crate::config::MarkdownFlavor;
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
4use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
5use regex::Regex;
6use std::borrow::Cow;
7use std::path::PathBuf;
8use std::sync::LazyLock;
9
/// Evaluates `$code` and yields its value. When `$profile` is true, the time
/// elapsed since just before `$code` started is printed to stderr under `$name`.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let started_at = std::time::Instant::now();
        let value = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, started_at.elapsed());
        }
        value
    }};
}

/// wasm32 variant: expands to `$code` only; `$name` and `$profile` are never expanded.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
27
/// Matches inline links `[text](url "title")` and reference links `[text][ref]`.
///
/// Flags: `s` lets `.` match newlines; `x` is verbose mode, so whitespace and the
/// `#` comments inside the pattern are ignored by the regex engine.
/// Capture groups: 1 = link text, 2 = URL in angle brackets, 3 = bare URL,
/// 4 / 5 = double- / single-quoted title, 6 = reference ID.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
 \[((?:[^\[\]\\]|\\.)*)\] # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
 (?:
 \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
 |
 \[([^\]]*)\] # Reference ID in group 6
 )"#
    ).unwrap()
});
41
/// Matches inline images `![alt](url "title")` and reference images `![alt][ref]`.
///
/// Same shape as the link pattern with a leading `!`. Flags: `s` lets `.` match
/// newlines; `x` is verbose mode (whitespace and `#` comments in the pattern are ignored).
/// Capture groups: 1 = alt text, 2 = URL in angle brackets, 3 = bare URL,
/// 4 / 5 = double- / single-quoted title, 6 = reference ID.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
 !\[((?:[^\[\]\\]|\\.)*)\] # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
 (?:
 \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
 |
 \[([^\]]*)\] # Reference ID in group 6
 )"#
    ).unwrap()
});
55
/// Matches a reference definition line such as `[id]: url "title"`.
///
/// Multi-line mode (`m`), so `^` / `$` anchor at line boundaries. Up to three leading
/// spaces are allowed. Capture groups: 1 = ID, 2 = URL (a run of non-whitespace),
/// 3 / 4 = optional double- / single-quoted title.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
59
/// Matches a bare `http://`, `https://` or `ftp://` URL.
///
/// The URL body stops at whitespace and at any of `< > [ ] ( ) \ ' " `` ` ``;
/// an optional `:port`, `/path`, `?query` and `#fragment` may follow.
/// Capture group 1 is the scheme.
static BARE_URL_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(https?|ftp)://[^\s<>\[\]()\\'"`]+(?:\.[^\s<>\[\]()\\'"`]+)*(?::\d+)?(?:/[^\s<>\[\]()\\'"`]*)?(?:\?[^\s<>\[\]()\\'"`]*)?(?:#[^\s<>\[\]()\\'"`]*)?"#
    ).unwrap()
});
66
/// Matches a bare e-mail address: local part, `@`, domain, and a final label of
/// at least two ASCII letters. This is a pragmatic pattern, not a full address grammar.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
70
/// Matches a blockquote prefix at the start of a string: optional whitespace, one or
/// more `>` characters, then optional whitespace. The whole prefix is capture group 1.
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
73
74#[derive(Debug, Clone)]
76pub struct LineInfo {
77 pub byte_offset: usize,
79 pub byte_len: usize,
81 pub indent: usize,
83 pub is_blank: bool,
85 pub in_code_block: bool,
87 pub in_front_matter: bool,
89 pub in_html_block: bool,
91 pub in_html_comment: bool,
93 pub list_item: Option<ListItemInfo>,
95 pub heading: Option<HeadingInfo>,
97 pub blockquote: Option<BlockquoteInfo>,
99 pub in_mkdocstrings: bool,
101 pub in_esm_block: bool,
103 pub in_code_span_continuation: bool,
105}
106
107impl LineInfo {
108 pub fn content<'a>(&self, source: &'a str) -> &'a str {
110 &source[self.byte_offset..self.byte_offset + self.byte_len]
111 }
112}
113
/// Details of a list item found on a line.
///
/// Populated outside this part of the file; the field notes follow the field names.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The list marker text.
    pub marker: String,
    /// Whether the item belongs to an ordered list.
    pub is_ordered: bool,
    /// The item number, when there is one.
    pub number: Option<usize>,
    /// Column at which the marker starts.
    pub marker_column: usize,
    /// Column at which the item content starts.
    pub content_column: usize,
}
128
/// Syntactic form in which a heading is written.
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    /// ATX heading (written with `#` markers).
    ATX,
    /// Setext heading, first variant (presumably the `=` underline — confirm at the producer).
    Setext1,
    /// Setext heading, second variant (presumably the `-` underline — confirm at the producer).
    Setext2,
}
139
140#[derive(Debug, Clone)]
142pub struct ParsedLink<'a> {
143 pub line: usize,
145 pub start_col: usize,
147 pub end_col: usize,
149 pub byte_offset: usize,
151 pub byte_end: usize,
153 pub text: Cow<'a, str>,
155 pub url: Cow<'a, str>,
157 pub is_reference: bool,
159 pub reference_id: Option<Cow<'a, str>>,
161 pub link_type: LinkType,
163}
164
/// A broken (unresolved) reference reported by the Markdown parser's
/// broken-link callback.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text that could not be resolved.
    pub reference: String,
    /// Byte range of the broken link in the document.
    pub span: std::ops::Range<usize>,
}
173
/// A footnote reference found in the document.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// Footnote identifier as reported by the parser.
    pub id: String,
    /// 1-based number of the line containing the reference.
    pub line: usize,
    /// Byte position in the document at which the reference starts.
    pub byte_offset: usize,
    /// Byte position in the document just past the reference.
    pub byte_end: usize,
}
186
187#[derive(Debug, Clone)]
189pub struct ParsedImage<'a> {
190 pub line: usize,
192 pub start_col: usize,
194 pub end_col: usize,
196 pub byte_offset: usize,
198 pub byte_end: usize,
200 pub alt_text: Cow<'a, str>,
202 pub url: Cow<'a, str>,
204 pub is_reference: bool,
206 pub reference_id: Option<Cow<'a, str>>,
208 pub link_type: LinkType,
210}
211
/// A reference definition such as `[id]: url "title"`.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// 1-based number of the line holding the definition.
    pub line: usize,
    /// Reference ID, stored lowercased.
    pub id: String,
    /// Destination URL.
    pub url: String,
    /// Optional title.
    pub title: Option<String>,
    /// Byte position in the document at which the definition starts.
    pub byte_offset: usize,
    /// Byte position in the document just past the definition.
    pub byte_end: usize,
}
228
/// An inline code span, which may extend over several lines.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// 1-based number of the line on which the span starts.
    pub line: usize,
    /// 1-based number of the line on which the span ends.
    pub end_line: usize,
    /// 0-based start column on `line` (inclusive in `is_in_code_span`).
    pub start_col: usize,
    /// 0-based end column on `end_line` (exclusive in `is_in_code_span`).
    pub end_col: usize,
    /// Byte position in the document at which the span starts (inclusive).
    pub byte_offset: usize,
    /// Byte position in the document at which the span ends (exclusive).
    pub byte_end: usize,
    /// Number of backticks in the delimiter.
    pub backtick_count: usize,
    /// Content of the span.
    pub content: String,
}
249
250#[derive(Debug, Clone)]
252pub struct HeadingInfo {
253 pub level: u8,
255 pub style: HeadingStyle,
257 pub marker: String,
259 pub marker_column: usize,
261 pub content_column: usize,
263 pub text: String,
265 pub custom_id: Option<String>,
267 pub raw_text: String,
269 pub has_closing_sequence: bool,
271 pub closing_sequence: String,
273}
274
/// Details of a blockquote line.
///
/// Populated outside this part of the file; the field notes follow the field names.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Blockquote nesting depth.
    pub nesting_level: usize,
    /// Whitespace preceding the first marker.
    pub indent: String,
    /// Column at which the first marker appears.
    pub marker_column: usize,
    /// The blockquote prefix text.
    pub prefix: String,
    /// Line content following the prefix.
    pub content: String,
    /// Whether no space follows the marker.
    pub has_no_space_after_marker: bool,
    /// Whether more than one space follows the marker.
    pub has_multiple_spaces_after_marker: bool,
    /// Whether the line needs the MD028 fix (rule semantics are defined elsewhere).
    pub needs_md028_fix: bool,
}
295
/// A contiguous block of list items.
///
/// Populated outside this part of the file; field notes not backed by visible
/// code follow the field names.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// First line of the block (inclusive in the list-block lookups).
    pub start_line: usize,
    /// Last line of the block (inclusive in the list-block lookups).
    pub end_line: usize,
    /// Whether the block is an ordered list.
    pub is_ordered: bool,
    /// The list marker, when one is recorded for the block.
    pub marker: Option<String>,
    /// Blockquote prefix associated with the block.
    pub blockquote_prefix: String,
    /// Line numbers of the items in the block.
    pub item_lines: Vec<usize>,
    /// Nesting depth of the block.
    pub nesting_level: usize,
    /// Largest marker width in the block.
    pub max_marker_width: usize,
}
316
317use std::sync::{Arc, OnceLock};
318
/// Occurrence counters for characters that matter to Markdown syntax.
///
/// `has_char` / `char_count` map each character below to its counter. The counting
/// itself happens outside this part of the file.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Counter for `#`.
    pub hash_count: usize,
    /// Counter for `*`.
    pub asterisk_count: usize,
    /// Counter for `_`.
    pub underscore_count: usize,
    /// Counter for `-`.
    pub hyphen_count: usize,
    /// Counter for `+`.
    pub plus_count: usize,
    /// Counter for `>`.
    pub gt_count: usize,
    /// Counter for `|`.
    pub pipe_count: usize,
    /// Counter looked up for `[` (whether `]` is also counted is decided by the producer).
    pub bracket_count: usize,
    /// Counter for `` ` ``.
    pub backtick_count: usize,
    /// Counter for `<`.
    pub lt_count: usize,
    /// Counter for `!`.
    pub exclamation_count: usize,
    /// Counter for newline characters.
    pub newline_count: usize,
}
347
/// An HTML tag found in the document.
///
/// Populated outside this part of the file; the field notes follow the field names.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number of the tag (matched against the argument of `html_tags_on_line`).
    pub line: usize,
    /// Column at which the tag starts.
    pub start_col: usize,
    /// Column at which the tag ends.
    pub end_col: usize,
    /// Byte position in the document at which the tag starts.
    pub byte_offset: usize,
    /// Byte position in the document at which the tag ends.
    pub byte_end: usize,
    /// Name of the tag.
    pub tag_name: String,
    /// Whether this is a closing tag.
    pub is_closing: bool,
    /// Whether the tag is self-closing.
    pub is_self_closing: bool,
    /// The tag text as written.
    pub raw_content: String,
}
370
/// An emphasis span found in the document.
///
/// Populated outside this part of the file; the field notes follow the field names.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number of the span (matched against the argument of `emphasis_spans_on_line`).
    pub line: usize,
    /// Column at which the span starts.
    pub start_col: usize,
    /// Column at which the span ends.
    pub end_col: usize,
    /// Byte position in the document at which the span starts.
    pub byte_offset: usize,
    /// Byte position in the document at which the span ends.
    pub byte_end: usize,
    /// The emphasis marker character.
    pub marker: char,
    /// Number of marker characters in the delimiter.
    pub marker_count: usize,
    /// Text enclosed by the markers.
    pub content: String,
}
391
/// A table row found in the document.
///
/// Populated outside this part of the file; the field notes follow the field names.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number of the row (matched against the argument of `table_rows_on_line`).
    pub line: usize,
    /// Whether the row is a separator row.
    pub is_separator: bool,
    /// Number of columns in the row.
    pub column_count: usize,
    /// Column alignments, one entry per column (string encoding is defined by the producer).
    pub column_alignments: Vec<String>,
}
404
/// A bare URL or e-mail address found in the document.
///
/// Populated outside this part of the file; the field notes follow the field names.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number of the URL (matched against the argument of `bare_urls_on_line`).
    pub line: usize,
    /// Column at which the URL starts.
    pub start_col: usize,
    /// Column at which the URL ends.
    pub end_col: usize,
    /// Byte position in the document at which the URL starts.
    pub byte_offset: usize,
    /// Byte position in the document at which the URL ends.
    pub byte_end: usize,
    /// The URL text.
    pub url: String,
    /// Kind of URL (string values are defined by the producer).
    pub url_type: String,
}
423
424pub struct LintContext<'a> {
425 pub content: &'a str,
426 pub line_offsets: Vec<usize>,
427 pub code_blocks: Vec<(usize, usize)>, pub lines: Vec<LineInfo>, pub links: Vec<ParsedLink<'a>>, pub images: Vec<ParsedImage<'a>>, pub broken_links: Vec<BrokenLinkInfo>, pub footnote_refs: Vec<FootnoteRef>, pub reference_defs: Vec<ReferenceDef>, code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>, pub list_blocks: Vec<ListBlock>, pub char_frequency: CharFrequency, html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>, emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>, table_rows_cache: OnceLock<Arc<Vec<TableRow>>>, bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>, html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>, pub table_blocks: Vec<crate::utils::table_utils::TableBlock>, pub line_index: crate::utils::range_utils::LineIndex<'a>, jinja_ranges: Vec<(usize, usize)>, pub flavor: MarkdownFlavor, pub source_file: Option<PathBuf>, }
448
/// The four consecutive pieces of a blockquote line, borrowed from the input.
struct BlockquoteComponents<'a> {
    /// Spaces and tabs before the first `>`.
    indent: &'a str,
    /// The run of `>` characters.
    markers: &'a str,
    /// Spaces and tabs directly after the markers.
    spaces_after: &'a str,
    /// Everything after `spaces_after`.
    content: &'a str,
}

/// Splits `line` into indent, `>` markers, trailing whitespace and content.
///
/// Returns `None` when the first character after the leading spaces/tabs is not `>`
/// (including when the line is empty or all whitespace). Only the first run of `>`
/// is treated as markers; a later `>` separated by a space stays in `content`.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    let is_blank = |b: u8| b == b' ' || b == b'\t';
    let raw = line.as_bytes();

    let indent_end = raw.iter().take_while(|&&b| is_blank(b)).count();

    let marker_len = raw[indent_end..].iter().take_while(|&&b| b == b'>').count();
    if marker_len == 0 {
        return None;
    }
    let markers_end = indent_end + marker_len;

    let gap_len = raw[markers_end..].iter().take_while(|&&b| is_blank(b)).count();
    let spaces_end = markers_end + gap_len;

    // All split points follow ASCII bytes, so they are valid char boundaries.
    let (head, content) = line.split_at(spaces_end);
    let (head, spaces_after) = head.split_at(markers_end);
    let (indent, markers) = head.split_at(indent_end);

    Some(BlockquoteComponents {
        indent,
        markers,
        spaces_after,
        content,
    })
}
493
494impl<'a> LintContext<'a> {
495 pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
496 #[cfg(not(target_arch = "wasm32"))]
497 let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
498 #[cfg(target_arch = "wasm32")]
499 let profile = false;
500
501 let line_offsets = profile_section!("Line offsets", profile, {
502 let mut offsets = vec![0];
503 for (i, c) in content.char_indices() {
504 if c == '\n' {
505 offsets.push(i + 1);
506 }
507 }
508 offsets
509 });
510
511 let code_blocks = profile_section!("Code blocks", profile, CodeBlockUtils::detect_code_blocks(content));
513
514 let html_comment_ranges = profile_section!(
516 "HTML comment ranges",
517 profile,
518 crate::utils::skip_context::compute_html_comment_ranges(content)
519 );
520
521 let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
523 if flavor == MarkdownFlavor::MkDocs {
524 crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
525 } else {
526 Vec::new()
527 }
528 });
529
530 let mut lines = profile_section!(
532 "Basic line info",
533 profile,
534 Self::compute_basic_line_info(
535 content,
536 &line_offsets,
537 &code_blocks,
538 flavor,
539 &html_comment_ranges,
540 &autodoc_ranges,
541 )
542 );
543
544 profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));
546
547 profile_section!(
549 "ESM blocks",
550 profile,
551 Self::detect_esm_blocks(content, &mut lines, flavor)
552 );
553
554 profile_section!(
556 "Headings & blockquotes",
557 profile,
558 Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges)
559 );
560
561 let code_spans = profile_section!("Code spans", profile, Self::parse_code_spans(content, &lines));
563
564 for span in &code_spans {
567 if span.end_line > span.line {
568 for line_num in (span.line + 1)..=span.end_line {
570 if let Some(line_info) = lines.get_mut(line_num - 1) {
571 line_info.in_code_span_continuation = true;
572 }
573 }
574 }
575 }
576
577 let (links, broken_links, footnote_refs) = profile_section!(
579 "Links",
580 profile,
581 Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
582 );
583
584 let images = profile_section!(
585 "Images",
586 profile,
587 Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
588 );
589
590 let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));
591
592 let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));
593
594 let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));
596
597 let table_blocks = profile_section!(
599 "Table blocks",
600 profile,
601 crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
602 content,
603 &code_blocks,
604 &code_spans,
605 &html_comment_ranges,
606 )
607 );
608
609 let line_index = profile_section!(
611 "Line index",
612 profile,
613 crate::utils::range_utils::LineIndex::new(content)
614 );
615
616 let jinja_ranges = profile_section!(
618 "Jinja ranges",
619 profile,
620 crate::utils::jinja_utils::find_jinja_ranges(content)
621 );
622
623 Self {
624 content,
625 line_offsets,
626 code_blocks,
627 lines,
628 links,
629 images,
630 broken_links,
631 footnote_refs,
632 reference_defs,
633 code_spans_cache: OnceLock::from(Arc::new(code_spans)),
634 list_blocks,
635 char_frequency,
636 html_tags_cache: OnceLock::new(),
637 emphasis_spans_cache: OnceLock::new(),
638 table_rows_cache: OnceLock::new(),
639 bare_urls_cache: OnceLock::new(),
640 html_comment_ranges,
641 table_blocks,
642 line_index,
643 jinja_ranges,
644 flavor,
645 source_file,
646 }
647 }
648
649 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
651 Arc::clone(
652 self.code_spans_cache
653 .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
654 )
655 }
656
657 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
659 &self.html_comment_ranges
660 }
661
662 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
664 Arc::clone(self.html_tags_cache.get_or_init(|| {
665 Arc::new(Self::parse_html_tags(
666 self.content,
667 &self.lines,
668 &self.code_blocks,
669 self.flavor,
670 ))
671 }))
672 }
673
674 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
676 Arc::clone(
677 self.emphasis_spans_cache
678 .get_or_init(|| Arc::new(Self::parse_emphasis_spans(self.content, &self.lines, &self.code_blocks))),
679 )
680 }
681
682 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
684 Arc::clone(
685 self.table_rows_cache
686 .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
687 )
688 }
689
690 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
692 Arc::clone(
693 self.bare_urls_cache
694 .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
695 )
696 }
697
698 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
700 match self.line_offsets.binary_search(&offset) {
701 Ok(line) => (line + 1, 1),
702 Err(line) => {
703 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
704 (line, offset - line_start + 1)
705 }
706 }
707 }
708
709 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
711 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
713 return true;
714 }
715
716 self.code_spans()
718 .iter()
719 .any(|span| pos >= span.byte_offset && pos < span.byte_end)
720 }
721
722 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
724 if line_num > 0 {
725 self.lines.get(line_num - 1)
726 } else {
727 None
728 }
729 }
730
731 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
733 self.line_info(line_num).map(|info| info.byte_offset)
734 }
735
736 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
738 let normalized_id = ref_id.to_lowercase();
739 self.reference_defs
740 .iter()
741 .find(|def| def.id == normalized_id)
742 .map(|def| def.url.as_str())
743 }
744
745 pub fn is_in_list_block(&self, line_num: usize) -> bool {
747 self.list_blocks
748 .iter()
749 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
750 }
751
752 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
754 self.list_blocks
755 .iter()
756 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
757 }
758
759 pub fn is_in_code_block(&self, line_num: usize) -> bool {
763 if line_num == 0 || line_num > self.lines.len() {
764 return false;
765 }
766 self.lines[line_num - 1].in_code_block
767 }
768
769 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
771 if line_num == 0 || line_num > self.lines.len() {
772 return false;
773 }
774 self.lines[line_num - 1].in_front_matter
775 }
776
777 pub fn is_in_html_block(&self, line_num: usize) -> bool {
779 if line_num == 0 || line_num > self.lines.len() {
780 return false;
781 }
782 self.lines[line_num - 1].in_html_block
783 }
784
785 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
787 if line_num == 0 || line_num > self.lines.len() {
788 return false;
789 }
790
791 let col_0indexed = if col > 0 { col - 1 } else { 0 };
795 let code_spans = self.code_spans();
796 code_spans.iter().any(|span| {
797 if line_num < span.line || line_num > span.end_line {
799 return false;
800 }
801
802 if span.line == span.end_line {
803 col_0indexed >= span.start_col && col_0indexed < span.end_col
805 } else if line_num == span.line {
806 col_0indexed >= span.start_col
808 } else if line_num == span.end_line {
809 col_0indexed < span.end_col
811 } else {
812 true
814 }
815 })
816 }
817
818 #[inline]
820 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
821 let code_spans = self.code_spans();
822 code_spans
823 .iter()
824 .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
825 }
826
827 #[inline]
830 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
831 self.reference_defs
832 .iter()
833 .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
834 }
835
836 #[inline]
840 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
841 self.html_comment_ranges
842 .iter()
843 .any(|range| byte_pos >= range.start && byte_pos < range.end)
844 }
845
846 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
848 self.jinja_ranges
849 .iter()
850 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
851 }
852
853 pub fn has_char(&self, ch: char) -> bool {
855 match ch {
856 '#' => self.char_frequency.hash_count > 0,
857 '*' => self.char_frequency.asterisk_count > 0,
858 '_' => self.char_frequency.underscore_count > 0,
859 '-' => self.char_frequency.hyphen_count > 0,
860 '+' => self.char_frequency.plus_count > 0,
861 '>' => self.char_frequency.gt_count > 0,
862 '|' => self.char_frequency.pipe_count > 0,
863 '[' => self.char_frequency.bracket_count > 0,
864 '`' => self.char_frequency.backtick_count > 0,
865 '<' => self.char_frequency.lt_count > 0,
866 '!' => self.char_frequency.exclamation_count > 0,
867 '\n' => self.char_frequency.newline_count > 0,
868 _ => self.content.contains(ch), }
870 }
871
872 pub fn char_count(&self, ch: char) -> usize {
874 match ch {
875 '#' => self.char_frequency.hash_count,
876 '*' => self.char_frequency.asterisk_count,
877 '_' => self.char_frequency.underscore_count,
878 '-' => self.char_frequency.hyphen_count,
879 '+' => self.char_frequency.plus_count,
880 '>' => self.char_frequency.gt_count,
881 '|' => self.char_frequency.pipe_count,
882 '[' => self.char_frequency.bracket_count,
883 '`' => self.char_frequency.backtick_count,
884 '<' => self.char_frequency.lt_count,
885 '!' => self.char_frequency.exclamation_count,
886 '\n' => self.char_frequency.newline_count,
887 _ => self.content.matches(ch).count(), }
889 }
890
891 pub fn likely_has_headings(&self) -> bool {
893 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
895
896 pub fn likely_has_lists(&self) -> bool {
898 self.char_frequency.asterisk_count > 0
899 || self.char_frequency.hyphen_count > 0
900 || self.char_frequency.plus_count > 0
901 }
902
903 pub fn likely_has_emphasis(&self) -> bool {
905 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
906 }
907
908 pub fn likely_has_tables(&self) -> bool {
910 self.char_frequency.pipe_count > 2
911 }
912
913 pub fn likely_has_blockquotes(&self) -> bool {
915 self.char_frequency.gt_count > 0
916 }
917
918 pub fn likely_has_code(&self) -> bool {
920 self.char_frequency.backtick_count > 0
921 }
922
923 pub fn likely_has_links_or_images(&self) -> bool {
925 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
926 }
927
928 pub fn likely_has_html(&self) -> bool {
930 self.char_frequency.lt_count > 0
931 }
932
933 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
935 self.html_tags()
936 .iter()
937 .filter(|tag| tag.line == line_num)
938 .cloned()
939 .collect()
940 }
941
942 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
944 self.emphasis_spans()
945 .iter()
946 .filter(|span| span.line == line_num)
947 .cloned()
948 .collect()
949 }
950
951 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
953 self.table_rows()
954 .iter()
955 .filter(|row| row.line == line_num)
956 .cloned()
957 .collect()
958 }
959
960 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
962 self.bare_urls()
963 .iter()
964 .filter(|url| url.line == line_num)
965 .cloned()
966 .collect()
967 }
968
969 #[inline]
975 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
976 let idx = match lines.binary_search_by(|line| {
978 if byte_offset < line.byte_offset {
979 std::cmp::Ordering::Greater
980 } else if byte_offset > line.byte_offset + line.byte_len {
981 std::cmp::Ordering::Less
982 } else {
983 std::cmp::Ordering::Equal
984 }
985 }) {
986 Ok(idx) => idx,
987 Err(idx) => idx.saturating_sub(1),
988 };
989
990 let line = &lines[idx];
991 let line_num = idx + 1;
992 let col = byte_offset.saturating_sub(line.byte_offset);
993
994 (idx, line_num, col)
995 }
996
997 #[inline]
999 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1000 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1002
1003 if idx > 0 {
1005 let span = &code_spans[idx - 1];
1006 if offset >= span.byte_offset && offset < span.byte_end {
1007 return true;
1008 }
1009 }
1010
1011 false
1012 }
1013
1014 fn parse_links(
1016 content: &'a str,
1017 lines: &[LineInfo],
1018 code_blocks: &[(usize, usize)],
1019 code_spans: &[CodeSpan],
1020 flavor: MarkdownFlavor,
1021 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1022 ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1023 use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1024 use std::collections::HashSet;
1025
1026 let mut links = Vec::with_capacity(content.len() / 500);
1027 let mut broken_links = Vec::new();
1028 let mut footnote_refs = Vec::new();
1029
1030 let mut found_positions = HashSet::new();
1032
1033 let mut options = Options::empty();
1043 options.insert(Options::ENABLE_WIKILINKS);
1044 options.insert(Options::ENABLE_FOOTNOTES);
1045
1046 let parser = Parser::new_with_broken_link_callback(
1047 content,
1048 options,
1049 Some(|link: BrokenLink<'_>| {
1050 broken_links.push(BrokenLinkInfo {
1051 reference: link.reference.to_string(),
1052 span: link.span.clone(),
1053 });
1054 None
1055 }),
1056 )
1057 .into_offset_iter();
1058
1059 let mut link_stack: Vec<(
1060 usize,
1061 usize,
1062 pulldown_cmark::CowStr<'a>,
1063 LinkType,
1064 pulldown_cmark::CowStr<'a>,
1065 )> = Vec::new();
1066 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1069 match event {
1070 Event::Start(Tag::Link {
1071 link_type,
1072 dest_url,
1073 id,
1074 ..
1075 }) => {
1076 link_stack.push((range.start, range.end, dest_url, link_type, id));
1078 text_chunks.clear();
1079 }
1080 Event::Text(text) if !link_stack.is_empty() => {
1081 text_chunks.push((text.to_string(), range.start, range.end));
1083 }
1084 Event::Code(code) if !link_stack.is_empty() => {
1085 let code_text = format!("`{code}`");
1087 text_chunks.push((code_text, range.start, range.end));
1088 }
1089 Event::End(TagEnd::Link) => {
1090 if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1091 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1093 text_chunks.clear();
1094 continue;
1095 }
1096
1097 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1099
1100 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1102 text_chunks.clear();
1103 continue;
1104 }
1105
1106 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1107
1108 let is_reference = matches!(
1109 link_type,
1110 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1111 );
1112
1113 let link_text = if start_pos < content.len() {
1116 let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1117
1118 let mut close_pos = None;
1122 let mut depth = 0;
1123 let mut in_code_span = false;
1124
1125 for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1126 let mut backslash_count = 0;
1128 let mut j = i;
1129 while j > 0 && link_bytes[j - 1] == b'\\' {
1130 backslash_count += 1;
1131 j -= 1;
1132 }
1133 let is_escaped = backslash_count % 2 != 0;
1134
1135 if byte == b'`' && !is_escaped {
1137 in_code_span = !in_code_span;
1138 }
1139
1140 if !is_escaped && !in_code_span {
1142 if byte == b'[' {
1143 depth += 1;
1144 } else if byte == b']' {
1145 if depth == 0 {
1146 close_pos = Some(i);
1148 break;
1149 } else {
1150 depth -= 1;
1151 }
1152 }
1153 }
1154 }
1155
1156 if let Some(pos) = close_pos {
1157 Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1158 } else {
1159 Cow::Borrowed("")
1160 }
1161 } else {
1162 Cow::Borrowed("")
1163 };
1164
1165 let reference_id = if is_reference && !ref_id.is_empty() {
1167 Some(Cow::Owned(ref_id.to_lowercase()))
1168 } else if is_reference {
1169 Some(Cow::Owned(link_text.to_lowercase()))
1171 } else {
1172 None
1173 };
1174
1175 let has_escaped_bang = start_pos >= 2
1179 && content.as_bytes().get(start_pos - 2) == Some(&b'\\')
1180 && content.as_bytes().get(start_pos - 1) == Some(&b'!');
1181
1182 let has_escaped_bracket =
1185 start_pos >= 1 && content.as_bytes().get(start_pos - 1) == Some(&b'\\');
1186
1187 if has_escaped_bang || has_escaped_bracket {
1188 text_chunks.clear();
1189 continue; }
1191
1192 found_positions.insert(start_pos);
1194
1195 links.push(ParsedLink {
1196 line: line_num,
1197 start_col: col_start,
1198 end_col: col_end,
1199 byte_offset: start_pos,
1200 byte_end: range.end,
1201 text: link_text,
1202 url: Cow::Owned(url.to_string()),
1203 is_reference,
1204 reference_id,
1205 link_type,
1206 });
1207
1208 text_chunks.clear();
1209 }
1210 }
1211 Event::FootnoteReference(footnote_id) => {
1212 if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1215 continue;
1216 }
1217
1218 let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1219 footnote_refs.push(FootnoteRef {
1220 id: footnote_id.to_string(),
1221 line: line_num,
1222 byte_offset: range.start,
1223 byte_end: range.end,
1224 });
1225 }
1226 _ => {}
1227 }
1228 }
1229
1230 for cap in LINK_PATTERN.captures_iter(content) {
1234 let full_match = cap.get(0).unwrap();
1235 let match_start = full_match.start();
1236 let match_end = full_match.end();
1237
1238 if found_positions.contains(&match_start) {
1240 continue;
1241 }
1242
1243 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1245 continue;
1246 }
1247
1248 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1250 continue;
1251 }
1252
1253 if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1255 continue;
1256 }
1257
1258 if Self::is_offset_in_code_span(code_spans, match_start) {
1260 continue;
1261 }
1262
1263 if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1265 continue;
1266 }
1267
1268 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1270
1271 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1273 continue;
1274 }
1275
1276 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1277
1278 let text = cap.get(1).map_or("", |m| m.as_str());
1279
1280 if let Some(ref_id) = cap.get(6) {
1282 let ref_id_str = ref_id.as_str();
1283 let normalized_ref = if ref_id_str.is_empty() {
1284 Cow::Owned(text.to_lowercase()) } else {
1286 Cow::Owned(ref_id_str.to_lowercase())
1287 };
1288
1289 links.push(ParsedLink {
1291 line: line_num,
1292 start_col: col_start,
1293 end_col: col_end,
1294 byte_offset: match_start,
1295 byte_end: match_end,
1296 text: Cow::Borrowed(text),
1297 url: Cow::Borrowed(""), is_reference: true,
1299 reference_id: Some(normalized_ref),
1300 link_type: LinkType::Reference, });
1302 }
1303 }
1304
1305 (links, broken_links, footnote_refs)
1306 }
1307
1308 fn parse_images(
1310 content: &'a str,
1311 lines: &[LineInfo],
1312 code_blocks: &[(usize, usize)],
1313 code_spans: &[CodeSpan],
1314 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1315 ) -> Vec<ParsedImage<'a>> {
1316 use crate::utils::skip_context::is_in_html_comment_ranges;
1317 use std::collections::HashSet;
1318
1319 let mut images = Vec::with_capacity(content.len() / 1000);
1321 let mut found_positions = HashSet::new();
1322
1323 let parser = Parser::new(content).into_offset_iter();
1325 let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1326 Vec::new();
1327 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1330 match event {
1331 Event::Start(Tag::Image {
1332 link_type,
1333 dest_url,
1334 id,
1335 ..
1336 }) => {
1337 image_stack.push((range.start, dest_url, link_type, id));
1338 text_chunks.clear();
1339 }
1340 Event::Text(text) if !image_stack.is_empty() => {
1341 text_chunks.push((text.to_string(), range.start, range.end));
1342 }
1343 Event::Code(code) if !image_stack.is_empty() => {
1344 let code_text = format!("`{code}`");
1345 text_chunks.push((code_text, range.start, range.end));
1346 }
1347 Event::End(TagEnd::Image) => {
1348 if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1349 if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1351 continue;
1352 }
1353
1354 if Self::is_offset_in_code_span(code_spans, start_pos) {
1356 continue;
1357 }
1358
1359 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1361 continue;
1362 }
1363
1364 let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1366 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1367
1368 let is_reference = matches!(
1369 link_type,
1370 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1371 );
1372
1373 let alt_text = if start_pos < content.len() {
1376 let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1377
1378 let mut close_pos = None;
1381 let mut depth = 0;
1382
1383 if image_bytes.len() > 2 {
1384 for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1385 let mut backslash_count = 0;
1387 let mut j = i;
1388 while j > 0 && image_bytes[j - 1] == b'\\' {
1389 backslash_count += 1;
1390 j -= 1;
1391 }
1392 let is_escaped = backslash_count % 2 != 0;
1393
1394 if !is_escaped {
1395 if byte == b'[' {
1396 depth += 1;
1397 } else if byte == b']' {
1398 if depth == 0 {
1399 close_pos = Some(i);
1401 break;
1402 } else {
1403 depth -= 1;
1404 }
1405 }
1406 }
1407 }
1408 }
1409
1410 if let Some(pos) = close_pos {
1411 Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1412 } else {
1413 Cow::Borrowed("")
1414 }
1415 } else {
1416 Cow::Borrowed("")
1417 };
1418
1419 let reference_id = if is_reference && !ref_id.is_empty() {
1420 Some(Cow::Owned(ref_id.to_lowercase()))
1421 } else if is_reference {
1422 Some(Cow::Owned(alt_text.to_lowercase())) } else {
1424 None
1425 };
1426
1427 found_positions.insert(start_pos);
1428 images.push(ParsedImage {
1429 line: line_num,
1430 start_col: col_start,
1431 end_col: col_end,
1432 byte_offset: start_pos,
1433 byte_end: range.end,
1434 alt_text,
1435 url: Cow::Owned(url.to_string()),
1436 is_reference,
1437 reference_id,
1438 link_type,
1439 });
1440 }
1441 }
1442 _ => {}
1443 }
1444 }
1445
1446 for cap in IMAGE_PATTERN.captures_iter(content) {
1448 let full_match = cap.get(0).unwrap();
1449 let match_start = full_match.start();
1450 let match_end = full_match.end();
1451
1452 if found_positions.contains(&match_start) {
1454 continue;
1455 }
1456
1457 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1459 continue;
1460 }
1461
1462 if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1464 || Self::is_offset_in_code_span(code_spans, match_start)
1465 || is_in_html_comment_ranges(html_comment_ranges, match_start)
1466 {
1467 continue;
1468 }
1469
1470 if let Some(ref_id) = cap.get(6) {
1472 let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1473 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1474 let alt_text = cap.get(1).map_or("", |m| m.as_str());
1475 let ref_id_str = ref_id.as_str();
1476 let normalized_ref = if ref_id_str.is_empty() {
1477 Cow::Owned(alt_text.to_lowercase())
1478 } else {
1479 Cow::Owned(ref_id_str.to_lowercase())
1480 };
1481
1482 images.push(ParsedImage {
1483 line: line_num,
1484 start_col: col_start,
1485 end_col: col_end,
1486 byte_offset: match_start,
1487 byte_end: match_end,
1488 alt_text: Cow::Borrowed(alt_text),
1489 url: Cow::Borrowed(""),
1490 is_reference: true,
1491 reference_id: Some(normalized_ref),
1492 link_type: LinkType::Reference, });
1494 }
1495 }
1496
1497 images
1498 }
1499
1500 fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1502 let mut refs = Vec::with_capacity(lines.len() / 20); for (line_idx, line_info) in lines.iter().enumerate() {
1506 if line_info.in_code_block {
1508 continue;
1509 }
1510
1511 let line = line_info.content(content);
1512 let line_num = line_idx + 1;
1513
1514 if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1515 let id = cap.get(1).unwrap().as_str().to_lowercase();
1516 let url = cap.get(2).unwrap().as_str().to_string();
1517 let title = cap.get(3).or_else(|| cap.get(4)).map(|m| m.as_str().to_string());
1518
1519 let match_obj = cap.get(0).unwrap();
1522 let byte_offset = line_info.byte_offset + match_obj.start();
1523 let byte_end = line_info.byte_offset + match_obj.end();
1524
1525 refs.push(ReferenceDef {
1526 line: line_num,
1527 id,
1528 url,
1529 title,
1530 byte_offset,
1531 byte_end,
1532 });
1533 }
1534 }
1535
1536 refs
1537 }
1538
#[inline]
/// Splits a line into its blockquote prefix and the content that follows it.
///
/// The prefix covers every `>` marker (each optionally preceded by whitespace)
/// plus at most one space or tab after each marker. Returns `None` when the
/// line does not start with `>` after leading whitespace.
fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
    if !line.trim_start().starts_with('>') {
        return None;
    }

    // `rest` is always a suffix of `line`; each round consumes one marker.
    let mut rest = line;
    loop {
        let candidate = rest.trim_start();
        let Some(after_marker) = candidate.strip_prefix('>') else {
            break;
        };
        // Only a single space or tab after the marker belongs to the prefix.
        rest = after_marker
            .strip_prefix(' ')
            .or_else(|| after_marker.strip_prefix('\t'))
            .unwrap_or(after_marker);
    }

    let prefix_len = line.len() - rest.len();
    Some((&line[..prefix_len], rest))
}
1579
#[inline]
/// Parses an unordered list marker at the start of a line.
///
/// Returns `(leading_whitespace, marker, spacing_after_marker, content)` when
/// the first non-space/tab byte is `-`, `*` or `+`; otherwise `None`. The
/// spacing may be empty; callers decide whether that still counts as a list.
fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
    fn is_blank(byte: u8) -> bool {
        byte == b' ' || byte == b'\t'
    }

    let raw = line.as_bytes();
    let marker_at = raw.iter().take_while(|&&b| is_blank(b)).count();

    let marker = match raw.get(marker_at) {
        Some(&b @ (b'-' | b'*' | b'+')) => char::from(b),
        _ => return None,
    };

    // The marker is a single ASCII byte, so the spacing starts right after it.
    let gap_start = marker_at + 1;
    let gap_len = raw[gap_start..].iter().take_while(|&&b| is_blank(b)).count();
    let body_at = gap_start + gap_len;

    Some((&line[..marker_at], marker, &line[gap_start..body_at], &line[body_at..]))
}
1612
#[inline]
/// Parses an ordered list marker at the start of a line.
///
/// Returns `(leading_whitespace, number, delimiter, spacing, content)` when the
/// line starts (after spaces/tabs) with one or more ASCII digits followed by
/// `.` or `)`; otherwise `None`. The spacing may be empty.
fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
    fn is_blank(byte: u8) -> bool {
        byte == b' ' || byte == b'\t'
    }

    let raw = line.as_bytes();
    let digits_at = raw.iter().take_while(|&&b| is_blank(b)).count();

    let digit_count = raw[digits_at..].iter().take_while(|b| b.is_ascii_digit()).count();
    if digit_count == 0 {
        return None;
    }

    let delim_at = digits_at + digit_count;
    let delimiter = match raw.get(delim_at) {
        Some(b'.') => '.',
        Some(b')') => ')',
        _ => return None,
    };

    let gap_start = delim_at + 1;
    let gap_len = raw[gap_start..].iter().take_while(|&&b| is_blank(b)).count();
    let body_at = gap_start + gap_len;

    Some((
        &line[..digits_at],
        &line[digits_at..delim_at],
        delimiter,
        &line[gap_start..body_at],
        &line[body_at..],
    ))
}
1660
/// Builds a per-line flag vector telling whether each line overlaps a code block.
///
/// `line_offsets` holds the starting byte offset of every line (ascending) and
/// `code_blocks` holds `(start, end)` byte ranges. A start that is not on a char
/// boundary is moved back to the previous boundary, an end is moved forward to
/// the next one, and ends past the content are clamped to its length.
fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
    let mut flags = vec![false; line_offsets.len()];

    for &(start, end) in code_blocks {
        let mut block_start = start;
        while block_start > 0 && !content.is_char_boundary(block_start) {
            block_start -= 1;
        }

        let mut block_end = end;
        if block_end < content.len() {
            while block_end < content.len() && !content.is_char_boundary(block_end) {
                block_end += 1;
            }
        } else {
            block_end = content.len();
        }

        // The line containing `block_start` is the last one starting at or before it.
        let first = line_offsets
            .partition_point(|&offset| offset <= block_start)
            .saturating_sub(1);
        // Every line starting before `block_end` is still part of the block.
        let past_last = line_offsets.partition_point(|&offset| offset < block_end);

        if first < past_last {
            flags[first..past_last].fill(true);
        }
    }

    flags
}
1720
1721 fn compute_basic_line_info(
1723 content: &str,
1724 line_offsets: &[usize],
1725 code_blocks: &[(usize, usize)],
1726 flavor: MarkdownFlavor,
1727 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1728 autodoc_ranges: &[crate::utils::skip_context::ByteRange],
1729 ) -> Vec<LineInfo> {
1730 let content_lines: Vec<&str> = content.lines().collect();
1731 let mut lines = Vec::with_capacity(content_lines.len());
1732
1733 let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
1735
1736 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
1739
1740 for (i, line) in content_lines.iter().enumerate() {
1741 let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
1742 let indent = line.len() - line.trim_start().len();
1743
1744 let blockquote_parse = Self::parse_blockquote_prefix(line);
1746
1747 let is_blank = if let Some((_, content)) = blockquote_parse {
1749 content.trim().is_empty()
1751 } else {
1752 line.trim().is_empty()
1753 };
1754
1755 let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
1757
1758 let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
1760 && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
1761 let line_end_offset = byte_offset + line.len();
1764 let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
1765 html_comment_ranges,
1766 byte_offset,
1767 line_end_offset,
1768 );
1769 let list_item = if !(in_code_block
1770 || is_blank
1771 || in_mkdocstrings
1772 || in_html_comment
1773 || (front_matter_end > 0 && i < front_matter_end))
1774 {
1775 let (line_for_list_check, blockquote_prefix_len) = if let Some((prefix, content)) = blockquote_parse {
1777 (content, prefix.len())
1778 } else {
1779 (&**line, 0)
1780 };
1781
1782 if let Some((leading_spaces, marker, spacing, _content)) =
1783 Self::parse_unordered_list(line_for_list_check)
1784 {
1785 let marker_column = blockquote_prefix_len + leading_spaces.len();
1786 let content_column = marker_column + 1 + spacing.len();
1787
1788 if spacing.is_empty() {
1795 None
1796 } else {
1797 Some(ListItemInfo {
1798 marker: marker.to_string(),
1799 is_ordered: false,
1800 number: None,
1801 marker_column,
1802 content_column,
1803 })
1804 }
1805 } else if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
1806 Self::parse_ordered_list(line_for_list_check)
1807 {
1808 let marker = format!("{number_str}{delimiter}");
1809 let marker_column = blockquote_prefix_len + leading_spaces.len();
1810 let content_column = marker_column + marker.len() + spacing.len();
1811
1812 if spacing.is_empty() {
1815 None
1816 } else {
1817 Some(ListItemInfo {
1818 marker,
1819 is_ordered: true,
1820 number: number_str.parse().ok(),
1821 marker_column,
1822 content_column,
1823 })
1824 }
1825 } else {
1826 None
1827 }
1828 } else {
1829 None
1830 };
1831
1832 lines.push(LineInfo {
1833 byte_offset,
1834 byte_len: line.len(),
1835 indent,
1836 is_blank,
1837 in_code_block,
1838 in_front_matter: front_matter_end > 0 && i < front_matter_end,
1839 in_html_block: false, in_html_comment,
1841 list_item,
1842 heading: None, blockquote: None, in_mkdocstrings,
1845 in_esm_block: false, in_code_span_continuation: false, });
1848 }
1849
1850 lines
1851 }
1852
1853 fn detect_headings_and_blockquotes(
1855 content: &str,
1856 lines: &mut [LineInfo],
1857 flavor: MarkdownFlavor,
1858 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1859 ) {
1860 static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
1862 LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
1863 static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
1864 LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
1865
1866 let content_lines: Vec<&str> = content.lines().collect();
1867
1868 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
1870
1871 for i in 0..lines.len() {
1873 if lines[i].in_code_block {
1874 continue;
1875 }
1876
1877 if front_matter_end > 0 && i < front_matter_end {
1879 continue;
1880 }
1881
1882 if lines[i].in_html_block {
1884 continue;
1885 }
1886
1887 let line = content_lines[i];
1888
1889 if let Some(bq) = parse_blockquote_detailed(line) {
1891 let nesting_level = bq.markers.len(); let marker_column = bq.indent.len();
1893
1894 let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
1896
1897 let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
1899 let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
1902
1903 let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
1907
1908 lines[i].blockquote = Some(BlockquoteInfo {
1909 nesting_level,
1910 indent: bq.indent.to_string(),
1911 marker_column,
1912 prefix,
1913 content: bq.content.to_string(),
1914 has_no_space_after_marker: has_no_space,
1915 has_multiple_spaces_after_marker: has_multiple_spaces,
1916 needs_md028_fix,
1917 });
1918 }
1919
1920 if lines[i].is_blank {
1922 continue;
1923 }
1924
1925 let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
1928 crate::utils::mkdocs_snippets::is_snippet_section_start(line)
1929 || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
1930 } else {
1931 false
1932 };
1933
1934 if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
1935 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
1937 continue;
1938 }
1939 let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
1940 let hashes = caps.get(2).map_or("", |m| m.as_str());
1941 let spaces_after = caps.get(3).map_or("", |m| m.as_str());
1942 let rest = caps.get(4).map_or("", |m| m.as_str());
1943
1944 let level = hashes.len() as u8;
1945 let marker_column = leading_spaces.len();
1946
1947 let (text, has_closing, closing_seq) = {
1949 let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
1951 if rest[id_start..].trim_end().ends_with('}') {
1953 (&rest[..id_start], &rest[id_start..])
1955 } else {
1956 (rest, "")
1957 }
1958 } else {
1959 (rest, "")
1960 };
1961
1962 let trimmed_rest = rest_without_id.trim_end();
1964 if let Some(last_hash_pos) = trimmed_rest.rfind('#') {
1965 let mut start_of_hashes = last_hash_pos;
1967 while start_of_hashes > 0 && trimmed_rest.chars().nth(start_of_hashes - 1) == Some('#') {
1968 start_of_hashes -= 1;
1969 }
1970
1971 let has_space_before = start_of_hashes == 0
1973 || trimmed_rest
1974 .chars()
1975 .nth(start_of_hashes - 1)
1976 .is_some_and(|c| c.is_whitespace());
1977
1978 let potential_closing = &trimmed_rest[start_of_hashes..];
1980 let is_all_hashes = potential_closing.chars().all(|c| c == '#');
1981
1982 if is_all_hashes && has_space_before {
1983 let closing_hashes = potential_closing.to_string();
1985 let text_part = if !custom_id_part.is_empty() {
1988 format!("{}{}", rest_without_id[..start_of_hashes].trim_end(), custom_id_part)
1991 } else {
1992 rest_without_id[..start_of_hashes].trim_end().to_string()
1993 };
1994 (text_part, true, closing_hashes)
1995 } else {
1996 (rest.to_string(), false, String::new())
1998 }
1999 } else {
2000 (rest.to_string(), false, String::new())
2002 }
2003 };
2004
2005 let content_column = marker_column + hashes.len() + spaces_after.len();
2006
2007 let raw_text = text.trim().to_string();
2009 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2010
2011 if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
2013 let next_line = content_lines[i + 1];
2014 if !lines[i + 1].in_code_block
2015 && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
2016 && let Some(next_line_id) =
2017 crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
2018 {
2019 custom_id = Some(next_line_id);
2020 }
2021 }
2022
2023 lines[i].heading = Some(HeadingInfo {
2024 level,
2025 style: HeadingStyle::ATX,
2026 marker: hashes.to_string(),
2027 marker_column,
2028 content_column,
2029 text: clean_text,
2030 custom_id,
2031 raw_text,
2032 has_closing_sequence: has_closing,
2033 closing_sequence: closing_seq,
2034 });
2035 }
2036 else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2038 let next_line = content_lines[i + 1];
2039 if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2040 if front_matter_end > 0 && i < front_matter_end {
2042 continue;
2043 }
2044
2045 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2047 {
2048 continue;
2049 }
2050
2051 let underline = next_line.trim();
2052
2053 let level = if underline.starts_with('=') { 1 } else { 2 };
2054 let style = if level == 1 {
2055 HeadingStyle::Setext1
2056 } else {
2057 HeadingStyle::Setext2
2058 };
2059
2060 let raw_text = line.trim().to_string();
2062 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2063
2064 if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2066 let attr_line = content_lines[i + 2];
2067 if !lines[i + 2].in_code_block
2068 && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2069 && let Some(attr_line_id) =
2070 crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2071 {
2072 custom_id = Some(attr_line_id);
2073 }
2074 }
2075
2076 lines[i].heading = Some(HeadingInfo {
2077 level,
2078 style,
2079 marker: underline.to_string(),
2080 marker_column: next_line.len() - next_line.trim_start().len(),
2081 content_column: lines[i].indent,
2082 text: clean_text,
2083 custom_id,
2084 raw_text,
2085 has_closing_sequence: false,
2086 closing_sequence: String::new(),
2087 });
2088 }
2089 }
2090 }
2091 }
2092
2093 fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2095 const BLOCK_ELEMENTS: &[&str] = &[
2098 "address",
2099 "article",
2100 "aside",
2101 "audio",
2102 "blockquote",
2103 "canvas",
2104 "details",
2105 "dialog",
2106 "dd",
2107 "div",
2108 "dl",
2109 "dt",
2110 "embed",
2111 "fieldset",
2112 "figcaption",
2113 "figure",
2114 "footer",
2115 "form",
2116 "h1",
2117 "h2",
2118 "h3",
2119 "h4",
2120 "h5",
2121 "h6",
2122 "header",
2123 "hr",
2124 "iframe",
2125 "li",
2126 "main",
2127 "menu",
2128 "nav",
2129 "noscript",
2130 "object",
2131 "ol",
2132 "p",
2133 "picture",
2134 "pre",
2135 "script",
2136 "search",
2137 "section",
2138 "source",
2139 "style",
2140 "summary",
2141 "svg",
2142 "table",
2143 "tbody",
2144 "td",
2145 "template",
2146 "textarea",
2147 "tfoot",
2148 "th",
2149 "thead",
2150 "tr",
2151 "track",
2152 "ul",
2153 "video",
2154 ];
2155
2156 let mut i = 0;
2157 while i < lines.len() {
2158 if lines[i].in_code_block || lines[i].in_front_matter {
2160 i += 1;
2161 continue;
2162 }
2163
2164 let trimmed = lines[i].content(content).trim_start();
2165
2166 if trimmed.starts_with('<') && trimmed.len() > 1 {
2168 let after_bracket = &trimmed[1..];
2170 let is_closing = after_bracket.starts_with('/');
2171 let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2172
2173 let tag_name = tag_start
2175 .chars()
2176 .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2177 .collect::<String>()
2178 .to_lowercase();
2179
2180 if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2182 lines[i].in_html_block = true;
2184
2185 if !is_closing {
2188 let closing_tag = format!("</{tag_name}>");
2189 let allow_blank_lines = tag_name == "style" || tag_name == "script";
2191 let mut j = i + 1;
2192 while j < lines.len() && j < i + 100 {
2193 if !allow_blank_lines && lines[j].is_blank {
2196 break;
2197 }
2198
2199 lines[j].in_html_block = true;
2200
2201 if lines[j].content(content).contains(&closing_tag) {
2203 break;
2204 }
2205 j += 1;
2206 }
2207 }
2208 }
2209 }
2210
2211 i += 1;
2212 }
2213 }
2214
2215 fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2218 if !flavor.supports_esm_blocks() {
2220 return;
2221 }
2222
2223 let mut in_multiline_comment = false;
2224
2225 for line in lines.iter_mut() {
2226 if line.is_blank || line.in_html_comment {
2228 continue;
2229 }
2230
2231 let trimmed = line.content(content).trim_start();
2232
2233 if in_multiline_comment {
2235 if trimmed.contains("*/") {
2236 in_multiline_comment = false;
2237 }
2238 continue;
2239 }
2240
2241 if trimmed.starts_with("//") {
2243 continue;
2244 }
2245
2246 if trimmed.starts_with("/*") {
2248 if !trimmed.contains("*/") {
2249 in_multiline_comment = true;
2250 }
2251 continue;
2252 }
2253
2254 if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
2256 line.in_esm_block = true;
2257 } else {
2258 break;
2260 }
2261 }
2262 }
2263
2264 fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
2266 let mut code_spans = Vec::new();
2267
2268 if !content.contains('`') {
2270 return code_spans;
2271 }
2272
2273 let parser = Parser::new(content).into_offset_iter();
2275
2276 for (event, range) in parser {
2277 if let Event::Code(_) = event {
2278 let start_pos = range.start;
2279 let end_pos = range.end;
2280
2281 let full_span = &content[start_pos..end_pos];
2283 let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
2284
2285 let content_start = start_pos + backtick_count;
2287 let content_end = end_pos - backtick_count;
2288 let span_content = if content_start < content_end {
2289 content[content_start..content_end].to_string()
2290 } else {
2291 String::new()
2292 };
2293
2294 let line_idx = lines
2297 .partition_point(|line| line.byte_offset <= start_pos)
2298 .saturating_sub(1);
2299 let line_num = line_idx + 1;
2300 let byte_col_start = start_pos - lines[line_idx].byte_offset;
2301
2302 let end_line_idx = lines
2304 .partition_point(|line| line.byte_offset <= end_pos)
2305 .saturating_sub(1);
2306 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
2307
2308 let line_content = lines[line_idx].content(content);
2311 let col_start = if byte_col_start <= line_content.len() {
2312 line_content[..byte_col_start].chars().count()
2313 } else {
2314 line_content.chars().count()
2315 };
2316
2317 let end_line_content = lines[end_line_idx].content(content);
2318 let col_end = if byte_col_end <= end_line_content.len() {
2319 end_line_content[..byte_col_end].chars().count()
2320 } else {
2321 end_line_content.chars().count()
2322 };
2323
2324 code_spans.push(CodeSpan {
2325 line: line_num,
2326 end_line: end_line_idx + 1,
2327 start_col: col_start,
2328 end_col: col_end,
2329 byte_offset: start_pos,
2330 byte_end: end_pos,
2331 backtick_count,
2332 content: span_content,
2333 });
2334 }
2335 }
2336
2337 code_spans.sort_by_key(|span| span.byte_offset);
2339
2340 code_spans
2341 }
2342
2343 fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
2354 const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
2356
2357 #[inline]
2360 fn reset_tracking_state(
2361 list_item: &ListItemInfo,
2362 has_list_breaking_content: &mut bool,
2363 min_continuation: &mut usize,
2364 ) {
2365 *has_list_breaking_content = false;
2366 let marker_width = if list_item.is_ordered {
2367 list_item.marker.len() + 1 } else {
2369 list_item.marker.len()
2370 };
2371 *min_continuation = if list_item.is_ordered {
2372 marker_width
2373 } else {
2374 UNORDERED_LIST_MIN_CONTINUATION_INDENT
2375 };
2376 }
2377
2378 let mut list_blocks = Vec::with_capacity(lines.len() / 10); let mut current_block: Option<ListBlock> = None;
2381 let mut last_list_item_line = 0;
2382 let mut current_indent_level = 0;
2383 let mut last_marker_width = 0;
2384
2385 let mut has_list_breaking_content_since_last_item = false;
2387 let mut min_continuation_for_tracking = 0;
2388
2389 for (line_idx, line_info) in lines.iter().enumerate() {
2390 let line_num = line_idx + 1;
2391
2392 if line_info.in_code_block {
2394 if let Some(ref mut block) = current_block {
2395 let min_continuation_indent =
2397 CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
2398
2399 let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
2401
2402 match context {
2403 CodeBlockContext::Indented => {
2404 block.end_line = line_num;
2406 continue;
2407 }
2408 CodeBlockContext::Standalone => {
2409 let completed_block = current_block.take().unwrap();
2411 list_blocks.push(completed_block);
2412 continue;
2413 }
2414 CodeBlockContext::Adjacent => {
2415 block.end_line = line_num;
2417 continue;
2418 }
2419 }
2420 } else {
2421 continue;
2423 }
2424 }
2425
2426 let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
2428 caps.get(0).unwrap().as_str().to_string()
2429 } else {
2430 String::new()
2431 };
2432
2433 if current_block.is_some()
2436 && line_info.list_item.is_none()
2437 && !line_info.is_blank
2438 && !line_info.in_code_span_continuation
2439 {
2440 let line_content = line_info.content(content).trim();
2441
2442 let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
2447 let breaks_list = line_info.heading.is_some()
2448 || line_content.starts_with("---")
2449 || line_content.starts_with("***")
2450 || line_content.starts_with("___")
2451 || crate::utils::skip_context::is_table_line(line_content)
2452 || line_content.starts_with(">")
2453 || (line_info.indent > 0
2454 && line_info.indent < min_continuation_for_tracking
2455 && !is_lazy_continuation);
2456
2457 if breaks_list {
2458 has_list_breaking_content_since_last_item = true;
2459 }
2460 }
2461
2462 if line_info.in_code_span_continuation
2465 && line_info.list_item.is_none()
2466 && let Some(ref mut block) = current_block
2467 {
2468 block.end_line = line_num;
2469 }
2470
2471 let is_valid_continuation =
2476 line_info.indent >= min_continuation_for_tracking || (line_info.indent == 0 && !line_info.is_blank); if !line_info.in_code_span_continuation
2478 && line_info.list_item.is_none()
2479 && !line_info.is_blank
2480 && !line_info.in_code_block
2481 && is_valid_continuation
2482 && let Some(ref mut block) = current_block
2483 {
2484 block.end_line = line_num;
2485 }
2486
2487 if let Some(list_item) = &line_info.list_item {
2489 let item_indent = list_item.marker_column;
2491 let nesting = item_indent / 2; if let Some(ref mut block) = current_block {
2494 let is_nested = nesting > block.nesting_level;
2498 let same_type =
2499 (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
2500 let same_context = block.blockquote_prefix == blockquote_prefix;
2501 let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
2503
2504 let marker_compatible =
2506 block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
2507
2508 let has_non_list_content = has_list_breaking_content_since_last_item;
2511
2512 let mut continues_list = if is_nested {
2516 same_context && reasonable_distance && !has_non_list_content
2518 } else {
2519 same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
2521 };
2522
2523 if !continues_list && reasonable_distance && line_num > 0 && block.end_line == line_num - 1 {
2526 if block.item_lines.contains(&(line_num - 1)) {
2529 continues_list = true;
2531 } else {
2532 continues_list = true;
2536 }
2537 }
2538
2539 if continues_list {
2540 block.end_line = line_num;
2542 block.item_lines.push(line_num);
2543
2544 block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
2546 list_item.marker.len() + 1
2547 } else {
2548 list_item.marker.len()
2549 });
2550
2551 if !block.is_ordered
2553 && block.marker.is_some()
2554 && block.marker.as_ref() != Some(&list_item.marker)
2555 {
2556 block.marker = None;
2558 }
2559
2560 reset_tracking_state(
2562 list_item,
2563 &mut has_list_breaking_content_since_last_item,
2564 &mut min_continuation_for_tracking,
2565 );
2566 } else {
2567 list_blocks.push(block.clone());
2570
2571 *block = ListBlock {
2572 start_line: line_num,
2573 end_line: line_num,
2574 is_ordered: list_item.is_ordered,
2575 marker: if list_item.is_ordered {
2576 None
2577 } else {
2578 Some(list_item.marker.clone())
2579 },
2580 blockquote_prefix: blockquote_prefix.clone(),
2581 item_lines: vec![line_num],
2582 nesting_level: nesting,
2583 max_marker_width: if list_item.is_ordered {
2584 list_item.marker.len() + 1
2585 } else {
2586 list_item.marker.len()
2587 },
2588 };
2589
2590 reset_tracking_state(
2592 list_item,
2593 &mut has_list_breaking_content_since_last_item,
2594 &mut min_continuation_for_tracking,
2595 );
2596 }
2597 } else {
2598 current_block = Some(ListBlock {
2600 start_line: line_num,
2601 end_line: line_num,
2602 is_ordered: list_item.is_ordered,
2603 marker: if list_item.is_ordered {
2604 None
2605 } else {
2606 Some(list_item.marker.clone())
2607 },
2608 blockquote_prefix,
2609 item_lines: vec![line_num],
2610 nesting_level: nesting,
2611 max_marker_width: list_item.marker.len(),
2612 });
2613
2614 reset_tracking_state(
2616 list_item,
2617 &mut has_list_breaking_content_since_last_item,
2618 &mut min_continuation_for_tracking,
2619 );
2620 }
2621
2622 last_list_item_line = line_num;
2623 current_indent_level = item_indent;
2624 last_marker_width = if list_item.is_ordered {
2625 list_item.marker.len() + 1 } else {
2627 list_item.marker.len()
2628 };
2629 } else if let Some(ref mut block) = current_block {
2630 let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
2640 lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
2641 } else {
2642 false
2643 };
2644
2645 let min_continuation_indent = if block.is_ordered {
2649 current_indent_level + last_marker_width
2650 } else {
2651 current_indent_level + 2 };
2653
2654 if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
2655 block.end_line = line_num;
2657 } else if line_info.is_blank {
2658 let mut check_idx = line_idx + 1;
2661 let mut found_continuation = false;
2662
2663 while check_idx < lines.len() && lines[check_idx].is_blank {
2665 check_idx += 1;
2666 }
2667
2668 if check_idx < lines.len() {
2669 let next_line = &lines[check_idx];
2670 if !next_line.in_code_block && next_line.indent >= min_continuation_indent {
2672 found_continuation = true;
2673 }
2674 else if !next_line.in_code_block
2676 && next_line.list_item.is_some()
2677 && let Some(item) = &next_line.list_item
2678 {
2679 let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
2680 .find(next_line.content(content))
2681 .map_or(String::new(), |m| m.as_str().to_string());
2682 if item.marker_column == current_indent_level
2683 && item.is_ordered == block.is_ordered
2684 && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
2685 {
2686 let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
2689 if let Some(between_line) = lines.get(idx) {
2690 let between_content = between_line.content(content);
2691 let trimmed = between_content.trim();
2692 if trimmed.is_empty() {
2694 return false;
2695 }
2696 let line_indent = between_content.len() - between_content.trim_start().len();
2698
2699 if trimmed.starts_with("```")
2701 || trimmed.starts_with("~~~")
2702 || trimmed.starts_with("---")
2703 || trimmed.starts_with("***")
2704 || trimmed.starts_with("___")
2705 || trimmed.starts_with(">")
2706 || crate::utils::skip_context::is_table_line(trimmed)
2707 || between_line.heading.is_some()
2708 {
2709 return true; }
2711
2712 line_indent >= min_continuation_indent
2714 } else {
2715 false
2716 }
2717 });
2718
2719 if block.is_ordered {
2720 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2723 if let Some(between_line) = lines.get(idx) {
2724 let trimmed = between_line.content(content).trim();
2725 if trimmed.is_empty() {
2726 return false;
2727 }
2728 trimmed.starts_with("```")
2730 || trimmed.starts_with("~~~")
2731 || trimmed.starts_with("---")
2732 || trimmed.starts_with("***")
2733 || trimmed.starts_with("___")
2734 || trimmed.starts_with(">")
2735 || crate::utils::skip_context::is_table_line(trimmed)
2736 || between_line.heading.is_some()
2737 } else {
2738 false
2739 }
2740 });
2741 found_continuation = !has_structural_separators;
2742 } else {
2743 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2745 if let Some(between_line) = lines.get(idx) {
2746 let trimmed = between_line.content(content).trim();
2747 if trimmed.is_empty() {
2748 return false;
2749 }
2750 trimmed.starts_with("```")
2752 || trimmed.starts_with("~~~")
2753 || trimmed.starts_with("---")
2754 || trimmed.starts_with("***")
2755 || trimmed.starts_with("___")
2756 || trimmed.starts_with(">")
2757 || crate::utils::skip_context::is_table_line(trimmed)
2758 || between_line.heading.is_some()
2759 } else {
2760 false
2761 }
2762 });
2763 found_continuation = !has_structural_separators;
2764 }
2765 }
2766 }
2767 }
2768
2769 if found_continuation {
2770 block.end_line = line_num;
2772 } else {
2773 list_blocks.push(block.clone());
2775 current_block = None;
2776 }
2777 } else {
2778 let min_required_indent = if block.is_ordered {
2781 current_indent_level + last_marker_width
2782 } else {
2783 current_indent_level + 2
2784 };
2785
2786 let line_content = line_info.content(content).trim();
2791
2792 let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
2794
2795 let is_structural_separator = line_info.heading.is_some()
2796 || line_content.starts_with("```")
2797 || line_content.starts_with("~~~")
2798 || line_content.starts_with("---")
2799 || line_content.starts_with("***")
2800 || line_content.starts_with("___")
2801 || line_content.starts_with(">")
2802 || looks_like_table;
2803
2804 let is_lazy_continuation = !is_structural_separator
2807 && !line_info.is_blank
2808 && (line_info.indent == 0 || line_info.indent >= min_required_indent);
2809
2810 if is_lazy_continuation {
2811 let content_to_check = if !blockquote_prefix.is_empty() {
2814 line_info
2816 .content(content)
2817 .strip_prefix(&blockquote_prefix)
2818 .unwrap_or(line_info.content(content))
2819 .trim()
2820 } else {
2821 line_info.content(content).trim()
2822 };
2823
2824 let starts_with_uppercase = content_to_check.chars().next().is_some_and(|c| c.is_uppercase());
2825
2826 if starts_with_uppercase && last_list_item_line > 0 {
2829 list_blocks.push(block.clone());
2831 current_block = None;
2832 } else {
2833 block.end_line = line_num;
2835 }
2836 } else {
2837 list_blocks.push(block.clone());
2839 current_block = None;
2840 }
2841 }
2842 }
2843 }
2844
2845 if let Some(block) = current_block {
2847 list_blocks.push(block);
2848 }
2849
2850 merge_adjacent_list_blocks(content, &mut list_blocks, lines);
2852
2853 list_blocks
2854 }
2855
2856 fn compute_char_frequency(content: &str) -> CharFrequency {
2858 let mut frequency = CharFrequency::default();
2859
2860 for ch in content.chars() {
2861 match ch {
2862 '#' => frequency.hash_count += 1,
2863 '*' => frequency.asterisk_count += 1,
2864 '_' => frequency.underscore_count += 1,
2865 '-' => frequency.hyphen_count += 1,
2866 '+' => frequency.plus_count += 1,
2867 '>' => frequency.gt_count += 1,
2868 '|' => frequency.pipe_count += 1,
2869 '[' => frequency.bracket_count += 1,
2870 '`' => frequency.backtick_count += 1,
2871 '<' => frequency.lt_count += 1,
2872 '!' => frequency.exclamation_count += 1,
2873 '\n' => frequency.newline_count += 1,
2874 _ => {}
2875 }
2876 }
2877
2878 frequency
2879 }
2880
2881 fn parse_html_tags(
2883 content: &str,
2884 lines: &[LineInfo],
2885 code_blocks: &[(usize, usize)],
2886 flavor: MarkdownFlavor,
2887 ) -> Vec<HtmlTag> {
2888 static HTML_TAG_REGEX: LazyLock<regex::Regex> =
2889 LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
2890
2891 let mut html_tags = Vec::with_capacity(content.matches('<').count());
2892
2893 for cap in HTML_TAG_REGEX.captures_iter(content) {
2894 let full_match = cap.get(0).unwrap();
2895 let match_start = full_match.start();
2896 let match_end = full_match.end();
2897
2898 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2900 continue;
2901 }
2902
2903 let is_closing = !cap.get(1).unwrap().as_str().is_empty();
2904 let tag_name_original = cap.get(2).unwrap().as_str();
2905 let tag_name = tag_name_original.to_lowercase();
2906 let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
2907
2908 if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
2911 continue;
2912 }
2913
2914 let mut line_num = 1;
2916 let mut col_start = match_start;
2917 let mut col_end = match_end;
2918 for (idx, line_info) in lines.iter().enumerate() {
2919 if match_start >= line_info.byte_offset {
2920 line_num = idx + 1;
2921 col_start = match_start - line_info.byte_offset;
2922 col_end = match_end - line_info.byte_offset;
2923 } else {
2924 break;
2925 }
2926 }
2927
2928 html_tags.push(HtmlTag {
2929 line: line_num,
2930 start_col: col_start,
2931 end_col: col_end,
2932 byte_offset: match_start,
2933 byte_end: match_end,
2934 tag_name,
2935 is_closing,
2936 is_self_closing,
2937 raw_content: full_match.as_str().to_string(),
2938 });
2939 }
2940
2941 html_tags
2942 }
2943
2944 fn parse_emphasis_spans(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<EmphasisSpan> {
2946 static EMPHASIS_REGEX: LazyLock<regex::Regex> =
2947 LazyLock::new(|| regex::Regex::new(r"(\*{1,3}|_{1,3})([^*_\s][^*_]*?)(\*{1,3}|_{1,3})").unwrap());
2948
2949 let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
2950
2951 for cap in EMPHASIS_REGEX.captures_iter(content) {
2952 let full_match = cap.get(0).unwrap();
2953 let match_start = full_match.start();
2954 let match_end = full_match.end();
2955
2956 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2958 continue;
2959 }
2960
2961 let opening_markers = cap.get(1).unwrap().as_str();
2962 let content_part = cap.get(2).unwrap().as_str();
2963 let closing_markers = cap.get(3).unwrap().as_str();
2964
2965 if opening_markers.chars().next() != closing_markers.chars().next()
2967 || opening_markers.len() != closing_markers.len()
2968 {
2969 continue;
2970 }
2971
2972 let marker = opening_markers.chars().next().unwrap();
2973 let marker_count = opening_markers.len();
2974
2975 let mut line_num = 1;
2977 let mut col_start = match_start;
2978 let mut col_end = match_end;
2979 for (idx, line_info) in lines.iter().enumerate() {
2980 if match_start >= line_info.byte_offset {
2981 line_num = idx + 1;
2982 col_start = match_start - line_info.byte_offset;
2983 col_end = match_end - line_info.byte_offset;
2984 } else {
2985 break;
2986 }
2987 }
2988
2989 emphasis_spans.push(EmphasisSpan {
2990 line: line_num,
2991 start_col: col_start,
2992 end_col: col_end,
2993 byte_offset: match_start,
2994 byte_end: match_end,
2995 marker,
2996 marker_count,
2997 content: content_part.to_string(),
2998 });
2999 }
3000
3001 emphasis_spans
3002 }
3003
3004 fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
3006 let mut table_rows = Vec::with_capacity(lines.len() / 20);
3007
3008 for (line_idx, line_info) in lines.iter().enumerate() {
3009 if line_info.in_code_block || line_info.is_blank {
3011 continue;
3012 }
3013
3014 let line = line_info.content(content);
3015 let line_num = line_idx + 1;
3016
3017 if !line.contains('|') {
3019 continue;
3020 }
3021
3022 let parts: Vec<&str> = line.split('|').collect();
3024 let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
3025
3026 let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
3028 let mut column_alignments = Vec::new();
3029
3030 if is_separator {
3031 for part in &parts[1..parts.len() - 1] {
3032 let trimmed = part.trim();
3034 let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
3035 "center".to_string()
3036 } else if trimmed.ends_with(':') {
3037 "right".to_string()
3038 } else if trimmed.starts_with(':') {
3039 "left".to_string()
3040 } else {
3041 "none".to_string()
3042 };
3043 column_alignments.push(alignment);
3044 }
3045 }
3046
3047 table_rows.push(TableRow {
3048 line: line_num,
3049 is_separator,
3050 column_count,
3051 column_alignments,
3052 });
3053 }
3054
3055 table_rows
3056 }
3057
3058 fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
3060 let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
3061
3062 for cap in BARE_URL_PATTERN.captures_iter(content) {
3064 let full_match = cap.get(0).unwrap();
3065 let match_start = full_match.start();
3066 let match_end = full_match.end();
3067
3068 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3070 continue;
3071 }
3072
3073 let preceding_char = if match_start > 0 {
3075 content.chars().nth(match_start - 1)
3076 } else {
3077 None
3078 };
3079 let following_char = content.chars().nth(match_end);
3080
3081 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3082 continue;
3083 }
3084 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3085 continue;
3086 }
3087
3088 let url = full_match.as_str();
3089 let url_type = if url.starts_with("https://") {
3090 "https"
3091 } else if url.starts_with("http://") {
3092 "http"
3093 } else if url.starts_with("ftp://") {
3094 "ftp"
3095 } else {
3096 "other"
3097 };
3098
3099 let mut line_num = 1;
3101 let mut col_start = match_start;
3102 let mut col_end = match_end;
3103 for (idx, line_info) in lines.iter().enumerate() {
3104 if match_start >= line_info.byte_offset {
3105 line_num = idx + 1;
3106 col_start = match_start - line_info.byte_offset;
3107 col_end = match_end - line_info.byte_offset;
3108 } else {
3109 break;
3110 }
3111 }
3112
3113 bare_urls.push(BareUrl {
3114 line: line_num,
3115 start_col: col_start,
3116 end_col: col_end,
3117 byte_offset: match_start,
3118 byte_end: match_end,
3119 url: url.to_string(),
3120 url_type: url_type.to_string(),
3121 });
3122 }
3123
3124 for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
3126 let full_match = cap.get(0).unwrap();
3127 let match_start = full_match.start();
3128 let match_end = full_match.end();
3129
3130 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3132 continue;
3133 }
3134
3135 let preceding_char = if match_start > 0 {
3137 content.chars().nth(match_start - 1)
3138 } else {
3139 None
3140 };
3141 let following_char = content.chars().nth(match_end);
3142
3143 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3144 continue;
3145 }
3146 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3147 continue;
3148 }
3149
3150 let email = full_match.as_str();
3151
3152 let mut line_num = 1;
3154 let mut col_start = match_start;
3155 let mut col_end = match_end;
3156 for (idx, line_info) in lines.iter().enumerate() {
3157 if match_start >= line_info.byte_offset {
3158 line_num = idx + 1;
3159 col_start = match_start - line_info.byte_offset;
3160 col_end = match_end - line_info.byte_offset;
3161 } else {
3162 break;
3163 }
3164 }
3165
3166 bare_urls.push(BareUrl {
3167 line: line_num,
3168 start_col: col_start,
3169 end_col: col_end,
3170 byte_offset: match_start,
3171 byte_end: match_end,
3172 url: email.to_string(),
3173 url_type: "email".to_string(),
3174 });
3175 }
3176
3177 bare_urls
3178 }
3179}
3180
3181fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
3183 if list_blocks.len() < 2 {
3184 return;
3185 }
3186
3187 let mut merger = ListBlockMerger::new(content, lines);
3188 *list_blocks = merger.merge(list_blocks);
3189}
3190
3191struct ListBlockMerger<'a> {
3193 content: &'a str,
3194 lines: &'a [LineInfo],
3195}
3196
3197impl<'a> ListBlockMerger<'a> {
3198 fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
3199 Self { content, lines }
3200 }
3201
3202 fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
3203 let mut merged = Vec::with_capacity(list_blocks.len());
3204 let mut current = list_blocks[0].clone();
3205
3206 for next in list_blocks.iter().skip(1) {
3207 if self.should_merge_blocks(¤t, next) {
3208 current = self.merge_two_blocks(current, next);
3209 } else {
3210 merged.push(current);
3211 current = next.clone();
3212 }
3213 }
3214
3215 merged.push(current);
3216 merged
3217 }
3218
3219 fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
3221 if !self.blocks_are_compatible(current, next) {
3223 return false;
3224 }
3225
3226 let spacing = self.analyze_spacing_between(current, next);
3228 match spacing {
3229 BlockSpacing::Consecutive => true,
3230 BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
3231 BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
3232 self.can_merge_with_content_between(current, next)
3233 }
3234 }
3235 }
3236
3237 fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
3239 current.is_ordered == next.is_ordered
3240 && current.blockquote_prefix == next.blockquote_prefix
3241 && current.nesting_level == next.nesting_level
3242 }
3243
3244 fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
3246 let gap = next.start_line - current.end_line;
3247
3248 match gap {
3249 1 => BlockSpacing::Consecutive,
3250 2 => BlockSpacing::SingleBlank,
3251 _ if gap > 2 => {
3252 if self.has_only_blank_lines_between(current, next) {
3253 BlockSpacing::MultipleBlanks
3254 } else {
3255 BlockSpacing::ContentBetween
3256 }
3257 }
3258 _ => BlockSpacing::Consecutive, }
3260 }
3261
3262 fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3264 if has_meaningful_content_between(self.content, current, next, self.lines) {
3267 return false; }
3269
3270 !current.is_ordered && current.marker == next.marker
3272 }
3273
3274 fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3276 if has_meaningful_content_between(self.content, current, next, self.lines) {
3278 return false; }
3280
3281 current.is_ordered && next.is_ordered
3283 }
3284
3285 fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3287 for line_num in (current.end_line + 1)..next.start_line {
3288 if let Some(line_info) = self.lines.get(line_num - 1)
3289 && !line_info.content(self.content).trim().is_empty()
3290 {
3291 return false;
3292 }
3293 }
3294 true
3295 }
3296
3297 fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
3299 current.end_line = next.end_line;
3300 current.item_lines.extend_from_slice(&next.item_lines);
3301
3302 current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
3304
3305 if !current.is_ordered && self.markers_differ(¤t, next) {
3307 current.marker = None; }
3309
3310 current
3311 }
3312
3313 fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
3315 current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
3316 }
3317}
3318
/// How far apart two neighbouring list blocks are, as classified from the
/// difference between the second block's start line and the first block's
/// end line.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// The second block starts on the line right after the first one ends.
    Consecutive,
    /// Exactly one line lies between the blocks; the classifier does not
    /// inspect that line.
    SingleBlank,
    /// Two or more lines lie between the blocks and all of them are blank.
    MultipleBlanks,
    /// Two or more lines lie between the blocks and at least one is not blank.
    ContentBetween,
}
3327
3328fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
3330 for line_num in (current.end_line + 1)..next.start_line {
3332 if let Some(line_info) = lines.get(line_num - 1) {
3333 let trimmed = line_info.content(content).trim();
3335
3336 if trimmed.is_empty() {
3338 continue;
3339 }
3340
3341 if line_info.heading.is_some() {
3345 return true; }
3347
3348 if is_horizontal_rule(trimmed) {
3350 return true; }
3352
3353 if crate::utils::skip_context::is_table_line(trimmed) {
3355 return true; }
3357
3358 if trimmed.starts_with('>') {
3360 return true; }
3362
3363 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
3365 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3366
3367 let min_continuation_indent = if current.is_ordered {
3369 current.nesting_level + current.max_marker_width + 1 } else {
3371 current.nesting_level + 2
3372 };
3373
3374 if line_indent < min_continuation_indent {
3375 return true; }
3378 }
3379
3380 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3382
3383 let min_indent = if current.is_ordered {
3385 current.nesting_level + current.max_marker_width
3386 } else {
3387 current.nesting_level + 2
3388 };
3389
3390 if line_indent < min_indent {
3392 return true; }
3394
3395 }
3398 }
3399
3400 false
3402}
3403
/// Checks whether an already-trimmed line is a horizontal rule.
///
/// The line must be at least three bytes long, start with `-`, `*` or `_`,
/// contain nothing except that character plus spaces and tabs, and repeat the
/// character at least three times.
fn is_horizontal_rule(trimmed: &str) -> bool {
    if trimmed.len() < 3 {
        return false;
    }

    let rule_char = match trimmed.chars().next() {
        Some(c @ ('-' | '*' | '_')) => c,
        _ => return false,
    };

    let mut marker_total = 0usize;
    for c in trimmed.chars() {
        if c == rule_char {
            marker_total += 1;
        } else if !matches!(c, ' ' | '\t') {
            // Anything other than the rule character or inline whitespace
            // disqualifies the line.
            return false;
        }
    }

    marker_total >= 3
}
3427
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty document has a single line offset and no line records.
    #[test]
    fn test_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }

    /// A document without a newline is one line; columns are 1-based.
    #[test]
    fn test_single_line() {
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
    }

    /// Line offsets and offset-to-position mapping across several lines.
    #[test]
    fn test_multi_line() {
        let content = "# Title\n\nSecond line\nThird line";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);

        // (byte offset, expected (line, column))
        let expectations = [
            (0, (1, 1)),
            (8, (2, 1)),
            (9, (3, 1)),
            (15, (3, 7)),
            (21, (4, 1)),
        ];
        for (offset, expected) in expectations {
            assert_eq!(ctx.offset_to_line_col(offset), expected, "offset {offset}");
        }
    }

    /// Per-line metadata: content, byte offset, indentation and blank flag.
    #[test]
    fn test_line_info() {
        // The second line is indented by four spaces so that the `indent == 4`
        // assertions below describe the fixture.
        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.lines.len(), 7);

        let heading_line = &ctx.lines[0];
        assert_eq!(heading_line.content(ctx.content), "# Title");
        assert_eq!(heading_line.byte_offset, 0);
        assert_eq!(heading_line.indent, 0);
        assert!(!heading_line.is_blank);
        assert!(!heading_line.in_code_block);
        assert!(heading_line.list_item.is_none());

        let indented_line = &ctx.lines[1];
        assert_eq!(indented_line.content(ctx.content), "    indented");
        assert_eq!(indented_line.byte_offset, 8);
        assert_eq!(indented_line.indent, 4);
        assert!(!indented_line.is_blank);

        let blank_line = &ctx.lines[2];
        assert_eq!(blank_line.content(ctx.content), "");
        assert!(blank_line.is_blank);

        // The 1-based accessors agree with the raw line records.
        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
    }

    /// List markers are recognised for unordered, nested and ordered items.
    #[test]
    fn test_list_item_detection() {
        // The nested `*` item is indented by two spaces, matching the
        // `marker_column == 2` assertion below.
        // NOTE(review): the indent of the nested `2)` item is not pinned by
        // any assertion; three spaces (the parent's content column) is assumed.
        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let first_line = &ctx.lines[0];
        assert!(first_line.list_item.is_some());
        let dash_item = first_line.list_item.as_ref().unwrap();
        assert_eq!(dash_item.marker, "-");
        assert!(!dash_item.is_ordered);
        assert_eq!(dash_item.marker_column, 0);
        assert_eq!(dash_item.content_column, 2);

        let second_line = &ctx.lines[1];
        assert!(second_line.list_item.is_some());
        let star_item = second_line.list_item.as_ref().unwrap();
        assert_eq!(star_item.marker, "*");
        assert_eq!(star_item.marker_column, 2);

        let third_line = &ctx.lines[2];
        assert!(third_line.list_item.is_some());
        let ordered_item = third_line.list_item.as_ref().unwrap();
        assert_eq!(ordered_item.marker, "1.");
        assert!(ordered_item.is_ordered);
        assert_eq!(ordered_item.number, Some(1));

        let plain_line = &ctx.lines[5];
        assert!(plain_line.list_item.is_none());
    }

    /// Offsets on newlines and at end of input map to the expected positions.
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        let content = "a\nb\nc";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // (byte offset, expected (line, column))
        let expectations = [
            (0, (1, 1)),
            (1, (1, 2)),
            (2, (2, 1)),
            (3, (2, 2)),
            (4, (3, 1)),
            (5, (3, 2)),
        ];
        for (offset, expected) in expectations {
            assert_eq!(ctx.offset_to_line_col(offset), expected, "offset {offset}");
        }
    }

    /// In MDX, leading `import`/`export` lines are flagged as ESM; the lines
    /// after the first blank line are not.
    #[test]
    fn test_mdx_esm_blocks() {
        let content = r##"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall

In {year}, the snowfall was above average.
It was followed by a warm spring which caused
flood conditions in many of the nearby rivers.

<Chart color="#fcb32c" year={year} />
"##;

        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);

        assert_eq!(ctx.lines.len(), 10);
        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
        assert!(
            !ctx.lines[3].in_esm_block,
            "Line 4 (heading) should NOT be in_esm_block"
        );
        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
    }

    /// Outside MDX the same `import`/`export` lines are ordinary text.
    #[test]
    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
        let content = r#"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(
            !ctx.lines[0].in_esm_block,
            "Line 1 should NOT be in_esm_block in Standard flavor"
        );
        assert!(
            !ctx.lines[1].in_esm_block,
            "Line 2 should NOT be in_esm_block in Standard flavor"
        );
    }
}