1use crate::config::MarkdownFlavor;
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
4use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
5use regex::Regex;
6use std::borrow::Cow;
7use std::path::PathBuf;
8use std::sync::LazyLock;
9
// Evaluates `$body` and yields its value. On non-wasm targets the evaluation is
// timed, and the elapsed time is printed to stderr when `$enabled` is true.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($label:expr, $enabled:expr, $body:expr) => {{
        let timer = std::time::Instant::now();
        let value = $body;
        if $enabled {
            eprintln!("[PROFILE] {}: {:?}", $label, timer.elapsed());
        }
        value
    }};
}

// wasm32 variant: no timing and no output, the body is simply evaluated.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($label:expr, $enabled:expr, $body:expr) => {{ $body }};
}
27
/// Lazily compiled regex for bracketed link syntax: `[text](url "title")` or `[text][ref]`.
///
/// Capture groups: 1 = link text, 2 = angle-bracketed URL, 3 = bare URL,
/// 4/5 = double-/single-quoted title, 6 = reference id.
/// The pattern does not itself exclude a leading `!`; callers filter image syntax separately.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
 \[((?:[^\[\]\\]|\\.)*)\] # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
 (?:
 \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
 |
 \[([^\]]*)\] # Reference ID in group 6
 )"#
    ).unwrap()
});
41
/// Lazily compiled regex for image syntax: `![alt](url "title")` or `![alt][ref]`.
///
/// Capture groups: 1 = alt text, 2 = angle-bracketed URL, 3 = bare URL,
/// 4/5 = double-/single-quoted title, 6 = reference id.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
 !\[((?:[^\[\]\\]|\\.)*)\] # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
 (?:
 \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
 |
 \[([^\]]*)\] # Reference ID in group 6
 )"#
    ).unwrap()
});
55
/// Lazily compiled regex for a reference definition line: `[id]: url "optional title"`.
///
/// Allows up to three leading spaces. Capture groups: 1 = id, 2 = URL (no whitespace),
/// 3/4 = double-/single-quoted title. Multi-line mode, so `^`/`$` match at line boundaries.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
59
/// Lazily compiled regex for `http`, `https` and `ftp` URLs appearing in text.
///
/// Group 1 is the scheme. The URL body stops at whitespace, angle/square brackets,
/// parentheses, backslashes, quotes and backticks; optional port, path, query and
/// fragment parts follow.
static BARE_URL_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(https?|ftp)://[^\s<>\[\]()\\'"`]+(?:\.[^\s<>\[\]()\\'"`]+)*(?::\d+)?(?:/[^\s<>\[\]()\\'"`]*)?(?:\?[^\s<>\[\]()\\'"`]*)?(?:#[^\s<>\[\]()\\'"`]*)?"#
    ).unwrap()
});
66
/// Lazily compiled regex for email-like text: `local@domain.tld`, where the final
/// label is at least two ASCII letters.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
70
/// Lazily compiled regex capturing a line's blockquote prefix in group 1:
/// optional leading whitespace, one or more `>`, then any trailing whitespace.
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
73
/// Per-line facts stored in `LintContext::lines`.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset in the source at which this line starts (see `content`).
    pub byte_offset: usize,
    /// Length of the line in bytes, as sliced by `content`.
    pub byte_len: usize,
    /// Indentation of the line. NOTE(review): the unit (bytes vs. columns) is not visible here; confirm.
    pub indent: usize,
    /// Whether the line is blank. NOTE(review): whether whitespace-only counts is decided where this is computed.
    pub is_blank: bool,
    /// Whether the line lies inside a code block (reported by `LintContext::is_in_code_block`).
    pub in_code_block: bool,
    /// Whether the line lies inside front matter (reported by `LintContext::is_in_front_matter`).
    pub in_front_matter: bool,
    /// Whether the line lies inside an HTML block (reported by `LintContext::is_in_html_block`).
    pub in_html_block: bool,
    /// Whether the line lies inside an HTML comment.
    pub in_html_comment: bool,
    /// List item details, when present for this line.
    pub list_item: Option<ListItemInfo>,
    /// Heading details, when present for this line.
    pub heading: Option<HeadingInfo>,
    /// Blockquote details, when present for this line.
    pub blockquote: Option<BlockquoteInfo>,
    /// Presumably marks lines inside a mkdocstrings block; set outside this view, TODO confirm.
    pub in_mkdocstrings: bool,
    /// Presumably marks lines inside an ESM import/export block; set outside this view, TODO confirm.
    pub in_esm_block: bool,
    /// Set by `LintContext::new` on every line after the first line of a multi-line code span,
    /// up to and including the span's last line.
    pub in_code_span_continuation: bool,
}
106
107impl LineInfo {
108 pub fn content<'a>(&self, source: &'a str) -> &'a str {
110 &source[self.byte_offset..self.byte_offset + self.byte_len]
111 }
112}
113
/// Details of a list item found on a line (stored in `LineInfo::list_item`).
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The list marker text. NOTE(review): exact form for ordered items (e.g. `1.`) is set elsewhere; confirm.
    pub marker: String,
    /// Whether the item belongs to an ordered list.
    pub is_ordered: bool,
    /// The item's number, when there is one (presumably only for ordered items; confirm).
    pub number: Option<usize>,
    /// Column of the marker. NOTE(review): 0- vs 1-based and byte vs char units are not visible here.
    pub marker_column: usize,
    /// Column where the item's content starts (same unit caveat as `marker_column`).
    pub content_column: usize,
}
128
/// Syntax used to write a heading.
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    /// ATX heading (presumably the `#`-prefixed form).
    ATX,
    /// Setext heading, first kind (presumably level 1, `=` underline; confirm against the detector).
    Setext1,
    /// Setext heading, second kind (presumably level 2, `-` underline; confirm against the detector).
    Setext2,
}
139
/// A link found in the document, as produced by `LintContext::parse_links`.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// 1-based number of the line on which the link starts.
    pub line: usize,
    /// Byte distance from the start of the link's line to the link start (0-based).
    pub start_col: usize,
    /// Byte distance from the start of the line containing the link's end to that end.
    pub end_col: usize,
    /// Byte offset of the link start in the document.
    pub byte_offset: usize,
    /// Byte offset just past the end of the link.
    pub byte_end: usize,
    /// Text between the link's opening `[` and its matching `]`.
    pub text: Cow<'a, str>,
    /// Destination URL; empty for links found only by the regex fallback.
    pub url: Cow<'a, str>,
    /// True for reference, collapsed and shortcut style links.
    pub is_reference: bool,
    /// Lowercased reference id (or lowercased link text when the id is empty); `None` for non-reference links.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type as reported by pulldown-cmark (`LinkType::Reference` for regex fallback matches).
    pub link_type: LinkType,
}
164
/// A broken link reported through pulldown-cmark's broken-link callback in `parse_links`.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text copied from the callback's `BrokenLink::reference`.
    pub reference: String,
    /// Byte range copied from the callback's `BrokenLink::span`.
    pub span: std::ops::Range<usize>,
}
173
/// A footnote reference recorded from a pulldown-cmark `FootnoteReference` event.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// Footnote identifier as reported by the parser.
    pub id: String,
    /// 1-based number of the line containing the start of the reference.
    pub line: usize,
    /// Byte offset where the reference starts.
    pub byte_offset: usize,
    /// Byte offset just past the end of the reference.
    pub byte_end: usize,
}
186
/// An image found in the document, as produced by `LintContext::parse_images`.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// 1-based number of the line on which the image starts.
    pub line: usize,
    /// Byte distance from the start of the image's line to the image start (0-based).
    pub start_col: usize,
    /// Byte distance from the start of the line containing the image's end to that end.
    pub end_col: usize,
    /// Byte offset of the image start (the `!`) in the document.
    pub byte_offset: usize,
    /// Byte offset just past the end of the image.
    pub byte_end: usize,
    /// Text between `![` and its matching `]`.
    pub alt_text: Cow<'a, str>,
    /// Destination URL; empty for images found only by the regex fallback.
    pub url: Cow<'a, str>,
    /// True for reference, collapsed and shortcut style images.
    pub is_reference: bool,
    /// Lowercased reference id (or lowercased alt text when the id is empty); `None` for non-reference images.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type as reported by pulldown-cmark (`LinkType::Reference` for regex fallback matches).
    pub link_type: LinkType,
}
211
/// A reference definition (`[id]: url "title"`).
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line number of the definition (presumably 1-based like other line fields; confirm).
    pub line: usize,
    /// Definition id. `get_reference_url` compares it against a lowercased lookup key,
    /// so it is presumably stored lowercased; verify in `parse_reference_defs`.
    pub id: String,
    /// Target URL of the definition.
    pub url: String,
    /// Optional title.
    pub title: Option<String>,
    /// Byte offset where the definition starts (inclusive bound used by `is_in_reference_def`).
    pub byte_offset: usize,
    /// Byte offset where the definition ends (exclusive bound used by `is_in_reference_def`).
    pub byte_end: usize,
}
228
/// An inline code span, possibly spanning several lines.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// Line number on which the span starts (indexes `LintContext::lines` as `line - 1`).
    pub line: usize,
    /// Line number on which the span ends; greater than `line` for multi-line spans.
    pub end_line: usize,
    /// 0-based start column on `line` (inclusive, as compared in `is_in_code_span`).
    pub start_col: usize,
    /// 0-based end column on `end_line` (exclusive, as compared in `is_in_code_span`).
    pub end_col: usize,
    /// Byte offset where the span starts (inclusive).
    pub byte_offset: usize,
    /// Byte offset where the span ends (exclusive).
    pub byte_end: usize,
    /// Number of backticks in the delimiter (presumably per side; confirm in the parser).
    pub backtick_count: usize,
    /// Text of the span. NOTE(review): whether delimiters are excluded is decided in the parser.
    pub content: String,
}
249
/// Details of a heading found on a line (stored in `LineInfo::heading`).
///
/// All fields are populated outside this view; the notes below are hedged accordingly.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level (presumably 1-6; confirm).
    pub level: u8,
    /// Syntax used for the heading.
    pub style: HeadingStyle,
    /// The heading marker text.
    pub marker: String,
    /// Column of the marker (unit/base not visible here).
    pub marker_column: usize,
    /// Column where the heading text starts (unit/base not visible here).
    pub content_column: usize,
    /// Heading text. NOTE(review): presumably with custom id / closing sequence removed; compare `raw_text`.
    pub text: String,
    /// Custom id attached to the heading, if any.
    pub custom_id: Option<String>,
    /// Heading text as written (presumably before cleanup; confirm).
    pub raw_text: String,
    /// Whether the heading has a closing sequence.
    pub has_closing_sequence: bool,
    /// The closing sequence text (presumably empty when there is none; confirm).
    pub closing_sequence: String,
}
274
/// Details of a blockquote line (stored in `LineInfo::blockquote`).
///
/// All fields are populated outside this view; the notes below are hedged accordingly.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting depth (presumably the number of `>` markers; confirm).
    pub nesting_level: usize,
    /// Whitespace preceding the first marker.
    pub indent: String,
    /// Column of the first marker (unit/base not visible here).
    pub marker_column: usize,
    /// The blockquote prefix text.
    pub prefix: String,
    /// Line content following the prefix.
    pub content: String,
    /// Whether the marker is followed directly by content without a space.
    pub has_no_space_after_marker: bool,
    /// Whether more than one space follows the marker.
    pub has_multiple_spaces_after_marker: bool,
    /// Flag consumed by rule MD028 — NOTE(review): exact condition is set by the detector; confirm.
    pub needs_md028_fix: bool,
}
295
/// A contiguous list found in the document (stored in `LintContext::list_blocks`).
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// First line of the block (inclusive, as tested by `is_in_list_block`).
    pub start_line: usize,
    /// Last line of the block (inclusive, as tested by `is_in_list_block`).
    pub end_line: usize,
    /// Whether the block is an ordered list.
    pub is_ordered: bool,
    /// Marker used by the block, when there is a single one (presumably `None` if mixed; confirm).
    pub marker: Option<String>,
    /// Blockquote prefix shared by the block's lines (presumably empty outside blockquotes; confirm).
    pub blockquote_prefix: String,
    /// Line numbers of the list items in this block.
    pub item_lines: Vec<usize>,
    /// Nesting depth of the block.
    pub nesting_level: usize,
    /// Widest marker among the block's items (unit not visible here).
    pub max_marker_width: usize,
}
316
317use std::sync::{Arc, Mutex};
318
/// Occurrence counts of selected characters in the document.
///
/// `LintContext::has_char` and `LintContext::char_count` read these instead of
/// rescanning the content; the character each field maps to is listed per field.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count for `#`.
    pub hash_count: usize,
    /// Count for `*`.
    pub asterisk_count: usize,
    /// Count for `_`.
    pub underscore_count: usize,
    /// Count for `-`.
    pub hyphen_count: usize,
    /// Count for `+`.
    pub plus_count: usize,
    /// Count for `>`.
    pub gt_count: usize,
    /// Count for `|`.
    pub pipe_count: usize,
    /// Count looked up for `[`. NOTE(review): whether `]` is also counted is decided in `compute_char_frequency`.
    pub bracket_count: usize,
    /// Count for the backtick character.
    pub backtick_count: usize,
    /// Count for `<`.
    pub lt_count: usize,
    /// Count for `!`.
    pub exclamation_count: usize,
    /// Count for `\n`.
    pub newline_count: usize,
}
347
/// An HTML tag found in the document (see `LintContext::html_tags`).
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number of the tag; matched by `html_tags_on_line`.
    pub line: usize,
    /// Start column of the tag (unit/base not visible here).
    pub start_col: usize,
    /// End column of the tag (unit/base not visible here).
    pub end_col: usize,
    /// Byte offset where the tag starts.
    pub byte_offset: usize,
    /// Byte offset where the tag ends (presumably exclusive; confirm).
    pub byte_end: usize,
    /// Tag name. NOTE(review): case normalisation, if any, happens in the parser.
    pub tag_name: String,
    /// Whether this is a closing tag.
    pub is_closing: bool,
    /// Whether the tag is self-closing.
    pub is_self_closing: bool,
    /// The tag text as written.
    pub raw_content: String,
}
370
/// An emphasis span found in the document (see `LintContext::emphasis_spans`).
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number of the span; matched by `emphasis_spans_on_line`.
    pub line: usize,
    /// Start column of the span (unit/base not visible here).
    pub start_col: usize,
    /// End column of the span (unit/base not visible here).
    pub end_col: usize,
    /// Byte offset where the span starts.
    pub byte_offset: usize,
    /// Byte offset where the span ends (presumably exclusive; confirm).
    pub byte_end: usize,
    /// Emphasis marker character (presumably `*` or `_`; confirm).
    pub marker: char,
    /// Number of marker characters in the delimiter (presumably per side; confirm).
    pub marker_count: usize,
    /// Text of the span.
    pub content: String,
}
391
/// A table row found in the document (see `LintContext::table_rows`).
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number of the row; matched by `table_rows_on_line`.
    pub line: usize,
    /// Whether the row is a header separator row.
    pub is_separator: bool,
    /// Number of columns in the row.
    pub column_count: usize,
    /// Column alignments. NOTE(review): the string values used are defined by the parser; confirm.
    pub column_alignments: Vec<String>,
}
404
/// A bare URL or email address found in the document (see `LintContext::bare_urls`).
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number of the URL; matched by `bare_urls_on_line`.
    pub line: usize,
    /// Start column of the URL (unit/base not visible here).
    pub start_col: usize,
    /// End column of the URL (unit/base not visible here).
    pub end_col: usize,
    /// Byte offset where the URL starts.
    pub byte_offset: usize,
    /// Byte offset where the URL ends (presumably exclusive; confirm).
    pub byte_end: usize,
    /// The matched URL text.
    pub url: String,
    /// Kind of URL. NOTE(review): the set of values is defined by the parser; confirm.
    pub url_type: String,
}
423
/// Pre-computed view of a Markdown document.
///
/// `LintContext::new` fills the public fields eagerly; the `*_cache` fields are
/// filled on first use by their accessor methods (the code span cache is
/// pre-populated by `new`).
pub struct LintContext<'a> {
    /// The document text.
    pub content: &'a str,
    /// Byte offset of the start of every line; the first entry is always 0.
    pub line_offsets: Vec<usize>,
    /// Code block ranges from `CodeBlockUtils::detect_code_blocks` (presumably byte start/end pairs; confirm).
    pub code_blocks: Vec<(usize, usize)>,
    /// Per-line information; index `n - 1` holds line number `n`.
    pub lines: Vec<LineInfo>,
    /// Links found by `parse_links`.
    pub links: Vec<ParsedLink<'a>>,
    /// Images found by `parse_images`.
    pub images: Vec<ParsedImage<'a>>,
    /// Broken links reported by pulldown-cmark while parsing links.
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Footnote references found while parsing links.
    pub footnote_refs: Vec<FootnoteRef>,
    /// Reference definitions found by `parse_reference_defs`.
    pub reference_defs: Vec<ReferenceDef>,
    // Code spans; populated in `new`, read through `code_spans()`.
    code_spans_cache: Mutex<Option<Arc<Vec<CodeSpan>>>>,
    /// List blocks found by `parse_list_blocks`.
    pub list_blocks: Vec<ListBlock>,
    /// Character counts backing `has_char`, `char_count` and the `likely_has_*` checks.
    pub char_frequency: CharFrequency,
    // HTML tags; parsed on first call to `html_tags()`.
    html_tags_cache: Mutex<Option<Arc<Vec<HtmlTag>>>>,
    // Emphasis spans; parsed on first call to `emphasis_spans()`.
    emphasis_spans_cache: Mutex<Option<Arc<Vec<EmphasisSpan>>>>,
    // Table rows; parsed on first call to `table_rows()`.
    table_rows_cache: Mutex<Option<Arc<Vec<TableRow>>>>,
    // Bare URLs; parsed on first call to `bare_urls()`.
    bare_urls_cache: Mutex<Option<Arc<Vec<BareUrl>>>>,
    // Byte ranges of HTML comments; exposed through `html_comment_ranges()`.
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Table blocks found by `TableUtils::find_table_blocks_with_code_info`.
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Line index built over `content`.
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    // Byte ranges (start inclusive, end exclusive) tested by `is_in_jinja_range`.
    jinja_ranges: Vec<(usize, usize)>,
    /// Markdown flavor the document is linted as.
    pub flavor: MarkdownFlavor,
    /// Path of the source file, when known.
    pub source_file: Option<PathBuf>,
}
448
/// The four consecutive pieces of a blockquote line, each borrowed from the input.
struct BlockquoteComponents<'a> {
    /// Leading spaces/tabs before the first `>`.
    indent: &'a str,
    /// The run of consecutive `>` characters.
    markers: &'a str,
    /// Spaces/tabs directly following the markers.
    spaces_after: &'a str,
    /// Everything after `spaces_after`.
    content: &'a str,
}

/// Splits `line` into indent, `>` markers, following whitespace and content.
///
/// Returns `None` when the first character after the leading spaces/tabs is
/// not `>` (including for empty or whitespace-only lines).
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    // Number of leading bytes of `s` that satisfy `pred`.
    fn run_len(s: &str, pred: impl Fn(u8) -> bool) -> usize {
        s.bytes().take_while(|&b| pred(b)).count()
    }
    let is_blank = |b: u8| b == b' ' || b == b'\t';

    // Every split point follows a run of ASCII bytes, so it is a valid char boundary.
    let (indent, rest) = line.split_at(run_len(line, is_blank));

    let marker_len = run_len(rest, |b| b == b'>');
    if marker_len == 0 {
        return None;
    }
    let (markers, rest) = rest.split_at(marker_len);

    let (spaces_after, content) = rest.split_at(run_len(rest, is_blank));

    Some(BlockquoteComponents {
        indent,
        markers,
        spaces_after,
        content,
    })
}
493
494impl<'a> LintContext<'a> {
    /// Builds a context for `content`, running every eager analysis pass once.
    ///
    /// Passes run in a fixed order because later ones consume earlier results
    /// (e.g. line info needs code blocks and comment ranges; links need code spans).
    /// On non-wasm targets, setting the `RUMDL_PROFILE_QUADRATIC` environment
    /// variable makes each pass print its elapsed time to stderr.
    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
        // Profiling is enabled by the mere presence of the variable; its value is ignored.
        #[cfg(not(target_arch = "wasm32"))]
        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
        #[cfg(target_arch = "wasm32")]
        let profile = false;

        // Line start offsets: 0, plus the byte after every '\n'.
        let line_offsets = profile_section!("Line offsets", profile, {
            let mut offsets = vec![0];
            for (i, c) in content.char_indices() {
                if c == '\n' {
                    offsets.push(i + 1);
                }
            }
            offsets
        });

        let code_blocks = profile_section!("Code blocks", profile, CodeBlockUtils::detect_code_blocks(content));

        let html_comment_ranges = profile_section!(
            "HTML comment ranges",
            profile,
            crate::utils::skip_context::compute_html_comment_ranges(content)
        );

        // Autodoc block ranges are only computed for the MkDocs flavor.
        let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
            if flavor == MarkdownFlavor::MkDocs {
                crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
            } else {
                Vec::new()
            }
        });

        let mut lines = profile_section!(
            "Basic line info",
            profile,
            Self::compute_basic_line_info(
                content,
                &line_offsets,
                &code_blocks,
                flavor,
                &html_comment_ranges,
                &autodoc_ranges,
            )
        );

        // The next three passes refine `lines` in place.
        profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));

        profile_section!(
            "ESM blocks",
            profile,
            Self::detect_esm_blocks(content, &mut lines, flavor)
        );

        profile_section!(
            "Headings & blockquotes",
            profile,
            Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges)
        );

        let code_spans = profile_section!("Code spans", profile, Self::parse_code_spans(content, &lines));

        // Flag every line after the first line of a multi-line code span.
        for span in &code_spans {
            if span.end_line > span.line {
                for line_num in (span.line + 1)..=span.end_line {
                    // `lines` is indexed by line number minus one.
                    if let Some(line_info) = lines.get_mut(line_num - 1) {
                        line_info.in_code_span_continuation = true;
                    }
                }
            }
        }

        let (links, broken_links, footnote_refs) = profile_section!(
            "Links",
            profile,
            Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
        );

        let images = profile_section!(
            "Images",
            profile,
            Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
        );

        let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));

        let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));

        let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));

        let table_blocks = profile_section!(
            "Table blocks",
            profile,
            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
                content,
                &code_blocks,
                &code_spans,
                &html_comment_ranges,
            )
        );

        let line_index = profile_section!(
            "Line index",
            profile,
            crate::utils::range_utils::LineIndex::new(content)
        );

        let jinja_ranges = profile_section!(
            "Jinja ranges",
            profile,
            crate::utils::jinja_utils::find_jinja_ranges(content)
        );

        Self {
            content,
            line_offsets,
            code_blocks,
            lines,
            links,
            images,
            broken_links,
            footnote_refs,
            reference_defs,
            // Code spans were already parsed above, so this cache starts filled.
            code_spans_cache: Mutex::new(Some(Arc::new(code_spans))),
            list_blocks,
            char_frequency,
            // The remaining caches start empty and are filled by their accessors.
            html_tags_cache: Mutex::new(None),
            emphasis_spans_cache: Mutex::new(None),
            table_rows_cache: Mutex::new(None),
            bare_urls_cache: Mutex::new(None),
            html_comment_ranges,
            table_blocks,
            line_index,
            jinja_ranges,
            flavor,
            source_file,
        }
    }
648
649 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
651 let mut cache = self.code_spans_cache.lock().expect("Code spans cache mutex poisoned");
652
653 Arc::clone(cache.get_or_insert_with(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))))
654 }
655
656 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
658 &self.html_comment_ranges
659 }
660
661 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
663 let mut cache = self.html_tags_cache.lock().expect("HTML tags cache mutex poisoned");
664
665 Arc::clone(cache.get_or_insert_with(|| {
666 Arc::new(Self::parse_html_tags(
667 self.content,
668 &self.lines,
669 &self.code_blocks,
670 self.flavor,
671 ))
672 }))
673 }
674
675 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
677 let mut cache = self
678 .emphasis_spans_cache
679 .lock()
680 .expect("Emphasis spans cache mutex poisoned");
681
682 Arc::clone(
683 cache.get_or_insert_with(|| {
684 Arc::new(Self::parse_emphasis_spans(self.content, &self.lines, &self.code_blocks))
685 }),
686 )
687 }
688
689 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
691 let mut cache = self.table_rows_cache.lock().expect("Table rows cache mutex poisoned");
692
693 Arc::clone(cache.get_or_insert_with(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))))
694 }
695
696 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
698 let mut cache = self.bare_urls_cache.lock().expect("Bare URLs cache mutex poisoned");
699
700 Arc::clone(
701 cache.get_or_insert_with(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
702 )
703 }
704
705 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
707 match self.line_offsets.binary_search(&offset) {
708 Ok(line) => (line + 1, 1),
709 Err(line) => {
710 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
711 (line, offset - line_start + 1)
712 }
713 }
714 }
715
716 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
718 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
720 return true;
721 }
722
723 self.code_spans()
725 .iter()
726 .any(|span| pos >= span.byte_offset && pos < span.byte_end)
727 }
728
729 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
731 if line_num > 0 {
732 self.lines.get(line_num - 1)
733 } else {
734 None
735 }
736 }
737
738 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
740 self.line_info(line_num).map(|info| info.byte_offset)
741 }
742
743 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
745 let normalized_id = ref_id.to_lowercase();
746 self.reference_defs
747 .iter()
748 .find(|def| def.id == normalized_id)
749 .map(|def| def.url.as_str())
750 }
751
752 pub fn is_in_list_block(&self, line_num: usize) -> bool {
754 self.list_blocks
755 .iter()
756 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
757 }
758
759 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
761 self.list_blocks
762 .iter()
763 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
764 }
765
766 pub fn is_in_code_block(&self, line_num: usize) -> bool {
770 if line_num == 0 || line_num > self.lines.len() {
771 return false;
772 }
773 self.lines[line_num - 1].in_code_block
774 }
775
776 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
778 if line_num == 0 || line_num > self.lines.len() {
779 return false;
780 }
781 self.lines[line_num - 1].in_front_matter
782 }
783
784 pub fn is_in_html_block(&self, line_num: usize) -> bool {
786 if line_num == 0 || line_num > self.lines.len() {
787 return false;
788 }
789 self.lines[line_num - 1].in_html_block
790 }
791
792 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
794 if line_num == 0 || line_num > self.lines.len() {
795 return false;
796 }
797
798 let col_0indexed = if col > 0 { col - 1 } else { 0 };
802 let code_spans = self.code_spans();
803 code_spans.iter().any(|span| {
804 if line_num < span.line || line_num > span.end_line {
806 return false;
807 }
808
809 if span.line == span.end_line {
810 col_0indexed >= span.start_col && col_0indexed < span.end_col
812 } else if line_num == span.line {
813 col_0indexed >= span.start_col
815 } else if line_num == span.end_line {
816 col_0indexed < span.end_col
818 } else {
819 true
821 }
822 })
823 }
824
825 #[inline]
827 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
828 let code_spans = self.code_spans();
829 code_spans
830 .iter()
831 .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
832 }
833
834 #[inline]
837 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
838 self.reference_defs
839 .iter()
840 .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
841 }
842
843 #[inline]
847 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
848 self.html_comment_ranges
849 .iter()
850 .any(|range| byte_pos >= range.start && byte_pos < range.end)
851 }
852
853 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
855 self.jinja_ranges
856 .iter()
857 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
858 }
859
860 pub fn has_char(&self, ch: char) -> bool {
862 match ch {
863 '#' => self.char_frequency.hash_count > 0,
864 '*' => self.char_frequency.asterisk_count > 0,
865 '_' => self.char_frequency.underscore_count > 0,
866 '-' => self.char_frequency.hyphen_count > 0,
867 '+' => self.char_frequency.plus_count > 0,
868 '>' => self.char_frequency.gt_count > 0,
869 '|' => self.char_frequency.pipe_count > 0,
870 '[' => self.char_frequency.bracket_count > 0,
871 '`' => self.char_frequency.backtick_count > 0,
872 '<' => self.char_frequency.lt_count > 0,
873 '!' => self.char_frequency.exclamation_count > 0,
874 '\n' => self.char_frequency.newline_count > 0,
875 _ => self.content.contains(ch), }
877 }
878
879 pub fn char_count(&self, ch: char) -> usize {
881 match ch {
882 '#' => self.char_frequency.hash_count,
883 '*' => self.char_frequency.asterisk_count,
884 '_' => self.char_frequency.underscore_count,
885 '-' => self.char_frequency.hyphen_count,
886 '+' => self.char_frequency.plus_count,
887 '>' => self.char_frequency.gt_count,
888 '|' => self.char_frequency.pipe_count,
889 '[' => self.char_frequency.bracket_count,
890 '`' => self.char_frequency.backtick_count,
891 '<' => self.char_frequency.lt_count,
892 '!' => self.char_frequency.exclamation_count,
893 '\n' => self.char_frequency.newline_count,
894 _ => self.content.matches(ch).count(), }
896 }
897
898 pub fn likely_has_headings(&self) -> bool {
900 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
902
903 pub fn likely_has_lists(&self) -> bool {
905 self.char_frequency.asterisk_count > 0
906 || self.char_frequency.hyphen_count > 0
907 || self.char_frequency.plus_count > 0
908 }
909
910 pub fn likely_has_emphasis(&self) -> bool {
912 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
913 }
914
915 pub fn likely_has_tables(&self) -> bool {
917 self.char_frequency.pipe_count > 2
918 }
919
920 pub fn likely_has_blockquotes(&self) -> bool {
922 self.char_frequency.gt_count > 0
923 }
924
925 pub fn likely_has_code(&self) -> bool {
927 self.char_frequency.backtick_count > 0
928 }
929
930 pub fn likely_has_links_or_images(&self) -> bool {
932 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
933 }
934
935 pub fn likely_has_html(&self) -> bool {
937 self.char_frequency.lt_count > 0
938 }
939
940 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
942 self.html_tags()
943 .iter()
944 .filter(|tag| tag.line == line_num)
945 .cloned()
946 .collect()
947 }
948
949 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
951 self.emphasis_spans()
952 .iter()
953 .filter(|span| span.line == line_num)
954 .cloned()
955 .collect()
956 }
957
958 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
960 self.table_rows()
961 .iter()
962 .filter(|row| row.line == line_num)
963 .cloned()
964 .collect()
965 }
966
967 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
969 self.bare_urls()
970 .iter()
971 .filter(|url| url.line == line_num)
972 .cloned()
973 .collect()
974 }
975
976 #[inline]
982 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
983 let idx = match lines.binary_search_by(|line| {
985 if byte_offset < line.byte_offset {
986 std::cmp::Ordering::Greater
987 } else if byte_offset > line.byte_offset + line.byte_len {
988 std::cmp::Ordering::Less
989 } else {
990 std::cmp::Ordering::Equal
991 }
992 }) {
993 Ok(idx) => idx,
994 Err(idx) => idx.saturating_sub(1),
995 };
996
997 let line = &lines[idx];
998 let line_num = idx + 1;
999 let col = byte_offset.saturating_sub(line.byte_offset);
1000
1001 (idx, line_num, col)
1002 }
1003
1004 #[inline]
1006 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1007 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1009
1010 if idx > 0 {
1012 let span = &code_spans[idx - 1];
1013 if offset >= span.byte_offset && offset < span.byte_end {
1014 return true;
1015 }
1016 }
1017
1018 false
1019 }
1020
1021 fn parse_links(
1023 content: &'a str,
1024 lines: &[LineInfo],
1025 code_blocks: &[(usize, usize)],
1026 code_spans: &[CodeSpan],
1027 flavor: MarkdownFlavor,
1028 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1029 ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1030 use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1031 use std::collections::HashSet;
1032
1033 let mut links = Vec::with_capacity(content.len() / 500);
1034 let mut broken_links = Vec::new();
1035 let mut footnote_refs = Vec::new();
1036
1037 let mut found_positions = HashSet::new();
1039
1040 let mut options = Options::empty();
1050 options.insert(Options::ENABLE_WIKILINKS);
1051 options.insert(Options::ENABLE_FOOTNOTES);
1052
1053 let parser = Parser::new_with_broken_link_callback(
1054 content,
1055 options,
1056 Some(|link: BrokenLink<'_>| {
1057 broken_links.push(BrokenLinkInfo {
1058 reference: link.reference.to_string(),
1059 span: link.span.clone(),
1060 });
1061 None
1062 }),
1063 )
1064 .into_offset_iter();
1065
1066 let mut link_stack: Vec<(
1067 usize,
1068 usize,
1069 pulldown_cmark::CowStr<'a>,
1070 LinkType,
1071 pulldown_cmark::CowStr<'a>,
1072 )> = Vec::new();
1073 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1076 match event {
1077 Event::Start(Tag::Link {
1078 link_type,
1079 dest_url,
1080 id,
1081 ..
1082 }) => {
1083 link_stack.push((range.start, range.end, dest_url, link_type, id));
1085 text_chunks.clear();
1086 }
1087 Event::Text(text) if !link_stack.is_empty() => {
1088 text_chunks.push((text.to_string(), range.start, range.end));
1090 }
1091 Event::Code(code) if !link_stack.is_empty() => {
1092 let code_text = format!("`{code}`");
1094 text_chunks.push((code_text, range.start, range.end));
1095 }
1096 Event::End(TagEnd::Link) => {
1097 if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1098 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1100 text_chunks.clear();
1101 continue;
1102 }
1103
1104 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1106
1107 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1109 text_chunks.clear();
1110 continue;
1111 }
1112
1113 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1114
1115 let is_reference = matches!(
1116 link_type,
1117 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1118 );
1119
1120 let link_text = if start_pos < content.len() {
1123 let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1124
1125 let mut close_pos = None;
1129 let mut depth = 0;
1130 let mut in_code_span = false;
1131
1132 for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1133 let mut backslash_count = 0;
1135 let mut j = i;
1136 while j > 0 && link_bytes[j - 1] == b'\\' {
1137 backslash_count += 1;
1138 j -= 1;
1139 }
1140 let is_escaped = backslash_count % 2 != 0;
1141
1142 if byte == b'`' && !is_escaped {
1144 in_code_span = !in_code_span;
1145 }
1146
1147 if !is_escaped && !in_code_span {
1149 if byte == b'[' {
1150 depth += 1;
1151 } else if byte == b']' {
1152 if depth == 0 {
1153 close_pos = Some(i);
1155 break;
1156 } else {
1157 depth -= 1;
1158 }
1159 }
1160 }
1161 }
1162
1163 if let Some(pos) = close_pos {
1164 Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1165 } else {
1166 Cow::Borrowed("")
1167 }
1168 } else {
1169 Cow::Borrowed("")
1170 };
1171
1172 let reference_id = if is_reference && !ref_id.is_empty() {
1174 Some(Cow::Owned(ref_id.to_lowercase()))
1175 } else if is_reference {
1176 Some(Cow::Owned(link_text.to_lowercase()))
1178 } else {
1179 None
1180 };
1181
1182 let has_escaped_bang = start_pos >= 2
1186 && content.as_bytes().get(start_pos - 2) == Some(&b'\\')
1187 && content.as_bytes().get(start_pos - 1) == Some(&b'!');
1188
1189 let has_escaped_bracket =
1192 start_pos >= 1 && content.as_bytes().get(start_pos - 1) == Some(&b'\\');
1193
1194 if has_escaped_bang || has_escaped_bracket {
1195 text_chunks.clear();
1196 continue; }
1198
1199 found_positions.insert(start_pos);
1201
1202 links.push(ParsedLink {
1203 line: line_num,
1204 start_col: col_start,
1205 end_col: col_end,
1206 byte_offset: start_pos,
1207 byte_end: range.end,
1208 text: link_text,
1209 url: Cow::Owned(url.to_string()),
1210 is_reference,
1211 reference_id,
1212 link_type,
1213 });
1214
1215 text_chunks.clear();
1216 }
1217 }
1218 Event::FootnoteReference(footnote_id) => {
1219 if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1222 continue;
1223 }
1224
1225 let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1226 footnote_refs.push(FootnoteRef {
1227 id: footnote_id.to_string(),
1228 line: line_num,
1229 byte_offset: range.start,
1230 byte_end: range.end,
1231 });
1232 }
1233 _ => {}
1234 }
1235 }
1236
1237 for cap in LINK_PATTERN.captures_iter(content) {
1241 let full_match = cap.get(0).unwrap();
1242 let match_start = full_match.start();
1243 let match_end = full_match.end();
1244
1245 if found_positions.contains(&match_start) {
1247 continue;
1248 }
1249
1250 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1252 continue;
1253 }
1254
1255 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1257 continue;
1258 }
1259
1260 if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1262 continue;
1263 }
1264
1265 if Self::is_offset_in_code_span(code_spans, match_start) {
1267 continue;
1268 }
1269
1270 if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1272 continue;
1273 }
1274
1275 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1277
1278 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1280 continue;
1281 }
1282
1283 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1284
1285 let text = cap.get(1).map_or("", |m| m.as_str());
1286
1287 if let Some(ref_id) = cap.get(6) {
1289 let ref_id_str = ref_id.as_str();
1290 let normalized_ref = if ref_id_str.is_empty() {
1291 Cow::Owned(text.to_lowercase()) } else {
1293 Cow::Owned(ref_id_str.to_lowercase())
1294 };
1295
1296 links.push(ParsedLink {
1298 line: line_num,
1299 start_col: col_start,
1300 end_col: col_end,
1301 byte_offset: match_start,
1302 byte_end: match_end,
1303 text: Cow::Borrowed(text),
1304 url: Cow::Borrowed(""), is_reference: true,
1306 reference_id: Some(normalized_ref),
1307 link_type: LinkType::Reference, });
1309 }
1310 }
1311
1312 (links, broken_links, footnote_refs)
1313 }
1314
1315 fn parse_images(
1317 content: &'a str,
1318 lines: &[LineInfo],
1319 code_blocks: &[(usize, usize)],
1320 code_spans: &[CodeSpan],
1321 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1322 ) -> Vec<ParsedImage<'a>> {
1323 use crate::utils::skip_context::is_in_html_comment_ranges;
1324 use std::collections::HashSet;
1325
1326 let mut images = Vec::with_capacity(content.len() / 1000);
1328 let mut found_positions = HashSet::new();
1329
1330 let parser = Parser::new(content).into_offset_iter();
1332 let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1333 Vec::new();
1334 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1337 match event {
1338 Event::Start(Tag::Image {
1339 link_type,
1340 dest_url,
1341 id,
1342 ..
1343 }) => {
1344 image_stack.push((range.start, dest_url, link_type, id));
1345 text_chunks.clear();
1346 }
1347 Event::Text(text) if !image_stack.is_empty() => {
1348 text_chunks.push((text.to_string(), range.start, range.end));
1349 }
1350 Event::Code(code) if !image_stack.is_empty() => {
1351 let code_text = format!("`{code}`");
1352 text_chunks.push((code_text, range.start, range.end));
1353 }
1354 Event::End(TagEnd::Image) => {
1355 if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1356 if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1358 continue;
1359 }
1360
1361 if Self::is_offset_in_code_span(code_spans, start_pos) {
1363 continue;
1364 }
1365
1366 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1368 continue;
1369 }
1370
1371 let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1373 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1374
1375 let is_reference = matches!(
1376 link_type,
1377 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1378 );
1379
1380 let alt_text = if start_pos < content.len() {
1383 let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1384
1385 let mut close_pos = None;
1388 let mut depth = 0;
1389
1390 if image_bytes.len() > 2 {
1391 for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1392 let mut backslash_count = 0;
1394 let mut j = i;
1395 while j > 0 && image_bytes[j - 1] == b'\\' {
1396 backslash_count += 1;
1397 j -= 1;
1398 }
1399 let is_escaped = backslash_count % 2 != 0;
1400
1401 if !is_escaped {
1402 if byte == b'[' {
1403 depth += 1;
1404 } else if byte == b']' {
1405 if depth == 0 {
1406 close_pos = Some(i);
1408 break;
1409 } else {
1410 depth -= 1;
1411 }
1412 }
1413 }
1414 }
1415 }
1416
1417 if let Some(pos) = close_pos {
1418 Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1419 } else {
1420 Cow::Borrowed("")
1421 }
1422 } else {
1423 Cow::Borrowed("")
1424 };
1425
1426 let reference_id = if is_reference && !ref_id.is_empty() {
1427 Some(Cow::Owned(ref_id.to_lowercase()))
1428 } else if is_reference {
1429 Some(Cow::Owned(alt_text.to_lowercase())) } else {
1431 None
1432 };
1433
1434 found_positions.insert(start_pos);
1435 images.push(ParsedImage {
1436 line: line_num,
1437 start_col: col_start,
1438 end_col: col_end,
1439 byte_offset: start_pos,
1440 byte_end: range.end,
1441 alt_text,
1442 url: Cow::Owned(url.to_string()),
1443 is_reference,
1444 reference_id,
1445 link_type,
1446 });
1447 }
1448 }
1449 _ => {}
1450 }
1451 }
1452
1453 for cap in IMAGE_PATTERN.captures_iter(content) {
1455 let full_match = cap.get(0).unwrap();
1456 let match_start = full_match.start();
1457 let match_end = full_match.end();
1458
1459 if found_positions.contains(&match_start) {
1461 continue;
1462 }
1463
1464 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1466 continue;
1467 }
1468
1469 if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1471 || Self::is_offset_in_code_span(code_spans, match_start)
1472 || is_in_html_comment_ranges(html_comment_ranges, match_start)
1473 {
1474 continue;
1475 }
1476
1477 if let Some(ref_id) = cap.get(6) {
1479 let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1480 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1481 let alt_text = cap.get(1).map_or("", |m| m.as_str());
1482 let ref_id_str = ref_id.as_str();
1483 let normalized_ref = if ref_id_str.is_empty() {
1484 Cow::Owned(alt_text.to_lowercase())
1485 } else {
1486 Cow::Owned(ref_id_str.to_lowercase())
1487 };
1488
1489 images.push(ParsedImage {
1490 line: line_num,
1491 start_col: col_start,
1492 end_col: col_end,
1493 byte_offset: match_start,
1494 byte_end: match_end,
1495 alt_text: Cow::Borrowed(alt_text),
1496 url: Cow::Borrowed(""),
1497 is_reference: true,
1498 reference_id: Some(normalized_ref),
1499 link_type: LinkType::Reference, });
1501 }
1502 }
1503
1504 images
1505 }
1506
1507 fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1509 let mut refs = Vec::with_capacity(lines.len() / 20); for (line_idx, line_info) in lines.iter().enumerate() {
1513 if line_info.in_code_block {
1515 continue;
1516 }
1517
1518 let line = line_info.content(content);
1519 let line_num = line_idx + 1;
1520
1521 if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1522 let id = cap.get(1).unwrap().as_str().to_lowercase();
1523 let url = cap.get(2).unwrap().as_str().to_string();
1524 let title = cap.get(3).or_else(|| cap.get(4)).map(|m| m.as_str().to_string());
1525
1526 let match_obj = cap.get(0).unwrap();
1529 let byte_offset = line_info.byte_offset + match_obj.start();
1530 let byte_end = line_info.byte_offset + match_obj.end();
1531
1532 refs.push(ReferenceDef {
1533 line: line_num,
1534 id,
1535 url,
1536 title,
1537 byte_offset,
1538 byte_end,
1539 });
1540 }
1541 }
1542
1543 refs
1544 }
1545
1546 #[inline]
1550 fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
1551 let trimmed_start = line.trim_start();
1552 if !trimmed_start.starts_with('>') {
1553 return None;
1554 }
1555
1556 let leading_ws_len = line.len() - trimmed_start.len();
1557 let after_gt = &trimmed_start[1..];
1558 let content = after_gt.trim_start();
1559 let ws_after_gt_len = after_gt.len() - content.len();
1560 let prefix_len = leading_ws_len + 1 + ws_after_gt_len;
1561
1562 Some((&line[..prefix_len], content))
1563 }
1564
1565 #[inline]
1569 fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
1570 let bytes = line.as_bytes();
1571 let mut i = 0;
1572
1573 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1575 i += 1;
1576 }
1577
1578 if i >= bytes.len() {
1580 return None;
1581 }
1582 let marker = bytes[i] as char;
1583 if marker != '-' && marker != '*' && marker != '+' {
1584 return None;
1585 }
1586 let marker_pos = i;
1587 i += 1;
1588
1589 let spacing_start = i;
1591 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1592 i += 1;
1593 }
1594
1595 Some((&line[..marker_pos], marker, &line[spacing_start..i], &line[i..]))
1596 }
1597
1598 #[inline]
1602 fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
1603 let bytes = line.as_bytes();
1604 let mut i = 0;
1605
1606 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1608 i += 1;
1609 }
1610
1611 let number_start = i;
1613 while i < bytes.len() && bytes[i].is_ascii_digit() {
1614 i += 1;
1615 }
1616 if i == number_start {
1617 return None; }
1619
1620 if i >= bytes.len() {
1622 return None;
1623 }
1624 let delimiter = bytes[i] as char;
1625 if delimiter != '.' && delimiter != ')' {
1626 return None;
1627 }
1628 let delimiter_pos = i;
1629 i += 1;
1630
1631 let spacing_start = i;
1633 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1634 i += 1;
1635 }
1636
1637 Some((
1638 &line[..number_start],
1639 &line[number_start..delimiter_pos],
1640 delimiter,
1641 &line[spacing_start..i],
1642 &line[i..],
1643 ))
1644 }
1645
1646 fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
1649 let num_lines = line_offsets.len();
1650 let mut in_code_block = vec![false; num_lines];
1651
1652 for &(start, end) in code_blocks {
1654 let safe_start = if start > 0 && !content.is_char_boundary(start) {
1656 let mut boundary = start;
1657 while boundary > 0 && !content.is_char_boundary(boundary) {
1658 boundary -= 1;
1659 }
1660 boundary
1661 } else {
1662 start
1663 };
1664
1665 let safe_end = if end < content.len() && !content.is_char_boundary(end) {
1666 let mut boundary = end;
1667 while boundary < content.len() && !content.is_char_boundary(boundary) {
1668 boundary += 1;
1669 }
1670 boundary
1671 } else {
1672 end.min(content.len())
1673 };
1674
1675 let first_line_after = line_offsets.partition_point(|&offset| offset <= safe_start);
1694 let first_line = first_line_after.saturating_sub(1);
1695 let last_line = line_offsets.partition_point(|&offset| offset < safe_end);
1696
1697 for flag in in_code_block.iter_mut().take(last_line).skip(first_line) {
1699 *flag = true;
1700 }
1701 }
1702
1703 in_code_block
1704 }
1705
1706 fn compute_basic_line_info(
1708 content: &str,
1709 line_offsets: &[usize],
1710 code_blocks: &[(usize, usize)],
1711 flavor: MarkdownFlavor,
1712 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1713 autodoc_ranges: &[crate::utils::skip_context::ByteRange],
1714 ) -> Vec<LineInfo> {
1715 let content_lines: Vec<&str> = content.lines().collect();
1716 let mut lines = Vec::with_capacity(content_lines.len());
1717
1718 let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
1720
1721 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
1724
1725 for (i, line) in content_lines.iter().enumerate() {
1726 let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
1727 let indent = line.len() - line.trim_start().len();
1728
1729 let blockquote_parse = Self::parse_blockquote_prefix(line);
1731
1732 let is_blank = if let Some((_, content)) = blockquote_parse {
1734 content.trim().is_empty()
1736 } else {
1737 line.trim().is_empty()
1738 };
1739
1740 let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
1742
1743 let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
1745 && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
1746 let in_html_comment =
1748 crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, byte_offset);
1749 let list_item = if !(in_code_block
1750 || is_blank
1751 || in_mkdocstrings
1752 || in_html_comment
1753 || (front_matter_end > 0 && i < front_matter_end))
1754 {
1755 let (line_for_list_check, blockquote_prefix_len) = if let Some((prefix, content)) = blockquote_parse {
1757 (content, prefix.len())
1758 } else {
1759 (&**line, 0)
1760 };
1761
1762 if let Some((leading_spaces, marker, spacing, _content)) =
1763 Self::parse_unordered_list(line_for_list_check)
1764 {
1765 let marker_column = blockquote_prefix_len + leading_spaces.len();
1766 let content_column = marker_column + 1 + spacing.len();
1767
1768 if spacing.is_empty() {
1775 None
1776 } else {
1777 Some(ListItemInfo {
1778 marker: marker.to_string(),
1779 is_ordered: false,
1780 number: None,
1781 marker_column,
1782 content_column,
1783 })
1784 }
1785 } else if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
1786 Self::parse_ordered_list(line_for_list_check)
1787 {
1788 let marker = format!("{number_str}{delimiter}");
1789 let marker_column = blockquote_prefix_len + leading_spaces.len();
1790 let content_column = marker_column + marker.len() + spacing.len();
1791
1792 if spacing.is_empty() {
1795 None
1796 } else {
1797 Some(ListItemInfo {
1798 marker,
1799 is_ordered: true,
1800 number: number_str.parse().ok(),
1801 marker_column,
1802 content_column,
1803 })
1804 }
1805 } else {
1806 None
1807 }
1808 } else {
1809 None
1810 };
1811
1812 lines.push(LineInfo {
1813 byte_offset,
1814 byte_len: line.len(),
1815 indent,
1816 is_blank,
1817 in_code_block,
1818 in_front_matter: front_matter_end > 0 && i < front_matter_end,
1819 in_html_block: false, in_html_comment,
1821 list_item,
1822 heading: None, blockquote: None, in_mkdocstrings,
1825 in_esm_block: false, in_code_span_continuation: false, });
1828 }
1829
1830 lines
1831 }
1832
1833 fn detect_headings_and_blockquotes(
1835 content: &str,
1836 lines: &mut [LineInfo],
1837 flavor: MarkdownFlavor,
1838 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1839 ) {
1840 static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
1842 LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
1843 static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
1844 LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
1845
1846 let content_lines: Vec<&str> = content.lines().collect();
1847
1848 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
1850
1851 for i in 0..lines.len() {
1853 if lines[i].in_code_block {
1854 continue;
1855 }
1856
1857 if front_matter_end > 0 && i < front_matter_end {
1859 continue;
1860 }
1861
1862 if lines[i].in_html_block {
1864 continue;
1865 }
1866
1867 let line = content_lines[i];
1868
1869 if let Some(bq) = parse_blockquote_detailed(line) {
1871 let nesting_level = bq.markers.len(); let marker_column = bq.indent.len();
1873
1874 let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
1876
1877 let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
1879 let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
1882
1883 let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
1887
1888 lines[i].blockquote = Some(BlockquoteInfo {
1889 nesting_level,
1890 indent: bq.indent.to_string(),
1891 marker_column,
1892 prefix,
1893 content: bq.content.to_string(),
1894 has_no_space_after_marker: has_no_space,
1895 has_multiple_spaces_after_marker: has_multiple_spaces,
1896 needs_md028_fix,
1897 });
1898 }
1899
1900 if lines[i].is_blank {
1902 continue;
1903 }
1904
1905 let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
1908 crate::utils::mkdocs_snippets::is_snippet_section_start(line)
1909 || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
1910 } else {
1911 false
1912 };
1913
1914 if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
1915 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
1917 continue;
1918 }
1919 let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
1920 let hashes = caps.get(2).map_or("", |m| m.as_str());
1921 let spaces_after = caps.get(3).map_or("", |m| m.as_str());
1922 let rest = caps.get(4).map_or("", |m| m.as_str());
1923
1924 let level = hashes.len() as u8;
1925 let marker_column = leading_spaces.len();
1926
1927 let (text, has_closing, closing_seq) = {
1929 let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
1931 if rest[id_start..].trim_end().ends_with('}') {
1933 (&rest[..id_start], &rest[id_start..])
1935 } else {
1936 (rest, "")
1937 }
1938 } else {
1939 (rest, "")
1940 };
1941
1942 let trimmed_rest = rest_without_id.trim_end();
1944 if let Some(last_hash_pos) = trimmed_rest.rfind('#') {
1945 let mut start_of_hashes = last_hash_pos;
1947 while start_of_hashes > 0 && trimmed_rest.chars().nth(start_of_hashes - 1) == Some('#') {
1948 start_of_hashes -= 1;
1949 }
1950
1951 let has_space_before = start_of_hashes == 0
1953 || trimmed_rest
1954 .chars()
1955 .nth(start_of_hashes - 1)
1956 .is_some_and(|c| c.is_whitespace());
1957
1958 let potential_closing = &trimmed_rest[start_of_hashes..];
1960 let is_all_hashes = potential_closing.chars().all(|c| c == '#');
1961
1962 if is_all_hashes && has_space_before {
1963 let closing_hashes = potential_closing.to_string();
1965 let text_part = if !custom_id_part.is_empty() {
1968 format!("{}{}", rest_without_id[..start_of_hashes].trim_end(), custom_id_part)
1971 } else {
1972 rest_without_id[..start_of_hashes].trim_end().to_string()
1973 };
1974 (text_part, true, closing_hashes)
1975 } else {
1976 (rest.to_string(), false, String::new())
1978 }
1979 } else {
1980 (rest.to_string(), false, String::new())
1982 }
1983 };
1984
1985 let content_column = marker_column + hashes.len() + spaces_after.len();
1986
1987 let raw_text = text.trim().to_string();
1989 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
1990
1991 if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
1993 let next_line = content_lines[i + 1];
1994 if !lines[i + 1].in_code_block
1995 && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
1996 && let Some(next_line_id) =
1997 crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
1998 {
1999 custom_id = Some(next_line_id);
2000 }
2001 }
2002
2003 lines[i].heading = Some(HeadingInfo {
2004 level,
2005 style: HeadingStyle::ATX,
2006 marker: hashes.to_string(),
2007 marker_column,
2008 content_column,
2009 text: clean_text,
2010 custom_id,
2011 raw_text,
2012 has_closing_sequence: has_closing,
2013 closing_sequence: closing_seq,
2014 });
2015 }
2016 else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2018 let next_line = content_lines[i + 1];
2019 if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2020 if front_matter_end > 0 && i < front_matter_end {
2022 continue;
2023 }
2024
2025 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2027 {
2028 continue;
2029 }
2030
2031 let underline = next_line.trim();
2032
2033 let level = if underline.starts_with('=') { 1 } else { 2 };
2034 let style = if level == 1 {
2035 HeadingStyle::Setext1
2036 } else {
2037 HeadingStyle::Setext2
2038 };
2039
2040 let raw_text = line.trim().to_string();
2042 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2043
2044 if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2046 let attr_line = content_lines[i + 2];
2047 if !lines[i + 2].in_code_block
2048 && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2049 && let Some(attr_line_id) =
2050 crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2051 {
2052 custom_id = Some(attr_line_id);
2053 }
2054 }
2055
2056 lines[i].heading = Some(HeadingInfo {
2057 level,
2058 style,
2059 marker: underline.to_string(),
2060 marker_column: next_line.len() - next_line.trim_start().len(),
2061 content_column: lines[i].indent,
2062 text: clean_text,
2063 custom_id,
2064 raw_text,
2065 has_closing_sequence: false,
2066 closing_sequence: String::new(),
2067 });
2068 }
2069 }
2070 }
2071 }
2072
2073 fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2075 const BLOCK_ELEMENTS: &[&str] = &[
2077 "address",
2078 "article",
2079 "aside",
2080 "blockquote",
2081 "details",
2082 "dialog",
2083 "dd",
2084 "div",
2085 "dl",
2086 "dt",
2087 "fieldset",
2088 "figcaption",
2089 "figure",
2090 "footer",
2091 "form",
2092 "h1",
2093 "h2",
2094 "h3",
2095 "h4",
2096 "h5",
2097 "h6",
2098 "header",
2099 "hr",
2100 "li",
2101 "main",
2102 "nav",
2103 "ol",
2104 "p",
2105 "picture",
2106 "pre",
2107 "script",
2108 "section",
2109 "style",
2110 "table",
2111 "tbody",
2112 "td",
2113 "textarea",
2114 "tfoot",
2115 "th",
2116 "thead",
2117 "tr",
2118 "ul",
2119 ];
2120
2121 let mut i = 0;
2122 while i < lines.len() {
2123 if lines[i].in_code_block || lines[i].in_front_matter {
2125 i += 1;
2126 continue;
2127 }
2128
2129 let trimmed = lines[i].content(content).trim_start();
2130
2131 if trimmed.starts_with('<') && trimmed.len() > 1 {
2133 let after_bracket = &trimmed[1..];
2135 let is_closing = after_bracket.starts_with('/');
2136 let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2137
2138 let tag_name = tag_start
2140 .chars()
2141 .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2142 .collect::<String>()
2143 .to_lowercase();
2144
2145 if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2147 lines[i].in_html_block = true;
2149
2150 if !is_closing {
2153 let closing_tag = format!("</{tag_name}>");
2154 let allow_blank_lines = tag_name == "style" || tag_name == "script";
2156 let mut j = i + 1;
2157 while j < lines.len() && j < i + 100 {
2158 if !allow_blank_lines && lines[j].is_blank {
2161 break;
2162 }
2163
2164 lines[j].in_html_block = true;
2165
2166 if lines[j].content(content).contains(&closing_tag) {
2168 break;
2169 }
2170 j += 1;
2171 }
2172 }
2173 }
2174 }
2175
2176 i += 1;
2177 }
2178 }
2179
2180 fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2183 if !flavor.supports_esm_blocks() {
2185 return;
2186 }
2187
2188 for line in lines.iter_mut() {
2189 if line.is_blank || line.in_html_comment {
2191 continue;
2192 }
2193
2194 let trimmed = line.content(content).trim_start();
2196 if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
2197 line.in_esm_block = true;
2198 } else {
2199 break;
2201 }
2202 }
2203 }
2204
2205 fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
2207 let mut code_spans = Vec::new();
2208
2209 if !content.contains('`') {
2211 return code_spans;
2212 }
2213
2214 let parser = Parser::new(content).into_offset_iter();
2216
2217 for (event, range) in parser {
2218 if let Event::Code(_) = event {
2219 let start_pos = range.start;
2220 let end_pos = range.end;
2221
2222 let full_span = &content[start_pos..end_pos];
2224 let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
2225
2226 let content_start = start_pos + backtick_count;
2228 let content_end = end_pos - backtick_count;
2229 let span_content = if content_start < content_end {
2230 content[content_start..content_end].to_string()
2231 } else {
2232 String::new()
2233 };
2234
2235 let line_idx = lines
2238 .partition_point(|line| line.byte_offset <= start_pos)
2239 .saturating_sub(1);
2240 let line_num = line_idx + 1;
2241 let byte_col_start = start_pos - lines[line_idx].byte_offset;
2242
2243 let end_line_idx = lines
2245 .partition_point(|line| line.byte_offset <= end_pos)
2246 .saturating_sub(1);
2247 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
2248
2249 let line_content = lines[line_idx].content(content);
2252 let col_start = if byte_col_start <= line_content.len() {
2253 line_content[..byte_col_start].chars().count()
2254 } else {
2255 line_content.chars().count()
2256 };
2257
2258 let end_line_content = lines[end_line_idx].content(content);
2259 let col_end = if byte_col_end <= end_line_content.len() {
2260 end_line_content[..byte_col_end].chars().count()
2261 } else {
2262 end_line_content.chars().count()
2263 };
2264
2265 code_spans.push(CodeSpan {
2266 line: line_num,
2267 end_line: end_line_idx + 1,
2268 start_col: col_start,
2269 end_col: col_end,
2270 byte_offset: start_pos,
2271 byte_end: end_pos,
2272 backtick_count,
2273 content: span_content,
2274 });
2275 }
2276 }
2277
2278 code_spans.sort_by_key(|span| span.byte_offset);
2280
2281 code_spans
2282 }
2283
2284 fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
2295 const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
2297
2298 #[inline]
2301 fn reset_tracking_state(
2302 list_item: &ListItemInfo,
2303 has_list_breaking_content: &mut bool,
2304 min_continuation: &mut usize,
2305 ) {
2306 *has_list_breaking_content = false;
2307 let marker_width = if list_item.is_ordered {
2308 list_item.marker.len() + 1 } else {
2310 list_item.marker.len()
2311 };
2312 *min_continuation = if list_item.is_ordered {
2313 marker_width
2314 } else {
2315 UNORDERED_LIST_MIN_CONTINUATION_INDENT
2316 };
2317 }
2318
2319 let mut list_blocks = Vec::with_capacity(lines.len() / 10); let mut current_block: Option<ListBlock> = None;
2322 let mut last_list_item_line = 0;
2323 let mut current_indent_level = 0;
2324 let mut last_marker_width = 0;
2325
2326 let mut has_list_breaking_content_since_last_item = false;
2328 let mut min_continuation_for_tracking = 0;
2329
2330 for (line_idx, line_info) in lines.iter().enumerate() {
2331 let line_num = line_idx + 1;
2332
2333 if line_info.in_code_block {
2335 if let Some(ref mut block) = current_block {
2336 let min_continuation_indent =
2338 CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
2339
2340 let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
2342
2343 match context {
2344 CodeBlockContext::Indented => {
2345 block.end_line = line_num;
2347 continue;
2348 }
2349 CodeBlockContext::Standalone => {
2350 let completed_block = current_block.take().unwrap();
2352 list_blocks.push(completed_block);
2353 continue;
2354 }
2355 CodeBlockContext::Adjacent => {
2356 block.end_line = line_num;
2358 continue;
2359 }
2360 }
2361 } else {
2362 continue;
2364 }
2365 }
2366
2367 let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
2369 caps.get(0).unwrap().as_str().to_string()
2370 } else {
2371 String::new()
2372 };
2373
2374 if current_block.is_some()
2377 && line_info.list_item.is_none()
2378 && !line_info.is_blank
2379 && !line_info.in_code_span_continuation
2380 {
2381 let line_content = line_info.content(content).trim();
2382
2383 let pipes_outside_code = {
2385 let mut count = 0;
2386 let mut in_code = false;
2387 for ch in line_content.chars() {
2388 if ch == '`' {
2389 in_code = !in_code;
2390 } else if ch == '|' && !in_code {
2391 count += 1;
2392 }
2393 }
2394 count
2395 };
2396
2397 let breaks_list = line_info.heading.is_some()
2399 || line_content.starts_with("---")
2400 || line_content.starts_with("***")
2401 || line_content.starts_with("___")
2402 || (pipes_outside_code > 0
2403 && !line_content.contains("](")
2404 && !line_content.contains("http")
2405 && (pipes_outside_code > 1 || line_content.starts_with('|') || line_content.ends_with('|')))
2406 || line_content.starts_with(">")
2407 || (line_info.indent < min_continuation_for_tracking);
2408
2409 if breaks_list {
2410 has_list_breaking_content_since_last_item = true;
2411 }
2412 }
2413
2414 if line_info.in_code_span_continuation
2417 && line_info.list_item.is_none()
2418 && let Some(ref mut block) = current_block
2419 {
2420 block.end_line = line_num;
2421 }
2422
2423 if !line_info.in_code_span_continuation
2427 && line_info.list_item.is_none()
2428 && !line_info.is_blank
2429 && !line_info.in_code_block
2430 && line_info.indent >= min_continuation_for_tracking
2431 && let Some(ref mut block) = current_block
2432 {
2433 block.end_line = line_num;
2434 }
2435
2436 if let Some(list_item) = &line_info.list_item {
2438 let item_indent = list_item.marker_column;
2440 let nesting = item_indent / 2; if let Some(ref mut block) = current_block {
2443 let is_nested = nesting > block.nesting_level;
2447 let same_type =
2448 (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
2449 let same_context = block.blockquote_prefix == blockquote_prefix;
2450 let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
2452
2453 let marker_compatible =
2455 block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
2456
2457 let has_non_list_content = has_list_breaking_content_since_last_item;
2460
2461 let mut continues_list = if is_nested {
2465 same_context && reasonable_distance && !has_non_list_content
2467 } else {
2468 same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
2470 };
2471
2472 if !continues_list && reasonable_distance && line_num > 0 && block.end_line == line_num - 1 {
2475 if block.item_lines.contains(&(line_num - 1)) {
2477 continues_list = true;
2479 }
2480 }
2481
2482 if continues_list {
2483 block.end_line = line_num;
2485 block.item_lines.push(line_num);
2486
2487 block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
2489 list_item.marker.len() + 1
2490 } else {
2491 list_item.marker.len()
2492 });
2493
2494 if !block.is_ordered
2496 && block.marker.is_some()
2497 && block.marker.as_ref() != Some(&list_item.marker)
2498 {
2499 block.marker = None;
2501 }
2502
2503 reset_tracking_state(
2505 list_item,
2506 &mut has_list_breaking_content_since_last_item,
2507 &mut min_continuation_for_tracking,
2508 );
2509 } else {
2510 list_blocks.push(block.clone());
2513
2514 *block = ListBlock {
2515 start_line: line_num,
2516 end_line: line_num,
2517 is_ordered: list_item.is_ordered,
2518 marker: if list_item.is_ordered {
2519 None
2520 } else {
2521 Some(list_item.marker.clone())
2522 },
2523 blockquote_prefix: blockquote_prefix.clone(),
2524 item_lines: vec![line_num],
2525 nesting_level: nesting,
2526 max_marker_width: if list_item.is_ordered {
2527 list_item.marker.len() + 1
2528 } else {
2529 list_item.marker.len()
2530 },
2531 };
2532
2533 reset_tracking_state(
2535 list_item,
2536 &mut has_list_breaking_content_since_last_item,
2537 &mut min_continuation_for_tracking,
2538 );
2539 }
2540 } else {
2541 current_block = Some(ListBlock {
2543 start_line: line_num,
2544 end_line: line_num,
2545 is_ordered: list_item.is_ordered,
2546 marker: if list_item.is_ordered {
2547 None
2548 } else {
2549 Some(list_item.marker.clone())
2550 },
2551 blockquote_prefix,
2552 item_lines: vec![line_num],
2553 nesting_level: nesting,
2554 max_marker_width: list_item.marker.len(),
2555 });
2556
2557 reset_tracking_state(
2559 list_item,
2560 &mut has_list_breaking_content_since_last_item,
2561 &mut min_continuation_for_tracking,
2562 );
2563 }
2564
2565 last_list_item_line = line_num;
2566 current_indent_level = item_indent;
2567 last_marker_width = if list_item.is_ordered {
2568 list_item.marker.len() + 1 } else {
2570 list_item.marker.len()
2571 };
2572 } else if let Some(ref mut block) = current_block {
2573 let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
2583 lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
2584 } else {
2585 false
2586 };
2587
2588 let min_continuation_indent = if block.is_ordered {
2592 current_indent_level + last_marker_width
2593 } else {
2594 current_indent_level + 2 };
2596
2597 if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
2598 block.end_line = line_num;
2600 } else if line_info.is_blank {
2601 let mut check_idx = line_idx + 1;
2604 let mut found_continuation = false;
2605
2606 while check_idx < lines.len() && lines[check_idx].is_blank {
2608 check_idx += 1;
2609 }
2610
2611 if check_idx < lines.len() {
2612 let next_line = &lines[check_idx];
2613 if !next_line.in_code_block && next_line.indent >= min_continuation_indent {
2615 found_continuation = true;
2616 }
2617 else if !next_line.in_code_block
2619 && next_line.list_item.is_some()
2620 && let Some(item) = &next_line.list_item
2621 {
2622 let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
2623 .find(next_line.content(content))
2624 .map_or(String::new(), |m| m.as_str().to_string());
2625 if item.marker_column == current_indent_level
2626 && item.is_ordered == block.is_ordered
2627 && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
2628 {
2629 let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
2632 if let Some(between_line) = lines.get(idx) {
2633 let between_content = between_line.content(content);
2634 let trimmed = between_content.trim();
2635 if trimmed.is_empty() {
2637 return false;
2638 }
2639 let line_indent = between_content.len() - between_content.trim_start().len();
2641
2642 if trimmed.starts_with("```")
2644 || trimmed.starts_with("~~~")
2645 || trimmed.starts_with("---")
2646 || trimmed.starts_with("***")
2647 || trimmed.starts_with("___")
2648 || trimmed.starts_with(">")
2649 || trimmed.contains('|') || between_line.heading.is_some()
2651 {
2652 return true; }
2654
2655 line_indent >= min_continuation_indent
2657 } else {
2658 false
2659 }
2660 });
2661
2662 if block.is_ordered {
2663 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2666 if let Some(between_line) = lines.get(idx) {
2667 let trimmed = between_line.content(content).trim();
2668 if trimmed.is_empty() {
2669 return false;
2670 }
2671 trimmed.starts_with("```")
2673 || trimmed.starts_with("~~~")
2674 || trimmed.starts_with("---")
2675 || trimmed.starts_with("***")
2676 || trimmed.starts_with("___")
2677 || trimmed.starts_with(">")
2678 || trimmed.contains('|') || between_line.heading.is_some()
2680 } else {
2681 false
2682 }
2683 });
2684 found_continuation = !has_structural_separators;
2685 } else {
2686 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2688 if let Some(between_line) = lines.get(idx) {
2689 let trimmed = between_line.content(content).trim();
2690 if trimmed.is_empty() {
2691 return false;
2692 }
2693 trimmed.starts_with("```")
2695 || trimmed.starts_with("~~~")
2696 || trimmed.starts_with("---")
2697 || trimmed.starts_with("***")
2698 || trimmed.starts_with("___")
2699 || trimmed.starts_with(">")
2700 || trimmed.contains('|') || between_line.heading.is_some()
2702 } else {
2703 false
2704 }
2705 });
2706 found_continuation = !has_structural_separators;
2707 }
2708 }
2709 }
2710 }
2711
2712 if found_continuation {
2713 block.end_line = line_num;
2715 } else {
2716 list_blocks.push(block.clone());
2718 current_block = None;
2719 }
2720 } else {
2721 let min_required_indent = if block.is_ordered {
2724 current_indent_level + last_marker_width
2725 } else {
2726 current_indent_level + 2
2727 };
2728
2729 let line_content = line_info.content(content).trim();
2734 let is_structural_separator = line_info.heading.is_some()
2735 || line_content.starts_with("```")
2736 || line_content.starts_with("~~~")
2737 || line_content.starts_with("---")
2738 || line_content.starts_with("***")
2739 || line_content.starts_with("___")
2740 || line_content.starts_with(">")
2741 || (line_content.contains('|')
2742 && !line_content.contains("](")
2743 && !line_content.contains("http")
2744 && (line_content.matches('|').count() > 1
2745 || line_content.starts_with('|')
2746 || line_content.ends_with('|'))); let is_lazy_continuation = !is_structural_separator
2751 && !line_info.is_blank
2752 && (line_info.indent == 0 || line_info.indent >= min_required_indent);
2753
2754 if is_lazy_continuation {
2755 let content_to_check = if !blockquote_prefix.is_empty() {
2758 line_info
2760 .content(content)
2761 .strip_prefix(&blockquote_prefix)
2762 .unwrap_or(line_info.content(content))
2763 .trim()
2764 } else {
2765 line_info.content(content).trim()
2766 };
2767
2768 let starts_with_uppercase = content_to_check.chars().next().is_some_and(|c| c.is_uppercase());
2769
2770 if starts_with_uppercase && last_list_item_line > 0 {
2773 list_blocks.push(block.clone());
2775 current_block = None;
2776 } else {
2777 block.end_line = line_num;
2779 }
2780 } else {
2781 list_blocks.push(block.clone());
2783 current_block = None;
2784 }
2785 }
2786 }
2787 }
2788
2789 if let Some(block) = current_block {
2791 list_blocks.push(block);
2792 }
2793
2794 merge_adjacent_list_blocks(content, &mut list_blocks, lines);
2796
2797 list_blocks
2798 }
2799
2800 fn compute_char_frequency(content: &str) -> CharFrequency {
2802 let mut frequency = CharFrequency::default();
2803
2804 for ch in content.chars() {
2805 match ch {
2806 '#' => frequency.hash_count += 1,
2807 '*' => frequency.asterisk_count += 1,
2808 '_' => frequency.underscore_count += 1,
2809 '-' => frequency.hyphen_count += 1,
2810 '+' => frequency.plus_count += 1,
2811 '>' => frequency.gt_count += 1,
2812 '|' => frequency.pipe_count += 1,
2813 '[' => frequency.bracket_count += 1,
2814 '`' => frequency.backtick_count += 1,
2815 '<' => frequency.lt_count += 1,
2816 '!' => frequency.exclamation_count += 1,
2817 '\n' => frequency.newline_count += 1,
2818 _ => {}
2819 }
2820 }
2821
2822 frequency
2823 }
2824
2825 fn parse_html_tags(
2827 content: &str,
2828 lines: &[LineInfo],
2829 code_blocks: &[(usize, usize)],
2830 flavor: MarkdownFlavor,
2831 ) -> Vec<HtmlTag> {
2832 static HTML_TAG_REGEX: LazyLock<regex::Regex> =
2833 LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
2834
2835 let mut html_tags = Vec::with_capacity(content.matches('<').count());
2836
2837 for cap in HTML_TAG_REGEX.captures_iter(content) {
2838 let full_match = cap.get(0).unwrap();
2839 let match_start = full_match.start();
2840 let match_end = full_match.end();
2841
2842 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2844 continue;
2845 }
2846
2847 let is_closing = !cap.get(1).unwrap().as_str().is_empty();
2848 let tag_name_original = cap.get(2).unwrap().as_str();
2849 let tag_name = tag_name_original.to_lowercase();
2850 let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
2851
2852 if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
2855 continue;
2856 }
2857
2858 let mut line_num = 1;
2860 let mut col_start = match_start;
2861 let mut col_end = match_end;
2862 for (idx, line_info) in lines.iter().enumerate() {
2863 if match_start >= line_info.byte_offset {
2864 line_num = idx + 1;
2865 col_start = match_start - line_info.byte_offset;
2866 col_end = match_end - line_info.byte_offset;
2867 } else {
2868 break;
2869 }
2870 }
2871
2872 html_tags.push(HtmlTag {
2873 line: line_num,
2874 start_col: col_start,
2875 end_col: col_end,
2876 byte_offset: match_start,
2877 byte_end: match_end,
2878 tag_name,
2879 is_closing,
2880 is_self_closing,
2881 raw_content: full_match.as_str().to_string(),
2882 });
2883 }
2884
2885 html_tags
2886 }
2887
2888 fn parse_emphasis_spans(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<EmphasisSpan> {
2890 static EMPHASIS_REGEX: LazyLock<regex::Regex> =
2891 LazyLock::new(|| regex::Regex::new(r"(\*{1,3}|_{1,3})([^*_\s][^*_]*?)(\*{1,3}|_{1,3})").unwrap());
2892
2893 let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
2894
2895 for cap in EMPHASIS_REGEX.captures_iter(content) {
2896 let full_match = cap.get(0).unwrap();
2897 let match_start = full_match.start();
2898 let match_end = full_match.end();
2899
2900 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2902 continue;
2903 }
2904
2905 let opening_markers = cap.get(1).unwrap().as_str();
2906 let content_part = cap.get(2).unwrap().as_str();
2907 let closing_markers = cap.get(3).unwrap().as_str();
2908
2909 if opening_markers.chars().next() != closing_markers.chars().next()
2911 || opening_markers.len() != closing_markers.len()
2912 {
2913 continue;
2914 }
2915
2916 let marker = opening_markers.chars().next().unwrap();
2917 let marker_count = opening_markers.len();
2918
2919 let mut line_num = 1;
2921 let mut col_start = match_start;
2922 let mut col_end = match_end;
2923 for (idx, line_info) in lines.iter().enumerate() {
2924 if match_start >= line_info.byte_offset {
2925 line_num = idx + 1;
2926 col_start = match_start - line_info.byte_offset;
2927 col_end = match_end - line_info.byte_offset;
2928 } else {
2929 break;
2930 }
2931 }
2932
2933 emphasis_spans.push(EmphasisSpan {
2934 line: line_num,
2935 start_col: col_start,
2936 end_col: col_end,
2937 byte_offset: match_start,
2938 byte_end: match_end,
2939 marker,
2940 marker_count,
2941 content: content_part.to_string(),
2942 });
2943 }
2944
2945 emphasis_spans
2946 }
2947
2948 fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
2950 let mut table_rows = Vec::with_capacity(lines.len() / 20);
2951
2952 for (line_idx, line_info) in lines.iter().enumerate() {
2953 if line_info.in_code_block || line_info.is_blank {
2955 continue;
2956 }
2957
2958 let line = line_info.content(content);
2959 let line_num = line_idx + 1;
2960
2961 if !line.contains('|') {
2963 continue;
2964 }
2965
2966 let parts: Vec<&str> = line.split('|').collect();
2968 let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
2969
2970 let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
2972 let mut column_alignments = Vec::new();
2973
2974 if is_separator {
2975 for part in &parts[1..parts.len() - 1] {
2976 let trimmed = part.trim();
2978 let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
2979 "center".to_string()
2980 } else if trimmed.ends_with(':') {
2981 "right".to_string()
2982 } else if trimmed.starts_with(':') {
2983 "left".to_string()
2984 } else {
2985 "none".to_string()
2986 };
2987 column_alignments.push(alignment);
2988 }
2989 }
2990
2991 table_rows.push(TableRow {
2992 line: line_num,
2993 is_separator,
2994 column_count,
2995 column_alignments,
2996 });
2997 }
2998
2999 table_rows
3000 }
3001
3002 fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
3004 let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
3005
3006 for cap in BARE_URL_PATTERN.captures_iter(content) {
3008 let full_match = cap.get(0).unwrap();
3009 let match_start = full_match.start();
3010 let match_end = full_match.end();
3011
3012 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3014 continue;
3015 }
3016
3017 let preceding_char = if match_start > 0 {
3019 content.chars().nth(match_start - 1)
3020 } else {
3021 None
3022 };
3023 let following_char = content.chars().nth(match_end);
3024
3025 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3026 continue;
3027 }
3028 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3029 continue;
3030 }
3031
3032 let url = full_match.as_str();
3033 let url_type = if url.starts_with("https://") {
3034 "https"
3035 } else if url.starts_with("http://") {
3036 "http"
3037 } else if url.starts_with("ftp://") {
3038 "ftp"
3039 } else {
3040 "other"
3041 };
3042
3043 let mut line_num = 1;
3045 let mut col_start = match_start;
3046 let mut col_end = match_end;
3047 for (idx, line_info) in lines.iter().enumerate() {
3048 if match_start >= line_info.byte_offset {
3049 line_num = idx + 1;
3050 col_start = match_start - line_info.byte_offset;
3051 col_end = match_end - line_info.byte_offset;
3052 } else {
3053 break;
3054 }
3055 }
3056
3057 bare_urls.push(BareUrl {
3058 line: line_num,
3059 start_col: col_start,
3060 end_col: col_end,
3061 byte_offset: match_start,
3062 byte_end: match_end,
3063 url: url.to_string(),
3064 url_type: url_type.to_string(),
3065 });
3066 }
3067
3068 for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
3070 let full_match = cap.get(0).unwrap();
3071 let match_start = full_match.start();
3072 let match_end = full_match.end();
3073
3074 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3076 continue;
3077 }
3078
3079 let preceding_char = if match_start > 0 {
3081 content.chars().nth(match_start - 1)
3082 } else {
3083 None
3084 };
3085 let following_char = content.chars().nth(match_end);
3086
3087 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3088 continue;
3089 }
3090 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3091 continue;
3092 }
3093
3094 let email = full_match.as_str();
3095
3096 let mut line_num = 1;
3098 let mut col_start = match_start;
3099 let mut col_end = match_end;
3100 for (idx, line_info) in lines.iter().enumerate() {
3101 if match_start >= line_info.byte_offset {
3102 line_num = idx + 1;
3103 col_start = match_start - line_info.byte_offset;
3104 col_end = match_end - line_info.byte_offset;
3105 } else {
3106 break;
3107 }
3108 }
3109
3110 bare_urls.push(BareUrl {
3111 line: line_num,
3112 start_col: col_start,
3113 end_col: col_end,
3114 byte_offset: match_start,
3115 byte_end: match_end,
3116 url: email.to_string(),
3117 url_type: "email".to_string(),
3118 });
3119 }
3120
3121 bare_urls
3122 }
3123}
3124
3125fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
3127 if list_blocks.len() < 2 {
3128 return;
3129 }
3130
3131 let mut merger = ListBlockMerger::new(content, lines);
3132 *list_blocks = merger.merge(list_blocks);
3133}
3134
3135struct ListBlockMerger<'a> {
3137 content: &'a str,
3138 lines: &'a [LineInfo],
3139}
3140
3141impl<'a> ListBlockMerger<'a> {
3142 fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
3143 Self { content, lines }
3144 }
3145
3146 fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
3147 let mut merged = Vec::with_capacity(list_blocks.len());
3148 let mut current = list_blocks[0].clone();
3149
3150 for next in list_blocks.iter().skip(1) {
3151 if self.should_merge_blocks(¤t, next) {
3152 current = self.merge_two_blocks(current, next);
3153 } else {
3154 merged.push(current);
3155 current = next.clone();
3156 }
3157 }
3158
3159 merged.push(current);
3160 merged
3161 }
3162
3163 fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
3165 if !self.blocks_are_compatible(current, next) {
3167 return false;
3168 }
3169
3170 let spacing = self.analyze_spacing_between(current, next);
3172 match spacing {
3173 BlockSpacing::Consecutive => true,
3174 BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
3175 BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
3176 self.can_merge_with_content_between(current, next)
3177 }
3178 }
3179 }
3180
3181 fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
3183 current.is_ordered == next.is_ordered
3184 && current.blockquote_prefix == next.blockquote_prefix
3185 && current.nesting_level == next.nesting_level
3186 }
3187
3188 fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
3190 let gap = next.start_line - current.end_line;
3191
3192 match gap {
3193 1 => BlockSpacing::Consecutive,
3194 2 => BlockSpacing::SingleBlank,
3195 _ if gap > 2 => {
3196 if self.has_only_blank_lines_between(current, next) {
3197 BlockSpacing::MultipleBlanks
3198 } else {
3199 BlockSpacing::ContentBetween
3200 }
3201 }
3202 _ => BlockSpacing::Consecutive, }
3204 }
3205
3206 fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3208 if has_meaningful_content_between(self.content, current, next, self.lines) {
3211 return false; }
3213
3214 !current.is_ordered && current.marker == next.marker
3216 }
3217
3218 fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3220 if has_meaningful_content_between(self.content, current, next, self.lines) {
3222 return false; }
3224
3225 current.is_ordered && next.is_ordered
3227 }
3228
3229 fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3231 for line_num in (current.end_line + 1)..next.start_line {
3232 if let Some(line_info) = self.lines.get(line_num - 1)
3233 && !line_info.content(self.content).trim().is_empty()
3234 {
3235 return false;
3236 }
3237 }
3238 true
3239 }
3240
3241 fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
3243 current.end_line = next.end_line;
3244 current.item_lines.extend_from_slice(&next.item_lines);
3245
3246 current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
3248
3249 if !current.is_ordered && self.markers_differ(¤t, next) {
3251 current.marker = None; }
3253
3254 current
3255 }
3256
3257 fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
3259 current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
3260 }
3261}
3262
/// How far apart two list blocks are, as classified by
/// `ListBlockMerger::analyze_spacing_between`.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// The next block starts on the line right after the current one ends
    /// (also used as the fallback for any smaller gap).
    Consecutive,
    /// Exactly one line separates the two blocks.
    SingleBlank,
    /// More than one line separates the blocks and all of them are blank.
    MultipleBlanks,
    /// More than one line separates the blocks and at least one is not blank.
    ContentBetween,
}
3271
3272fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
3274 for line_num in (current.end_line + 1)..next.start_line {
3276 if let Some(line_info) = lines.get(line_num - 1) {
3277 let trimmed = line_info.content(content).trim();
3279
3280 if trimmed.is_empty() {
3282 continue;
3283 }
3284
3285 if line_info.heading.is_some() {
3289 return true; }
3291
3292 if is_horizontal_rule(trimmed) {
3294 return true; }
3296
3297 if trimmed.contains('|') && trimmed.len() > 1 {
3300 if !trimmed.contains("](") && !trimmed.contains("http") {
3302 let pipe_count = trimmed.matches('|').count();
3304 if pipe_count > 1 || trimmed.starts_with('|') || trimmed.ends_with('|') {
3305 return true; }
3307 }
3308 }
3309
3310 if trimmed.starts_with('>') {
3312 return true; }
3314
3315 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
3317 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3318
3319 let min_continuation_indent = if current.is_ordered {
3321 current.nesting_level + current.max_marker_width + 1 } else {
3323 current.nesting_level + 2
3324 };
3325
3326 if line_indent < min_continuation_indent {
3327 return true; }
3330 }
3331
3332 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3334
3335 let min_indent = if current.is_ordered {
3337 current.nesting_level + current.max_marker_width
3338 } else {
3339 current.nesting_level + 2
3340 };
3341
3342 if line_indent < min_indent {
3344 return true; }
3346
3347 }
3350 }
3351
3352 false
3354}
3355
/// Returns `true` when `trimmed` looks like a horizontal rule.
///
/// The text must be at least three bytes long, start with `-`, `*` or `_`,
/// contain that marker at least three times, and contain nothing else except
/// spaces and tabs. The caller is expected to pass already-trimmed text: a
/// leading space makes the first character a non-marker and yields `false`.
fn is_horizontal_rule(trimmed: &str) -> bool {
    if trimmed.len() < 3 {
        return false;
    }

    let marker = match trimmed.chars().next() {
        Some(c @ ('-' | '*' | '_')) => c,
        _ => return false,
    };

    // Walk the characters directly; no intermediate `Vec<char>` is needed.
    let mut marker_count = 0usize;
    for ch in trimmed.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' && ch != '\t' {
            // Any other character, including a different marker, disqualifies.
            return false;
        }
    }

    marker_count >= 3
}
3379
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty document has a single line offset and no line records.
    #[test]
    fn test_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }

    /// Offsets within a single line map to 1-based columns on line 1.
    #[test]
    fn test_single_line() {
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
    }

    /// Line offsets and offset-to-position mapping across several lines.
    #[test]
    fn test_multi_line() {
        let content = "# Title\n\nSecond line\nThird line";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(8), (2, 1));
        assert_eq!(ctx.offset_to_line_col(9), (3, 1));
        assert_eq!(ctx.offset_to_line_col(15), (3, 7));
        assert_eq!(ctx.offset_to_line_col(21), (4, 1));
    }

    /// Per-line metadata: content, byte offset, indent, blank and code flags.
    #[test]
    fn test_line_info() {
        // The second line is indented by four spaces, matching the `indent == 4`
        // assertions below.
        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.lines.len(), 7);

        let line1 = &ctx.lines[0];
        assert_eq!(line1.content(ctx.content), "# Title");
        assert_eq!(line1.byte_offset, 0);
        assert_eq!(line1.indent, 0);
        assert!(!line1.is_blank);
        assert!(!line1.in_code_block);
        assert!(line1.list_item.is_none());

        let line2 = &ctx.lines[1];
        assert_eq!(line2.content(ctx.content), "    indented");
        assert_eq!(line2.byte_offset, 8);
        assert_eq!(line2.indent, 4);
        assert!(!line2.is_blank);

        let line3 = &ctx.lines[2];
        assert_eq!(line3.content(ctx.content), "");
        assert!(line3.is_blank);

        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
    }

    /// List markers, ordering and columns are detected per line.
    #[test]
    fn test_list_item_detection() {
        // The nested bullet sits two columns in (see `marker_column == 2`). The
        // nested ordered item is indented to the content column of "1. ".
        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let line1 = &ctx.lines[0];
        assert!(line1.list_item.is_some());
        let list1 = line1.list_item.as_ref().unwrap();
        assert_eq!(list1.marker, "-");
        assert!(!list1.is_ordered);
        assert_eq!(list1.marker_column, 0);
        assert_eq!(list1.content_column, 2);

        let line2 = &ctx.lines[1];
        assert!(line2.list_item.is_some());
        let list2 = line2.list_item.as_ref().unwrap();
        assert_eq!(list2.marker, "*");
        assert_eq!(list2.marker_column, 2);

        let line3 = &ctx.lines[2];
        assert!(line3.list_item.is_some());
        let list3 = line3.list_item.as_ref().unwrap();
        assert_eq!(list3.marker, "1.");
        assert!(list3.is_ordered);
        assert_eq!(list3.number, Some(1));

        let line6 = &ctx.lines[5];
        assert!(line6.list_item.is_none());
    }

    /// Offsets on newline bytes and at end of input map to the expected position.
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        let content = "a\nb\nc";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(1), (1, 2));
        assert_eq!(ctx.offset_to_line_col(2), (2, 1));
        assert_eq!(ctx.offset_to_line_col(3), (2, 2));
        assert_eq!(ctx.offset_to_line_col(4), (3, 1));
        assert_eq!(ctx.offset_to_line_col(5), (3, 2));
    }

    /// In MDX, leading import/export lines are flagged as ESM; later lines are not.
    #[test]
    fn test_mdx_esm_blocks() {
        let content = r##"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall

In {year}, the snowfall was above average.
It was followed by a warm spring which caused
flood conditions in many of the nearby rivers.

<Chart color="#fcb32c" year={year} />
"##;

        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);

        assert_eq!(ctx.lines.len(), 10);
        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
        assert!(
            !ctx.lines[3].in_esm_block,
            "Line 4 (heading) should NOT be in_esm_block"
        );
        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
    }

    /// The Standard flavor never flags import/export lines as ESM.
    #[test]
    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
        let content = r#"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(
            !ctx.lines[0].in_esm_block,
            "Line 1 should NOT be in_esm_block in Standard flavor"
        );
        assert!(
            !ctx.lines[1].in_esm_block,
            "Line 2 should NOT be in_esm_block in Standard flavor"
        );
    }
}