1use crate::config::MarkdownFlavor;
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
4use crate::utils::element_cache::ElementCache;
5use crate::utils::regex_cache::URL_SIMPLE_REGEX;
6use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
7use regex::Regex;
8use std::borrow::Cow;
9use std::path::PathBuf;
10use std::sync::LazyLock;
11
// Evaluates `$code` and yields its value. When `$profile` is true, the elapsed
// wall-clock time is also printed to stderr as `[PROFILE] <name>: <duration>`.
// `$profile` is evaluated after `$code` has run.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let started_at = std::time::Instant::now();
        let output = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, started_at.elapsed());
        }
        output
    }};
}

// wasm32 variant: expands to `$code` only; `$name` and `$profile` are not evaluated
// and no clock is read.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
29
/// Lazily compiled regex matching inline links `[text](url "title")` and
/// reference links `[text][id]`.
///
/// Capture groups: 1 = link text, 2 = URL in angle brackets, 3 = bare URL,
/// 4/5 = double-/single-quoted title, 6 = reference ID.
/// Compiled with `(?sx)`, so whitespace and `#` comments inside the pattern are ignored.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\] # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
        \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
        |
        \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
43
/// Lazily compiled regex matching inline images `![alt](url "title")` and
/// reference images `![alt][id]`.
///
/// Capture groups: 1 = alt text, 2 = URL in angle brackets, 3 = bare URL,
/// 4/5 = double-/single-quoted title, 6 = reference ID.
/// Compiled with `(?sx)`, so whitespace and `#` comments inside the pattern are ignored.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\] # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
        \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
        |
        \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
57
/// Lazily compiled multi-line regex for reference definitions: `[id]: url "title"`.
///
/// Allows up to three leading spaces. Capture groups: 1 = ID, 2 = URL
/// (a run of non-whitespace), 3/4 = optional double-/single-quoted title.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
61
/// Lazily compiled regex for e-mail-like text: `local@domain.tld`, where the
/// final label is at least two ASCII letters.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
67
/// Lazily compiled regex capturing a line's blockquote prefix in group 1:
/// optional leading whitespace, one or more `>`, then optional whitespace.
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
70
/// Per-line metadata for a document. The line text itself is not stored; it is
/// recovered from the source via `byte_offset` and `byte_len`.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset in the source at which this line starts.
    pub byte_offset: usize,
    /// Length of the line in bytes, as sliced by `content()`.
    pub byte_len: usize,
    /// Indentation of the line (unit not visible in this chunk — TODO confirm bytes vs. columns).
    pub indent: usize,
    /// Visual indentation; presumably `indent` with tabs expanded — TODO confirm.
    pub visual_indent: usize,
    /// Whether the line is blank.
    pub is_blank: bool,
    /// Whether the line lies inside a code block.
    pub in_code_block: bool,
    /// Whether the line lies inside front matter.
    pub in_front_matter: bool,
    /// Whether the line lies inside an HTML block.
    pub in_html_block: bool,
    /// Whether the line lies inside an HTML comment.
    pub in_html_comment: bool,
    /// List item details when this line starts a list item.
    pub list_item: Option<ListItemInfo>,
    /// Heading details when this line is a heading.
    pub heading: Option<HeadingInfo>,
    /// Blockquote details when this line is part of a blockquote.
    pub blockquote: Option<BlockquoteInfo>,
    /// Whether the line lies inside a mkdocstrings block.
    pub in_mkdocstrings: bool,
    /// Whether the line lies inside an ESM block.
    pub in_esm_block: bool,
    /// Set for every line after the first line of a code span that covers several lines.
    pub in_code_span_continuation: bool,
    /// Whether the line is a horizontal rule.
    pub is_horizontal_rule: bool,
    /// Whether the line lies inside a math block.
    pub in_math_block: bool,
}
112
113impl LineInfo {
114 pub fn content<'a>(&self, source: &'a str) -> &'a str {
116 &source[self.byte_offset..self.byte_offset + self.byte_len]
117 }
118}
119
/// Details of a list item marker found on a line.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker text as it appears in the source.
    pub marker: String,
    /// `true` for ordered (numbered) items, `false` for bullet items.
    pub is_ordered: bool,
    /// The item number for ordered items; presumably `None` for bullets — TODO confirm.
    pub number: Option<usize>,
    /// Column at which the marker starts (base not visible in this chunk — TODO confirm).
    pub marker_column: usize,
    /// Column at which the item content starts.
    pub content_column: usize,
}
134
/// Syntactic style used to write a heading.
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    /// ATX heading (`#`-prefixed).
    ATX,
    /// Setext heading; presumably the `=` underline form (level 1) — TODO confirm.
    Setext1,
    /// Setext heading; presumably the `-` underline form (level 2) — TODO confirm.
    Setext2,
}
145
/// A link found in the document, produced by `LintContext::parse_links`.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// 1-based line number on which the link starts.
    pub line: usize,
    /// Byte offset of the link start relative to the start of its line (0-based).
    pub start_col: usize,
    /// Byte offset of the link end relative to the start of the line containing the end.
    pub end_col: usize,
    /// Byte offset in the document at which the link starts.
    pub byte_offset: usize,
    /// Byte offset in the document just past the end of the link.
    pub byte_end: usize,
    /// Link text taken from between the opening `[` and its matching `]`.
    pub text: Cow<'a, str>,
    /// Destination URL; empty for reference links found only by the regex fallback.
    pub url: Cow<'a, str>,
    /// Whether this is a reference-style link (full, collapsed, or shortcut).
    pub is_reference: bool,
    /// Lowercased reference ID for reference-style links, otherwise `None`.
    pub reference_id: Option<Cow<'a, str>>,
    /// The pulldown-cmark link type.
    pub link_type: LinkType,
}
170
/// A broken link reported through pulldown-cmark's broken-link callback.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text reported by the parser.
    pub reference: String,
    /// Byte span of the broken link in the document, as reported by the parser.
    pub span: std::ops::Range<usize>,
}
179
/// A footnote reference emitted by the Markdown parser.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// Footnote identifier.
    pub id: String,
    /// 1-based line number on which the reference starts.
    pub line: usize,
    /// Byte offset in the document at which the reference starts.
    pub byte_offset: usize,
    /// Byte offset in the document just past the end of the reference.
    pub byte_end: usize,
}
192
/// An image found in the document; mirrors `ParsedLink` with alt text in place of link text.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// 1-based line number on which the image starts.
    pub line: usize,
    /// Byte offset of the image start relative to the start of its line (0-based).
    pub start_col: usize,
    /// Byte offset of the image end relative to the start of the line containing the end.
    pub end_col: usize,
    /// Byte offset in the document at which the image starts.
    pub byte_offset: usize,
    /// Byte offset in the document just past the end of the image.
    pub byte_end: usize,
    /// Alt text of the image.
    pub alt_text: Cow<'a, str>,
    /// Image URL.
    pub url: Cow<'a, str>,
    /// Whether this is a reference-style image.
    pub is_reference: bool,
    /// Reference ID for reference-style images.
    pub reference_id: Option<Cow<'a, str>>,
    /// The pulldown-cmark link type.
    pub link_type: LinkType,
}
217
/// A reference definition of the form `[id]: url "title"`.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line number of the definition (presumably 1-based like other `line` fields — TODO confirm).
    pub line: usize,
    /// Reference ID. `get_reference_url` compares it against a lowercased lookup key,
    /// so it is presumably stored lowercased — TODO confirm.
    pub id: String,
    /// Target URL.
    pub url: String,
    /// Optional title.
    pub title: Option<String>,
    /// Byte offset in the document at which the definition starts.
    pub byte_offset: usize,
    /// Byte offset in the document just past the end of the definition.
    pub byte_end: usize,
    /// Byte offset at which the title starts, when a title is present.
    pub title_byte_start: Option<usize>,
    /// Byte offset just past the end of the title, when a title is present.
    pub title_byte_end: Option<usize>,
}
238
/// An inline code span, which may extend over several lines.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// 1-based line number on which the span starts.
    pub line: usize,
    /// 1-based line number on which the span ends (greater than `line` for multi-line spans).
    pub end_line: usize,
    /// 0-based start column on `line` (inclusive), as compared in `is_in_code_span`.
    pub start_col: usize,
    /// 0-based end column on `end_line` (exclusive), as compared in `is_in_code_span`.
    pub end_col: usize,
    /// Byte offset in the document at which the span starts (inclusive).
    pub byte_offset: usize,
    /// Byte offset in the document at which the span ends (exclusive).
    pub byte_end: usize,
    /// Number of backticks in the delimiter.
    pub backtick_count: usize,
    /// Content of the span.
    pub content: String,
}
259
/// Details of a heading found on a line.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level.
    pub level: u8,
    /// Syntactic style of the heading.
    pub style: HeadingStyle,
    /// Marker text of the heading.
    pub marker: String,
    /// Column at which the marker starts.
    pub marker_column: usize,
    /// Column at which the heading content starts.
    pub content_column: usize,
    /// Heading text (presumably cleaned of markers and custom ID — TODO confirm).
    pub text: String,
    /// Custom ID attached to the heading, if any.
    pub custom_id: Option<String>,
    /// Raw heading text (presumably as written in the source — TODO confirm).
    pub raw_text: String,
    /// Whether the heading has a closing sequence.
    pub has_closing_sequence: bool,
    /// The closing sequence text.
    pub closing_sequence: String,
    /// Whether the heading is valid; `ValidHeadingsIter` yields only headings with this set.
    pub is_valid: bool,
}
287
/// Item yielded by `ValidHeadingsIter`: a heading whose `is_valid` flag is set,
/// together with the line it belongs to.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// 1-based line number of the heading.
    pub line_num: usize,
    /// The heading details.
    pub heading: &'a HeadingInfo,
    /// The line that carries the heading.
    pub line_info: &'a LineInfo,
}
301
/// Iterator over the lines of a document that carry a valid heading.
pub struct ValidHeadingsIter<'a> {
    /// All lines of the document.
    lines: &'a [LineInfo],
    /// Index of the next line to examine.
    current_index: usize,
}
310
311impl<'a> ValidHeadingsIter<'a> {
312 fn new(lines: &'a [LineInfo]) -> Self {
313 Self {
314 lines,
315 current_index: 0,
316 }
317 }
318}
319
320impl<'a> Iterator for ValidHeadingsIter<'a> {
321 type Item = ValidHeading<'a>;
322
323 fn next(&mut self) -> Option<Self::Item> {
324 while self.current_index < self.lines.len() {
325 let idx = self.current_index;
326 self.current_index += 1;
327
328 let line_info = &self.lines[idx];
329 if let Some(heading) = &line_info.heading
330 && heading.is_valid
331 {
332 return Some(ValidHeading {
333 line_num: idx + 1, heading,
335 line_info,
336 });
337 }
338 }
339 None
340 }
341}
342
/// Details of a blockquote line.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting depth of the blockquote (presumably the number of `>` markers — TODO confirm).
    pub nesting_level: usize,
    /// Whitespace preceding the first `>` marker.
    pub indent: String,
    /// Column at which the first marker starts.
    pub marker_column: usize,
    /// Blockquote prefix text of the line.
    pub prefix: String,
    /// Line content following the prefix.
    pub content: String,
    /// Whether no space follows the marker.
    pub has_no_space_after_marker: bool,
    /// Whether more than one space follows the marker.
    pub has_multiple_spaces_after_marker: bool,
    /// Whether the line needs the fix applied by rule MD028.
    pub needs_md028_fix: bool,
}
363
/// A contiguous block of list items.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// First line of the block; compared inclusively in `is_in_list_block`.
    pub start_line: usize,
    /// Last line of the block; compared inclusively in `is_in_list_block`.
    pub end_line: usize,
    /// Whether the block is an ordered list.
    pub is_ordered: bool,
    /// Marker used by the block, if any (presumably `None` when markers vary — TODO confirm).
    pub marker: Option<String>,
    /// Blockquote prefix shared by the block's lines.
    pub blockquote_prefix: String,
    /// Line numbers of the list items in the block.
    pub item_lines: Vec<usize>,
    /// Nesting level of the block.
    pub nesting_level: usize,
    /// Widest marker among the block's items.
    pub max_marker_width: usize,
}
384
385use std::sync::{Arc, OnceLock};
386
/// Map from a `usize` key to list-item data.
/// NOTE(review): the key and tuple layout are not visible in this chunk; the element
/// types match `ListItemInfo`'s (is_ordered, marker, marker_column, content_column,
/// number) — confirm against the code that fills this map.
type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;
389
/// Occurrence counts for Markdown-significant characters, read by
/// `LintContext::has_char`, `char_count`, and the `likely_has_*` checks.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count of `#`.
    pub hash_count: usize,
    /// Count of `*`.
    pub asterisk_count: usize,
    /// Count of `_`.
    pub underscore_count: usize,
    /// Count of `-`.
    pub hyphen_count: usize,
    /// Count of `+`.
    pub plus_count: usize,
    /// Count of `>`.
    pub gt_count: usize,
    /// Count of `|`.
    pub pipe_count: usize,
    /// Count of `[`.
    pub bracket_count: usize,
    /// Count of `` ` ``.
    pub backtick_count: usize,
    /// Count of `<`.
    pub lt_count: usize,
    /// Count of `!`.
    pub exclamation_count: usize,
    /// Count of newline characters.
    pub newline_count: usize,
}
418
/// An HTML tag found in the document.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number of the tag; matched against `line_num` in `html_tags_on_line`.
    pub line: usize,
    /// Start column of the tag on its line.
    pub start_col: usize,
    /// End column of the tag on its line.
    pub end_col: usize,
    /// Byte offset in the document at which the tag starts (inclusive).
    pub byte_offset: usize,
    /// Byte offset in the document at which the tag ends (exclusive).
    pub byte_end: usize,
    /// Name of the tag.
    pub tag_name: String,
    /// Whether this is a closing tag.
    pub is_closing: bool,
    /// Whether this is a self-closing tag.
    pub is_self_closing: bool,
    /// Raw text of the tag.
    pub raw_content: String,
}
441
/// An emphasis span found in the document.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number of the span; matched against `line_num` in `emphasis_spans_on_line`.
    pub line: usize,
    /// Start column of the span on its line.
    pub start_col: usize,
    /// End column of the span on its line.
    pub end_col: usize,
    /// Byte offset in the document at which the span starts.
    pub byte_offset: usize,
    /// Byte offset in the document at which the span ends.
    pub byte_end: usize,
    /// Emphasis marker character.
    pub marker: char,
    /// Number of marker characters in the delimiter.
    pub marker_count: usize,
    /// Emphasized content.
    pub content: String,
}
462
/// A table row found in the document.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number of the row; matched against `line_num` in `table_rows_on_line`.
    pub line: usize,
    /// Whether the row is the header separator row.
    pub is_separator: bool,
    /// Number of columns in the row.
    pub column_count: usize,
    /// Column alignments as strings (value vocabulary not visible in this chunk).
    pub column_alignments: Vec<String>,
}
475
/// A bare URL (or similar) found in the document.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number of the URL; matched against `line_num` in `bare_urls_on_line`.
    pub line: usize,
    /// Start column of the URL on its line.
    pub start_col: usize,
    /// End column of the URL on its line.
    pub end_col: usize,
    /// Byte offset in the document at which the URL starts.
    pub byte_offset: usize,
    /// Byte offset in the document at which the URL ends.
    pub byte_end: usize,
    /// The URL text.
    pub url: String,
    /// Kind of URL as a string (value vocabulary not visible in this chunk).
    pub url_type: String,
}
494
/// Parsed view of one Markdown document, shared by lint rules.
///
/// Most data is computed eagerly in `LintContext::new`; HTML tags, table rows,
/// bare URLs, and the mixed-list-nesting flag are computed on first access and cached.
pub struct LintContext<'a> {
    /// The document text.
    pub content: &'a str,
    /// Byte offset of the start of each line: `0`, then the offset after every `\n`.
    pub line_offsets: Vec<usize>,
    /// Code block byte ranges as returned by `CodeBlockUtils::detect_code_blocks`.
    pub code_blocks: Vec<(usize, usize)>,
    /// Per-line metadata; index `i` describes line `i + 1`.
    pub lines: Vec<LineInfo>,
    /// Links found in the document.
    pub links: Vec<ParsedLink<'a>>,
    /// Images found in the document.
    pub images: Vec<ParsedImage<'a>>,
    /// Broken links reported by the Markdown parser.
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Footnote references found in the document.
    pub footnote_refs: Vec<FootnoteRef>,
    /// Reference definitions found in the document.
    pub reference_defs: Vec<ReferenceDef>,
    /// Code spans; filled during construction.
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>,
    /// List blocks found in the document.
    pub list_blocks: Vec<ListBlock>,
    /// Counts of Markdown-significant characters.
    pub char_frequency: CharFrequency,
    /// HTML tags; computed on first call to `html_tags()`.
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>,
    /// Emphasis spans; filled during construction.
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>,
    /// Table rows; computed on first call to `table_rows()`.
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>,
    /// Bare URLs; computed on first call to `bare_urls()`.
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>,
    /// Result of the mixed-list-nesting check; computed on first call.
    has_mixed_list_nesting_cache: OnceLock<bool>,
    /// Byte ranges of HTML comments.
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Table blocks found in the document.
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Line index built from `content`.
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    /// Byte ranges `(start, end)` of Jinja constructs; `end` is exclusive in `is_in_jinja_range`.
    jinja_ranges: Vec<(usize, usize)>,
    /// Markdown flavor the document is parsed as.
    pub flavor: MarkdownFlavor,
    /// Path of the source file, when known.
    pub source_file: Option<PathBuf>,
}
520
/// The four consecutive pieces of a blockquote line, each borrowed from the input.
struct BlockquoteComponents<'a> {
    /// Spaces/tabs before the first `>`.
    indent: &'a str,
    /// The run of consecutive `>` characters.
    markers: &'a str,
    /// Spaces/tabs directly after the markers.
    spaces_after: &'a str,
    /// Everything after `spaces_after`.
    content: &'a str,
}

/// Splits `line` into indent, `>` markers, whitespace after the markers, and content.
///
/// Returns `None` when the first character after any leading spaces/tabs is not `>`
/// (including empty and whitespace-only lines). Only a contiguous run of `>` counts
/// as markers, so `"> > x"` yields markers `">"` and content `"> x"`.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    let is_blank = |b: u8| b == b' ' || b == b'\t';
    let bytes = line.as_bytes();

    let indent_end = bytes.iter().take_while(|&&b| is_blank(b)).count();

    let marker_len = bytes[indent_end..].iter().take_while(|&&b| b == b'>').count();
    if marker_len == 0 {
        return None;
    }
    let markers_end = indent_end + marker_len;

    let gap_len = bytes[markers_end..].iter().take_while(|&&b| is_blank(b)).count();
    let spaces_end = markers_end + gap_len;

    // Every split point follows ASCII bytes only, so each is a valid char boundary.
    let (indent, rest) = line.split_at(indent_end);
    let (markers, rest) = rest.split_at(marker_len);
    let (spaces_after, content) = rest.split_at(gap_len);
    debug_assert_eq!(indent.len() + markers.len() + spaces_after.len(), spaces_end);

    Some(BlockquoteComponents {
        indent,
        markers,
        spaces_after,
        content,
    })
}
565
566impl<'a> LintContext<'a> {
    /// Builds the context for `content` by running every eager analysis pass once.
    ///
    /// The passes run in a fixed order because later ones consume earlier results
    /// (for example `lines` is filled in, then mutated by the HTML/ESM/heading passes,
    /// then read by the code-span, link, image, and list passes).
    /// On non-wasm targets, setting the `RUMDL_PROFILE_QUADRATIC` environment variable
    /// makes each pass print its elapsed time to stderr.
    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
        // Profiling is enabled by the mere presence of the variable; its value is ignored.
        #[cfg(not(target_arch = "wasm32"))]
        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
        #[cfg(target_arch = "wasm32")]
        let profile = false;

        // Start offset of every line: 0, plus the byte after each '\n'.
        let line_offsets = profile_section!("Line offsets", profile, {
            let mut offsets = vec![0];
            for (i, c) in content.char_indices() {
                if c == '\n' {
                    offsets.push(i + 1);
                }
            }
            offsets
        });

        let code_blocks = profile_section!("Code blocks", profile, CodeBlockUtils::detect_code_blocks(content));

        let html_comment_ranges = profile_section!(
            "HTML comment ranges",
            profile,
            crate::utils::skip_context::compute_html_comment_ranges(content)
        );

        // Autodoc block ranges are only detected for the MkDocs flavor.
        let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
            if flavor == MarkdownFlavor::MkDocs {
                crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
            } else {
                Vec::new()
            }
        });

        // This pass also returns the emphasis spans, which are stored in the cache below.
        let (mut lines, emphasis_spans) = profile_section!(
            "Basic line info",
            profile,
            Self::compute_basic_line_info(
                content,
                &line_offsets,
                &code_blocks,
                flavor,
                &html_comment_ranges,
                &autodoc_ranges,
            )
        );

        // The next three passes update `lines` in place.
        profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));

        profile_section!(
            "ESM blocks",
            profile,
            Self::detect_esm_blocks(content, &mut lines, flavor)
        );

        let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));

        profile_section!(
            "Headings & blockquotes",
            profile,
            Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
        );

        let code_spans = profile_section!("Code spans", profile, Self::parse_code_spans(content, &lines));

        // Flag every line after the first of a multi-line code span.
        for span in &code_spans {
            if span.end_line > span.line {
                for line_num in (span.line + 1)..=span.end_line {
                    // Line numbers are 1-based; `lines` is 0-based.
                    if let Some(line_info) = lines.get_mut(line_num - 1) {
                        line_info.in_code_span_continuation = true;
                    }
                }
            }
        }

        let (links, broken_links, footnote_refs) = profile_section!(
            "Links",
            profile,
            Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
        );

        let images = profile_section!(
            "Images",
            profile,
            Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
        );

        let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));

        let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));

        let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));

        let table_blocks = profile_section!(
            "Table blocks",
            profile,
            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
                content,
                &code_blocks,
                &code_spans,
                &html_comment_ranges,
            )
        );

        let line_index = profile_section!(
            "Line index",
            profile,
            crate::utils::range_utils::LineIndex::new(content)
        );

        let jinja_ranges = profile_section!(
            "Jinja ranges",
            profile,
            crate::utils::jinja_utils::find_jinja_ranges(content)
        );

        Self {
            content,
            line_offsets,
            code_blocks,
            lines,
            links,
            images,
            broken_links,
            footnote_refs,
            reference_defs,
            // Already computed above, so the cache starts out filled.
            code_spans_cache: OnceLock::from(Arc::new(code_spans)),
            list_blocks,
            char_frequency,
            html_tags_cache: OnceLock::new(),
            // Already computed by the basic line-info pass, so the cache starts out filled.
            emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
            table_rows_cache: OnceLock::new(),
            bare_urls_cache: OnceLock::new(),
            has_mixed_list_nesting_cache: OnceLock::new(),
            html_comment_ranges,
            table_blocks,
            line_index,
            jinja_ranges,
            flavor,
            source_file,
        }
    }
725
726 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
728 Arc::clone(
729 self.code_spans_cache
730 .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
731 )
732 }
733
734 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
736 &self.html_comment_ranges
737 }
738
739 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
741 Arc::clone(self.html_tags_cache.get_or_init(|| {
742 Arc::new(Self::parse_html_tags(
743 self.content,
744 &self.lines,
745 &self.code_blocks,
746 self.flavor,
747 ))
748 }))
749 }
750
751 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
753 Arc::clone(
754 self.emphasis_spans_cache
755 .get()
756 .expect("emphasis_spans_cache initialized during construction"),
757 )
758 }
759
760 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
762 Arc::clone(
763 self.table_rows_cache
764 .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
765 )
766 }
767
768 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
770 Arc::clone(
771 self.bare_urls_cache
772 .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
773 )
774 }
775
776 pub fn has_mixed_list_nesting(&self) -> bool {
780 *self
781 .has_mixed_list_nesting_cache
782 .get_or_init(|| self.compute_mixed_list_nesting())
783 }
784
785 fn compute_mixed_list_nesting(&self) -> bool {
787 let mut stack: Vec<(usize, bool)> = Vec::new();
792 let mut last_was_blank = false;
793
794 for line_info in &self.lines {
795 if line_info.in_code_block
797 || line_info.in_front_matter
798 || line_info.in_mkdocstrings
799 || line_info.in_html_comment
800 || line_info.in_esm_block
801 {
802 continue;
803 }
804
805 if line_info.is_blank {
807 last_was_blank = true;
808 continue;
809 }
810
811 if let Some(list_item) = &line_info.list_item {
812 let current_pos = if list_item.marker_column == 1 {
814 0
815 } else {
816 list_item.marker_column
817 };
818
819 if last_was_blank && current_pos == 0 {
821 stack.clear();
822 }
823 last_was_blank = false;
824
825 while let Some(&(pos, _)) = stack.last() {
827 if pos >= current_pos {
828 stack.pop();
829 } else {
830 break;
831 }
832 }
833
834 if let Some(&(_, parent_is_ordered)) = stack.last()
836 && parent_is_ordered != list_item.is_ordered
837 {
838 return true; }
840
841 stack.push((current_pos, list_item.is_ordered));
842 } else {
843 last_was_blank = false;
845 }
846 }
847
848 false
849 }
850
851 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
853 match self.line_offsets.binary_search(&offset) {
854 Ok(line) => (line + 1, 1),
855 Err(line) => {
856 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
857 (line, offset - line_start + 1)
858 }
859 }
860 }
861
862 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
864 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
866 return true;
867 }
868
869 self.code_spans()
871 .iter()
872 .any(|span| pos >= span.byte_offset && pos < span.byte_end)
873 }
874
875 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
877 if line_num > 0 {
878 self.lines.get(line_num - 1)
879 } else {
880 None
881 }
882 }
883
884 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
886 self.line_info(line_num).map(|info| info.byte_offset)
887 }
888
889 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
891 let normalized_id = ref_id.to_lowercase();
892 self.reference_defs
893 .iter()
894 .find(|def| def.id == normalized_id)
895 .map(|def| def.url.as_str())
896 }
897
898 pub fn is_in_list_block(&self, line_num: usize) -> bool {
900 self.list_blocks
901 .iter()
902 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
903 }
904
905 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
907 self.list_blocks
908 .iter()
909 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
910 }
911
912 pub fn is_in_code_block(&self, line_num: usize) -> bool {
916 if line_num == 0 || line_num > self.lines.len() {
917 return false;
918 }
919 self.lines[line_num - 1].in_code_block
920 }
921
922 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
924 if line_num == 0 || line_num > self.lines.len() {
925 return false;
926 }
927 self.lines[line_num - 1].in_front_matter
928 }
929
930 pub fn is_in_html_block(&self, line_num: usize) -> bool {
932 if line_num == 0 || line_num > self.lines.len() {
933 return false;
934 }
935 self.lines[line_num - 1].in_html_block
936 }
937
938 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
940 if line_num == 0 || line_num > self.lines.len() {
941 return false;
942 }
943
944 let col_0indexed = if col > 0 { col - 1 } else { 0 };
948 let code_spans = self.code_spans();
949 code_spans.iter().any(|span| {
950 if line_num < span.line || line_num > span.end_line {
952 return false;
953 }
954
955 if span.line == span.end_line {
956 col_0indexed >= span.start_col && col_0indexed < span.end_col
958 } else if line_num == span.line {
959 col_0indexed >= span.start_col
961 } else if line_num == span.end_line {
962 col_0indexed < span.end_col
964 } else {
965 true
967 }
968 })
969 }
970
971 #[inline]
973 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
974 let code_spans = self.code_spans();
975 code_spans
976 .iter()
977 .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
978 }
979
980 #[inline]
983 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
984 self.reference_defs
985 .iter()
986 .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
987 }
988
989 #[inline]
993 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
994 self.html_comment_ranges
995 .iter()
996 .any(|range| byte_pos >= range.start && byte_pos < range.end)
997 }
998
999 #[inline]
1002 pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1003 self.html_tags()
1004 .iter()
1005 .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
1006 }
1007
1008 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1010 self.jinja_ranges
1011 .iter()
1012 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1013 }
1014
1015 pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1017 self.reference_defs.iter().any(|def| {
1018 if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1019 byte_pos >= start && byte_pos < end
1020 } else {
1021 false
1022 }
1023 })
1024 }
1025
1026 pub fn has_char(&self, ch: char) -> bool {
1028 match ch {
1029 '#' => self.char_frequency.hash_count > 0,
1030 '*' => self.char_frequency.asterisk_count > 0,
1031 '_' => self.char_frequency.underscore_count > 0,
1032 '-' => self.char_frequency.hyphen_count > 0,
1033 '+' => self.char_frequency.plus_count > 0,
1034 '>' => self.char_frequency.gt_count > 0,
1035 '|' => self.char_frequency.pipe_count > 0,
1036 '[' => self.char_frequency.bracket_count > 0,
1037 '`' => self.char_frequency.backtick_count > 0,
1038 '<' => self.char_frequency.lt_count > 0,
1039 '!' => self.char_frequency.exclamation_count > 0,
1040 '\n' => self.char_frequency.newline_count > 0,
1041 _ => self.content.contains(ch), }
1043 }
1044
1045 pub fn char_count(&self, ch: char) -> usize {
1047 match ch {
1048 '#' => self.char_frequency.hash_count,
1049 '*' => self.char_frequency.asterisk_count,
1050 '_' => self.char_frequency.underscore_count,
1051 '-' => self.char_frequency.hyphen_count,
1052 '+' => self.char_frequency.plus_count,
1053 '>' => self.char_frequency.gt_count,
1054 '|' => self.char_frequency.pipe_count,
1055 '[' => self.char_frequency.bracket_count,
1056 '`' => self.char_frequency.backtick_count,
1057 '<' => self.char_frequency.lt_count,
1058 '!' => self.char_frequency.exclamation_count,
1059 '\n' => self.char_frequency.newline_count,
1060 _ => self.content.matches(ch).count(), }
1062 }
1063
1064 pub fn likely_has_headings(&self) -> bool {
1066 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
1068
1069 pub fn likely_has_lists(&self) -> bool {
1071 self.char_frequency.asterisk_count > 0
1072 || self.char_frequency.hyphen_count > 0
1073 || self.char_frequency.plus_count > 0
1074 }
1075
1076 pub fn likely_has_emphasis(&self) -> bool {
1078 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1079 }
1080
1081 pub fn likely_has_tables(&self) -> bool {
1083 self.char_frequency.pipe_count > 2
1084 }
1085
1086 pub fn likely_has_blockquotes(&self) -> bool {
1088 self.char_frequency.gt_count > 0
1089 }
1090
1091 pub fn likely_has_code(&self) -> bool {
1093 self.char_frequency.backtick_count > 0
1094 }
1095
1096 pub fn likely_has_links_or_images(&self) -> bool {
1098 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1099 }
1100
1101 pub fn likely_has_html(&self) -> bool {
1103 self.char_frequency.lt_count > 0
1104 }
1105
1106 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1108 self.html_tags()
1109 .iter()
1110 .filter(|tag| tag.line == line_num)
1111 .cloned()
1112 .collect()
1113 }
1114
1115 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1117 self.emphasis_spans()
1118 .iter()
1119 .filter(|span| span.line == line_num)
1120 .cloned()
1121 .collect()
1122 }
1123
1124 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1126 self.table_rows()
1127 .iter()
1128 .filter(|row| row.line == line_num)
1129 .cloned()
1130 .collect()
1131 }
1132
1133 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1135 self.bare_urls()
1136 .iter()
1137 .filter(|url| url.line == line_num)
1138 .cloned()
1139 .collect()
1140 }
1141
1142 #[inline]
1148 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1149 let idx = match lines.binary_search_by(|line| {
1151 if byte_offset < line.byte_offset {
1152 std::cmp::Ordering::Greater
1153 } else if byte_offset > line.byte_offset + line.byte_len {
1154 std::cmp::Ordering::Less
1155 } else {
1156 std::cmp::Ordering::Equal
1157 }
1158 }) {
1159 Ok(idx) => idx,
1160 Err(idx) => idx.saturating_sub(1),
1161 };
1162
1163 let line = &lines[idx];
1164 let line_num = idx + 1;
1165 let col = byte_offset.saturating_sub(line.byte_offset);
1166
1167 (idx, line_num, col)
1168 }
1169
1170 #[inline]
1172 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1173 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1175
1176 if idx > 0 {
1178 let span = &code_spans[idx - 1];
1179 if offset >= span.byte_offset && offset < span.byte_end {
1180 return true;
1181 }
1182 }
1183
1184 false
1185 }
1186
1187 fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1191 use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1192
1193 let mut link_ranges = Vec::new();
1194 let mut options = Options::empty();
1195 options.insert(Options::ENABLE_WIKILINKS);
1196 options.insert(Options::ENABLE_FOOTNOTES);
1197
1198 let parser = Parser::new_ext(content, options).into_offset_iter();
1199 let mut link_stack: Vec<usize> = Vec::new();
1200
1201 for (event, range) in parser {
1202 match event {
1203 Event::Start(Tag::Link { .. }) => {
1204 link_stack.push(range.start);
1205 }
1206 Event::End(TagEnd::Link) => {
1207 if let Some(start_pos) = link_stack.pop() {
1208 link_ranges.push((start_pos, range.end));
1209 }
1210 }
1211 _ => {}
1212 }
1213 }
1214
1215 link_ranges
1216 }
1217
1218 fn parse_links(
1220 content: &'a str,
1221 lines: &[LineInfo],
1222 code_blocks: &[(usize, usize)],
1223 code_spans: &[CodeSpan],
1224 flavor: MarkdownFlavor,
1225 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1226 ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1227 use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1228 use std::collections::HashSet;
1229
1230 let mut links = Vec::with_capacity(content.len() / 500);
1231 let mut broken_links = Vec::new();
1232 let mut footnote_refs = Vec::new();
1233
1234 let mut found_positions = HashSet::new();
1236
1237 let mut options = Options::empty();
1247 options.insert(Options::ENABLE_WIKILINKS);
1248 options.insert(Options::ENABLE_FOOTNOTES);
1249
1250 let parser = Parser::new_with_broken_link_callback(
1251 content,
1252 options,
1253 Some(|link: BrokenLink<'_>| {
1254 broken_links.push(BrokenLinkInfo {
1255 reference: link.reference.to_string(),
1256 span: link.span.clone(),
1257 });
1258 None
1259 }),
1260 )
1261 .into_offset_iter();
1262
1263 let mut link_stack: Vec<(
1264 usize,
1265 usize,
1266 pulldown_cmark::CowStr<'a>,
1267 LinkType,
1268 pulldown_cmark::CowStr<'a>,
1269 )> = Vec::new();
1270 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1273 match event {
1274 Event::Start(Tag::Link {
1275 link_type,
1276 dest_url,
1277 id,
1278 ..
1279 }) => {
1280 link_stack.push((range.start, range.end, dest_url, link_type, id));
1282 text_chunks.clear();
1283 }
1284 Event::Text(text) if !link_stack.is_empty() => {
1285 text_chunks.push((text.to_string(), range.start, range.end));
1287 }
1288 Event::Code(code) if !link_stack.is_empty() => {
1289 let code_text = format!("`{code}`");
1291 text_chunks.push((code_text, range.start, range.end));
1292 }
1293 Event::End(TagEnd::Link) => {
1294 if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1295 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1297 text_chunks.clear();
1298 continue;
1299 }
1300
1301 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1303
1304 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1306 text_chunks.clear();
1307 continue;
1308 }
1309
1310 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1311
1312 let is_reference = matches!(
1313 link_type,
1314 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1315 );
1316
1317 let link_text = if start_pos < content.len() {
1320 let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1321
1322 let mut close_pos = None;
1326 let mut depth = 0;
1327 let mut in_code_span = false;
1328
1329 for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1330 let mut backslash_count = 0;
1332 let mut j = i;
1333 while j > 0 && link_bytes[j - 1] == b'\\' {
1334 backslash_count += 1;
1335 j -= 1;
1336 }
1337 let is_escaped = backslash_count % 2 != 0;
1338
1339 if byte == b'`' && !is_escaped {
1341 in_code_span = !in_code_span;
1342 }
1343
1344 if !is_escaped && !in_code_span {
1346 if byte == b'[' {
1347 depth += 1;
1348 } else if byte == b']' {
1349 if depth == 0 {
1350 close_pos = Some(i);
1352 break;
1353 } else {
1354 depth -= 1;
1355 }
1356 }
1357 }
1358 }
1359
1360 if let Some(pos) = close_pos {
1361 Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1362 } else {
1363 Cow::Borrowed("")
1364 }
1365 } else {
1366 Cow::Borrowed("")
1367 };
1368
1369 let reference_id = if is_reference && !ref_id.is_empty() {
1371 Some(Cow::Owned(ref_id.to_lowercase()))
1372 } else if is_reference {
1373 Some(Cow::Owned(link_text.to_lowercase()))
1375 } else {
1376 None
1377 };
1378
1379 found_positions.insert(start_pos);
1381
1382 links.push(ParsedLink {
1383 line: line_num,
1384 start_col: col_start,
1385 end_col: col_end,
1386 byte_offset: start_pos,
1387 byte_end: range.end,
1388 text: link_text,
1389 url: Cow::Owned(url.to_string()),
1390 is_reference,
1391 reference_id,
1392 link_type,
1393 });
1394
1395 text_chunks.clear();
1396 }
1397 }
1398 Event::FootnoteReference(footnote_id) => {
1399 if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1402 continue;
1403 }
1404
1405 let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1406 footnote_refs.push(FootnoteRef {
1407 id: footnote_id.to_string(),
1408 line: line_num,
1409 byte_offset: range.start,
1410 byte_end: range.end,
1411 });
1412 }
1413 _ => {}
1414 }
1415 }
1416
1417 for cap in LINK_PATTERN.captures_iter(content) {
1421 let full_match = cap.get(0).unwrap();
1422 let match_start = full_match.start();
1423 let match_end = full_match.end();
1424
1425 if found_positions.contains(&match_start) {
1427 continue;
1428 }
1429
1430 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1432 continue;
1433 }
1434
1435 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1437 continue;
1438 }
1439
1440 if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1442 continue;
1443 }
1444
1445 if Self::is_offset_in_code_span(code_spans, match_start) {
1447 continue;
1448 }
1449
1450 if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1452 continue;
1453 }
1454
1455 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1457
1458 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1460 continue;
1461 }
1462
1463 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1464
1465 let text = cap.get(1).map_or("", |m| m.as_str());
1466
1467 if let Some(ref_id) = cap.get(6) {
1469 let ref_id_str = ref_id.as_str();
1470 let normalized_ref = if ref_id_str.is_empty() {
1471 Cow::Owned(text.to_lowercase()) } else {
1473 Cow::Owned(ref_id_str.to_lowercase())
1474 };
1475
1476 links.push(ParsedLink {
1478 line: line_num,
1479 start_col: col_start,
1480 end_col: col_end,
1481 byte_offset: match_start,
1482 byte_end: match_end,
1483 text: Cow::Borrowed(text),
1484 url: Cow::Borrowed(""), is_reference: true,
1486 reference_id: Some(normalized_ref),
1487 link_type: LinkType::Reference, });
1489 }
1490 }
1491
1492 (links, broken_links, footnote_refs)
1493 }
1494
1495 fn parse_images(
1497 content: &'a str,
1498 lines: &[LineInfo],
1499 code_blocks: &[(usize, usize)],
1500 code_spans: &[CodeSpan],
1501 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1502 ) -> Vec<ParsedImage<'a>> {
1503 use crate::utils::skip_context::is_in_html_comment_ranges;
1504 use std::collections::HashSet;
1505
1506 let mut images = Vec::with_capacity(content.len() / 1000);
1508 let mut found_positions = HashSet::new();
1509
1510 let parser = Parser::new(content).into_offset_iter();
1512 let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1513 Vec::new();
1514 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1517 match event {
1518 Event::Start(Tag::Image {
1519 link_type,
1520 dest_url,
1521 id,
1522 ..
1523 }) => {
1524 image_stack.push((range.start, dest_url, link_type, id));
1525 text_chunks.clear();
1526 }
1527 Event::Text(text) if !image_stack.is_empty() => {
1528 text_chunks.push((text.to_string(), range.start, range.end));
1529 }
1530 Event::Code(code) if !image_stack.is_empty() => {
1531 let code_text = format!("`{code}`");
1532 text_chunks.push((code_text, range.start, range.end));
1533 }
1534 Event::End(TagEnd::Image) => {
1535 if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1536 if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1538 continue;
1539 }
1540
1541 if Self::is_offset_in_code_span(code_spans, start_pos) {
1543 continue;
1544 }
1545
1546 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1548 continue;
1549 }
1550
1551 let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1553 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1554
1555 let is_reference = matches!(
1556 link_type,
1557 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1558 );
1559
1560 let alt_text = if start_pos < content.len() {
1563 let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1564
1565 let mut close_pos = None;
1568 let mut depth = 0;
1569
1570 if image_bytes.len() > 2 {
1571 for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1572 let mut backslash_count = 0;
1574 let mut j = i;
1575 while j > 0 && image_bytes[j - 1] == b'\\' {
1576 backslash_count += 1;
1577 j -= 1;
1578 }
1579 let is_escaped = backslash_count % 2 != 0;
1580
1581 if !is_escaped {
1582 if byte == b'[' {
1583 depth += 1;
1584 } else if byte == b']' {
1585 if depth == 0 {
1586 close_pos = Some(i);
1588 break;
1589 } else {
1590 depth -= 1;
1591 }
1592 }
1593 }
1594 }
1595 }
1596
1597 if let Some(pos) = close_pos {
1598 Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1599 } else {
1600 Cow::Borrowed("")
1601 }
1602 } else {
1603 Cow::Borrowed("")
1604 };
1605
1606 let reference_id = if is_reference && !ref_id.is_empty() {
1607 Some(Cow::Owned(ref_id.to_lowercase()))
1608 } else if is_reference {
1609 Some(Cow::Owned(alt_text.to_lowercase())) } else {
1611 None
1612 };
1613
1614 found_positions.insert(start_pos);
1615 images.push(ParsedImage {
1616 line: line_num,
1617 start_col: col_start,
1618 end_col: col_end,
1619 byte_offset: start_pos,
1620 byte_end: range.end,
1621 alt_text,
1622 url: Cow::Owned(url.to_string()),
1623 is_reference,
1624 reference_id,
1625 link_type,
1626 });
1627 }
1628 }
1629 _ => {}
1630 }
1631 }
1632
1633 for cap in IMAGE_PATTERN.captures_iter(content) {
1635 let full_match = cap.get(0).unwrap();
1636 let match_start = full_match.start();
1637 let match_end = full_match.end();
1638
1639 if found_positions.contains(&match_start) {
1641 continue;
1642 }
1643
1644 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1646 continue;
1647 }
1648
1649 if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1651 || Self::is_offset_in_code_span(code_spans, match_start)
1652 || is_in_html_comment_ranges(html_comment_ranges, match_start)
1653 {
1654 continue;
1655 }
1656
1657 if let Some(ref_id) = cap.get(6) {
1659 let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1660 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1661 let alt_text = cap.get(1).map_or("", |m| m.as_str());
1662 let ref_id_str = ref_id.as_str();
1663 let normalized_ref = if ref_id_str.is_empty() {
1664 Cow::Owned(alt_text.to_lowercase())
1665 } else {
1666 Cow::Owned(ref_id_str.to_lowercase())
1667 };
1668
1669 images.push(ParsedImage {
1670 line: line_num,
1671 start_col: col_start,
1672 end_col: col_end,
1673 byte_offset: match_start,
1674 byte_end: match_end,
1675 alt_text: Cow::Borrowed(alt_text),
1676 url: Cow::Borrowed(""),
1677 is_reference: true,
1678 reference_id: Some(normalized_ref),
1679 link_type: LinkType::Reference, });
1681 }
1682 }
1683
1684 images
1685 }
1686
1687 fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1689 let mut refs = Vec::with_capacity(lines.len() / 20); for (line_idx, line_info) in lines.iter().enumerate() {
1693 if line_info.in_code_block {
1695 continue;
1696 }
1697
1698 let line = line_info.content(content);
1699 let line_num = line_idx + 1;
1700
1701 if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1702 let id_raw = cap.get(1).unwrap().as_str();
1703
1704 if id_raw.starts_with('^') {
1707 continue;
1708 }
1709
1710 let id = id_raw.to_lowercase();
1711 let url = cap.get(2).unwrap().as_str().to_string();
1712 let title_match = cap.get(3).or_else(|| cap.get(4));
1713 let title = title_match.map(|m| m.as_str().to_string());
1714
1715 let match_obj = cap.get(0).unwrap();
1718 let byte_offset = line_info.byte_offset + match_obj.start();
1719 let byte_end = line_info.byte_offset + match_obj.end();
1720
1721 let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1723 let start = line_info.byte_offset + m.start().saturating_sub(1);
1725 let end = line_info.byte_offset + m.end() + 1; (Some(start), Some(end))
1727 } else {
1728 (None, None)
1729 };
1730
1731 refs.push(ReferenceDef {
1732 line: line_num,
1733 id,
1734 url,
1735 title,
1736 byte_offset,
1737 byte_end,
1738 title_byte_start,
1739 title_byte_end,
1740 });
1741 }
1742 }
1743
1744 refs
1745 }
1746
1747 #[inline]
1751 fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
1752 let trimmed_start = line.trim_start();
1753 if !trimmed_start.starts_with('>') {
1754 return None;
1755 }
1756
1757 let mut remaining = line;
1759 let mut total_prefix_len = 0;
1760
1761 loop {
1762 let trimmed = remaining.trim_start();
1763 if !trimmed.starts_with('>') {
1764 break;
1765 }
1766
1767 let leading_ws_len = remaining.len() - trimmed.len();
1769 total_prefix_len += leading_ws_len + 1;
1770
1771 let after_gt = &trimmed[1..];
1772
1773 if let Some(stripped) = after_gt.strip_prefix(' ') {
1775 total_prefix_len += 1;
1776 remaining = stripped;
1777 } else if let Some(stripped) = after_gt.strip_prefix('\t') {
1778 total_prefix_len += 1;
1779 remaining = stripped;
1780 } else {
1781 remaining = after_gt;
1782 }
1783 }
1784
1785 Some((&line[..total_prefix_len], remaining))
1786 }
1787
1788 fn detect_list_items_and_emphasis_with_pulldown(
1812 content: &str,
1813 line_offsets: &[usize],
1814 flavor: MarkdownFlavor,
1815 front_matter_end: usize,
1816 code_blocks: &[(usize, usize)],
1817 ) -> (ListItemMap, Vec<EmphasisSpan>) {
1818 use std::collections::HashMap;
1819
1820 let mut list_items = HashMap::new();
1821 let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
1822
1823 let mut options = Options::empty();
1824 options.insert(Options::ENABLE_TABLES);
1825 options.insert(Options::ENABLE_FOOTNOTES);
1826 options.insert(Options::ENABLE_STRIKETHROUGH);
1827 options.insert(Options::ENABLE_TASKLISTS);
1828 options.insert(Options::ENABLE_GFM);
1830
1831 let _ = flavor;
1833
1834 let parser = Parser::new_ext(content, options).into_offset_iter();
1835 let mut list_depth: usize = 0;
1836 let mut list_stack: Vec<bool> = Vec::new();
1837
1838 for (event, range) in parser {
1839 match event {
1840 Event::Start(Tag::Emphasis) | Event::Start(Tag::Strong) => {
1842 let marker_count = if matches!(event, Event::Start(Tag::Strong)) {
1843 2
1844 } else {
1845 1
1846 };
1847 let match_start = range.start;
1848 let match_end = range.end;
1849
1850 if !CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
1852 let marker = content[match_start..].chars().next().unwrap_or('*');
1854 if marker == '*' || marker == '_' {
1855 let content_start = match_start + marker_count;
1857 let content_end = if match_end >= marker_count {
1858 match_end - marker_count
1859 } else {
1860 match_end
1861 };
1862 let content_part = if content_start < content_end && content_end <= content.len() {
1863 &content[content_start..content_end]
1864 } else {
1865 ""
1866 };
1867
1868 let line_idx = match line_offsets.binary_search(&match_start) {
1870 Ok(idx) => idx,
1871 Err(idx) => idx.saturating_sub(1),
1872 };
1873 let line_num = line_idx + 1;
1874 let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
1875 let col_start = match_start - line_start;
1876 let col_end = match_end - line_start;
1877
1878 emphasis_spans.push(EmphasisSpan {
1879 line: line_num,
1880 start_col: col_start,
1881 end_col: col_end,
1882 byte_offset: match_start,
1883 byte_end: match_end,
1884 marker,
1885 marker_count,
1886 content: content_part.to_string(),
1887 });
1888 }
1889 }
1890 }
1891 Event::Start(Tag::List(start_number)) => {
1892 list_depth += 1;
1893 list_stack.push(start_number.is_some());
1894 }
1895 Event::End(TagEnd::List(_)) => {
1896 list_depth = list_depth.saturating_sub(1);
1897 list_stack.pop();
1898 }
1899 Event::Start(Tag::Item) if list_depth > 0 => {
1900 let current_list_is_ordered = list_stack.last().copied().unwrap_or(false);
1902 let item_start = range.start;
1904
1905 let mut line_idx = match line_offsets.binary_search(&item_start) {
1907 Ok(idx) => idx,
1908 Err(idx) => idx.saturating_sub(1),
1909 };
1910
1911 if item_start < content.len() && content.as_bytes()[item_start] == b'\n' {
1915 line_idx += 1;
1916 }
1917
1918 if front_matter_end > 0 && line_idx < front_matter_end {
1920 continue;
1921 }
1922
1923 if line_idx < line_offsets.len() {
1924 let line_start_byte = line_offsets[line_idx];
1925 let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
1926 let line = &content[line_start_byte..line_end.min(content.len())];
1927
1928 let line = line
1930 .strip_suffix('\n')
1931 .or_else(|| line.strip_suffix("\r\n"))
1932 .unwrap_or(line);
1933
1934 let blockquote_parse = Self::parse_blockquote_prefix(line);
1936 let (blockquote_prefix_len, line_to_parse) = if let Some((prefix, content)) = blockquote_parse {
1937 (prefix.len(), content)
1938 } else {
1939 (0, line)
1940 };
1941
1942 if current_list_is_ordered {
1944 if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
1945 Self::parse_ordered_list(line_to_parse)
1946 {
1947 let marker = format!("{number_str}{delimiter}");
1948 let marker_column = blockquote_prefix_len + leading_spaces.len();
1949 let content_column = marker_column + marker.len() + spacing.len();
1950 let number = number_str.parse().ok();
1951
1952 list_items.entry(line_start_byte).or_insert((
1953 true,
1954 marker,
1955 marker_column,
1956 content_column,
1957 number,
1958 ));
1959 }
1960 } else if let Some((leading_spaces, marker, spacing, _content)) =
1961 Self::parse_unordered_list(line_to_parse)
1962 {
1963 let marker_column = blockquote_prefix_len + leading_spaces.len();
1964 let content_column = marker_column + 1 + spacing.len();
1965
1966 list_items.entry(line_start_byte).or_insert((
1967 false,
1968 marker.to_string(),
1969 marker_column,
1970 content_column,
1971 None,
1972 ));
1973 }
1974 }
1975 }
1976 _ => {}
1977 }
1978 }
1979
1980 (list_items, emphasis_spans)
1981 }
1982
1983 #[inline]
1987 fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
1988 let bytes = line.as_bytes();
1989 let mut i = 0;
1990
1991 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1993 i += 1;
1994 }
1995
1996 if i >= bytes.len() {
1998 return None;
1999 }
2000 let marker = bytes[i] as char;
2001 if marker != '-' && marker != '*' && marker != '+' {
2002 return None;
2003 }
2004 let marker_pos = i;
2005 i += 1;
2006
2007 let spacing_start = i;
2009 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2010 i += 1;
2011 }
2012
2013 Some((&line[..marker_pos], marker, &line[spacing_start..i], &line[i..]))
2014 }
2015
2016 #[inline]
2020 fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
2021 let bytes = line.as_bytes();
2022 let mut i = 0;
2023
2024 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2026 i += 1;
2027 }
2028
2029 let number_start = i;
2031 while i < bytes.len() && bytes[i].is_ascii_digit() {
2032 i += 1;
2033 }
2034 if i == number_start {
2035 return None; }
2037
2038 if i >= bytes.len() {
2040 return None;
2041 }
2042 let delimiter = bytes[i] as char;
2043 if delimiter != '.' && delimiter != ')' {
2044 return None;
2045 }
2046 let delimiter_pos = i;
2047 i += 1;
2048
2049 let spacing_start = i;
2051 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2052 i += 1;
2053 }
2054
2055 Some((
2056 &line[..number_start],
2057 &line[number_start..delimiter_pos],
2058 delimiter,
2059 &line[spacing_start..i],
2060 &line[i..],
2061 ))
2062 }
2063
2064 fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
2067 let num_lines = line_offsets.len();
2068 let mut in_code_block = vec![false; num_lines];
2069
2070 for &(start, end) in code_blocks {
2072 let safe_start = if start > 0 && !content.is_char_boundary(start) {
2074 let mut boundary = start;
2075 while boundary > 0 && !content.is_char_boundary(boundary) {
2076 boundary -= 1;
2077 }
2078 boundary
2079 } else {
2080 start
2081 };
2082
2083 let safe_end = if end < content.len() && !content.is_char_boundary(end) {
2084 let mut boundary = end;
2085 while boundary < content.len() && !content.is_char_boundary(boundary) {
2086 boundary += 1;
2087 }
2088 boundary
2089 } else {
2090 end.min(content.len())
2091 };
2092
2093 let first_line_after = line_offsets.partition_point(|&offset| offset <= safe_start);
2112 let first_line = first_line_after.saturating_sub(1);
2113 let last_line = line_offsets.partition_point(|&offset| offset < safe_end);
2114
2115 for flag in in_code_block.iter_mut().take(last_line).skip(first_line) {
2117 *flag = true;
2118 }
2119 }
2120
2121 in_code_block
2122 }
2123
2124 fn compute_math_block_line_map(content: &str, code_block_map: &[bool]) -> Vec<bool> {
2127 let content_lines: Vec<&str> = content.lines().collect();
2128 let num_lines = content_lines.len();
2129 let mut in_math_block = vec![false; num_lines];
2130
2131 let mut inside_math = false;
2132
2133 for (i, line) in content_lines.iter().enumerate() {
2134 if code_block_map.get(i).copied().unwrap_or(false) {
2136 continue;
2137 }
2138
2139 let trimmed = line.trim();
2140
2141 if trimmed == "$$" {
2144 if inside_math {
2145 in_math_block[i] = true;
2147 inside_math = false;
2148 } else {
2149 in_math_block[i] = true;
2151 inside_math = true;
2152 }
2153 } else if inside_math {
2154 in_math_block[i] = true;
2156 }
2157 }
2158
2159 in_math_block
2160 }
2161
2162 fn compute_basic_line_info(
2165 content: &str,
2166 line_offsets: &[usize],
2167 code_blocks: &[(usize, usize)],
2168 flavor: MarkdownFlavor,
2169 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2170 autodoc_ranges: &[crate::utils::skip_context::ByteRange],
2171 ) -> (Vec<LineInfo>, Vec<EmphasisSpan>) {
2172 let content_lines: Vec<&str> = content.lines().collect();
2173 let mut lines = Vec::with_capacity(content_lines.len());
2174
2175 let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
2177
2178 let math_block_map = Self::compute_math_block_line_map(content, &code_block_map);
2180
2181 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2184
2185 let (list_item_map, emphasis_spans) = Self::detect_list_items_and_emphasis_with_pulldown(
2188 content,
2189 line_offsets,
2190 flavor,
2191 front_matter_end,
2192 code_blocks,
2193 );
2194
2195 for (i, line) in content_lines.iter().enumerate() {
2196 let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
2197 let indent = line.len() - line.trim_start().len();
2198 let visual_indent = ElementCache::calculate_indentation_width_default(line);
2200
2201 let blockquote_parse = Self::parse_blockquote_prefix(line);
2203
2204 let is_blank = if let Some((_, content)) = blockquote_parse {
2206 content.trim().is_empty()
2208 } else {
2209 line.trim().is_empty()
2210 };
2211
2212 let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
2214
2215 let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
2217 && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
2218 let line_end_offset = byte_offset + line.len();
2221 let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
2222 html_comment_ranges,
2223 byte_offset,
2224 line_end_offset,
2225 );
2226 let list_item =
2229 list_item_map
2230 .get(&byte_offset)
2231 .map(
2232 |(is_ordered, marker, marker_column, content_column, number)| ListItemInfo {
2233 marker: marker.clone(),
2234 is_ordered: *is_ordered,
2235 number: *number,
2236 marker_column: *marker_column,
2237 content_column: *content_column,
2238 },
2239 );
2240
2241 let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2244 let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2245
2246 let in_math_block = math_block_map.get(i).copied().unwrap_or(false);
2248
2249 lines.push(LineInfo {
2250 byte_offset,
2251 byte_len: line.len(),
2252 indent,
2253 visual_indent,
2254 is_blank,
2255 in_code_block,
2256 in_front_matter,
2257 in_html_block: false, in_html_comment,
2259 list_item,
2260 heading: None, blockquote: None, in_mkdocstrings,
2263 in_esm_block: false, in_code_span_continuation: false, is_horizontal_rule: is_hr,
2266 in_math_block,
2267 });
2268 }
2269
2270 (lines, emphasis_spans)
2271 }
2272
2273 fn detect_headings_and_blockquotes(
2275 content: &str,
2276 lines: &mut [LineInfo],
2277 flavor: MarkdownFlavor,
2278 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2279 link_byte_ranges: &[(usize, usize)],
2280 ) {
2281 static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
2283 LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
2284 static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
2285 LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
2286
2287 let content_lines: Vec<&str> = content.lines().collect();
2288
2289 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2291
2292 for i in 0..lines.len() {
2294 let line = content_lines[i];
2295
2296 if !(front_matter_end > 0 && i < front_matter_end)
2301 && let Some(bq) = parse_blockquote_detailed(line)
2302 {
2303 let nesting_level = bq.markers.len();
2304 let marker_column = bq.indent.len();
2305 let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
2306 let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
2307 let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
2308 let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
2309
2310 lines[i].blockquote = Some(BlockquoteInfo {
2311 nesting_level,
2312 indent: bq.indent.to_string(),
2313 marker_column,
2314 prefix,
2315 content: bq.content.to_string(),
2316 has_no_space_after_marker: has_no_space,
2317 has_multiple_spaces_after_marker: has_multiple_spaces,
2318 needs_md028_fix,
2319 });
2320 }
2321
2322 if lines[i].in_code_block {
2324 continue;
2325 }
2326
2327 if front_matter_end > 0 && i < front_matter_end {
2329 continue;
2330 }
2331
2332 if lines[i].in_html_block {
2334 continue;
2335 }
2336
2337 if lines[i].is_blank {
2339 continue;
2340 }
2341
2342 let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
2345 crate::utils::mkdocs_snippets::is_snippet_section_start(line)
2346 || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
2347 } else {
2348 false
2349 };
2350
2351 if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
2352 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
2354 continue;
2355 }
2356 let line_offset = lines[i].byte_offset;
2359 if link_byte_ranges
2360 .iter()
2361 .any(|&(start, end)| line_offset > start && line_offset < end)
2362 {
2363 continue;
2364 }
2365 let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
2366 let hashes = caps.get(2).map_or("", |m| m.as_str());
2367 let spaces_after = caps.get(3).map_or("", |m| m.as_str());
2368 let rest = caps.get(4).map_or("", |m| m.as_str());
2369
2370 let level = hashes.len() as u8;
2371 let marker_column = leading_spaces.len();
2372
2373 let (text, has_closing, closing_seq) = {
2375 let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
2377 if rest[id_start..].trim_end().ends_with('}') {
2379 (&rest[..id_start], &rest[id_start..])
2381 } else {
2382 (rest, "")
2383 }
2384 } else {
2385 (rest, "")
2386 };
2387
2388 let trimmed_rest = rest_without_id.trim_end();
2390 if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
2391 let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();
2394
2395 let last_hash_char_idx = char_positions
2397 .iter()
2398 .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);
2399
2400 if let Some(mut char_idx) = last_hash_char_idx {
2401 while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
2403 char_idx -= 1;
2404 }
2405
2406 let start_of_hashes = char_positions[char_idx].0;
2408
2409 let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();
2411
2412 let potential_closing = &trimmed_rest[start_of_hashes..];
2414 let is_all_hashes = potential_closing.chars().all(|c| c == '#');
2415
2416 if is_all_hashes && has_space_before {
2417 let closing_hashes = potential_closing.to_string();
2419 let text_part = if !custom_id_part.is_empty() {
2422 format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
2425 } else {
2426 trimmed_rest[..start_of_hashes].trim_end().to_string()
2427 };
2428 (text_part, true, closing_hashes)
2429 } else {
2430 (rest.to_string(), false, String::new())
2432 }
2433 } else {
2434 (rest.to_string(), false, String::new())
2436 }
2437 } else {
2438 (rest.to_string(), false, String::new())
2440 }
2441 };
2442
2443 let content_column = marker_column + hashes.len() + spaces_after.len();
2444
2445 let raw_text = text.trim().to_string();
2447 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2448
2449 if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
2451 let next_line = content_lines[i + 1];
2452 if !lines[i + 1].in_code_block
2453 && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
2454 && let Some(next_line_id) =
2455 crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
2456 {
2457 custom_id = Some(next_line_id);
2458 }
2459 }
2460
2461 let is_valid = !spaces_after.is_empty()
2471 || rest.is_empty()
2472 || level > 1
2473 || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());
2474
2475 lines[i].heading = Some(HeadingInfo {
2476 level,
2477 style: HeadingStyle::ATX,
2478 marker: hashes.to_string(),
2479 marker_column,
2480 content_column,
2481 text: clean_text,
2482 custom_id,
2483 raw_text,
2484 has_closing_sequence: has_closing,
2485 closing_sequence: closing_seq,
2486 is_valid,
2487 });
2488 }
2489 else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2491 let next_line = content_lines[i + 1];
2492 if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2493 if front_matter_end > 0 && i < front_matter_end {
2495 continue;
2496 }
2497
2498 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2500 {
2501 continue;
2502 }
2503
2504 let content_line = line.trim();
2507
2508 if content_line.starts_with('-') || content_line.starts_with('*') || content_line.starts_with('+') {
2510 continue;
2511 }
2512
2513 if content_line.starts_with('_') {
2515 let non_ws: String = content_line.chars().filter(|c| !c.is_whitespace()).collect();
2516 if non_ws.len() >= 3 && non_ws.chars().all(|c| c == '_') {
2517 continue;
2518 }
2519 }
2520
2521 if let Some(first_char) = content_line.chars().next()
2523 && first_char.is_ascii_digit()
2524 {
2525 let num_end = content_line.chars().take_while(|c| c.is_ascii_digit()).count();
2526 if num_end < content_line.len() {
2527 let next = content_line.chars().nth(num_end);
2528 if next == Some('.') || next == Some(')') {
2529 continue;
2530 }
2531 }
2532 }
2533
2534 if ATX_HEADING_REGEX.is_match(line) {
2536 continue;
2537 }
2538
2539 if content_line.starts_with('>') {
2541 continue;
2542 }
2543
2544 let trimmed_start = line.trim_start();
2546 if trimmed_start.len() >= 3 {
2547 let first_three: String = trimmed_start.chars().take(3).collect();
2548 if first_three == "```" || first_three == "~~~" {
2549 continue;
2550 }
2551 }
2552
2553 if content_line.starts_with('<') {
2555 continue;
2556 }
2557
2558 let underline = next_line.trim();
2559
2560 let level = if underline.starts_with('=') { 1 } else { 2 };
2561 let style = if level == 1 {
2562 HeadingStyle::Setext1
2563 } else {
2564 HeadingStyle::Setext2
2565 };
2566
2567 let raw_text = line.trim().to_string();
2569 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2570
2571 if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2573 let attr_line = content_lines[i + 2];
2574 if !lines[i + 2].in_code_block
2575 && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2576 && let Some(attr_line_id) =
2577 crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2578 {
2579 custom_id = Some(attr_line_id);
2580 }
2581 }
2582
2583 lines[i].heading = Some(HeadingInfo {
2584 level,
2585 style,
2586 marker: underline.to_string(),
2587 marker_column: next_line.len() - next_line.trim_start().len(),
2588 content_column: lines[i].indent,
2589 text: clean_text,
2590 custom_id,
2591 raw_text,
2592 has_closing_sequence: false,
2593 closing_sequence: String::new(),
2594 is_valid: true, });
2596 }
2597 }
2598 }
2599 }
2600
2601 fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2603 const BLOCK_ELEMENTS: &[&str] = &[
2606 "address",
2607 "article",
2608 "aside",
2609 "audio",
2610 "blockquote",
2611 "canvas",
2612 "details",
2613 "dialog",
2614 "dd",
2615 "div",
2616 "dl",
2617 "dt",
2618 "embed",
2619 "fieldset",
2620 "figcaption",
2621 "figure",
2622 "footer",
2623 "form",
2624 "h1",
2625 "h2",
2626 "h3",
2627 "h4",
2628 "h5",
2629 "h6",
2630 "header",
2631 "hr",
2632 "iframe",
2633 "li",
2634 "main",
2635 "menu",
2636 "nav",
2637 "noscript",
2638 "object",
2639 "ol",
2640 "p",
2641 "picture",
2642 "pre",
2643 "script",
2644 "search",
2645 "section",
2646 "source",
2647 "style",
2648 "summary",
2649 "svg",
2650 "table",
2651 "tbody",
2652 "td",
2653 "template",
2654 "textarea",
2655 "tfoot",
2656 "th",
2657 "thead",
2658 "tr",
2659 "track",
2660 "ul",
2661 "video",
2662 ];
2663
2664 let mut i = 0;
2665 while i < lines.len() {
2666 if lines[i].in_code_block || lines[i].in_front_matter {
2668 i += 1;
2669 continue;
2670 }
2671
2672 let trimmed = lines[i].content(content).trim_start();
2673
2674 if trimmed.starts_with('<') && trimmed.len() > 1 {
2676 let after_bracket = &trimmed[1..];
2678 let is_closing = after_bracket.starts_with('/');
2679 let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2680
2681 let tag_name = tag_start
2683 .chars()
2684 .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2685 .collect::<String>()
2686 .to_lowercase();
2687
2688 if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2690 lines[i].in_html_block = true;
2692
2693 if !is_closing {
2696 let closing_tag = format!("</{tag_name}>");
2697 let allow_blank_lines = tag_name == "style" || tag_name == "script";
2699 let mut j = i + 1;
2700 let mut found_closing_tag = false;
2701 while j < lines.len() && j < i + 100 {
2702 if !allow_blank_lines && lines[j].is_blank {
2705 break;
2706 }
2707
2708 lines[j].in_html_block = true;
2709
2710 if lines[j].content(content).contains(&closing_tag) {
2712 found_closing_tag = true;
2713 }
2714
2715 if found_closing_tag {
2718 j += 1;
2719 while j < lines.len() && j < i + 100 {
2721 if lines[j].is_blank {
2722 break;
2723 }
2724 lines[j].in_html_block = true;
2725 j += 1;
2726 }
2727 break;
2728 }
2729 j += 1;
2730 }
2731 }
2732 }
2733 }
2734
2735 i += 1;
2736 }
2737 }
2738
2739 fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2742 if !flavor.supports_esm_blocks() {
2744 return;
2745 }
2746
2747 let mut in_multiline_comment = false;
2748
2749 for line in lines.iter_mut() {
2750 if line.is_blank || line.in_html_comment {
2752 continue;
2753 }
2754
2755 let trimmed = line.content(content).trim_start();
2756
2757 if in_multiline_comment {
2759 if trimmed.contains("*/") {
2760 in_multiline_comment = false;
2761 }
2762 continue;
2763 }
2764
2765 if trimmed.starts_with("//") {
2767 continue;
2768 }
2769
2770 if trimmed.starts_with("/*") {
2772 if !trimmed.contains("*/") {
2773 in_multiline_comment = true;
2774 }
2775 continue;
2776 }
2777
2778 if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
2780 line.in_esm_block = true;
2781 } else {
2782 break;
2784 }
2785 }
2786 }
2787
2788 fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
2790 let mut code_spans = Vec::new();
2791
2792 if !content.contains('`') {
2794 return code_spans;
2795 }
2796
2797 let parser = Parser::new(content).into_offset_iter();
2799
2800 for (event, range) in parser {
2801 if let Event::Code(_) = event {
2802 let start_pos = range.start;
2803 let end_pos = range.end;
2804
2805 let full_span = &content[start_pos..end_pos];
2807 let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
2808
2809 let content_start = start_pos + backtick_count;
2811 let content_end = end_pos - backtick_count;
2812 let span_content = if content_start < content_end {
2813 content[content_start..content_end].to_string()
2814 } else {
2815 String::new()
2816 };
2817
2818 let line_idx = lines
2821 .partition_point(|line| line.byte_offset <= start_pos)
2822 .saturating_sub(1);
2823 let line_num = line_idx + 1;
2824 let byte_col_start = start_pos - lines[line_idx].byte_offset;
2825
2826 let end_line_idx = lines
2828 .partition_point(|line| line.byte_offset <= end_pos)
2829 .saturating_sub(1);
2830 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
2831
2832 let line_content = lines[line_idx].content(content);
2835 let col_start = if byte_col_start <= line_content.len() {
2836 line_content[..byte_col_start].chars().count()
2837 } else {
2838 line_content.chars().count()
2839 };
2840
2841 let end_line_content = lines[end_line_idx].content(content);
2842 let col_end = if byte_col_end <= end_line_content.len() {
2843 end_line_content[..byte_col_end].chars().count()
2844 } else {
2845 end_line_content.chars().count()
2846 };
2847
2848 code_spans.push(CodeSpan {
2849 line: line_num,
2850 end_line: end_line_idx + 1,
2851 start_col: col_start,
2852 end_col: col_end,
2853 byte_offset: start_pos,
2854 byte_end: end_pos,
2855 backtick_count,
2856 content: span_content,
2857 });
2858 }
2859 }
2860
2861 code_spans.sort_by_key(|span| span.byte_offset);
2863
2864 code_spans
2865 }
2866
2867 fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
2878 const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
2880
2881 #[inline]
2884 fn reset_tracking_state(
2885 list_item: &ListItemInfo,
2886 has_list_breaking_content: &mut bool,
2887 min_continuation: &mut usize,
2888 ) {
2889 *has_list_breaking_content = false;
2890 let marker_width = if list_item.is_ordered {
2891 list_item.marker.len() + 1 } else {
2893 list_item.marker.len()
2894 };
2895 *min_continuation = if list_item.is_ordered {
2896 marker_width
2897 } else {
2898 UNORDERED_LIST_MIN_CONTINUATION_INDENT
2899 };
2900 }
2901
2902 let mut list_blocks = Vec::with_capacity(lines.len() / 10); let mut current_block: Option<ListBlock> = None;
2905 let mut last_list_item_line = 0;
2906 let mut current_indent_level = 0;
2907 let mut last_marker_width = 0;
2908
2909 let mut has_list_breaking_content_since_last_item = false;
2911 let mut min_continuation_for_tracking = 0;
2912
2913 for (line_idx, line_info) in lines.iter().enumerate() {
2914 let line_num = line_idx + 1;
2915
2916 if line_info.in_code_block {
2918 if let Some(ref mut block) = current_block {
2919 let min_continuation_indent =
2921 CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
2922
2923 let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
2925
2926 match context {
2927 CodeBlockContext::Indented => {
2928 block.end_line = line_num;
2930 continue;
2931 }
2932 CodeBlockContext::Standalone => {
2933 let completed_block = current_block.take().unwrap();
2935 list_blocks.push(completed_block);
2936 continue;
2937 }
2938 CodeBlockContext::Adjacent => {
2939 block.end_line = line_num;
2941 continue;
2942 }
2943 }
2944 } else {
2945 continue;
2947 }
2948 }
2949
2950 let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
2952 caps.get(0).unwrap().as_str().to_string()
2953 } else {
2954 String::new()
2955 };
2956
2957 if let Some(ref block) = current_block
2960 && line_info.list_item.is_none()
2961 && !line_info.is_blank
2962 && !line_info.in_code_span_continuation
2963 {
2964 let line_content = line_info.content(content).trim();
2965
2966 let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
2971
2972 let blockquote_prefix_changes = blockquote_prefix.trim() != block.blockquote_prefix.trim();
2975
2976 let breaks_list = line_info.heading.is_some()
2977 || line_content.starts_with("---")
2978 || line_content.starts_with("***")
2979 || line_content.starts_with("___")
2980 || crate::utils::skip_context::is_table_line(line_content)
2981 || blockquote_prefix_changes
2982 || (line_info.indent > 0
2983 && line_info.indent < min_continuation_for_tracking
2984 && !is_lazy_continuation);
2985
2986 if breaks_list {
2987 has_list_breaking_content_since_last_item = true;
2988 }
2989 }
2990
2991 if line_info.in_code_span_continuation
2994 && line_info.list_item.is_none()
2995 && let Some(ref mut block) = current_block
2996 {
2997 block.end_line = line_num;
2998 }
2999
3000 let effective_continuation_indent = if let Some(ref block) = current_block {
3006 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3007 let line_content = line_info.content(content);
3008 let line_bq_level = line_content
3009 .chars()
3010 .take_while(|c| *c == '>' || c.is_whitespace())
3011 .filter(|&c| c == '>')
3012 .count();
3013 if line_bq_level > 0 && line_bq_level == block_bq_level {
3014 let mut pos = 0;
3016 let mut found_markers = 0;
3017 for c in line_content.chars() {
3018 pos += c.len_utf8();
3019 if c == '>' {
3020 found_markers += 1;
3021 if found_markers == line_bq_level {
3022 if line_content.get(pos..pos + 1) == Some(" ") {
3023 pos += 1;
3024 }
3025 break;
3026 }
3027 }
3028 }
3029 let after_bq = &line_content[pos..];
3030 after_bq.len() - after_bq.trim_start().len()
3031 } else {
3032 line_info.indent
3033 }
3034 } else {
3035 line_info.indent
3036 };
3037 let adjusted_min_continuation_for_tracking = if let Some(ref block) = current_block {
3038 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3039 if block_bq_level > 0 {
3040 if block.is_ordered { last_marker_width } else { 2 }
3041 } else {
3042 min_continuation_for_tracking
3043 }
3044 } else {
3045 min_continuation_for_tracking
3046 };
3047 let is_valid_continuation = effective_continuation_indent >= adjusted_min_continuation_for_tracking
3048 || (line_info.indent == 0 && !line_info.is_blank); if std::env::var("RUMDL_DEBUG_LIST").is_ok() && line_info.list_item.is_none() && !line_info.is_blank {
3051 eprintln!(
3052 "[DEBUG] Line {}: checking continuation - indent={}, min_cont={}, is_valid={}, in_code_span={}, in_code_block={}, has_block={}",
3053 line_num,
3054 effective_continuation_indent,
3055 adjusted_min_continuation_for_tracking,
3056 is_valid_continuation,
3057 line_info.in_code_span_continuation,
3058 line_info.in_code_block,
3059 current_block.is_some()
3060 );
3061 }
3062
3063 if !line_info.in_code_span_continuation
3064 && line_info.list_item.is_none()
3065 && !line_info.is_blank
3066 && !line_info.in_code_block
3067 && is_valid_continuation
3068 && let Some(ref mut block) = current_block
3069 {
3070 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3071 eprintln!(
3072 "[DEBUG] Line {}: extending block.end_line from {} to {}",
3073 line_num, block.end_line, line_num
3074 );
3075 }
3076 block.end_line = line_num;
3077 }
3078
3079 if let Some(list_item) = &line_info.list_item {
3081 let item_indent = list_item.marker_column;
3083 let nesting = item_indent / 2; if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3086 eprintln!(
3087 "[DEBUG] Line {}: list item found, marker={:?}, indent={}",
3088 line_num, list_item.marker, item_indent
3089 );
3090 }
3091
3092 if let Some(ref mut block) = current_block {
3093 let is_nested = nesting > block.nesting_level;
3097 let same_type =
3098 (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
3099 let same_context = block.blockquote_prefix == blockquote_prefix;
3100 let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
3102
3103 let marker_compatible =
3105 block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
3106
3107 let has_non_list_content = has_list_breaking_content_since_last_item;
3110
3111 let mut continues_list = if is_nested {
3115 same_context && reasonable_distance && !has_non_list_content
3117 } else {
3118 same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
3120 };
3121
3122 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3123 eprintln!(
3124 "[DEBUG] Line {}: continues_list={}, is_nested={}, same_type={}, same_context={}, reasonable_distance={}, marker_compatible={}, has_non_list_content={}, last_item={}, block.end_line={}",
3125 line_num,
3126 continues_list,
3127 is_nested,
3128 same_type,
3129 same_context,
3130 reasonable_distance,
3131 marker_compatible,
3132 has_non_list_content,
3133 last_list_item_line,
3134 block.end_line
3135 );
3136 }
3137
3138 if !continues_list && reasonable_distance && line_num > 0 && block.end_line == line_num - 1 {
3141 if block.item_lines.contains(&(line_num - 1)) {
3144 continues_list = true;
3146 } else {
3147 continues_list = true;
3151 }
3152 }
3153
3154 if continues_list {
3155 block.end_line = line_num;
3157 block.item_lines.push(line_num);
3158
3159 block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
3161 list_item.marker.len() + 1
3162 } else {
3163 list_item.marker.len()
3164 });
3165
3166 if !block.is_ordered
3168 && block.marker.is_some()
3169 && block.marker.as_ref() != Some(&list_item.marker)
3170 {
3171 block.marker = None;
3173 }
3174
3175 reset_tracking_state(
3177 list_item,
3178 &mut has_list_breaking_content_since_last_item,
3179 &mut min_continuation_for_tracking,
3180 );
3181 } else {
3182 list_blocks.push(block.clone());
3185
3186 *block = ListBlock {
3187 start_line: line_num,
3188 end_line: line_num,
3189 is_ordered: list_item.is_ordered,
3190 marker: if list_item.is_ordered {
3191 None
3192 } else {
3193 Some(list_item.marker.clone())
3194 },
3195 blockquote_prefix: blockquote_prefix.clone(),
3196 item_lines: vec![line_num],
3197 nesting_level: nesting,
3198 max_marker_width: if list_item.is_ordered {
3199 list_item.marker.len() + 1
3200 } else {
3201 list_item.marker.len()
3202 },
3203 };
3204
3205 reset_tracking_state(
3207 list_item,
3208 &mut has_list_breaking_content_since_last_item,
3209 &mut min_continuation_for_tracking,
3210 );
3211 }
3212 } else {
3213 current_block = Some(ListBlock {
3215 start_line: line_num,
3216 end_line: line_num,
3217 is_ordered: list_item.is_ordered,
3218 marker: if list_item.is_ordered {
3219 None
3220 } else {
3221 Some(list_item.marker.clone())
3222 },
3223 blockquote_prefix,
3224 item_lines: vec![line_num],
3225 nesting_level: nesting,
3226 max_marker_width: list_item.marker.len(),
3227 });
3228
3229 reset_tracking_state(
3231 list_item,
3232 &mut has_list_breaking_content_since_last_item,
3233 &mut min_continuation_for_tracking,
3234 );
3235 }
3236
3237 last_list_item_line = line_num;
3238 current_indent_level = item_indent;
3239 last_marker_width = if list_item.is_ordered {
3240 list_item.marker.len() + 1 } else {
3242 list_item.marker.len()
3243 };
3244 } else if let Some(ref mut block) = current_block {
3245 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3247 eprintln!(
3248 "[DEBUG] Line {}: non-list-item, is_blank={}, block exists",
3249 line_num, line_info.is_blank
3250 );
3251 }
3252
3253 let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
3261 lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
3262 } else {
3263 false
3264 };
3265
3266 let min_continuation_indent = if block.is_ordered {
3270 current_indent_level + last_marker_width
3271 } else {
3272 current_indent_level + 2 };
3274
3275 if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
3276 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3278 eprintln!(
3279 "[DEBUG] Line {}: indented continuation (indent={}, min={})",
3280 line_num, line_info.indent, min_continuation_indent
3281 );
3282 }
3283 block.end_line = line_num;
3284 } else if line_info.is_blank {
3285 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3288 eprintln!("[DEBUG] Line {line_num}: entering blank line handling");
3289 }
3290 let mut check_idx = line_idx + 1;
3291 let mut found_continuation = false;
3292
3293 while check_idx < lines.len() && lines[check_idx].is_blank {
3295 check_idx += 1;
3296 }
3297
3298 if check_idx < lines.len() {
3299 let next_line = &lines[check_idx];
3300 let next_content = next_line.content(content);
3302 let block_bq_level_for_indent = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3305 let next_bq_level_for_indent = next_content
3306 .chars()
3307 .take_while(|c| *c == '>' || c.is_whitespace())
3308 .filter(|&c| c == '>')
3309 .count();
3310 let effective_indent =
3311 if next_bq_level_for_indent > 0 && next_bq_level_for_indent == block_bq_level_for_indent {
3312 let mut pos = 0;
3315 let mut found_markers = 0;
3316 for c in next_content.chars() {
3317 pos += c.len_utf8();
3318 if c == '>' {
3319 found_markers += 1;
3320 if found_markers == next_bq_level_for_indent {
3321 if next_content.get(pos..pos + 1) == Some(" ") {
3323 pos += 1;
3324 }
3325 break;
3326 }
3327 }
3328 }
3329 let after_blockquote_marker = &next_content[pos..];
3330 after_blockquote_marker.len() - after_blockquote_marker.trim_start().len()
3331 } else {
3332 next_line.indent
3333 };
3334 let adjusted_min_continuation = if block_bq_level_for_indent > 0 {
3337 if block.is_ordered { last_marker_width } else { 2 }
3340 } else {
3341 min_continuation_indent
3342 };
3343 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3345 eprintln!(
3346 "[DEBUG] Blank line {} checking next line {}: effective_indent={}, adjusted_min={}, next_is_list={}, in_code_block={}",
3347 line_num,
3348 check_idx + 1,
3349 effective_indent,
3350 adjusted_min_continuation,
3351 next_line.list_item.is_some(),
3352 next_line.in_code_block
3353 );
3354 }
3355 if !next_line.in_code_block && effective_indent >= adjusted_min_continuation {
3356 found_continuation = true;
3357 }
3358 else if !next_line.in_code_block
3360 && next_line.list_item.is_some()
3361 && let Some(item) = &next_line.list_item
3362 {
3363 let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
3364 .find(next_line.content(content))
3365 .map_or(String::new(), |m| m.as_str().to_string());
3366 if item.marker_column == current_indent_level
3367 && item.is_ordered == block.is_ordered
3368 && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
3369 {
3370 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3374 let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
3375 if let Some(between_line) = lines.get(idx) {
3376 let between_content = between_line.content(content);
3377 let trimmed = between_content.trim();
3378 if trimmed.is_empty() {
3380 return false;
3381 }
3382 let line_indent = between_content.len() - between_content.trim_start().len();
3384
3385 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
3387 .find(between_content)
3388 .map_or(String::new(), |m| m.as_str().to_string());
3389 let between_bq_level = between_bq_prefix.chars().filter(|&c| c == '>').count();
3390 let blockquote_level_changed =
3391 trimmed.starts_with(">") && between_bq_level != block_bq_level;
3392
3393 if trimmed.starts_with("```")
3395 || trimmed.starts_with("~~~")
3396 || trimmed.starts_with("---")
3397 || trimmed.starts_with("***")
3398 || trimmed.starts_with("___")
3399 || blockquote_level_changed
3400 || crate::utils::skip_context::is_table_line(trimmed)
3401 || between_line.heading.is_some()
3402 {
3403 return true; }
3405
3406 line_indent >= min_continuation_indent
3408 } else {
3409 false
3410 }
3411 });
3412
3413 if block.is_ordered {
3414 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
3417 if let Some(between_line) = lines.get(idx) {
3418 let between_content = between_line.content(content);
3419 let trimmed = between_content.trim();
3420 if trimmed.is_empty() {
3421 return false;
3422 }
3423 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
3425 .find(between_content)
3426 .map_or(String::new(), |m| m.as_str().to_string());
3427 let between_bq_level =
3428 between_bq_prefix.chars().filter(|&c| c == '>').count();
3429 let blockquote_level_changed =
3430 trimmed.starts_with(">") && between_bq_level != block_bq_level;
3431 trimmed.starts_with("```")
3433 || trimmed.starts_with("~~~")
3434 || trimmed.starts_with("---")
3435 || trimmed.starts_with("***")
3436 || trimmed.starts_with("___")
3437 || blockquote_level_changed
3438 || crate::utils::skip_context::is_table_line(trimmed)
3439 || between_line.heading.is_some()
3440 } else {
3441 false
3442 }
3443 });
3444 found_continuation = !has_structural_separators;
3445 } else {
3446 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
3448 if let Some(between_line) = lines.get(idx) {
3449 let between_content = between_line.content(content);
3450 let trimmed = between_content.trim();
3451 if trimmed.is_empty() {
3452 return false;
3453 }
3454 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
3456 .find(between_content)
3457 .map_or(String::new(), |m| m.as_str().to_string());
3458 let between_bq_level =
3459 between_bq_prefix.chars().filter(|&c| c == '>').count();
3460 let blockquote_level_changed =
3461 trimmed.starts_with(">") && between_bq_level != block_bq_level;
3462 trimmed.starts_with("```")
3464 || trimmed.starts_with("~~~")
3465 || trimmed.starts_with("---")
3466 || trimmed.starts_with("***")
3467 || trimmed.starts_with("___")
3468 || blockquote_level_changed
3469 || crate::utils::skip_context::is_table_line(trimmed)
3470 || between_line.heading.is_some()
3471 } else {
3472 false
3473 }
3474 });
3475 found_continuation = !has_structural_separators;
3476 }
3477 }
3478 }
3479 }
3480
3481 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3482 eprintln!("[DEBUG] Blank line {line_num} final: found_continuation={found_continuation}");
3483 }
3484 if found_continuation {
3485 block.end_line = line_num;
3487 } else {
3488 list_blocks.push(block.clone());
3490 current_block = None;
3491 }
3492 } else {
3493 let min_required_indent = if block.is_ordered {
3496 current_indent_level + last_marker_width
3497 } else {
3498 current_indent_level + 2
3499 };
3500
3501 let line_content = line_info.content(content).trim();
3506
3507 let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
3509
3510 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3513 let current_bq_level = blockquote_prefix.chars().filter(|&c| c == '>').count();
3514 let blockquote_level_changed = line_content.starts_with(">") && current_bq_level != block_bq_level;
3515
3516 let is_structural_separator = line_info.heading.is_some()
3517 || line_content.starts_with("```")
3518 || line_content.starts_with("~~~")
3519 || line_content.starts_with("---")
3520 || line_content.starts_with("***")
3521 || line_content.starts_with("___")
3522 || blockquote_level_changed
3523 || looks_like_table;
3524
3525 let is_lazy_continuation = !is_structural_separator
3528 && !line_info.is_blank
3529 && (line_info.indent == 0 || line_info.indent >= min_required_indent);
3530
3531 if is_lazy_continuation {
3532 let line_content_raw = line_info.content(content);
3536 let block_bq_level_lazy = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3537 let line_bq_level_lazy = line_content_raw
3538 .chars()
3539 .take_while(|c| *c == '>' || c.is_whitespace())
3540 .filter(|&c| c == '>')
3541 .count();
3542 let has_proper_blockquote_indent =
3543 if line_bq_level_lazy > 0 && line_bq_level_lazy == block_bq_level_lazy {
3544 let mut pos = 0;
3546 let mut found_markers = 0;
3547 for c in line_content_raw.chars() {
3548 pos += c.len_utf8();
3549 if c == '>' {
3550 found_markers += 1;
3551 if found_markers == line_bq_level_lazy {
3552 if line_content_raw.get(pos..pos + 1) == Some(" ") {
3553 pos += 1;
3554 }
3555 break;
3556 }
3557 }
3558 }
3559 let after_bq = &line_content_raw[pos..];
3560 let effective_indent_lazy = after_bq.len() - after_bq.trim_start().len();
3561 let min_required_for_bq = if block.is_ordered { last_marker_width } else { 2 };
3562 effective_indent_lazy >= min_required_for_bq
3563 } else {
3564 false
3565 };
3566
3567 if has_proper_blockquote_indent {
3569 block.end_line = line_num;
3570 } else {
3571 let content_to_check = if !blockquote_prefix.is_empty() {
3572 line_info
3574 .content(content)
3575 .strip_prefix(&blockquote_prefix)
3576 .unwrap_or(line_info.content(content))
3577 .trim()
3578 } else {
3579 line_info.content(content).trim()
3580 };
3581
3582 let starts_with_uppercase =
3583 content_to_check.chars().next().is_some_and(|c| c.is_uppercase());
3584
3585 if starts_with_uppercase && last_list_item_line > 0 {
3588 list_blocks.push(block.clone());
3590 current_block = None;
3591 } else {
3592 block.end_line = line_num;
3594 }
3595 }
3596 } else {
3597 list_blocks.push(block.clone());
3599 current_block = None;
3600 }
3601 }
3602 }
3603 }
3604
3605 if let Some(block) = current_block {
3607 list_blocks.push(block);
3608 }
3609
3610 merge_adjacent_list_blocks(content, &mut list_blocks, lines);
3612
3613 list_blocks
3614 }
3615
3616 fn compute_char_frequency(content: &str) -> CharFrequency {
3618 let mut frequency = CharFrequency::default();
3619
3620 for ch in content.chars() {
3621 match ch {
3622 '#' => frequency.hash_count += 1,
3623 '*' => frequency.asterisk_count += 1,
3624 '_' => frequency.underscore_count += 1,
3625 '-' => frequency.hyphen_count += 1,
3626 '+' => frequency.plus_count += 1,
3627 '>' => frequency.gt_count += 1,
3628 '|' => frequency.pipe_count += 1,
3629 '[' => frequency.bracket_count += 1,
3630 '`' => frequency.backtick_count += 1,
3631 '<' => frequency.lt_count += 1,
3632 '!' => frequency.exclamation_count += 1,
3633 '\n' => frequency.newline_count += 1,
3634 _ => {}
3635 }
3636 }
3637
3638 frequency
3639 }
3640
3641 fn parse_html_tags(
3643 content: &str,
3644 lines: &[LineInfo],
3645 code_blocks: &[(usize, usize)],
3646 flavor: MarkdownFlavor,
3647 ) -> Vec<HtmlTag> {
3648 static HTML_TAG_REGEX: LazyLock<regex::Regex> =
3649 LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
3650
3651 let mut html_tags = Vec::with_capacity(content.matches('<').count());
3652
3653 for cap in HTML_TAG_REGEX.captures_iter(content) {
3654 let full_match = cap.get(0).unwrap();
3655 let match_start = full_match.start();
3656 let match_end = full_match.end();
3657
3658 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3660 continue;
3661 }
3662
3663 let is_closing = !cap.get(1).unwrap().as_str().is_empty();
3664 let tag_name_original = cap.get(2).unwrap().as_str();
3665 let tag_name = tag_name_original.to_lowercase();
3666 let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
3667
3668 if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
3671 continue;
3672 }
3673
3674 let mut line_num = 1;
3676 let mut col_start = match_start;
3677 let mut col_end = match_end;
3678 for (idx, line_info) in lines.iter().enumerate() {
3679 if match_start >= line_info.byte_offset {
3680 line_num = idx + 1;
3681 col_start = match_start - line_info.byte_offset;
3682 col_end = match_end - line_info.byte_offset;
3683 } else {
3684 break;
3685 }
3686 }
3687
3688 html_tags.push(HtmlTag {
3689 line: line_num,
3690 start_col: col_start,
3691 end_col: col_end,
3692 byte_offset: match_start,
3693 byte_end: match_end,
3694 tag_name,
3695 is_closing,
3696 is_self_closing,
3697 raw_content: full_match.as_str().to_string(),
3698 });
3699 }
3700
3701 html_tags
3702 }
3703
3704 fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
3706 let mut table_rows = Vec::with_capacity(lines.len() / 20);
3707
3708 for (line_idx, line_info) in lines.iter().enumerate() {
3709 if line_info.in_code_block || line_info.is_blank {
3711 continue;
3712 }
3713
3714 let line = line_info.content(content);
3715 let line_num = line_idx + 1;
3716
3717 if !line.contains('|') {
3719 continue;
3720 }
3721
3722 let parts: Vec<&str> = line.split('|').collect();
3724 let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
3725
3726 let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
3728 let mut column_alignments = Vec::new();
3729
3730 if is_separator {
3731 for part in &parts[1..parts.len() - 1] {
3732 let trimmed = part.trim();
3734 let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
3735 "center".to_string()
3736 } else if trimmed.ends_with(':') {
3737 "right".to_string()
3738 } else if trimmed.starts_with(':') {
3739 "left".to_string()
3740 } else {
3741 "none".to_string()
3742 };
3743 column_alignments.push(alignment);
3744 }
3745 }
3746
3747 table_rows.push(TableRow {
3748 line: line_num,
3749 is_separator,
3750 column_count,
3751 column_alignments,
3752 });
3753 }
3754
3755 table_rows
3756 }
3757
3758 fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
3760 let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
3761
3762 for cap in URL_SIMPLE_REGEX.captures_iter(content) {
3764 let full_match = cap.get(0).unwrap();
3765 let match_start = full_match.start();
3766 let match_end = full_match.end();
3767
3768 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3770 continue;
3771 }
3772
3773 let preceding_char = if match_start > 0 {
3775 content.chars().nth(match_start - 1)
3776 } else {
3777 None
3778 };
3779 let following_char = content.chars().nth(match_end);
3780
3781 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3782 continue;
3783 }
3784 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3785 continue;
3786 }
3787
3788 let url = full_match.as_str();
3789 let url_type = if url.starts_with("https://") {
3790 "https"
3791 } else if url.starts_with("http://") {
3792 "http"
3793 } else if url.starts_with("ftp://") {
3794 "ftp"
3795 } else {
3796 "other"
3797 };
3798
3799 let mut line_num = 1;
3801 let mut col_start = match_start;
3802 let mut col_end = match_end;
3803 for (idx, line_info) in lines.iter().enumerate() {
3804 if match_start >= line_info.byte_offset {
3805 line_num = idx + 1;
3806 col_start = match_start - line_info.byte_offset;
3807 col_end = match_end - line_info.byte_offset;
3808 } else {
3809 break;
3810 }
3811 }
3812
3813 bare_urls.push(BareUrl {
3814 line: line_num,
3815 start_col: col_start,
3816 end_col: col_end,
3817 byte_offset: match_start,
3818 byte_end: match_end,
3819 url: url.to_string(),
3820 url_type: url_type.to_string(),
3821 });
3822 }
3823
3824 for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
3826 let full_match = cap.get(0).unwrap();
3827 let match_start = full_match.start();
3828 let match_end = full_match.end();
3829
3830 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3832 continue;
3833 }
3834
3835 let preceding_char = if match_start > 0 {
3837 content.chars().nth(match_start - 1)
3838 } else {
3839 None
3840 };
3841 let following_char = content.chars().nth(match_end);
3842
3843 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3844 continue;
3845 }
3846 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3847 continue;
3848 }
3849
3850 let email = full_match.as_str();
3851
3852 let mut line_num = 1;
3854 let mut col_start = match_start;
3855 let mut col_end = match_end;
3856 for (idx, line_info) in lines.iter().enumerate() {
3857 if match_start >= line_info.byte_offset {
3858 line_num = idx + 1;
3859 col_start = match_start - line_info.byte_offset;
3860 col_end = match_end - line_info.byte_offset;
3861 } else {
3862 break;
3863 }
3864 }
3865
3866 bare_urls.push(BareUrl {
3867 line: line_num,
3868 start_col: col_start,
3869 end_col: col_end,
3870 byte_offset: match_start,
3871 byte_end: match_end,
3872 url: email.to_string(),
3873 url_type: "email".to_string(),
3874 });
3875 }
3876
3877 bare_urls
3878 }
3879
/// Returns a [`ValidHeadingsIter`] constructed over this context's lines.
///
/// NOTE(review): presumably the iterator yields only headings whose
/// `is_valid` flag is set; the filtering lives in `ValidHeadingsIter`, which
/// is defined elsewhere. Confirm there.
#[must_use]
pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
    ValidHeadingsIter::new(&self.lines)
}
3903
3904 #[must_use]
3908 pub fn has_valid_headings(&self) -> bool {
3909 self.lines
3910 .iter()
3911 .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
3912 }
3913}
3914
3915fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
3917 if list_blocks.len() < 2 {
3918 return;
3919 }
3920
3921 let mut merger = ListBlockMerger::new(content, lines);
3922 *list_blocks = merger.merge(list_blocks);
3923}
3924
/// Borrowed view of a document used while deciding which neighbouring list
/// blocks to merge.
struct ListBlockMerger<'a> {
    /// Full document text; line contents are read from it.
    content: &'a str,
    /// Per-line metadata, looked up by `line number - 1`.
    lines: &'a [LineInfo],
}
3930
3931impl<'a> ListBlockMerger<'a> {
3932 fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
3933 Self { content, lines }
3934 }
3935
3936 fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
3937 let mut merged = Vec::with_capacity(list_blocks.len());
3938 let mut current = list_blocks[0].clone();
3939
3940 for next in list_blocks.iter().skip(1) {
3941 if self.should_merge_blocks(¤t, next) {
3942 current = self.merge_two_blocks(current, next);
3943 } else {
3944 merged.push(current);
3945 current = next.clone();
3946 }
3947 }
3948
3949 merged.push(current);
3950 merged
3951 }
3952
3953 fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
3955 if !self.blocks_are_compatible(current, next) {
3957 return false;
3958 }
3959
3960 let spacing = self.analyze_spacing_between(current, next);
3962 match spacing {
3963 BlockSpacing::Consecutive => true,
3964 BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
3965 BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
3966 self.can_merge_with_content_between(current, next)
3967 }
3968 }
3969 }
3970
3971 fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
3973 current.is_ordered == next.is_ordered
3974 && current.blockquote_prefix == next.blockquote_prefix
3975 && current.nesting_level == next.nesting_level
3976 }
3977
3978 fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
3980 let gap = next.start_line - current.end_line;
3981
3982 match gap {
3983 1 => BlockSpacing::Consecutive,
3984 2 => BlockSpacing::SingleBlank,
3985 _ if gap > 2 => {
3986 if self.has_only_blank_lines_between(current, next) {
3987 BlockSpacing::MultipleBlanks
3988 } else {
3989 BlockSpacing::ContentBetween
3990 }
3991 }
3992 _ => BlockSpacing::Consecutive, }
3994 }
3995
3996 fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3998 if has_meaningful_content_between(self.content, current, next, self.lines) {
4001 return false; }
4003
4004 !current.is_ordered && current.marker == next.marker
4006 }
4007
4008 fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4010 if has_meaningful_content_between(self.content, current, next, self.lines) {
4012 return false; }
4014
4015 current.is_ordered && next.is_ordered
4017 }
4018
4019 fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4021 for line_num in (current.end_line + 1)..next.start_line {
4022 if let Some(line_info) = self.lines.get(line_num - 1)
4023 && !line_info.content(self.content).trim().is_empty()
4024 {
4025 return false;
4026 }
4027 }
4028 true
4029 }
4030
4031 fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
4033 current.end_line = next.end_line;
4034 current.item_lines.extend_from_slice(&next.item_lines);
4035
4036 current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
4038
4039 if !current.is_ordered && self.markers_differ(¤t, next) {
4041 current.marker = None; }
4043
4044 current
4045 }
4046
4047 fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
4049 current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
4050 }
4051}
4052
/// How far apart two neighbouring list blocks are, as classified by
/// `ListBlockMerger::analyze_spacing_between` from
/// `next.start_line - current.end_line`.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// Difference of 1: `next` starts on the line right after `current` ends.
    /// Also the fallback for any difference that is not 1, 2 or greater than 2.
    Consecutive,
    /// Difference of 2: exactly one line lies between the blocks.
    SingleBlank,
    /// Difference greater than 2, and every line in between is empty after trimming.
    MultipleBlanks,
    /// Difference greater than 2, and at least one line in between is not empty.
    ContentBetween,
}
4061
4062fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
4064 for line_num in (current.end_line + 1)..next.start_line {
4066 if let Some(line_info) = lines.get(line_num - 1) {
4067 let trimmed = line_info.content(content).trim();
4069
4070 if trimmed.is_empty() {
4072 continue;
4073 }
4074
4075 if line_info.heading.is_some() {
4079 return true; }
4081
4082 if is_horizontal_rule(trimmed) {
4084 return true; }
4086
4087 if crate::utils::skip_context::is_table_line(trimmed) {
4089 return true; }
4091
4092 if trimmed.starts_with('>') {
4094 return true; }
4096
4097 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
4099 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4100
4101 let min_continuation_indent = if current.is_ordered {
4103 current.nesting_level + current.max_marker_width + 1 } else {
4105 current.nesting_level + 2
4106 };
4107
4108 if line_indent < min_continuation_indent {
4109 return true; }
4112 }
4113
4114 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4116
4117 let min_indent = if current.is_ordered {
4119 current.nesting_level + current.max_marker_width
4120 } else {
4121 current.nesting_level + 2
4122 };
4123
4124 if line_indent < min_indent {
4126 return true; }
4128
4129 }
4132 }
4133
4134 false
4136}
4137
4138pub fn is_horizontal_rule_line(line: &str) -> bool {
4145 let leading_spaces = line.len() - line.trim_start_matches(' ').len();
4147 if leading_spaces > 3 || line.starts_with('\t') {
4148 return false;
4149 }
4150
4151 is_horizontal_rule_content(line.trim())
4152}
4153
/// Checks whether already-trimmed text consists solely of a horizontal-rule pattern.
///
/// The text qualifies when its first character is `-`, `*` or `_`, every other
/// character is either that same marker or a space/tab, and the marker occurs at
/// least three times. Text shorter than three bytes never qualifies.
///
/// # Arguments
/// * `trimmed` - line text with leading and trailing whitespace already removed.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    if trimmed.len() < 3 {
        return false;
    }

    // The first character fixes which marker the whole line must use.
    let marker = match trimmed.chars().next() {
        Some(c @ ('-' | '*' | '_')) => c,
        _ => return false,
    };

    let mut marker_count = 0usize;
    for ch in trimmed.chars() {
        match ch {
            c if c == marker => marker_count += 1,
            // Spaces and tabs may be interleaved freely between markers.
            ' ' | '\t' => {}
            // Anything else (including a different marker) disqualifies the line.
            _ => return false,
        }
    }

    marker_count >= 3
}
4178
/// Checks whether already-trimmed text is a horizontal rule.
///
/// Forwards directly to [`is_horizontal_rule_content`] and returns its result
/// unchanged; no indentation check is performed here.
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}
4183
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty document has a single line offset (0) and no line records.
    #[test]
    fn test_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }

    /// A document without a newline is one line; columns are 1-based.
    #[test]
    fn test_single_line() {
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
    }

    /// Line offsets point at the first byte of each line, blank lines included.
    #[test]
    fn test_multi_line() {
        let content = "# Title\n\nSecond line\nThird line";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // start of "# Title"
        assert_eq!(ctx.offset_to_line_col(8), (2, 1)); // the blank line
        assert_eq!(ctx.offset_to_line_col(9), (3, 1)); // start of "Second line"
        assert_eq!(ctx.offset_to_line_col(15), (3, 7)); // inside "Second line"
        assert_eq!(ctx.offset_to_line_col(21), (4, 1)); // start of "Third line"
    }

    /// Per-line metadata: content slice, byte offset, indent and blank flag.
    #[test]
    fn test_line_info() {
        // The second line carries four leading spaces so that `indent == 4` holds.
        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.lines.len(), 7);

        // Line 1: "# Title"
        let line1 = &ctx.lines[0];
        assert_eq!(line1.content(ctx.content), "# Title");
        assert_eq!(line1.byte_offset, 0);
        assert_eq!(line1.indent, 0);
        assert!(!line1.is_blank);
        assert!(!line1.in_code_block);
        assert!(line1.list_item.is_none());

        // Line 2: "    indented"
        let line2 = &ctx.lines[1];
        assert_eq!(line2.content(ctx.content), "    indented");
        assert_eq!(line2.byte_offset, 8);
        assert_eq!(line2.indent, 4);
        assert!(!line2.is_blank);

        // Line 3: blank
        let line3 = &ctx.lines[2];
        assert_eq!(line3.content(ctx.content), "");
        assert!(line3.is_blank);

        // The 1-based line accessors agree with the direct indexing above.
        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
    }

    /// Unordered and ordered list markers are recognised; plain text is not.
    #[test]
    fn test_list_item_detection() {
        // The nested `*` item is indented by two spaces so that its marker sits in
        // column 2, as asserted below.
        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n 2) Nested ordered\n\nNot a list";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Line 1: "- Unordered item"
        let line1 = &ctx.lines[0];
        assert!(line1.list_item.is_some());
        let list1 = line1.list_item.as_ref().unwrap();
        assert_eq!(list1.marker, "-");
        assert!(!list1.is_ordered);
        assert_eq!(list1.marker_column, 0);
        assert_eq!(list1.content_column, 2);

        // Line 2: "  * Nested item"
        let line2 = &ctx.lines[1];
        assert!(line2.list_item.is_some());
        let list2 = line2.list_item.as_ref().unwrap();
        assert_eq!(list2.marker, "*");
        assert_eq!(list2.marker_column, 2);

        // Line 3: "1. Ordered item"
        let line3 = &ctx.lines[2];
        assert!(line3.list_item.is_some());
        let list3 = line3.list_item.as_ref().unwrap();
        assert_eq!(list3.marker, "1.");
        assert!(list3.is_ordered);
        assert_eq!(list3.number, Some(1));

        // Line 6: "Not a list"
        let line6 = &ctx.lines[5];
        assert!(line6.list_item.is_none());
    }

    /// Offsets on newline bytes and at end-of-content still map to a position.
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        let content = "a\nb\nc";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // "a"
        assert_eq!(ctx.offset_to_line_col(1), (1, 2)); // newline ending line 1
        assert_eq!(ctx.offset_to_line_col(2), (2, 1)); // "b"
        assert_eq!(ctx.offset_to_line_col(3), (2, 2)); // newline ending line 2
        assert_eq!(ctx.offset_to_line_col(4), (3, 1)); // "c"
        assert_eq!(ctx.offset_to_line_col(5), (3, 2)); // one past the last byte
    }

    /// In the MDX flavor, leading `import`/`export` lines are flagged as ESM.
    #[test]
    fn test_mdx_esm_blocks() {
        let content = r##"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall

In {year}, the snowfall was above average.
It was followed by a warm spring which caused
flood conditions in many of the nearby rivers.

<Chart color="#fcb32c" year={year} />
"##;

        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);

        assert_eq!(ctx.lines.len(), 10);
        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
        assert!(
            !ctx.lines[3].in_esm_block,
            "Line 4 (heading) should NOT be in_esm_block"
        );
        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
    }

    /// The same `import`/`export` lines are not flagged in the Standard flavor.
    #[test]
    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
        let content = r#"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(
            !ctx.lines[0].in_esm_block,
            "Line 1 should NOT be in_esm_block in Standard flavor"
        );
        assert!(
            !ctx.lines[1].in_esm_block,
            "Line 2 should NOT be in_esm_block in Standard flavor"
        );
    }

    /// Blockquote lines are detected even when the quoted text is indented well
    /// past the `>` marker.
    #[test]
    fn test_blockquote_with_indented_content() {
        // The first quoted line has several spaces after `>` so that
        // `has_multiple_spaces_after_marker` is expected to be set.
        // NOTE(review): assumes `blockquote.content` excludes all whitespace that
        // follows the marker — confirm against the blockquote parser.
        let content = r#"# Heading

>      -S socket-path
> More text
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(
            ctx.lines.get(2).is_some_and(|l| l.blockquote.is_some()),
            "Line 3 should be a blockquote"
        );
        assert!(
            ctx.lines.get(3).is_some_and(|l| l.blockquote.is_some()),
            "Line 4 should be a blockquote"
        );

        let bq3 = ctx.lines.get(2).unwrap().blockquote.as_ref().unwrap();
        assert_eq!(bq3.content, "-S socket-path");
        assert_eq!(bq3.nesting_level, 1);
        assert!(bq3.has_multiple_spaces_after_marker);

        let bq4 = ctx.lines.get(3).unwrap().blockquote.as_ref().unwrap();
        assert_eq!(bq4.content, "More text");
        assert_eq!(bq4.nesting_level, 1);
    }

    /// `[^id]: ...` footnote definitions must not be collected as link reference
    /// definitions; an ordinary `[id]: url "title"` definition still is.
    #[test]
    fn test_footnote_definitions_not_parsed_as_reference_defs() {
        let content = r#"# Title

A footnote[^1].

[^1]: This is the footnote content.

[^note]: Another footnote with [link](https://example.com).

[regular]: ./path.md "A real reference definition"
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(
            ctx.reference_defs.len(),
            1,
            "Footnotes should not be parsed as reference definitions"
        );

        assert_eq!(ctx.reference_defs[0].id, "regular");
        assert_eq!(ctx.reference_defs[0].url, "./path.md");
        assert_eq!(
            ctx.reference_defs[0].title,
            Some("A real reference definition".to_string())
        );
    }

    /// A footnote whose body begins with an inline link must not be mistaken for
    /// a reference definition.
    #[test]
    fn test_footnote_with_inline_link_not_misidentified() {
        let content = r#"# Title

A footnote[^1].

[^1]: [link](https://www.google.com).
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(
            ctx.reference_defs.is_empty(),
            "Footnote with inline link should not create a reference definition"
        );
    }

    /// Footnote labels of several shapes are all excluded, while the regular
    /// reference definitions in the same document are kept.
    #[test]
    fn test_various_footnote_formats_excluded() {
        let content = r#"[^1]: Numeric footnote
[^note]: Named footnote
[^a]: Single char footnote
[^long-footnote-name]: Long named footnote
[^123abc]: Mixed alphanumeric

[ref1]: ./file1.md
[ref2]: ./file2.md
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(
            ctx.reference_defs.len(),
            2,
            "Only regular reference definitions should be parsed"
        );

        let ids: Vec<&str> = ctx.reference_defs.iter().map(|r| r.id.as_str()).collect();
        assert!(ids.contains(&"ref1"));
        assert!(ids.contains(&"ref2"));
        assert!(!ids.iter().any(|id| id.starts_with('^')));
    }
}