1use crate::config::MarkdownFlavor;
2use crate::inline_config::InlineConfig;
3use crate::rules::front_matter_utils::FrontMatterUtils;
4use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
5use crate::utils::element_cache::ElementCache;
6use crate::utils::mkdocs_html_markdown::MarkdownHtmlTracker;
7use crate::utils::regex_cache::URL_SIMPLE_REGEX;
8use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
9use regex::Regex;
10use std::borrow::Cow;
11use std::collections::HashMap;
12use std::path::PathBuf;
13use std::sync::LazyLock;
14
/// Evaluates `$code` and yields its value; when `$profile` is true the elapsed
/// wall-clock time is printed to stderr, labelled with `$name`.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let timer = std::time::Instant::now();
        let output = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, timer.elapsed());
        }
        output
    }};
}

/// wasm32 variant: evaluates `$code` only, without taking any timing.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
32
/// Regex for inline (`[text](url "title")`) and reference-style (`[text][id]`) links.
///
/// Capture groups, as labelled inside the pattern: 1 = link text, 2 = URL in angle
/// brackets, 3 = bare URL, 4 / 5 = double- / single-quoted title, 6 = reference ID.
/// The pattern is compiled with the `s` and `x` flags, so the `#` notes inside the
/// literal are regex comments, not part of what is matched.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
 \[((?:[^\[\]\\]|\\.)*)\] # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
 (?:
 \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
 |
 \[([^\]]*)\] # Reference ID in group 6
 )"#
    ).unwrap()
});
46
/// Regex for inline (`![alt](url "title")`) and reference-style (`![alt][id]`) images.
///
/// Same shape as the link pattern with a leading `!`. Capture groups, as labelled
/// inside the pattern: 1 = alt text, 2 = URL in angle brackets, 3 = bare URL,
/// 4 / 5 = double- / single-quoted title, 6 = reference ID.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
 !\[((?:[^\[\]\\]|\\.)*)\] # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
 (?:
 \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
 |
 \[([^\]]*)\] # Reference ID in group 6
 )"#
    ).unwrap()
});
60
/// Regex for a reference definition line: `[id]: url "optional title"`.
///
/// Multi-line mode; allows up to three leading spaces. Capture groups: 1 = id,
/// 2 = URL (a run of non-whitespace), 3 / 4 = double- / single-quoted title.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
64
/// Regex for an e-mail-like token: local part, `@`, domain, and an alphabetic
/// top-level label of at least two letters. It is unanchored, so it can match
/// anywhere inside a longer string.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
70
/// Regex capturing a leading blockquote prefix: optional whitespace, one or more
/// `>` characters, and any whitespace that follows them (group 1).
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
73
/// Facts recorded for a single line of the document.
///
/// `byte_offset` and `byte_len` locate the line inside the source text (see
/// [`LineInfo::content`]). The remaining fields are flags and optional details
/// describing which construct the line belongs to.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset of the line's first byte within the source text.
    pub byte_offset: usize,
    /// Length in bytes of the slice returned by [`LineInfo::content`].
    pub byte_len: usize,
    /// Indentation of the line (unit not visible here — TODO confirm against the code that fills it).
    pub indent: usize,
    /// Visual indentation (presumably with tabs expanded — TODO confirm).
    pub visual_indent: usize,
    // Context flags. Going by their names, each one marks the line as blank or as
    // lying inside the named construct; most of the passes that set them are not
    // part of this excerpt.
    pub is_blank: bool,
    pub in_code_block: bool,
    pub in_front_matter: bool,
    pub in_html_block: bool,
    pub in_html_comment: bool,
    /// List item details when the line carries a list marker.
    pub list_item: Option<ListItemInfo>,
    /// Heading details when the line was recognised as a heading.
    pub heading: Option<HeadingInfo>,
    /// Blockquote details when the line carries a blockquote prefix.
    pub blockquote: Option<BlockquoteInfo>,
    pub in_mkdocstrings: bool,
    pub in_esm_block: bool,
    /// Set by `LintContext::new` on every line after the first line of a code
    /// span that extends over several lines.
    pub in_code_span_continuation: bool,
    pub is_horizontal_rule: bool,
    pub in_math_block: bool,
    pub in_quarto_div: bool,
    pub in_jsx_expression: bool,
    pub in_mdx_comment: bool,
    pub in_jsx_component: bool,
    pub in_jsx_fragment: bool,
    /// One of the three flags combined by [`LineInfo::in_mkdocs_container`].
    pub in_admonition: bool,
    /// One of the three flags combined by [`LineInfo::in_mkdocs_container`].
    pub in_content_tab: bool,
    /// One of the three flags combined by [`LineInfo::in_mkdocs_container`].
    pub in_mkdocs_html_markdown: bool,
    pub in_definition_list: bool,
    pub in_obsidian_comment: bool,
    pub in_pymdown_block: bool,
}
137
138impl LineInfo {
139 pub fn content<'a>(&self, source: &'a str) -> &'a str {
141 &source[self.byte_offset..self.byte_offset + self.byte_len]
142 }
143
144 #[inline]
148 pub fn in_mkdocs_container(&self) -> bool {
149 self.in_admonition || self.in_content_tab || self.in_mkdocs_html_markdown
150 }
151}
152
/// Bundle of borrowed byte-range lists that `LintContext::new` computes up front
/// and hands to `compute_basic_line_info` as a single argument.
struct SkipByteRanges<'a> {
    /// Ranges produced by `compute_html_comment_ranges`.
    html_comment_ranges: &'a [crate::utils::skip_context::ByteRange],
    /// Ranges produced by `detect_autodoc_block_ranges` (empty unless the flavor is MkDocs).
    autodoc_ranges: &'a [crate::utils::skip_context::ByteRange],
    /// Ranges produced by `quarto_divs::detect_div_block_ranges` (empty unless the flavor is Quarto).
    quarto_div_ranges: &'a [crate::utils::skip_context::ByteRange],
    /// Ranges produced by `pymdown_blocks::detect_block_ranges` (empty unless the flavor is MkDocs).
    pymdown_block_ranges: &'a [crate::utils::skip_context::ByteRange],
}
161
/// Details of the list marker found on a line.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker text as written (presumably e.g. `-`, `*` or `1.` — TODO confirm).
    pub marker: String,
    /// Whether the item belongs to an ordered list; compared between parent and
    /// child items by the mixed-nesting check.
    pub is_ordered: bool,
    /// The item's number (presumably only set for ordered items — TODO confirm).
    pub number: Option<usize>,
    /// Column of the marker. The mixed-nesting check treats a value of 1 the same as 0.
    pub marker_column: usize,
    /// Column where the item's content starts (indexing base not visible here).
    pub content_column: usize,
}
176
/// How a heading is written in the source.
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    /// ATX heading (presumably the `#`-prefixed form — TODO confirm against the parser).
    ATX,
    /// Setext heading, first variant (presumably the `=` underline, level 1 — TODO confirm).
    Setext1,
    /// Setext heading, second variant (presumably the `-` underline, level 2 — TODO confirm).
    Setext2,
}
187
/// A link found in the document, with its position and target.
///
/// Text, URL and reference id are `Cow`s, so they may borrow from the source
/// text for `'a` or own their data.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// Line of the link (presumably 1-indexed like the other line numbers in this file — TODO confirm).
    pub line: usize,
    /// Start column of the link on its line (indexing base not visible here).
    pub start_col: usize,
    /// End column of the link on its line (indexing base not visible here).
    pub end_col: usize,
    /// Byte offset where the link starts in the source text.
    pub byte_offset: usize,
    /// Byte offset where the link ends (presumably exclusive — TODO confirm).
    pub byte_end: usize,
    /// The link text.
    pub text: Cow<'a, str>,
    /// The link destination.
    pub url: Cow<'a, str>,
    /// Whether the link is a reference-style link.
    pub is_reference: bool,
    /// The reference id, when there is one.
    pub reference_id: Option<Cow<'a, str>>,
    /// The link type as classified by `pulldown_cmark`.
    pub link_type: LinkType,
}
212
/// A reference the Markdown parser could not resolve, as reported through the
/// broken-link callback installed in `parse_links`.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The unresolved reference text (copied from `BrokenLink::reference`).
    pub reference: String,
    /// Byte span of the broken link in the source (copied from `BrokenLink::span`).
    pub span: std::ops::Range<usize>,
}
221
/// A footnote reference found in the document.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// The footnote identifier.
    pub id: String,
    /// Line of the reference (presumably 1-indexed — TODO confirm).
    pub line: usize,
    /// Byte offset where the reference starts.
    pub byte_offset: usize,
    /// Byte offset where the reference ends (presumably exclusive — TODO confirm).
    pub byte_end: usize,
}
234
/// An image found in the document, with its position and target.
///
/// Mirrors `ParsedLink`, with `alt_text` in place of the link text.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// Line of the image (presumably 1-indexed — TODO confirm).
    pub line: usize,
    /// Start column of the image on its line (indexing base not visible here).
    pub start_col: usize,
    /// End column of the image on its line (indexing base not visible here).
    pub end_col: usize,
    /// Byte offset where the image starts in the source text.
    pub byte_offset: usize,
    /// Byte offset where the image ends (presumably exclusive — TODO confirm).
    pub byte_end: usize,
    /// The image's alt text.
    pub alt_text: Cow<'a, str>,
    /// The image source URL.
    pub url: Cow<'a, str>,
    /// Whether the image uses reference-style syntax.
    pub is_reference: bool,
    /// The reference id, when there is one.
    pub reference_id: Option<Cow<'a, str>>,
    /// The link type as classified by `pulldown_cmark`.
    pub link_type: LinkType,
}
259
/// A reference definition (`[id]: url "title"`) found in the document.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line of the definition (presumably 1-indexed — TODO confirm).
    pub line: usize,
    /// The reference id as written; lookups lowercase it before comparing.
    pub id: String,
    /// The destination URL.
    pub url: String,
    /// The optional title.
    pub title: Option<String>,
    /// Start of the definition's byte range (inclusive, see `is_in_reference_def`).
    pub byte_offset: usize,
    /// End of the definition's byte range (exclusive, see `is_in_reference_def`).
    pub byte_end: usize,
    /// Start of the title's byte range (inclusive, see `is_in_link_title`), if a title exists.
    pub title_byte_start: Option<usize>,
    /// End of the title's byte range (exclusive, see `is_in_link_title`), if a title exists.
    pub title_byte_end: Option<usize>,
}
280
/// An inline code span, which may extend over several lines.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// 1-indexed line on which the span starts.
    pub line: usize,
    /// 1-indexed line on which the span ends (equal to `line` for single-line spans).
    pub end_line: usize,
    /// 0-indexed start column on `line` (inclusive, as compared in `is_in_code_span`).
    pub start_col: usize,
    /// 0-indexed end column on `end_line` (exclusive, as compared in `is_in_code_span`).
    pub end_col: usize,
    /// Start of the span's byte range (inclusive).
    pub byte_offset: usize,
    /// End of the span's byte range (exclusive).
    pub byte_end: usize,
    /// Number of backticks in the delimiter (presumably of the opening run — TODO confirm).
    pub backtick_count: usize,
    /// The span's content (presumably without the delimiters — TODO confirm).
    pub content: String,
}
301
/// A math span found in the document.
#[derive(Debug, Clone)]
pub struct MathSpan {
    /// Line on which the span starts (presumably 1-indexed — TODO confirm).
    pub line: usize,
    /// Line on which the span ends.
    pub end_line: usize,
    /// Start column of the span (indexing base not visible here).
    pub start_col: usize,
    /// End column of the span (indexing base not visible here).
    pub end_col: usize,
    /// Start of the span's byte range (inclusive, see `is_in_math_span`).
    pub byte_offset: usize,
    /// End of the span's byte range (exclusive, see `is_in_math_span`).
    pub byte_end: usize,
    /// Whether this is display math (presumably `$$...$$` as opposed to inline `$...$` — TODO confirm).
    pub is_display: bool,
    /// The math content.
    pub content: String,
}
322
/// Details of a heading detected on a line.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level (presumably 1-6 — TODO confirm).
    pub level: u8,
    /// Syntax used to write the heading.
    pub style: HeadingStyle,
    /// The heading marker as written.
    pub marker: String,
    /// Column of the marker (indexing base not visible here).
    pub marker_column: usize,
    /// Column where the heading text starts (indexing base not visible here).
    pub content_column: usize,
    /// The heading text (presumably with any custom id removed — TODO confirm).
    pub text: String,
    /// Custom id attached to the heading, if any.
    pub custom_id: Option<String>,
    /// The heading text as written (presumably before clean-up — TODO confirm).
    pub raw_text: String,
    /// Whether the heading has a closing sequence.
    pub has_closing_sequence: bool,
    /// The closing sequence text.
    pub closing_sequence: String,
    /// Whether this candidate counts as a real heading; `ValidHeadingsIter` only
    /// yields headings with this flag set.
    pub is_valid: bool,
}
350
/// One item yielded by `ValidHeadingsIter`: a heading whose `is_valid` flag is
/// set, together with the line it was found on.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// 1-indexed line number of the heading.
    pub line_num: usize,
    /// The heading details stored on that line.
    pub heading: &'a HeadingInfo,
    /// The full line record the heading belongs to.
    pub line_info: &'a LineInfo,
}
364
/// Iterator over the lines of a document that yields only the headings whose
/// `is_valid` flag is set.
pub struct ValidHeadingsIter<'a> {
    /// All line records of the document, in order.
    lines: &'a [LineInfo],
    /// Index into `lines` of the next line to examine.
    current_index: usize,
}
373
374impl<'a> ValidHeadingsIter<'a> {
375 fn new(lines: &'a [LineInfo]) -> Self {
376 Self {
377 lines,
378 current_index: 0,
379 }
380 }
381}
382
383impl<'a> Iterator for ValidHeadingsIter<'a> {
384 type Item = ValidHeading<'a>;
385
386 fn next(&mut self) -> Option<Self::Item> {
387 while self.current_index < self.lines.len() {
388 let idx = self.current_index;
389 self.current_index += 1;
390
391 let line_info = &self.lines[idx];
392 if let Some(heading) = &line_info.heading
393 && heading.is_valid
394 {
395 return Some(ValidHeading {
396 line_num: idx + 1, heading,
398 line_info,
399 });
400 }
401 }
402 None
403 }
404}
405
/// Details of the blockquote prefix found on a line.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting depth of the quote (presumably the number of `>` markers — TODO confirm).
    pub nesting_level: usize,
    /// Whitespace before the first marker.
    pub indent: String,
    /// Column of the first marker (indexing base not visible here).
    pub marker_column: usize,
    /// The blockquote prefix; `blockquote_prefix_for_blank_line` returns it with
    /// trailing whitespace trimmed.
    pub prefix: String,
    /// The line's content after the prefix.
    pub content: String,
    /// Flag named for a missing space after the marker.
    pub has_no_space_after_marker: bool,
    /// Flag named for more than one space after the marker.
    pub has_multiple_spaces_after_marker: bool,
    /// Flag consumed by the MD028 rule, going by its name (semantics not visible here).
    pub needs_md028_fix: bool,
}
426
/// A contiguous list in the document.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// First line of the block (inclusive; compared against 1-indexed line numbers in `is_in_list_block`).
    pub start_line: usize,
    /// Last line of the block (inclusive).
    pub end_line: usize,
    /// Whether the list is ordered.
    pub is_ordered: bool,
    /// The list marker (presumably `None` when items use differing markers — TODO confirm).
    pub marker: Option<String>,
    /// Blockquote prefix shared by the block's lines (presumably empty outside a blockquote — TODO confirm).
    pub blockquote_prefix: String,
    /// Line numbers of the list items in the block.
    pub item_lines: Vec<usize>,
    /// Nesting level of the block.
    pub nesting_level: usize,
    /// Width of the widest marker in the block.
    pub max_marker_width: usize,
}
447
448use std::sync::{Arc, OnceLock};
449
/// Map from a `usize` key (presumably a line index — TODO confirm) to a tuple of
/// list-item facts. NOTE(review): the tuple shape matches the field types of
/// `ListItemInfo` (`is_ordered`, `marker`, `marker_column`, `content_column`,
/// `number`), but the code that fills it is outside this view — confirm the order.
type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;
452
/// A list of `(start, end)` byte-offset pairs.
type ByteRanges = Vec<(usize, usize)>;
455
/// Occurrence counters for selected characters, used by `has_char`,
/// `char_count` and the `likely_has_*` heuristics so they do not have to rescan
/// the document.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Counter consulted for `'#'`.
    pub hash_count: usize,
    /// Counter consulted for `'*'`.
    pub asterisk_count: usize,
    /// Counter consulted for `'_'`.
    pub underscore_count: usize,
    /// Counter consulted for `'-'`.
    pub hyphen_count: usize,
    /// Counter consulted for `'+'`.
    pub plus_count: usize,
    /// Counter consulted for `'>'`.
    pub gt_count: usize,
    /// Counter consulted for `'|'`.
    pub pipe_count: usize,
    /// Counter consulted for `'['` (whether `']'` is also counted is not visible here).
    pub bracket_count: usize,
    /// Counter consulted for the backtick character.
    pub backtick_count: usize,
    /// Counter consulted for `'<'`.
    pub lt_count: usize,
    /// Counter consulted for `'!'`.
    pub exclamation_count: usize,
    /// Counter consulted for `'\n'`.
    pub newline_count: usize,
}
484
/// An HTML tag found in the document.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line of the tag; matched against the argument of `html_tags_on_line`.
    pub line: usize,
    /// Start column of the tag (indexing base not visible here).
    pub start_col: usize,
    /// End column of the tag (indexing base not visible here).
    pub end_col: usize,
    /// Start of the tag's byte range (inclusive, see `is_in_html_tag`).
    pub byte_offset: usize,
    /// End of the tag's byte range (exclusive, see `is_in_html_tag`).
    pub byte_end: usize,
    /// The tag name.
    pub tag_name: String,
    /// Whether this is a closing tag.
    pub is_closing: bool,
    /// Whether the tag is self-closing.
    pub is_self_closing: bool,
    /// The tag's source text.
    pub raw_content: String,
}
507
/// An emphasis span found in the document.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line of the span; matched against the argument of `emphasis_spans_on_line`.
    pub line: usize,
    /// Start column of the span (indexing base not visible here).
    pub start_col: usize,
    /// End column of the span (indexing base not visible here).
    pub end_col: usize,
    /// Byte offset where the span starts.
    pub byte_offset: usize,
    /// Byte offset where the span ends (presumably exclusive — TODO confirm).
    pub byte_end: usize,
    /// The emphasis marker character (presumably `*` or `_` — TODO confirm).
    pub marker: char,
    /// Number of marker characters in the delimiter.
    pub marker_count: usize,
    /// The emphasised content.
    pub content: String,
}
528
/// A table row found in the document.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line of the row; matched against the argument of `table_rows_on_line`.
    pub line: usize,
    /// Whether the row is the separator (delimiter) row.
    pub is_separator: bool,
    /// Number of columns in the row.
    pub column_count: usize,
    /// Column alignments as strings (the set of values is not visible here).
    pub column_alignments: Vec<String>,
}
541
/// A bare URL found in the document.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line of the URL; matched against the argument of `bare_urls_on_line`.
    pub line: usize,
    /// Start column of the URL (indexing base not visible here).
    pub start_col: usize,
    /// End column of the URL (indexing base not visible here).
    pub end_col: usize,
    /// Byte offset where the URL starts.
    pub byte_offset: usize,
    /// Byte offset where the URL ends (presumably exclusive — TODO confirm).
    pub byte_end: usize,
    /// The URL text.
    pub url: String,
    /// Kind of URL as a string (the set of values is not visible here).
    pub url_type: String,
}
560
/// Shared, precomputed view of one Markdown document that lint rules read from.
///
/// `LintContext::new` fills most fields eagerly; the `*_cache` fields hold data
/// behind `OnceLock`s and are exposed through accessor methods.
pub struct LintContext<'a> {
    /// The Markdown source text.
    pub content: &'a str,
    /// Byte offset at which each line starts; the first entry is 0.
    pub line_offsets: Vec<usize>,
    /// Code block byte ranges from `CodeBlockUtils::detect_code_blocks_and_spans`.
    pub code_blocks: Vec<(usize, usize)>,
    /// Line records; line `n` (1-indexed) is stored at index `n - 1`.
    pub lines: Vec<LineInfo>,
    /// Links found by `parse_links`.
    pub links: Vec<ParsedLink<'a>>,
    /// Images found by `parse_images`.
    pub images: Vec<ParsedImage<'a>>,
    /// References the parser reported as unresolved.
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Footnote references found by `parse_links`.
    pub footnote_refs: Vec<FootnoteRef>,
    /// Reference definitions found by `parse_reference_defs`.
    pub reference_defs: Vec<ReferenceDef>,
    /// Lowercased reference id mapped to its index in `reference_defs`.
    reference_defs_map: HashMap<String, usize>,
    /// Code spans; initialised during construction.
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>,
    /// Math spans; computed on first call to `math_spans`.
    math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>,
    /// List blocks found by `parse_list_blocks`.
    pub list_blocks: Vec<ListBlock>,
    /// Character counters from `compute_char_frequency`.
    pub char_frequency: CharFrequency,
    /// HTML tags; computed on first call to `html_tags`.
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>,
    /// Emphasis spans; initialised during construction.
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>,
    /// Table rows; computed on first call to `table_rows`.
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>,
    /// Bare URLs; computed on first call to `bare_urls`.
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>,
    /// Result of the mixed-list-nesting check; computed on first use.
    has_mixed_list_nesting_cache: OnceLock<bool>,
    /// HTML comment byte ranges from `compute_html_comment_ranges`.
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Table blocks from `TableUtils::find_table_blocks_with_code_info`.
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Line index built over `content`; used to turn a line/column into a byte range.
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    /// Byte ranges from `jinja_utils::find_jinja_ranges`.
    jinja_ranges: Vec<(usize, usize)>,
    /// The Markdown flavor the document is linted as.
    pub flavor: MarkdownFlavor,
    /// Path of the file being linted, when one was supplied.
    pub source_file: Option<PathBuf>,
    /// JSX expression byte ranges from `detect_jsx_and_mdx_comments`.
    jsx_expression_ranges: Vec<(usize, usize)>,
    /// MDX comment byte ranges from `detect_jsx_and_mdx_comments`.
    mdx_comment_ranges: Vec<(usize, usize)>,
    /// Citation byte ranges (empty unless the flavor is Quarto).
    citation_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Byte ranges matched by `HUGO_SHORTCODE_REGEX`.
    shortcode_ranges: Vec<(usize, usize)>,
    /// Inline configuration parsed from the document; backs `is_rule_disabled`.
    inline_config: InlineConfig,
    /// Obsidian comment byte ranges from `detect_obsidian_comments`.
    obsidian_comment_ranges: Vec<(usize, usize)>,
}
594
/// The four consecutive pieces of a blockquote line, each borrowed from the
/// input: leading indent, the run of `>` markers, the whitespace after the
/// markers, and the remaining content.
struct BlockquoteComponents<'a> {
    indent: &'a str,
    markers: &'a str,
    spaces_after: &'a str,
    content: &'a str,
}

/// Splits `line` into its blockquote components.
///
/// Leading spaces and tabs form the indent. The next character must be `>`,
/// otherwise `None` is returned. All consecutive `>` characters form the
/// markers, the spaces and tabs after them form `spaces_after`, and the rest of
/// the line is the content. A later `>` separated by whitespace (as in `> > x`)
/// is part of the content, not of the markers.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    const BLANKS: &[char] = &[' ', '\t'];

    // Peel each component off the front; the length difference between the
    // string before and after trimming gives the component's extent.
    let after_indent = line.trim_start_matches(BLANKS);
    let (indent, _) = line.split_at(line.len() - after_indent.len());

    if !after_indent.starts_with('>') {
        return None;
    }

    let after_markers = after_indent.trim_start_matches('>');
    let (markers, _) = after_indent.split_at(after_indent.len() - after_markers.len());

    let content = after_markers.trim_start_matches(BLANKS);
    let (spaces_after, _) = after_markers.split_at(after_markers.len() - content.len());

    Some(BlockquoteComponents {
        indent,
        markers,
        spaces_after,
        content,
    })
}
639
640impl<'a> LintContext<'a> {
641 pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
642 #[cfg(not(target_arch = "wasm32"))]
643 let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
644 #[cfg(target_arch = "wasm32")]
645 let profile = false;
646
647 let line_offsets = profile_section!("Line offsets", profile, {
648 let mut offsets = vec![0];
649 for (i, c) in content.char_indices() {
650 if c == '\n' {
651 offsets.push(i + 1);
652 }
653 }
654 offsets
655 });
656
657 let (code_blocks, code_span_ranges) = profile_section!(
659 "Code blocks",
660 profile,
661 CodeBlockUtils::detect_code_blocks_and_spans(content)
662 );
663
664 let html_comment_ranges = profile_section!(
666 "HTML comment ranges",
667 profile,
668 crate::utils::skip_context::compute_html_comment_ranges(content)
669 );
670
671 let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
673 if flavor == MarkdownFlavor::MkDocs {
674 crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
675 } else {
676 Vec::new()
677 }
678 });
679
680 let quarto_div_ranges = profile_section!("Quarto div ranges", profile, {
682 if flavor == MarkdownFlavor::Quarto {
683 crate::utils::quarto_divs::detect_div_block_ranges(content)
684 } else {
685 Vec::new()
686 }
687 });
688
689 let pymdown_block_ranges = profile_section!("PyMdown block ranges", profile, {
691 if flavor == MarkdownFlavor::MkDocs {
692 crate::utils::pymdown_blocks::detect_block_ranges(content)
693 } else {
694 Vec::new()
695 }
696 });
697
698 let skip_ranges = SkipByteRanges {
701 html_comment_ranges: &html_comment_ranges,
702 autodoc_ranges: &autodoc_ranges,
703 quarto_div_ranges: &quarto_div_ranges,
704 pymdown_block_ranges: &pymdown_block_ranges,
705 };
706 let (mut lines, emphasis_spans) = profile_section!(
707 "Basic line info",
708 profile,
709 Self::compute_basic_line_info(content, &line_offsets, &code_blocks, flavor, &skip_ranges,)
710 );
711
712 profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));
714
715 profile_section!(
717 "ESM blocks",
718 profile,
719 Self::detect_esm_blocks(content, &mut lines, flavor)
720 );
721
722 let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
724 "JSX/MDX detection",
725 profile,
726 Self::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
727 );
728
729 profile_section!(
731 "MkDocs constructs",
732 profile,
733 Self::detect_mkdocs_line_info(content, &mut lines, flavor)
734 );
735
736 let obsidian_comment_ranges = profile_section!(
738 "Obsidian comments",
739 profile,
740 Self::detect_obsidian_comments(content, &mut lines, flavor, &code_span_ranges)
741 );
742
743 let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));
745
746 profile_section!(
748 "Headings & blockquotes",
749 profile,
750 Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
751 );
752
753 let code_spans = profile_section!(
755 "Code spans",
756 profile,
757 Self::build_code_spans_from_ranges(content, &lines, &code_span_ranges)
758 );
759
760 for span in &code_spans {
763 if span.end_line > span.line {
764 for line_num in (span.line + 1)..=span.end_line {
766 if let Some(line_info) = lines.get_mut(line_num - 1) {
767 line_info.in_code_span_continuation = true;
768 }
769 }
770 }
771 }
772
773 let (links, broken_links, footnote_refs) = profile_section!(
775 "Links",
776 profile,
777 Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
778 );
779
780 let images = profile_section!(
781 "Images",
782 profile,
783 Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
784 );
785
786 let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));
787
788 let reference_defs_map: HashMap<String, usize> = reference_defs
790 .iter()
791 .enumerate()
792 .map(|(idx, def)| (def.id.to_lowercase(), idx))
793 .collect();
794
795 let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));
796
797 let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));
799
800 let table_blocks = profile_section!(
802 "Table blocks",
803 profile,
804 crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
805 content,
806 &code_blocks,
807 &code_spans,
808 &html_comment_ranges,
809 )
810 );
811
812 let line_index = profile_section!(
814 "Line index",
815 profile,
816 crate::utils::range_utils::LineIndex::new(content)
817 );
818
819 let jinja_ranges = profile_section!(
821 "Jinja ranges",
822 profile,
823 crate::utils::jinja_utils::find_jinja_ranges(content)
824 );
825
826 let citation_ranges = profile_section!("Citation ranges", profile, {
828 if flavor == MarkdownFlavor::Quarto {
829 crate::utils::quarto_divs::find_citation_ranges(content)
830 } else {
831 Vec::new()
832 }
833 });
834
835 let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
837 use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
838 let mut ranges = Vec::new();
839 for mat in HUGO_SHORTCODE_REGEX.find_iter(content).flatten() {
840 ranges.push((mat.start(), mat.end()));
841 }
842 ranges
843 });
844
845 let inline_config = InlineConfig::from_content_with_code_blocks(content, &code_blocks);
846
847 Self {
848 content,
849 line_offsets,
850 code_blocks,
851 lines,
852 links,
853 images,
854 broken_links,
855 footnote_refs,
856 reference_defs,
857 reference_defs_map,
858 code_spans_cache: OnceLock::from(Arc::new(code_spans)),
859 math_spans_cache: OnceLock::new(), list_blocks,
861 char_frequency,
862 html_tags_cache: OnceLock::new(),
863 emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
864 table_rows_cache: OnceLock::new(),
865 bare_urls_cache: OnceLock::new(),
866 has_mixed_list_nesting_cache: OnceLock::new(),
867 html_comment_ranges,
868 table_blocks,
869 line_index,
870 jinja_ranges,
871 flavor,
872 source_file,
873 jsx_expression_ranges,
874 mdx_comment_ranges,
875 citation_ranges,
876 shortcode_ranges,
877 inline_config,
878 obsidian_comment_ranges,
879 }
880 }
881
882 pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
887 self.inline_config.is_rule_disabled(rule_name, line_number)
888 }
889
890 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
892 Arc::clone(
893 self.code_spans_cache
894 .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
895 )
896 }
897
898 pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
900 Arc::clone(
901 self.math_spans_cache
902 .get_or_init(|| Arc::new(Self::parse_math_spans(self.content, &self.lines))),
903 )
904 }
905
906 pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
908 let math_spans = self.math_spans();
909 math_spans
910 .iter()
911 .any(|span| byte_pos >= span.byte_offset && byte_pos < span.byte_end)
912 }
913
914 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
916 &self.html_comment_ranges
917 }
918
919 pub fn obsidian_comment_ranges(&self) -> &[(usize, usize)] {
922 &self.obsidian_comment_ranges
923 }
924
925 pub fn is_in_obsidian_comment(&self, byte_pos: usize) -> bool {
929 self.obsidian_comment_ranges
930 .iter()
931 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
932 }
933
934 pub fn is_position_in_obsidian_comment(&self, line_num: usize, col: usize) -> bool {
939 if self.obsidian_comment_ranges.is_empty() {
940 return false;
941 }
942
943 let byte_pos = self.line_index.line_col_to_byte_range(line_num, col).start;
945 self.is_in_obsidian_comment(byte_pos)
946 }
947
948 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
950 Arc::clone(self.html_tags_cache.get_or_init(|| {
951 Arc::new(Self::parse_html_tags(
952 self.content,
953 &self.lines,
954 &self.code_blocks,
955 self.flavor,
956 ))
957 }))
958 }
959
960 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
962 Arc::clone(
963 self.emphasis_spans_cache
964 .get()
965 .expect("emphasis_spans_cache initialized during construction"),
966 )
967 }
968
969 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
971 Arc::clone(
972 self.table_rows_cache
973 .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
974 )
975 }
976
977 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
979 Arc::clone(
980 self.bare_urls_cache
981 .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
982 )
983 }
984
985 pub fn has_mixed_list_nesting(&self) -> bool {
989 *self
990 .has_mixed_list_nesting_cache
991 .get_or_init(|| self.compute_mixed_list_nesting())
992 }
993
994 fn compute_mixed_list_nesting(&self) -> bool {
996 let mut stack: Vec<(usize, bool)> = Vec::new();
1001 let mut last_was_blank = false;
1002
1003 for line_info in &self.lines {
1004 if line_info.in_code_block
1006 || line_info.in_front_matter
1007 || line_info.in_mkdocstrings
1008 || line_info.in_html_comment
1009 || line_info.in_esm_block
1010 {
1011 continue;
1012 }
1013
1014 if line_info.is_blank {
1016 last_was_blank = true;
1017 continue;
1018 }
1019
1020 if let Some(list_item) = &line_info.list_item {
1021 let current_pos = if list_item.marker_column == 1 {
1023 0
1024 } else {
1025 list_item.marker_column
1026 };
1027
1028 if last_was_blank && current_pos == 0 {
1030 stack.clear();
1031 }
1032 last_was_blank = false;
1033
1034 while let Some(&(pos, _)) = stack.last() {
1036 if pos >= current_pos {
1037 stack.pop();
1038 } else {
1039 break;
1040 }
1041 }
1042
1043 if let Some(&(_, parent_is_ordered)) = stack.last()
1045 && parent_is_ordered != list_item.is_ordered
1046 {
1047 return true; }
1049
1050 stack.push((current_pos, list_item.is_ordered));
1051 } else {
1052 last_was_blank = false;
1054 }
1055 }
1056
1057 false
1058 }
1059
1060 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
1062 match self.line_offsets.binary_search(&offset) {
1063 Ok(line) => (line + 1, 1),
1064 Err(line) => {
1065 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
1066 (line, offset - line_start + 1)
1067 }
1068 }
1069 }
1070
1071 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
1073 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
1075 return true;
1076 }
1077
1078 self.code_spans()
1080 .iter()
1081 .any(|span| pos >= span.byte_offset && pos < span.byte_end)
1082 }
1083
1084 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
1086 if line_num > 0 {
1087 self.lines.get(line_num - 1)
1088 } else {
1089 None
1090 }
1091 }
1092
1093 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
1095 self.line_info(line_num).map(|info| info.byte_offset)
1096 }
1097
1098 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
1100 let normalized_id = ref_id.to_lowercase();
1101 self.reference_defs_map
1102 .get(&normalized_id)
1103 .map(|&idx| self.reference_defs[idx].url.as_str())
1104 }
1105
1106 pub fn get_reference_def(&self, ref_id: &str) -> Option<&ReferenceDef> {
1108 let normalized_id = ref_id.to_lowercase();
1109 self.reference_defs_map
1110 .get(&normalized_id)
1111 .map(|&idx| &self.reference_defs[idx])
1112 }
1113
1114 pub fn has_reference_def(&self, ref_id: &str) -> bool {
1116 let normalized_id = ref_id.to_lowercase();
1117 self.reference_defs_map.contains_key(&normalized_id)
1118 }
1119
1120 pub fn is_in_list_block(&self, line_num: usize) -> bool {
1122 self.list_blocks
1123 .iter()
1124 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
1125 }
1126
1127 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
1129 self.list_blocks
1130 .iter()
1131 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
1132 }
1133
1134 pub fn is_in_code_block(&self, line_num: usize) -> bool {
1138 if line_num == 0 || line_num > self.lines.len() {
1139 return false;
1140 }
1141 self.lines[line_num - 1].in_code_block
1142 }
1143
1144 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
1146 if line_num == 0 || line_num > self.lines.len() {
1147 return false;
1148 }
1149 self.lines[line_num - 1].in_front_matter
1150 }
1151
1152 pub fn is_in_html_block(&self, line_num: usize) -> bool {
1154 if line_num == 0 || line_num > self.lines.len() {
1155 return false;
1156 }
1157 self.lines[line_num - 1].in_html_block
1158 }
1159
1160 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
1162 if line_num == 0 || line_num > self.lines.len() {
1163 return false;
1164 }
1165
1166 let col_0indexed = if col > 0 { col - 1 } else { 0 };
1170 let code_spans = self.code_spans();
1171 code_spans.iter().any(|span| {
1172 if line_num < span.line || line_num > span.end_line {
1174 return false;
1175 }
1176
1177 if span.line == span.end_line {
1178 col_0indexed >= span.start_col && col_0indexed < span.end_col
1180 } else if line_num == span.line {
1181 col_0indexed >= span.start_col
1183 } else if line_num == span.end_line {
1184 col_0indexed < span.end_col
1186 } else {
1187 true
1189 }
1190 })
1191 }
1192
1193 #[inline]
1195 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
1196 let code_spans = self.code_spans();
1197 code_spans
1198 .iter()
1199 .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
1200 }
1201
1202 #[inline]
1205 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
1206 self.reference_defs
1207 .iter()
1208 .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
1209 }
1210
1211 #[inline]
1215 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
1216 self.html_comment_ranges
1217 .iter()
1218 .any(|range| byte_pos >= range.start && byte_pos < range.end)
1219 }
1220
1221 #[inline]
1224 pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1225 self.html_tags()
1226 .iter()
1227 .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
1228 }
1229
1230 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1232 self.jinja_ranges
1233 .iter()
1234 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1235 }
1236
1237 #[inline]
1239 pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
1240 self.jsx_expression_ranges
1241 .iter()
1242 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1243 }
1244
1245 #[inline]
1247 pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
1248 self.mdx_comment_ranges
1249 .iter()
1250 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1251 }
1252
1253 pub fn jsx_expression_ranges(&self) -> &[(usize, usize)] {
1255 &self.jsx_expression_ranges
1256 }
1257
1258 pub fn mdx_comment_ranges(&self) -> &[(usize, usize)] {
1260 &self.mdx_comment_ranges
1261 }
1262
1263 #[inline]
1266 pub fn is_in_citation(&self, byte_pos: usize) -> bool {
1267 self.citation_ranges
1268 .iter()
1269 .any(|range| byte_pos >= range.start && byte_pos < range.end)
1270 }
1271
1272 pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
1274 &self.citation_ranges
1275 }
1276
1277 #[inline]
1279 pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
1280 self.shortcode_ranges
1281 .iter()
1282 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1283 }
1284
1285 pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
1287 &self.shortcode_ranges
1288 }
1289
1290 pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1292 self.reference_defs.iter().any(|def| {
1293 if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1294 byte_pos >= start && byte_pos < end
1295 } else {
1296 false
1297 }
1298 })
1299 }
1300
1301 pub fn has_char(&self, ch: char) -> bool {
1303 match ch {
1304 '#' => self.char_frequency.hash_count > 0,
1305 '*' => self.char_frequency.asterisk_count > 0,
1306 '_' => self.char_frequency.underscore_count > 0,
1307 '-' => self.char_frequency.hyphen_count > 0,
1308 '+' => self.char_frequency.plus_count > 0,
1309 '>' => self.char_frequency.gt_count > 0,
1310 '|' => self.char_frequency.pipe_count > 0,
1311 '[' => self.char_frequency.bracket_count > 0,
1312 '`' => self.char_frequency.backtick_count > 0,
1313 '<' => self.char_frequency.lt_count > 0,
1314 '!' => self.char_frequency.exclamation_count > 0,
1315 '\n' => self.char_frequency.newline_count > 0,
1316 _ => self.content.contains(ch), }
1318 }
1319
1320 pub fn char_count(&self, ch: char) -> usize {
1322 match ch {
1323 '#' => self.char_frequency.hash_count,
1324 '*' => self.char_frequency.asterisk_count,
1325 '_' => self.char_frequency.underscore_count,
1326 '-' => self.char_frequency.hyphen_count,
1327 '+' => self.char_frequency.plus_count,
1328 '>' => self.char_frequency.gt_count,
1329 '|' => self.char_frequency.pipe_count,
1330 '[' => self.char_frequency.bracket_count,
1331 '`' => self.char_frequency.backtick_count,
1332 '<' => self.char_frequency.lt_count,
1333 '!' => self.char_frequency.exclamation_count,
1334 '\n' => self.char_frequency.newline_count,
1335 _ => self.content.matches(ch).count(), }
1337 }
1338
1339 pub fn likely_has_headings(&self) -> bool {
1341 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
1343
1344 pub fn likely_has_lists(&self) -> bool {
1346 self.char_frequency.asterisk_count > 0
1347 || self.char_frequency.hyphen_count > 0
1348 || self.char_frequency.plus_count > 0
1349 }
1350
1351 pub fn likely_has_emphasis(&self) -> bool {
1353 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1354 }
1355
1356 pub fn likely_has_tables(&self) -> bool {
1358 self.char_frequency.pipe_count > 2
1359 }
1360
1361 pub fn likely_has_blockquotes(&self) -> bool {
1363 self.char_frequency.gt_count > 0
1364 }
1365
1366 pub fn likely_has_code(&self) -> bool {
1368 self.char_frequency.backtick_count > 0
1369 }
1370
1371 pub fn likely_has_links_or_images(&self) -> bool {
1373 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1374 }
1375
1376 pub fn likely_has_html(&self) -> bool {
1378 self.char_frequency.lt_count > 0
1379 }
1380
1381 pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1386 if let Some(line_info) = self.lines.get(line_idx)
1387 && let Some(ref bq) = line_info.blockquote
1388 {
1389 bq.prefix.trim_end().to_string()
1390 } else {
1391 String::new()
1392 }
1393 }
1394
1395 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1397 self.html_tags()
1398 .iter()
1399 .filter(|tag| tag.line == line_num)
1400 .cloned()
1401 .collect()
1402 }
1403
1404 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1406 self.emphasis_spans()
1407 .iter()
1408 .filter(|span| span.line == line_num)
1409 .cloned()
1410 .collect()
1411 }
1412
1413 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1415 self.table_rows()
1416 .iter()
1417 .filter(|row| row.line == line_num)
1418 .cloned()
1419 .collect()
1420 }
1421
1422 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1424 self.bare_urls()
1425 .iter()
1426 .filter(|url| url.line == line_num)
1427 .cloned()
1428 .collect()
1429 }
1430
1431 #[inline]
1437 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1438 let idx = match lines.binary_search_by(|line| {
1440 if byte_offset < line.byte_offset {
1441 std::cmp::Ordering::Greater
1442 } else if byte_offset > line.byte_offset + line.byte_len {
1443 std::cmp::Ordering::Less
1444 } else {
1445 std::cmp::Ordering::Equal
1446 }
1447 }) {
1448 Ok(idx) => idx,
1449 Err(idx) => idx.saturating_sub(1),
1450 };
1451
1452 let line = &lines[idx];
1453 let line_num = idx + 1;
1454 let col = byte_offset.saturating_sub(line.byte_offset);
1455
1456 (idx, line_num, col)
1457 }
1458
1459 #[inline]
1461 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1462 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1464
1465 if idx > 0 {
1467 let span = &code_spans[idx - 1];
1468 if offset >= span.byte_offset && offset < span.byte_end {
1469 return true;
1470 }
1471 }
1472
1473 false
1474 }
1475
1476 fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1480 use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1481
1482 let mut link_ranges = Vec::new();
1483 let mut options = Options::empty();
1484 options.insert(Options::ENABLE_WIKILINKS);
1485 options.insert(Options::ENABLE_FOOTNOTES);
1486
1487 let parser = Parser::new_ext(content, options).into_offset_iter();
1488 let mut link_stack: Vec<usize> = Vec::new();
1489
1490 for (event, range) in parser {
1491 match event {
1492 Event::Start(Tag::Link { .. }) => {
1493 link_stack.push(range.start);
1494 }
1495 Event::End(TagEnd::Link) => {
1496 if let Some(start_pos) = link_stack.pop() {
1497 link_ranges.push((start_pos, range.end));
1498 }
1499 }
1500 _ => {}
1501 }
1502 }
1503
1504 link_ranges
1505 }
1506
    /// Collects links, unresolved reference links and footnote references from `content`.
    ///
    /// Works in two passes:
    ///
    /// 1. A pulldown-cmark offset iterator (wikilinks and footnotes enabled) yields
    ///    every link the parser recognises. Its broken-link callback records each
    ///    unresolved reference in the `broken_links` output and returns `None`.
    /// 2. `LINK_PATTERN` is then run over the raw text. Matches with a reference-id
    ///    group (capture 6) whose start offset was not recorded by pass 1 are added
    ///    as reference links with an empty URL.
    ///
    /// Links starting inside an HTML comment range or on an MkDocs snippet line are
    /// dropped in both passes; pass 2 additionally drops matches that are escaped,
    /// preceded by `!`, or start inside a code block or code span.
    ///
    /// Returns `(links, broken_links, footnote_refs)`.
    fn parse_links(
        content: &'a str,
        lines: &[LineInfo],
        code_blocks: &[(usize, usize)],
        code_spans: &[CodeSpan],
        flavor: MarkdownFlavor,
        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
    ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
        use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
        use std::collections::HashSet;

        // Capacity is a size heuristic only: one slot per 500 bytes of input.
        let mut links = Vec::with_capacity(content.len() / 500);
        let mut broken_links = Vec::new();
        let mut footnote_refs = Vec::new();

        // Start offsets of links produced by pass 1; pass 2 skips matches at these offsets.
        let mut found_positions = HashSet::new();

        let mut options = Options::empty();
        options.insert(Options::ENABLE_WIKILINKS);
        options.insert(Options::ENABLE_FOOTNOTES);

        // The callback only records the unresolved reference; returning `None`
        // supplies no replacement destination for it.
        let parser = Parser::new_with_broken_link_callback(
            content,
            options,
            Some(|link: BrokenLink<'_>| {
                broken_links.push(BrokenLinkInfo {
                    reference: link.reference.to_string(),
                    span: link.span.clone(),
                });
                None
            }),
        )
        .into_offset_iter();

        // One entry per currently open link:
        // (start offset, end of the start event's range, destination URL, link type, reference id).
        let mut link_stack: Vec<(
            usize,
            usize,
            pulldown_cmark::CowStr<'a>,
            LinkType,
            pulldown_cmark::CowStr<'a>,
        )> = Vec::new();
        // Text and inline-code pieces seen since the most recent link start, each
        // with its byte range. Only read when building the text of a wiki link.
        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
            match event {
                Event::Start(Tag::Link {
                    link_type,
                    dest_url,
                    id,
                    ..
                }) => {
                    link_stack.push((range.start, range.end, dest_url, link_type, id));
                    text_chunks.clear();
                }
                Event::Text(text) if !link_stack.is_empty() => {
                    text_chunks.push((text.to_string(), range.start, range.end));
                }
                Event::Code(code) if !link_stack.is_empty() => {
                    // Re-wrap inline code in backticks so the collected text keeps its markup.
                    let code_text = format!("`{code}`");
                    text_chunks.push((code_text, range.start, range.end));
                }
                Event::End(TagEnd::Link) => {
                    if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
                        // Drop links that start inside an HTML comment.
                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
                            text_chunks.clear();
                            continue;
                        }

                        let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);

                        // Drop links located on an MkDocs snippet line.
                        if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
                            text_chunks.clear();
                            continue;
                        }

                        // End column is measured relative to the line containing `range.end`.
                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);

                        let is_reference = matches!(
                            link_type,
                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
                        );

                        let link_text = if matches!(link_type, LinkType::WikiLink { .. }) {
                            // Wiki links: use the concatenated text events, or fall back to
                            // the destination when no text was emitted.
                            if !text_chunks.is_empty() {
                                let text: String = text_chunks.iter().map(|(t, _, _)| t.as_str()).collect();
                                Cow::Owned(text)
                            } else {
                                Cow::Owned(url.to_string())
                            }
                        } else if start_pos < content.len() {
                            // Other links: borrow the text straight from the source by
                            // scanning the raw bytes for the bracket that closes the link text.
                            let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];

                            let mut close_pos = None;
                            // Nesting depth of unescaped `[` seen after the first byte.
                            let mut depth = 0;
                            let mut in_code_span = false;

                            // Byte 0 is skipped (presumably the opening `[` of the link).
                            for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
                                // A byte is escaped when preceded by an odd number of backslashes.
                                let mut backslash_count = 0;
                                let mut j = i;
                                while j > 0 && link_bytes[j - 1] == b'\\' {
                                    backslash_count += 1;
                                    j -= 1;
                                }
                                let is_escaped = backslash_count % 2 != 0;

                                // Every unescaped backtick toggles the code-span state.
                                if byte == b'`' && !is_escaped {
                                    in_code_span = !in_code_span;
                                }

                                // Brackets only count outside code spans and when unescaped.
                                if !is_escaped && !in_code_span {
                                    if byte == b'[' {
                                        depth += 1;
                                    } else if byte == b']' {
                                        if depth == 0 {
                                            close_pos = Some(i);
                                            break;
                                        } else {
                                            depth -= 1;
                                        }
                                    }
                                }
                            }

                            // Text is everything between byte 0 and the closing bracket;
                            // empty when no closing bracket was found or the slice is not valid UTF-8.
                            if let Some(pos) = close_pos {
                                Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
                            } else {
                                Cow::Borrowed("")
                            }
                        } else {
                            Cow::Borrowed("")
                        };

                        // Reference links get a lowercased id: the explicit id when the
                        // parser supplied one, otherwise the link text.
                        let reference_id = if is_reference && !ref_id.is_empty() {
                            Some(Cow::Owned(ref_id.to_lowercase()))
                        } else if is_reference {
                            Some(Cow::Owned(link_text.to_lowercase()))
                        } else {
                            None
                        };

                        found_positions.insert(start_pos);

                        links.push(ParsedLink {
                            line: line_num,
                            start_col: col_start,
                            end_col: col_end,
                            byte_offset: start_pos,
                            byte_end: range.end,
                            text: link_text,
                            url: Cow::Owned(url.to_string()),
                            is_reference,
                            reference_id,
                            link_type,
                        });

                        text_chunks.clear();
                    }
                }
                Event::FootnoteReference(footnote_id) => {
                    // Footnote references inside HTML comments are ignored.
                    if is_in_html_comment_ranges(html_comment_ranges, range.start) {
                        continue;
                    }

                    let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
                    footnote_refs.push(FootnoteRef {
                        id: footnote_id.to_string(),
                        line: line_num,
                        byte_offset: range.start,
                        byte_end: range.end,
                    });
                }
                _ => {}
            }
        }

        // Pass 2: regex sweep for reference-style links that pass 1 did not record.
        for cap in LINK_PATTERN.captures_iter(content) {
            let full_match = cap.get(0).unwrap();
            let match_start = full_match.start();
            let match_end = full_match.end();

            // Already produced by pass 1.
            if found_positions.contains(&match_start) {
                continue;
            }

            // Preceded by a backslash: treated as an escaped bracket.
            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
                continue;
            }

            // Preceded by `!`: this is image syntax, not a link.
            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
                continue;
            }

            if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
                continue;
            }

            if Self::is_offset_in_code_span(code_spans, match_start) {
                continue;
            }

            if is_in_html_comment_ranges(html_comment_ranges, match_start) {
                continue;
            }

            let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);

            if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
                continue;
            }

            let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);

            let text = cap.get(1).map_or("", |m| m.as_str());

            // Only matches with a reference-id group are recorded here; inline
            // `[text](url)` matches fall through without being pushed.
            if let Some(ref_id) = cap.get(6) {
                let ref_id_str = ref_id.as_str();
                // An empty id (`[text][]`) falls back to the lowercased link text.
                let normalized_ref = if ref_id_str.is_empty() {
                    Cow::Owned(text.to_lowercase()) } else {
                    Cow::Owned(ref_id_str.to_lowercase())
                };

                links.push(ParsedLink {
                    line: line_num,
                    start_col: col_start,
                    end_col: col_end,
                    byte_offset: match_start,
                    byte_end: match_end,
                    text: Cow::Borrowed(text),
                    url: Cow::Borrowed(""), is_reference: true,
                    reference_id: Some(normalized_ref),
                    link_type: LinkType::Reference, });
            }
        }

        (links, broken_links, footnote_refs)
    }
1794
1795 fn parse_images(
1797 content: &'a str,
1798 lines: &[LineInfo],
1799 code_blocks: &[(usize, usize)],
1800 code_spans: &[CodeSpan],
1801 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1802 ) -> Vec<ParsedImage<'a>> {
1803 use crate::utils::skip_context::is_in_html_comment_ranges;
1804 use std::collections::HashSet;
1805
1806 let mut images = Vec::with_capacity(content.len() / 1000);
1808 let mut found_positions = HashSet::new();
1809
1810 let parser = Parser::new(content).into_offset_iter();
1812 let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1813 Vec::new();
1814 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1817 match event {
1818 Event::Start(Tag::Image {
1819 link_type,
1820 dest_url,
1821 id,
1822 ..
1823 }) => {
1824 image_stack.push((range.start, dest_url, link_type, id));
1825 text_chunks.clear();
1826 }
1827 Event::Text(text) if !image_stack.is_empty() => {
1828 text_chunks.push((text.to_string(), range.start, range.end));
1829 }
1830 Event::Code(code) if !image_stack.is_empty() => {
1831 let code_text = format!("`{code}`");
1832 text_chunks.push((code_text, range.start, range.end));
1833 }
1834 Event::End(TagEnd::Image) => {
1835 if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1836 if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1838 continue;
1839 }
1840
1841 if Self::is_offset_in_code_span(code_spans, start_pos) {
1843 continue;
1844 }
1845
1846 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1848 continue;
1849 }
1850
1851 let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1853 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1854
1855 let is_reference = matches!(
1856 link_type,
1857 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1858 );
1859
1860 let alt_text = if start_pos < content.len() {
1863 let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1864
1865 let mut close_pos = None;
1868 let mut depth = 0;
1869
1870 if image_bytes.len() > 2 {
1871 for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1872 let mut backslash_count = 0;
1874 let mut j = i;
1875 while j > 0 && image_bytes[j - 1] == b'\\' {
1876 backslash_count += 1;
1877 j -= 1;
1878 }
1879 let is_escaped = backslash_count % 2 != 0;
1880
1881 if !is_escaped {
1882 if byte == b'[' {
1883 depth += 1;
1884 } else if byte == b']' {
1885 if depth == 0 {
1886 close_pos = Some(i);
1888 break;
1889 } else {
1890 depth -= 1;
1891 }
1892 }
1893 }
1894 }
1895 }
1896
1897 if let Some(pos) = close_pos {
1898 Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1899 } else {
1900 Cow::Borrowed("")
1901 }
1902 } else {
1903 Cow::Borrowed("")
1904 };
1905
1906 let reference_id = if is_reference && !ref_id.is_empty() {
1907 Some(Cow::Owned(ref_id.to_lowercase()))
1908 } else if is_reference {
1909 Some(Cow::Owned(alt_text.to_lowercase())) } else {
1911 None
1912 };
1913
1914 found_positions.insert(start_pos);
1915 images.push(ParsedImage {
1916 line: line_num,
1917 start_col: col_start,
1918 end_col: col_end,
1919 byte_offset: start_pos,
1920 byte_end: range.end,
1921 alt_text,
1922 url: Cow::Owned(url.to_string()),
1923 is_reference,
1924 reference_id,
1925 link_type,
1926 });
1927 }
1928 }
1929 _ => {}
1930 }
1931 }
1932
1933 for cap in IMAGE_PATTERN.captures_iter(content) {
1935 let full_match = cap.get(0).unwrap();
1936 let match_start = full_match.start();
1937 let match_end = full_match.end();
1938
1939 if found_positions.contains(&match_start) {
1941 continue;
1942 }
1943
1944 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1946 continue;
1947 }
1948
1949 if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1951 || Self::is_offset_in_code_span(code_spans, match_start)
1952 || is_in_html_comment_ranges(html_comment_ranges, match_start)
1953 {
1954 continue;
1955 }
1956
1957 if let Some(ref_id) = cap.get(6) {
1959 let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1960 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1961 let alt_text = cap.get(1).map_or("", |m| m.as_str());
1962 let ref_id_str = ref_id.as_str();
1963 let normalized_ref = if ref_id_str.is_empty() {
1964 Cow::Owned(alt_text.to_lowercase())
1965 } else {
1966 Cow::Owned(ref_id_str.to_lowercase())
1967 };
1968
1969 images.push(ParsedImage {
1970 line: line_num,
1971 start_col: col_start,
1972 end_col: col_end,
1973 byte_offset: match_start,
1974 byte_end: match_end,
1975 alt_text: Cow::Borrowed(alt_text),
1976 url: Cow::Borrowed(""),
1977 is_reference: true,
1978 reference_id: Some(normalized_ref),
1979 link_type: LinkType::Reference, });
1981 }
1982 }
1983
1984 images
1985 }
1986
1987 fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1989 let mut refs = Vec::with_capacity(lines.len() / 20); for (line_idx, line_info) in lines.iter().enumerate() {
1993 if line_info.in_code_block {
1995 continue;
1996 }
1997
1998 let line = line_info.content(content);
1999 let line_num = line_idx + 1;
2000
2001 if let Some(cap) = REF_DEF_PATTERN.captures(line) {
2002 let id_raw = cap.get(1).unwrap().as_str();
2003
2004 if id_raw.starts_with('^') {
2007 continue;
2008 }
2009
2010 let id = id_raw.to_lowercase();
2011 let url = cap.get(2).unwrap().as_str().to_string();
2012 let title_match = cap.get(3).or_else(|| cap.get(4));
2013 let title = title_match.map(|m| m.as_str().to_string());
2014
2015 let match_obj = cap.get(0).unwrap();
2018 let byte_offset = line_info.byte_offset + match_obj.start();
2019 let byte_end = line_info.byte_offset + match_obj.end();
2020
2021 let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
2023 let start = line_info.byte_offset + m.start().saturating_sub(1);
2025 let end = line_info.byte_offset + m.end() + 1; (Some(start), Some(end))
2027 } else {
2028 (None, None)
2029 };
2030
2031 refs.push(ReferenceDef {
2032 line: line_num,
2033 id,
2034 url,
2035 title,
2036 byte_offset,
2037 byte_end,
2038 title_byte_start,
2039 title_byte_end,
2040 });
2041 }
2042 }
2043
2044 refs
2045 }
2046
2047 #[inline]
2051 fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
2052 let trimmed_start = line.trim_start();
2053 if !trimmed_start.starts_with('>') {
2054 return None;
2055 }
2056
2057 let mut remaining = line;
2059 let mut total_prefix_len = 0;
2060
2061 loop {
2062 let trimmed = remaining.trim_start();
2063 if !trimmed.starts_with('>') {
2064 break;
2065 }
2066
2067 let leading_ws_len = remaining.len() - trimmed.len();
2069 total_prefix_len += leading_ws_len + 1;
2070
2071 let after_gt = &trimmed[1..];
2072
2073 if let Some(stripped) = after_gt.strip_prefix(' ') {
2075 total_prefix_len += 1;
2076 remaining = stripped;
2077 } else if let Some(stripped) = after_gt.strip_prefix('\t') {
2078 total_prefix_len += 1;
2079 remaining = stripped;
2080 } else {
2081 remaining = after_gt;
2082 }
2083 }
2084
2085 Some((&line[..total_prefix_len], remaining))
2086 }
2087
2088 fn detect_list_items_and_emphasis_with_pulldown(
2112 content: &str,
2113 line_offsets: &[usize],
2114 flavor: MarkdownFlavor,
2115 front_matter_end: usize,
2116 code_blocks: &[(usize, usize)],
2117 ) -> (ListItemMap, Vec<EmphasisSpan>) {
2118 use std::collections::HashMap;
2119
2120 let mut list_items = HashMap::new();
2121 let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
2122
2123 let mut options = Options::empty();
2124 options.insert(Options::ENABLE_TABLES);
2125 options.insert(Options::ENABLE_FOOTNOTES);
2126 options.insert(Options::ENABLE_STRIKETHROUGH);
2127 options.insert(Options::ENABLE_TASKLISTS);
2128 options.insert(Options::ENABLE_GFM);
2130
2131 let _ = flavor;
2133
2134 let parser = Parser::new_ext(content, options).into_offset_iter();
2135 let mut list_depth: usize = 0;
2136 let mut list_stack: Vec<bool> = Vec::new();
2137
2138 for (event, range) in parser {
2139 match event {
2140 Event::Start(Tag::Emphasis) | Event::Start(Tag::Strong) => {
2142 let marker_count = if matches!(event, Event::Start(Tag::Strong)) {
2143 2
2144 } else {
2145 1
2146 };
2147 let match_start = range.start;
2148 let match_end = range.end;
2149
2150 if !CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2152 let marker = content[match_start..].chars().next().unwrap_or('*');
2154 if marker == '*' || marker == '_' {
2155 let content_start = match_start + marker_count;
2157 let content_end = if match_end >= marker_count {
2158 match_end - marker_count
2159 } else {
2160 match_end
2161 };
2162 let content_part = if content_start < content_end && content_end <= content.len() {
2163 &content[content_start..content_end]
2164 } else {
2165 ""
2166 };
2167
2168 let line_idx = match line_offsets.binary_search(&match_start) {
2170 Ok(idx) => idx,
2171 Err(idx) => idx.saturating_sub(1),
2172 };
2173 let line_num = line_idx + 1;
2174 let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
2175 let col_start = match_start - line_start;
2176 let col_end = match_end - line_start;
2177
2178 emphasis_spans.push(EmphasisSpan {
2179 line: line_num,
2180 start_col: col_start,
2181 end_col: col_end,
2182 byte_offset: match_start,
2183 byte_end: match_end,
2184 marker,
2185 marker_count,
2186 content: content_part.to_string(),
2187 });
2188 }
2189 }
2190 }
2191 Event::Start(Tag::List(start_number)) => {
2192 list_depth += 1;
2193 list_stack.push(start_number.is_some());
2194 }
2195 Event::End(TagEnd::List(_)) => {
2196 list_depth = list_depth.saturating_sub(1);
2197 list_stack.pop();
2198 }
2199 Event::Start(Tag::Item) if list_depth > 0 => {
2200 let current_list_is_ordered = list_stack.last().copied().unwrap_or(false);
2202 let item_start = range.start;
2204
2205 let mut line_idx = match line_offsets.binary_search(&item_start) {
2207 Ok(idx) => idx,
2208 Err(idx) => idx.saturating_sub(1),
2209 };
2210
2211 if item_start < content.len() && content.as_bytes()[item_start] == b'\n' {
2215 line_idx += 1;
2216 }
2217
2218 if front_matter_end > 0 && line_idx < front_matter_end {
2220 continue;
2221 }
2222
2223 if line_idx < line_offsets.len() {
2224 let line_start_byte = line_offsets[line_idx];
2225 let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
2226 let line = &content[line_start_byte..line_end.min(content.len())];
2227
2228 let line = line
2230 .strip_suffix('\n')
2231 .or_else(|| line.strip_suffix("\r\n"))
2232 .unwrap_or(line);
2233
2234 let blockquote_parse = Self::parse_blockquote_prefix(line);
2236 let (blockquote_prefix_len, line_to_parse) = if let Some((prefix, content)) = blockquote_parse {
2237 (prefix.len(), content)
2238 } else {
2239 (0, line)
2240 };
2241
2242 if current_list_is_ordered {
2244 if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
2245 Self::parse_ordered_list(line_to_parse)
2246 {
2247 let marker = format!("{number_str}{delimiter}");
2248 let marker_column = blockquote_prefix_len + leading_spaces.len();
2249 let content_column = marker_column + marker.len() + spacing.len();
2250 let number = number_str.parse().ok();
2251
2252 list_items.entry(line_start_byte).or_insert((
2253 true,
2254 marker,
2255 marker_column,
2256 content_column,
2257 number,
2258 ));
2259 }
2260 } else if let Some((leading_spaces, marker, spacing, _content)) =
2261 Self::parse_unordered_list(line_to_parse)
2262 {
2263 let marker_column = blockquote_prefix_len + leading_spaces.len();
2264 let content_column = marker_column + 1 + spacing.len();
2265
2266 list_items.entry(line_start_byte).or_insert((
2267 false,
2268 marker.to_string(),
2269 marker_column,
2270 content_column,
2271 None,
2272 ));
2273 }
2274 }
2275 }
2276 _ => {}
2277 }
2278 }
2279
2280 (list_items, emphasis_spans)
2281 }
2282
2283 #[inline]
2287 fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
2288 let bytes = line.as_bytes();
2289 let mut i = 0;
2290
2291 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2293 i += 1;
2294 }
2295
2296 if i >= bytes.len() {
2298 return None;
2299 }
2300 let marker = bytes[i] as char;
2301 if marker != '-' && marker != '*' && marker != '+' {
2302 return None;
2303 }
2304 let marker_pos = i;
2305 i += 1;
2306
2307 let spacing_start = i;
2309 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2310 i += 1;
2311 }
2312
2313 Some((&line[..marker_pos], marker, &line[spacing_start..i], &line[i..]))
2314 }
2315
2316 #[inline]
2320 fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
2321 let bytes = line.as_bytes();
2322 let mut i = 0;
2323
2324 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2326 i += 1;
2327 }
2328
2329 let number_start = i;
2331 while i < bytes.len() && bytes[i].is_ascii_digit() {
2332 i += 1;
2333 }
2334 if i == number_start {
2335 return None; }
2337
2338 if i >= bytes.len() {
2340 return None;
2341 }
2342 let delimiter = bytes[i] as char;
2343 if delimiter != '.' && delimiter != ')' {
2344 return None;
2345 }
2346 let delimiter_pos = i;
2347 i += 1;
2348
2349 let spacing_start = i;
2351 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2352 i += 1;
2353 }
2354
2355 Some((
2356 &line[..number_start],
2357 &line[number_start..delimiter_pos],
2358 delimiter,
2359 &line[spacing_start..i],
2360 &line[i..],
2361 ))
2362 }
2363
2364 fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
2367 let num_lines = line_offsets.len();
2368 let mut in_code_block = vec![false; num_lines];
2369
2370 for &(start, end) in code_blocks {
2372 let safe_start = if start > 0 && !content.is_char_boundary(start) {
2374 let mut boundary = start;
2375 while boundary > 0 && !content.is_char_boundary(boundary) {
2376 boundary -= 1;
2377 }
2378 boundary
2379 } else {
2380 start
2381 };
2382
2383 let safe_end = if end < content.len() && !content.is_char_boundary(end) {
2384 let mut boundary = end;
2385 while boundary < content.len() && !content.is_char_boundary(boundary) {
2386 boundary += 1;
2387 }
2388 boundary
2389 } else {
2390 end.min(content.len())
2391 };
2392
2393 let first_line_after = line_offsets.partition_point(|&offset| offset <= safe_start);
2412 let first_line = first_line_after.saturating_sub(1);
2413 let last_line = line_offsets.partition_point(|&offset| offset < safe_end);
2414
2415 for flag in in_code_block.iter_mut().take(last_line).skip(first_line) {
2417 *flag = true;
2418 }
2419 }
2420
2421 in_code_block
2422 }
2423
2424 fn compute_math_block_line_map(content: &str, code_block_map: &[bool]) -> Vec<bool> {
2427 let content_lines: Vec<&str> = content.lines().collect();
2428 let num_lines = content_lines.len();
2429 let mut in_math_block = vec![false; num_lines];
2430
2431 let mut inside_math = false;
2432
2433 for (i, line) in content_lines.iter().enumerate() {
2434 if code_block_map.get(i).copied().unwrap_or(false) {
2436 continue;
2437 }
2438
2439 let trimmed = line.trim();
2440
2441 if trimmed == "$$" {
2444 if inside_math {
2445 in_math_block[i] = true;
2447 inside_math = false;
2448 } else {
2449 in_math_block[i] = true;
2451 inside_math = true;
2452 }
2453 } else if inside_math {
2454 in_math_block[i] = true;
2456 }
2457 }
2458
2459 in_math_block
2460 }
2461
2462 fn compute_basic_line_info(
2465 content: &str,
2466 line_offsets: &[usize],
2467 code_blocks: &[(usize, usize)],
2468 flavor: MarkdownFlavor,
2469 skip_ranges: &SkipByteRanges<'_>,
2470 ) -> (Vec<LineInfo>, Vec<EmphasisSpan>) {
2471 let content_lines: Vec<&str> = content.lines().collect();
2472 let mut lines = Vec::with_capacity(content_lines.len());
2473
2474 let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
2476
2477 let math_block_map = Self::compute_math_block_line_map(content, &code_block_map);
2479
2480 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2483
2484 let (list_item_map, emphasis_spans) = Self::detect_list_items_and_emphasis_with_pulldown(
2487 content,
2488 line_offsets,
2489 flavor,
2490 front_matter_end,
2491 code_blocks,
2492 );
2493
2494 for (i, line) in content_lines.iter().enumerate() {
2495 let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
2496 let indent = line.len() - line.trim_start().len();
2497 let visual_indent = ElementCache::calculate_indentation_width_default(line);
2499
2500 let blockquote_parse = Self::parse_blockquote_prefix(line);
2502
2503 let is_blank = if let Some((_, content)) = blockquote_parse {
2505 content.trim().is_empty()
2507 } else {
2508 line.trim().is_empty()
2509 };
2510
2511 let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
2513
2514 let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
2516 && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(
2517 skip_ranges.autodoc_ranges,
2518 byte_offset,
2519 );
2520 let line_end_offset = byte_offset + line.len();
2523 let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
2524 skip_ranges.html_comment_ranges,
2525 byte_offset,
2526 line_end_offset,
2527 );
2528 let list_item =
2531 list_item_map
2532 .get(&byte_offset)
2533 .map(
2534 |(is_ordered, marker, marker_column, content_column, number)| ListItemInfo {
2535 marker: marker.clone(),
2536 is_ordered: *is_ordered,
2537 number: *number,
2538 marker_column: *marker_column,
2539 content_column: *content_column,
2540 },
2541 );
2542
2543 let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2546 let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2547
2548 let in_math_block = math_block_map.get(i).copied().unwrap_or(false);
2550
2551 let in_quarto_div = flavor == MarkdownFlavor::Quarto
2553 && crate::utils::quarto_divs::is_within_div_block_ranges(skip_ranges.quarto_div_ranges, byte_offset);
2554
2555 let in_pymdown_block = flavor == MarkdownFlavor::MkDocs
2557 && crate::utils::pymdown_blocks::is_within_block_ranges(skip_ranges.pymdown_block_ranges, byte_offset);
2558
2559 lines.push(LineInfo {
2560 byte_offset,
2561 byte_len: line.len(),
2562 indent,
2563 visual_indent,
2564 is_blank,
2565 in_code_block,
2566 in_front_matter,
2567 in_html_block: false, in_html_comment,
2569 list_item,
2570 heading: None, blockquote: None, in_mkdocstrings,
2573 in_esm_block: false, in_code_span_continuation: false, is_horizontal_rule: is_hr,
2576 in_math_block,
2577 in_quarto_div,
2578 in_jsx_expression: false, in_mdx_comment: false, in_jsx_component: false, in_jsx_fragment: false, in_admonition: false, in_content_tab: false, in_mkdocs_html_markdown: false, in_definition_list: false, in_obsidian_comment: false, in_pymdown_block,
2588 });
2589 }
2590
2591 (lines, emphasis_spans)
2592 }
2593
2594 fn detect_headings_and_blockquotes(
2596 content: &str,
2597 lines: &mut [LineInfo],
2598 flavor: MarkdownFlavor,
2599 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2600 link_byte_ranges: &[(usize, usize)],
2601 ) {
2602 static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
2604 LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
2605 static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
2606 LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
2607
2608 let content_lines: Vec<&str> = content.lines().collect();
2609
2610 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2612
2613 for i in 0..lines.len() {
2615 let line = content_lines[i];
2616
2617 if !(front_matter_end > 0 && i < front_matter_end)
2622 && let Some(bq) = parse_blockquote_detailed(line)
2623 {
2624 let nesting_level = bq.markers.len();
2625 let marker_column = bq.indent.len();
2626 let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
2627 let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
2628 let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
2629 let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
2630
2631 lines[i].blockquote = Some(BlockquoteInfo {
2632 nesting_level,
2633 indent: bq.indent.to_string(),
2634 marker_column,
2635 prefix,
2636 content: bq.content.to_string(),
2637 has_no_space_after_marker: has_no_space,
2638 has_multiple_spaces_after_marker: has_multiple_spaces,
2639 needs_md028_fix,
2640 });
2641
2642 if !lines[i].in_code_block && is_horizontal_rule_content(bq.content.trim()) {
2645 lines[i].is_horizontal_rule = true;
2646 }
2647 }
2648
2649 if lines[i].in_code_block {
2651 continue;
2652 }
2653
2654 if front_matter_end > 0 && i < front_matter_end {
2656 continue;
2657 }
2658
2659 if lines[i].in_html_block {
2661 continue;
2662 }
2663
2664 if lines[i].is_blank {
2666 continue;
2667 }
2668
2669 let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
2672 crate::utils::mkdocs_snippets::is_snippet_section_start(line)
2673 || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
2674 } else {
2675 false
2676 };
2677
2678 if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
2679 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
2681 continue;
2682 }
2683 let line_offset = lines[i].byte_offset;
2686 if link_byte_ranges
2687 .iter()
2688 .any(|&(start, end)| line_offset > start && line_offset < end)
2689 {
2690 continue;
2691 }
2692 let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
2693 let hashes = caps.get(2).map_or("", |m| m.as_str());
2694 let spaces_after = caps.get(3).map_or("", |m| m.as_str());
2695 let rest = caps.get(4).map_or("", |m| m.as_str());
2696
2697 let level = hashes.len() as u8;
2698 let marker_column = leading_spaces.len();
2699
2700 let (text, has_closing, closing_seq) = {
2702 let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
2704 if rest[id_start..].trim_end().ends_with('}') {
2706 (&rest[..id_start], &rest[id_start..])
2708 } else {
2709 (rest, "")
2710 }
2711 } else {
2712 (rest, "")
2713 };
2714
2715 let trimmed_rest = rest_without_id.trim_end();
2717 if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
2718 let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();
2721
2722 let last_hash_char_idx = char_positions
2724 .iter()
2725 .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);
2726
2727 if let Some(mut char_idx) = last_hash_char_idx {
2728 while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
2730 char_idx -= 1;
2731 }
2732
2733 let start_of_hashes = char_positions[char_idx].0;
2735
2736 let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();
2738
2739 let potential_closing = &trimmed_rest[start_of_hashes..];
2741 let is_all_hashes = potential_closing.chars().all(|c| c == '#');
2742
2743 if is_all_hashes && has_space_before {
2744 let closing_hashes = potential_closing.to_string();
2746 let text_part = if !custom_id_part.is_empty() {
2749 format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
2752 } else {
2753 trimmed_rest[..start_of_hashes].trim_end().to_string()
2754 };
2755 (text_part, true, closing_hashes)
2756 } else {
2757 (rest.to_string(), false, String::new())
2759 }
2760 } else {
2761 (rest.to_string(), false, String::new())
2763 }
2764 } else {
2765 (rest.to_string(), false, String::new())
2767 }
2768 };
2769
2770 let content_column = marker_column + hashes.len() + spaces_after.len();
2771
2772 let raw_text = text.trim().to_string();
2774 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2775
2776 if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
2778 let next_line = content_lines[i + 1];
2779 if !lines[i + 1].in_code_block
2780 && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
2781 && let Some(next_line_id) =
2782 crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
2783 {
2784 custom_id = Some(next_line_id);
2785 }
2786 }
2787
2788 let is_valid = !spaces_after.is_empty()
2798 || rest.is_empty()
2799 || level > 1
2800 || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());
2801
2802 lines[i].heading = Some(HeadingInfo {
2803 level,
2804 style: HeadingStyle::ATX,
2805 marker: hashes.to_string(),
2806 marker_column,
2807 content_column,
2808 text: clean_text,
2809 custom_id,
2810 raw_text,
2811 has_closing_sequence: has_closing,
2812 closing_sequence: closing_seq,
2813 is_valid,
2814 });
2815 }
2816 else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2818 let next_line = content_lines[i + 1];
2819 if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2820 if front_matter_end > 0 && i < front_matter_end {
2822 continue;
2823 }
2824
2825 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2827 {
2828 continue;
2829 }
2830
2831 let content_line = line.trim();
2834
2835 if content_line.starts_with('-') || content_line.starts_with('*') || content_line.starts_with('+') {
2837 continue;
2838 }
2839
2840 if content_line.starts_with('_') {
2842 let non_ws: String = content_line.chars().filter(|c| !c.is_whitespace()).collect();
2843 if non_ws.len() >= 3 && non_ws.chars().all(|c| c == '_') {
2844 continue;
2845 }
2846 }
2847
2848 if let Some(first_char) = content_line.chars().next()
2850 && first_char.is_ascii_digit()
2851 {
2852 let num_end = content_line.chars().take_while(|c| c.is_ascii_digit()).count();
2853 if num_end < content_line.len() {
2854 let next = content_line.chars().nth(num_end);
2855 if next == Some('.') || next == Some(')') {
2856 continue;
2857 }
2858 }
2859 }
2860
2861 if ATX_HEADING_REGEX.is_match(line) {
2863 continue;
2864 }
2865
2866 if content_line.starts_with('>') {
2868 continue;
2869 }
2870
2871 let trimmed_start = line.trim_start();
2873 if trimmed_start.len() >= 3 {
2874 let first_three: String = trimmed_start.chars().take(3).collect();
2875 if first_three == "```" || first_three == "~~~" {
2876 continue;
2877 }
2878 }
2879
2880 if content_line.starts_with('<') {
2882 continue;
2883 }
2884
2885 let underline = next_line.trim();
2886
2887 let level = if underline.starts_with('=') { 1 } else { 2 };
2888 let style = if level == 1 {
2889 HeadingStyle::Setext1
2890 } else {
2891 HeadingStyle::Setext2
2892 };
2893
2894 let raw_text = line.trim().to_string();
2896 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2897
2898 if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2900 let attr_line = content_lines[i + 2];
2901 if !lines[i + 2].in_code_block
2902 && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2903 && let Some(attr_line_id) =
2904 crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2905 {
2906 custom_id = Some(attr_line_id);
2907 }
2908 }
2909
2910 lines[i].heading = Some(HeadingInfo {
2911 level,
2912 style,
2913 marker: underline.to_string(),
2914 marker_column: next_line.len() - next_line.trim_start().len(),
2915 content_column: lines[i].indent,
2916 text: clean_text,
2917 custom_id,
2918 raw_text,
2919 has_closing_sequence: false,
2920 closing_sequence: String::new(),
2921 is_valid: true, });
2923 }
2924 }
2925 }
2926 }
2927
2928 fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2930 const BLOCK_ELEMENTS: &[&str] = &[
2933 "address",
2934 "article",
2935 "aside",
2936 "audio",
2937 "blockquote",
2938 "canvas",
2939 "details",
2940 "dialog",
2941 "dd",
2942 "div",
2943 "dl",
2944 "dt",
2945 "embed",
2946 "fieldset",
2947 "figcaption",
2948 "figure",
2949 "footer",
2950 "form",
2951 "h1",
2952 "h2",
2953 "h3",
2954 "h4",
2955 "h5",
2956 "h6",
2957 "header",
2958 "hr",
2959 "iframe",
2960 "li",
2961 "main",
2962 "menu",
2963 "nav",
2964 "noscript",
2965 "object",
2966 "ol",
2967 "p",
2968 "picture",
2969 "pre",
2970 "script",
2971 "search",
2972 "section",
2973 "source",
2974 "style",
2975 "summary",
2976 "svg",
2977 "table",
2978 "tbody",
2979 "td",
2980 "template",
2981 "textarea",
2982 "tfoot",
2983 "th",
2984 "thead",
2985 "tr",
2986 "track",
2987 "ul",
2988 "video",
2989 ];
2990
2991 let mut i = 0;
2992 while i < lines.len() {
2993 if lines[i].in_code_block || lines[i].in_front_matter {
2995 i += 1;
2996 continue;
2997 }
2998
2999 let trimmed = lines[i].content(content).trim_start();
3000
3001 if trimmed.starts_with('<') && trimmed.len() > 1 {
3003 let after_bracket = &trimmed[1..];
3005 let is_closing = after_bracket.starts_with('/');
3006 let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
3007
3008 let tag_name = tag_start
3010 .chars()
3011 .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
3012 .collect::<String>()
3013 .to_lowercase();
3014
3015 if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
3017 lines[i].in_html_block = true;
3019
3020 if !is_closing {
3025 let closing_tag = format!("</{tag_name}>");
3026
3027 let same_line_close = lines[i].content(content).contains(&closing_tag);
3030
3031 if !same_line_close {
3033 let allow_blank_lines = tag_name == "style" || tag_name == "script";
3035 let mut j = i + 1;
3036 let mut found_closing_tag = false;
3037 while j < lines.len() && j < i + 100 {
3038 if !allow_blank_lines && lines[j].is_blank {
3041 break;
3042 }
3043
3044 lines[j].in_html_block = true;
3045
3046 if lines[j].content(content).contains(&closing_tag) {
3048 found_closing_tag = true;
3049 }
3050
3051 if found_closing_tag {
3054 j += 1;
3055 while j < lines.len() && j < i + 100 {
3057 if lines[j].is_blank {
3058 break;
3059 }
3060 lines[j].in_html_block = true;
3061 j += 1;
3062 }
3063 break;
3064 }
3065 j += 1;
3066 }
3067 }
3068 }
3069 }
3070 }
3071
3072 i += 1;
3073 }
3074 }
3075
3076 fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
3079 if !flavor.supports_esm_blocks() {
3081 return;
3082 }
3083
3084 let mut in_multiline_import = false;
3085
3086 for line in lines.iter_mut() {
3087 if line.in_code_block || line.in_front_matter || line.in_html_comment {
3089 in_multiline_import = false;
3090 continue;
3091 }
3092
3093 let line_content = line.content(content);
3094 let trimmed = line_content.trim();
3095
3096 if in_multiline_import {
3098 line.in_esm_block = true;
3099 if trimmed.ends_with('\'')
3102 || trimmed.ends_with('"')
3103 || trimmed.ends_with("';")
3104 || trimmed.ends_with("\";")
3105 || line_content.contains(';')
3106 {
3107 in_multiline_import = false;
3108 }
3109 continue;
3110 }
3111
3112 if line.is_blank {
3114 continue;
3115 }
3116
3117 if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
3119 line.in_esm_block = true;
3120
3121 let is_import = trimmed.starts_with("import ");
3129
3130 let is_complete =
3132 trimmed.ends_with(';')
3134 || (trimmed.contains(" from ") && (trimmed.ends_with('\'') || trimmed.ends_with('"')))
3136 || (!is_import && !trimmed.contains(" from ") && (
3138 trimmed.starts_with("export const ")
3139 || trimmed.starts_with("export let ")
3140 || trimmed.starts_with("export var ")
3141 || trimmed.starts_with("export function ")
3142 || trimmed.starts_with("export class ")
3143 || trimmed.starts_with("export default ")
3144 ));
3145
3146 if !is_complete && is_import {
3147 if trimmed.contains('{') && !trimmed.contains('}') {
3151 in_multiline_import = true;
3152 }
3153 }
3154 }
3155 }
3156 }
3157
3158 fn detect_jsx_and_mdx_comments(
3161 content: &str,
3162 lines: &mut [LineInfo],
3163 flavor: MarkdownFlavor,
3164 code_blocks: &[(usize, usize)],
3165 ) -> (ByteRanges, ByteRanges) {
3166 if !flavor.supports_jsx() {
3168 return (Vec::new(), Vec::new());
3169 }
3170
3171 let mut jsx_expression_ranges: Vec<(usize, usize)> = Vec::new();
3172 let mut mdx_comment_ranges: Vec<(usize, usize)> = Vec::new();
3173
3174 if !content.contains('{') {
3176 return (jsx_expression_ranges, mdx_comment_ranges);
3177 }
3178
3179 let bytes = content.as_bytes();
3180 let mut i = 0;
3181
3182 while i < bytes.len() {
3183 if bytes[i] == b'{' {
3184 if code_blocks.iter().any(|(start, end)| i >= *start && i < *end) {
3186 i += 1;
3187 continue;
3188 }
3189
3190 let start = i;
3191
3192 if i + 2 < bytes.len() && &bytes[i + 1..i + 3] == b"/*" {
3194 let mut j = i + 3;
3196 while j + 2 < bytes.len() {
3197 if &bytes[j..j + 2] == b"*/" && j + 2 < bytes.len() && bytes[j + 2] == b'}' {
3198 let end = j + 3;
3199 mdx_comment_ranges.push((start, end));
3200
3201 Self::mark_lines_in_range(lines, content, start, end, |line| {
3203 line.in_mdx_comment = true;
3204 });
3205
3206 i = end;
3207 break;
3208 }
3209 j += 1;
3210 }
3211 if j + 2 >= bytes.len() {
3212 mdx_comment_ranges.push((start, bytes.len()));
3214 Self::mark_lines_in_range(lines, content, start, bytes.len(), |line| {
3215 line.in_mdx_comment = true;
3216 });
3217 break;
3218 }
3219 } else {
3220 let mut brace_depth = 1;
3223 let mut j = i + 1;
3224 let mut in_string = false;
3225 let mut string_char = b'"';
3226
3227 while j < bytes.len() && brace_depth > 0 {
3228 let c = bytes[j];
3229
3230 if !in_string && (c == b'"' || c == b'\'' || c == b'`') {
3232 in_string = true;
3233 string_char = c;
3234 } else if in_string && c == string_char && (j == 0 || bytes[j - 1] != b'\\') {
3235 in_string = false;
3236 } else if !in_string {
3237 if c == b'{' {
3238 brace_depth += 1;
3239 } else if c == b'}' {
3240 brace_depth -= 1;
3241 }
3242 }
3243 j += 1;
3244 }
3245
3246 if brace_depth == 0 {
3247 let end = j;
3248 jsx_expression_ranges.push((start, end));
3249
3250 Self::mark_lines_in_range(lines, content, start, end, |line| {
3252 line.in_jsx_expression = true;
3253 });
3254
3255 i = end;
3256 } else {
3257 i += 1;
3258 }
3259 }
3260 } else {
3261 i += 1;
3262 }
3263 }
3264
3265 (jsx_expression_ranges, mdx_comment_ranges)
3266 }
3267
3268 fn detect_mkdocs_line_info(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
3271 if flavor != MarkdownFlavor::MkDocs {
3272 return;
3273 }
3274
3275 use crate::utils::mkdocs_admonitions;
3276 use crate::utils::mkdocs_definition_lists;
3277 use crate::utils::mkdocs_tabs;
3278
3279 let content_lines: Vec<&str> = content.lines().collect();
3280
3281 let mut in_admonition = false;
3283 let mut admonition_indent = 0;
3284
3285 let mut in_tab = false;
3287 let mut tab_indent = 0;
3288
3289 let mut in_mkdocs_fenced_code = false;
3291 let mut mkdocs_fence_marker: Option<String> = None;
3292
3293 let mut in_definition = false;
3295
3296 let mut markdown_html_tracker = MarkdownHtmlTracker::new();
3298
3299 for (i, line) in content_lines.iter().enumerate() {
3300 if i >= lines.len() {
3301 break;
3302 }
3303
3304 if mkdocs_admonitions::is_admonition_start(line) {
3308 in_admonition = true;
3309 admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3310 lines[i].in_admonition = true;
3311 } else if in_admonition {
3312 if line.trim().is_empty() {
3314 lines[i].in_admonition = true;
3316 lines[i].in_code_block = false;
3318 } else if mkdocs_admonitions::is_admonition_content(line, admonition_indent) {
3319 lines[i].in_admonition = true;
3320 lines[i].in_code_block = false;
3322 } else {
3323 in_admonition = false;
3325 if mkdocs_admonitions::is_admonition_start(line) {
3327 in_admonition = true;
3328 admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3329 lines[i].in_admonition = true;
3330 }
3331 }
3332 }
3333
3334 if mkdocs_tabs::is_tab_marker(line) {
3337 in_tab = true;
3338 tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3339 lines[i].in_content_tab = true;
3340 in_mkdocs_fenced_code = false;
3342 mkdocs_fence_marker = None;
3343 } else if in_tab {
3344 let trimmed = line.trim();
3345
3346 if !in_mkdocs_fenced_code {
3348 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
3350 let fence_char = trimmed.chars().next().unwrap();
3351 let fence_len = trimmed.chars().take_while(|&c| c == fence_char).count();
3352 if fence_len >= 3 {
3353 in_mkdocs_fenced_code = true;
3354 mkdocs_fence_marker = Some(fence_char.to_string().repeat(fence_len));
3355 }
3356 }
3357 } else if let Some(ref marker) = mkdocs_fence_marker {
3358 let fence_char = marker.chars().next().unwrap();
3360 if trimmed.starts_with(marker.as_str())
3361 && trimmed
3362 .chars()
3363 .skip(marker.len())
3364 .all(|c| c == fence_char || c.is_whitespace())
3365 {
3366 in_mkdocs_fenced_code = false;
3367 mkdocs_fence_marker = None;
3368 }
3369 }
3370
3371 if line.trim().is_empty() {
3373 lines[i].in_content_tab = true;
3375 if !in_mkdocs_fenced_code {
3377 lines[i].in_code_block = false;
3378 }
3379 } else if mkdocs_tabs::is_tab_content(line, tab_indent) {
3380 lines[i].in_content_tab = true;
3381 if !in_mkdocs_fenced_code {
3384 lines[i].in_code_block = false;
3385 }
3386 } else {
3387 in_tab = false;
3389 in_mkdocs_fenced_code = false;
3390 mkdocs_fence_marker = None;
3391 if mkdocs_tabs::is_tab_marker(line) {
3393 in_tab = true;
3394 tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3395 lines[i].in_content_tab = true;
3396 }
3397 }
3398 }
3399
3400 lines[i].in_mkdocs_html_markdown = markdown_html_tracker.process_line(line);
3404
3405 if lines[i].in_code_block {
3407 continue;
3408 }
3409
3410 if mkdocs_definition_lists::is_definition_line(line) {
3412 in_definition = true;
3413 lines[i].in_definition_list = true;
3414 } else if in_definition {
3415 if mkdocs_definition_lists::is_definition_continuation(line) {
3417 lines[i].in_definition_list = true;
3418 } else if line.trim().is_empty() {
3419 lines[i].in_definition_list = true;
3421 } else if mkdocs_definition_lists::could_be_term_line(line) {
3422 if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1])
3424 {
3425 lines[i].in_definition_list = true;
3426 } else {
3427 in_definition = false;
3428 }
3429 } else {
3430 in_definition = false;
3431 }
3432 } else if mkdocs_definition_lists::could_be_term_line(line) {
3433 if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1]) {
3435 lines[i].in_definition_list = true;
3436 in_definition = true;
3437 }
3438 }
3439 }
3440 }
3441
3442 fn detect_obsidian_comments(
3453 content: &str,
3454 lines: &mut [LineInfo],
3455 flavor: MarkdownFlavor,
3456 code_span_ranges: &[(usize, usize)],
3457 ) -> Vec<(usize, usize)> {
3458 if flavor != MarkdownFlavor::Obsidian {
3460 return Vec::new();
3461 }
3462
3463 let comment_ranges = Self::compute_obsidian_comment_ranges(content, lines, code_span_ranges);
3465
3466 for range in &comment_ranges {
3468 for line in lines.iter_mut() {
3469 if line.in_code_block || line.in_html_comment {
3471 continue;
3472 }
3473
3474 let line_start = line.byte_offset;
3475 let line_end = line.byte_offset + line.byte_len;
3476
3477 if line_start >= range.0 && line_end <= range.1 {
3481 line.in_obsidian_comment = true;
3482 } else if line_start < range.1 && line_end > range.0 {
3483 let line_content_start = line_start;
3491 let line_content_end = line_end;
3492
3493 if line_content_start >= range.0 && line_content_end <= range.1 {
3494 line.in_obsidian_comment = true;
3495 }
3496 }
3497 }
3498 }
3499
3500 comment_ranges
3501 }
3502
3503 fn compute_obsidian_comment_ranges(
3508 content: &str,
3509 lines: &[LineInfo],
3510 code_span_ranges: &[(usize, usize)],
3511 ) -> Vec<(usize, usize)> {
3512 let mut ranges = Vec::new();
3513
3514 if !content.contains("%%") {
3516 return ranges;
3517 }
3518
3519 let mut skip_ranges: Vec<(usize, usize)> = Vec::new();
3522 for line in lines {
3523 if line.in_code_block || line.in_html_comment {
3524 skip_ranges.push((line.byte_offset, line.byte_offset + line.byte_len));
3525 }
3526 }
3527 skip_ranges.extend(code_span_ranges.iter().copied());
3528
3529 if !skip_ranges.is_empty() {
3530 skip_ranges.sort_by_key(|(start, _)| *start);
3532 let mut merged: Vec<(usize, usize)> = Vec::with_capacity(skip_ranges.len());
3533 for (start, end) in skip_ranges {
3534 if let Some((_, last_end)) = merged.last_mut()
3535 && start <= *last_end
3536 {
3537 *last_end = (*last_end).max(end);
3538 continue;
3539 }
3540 merged.push((start, end));
3541 }
3542 skip_ranges = merged;
3543 }
3544
3545 let content_bytes = content.as_bytes();
3546 let len = content.len();
3547 let mut i = 0;
3548 let mut in_comment = false;
3549 let mut comment_start = 0;
3550 let mut skip_idx = 0;
3551
3552 while i < len.saturating_sub(1) {
3553 if skip_idx < skip_ranges.len() {
3555 let (skip_start, skip_end) = skip_ranges[skip_idx];
3556 if i >= skip_end {
3557 skip_idx += 1;
3558 continue;
3559 }
3560 if i >= skip_start {
3561 i = skip_end;
3562 continue;
3563 }
3564 }
3565
3566 if content_bytes[i] == b'%' && content_bytes[i + 1] == b'%' {
3568 if !in_comment {
3569 in_comment = true;
3571 comment_start = i;
3572 i += 2;
3573 } else {
3574 let comment_end = i + 2;
3576 ranges.push((comment_start, comment_end));
3577 in_comment = false;
3578 i += 2;
3579 }
3580 } else {
3581 i += 1;
3582 }
3583 }
3584
3585 if in_comment {
3587 ranges.push((comment_start, len));
3588 }
3589
3590 ranges
3591 }
3592
3593 fn mark_lines_in_range<F>(lines: &mut [LineInfo], content: &str, start: usize, end: usize, mut f: F)
3595 where
3596 F: FnMut(&mut LineInfo),
3597 {
3598 for line in lines.iter_mut() {
3600 let line_start = line.byte_offset;
3601 let line_end = line.byte_offset + line.byte_len;
3602
3603 if line_start < end && line_end > start {
3605 f(line);
3606 }
3607 }
3608
3609 let _ = content;
3611 }
3612
3613 fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
3615 if !content.contains('`') {
3617 return Vec::new();
3618 }
3619
3620 let parser = Parser::new(content).into_offset_iter();
3622 let mut ranges = Vec::new();
3623
3624 for (event, range) in parser {
3625 if let Event::Code(_) = event {
3626 ranges.push((range.start, range.end));
3627 }
3628 }
3629
3630 Self::build_code_spans_from_ranges(content, lines, &ranges)
3631 }
3632
3633 fn build_code_spans_from_ranges(content: &str, lines: &[LineInfo], ranges: &[(usize, usize)]) -> Vec<CodeSpan> {
3634 let mut code_spans = Vec::new();
3635 if ranges.is_empty() {
3636 return code_spans;
3637 }
3638
3639 for &(start_pos, end_pos) in ranges {
3640 let full_span = &content[start_pos..end_pos];
3642 let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
3643
3644 let content_start = start_pos + backtick_count;
3646 let content_end = end_pos - backtick_count;
3647 let span_content = if content_start < content_end {
3648 content[content_start..content_end].to_string()
3649 } else {
3650 String::new()
3651 };
3652
3653 let line_idx = lines
3656 .partition_point(|line| line.byte_offset <= start_pos)
3657 .saturating_sub(1);
3658 let line_num = line_idx + 1;
3659 let byte_col_start = start_pos - lines[line_idx].byte_offset;
3660
3661 let end_line_idx = lines
3663 .partition_point(|line| line.byte_offset <= end_pos)
3664 .saturating_sub(1);
3665 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3666
3667 let line_content = lines[line_idx].content(content);
3670 let col_start = if byte_col_start <= line_content.len() {
3671 line_content[..byte_col_start].chars().count()
3672 } else {
3673 line_content.chars().count()
3674 };
3675
3676 let end_line_content = lines[end_line_idx].content(content);
3677 let col_end = if byte_col_end <= end_line_content.len() {
3678 end_line_content[..byte_col_end].chars().count()
3679 } else {
3680 end_line_content.chars().count()
3681 };
3682
3683 code_spans.push(CodeSpan {
3684 line: line_num,
3685 end_line: end_line_idx + 1,
3686 start_col: col_start,
3687 end_col: col_end,
3688 byte_offset: start_pos,
3689 byte_end: end_pos,
3690 backtick_count,
3691 content: span_content,
3692 });
3693 }
3694
3695 code_spans.sort_by_key(|span| span.byte_offset);
3697
3698 code_spans
3699 }
3700
3701 fn parse_math_spans(content: &str, lines: &[LineInfo]) -> Vec<MathSpan> {
3703 let mut math_spans = Vec::new();
3704
3705 if !content.contains('$') {
3707 return math_spans;
3708 }
3709
3710 let mut options = Options::empty();
3712 options.insert(Options::ENABLE_MATH);
3713 let parser = Parser::new_ext(content, options).into_offset_iter();
3714
3715 for (event, range) in parser {
3716 let (is_display, math_content) = match &event {
3717 Event::InlineMath(text) => (false, text.as_ref()),
3718 Event::DisplayMath(text) => (true, text.as_ref()),
3719 _ => continue,
3720 };
3721
3722 let start_pos = range.start;
3723 let end_pos = range.end;
3724
3725 let line_idx = lines
3727 .partition_point(|line| line.byte_offset <= start_pos)
3728 .saturating_sub(1);
3729 let line_num = line_idx + 1;
3730 let byte_col_start = start_pos - lines[line_idx].byte_offset;
3731
3732 let end_line_idx = lines
3734 .partition_point(|line| line.byte_offset <= end_pos)
3735 .saturating_sub(1);
3736 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3737
3738 let line_content = lines[line_idx].content(content);
3740 let col_start = if byte_col_start <= line_content.len() {
3741 line_content[..byte_col_start].chars().count()
3742 } else {
3743 line_content.chars().count()
3744 };
3745
3746 let end_line_content = lines[end_line_idx].content(content);
3747 let col_end = if byte_col_end <= end_line_content.len() {
3748 end_line_content[..byte_col_end].chars().count()
3749 } else {
3750 end_line_content.chars().count()
3751 };
3752
3753 math_spans.push(MathSpan {
3754 line: line_num,
3755 end_line: end_line_idx + 1,
3756 start_col: col_start,
3757 end_col: col_end,
3758 byte_offset: start_pos,
3759 byte_end: end_pos,
3760 is_display,
3761 content: math_content.to_string(),
3762 });
3763 }
3764
3765 math_spans.sort_by_key(|span| span.byte_offset);
3767
3768 math_spans
3769 }
3770
3771 fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
3782 const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
3784
3785 #[inline]
3788 fn reset_tracking_state(
3789 list_item: &ListItemInfo,
3790 has_list_breaking_content: &mut bool,
3791 min_continuation: &mut usize,
3792 ) {
3793 *has_list_breaking_content = false;
3794 let marker_width = if list_item.is_ordered {
3795 list_item.marker.len() + 1 } else {
3797 list_item.marker.len()
3798 };
3799 *min_continuation = if list_item.is_ordered {
3800 marker_width
3801 } else {
3802 UNORDERED_LIST_MIN_CONTINUATION_INDENT
3803 };
3804 }
3805
3806 let mut list_blocks = Vec::with_capacity(lines.len() / 10); let mut current_block: Option<ListBlock> = None;
3809 let mut last_list_item_line = 0;
3810 let mut current_indent_level = 0;
3811 let mut last_marker_width = 0;
3812
3813 let mut has_list_breaking_content_since_last_item = false;
3815 let mut min_continuation_for_tracking = 0;
3816
3817 for (line_idx, line_info) in lines.iter().enumerate() {
3818 let line_num = line_idx + 1;
3819
3820 if line_info.in_code_block {
3822 if let Some(ref mut block) = current_block {
3823 let min_continuation_indent =
3825 CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
3826
3827 let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
3829
3830 match context {
3831 CodeBlockContext::Indented => {
3832 block.end_line = line_num;
3834 continue;
3835 }
3836 CodeBlockContext::Standalone => {
3837 let completed_block = current_block.take().unwrap();
3839 list_blocks.push(completed_block);
3840 continue;
3841 }
3842 CodeBlockContext::Adjacent => {
3843 block.end_line = line_num;
3845 continue;
3846 }
3847 }
3848 } else {
3849 continue;
3851 }
3852 }
3853
3854 let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
3856 caps.get(0).unwrap().as_str().to_string()
3857 } else {
3858 String::new()
3859 };
3860
3861 if let Some(ref block) = current_block
3864 && line_info.list_item.is_none()
3865 && !line_info.is_blank
3866 && !line_info.in_code_span_continuation
3867 {
3868 let line_content = line_info.content(content).trim();
3869
3870 let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
3875
3876 let blockquote_prefix_changes = blockquote_prefix.trim() != block.blockquote_prefix.trim();
3879
3880 let breaks_list = line_info.heading.is_some()
3881 || line_content.starts_with("---")
3882 || line_content.starts_with("***")
3883 || line_content.starts_with("___")
3884 || crate::utils::skip_context::is_table_line(line_content)
3885 || blockquote_prefix_changes
3886 || (line_info.indent > 0
3887 && line_info.indent < min_continuation_for_tracking
3888 && !is_lazy_continuation);
3889
3890 if breaks_list {
3891 has_list_breaking_content_since_last_item = true;
3892 }
3893 }
3894
3895 if line_info.in_code_span_continuation
3898 && line_info.list_item.is_none()
3899 && let Some(ref mut block) = current_block
3900 {
3901 block.end_line = line_num;
3902 }
3903
3904 let effective_continuation_indent = if let Some(ref block) = current_block {
3910 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3911 let line_content = line_info.content(content);
3912 let line_bq_level = line_content
3913 .chars()
3914 .take_while(|c| *c == '>' || c.is_whitespace())
3915 .filter(|&c| c == '>')
3916 .count();
3917 if line_bq_level > 0 && line_bq_level == block_bq_level {
3918 let mut pos = 0;
3920 let mut found_markers = 0;
3921 for c in line_content.chars() {
3922 pos += c.len_utf8();
3923 if c == '>' {
3924 found_markers += 1;
3925 if found_markers == line_bq_level {
3926 if line_content.get(pos..pos + 1) == Some(" ") {
3927 pos += 1;
3928 }
3929 break;
3930 }
3931 }
3932 }
3933 let after_bq = &line_content[pos..];
3934 after_bq.len() - after_bq.trim_start().len()
3935 } else {
3936 line_info.indent
3937 }
3938 } else {
3939 line_info.indent
3940 };
3941 let adjusted_min_continuation_for_tracking = if let Some(ref block) = current_block {
3942 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3943 if block_bq_level > 0 {
3944 if block.is_ordered { last_marker_width } else { 2 }
3945 } else {
3946 min_continuation_for_tracking
3947 }
3948 } else {
3949 min_continuation_for_tracking
3950 };
3951 let is_structural_element = line_info.heading.is_some()
3954 || line_info.content(content).trim().starts_with("```")
3955 || line_info.content(content).trim().starts_with("~~~");
3956 let is_valid_continuation = effective_continuation_indent >= adjusted_min_continuation_for_tracking
3957 || (line_info.indent == 0 && !line_info.is_blank && !is_structural_element);
3958
3959 if std::env::var("RUMDL_DEBUG_LIST").is_ok() && line_info.list_item.is_none() && !line_info.is_blank {
3960 eprintln!(
3961 "[DEBUG] Line {}: checking continuation - indent={}, min_cont={}, is_valid={}, in_code_span={}, in_code_block={}, has_block={}",
3962 line_num,
3963 effective_continuation_indent,
3964 adjusted_min_continuation_for_tracking,
3965 is_valid_continuation,
3966 line_info.in_code_span_continuation,
3967 line_info.in_code_block,
3968 current_block.is_some()
3969 );
3970 }
3971
3972 if !line_info.in_code_span_continuation
3973 && line_info.list_item.is_none()
3974 && !line_info.is_blank
3975 && !line_info.in_code_block
3976 && is_valid_continuation
3977 && let Some(ref mut block) = current_block
3978 {
3979 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3980 eprintln!(
3981 "[DEBUG] Line {}: extending block.end_line from {} to {}",
3982 line_num, block.end_line, line_num
3983 );
3984 }
3985 block.end_line = line_num;
3986 }
3987
3988 if let Some(list_item) = &line_info.list_item {
3990 let item_indent = list_item.marker_column;
3992 let nesting = item_indent / 2; if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3995 eprintln!(
3996 "[DEBUG] Line {}: list item found, marker={:?}, indent={}",
3997 line_num, list_item.marker, item_indent
3998 );
3999 }
4000
4001 if let Some(ref mut block) = current_block {
4002 let is_nested = nesting > block.nesting_level;
4006 let same_type =
4007 (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
4008 let same_context = block.blockquote_prefix == blockquote_prefix;
4009 let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
4011
4012 let marker_compatible =
4014 block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
4015
4016 let has_non_list_content = has_list_breaking_content_since_last_item;
4019
4020 let mut continues_list = if is_nested {
4024 same_context && reasonable_distance && !has_non_list_content
4026 } else {
4027 same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
4029 };
4030
4031 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4032 eprintln!(
4033 "[DEBUG] Line {}: continues_list={}, is_nested={}, same_type={}, same_context={}, reasonable_distance={}, marker_compatible={}, has_non_list_content={}, last_item={}, block.end_line={}",
4034 line_num,
4035 continues_list,
4036 is_nested,
4037 same_type,
4038 same_context,
4039 reasonable_distance,
4040 marker_compatible,
4041 has_non_list_content,
4042 last_list_item_line,
4043 block.end_line
4044 );
4045 }
4046
4047 if !continues_list
4051 && (is_nested || same_type)
4052 && reasonable_distance
4053 && line_num > 0
4054 && block.end_line == line_num - 1
4055 {
4056 if block.item_lines.contains(&(line_num - 1)) {
4059 continues_list = true;
4061 } else {
4062 continues_list = true;
4066 }
4067 }
4068
4069 if continues_list {
4070 block.end_line = line_num;
4072 block.item_lines.push(line_num);
4073
4074 block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
4076 list_item.marker.len() + 1
4077 } else {
4078 list_item.marker.len()
4079 });
4080
4081 if !block.is_ordered
4083 && block.marker.is_some()
4084 && block.marker.as_ref() != Some(&list_item.marker)
4085 {
4086 block.marker = None;
4088 }
4089
4090 reset_tracking_state(
4092 list_item,
4093 &mut has_list_breaking_content_since_last_item,
4094 &mut min_continuation_for_tracking,
4095 );
4096 } else {
4097 if !same_type
4102 && !is_nested
4103 && let Some(&last_item) = block.item_lines.last()
4104 {
4105 block.end_line = last_item;
4106 }
4107
4108 list_blocks.push(block.clone());
4109
4110 *block = ListBlock {
4111 start_line: line_num,
4112 end_line: line_num,
4113 is_ordered: list_item.is_ordered,
4114 marker: if list_item.is_ordered {
4115 None
4116 } else {
4117 Some(list_item.marker.clone())
4118 },
4119 blockquote_prefix: blockquote_prefix.clone(),
4120 item_lines: vec![line_num],
4121 nesting_level: nesting,
4122 max_marker_width: if list_item.is_ordered {
4123 list_item.marker.len() + 1
4124 } else {
4125 list_item.marker.len()
4126 },
4127 };
4128
4129 reset_tracking_state(
4131 list_item,
4132 &mut has_list_breaking_content_since_last_item,
4133 &mut min_continuation_for_tracking,
4134 );
4135 }
4136 } else {
4137 current_block = Some(ListBlock {
4139 start_line: line_num,
4140 end_line: line_num,
4141 is_ordered: list_item.is_ordered,
4142 marker: if list_item.is_ordered {
4143 None
4144 } else {
4145 Some(list_item.marker.clone())
4146 },
4147 blockquote_prefix,
4148 item_lines: vec![line_num],
4149 nesting_level: nesting,
4150 max_marker_width: list_item.marker.len(),
4151 });
4152
4153 reset_tracking_state(
4155 list_item,
4156 &mut has_list_breaking_content_since_last_item,
4157 &mut min_continuation_for_tracking,
4158 );
4159 }
4160
4161 last_list_item_line = line_num;
4162 current_indent_level = item_indent;
4163 last_marker_width = if list_item.is_ordered {
4164 list_item.marker.len() + 1 } else {
4166 list_item.marker.len()
4167 };
4168 } else if let Some(ref mut block) = current_block {
4169 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4171 eprintln!(
4172 "[DEBUG] Line {}: non-list-item, is_blank={}, block exists",
4173 line_num, line_info.is_blank
4174 );
4175 }
4176
4177 let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
4185 lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
4186 } else {
4187 false
4188 };
4189
4190 let min_continuation_indent = if block.is_ordered {
4194 current_indent_level + last_marker_width
4195 } else {
4196 current_indent_level + 2 };
4198
4199 if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
4200 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4202 eprintln!(
4203 "[DEBUG] Line {}: indented continuation (indent={}, min={})",
4204 line_num, line_info.indent, min_continuation_indent
4205 );
4206 }
4207 block.end_line = line_num;
4208 } else if line_info.is_blank {
4209 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4212 eprintln!("[DEBUG] Line {line_num}: entering blank line handling");
4213 }
4214 let mut check_idx = line_idx + 1;
4215 let mut found_continuation = false;
4216
4217 while check_idx < lines.len() && lines[check_idx].is_blank {
4219 check_idx += 1;
4220 }
4221
4222 if check_idx < lines.len() {
4223 let next_line = &lines[check_idx];
4224 let next_content = next_line.content(content);
4226 let block_bq_level_for_indent = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4229 let next_bq_level_for_indent = next_content
4230 .chars()
4231 .take_while(|c| *c == '>' || c.is_whitespace())
4232 .filter(|&c| c == '>')
4233 .count();
4234 let effective_indent =
4235 if next_bq_level_for_indent > 0 && next_bq_level_for_indent == block_bq_level_for_indent {
4236 let mut pos = 0;
4239 let mut found_markers = 0;
4240 for c in next_content.chars() {
4241 pos += c.len_utf8();
4242 if c == '>' {
4243 found_markers += 1;
4244 if found_markers == next_bq_level_for_indent {
4245 if next_content.get(pos..pos + 1) == Some(" ") {
4247 pos += 1;
4248 }
4249 break;
4250 }
4251 }
4252 }
4253 let after_blockquote_marker = &next_content[pos..];
4254 after_blockquote_marker.len() - after_blockquote_marker.trim_start().len()
4255 } else {
4256 next_line.indent
4257 };
4258 let adjusted_min_continuation = if block_bq_level_for_indent > 0 {
4261 if block.is_ordered { last_marker_width } else { 2 }
4264 } else {
4265 min_continuation_indent
4266 };
4267 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4269 eprintln!(
4270 "[DEBUG] Blank line {} checking next line {}: effective_indent={}, adjusted_min={}, next_is_list={}, in_code_block={}",
4271 line_num,
4272 check_idx + 1,
4273 effective_indent,
4274 adjusted_min_continuation,
4275 next_line.list_item.is_some(),
4276 next_line.in_code_block
4277 );
4278 }
4279 if !next_line.in_code_block && effective_indent >= adjusted_min_continuation {
4280 found_continuation = true;
4281 }
4282 else if !next_line.in_code_block
4284 && next_line.list_item.is_some()
4285 && let Some(item) = &next_line.list_item
4286 {
4287 let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
4288 .find(next_line.content(content))
4289 .map_or(String::new(), |m| m.as_str().to_string());
4290 if item.marker_column == current_indent_level
4291 && item.is_ordered == block.is_ordered
4292 && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
4293 {
4294 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4298 let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
4299 if let Some(between_line) = lines.get(idx) {
4300 let between_content = between_line.content(content);
4301 let trimmed = between_content.trim();
4302 if trimmed.is_empty() {
4304 return false;
4305 }
4306 let line_indent = between_content.len() - between_content.trim_start().len();
4308
4309 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4311 .find(between_content)
4312 .map_or(String::new(), |m| m.as_str().to_string());
4313 let between_bq_level = between_bq_prefix.chars().filter(|&c| c == '>').count();
4314 let blockquote_level_changed =
4315 trimmed.starts_with(">") && between_bq_level != block_bq_level;
4316
4317 if trimmed.starts_with("```")
4319 || trimmed.starts_with("~~~")
4320 || trimmed.starts_with("---")
4321 || trimmed.starts_with("***")
4322 || trimmed.starts_with("___")
4323 || blockquote_level_changed
4324 || crate::utils::skip_context::is_table_line(trimmed)
4325 || between_line.heading.is_some()
4326 {
4327 return true; }
4329
4330 line_indent >= min_continuation_indent
4332 } else {
4333 false
4334 }
4335 });
4336
4337 if block.is_ordered {
4338 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4341 if let Some(between_line) = lines.get(idx) {
4342 let between_content = between_line.content(content);
4343 let trimmed = between_content.trim();
4344 if trimmed.is_empty() {
4345 return false;
4346 }
4347 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4349 .find(between_content)
4350 .map_or(String::new(), |m| m.as_str().to_string());
4351 let between_bq_level =
4352 between_bq_prefix.chars().filter(|&c| c == '>').count();
4353 let blockquote_level_changed =
4354 trimmed.starts_with(">") && between_bq_level != block_bq_level;
4355 trimmed.starts_with("```")
4357 || trimmed.starts_with("~~~")
4358 || trimmed.starts_with("---")
4359 || trimmed.starts_with("***")
4360 || trimmed.starts_with("___")
4361 || blockquote_level_changed
4362 || crate::utils::skip_context::is_table_line(trimmed)
4363 || between_line.heading.is_some()
4364 } else {
4365 false
4366 }
4367 });
4368 found_continuation = !has_structural_separators;
4369 } else {
4370 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4372 if let Some(between_line) = lines.get(idx) {
4373 let between_content = between_line.content(content);
4374 let trimmed = between_content.trim();
4375 if trimmed.is_empty() {
4376 return false;
4377 }
4378 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4380 .find(between_content)
4381 .map_or(String::new(), |m| m.as_str().to_string());
4382 let between_bq_level =
4383 between_bq_prefix.chars().filter(|&c| c == '>').count();
4384 let blockquote_level_changed =
4385 trimmed.starts_with(">") && between_bq_level != block_bq_level;
4386 trimmed.starts_with("```")
4388 || trimmed.starts_with("~~~")
4389 || trimmed.starts_with("---")
4390 || trimmed.starts_with("***")
4391 || trimmed.starts_with("___")
4392 || blockquote_level_changed
4393 || crate::utils::skip_context::is_table_line(trimmed)
4394 || between_line.heading.is_some()
4395 } else {
4396 false
4397 }
4398 });
4399 found_continuation = !has_structural_separators;
4400 }
4401 }
4402 }
4403 }
4404
4405 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4406 eprintln!("[DEBUG] Blank line {line_num} final: found_continuation={found_continuation}");
4407 }
4408 if found_continuation {
4409 block.end_line = line_num;
4411 } else {
4412 list_blocks.push(block.clone());
4414 current_block = None;
4415 }
4416 } else {
4417 let min_required_indent = if block.is_ordered {
4420 current_indent_level + last_marker_width
4421 } else {
4422 current_indent_level + 2
4423 };
4424
4425 let line_content = line_info.content(content).trim();
4430
4431 let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
4433
4434 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4437 let current_bq_level = blockquote_prefix.chars().filter(|&c| c == '>').count();
4438 let blockquote_level_changed = line_content.starts_with(">") && current_bq_level != block_bq_level;
4439
4440 let is_structural_separator = line_info.heading.is_some()
4441 || line_content.starts_with("```")
4442 || line_content.starts_with("~~~")
4443 || line_content.starts_with("---")
4444 || line_content.starts_with("***")
4445 || line_content.starts_with("___")
4446 || blockquote_level_changed
4447 || looks_like_table;
4448
4449 let is_lazy_continuation = !is_structural_separator
4453 && !line_info.is_blank
4454 && (line_info.indent == 0
4455 || line_info.indent >= min_required_indent
4456 || line_info.in_code_span_continuation);
4457
4458 if is_lazy_continuation {
4459 block.end_line = line_num;
4462 } else {
4463 list_blocks.push(block.clone());
4465 current_block = None;
4466 }
4467 }
4468 }
4469 }
4470
4471 if let Some(block) = current_block {
4473 list_blocks.push(block);
4474 }
4475
4476 merge_adjacent_list_blocks(content, &mut list_blocks, lines);
4478
4479 list_blocks
4480 }
4481
4482 fn compute_char_frequency(content: &str) -> CharFrequency {
4484 let mut frequency = CharFrequency::default();
4485
4486 for ch in content.chars() {
4487 match ch {
4488 '#' => frequency.hash_count += 1,
4489 '*' => frequency.asterisk_count += 1,
4490 '_' => frequency.underscore_count += 1,
4491 '-' => frequency.hyphen_count += 1,
4492 '+' => frequency.plus_count += 1,
4493 '>' => frequency.gt_count += 1,
4494 '|' => frequency.pipe_count += 1,
4495 '[' => frequency.bracket_count += 1,
4496 '`' => frequency.backtick_count += 1,
4497 '<' => frequency.lt_count += 1,
4498 '!' => frequency.exclamation_count += 1,
4499 '\n' => frequency.newline_count += 1,
4500 _ => {}
4501 }
4502 }
4503
4504 frequency
4505 }
4506
4507 fn parse_html_tags(
4509 content: &str,
4510 lines: &[LineInfo],
4511 code_blocks: &[(usize, usize)],
4512 flavor: MarkdownFlavor,
4513 ) -> Vec<HtmlTag> {
4514 static HTML_TAG_REGEX: LazyLock<regex::Regex> =
4515 LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
4516
4517 let mut html_tags = Vec::with_capacity(content.matches('<').count());
4518
4519 for cap in HTML_TAG_REGEX.captures_iter(content) {
4520 let full_match = cap.get(0).unwrap();
4521 let match_start = full_match.start();
4522 let match_end = full_match.end();
4523
4524 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4526 continue;
4527 }
4528
4529 let is_closing = !cap.get(1).unwrap().as_str().is_empty();
4530 let tag_name_original = cap.get(2).unwrap().as_str();
4531 let tag_name = tag_name_original.to_lowercase();
4532 let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
4533
4534 if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
4537 continue;
4538 }
4539
4540 let mut line_num = 1;
4542 let mut col_start = match_start;
4543 let mut col_end = match_end;
4544 for (idx, line_info) in lines.iter().enumerate() {
4545 if match_start >= line_info.byte_offset {
4546 line_num = idx + 1;
4547 col_start = match_start - line_info.byte_offset;
4548 col_end = match_end - line_info.byte_offset;
4549 } else {
4550 break;
4551 }
4552 }
4553
4554 html_tags.push(HtmlTag {
4555 line: line_num,
4556 start_col: col_start,
4557 end_col: col_end,
4558 byte_offset: match_start,
4559 byte_end: match_end,
4560 tag_name,
4561 is_closing,
4562 is_self_closing,
4563 raw_content: full_match.as_str().to_string(),
4564 });
4565 }
4566
4567 html_tags
4568 }
4569
4570 fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
4572 let mut table_rows = Vec::with_capacity(lines.len() / 20);
4573
4574 for (line_idx, line_info) in lines.iter().enumerate() {
4575 if line_info.in_code_block || line_info.is_blank {
4577 continue;
4578 }
4579
4580 let line = line_info.content(content);
4581 let line_num = line_idx + 1;
4582
4583 if !line.contains('|') {
4585 continue;
4586 }
4587
4588 let parts: Vec<&str> = line.split('|').collect();
4590 let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
4591
4592 let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
4594 let mut column_alignments = Vec::new();
4595
4596 if is_separator {
4597 for part in &parts[1..parts.len() - 1] {
4598 let trimmed = part.trim();
4600 let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
4601 "center".to_string()
4602 } else if trimmed.ends_with(':') {
4603 "right".to_string()
4604 } else if trimmed.starts_with(':') {
4605 "left".to_string()
4606 } else {
4607 "none".to_string()
4608 };
4609 column_alignments.push(alignment);
4610 }
4611 }
4612
4613 table_rows.push(TableRow {
4614 line: line_num,
4615 is_separator,
4616 column_count,
4617 column_alignments,
4618 });
4619 }
4620
4621 table_rows
4622 }
4623
4624 fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
4626 let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
4627
4628 for cap in URL_SIMPLE_REGEX.captures_iter(content) {
4630 let full_match = cap.get(0).unwrap();
4631 let match_start = full_match.start();
4632 let match_end = full_match.end();
4633
4634 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4636 continue;
4637 }
4638
4639 let preceding_char = if match_start > 0 {
4641 content.chars().nth(match_start - 1)
4642 } else {
4643 None
4644 };
4645 let following_char = content.chars().nth(match_end);
4646
4647 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4648 continue;
4649 }
4650 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4651 continue;
4652 }
4653
4654 let url = full_match.as_str();
4655 let url_type = if url.starts_with("https://") {
4656 "https"
4657 } else if url.starts_with("http://") {
4658 "http"
4659 } else if url.starts_with("ftp://") {
4660 "ftp"
4661 } else {
4662 "other"
4663 };
4664
4665 let mut line_num = 1;
4667 let mut col_start = match_start;
4668 let mut col_end = match_end;
4669 for (idx, line_info) in lines.iter().enumerate() {
4670 if match_start >= line_info.byte_offset {
4671 line_num = idx + 1;
4672 col_start = match_start - line_info.byte_offset;
4673 col_end = match_end - line_info.byte_offset;
4674 } else {
4675 break;
4676 }
4677 }
4678
4679 bare_urls.push(BareUrl {
4680 line: line_num,
4681 start_col: col_start,
4682 end_col: col_end,
4683 byte_offset: match_start,
4684 byte_end: match_end,
4685 url: url.to_string(),
4686 url_type: url_type.to_string(),
4687 });
4688 }
4689
4690 for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
4692 let full_match = cap.get(0).unwrap();
4693 let match_start = full_match.start();
4694 let match_end = full_match.end();
4695
4696 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4698 continue;
4699 }
4700
4701 let preceding_char = if match_start > 0 {
4703 content.chars().nth(match_start - 1)
4704 } else {
4705 None
4706 };
4707 let following_char = content.chars().nth(match_end);
4708
4709 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4710 continue;
4711 }
4712 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4713 continue;
4714 }
4715
4716 let email = full_match.as_str();
4717
4718 let mut line_num = 1;
4720 let mut col_start = match_start;
4721 let mut col_end = match_end;
4722 for (idx, line_info) in lines.iter().enumerate() {
4723 if match_start >= line_info.byte_offset {
4724 line_num = idx + 1;
4725 col_start = match_start - line_info.byte_offset;
4726 col_end = match_end - line_info.byte_offset;
4727 } else {
4728 break;
4729 }
4730 }
4731
4732 bare_urls.push(BareUrl {
4733 line: line_num,
4734 start_col: col_start,
4735 end_col: col_end,
4736 byte_offset: match_start,
4737 byte_end: match_end,
4738 url: email.to_string(),
4739 url_type: "email".to_string(),
4740 });
4741 }
4742
4743 bare_urls
4744 }
4745
    /// Returns a [`ValidHeadingsIter`] constructed over this context's `lines`.
    ///
    /// NOTE(review): presumably yields only headings whose `is_valid` flag is set — confirm
    /// against `ValidHeadingsIter`.
    #[must_use]
    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
        ValidHeadingsIter::new(&self.lines)
    }
4769
4770 #[must_use]
4774 pub fn has_valid_headings(&self) -> bool {
4775 self.lines
4776 .iter()
4777 .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
4778 }
4779}
4780
4781fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
4783 if list_blocks.len() < 2 {
4784 return;
4785 }
4786
4787 let mut merger = ListBlockMerger::new(content, lines);
4788 *list_blocks = merger.merge(list_blocks);
4789}
4790
/// Borrowed view of a document used while deciding which adjacent list blocks to merge.
struct ListBlockMerger<'a> {
    /// Full document text; individual line contents are obtained through `LineInfo::content`.
    content: &'a str,
    /// Per-line information, indexed by 1-based line number minus one.
    lines: &'a [LineInfo],
}
4796
4797impl<'a> ListBlockMerger<'a> {
4798 fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
4799 Self { content, lines }
4800 }
4801
4802 fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
4803 let mut merged = Vec::with_capacity(list_blocks.len());
4804 let mut current = list_blocks[0].clone();
4805
4806 for next in list_blocks.iter().skip(1) {
4807 if self.should_merge_blocks(¤t, next) {
4808 current = self.merge_two_blocks(current, next);
4809 } else {
4810 merged.push(current);
4811 current = next.clone();
4812 }
4813 }
4814
4815 merged.push(current);
4816 merged
4817 }
4818
4819 fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
4821 if !self.blocks_are_compatible(current, next) {
4823 return false;
4824 }
4825
4826 let spacing = self.analyze_spacing_between(current, next);
4828 match spacing {
4829 BlockSpacing::Consecutive => true,
4830 BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
4831 BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
4832 self.can_merge_with_content_between(current, next)
4833 }
4834 }
4835 }
4836
4837 fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
4839 current.is_ordered == next.is_ordered
4840 && current.blockquote_prefix == next.blockquote_prefix
4841 && current.nesting_level == next.nesting_level
4842 }
4843
4844 fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
4846 let gap = next.start_line - current.end_line;
4847
4848 match gap {
4849 1 => BlockSpacing::Consecutive,
4850 2 => BlockSpacing::SingleBlank,
4851 _ if gap > 2 => {
4852 if self.has_only_blank_lines_between(current, next) {
4853 BlockSpacing::MultipleBlanks
4854 } else {
4855 BlockSpacing::ContentBetween
4856 }
4857 }
4858 _ => BlockSpacing::Consecutive, }
4860 }
4861
4862 fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4864 if has_meaningful_content_between(self.content, current, next, self.lines) {
4867 return false; }
4869
4870 !current.is_ordered && current.marker == next.marker
4872 }
4873
4874 fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4876 if has_meaningful_content_between(self.content, current, next, self.lines) {
4878 return false; }
4880
4881 current.is_ordered && next.is_ordered
4883 }
4884
4885 fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4887 for line_num in (current.end_line + 1)..next.start_line {
4888 if let Some(line_info) = self.lines.get(line_num - 1)
4889 && !line_info.content(self.content).trim().is_empty()
4890 {
4891 return false;
4892 }
4893 }
4894 true
4895 }
4896
4897 fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
4899 current.end_line = next.end_line;
4900 current.item_lines.extend_from_slice(&next.item_lines);
4901
4902 current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
4904
4905 if !current.is_ordered && self.markers_differ(¤t, next) {
4907 current.marker = None; }
4909
4910 current
4911 }
4912
4913 fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
4915 current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
4916 }
4917}
4918
/// How two list blocks are separated, as classified by
/// `ListBlockMerger::analyze_spacing_between`.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// The next block starts on the line right after the current one ends.
    Consecutive,
    /// Exactly one line lies between the two blocks.
    SingleBlank,
    /// More than one line lies between the blocks and all of them are blank.
    MultipleBlanks,
    /// More than one line lies between the blocks and at least one is not blank.
    ContentBetween,
}
4927
4928fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
4930 for line_num in (current.end_line + 1)..next.start_line {
4932 if let Some(line_info) = lines.get(line_num - 1) {
4933 let trimmed = line_info.content(content).trim();
4935
4936 if trimmed.is_empty() {
4938 continue;
4939 }
4940
4941 if line_info.heading.is_some() {
4945 return true; }
4947
4948 if is_horizontal_rule(trimmed) {
4950 return true; }
4952
4953 if crate::utils::skip_context::is_table_line(trimmed) {
4955 return true; }
4957
4958 if trimmed.starts_with('>') {
4960 return true; }
4962
4963 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
4965 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4966
4967 let min_continuation_indent = if current.is_ordered {
4969 current.nesting_level + current.max_marker_width + 1 } else {
4971 current.nesting_level + 2
4972 };
4973
4974 if line_indent < min_continuation_indent {
4975 return true; }
4978 }
4979
4980 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4982
4983 let min_indent = if current.is_ordered {
4985 current.nesting_level + current.max_marker_width
4986 } else {
4987 current.nesting_level + 2
4988 };
4989
4990 if line_indent < min_indent {
4992 return true; }
4994
4995 }
4998 }
4999
5000 false
5002}
5003
5004pub fn is_horizontal_rule_line(line: &str) -> bool {
5011 let leading_spaces = line.len() - line.trim_start_matches(' ').len();
5013 if leading_spaces > 3 || line.starts_with('\t') {
5014 return false;
5015 }
5016
5017 is_horizontal_rule_content(line.trim())
5018}
5019
/// Checks whether already-trimmed text forms a horizontal rule.
///
/// The text must be at least three bytes long, start with `-`, `*` or `_`, consist only of
/// that character plus spaces and tabs, and contain the character at least three times.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    if trimmed.len() < 3 {
        return false;
    }

    let marker = match trimmed.chars().next() {
        Some(c @ ('-' | '*' | '_')) => c,
        _ => return false,
    };

    let only_marker_and_blanks = trimmed.chars().all(|c| c == marker || c == ' ' || c == '\t');
    only_marker_and_blanks && trimmed.chars().filter(|&c| c == marker).count() >= 3
}
5044
/// Thin alias for [`is_horizontal_rule_content`]; expects text that is already trimmed.
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}
5049
5050#[cfg(test)]
5052mod tests {
5053 use super::*;
5054
    /// An empty document yields a single line offset of 0, no `lines` entries, and maps
    /// offset 0 to line 1, column 1.
    #[test]
    fn test_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }
5063
    /// A single line without a trailing newline has one line offset, and byte offsets map
    /// to 1-based columns on line 1.
    #[test]
    fn test_single_line() {
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
    }
5072
5073 #[test]
5074 fn test_multi_line() {
5075 let content = "# Title\n\nSecond line\nThird line";
5076 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5077 assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
5078 assert_eq!(ctx.offset_to_line_col(0), (1, 1)); assert_eq!(ctx.offset_to_line_col(8), (2, 1)); assert_eq!(ctx.offset_to_line_col(9), (3, 1)); assert_eq!(ctx.offset_to_line_col(15), (3, 7)); assert_eq!(ctx.offset_to_line_col(21), (4, 1)); }
5085
    /// Checks per-line metadata (content, byte offset, indent, blank / code-block / list
    /// flags) and the `line_to_byte_offset` / `line_info` accessors on a seven-line document.
    #[test]
    fn test_line_info() {
        let content = "# Title\n indented\n\ncode:\n```rust\nfn main() {}\n```";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.lines.len(), 7);

        // Line 1: the heading.
        let line1 = &ctx.lines[0];
        assert_eq!(line1.content(ctx.content), "# Title");
        assert_eq!(line1.byte_offset, 0);
        assert_eq!(line1.indent, 0);
        assert!(!line1.is_blank);
        assert!(!line1.in_code_block);
        assert!(line1.list_item.is_none());

        // Line 2: the indented text.
        let line2 = &ctx.lines[1];
        assert_eq!(line2.content(ctx.content), " indented");
        assert_eq!(line2.byte_offset, 8);
        assert_eq!(line2.indent, 4);
        assert!(!line2.is_blank);

        // Line 3: blank.
        let line3 = &ctx.lines[2];
        assert_eq!(line3.content(ctx.content), "");
        assert!(line3.is_blank);

        // Accessors take 1-based line numbers.
        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
    }
5121
    /// Checks that unordered and ordered list items are detected with the expected marker,
    /// ordering flag, columns and number, and that plain text is not reported as a list item.
    #[test]
    fn test_list_item_detection() {
        let content = "- Unordered item\n * Nested item\n1. Ordered item\n 2) Nested ordered\n\nNot a list";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Line 1: top-level unordered item.
        let line1 = &ctx.lines[0];
        assert!(line1.list_item.is_some());
        let list1 = line1.list_item.as_ref().unwrap();
        assert_eq!(list1.marker, "-");
        assert!(!list1.is_ordered);
        assert_eq!(list1.marker_column, 0);
        assert_eq!(list1.content_column, 2);

        // Line 2: nested unordered item.
        let line2 = &ctx.lines[1];
        assert!(line2.list_item.is_some());
        let list2 = line2.list_item.as_ref().unwrap();
        assert_eq!(list2.marker, "*");
        assert_eq!(list2.marker_column, 2);

        // Line 3: ordered item.
        let line3 = &ctx.lines[2];
        assert!(line3.list_item.is_some());
        let list3 = line3.list_item.as_ref().unwrap();
        assert_eq!(list3.marker, "1.");
        assert!(list3.is_ordered);
        assert_eq!(list3.number, Some(1));

        // Line 6: plain paragraph text.
        let line6 = &ctx.lines[5];
        assert!(line6.list_item.is_none());
    }
5155
5156 #[test]
5157 fn test_offset_to_line_col_edge_cases() {
5158 let content = "a\nb\nc";
5159 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5160 assert_eq!(ctx.offset_to_line_col(0), (1, 1)); assert_eq!(ctx.offset_to_line_col(1), (1, 2)); assert_eq!(ctx.offset_to_line_col(2), (2, 1)); assert_eq!(ctx.offset_to_line_col(3), (2, 2)); assert_eq!(ctx.offset_to_line_col(4), (3, 1)); assert_eq!(ctx.offset_to_line_col(5), (3, 2)); }
5168
    /// With the MDX flavor, the leading `import` / `export` lines are flagged as
    /// `in_esm_block`, while the following blank, heading and text lines are not.
    #[test]
    fn test_mdx_esm_blocks() {
        let content = r##"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall

In {year}, the snowfall was above average.
It was followed by a warm spring which caused
flood conditions in many of the nearby rivers.

<Chart color="#fcb32c" year={year} />
"##;

        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);

        assert_eq!(ctx.lines.len(), 10);
        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
        assert!(
            !ctx.lines[3].in_esm_block,
            "Line 4 (heading) should NOT be in_esm_block"
        );
        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
    }
5197
    /// With the Standard flavor, `import` / `export` lines are not flagged as `in_esm_block`.
    #[test]
    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
        let content = r#"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(
            !ctx.lines[0].in_esm_block,
            "Line 1 should NOT be in_esm_block in Standard flavor"
        );
        assert!(
            !ctx.lines[1].in_esm_block,
            "Line 2 should NOT be in_esm_block in Standard flavor"
        );
    }
5218
    /// Blockquote lines whose content starts with `-S ...` are recorded as blockquotes
    /// (nesting level 1) with the expected content and multiple-spaces flag.
    #[test]
    fn test_blockquote_with_indented_content() {
        let content = r#"# Heading

> -S socket-path
> More text
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(
            ctx.lines.get(2).is_some_and(|l| l.blockquote.is_some()),
            "Line 3 should be a blockquote"
        );
        assert!(
            ctx.lines.get(3).is_some_and(|l| l.blockquote.is_some()),
            "Line 4 should be a blockquote"
        );

        // Line 3: content after the marker, with the extra-spaces flag set.
        let bq3 = ctx.lines.get(2).unwrap().blockquote.as_ref().unwrap();
        assert_eq!(bq3.content, "-S socket-path");
        assert_eq!(bq3.nesting_level, 1);
        assert!(bq3.has_multiple_spaces_after_marker);

        // Line 4: ordinary blockquote text.
        let bq4 = ctx.lines.get(3).unwrap().blockquote.as_ref().unwrap();
        assert_eq!(bq4.content, "More text");
        assert_eq!(bq4.nesting_level, 1);
    }
5254
    /// Footnote definitions (`[^id]: ...`) are excluded from `reference_defs`; only the
    /// regular reference definition is parsed, with its id, URL and title.
    #[test]
    fn test_footnote_definitions_not_parsed_as_reference_defs() {
        let content = r#"# Title

A footnote[^1].

[^1]: This is the footnote content.

[^note]: Another footnote with [link](https://example.com).

[regular]: ./path.md "A real reference definition"
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(
            ctx.reference_defs.len(),
            1,
            "Footnotes should not be parsed as reference definitions"
        );

        // The single remaining definition is the regular one.
        assert_eq!(ctx.reference_defs[0].id, "regular");
        assert_eq!(ctx.reference_defs[0].url, "./path.md");
        assert_eq!(
            ctx.reference_defs[0].title,
            Some("A real reference definition".to_string())
        );
    }
5285
    /// A footnote definition whose body is an inline link must not produce a reference
    /// definition.
    #[test]
    fn test_footnote_with_inline_link_not_misidentified() {
        let content = r#"# Title

A footnote[^1].

[^1]: [link](https://www.google.com).
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(
            ctx.reference_defs.is_empty(),
            "Footnote with inline link should not create a reference definition"
        );
    }
5304
    /// Numeric, named, single-character, hyphenated and mixed footnote ids are all excluded
    /// from `reference_defs`, leaving only the two regular definitions.
    #[test]
    fn test_various_footnote_formats_excluded() {
        let content = r#"[^1]: Numeric footnote
[^note]: Named footnote
[^a]: Single char footnote
[^long-footnote-name]: Long named footnote
[^123abc]: Mixed alphanumeric

[ref1]: ./file1.md
[ref2]: ./file2.md
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(
            ctx.reference_defs.len(),
            2,
            "Only regular reference definitions should be parsed"
        );

        // No parsed id may start with the footnote caret.
        let ids: Vec<&str> = ctx.reference_defs.iter().map(|r| r.id.as_str()).collect();
        assert!(ids.contains(&"ref1"));
        assert!(ids.contains(&"ref2"));
        assert!(!ids.iter().any(|id| id.starts_with('^')));
    }
5331
5332 #[test]
5337 fn test_has_char_tracked_characters() {
5338 let content = "# Heading\n* list item\n_emphasis_ and -hyphen-\n+ plus\n> quote\n| table |\n[link]\n`code`\n<html>\n!image";
5340 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5341
5342 assert!(ctx.has_char('#'), "Should detect hash");
5344 assert!(ctx.has_char('*'), "Should detect asterisk");
5345 assert!(ctx.has_char('_'), "Should detect underscore");
5346 assert!(ctx.has_char('-'), "Should detect hyphen");
5347 assert!(ctx.has_char('+'), "Should detect plus");
5348 assert!(ctx.has_char('>'), "Should detect gt");
5349 assert!(ctx.has_char('|'), "Should detect pipe");
5350 assert!(ctx.has_char('['), "Should detect bracket");
5351 assert!(ctx.has_char('`'), "Should detect backtick");
5352 assert!(ctx.has_char('<'), "Should detect lt");
5353 assert!(ctx.has_char('!'), "Should detect exclamation");
5354 assert!(ctx.has_char('\n'), "Should detect newline");
5355 }
5356
5357 #[test]
5358 fn test_has_char_absent_characters() {
5359 let content = "Simple text without special chars";
5360 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5361
5362 assert!(!ctx.has_char('#'), "Should not detect hash");
5364 assert!(!ctx.has_char('*'), "Should not detect asterisk");
5365 assert!(!ctx.has_char('_'), "Should not detect underscore");
5366 assert!(!ctx.has_char('-'), "Should not detect hyphen");
5367 assert!(!ctx.has_char('+'), "Should not detect plus");
5368 assert!(!ctx.has_char('>'), "Should not detect gt");
5369 assert!(!ctx.has_char('|'), "Should not detect pipe");
5370 assert!(!ctx.has_char('['), "Should not detect bracket");
5371 assert!(!ctx.has_char('`'), "Should not detect backtick");
5372 assert!(!ctx.has_char('<'), "Should not detect lt");
5373 assert!(!ctx.has_char('!'), "Should not detect exclamation");
5374 assert!(!ctx.has_char('\n'), "Should not detect newline in single line");
5376 }
5377
5378 #[test]
5379 fn test_has_char_fallback_for_untracked() {
5380 let content = "Text with @mention and $dollar and %percent";
5381 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5382
5383 assert!(ctx.has_char('@'), "Should detect @ via fallback");
5385 assert!(ctx.has_char('$'), "Should detect $ via fallback");
5386 assert!(ctx.has_char('%'), "Should detect % via fallback");
5387 assert!(!ctx.has_char('^'), "Should not detect absent ^ via fallback");
5388 }
5389
/// `char_count` must return the exact number of occurrences of each probed
/// character in a multi-line fixture that repeats every marker.
#[test]
fn test_char_count_tracked_characters() {
    let text = "## Heading ##\n***bold***\n__emphasis__\n---\n+++\n>> nested\n|| table ||\n[[link]]\n``code``\n<<html>>\n!!";
    let ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    // (character, expected occurrences, failure message), checked in this order.
    let expectations = [
        ('#', 4, "Should count 4 hashes"),
        ('*', 6, "Should count 6 asterisks"),
        ('_', 4, "Should count 4 underscores"),
        ('-', 3, "Should count 3 hyphens"),
        ('+', 3, "Should count 3 pluses"),
        ('>', 4, "Should count 4 gt (2 nested + 2 in <<html>>)"),
        ('|', 4, "Should count 4 pipes"),
        ('[', 2, "Should count 2 brackets"),
        ('`', 4, "Should count 4 backticks"),
        ('<', 2, "Should count 2 lt"),
        ('!', 2, "Should count 2 exclamations"),
        ('\n', 10, "Should count 10 newlines"),
    ];
    for (ch, expected, msg) in expectations {
        assert_eq!(ctx.char_count(ch), expected, "{msg}");
    }
}
5409
/// `char_count` must be zero for characters that do not occur in the input.
#[test]
fn test_char_count_zero_for_absent() {
    let ctx = LintContext::new("Plain text", MarkdownFlavor::Standard, None);

    // None of these appear in "Plain text".
    for absent in ['#', '*', '_', '\n'] {
        assert_eq!(ctx.char_count(absent), 0);
    }
}
5420
/// `char_count` must count `@`, `$` and `%` exactly and return zero for `^`,
/// which is not in the fixture.
#[test]
fn test_char_count_fallback_for_untracked() {
    let text = "@@@ $$ %%%";
    let ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    // (character, expected occurrences, failure message).
    let expectations = [
        ('@', 3, "Should count 3 @ via fallback"),
        ('$', 2, "Should count 2 $ via fallback"),
        ('%', 3, "Should count 3 % via fallback"),
        ('^', 0, "Should count 0 for absent char"),
    ];
    for (ch, expected, msg) in expectations {
        assert_eq!(ctx.char_count(ch), expected, "{msg}");
    }
}
5431
/// With an empty document every count is zero and every presence check is false.
#[test]
fn test_char_count_empty_content() {
    let empty = LintContext::new("", MarkdownFlavor::Standard, None);

    for ch in ['#', '*', '@'] {
        assert_eq!(empty.char_count(ch), 0);
    }
    for ch in ['#', '@'] {
        assert!(!empty.has_char(ch));
    }
}
5442
/// Probes `is_in_html_tag` at byte offsets inside the opening tag, inside the
/// text between the tags, and inside the closing tag of `<div>content</div>`.
#[test]
fn test_is_in_html_tag_simple() {
    let html = "<div>content</div>";
    let ctx = LintContext::new(html, MarkdownFlavor::Standard, None);

    // (byte offset, expected answer, failure message), checked in document order.
    let probes = [
        (0, true, "Position 0 (<) should be in tag"),
        (1, true, "Position 1 (d) should be in tag"),
        (4, true, "Position 4 (>) should be in tag"),
        (5, false, "Position 5 (c) should not be in tag"),
        (10, false, "Position 10 (t) should not be in tag"),
        (12, true, "Position 12 (<) should be in tag"),
        (17, true, "Position 17 (>) should be in tag"),
    ];
    for (offset, expected, msg) in probes {
        assert!(ctx.is_in_html_tag(offset) == expected, "{msg}");
    }
}
5465
/// Probes `is_in_html_tag` around a self-closing `<br/>` embedded in text.
#[test]
fn test_is_in_html_tag_self_closing() {
    let text = "Text <br/> more text";
    let ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    // (byte offset, expected answer, failure message), checked left to right.
    let probes = [
        (0, false, "Position 0 should not be in tag"),
        (4, false, "Position 4 (space) should not be in tag"),
        (5, true, "Position 5 (<) should be in tag"),
        (8, true, "Position 8 (/) should be in tag"),
        (9, true, "Position 9 (>) should be in tag"),
        (10, false, "Position 10 (space) should not be in tag"),
    ];
    for (offset, expected, msg) in probes {
        assert!(ctx.is_in_html_tag(offset) == expected, "{msg}");
    }
}
5483
/// Probes `is_in_html_tag` on an anchor element whose opening tag carries
/// two quoted attributes.
#[test]
fn test_is_in_html_tag_with_attributes() {
    let anchor = r#"<a href="url" class="link">text</a>"#;
    let ctx = LintContext::new(anchor, MarkdownFlavor::Standard, None);

    // (byte offset, expected answer, failure message), checked left to right.
    let probes = [
        (0, true, "Start of tag"),
        (10, true, "Inside href attribute"),
        (20, true, "Inside class attribute"),
        (26, true, "End of opening tag"),
        (27, false, "Start of content"),
        (30, false, "End of content"),
        (31, true, "Start of closing tag"),
    ];
    for (offset, expected, msg) in probes {
        assert!(ctx.is_in_html_tag(offset) == expected, "{msg}");
    }
}
5502
/// Probes `is_in_html_tag` on an opening tag that is spread over several lines.
#[test]
fn test_is_in_html_tag_multiline() {
    let html = "<div\n class=\"test\"\n>\ncontent\n</div>";
    let ctx = LintContext::new(html, MarkdownFlavor::Standard, None);

    let inside_tag = [
        (0, "Start of multiline tag"),
        (5, "After first newline in tag"),
        (15, "Inside attribute"),
    ];
    for (offset, msg) in inside_tag {
        assert!(ctx.is_in_html_tag(offset), "{msg}");
    }

    // Two bytes past the first ">\n" is the first byte of the following line.
    let after_opening_tag = html.find(">\n").unwrap() + 2;
    assert!(!ctx.is_in_html_tag(after_opening_tag), "Content after tag");
}
5517
/// In text without any HTML, no byte offset may be reported as inside a tag.
#[test]
fn test_is_in_html_tag_no_tags() {
    let text = "Plain text without any HTML";
    let ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    (0..text.len()).for_each(|i| {
        assert!(!ctx.is_in_html_tag(i), "Position {i} should not be in tag");
    });
}
5528
/// Probes `is_in_jinja_range` before, inside and after a `{{ name }}` expression.
#[test]
fn test_is_in_jinja_range_expression() {
    let text = "Hello {{ name }}!";
    let ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    // (byte offset, expected answer, failure message), checked left to right.
    let probes = [
        (0, false, "H should not be in Jinja"),
        (5, false, "Space before Jinja should not be in Jinja"),
        (6, true, "First brace should be in Jinja"),
        (7, true, "Second brace should be in Jinja"),
        (10, true, "name should be in Jinja"),
        (14, true, "Closing brace should be in Jinja"),
        (15, true, "Second closing brace should be in Jinja"),
        (16, false, "! should not be in Jinja"),
    ];
    for (offset, expected, msg) in probes {
        assert!(ctx.is_in_jinja_range(offset) == expected, "{msg}");
    }
}
5552
/// Probes `is_in_jinja_range` on a `{% if %}` / `{% endif %}` pair with plain
/// text between the two statements.
#[test]
fn test_is_in_jinja_range_statement() {
    let template = "{% if condition %}content{% endif %}";
    let ctx = LintContext::new(template, MarkdownFlavor::Standard, None);

    // (byte offset, expected answer, failure message), checked left to right.
    let probes = [
        (0, true, "Start of Jinja statement"),
        (5, true, "condition should be in Jinja"),
        (17, true, "End of opening statement"),
        (18, false, "content should not be in Jinja"),
        (25, true, "Start of endif"),
        (32, true, "endif should be in Jinja"),
    ];
    for (offset, expected, msg) in probes {
        assert!(ctx.is_in_jinja_range(offset) == expected, "{msg}");
    }
}
5570
/// Two expressions on one line: offsets in either expression are inside a
/// Jinja range, offsets in the text between them are not.
#[test]
fn test_is_in_jinja_range_multiple() {
    let template = "{{ a }} and {{ b }}";
    let ctx = LintContext::new(template, MarkdownFlavor::Standard, None);

    // First expression.
    for offset in [0, 3, 6] {
        assert!(ctx.is_in_jinja_range(offset));
    }

    // Text between the expressions.
    for offset in [8, 11] {
        assert!(!ctx.is_in_jinja_range(offset));
    }

    // Second expression.
    for offset in [12, 15, 18] {
        assert!(ctx.is_in_jinja_range(offset));
    }
}
5590
/// In text without Jinja delimiters, no byte offset may be reported as inside
/// a Jinja range.
#[test]
fn test_is_in_jinja_range_no_jinja() {
    let text = "Plain text with single braces but not Jinja";
    let ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    (0..text.len()).for_each(|i| {
        assert!(!ctx.is_in_jinja_range(i), "Position {i} should not be in Jinja");
    });
}
5601
/// A reference definition with a double-quoted title: offsets from the recorded
/// title start up to (but excluding) the recorded title end are inside the
/// title; the URL and the end offset itself are not.
#[test]
fn test_is_in_link_title_with_title() {
    let content = r#"[ref]: https://example.com "Title text"

Some content."#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    assert_eq!(ctx.reference_defs.len(), 1);
    let only_def = &ctx.reference_defs[0];
    assert!(only_def.title_byte_start.is_some());
    assert!(only_def.title_byte_end.is_some());

    let (title_start, title_end) = (
        only_def.title_byte_start.unwrap(),
        only_def.title_byte_end.unwrap(),
    );

    // Offset 10 lies inside the URL part of the definition.
    assert!(!ctx.is_in_link_title(10), "URL should not be in title");

    let inside_title = [
        (title_start, "Title start should be in title"),
        (title_start + 5, "Middle of title should be in title"),
        (title_end - 1, "End of title should be in title"),
    ];
    for (offset, msg) in inside_title {
        assert!(ctx.is_in_link_title(offset), "{msg}");
    }

    // The end offset is exclusive.
    assert!(
        !ctx.is_in_link_title(title_end),
        "After title end should not be in title"
    );
}
5639
/// A reference definition without a title records no title offsets, and no
/// byte offset of the document is reported as inside a link title.
#[test]
fn test_is_in_link_title_without_title() {
    let text = "[ref]: https://example.com\n\nSome content.";
    let ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    assert_eq!(ctx.reference_defs.len(), 1);
    let only_def = &ctx.reference_defs[0];
    assert!(only_def.title_byte_start.is_none());
    assert!(only_def.title_byte_end.is_none());

    (0..text.len()).for_each(|i| {
        assert!(!ctx.is_in_link_title(i), "Position {i} should not be in title");
    });
}
5656
/// Three reference definitions, of which the first and third carry a title:
/// title offsets are recorded only for those two, and `is_in_link_title`
/// answers accordingly.
#[test]
fn test_is_in_link_title_multiple_refs() {
    let content = r#"[ref1]: /url1 "Title One"
[ref2]: /url2
[ref3]: /url3 "Title Three"
"#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    let defs = &ctx.reference_defs;

    assert_eq!(defs.len(), 3);

    let first = defs.iter().find(|def| def.id == "ref1").unwrap();
    assert!(first.title_byte_start.is_some());

    let second = defs.iter().find(|def| def.id == "ref2").unwrap();
    assert!(second.title_byte_start.is_none());

    let third = defs.iter().find(|def| def.id == "ref3").unwrap();
    assert!(third.title_byte_start.is_some());

    // Position checks run only when both ends of the title were recorded.
    if let Some((start, end)) = first.title_byte_start.zip(first.title_byte_end) {
        assert!(ctx.is_in_link_title(start + 1));
        assert!(!ctx.is_in_link_title(end + 5));
    }

    if let Some((start, _)) = third.title_byte_start.zip(third.title_byte_end) {
        assert!(ctx.is_in_link_title(start + 1));
    }
}
5691
/// A reference definition with a single-quoted title. The position checks run
/// only when both title offsets were recorded for the definition.
#[test]
fn test_is_in_link_title_single_quotes() {
    let text = "[ref]: /url 'Single quoted title'\n";
    let ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    assert_eq!(ctx.reference_defs.len(), 1);
    let only_def = &ctx.reference_defs[0];

    if let Some((start, end)) = only_def.title_byte_start.zip(only_def.title_byte_end) {
        assert!(ctx.is_in_link_title(start));
        assert!(ctx.is_in_link_title(start + 5));
        // The end offset is exclusive.
        assert!(!ctx.is_in_link_title(end));
    }
}
5706
/// A reference definition with a parenthesized title. The test accepts either
/// outcome: if the first definition has both title offsets, the title range
/// is checked; otherwise (no definition, or no title offsets) no byte offset
/// of the document may be reported as inside a title.
#[test]
fn test_is_in_link_title_parentheses() {
    let text = "[ref]: /url (Parenthesized title)\n";
    let ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    // Title range of the first definition, if there is one and it has both ends.
    let title_range = ctx
        .reference_defs
        .first()
        .and_then(|def| def.title_byte_start.zip(def.title_byte_end));

    match title_range {
        Some((start, end)) => {
            assert!(ctx.is_in_link_title(start));
            assert!(ctx.is_in_link_title(start + 5));
            assert!(!ctx.is_in_link_title(end));
        }
        None => {
            for i in 0..text.len() {
                assert!(!ctx.is_in_link_title(i));
            }
        }
    }
}
5735
/// Without any reference definitions, no byte offset is inside a link title.
#[test]
fn test_is_in_link_title_no_refs() {
    let text = "Just plain text without any reference definitions.";
    let ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    assert!(ctx.reference_defs.is_empty());

    (0..text.len()).for_each(|offset| assert!(!ctx.is_in_link_title(offset)));
}
5747
/// A single `$...$` span is reported as one non-display math span whose
/// content is the text between the dollar signs.
#[test]
fn test_math_spans_inline() {
    let text = "Text with inline math $[f](x)$ in it.";
    let ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    let spans = ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should detect one inline math span");

    let inline = &spans[0];
    assert!(!inline.is_display, "Should be inline math, not display");
    assert_eq!(inline.content, "[f](x)", "Content should be extracted correctly");
}
5764
/// A `$$...$$` span on one line is reported as a single display math span
/// whose content still contains the link-like `[x](\zeta)` text.
#[test]
fn test_math_spans_display_single_line() {
    let text = "$$X(\\zeta) = \\mathcal Z [x](\\zeta)$$";
    let ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    let spans = ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should detect one display math span");

    let display = &spans[0];
    assert!(display.is_display, "Should be display math");

    let keeps_link_like_text = display.content.contains("[x](\\zeta)");
    assert!(
        keeps_link_like_text,
        "Content should contain the link-like pattern"
    );
}
5780
/// A `$$` block whose delimiters sit on their own lines is reported as a
/// single display math span.
#[test]
fn test_math_spans_display_multiline() {
    let text = "Before\n\n$$\n[x](\\zeta) = \\sum_k x(k)\n$$\n\nAfter";
    let ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    let spans = ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should detect one display math span");
    assert!(spans[0].is_display, "Should be display math");
}
5792
/// `is_in_math_span` is true for offsets between the two dollar signs and
/// false for offsets before the first and beyond the second.
#[test]
fn test_is_in_math_span() {
    let text = "Text $[f](x)$ more text";
    let ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    // Offset of the opening `$`, and the offset one past the closing `$`.
    let opening = text.find('$').unwrap();
    let past_closing = text.rfind('$').unwrap() + 1;

    for inside in [opening + 1, opening + 3] {
        assert!(
            ctx.is_in_math_span(inside),
            "Position inside math span should return true"
        );
    }

    assert!(!ctx.is_in_math_span(0), "Position before math span should return false");
    assert!(
        !ctx.is_in_math_span(past_closing + 1),
        "Position after math span should return false"
    );
}
5818
/// One inline math span and one inline code span on the same line are
/// reported separately, each with its own content.
#[test]
fn test_math_spans_mixed_with_code() {
    let text = "Math $[f](x)$ and code `[g](y)` mixed";
    let ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    let math = ctx.math_spans();
    let code = ctx.code_spans();

    assert_eq!(math.len(), 1, "Should have one math span");
    assert_eq!(code.len(), 1, "Should have one code span");

    assert_eq!(math[0].content, "[f](x)");
    assert_eq!(code[0].content, "[g](y)");
}
5835
/// Text without dollar signs yields no math spans.
#[test]
fn test_math_spans_no_math() {
    let ctx = LintContext::new(
        "Regular text without any math at all.",
        MarkdownFlavor::Standard,
        None,
    );

    assert!(ctx.math_spans().is_empty(), "Should have no math spans");
}
5844
/// Two `$...$` spans and one `$$...$$` span on a line are reported as three
/// math spans: two inline, one display.
#[test]
fn test_math_spans_multiple() {
    let text = "First $a$ and second $b$ and display $$c$$";
    let ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    let spans = ctx.math_spans();
    assert_eq!(spans.len(), 3, "Should detect three math spans");

    // Every span is either display or inline, so the inline count is the remainder.
    let display_count = spans.iter().filter(|span| span.is_display).count();
    let inline_count = spans.len() - display_count;

    assert_eq!(inline_count, 2, "Should have two inline math spans");
    assert_eq!(display_count, 1, "Should have one display math span");
}
5860
/// Boundary behaviour of `is_in_math_span`: the span's `byte_offset` is
/// inside the span, `byte_end` is not.
#[test]
fn test_is_in_math_span_boundary_positions() {
    let text = "$[f](x)$";
    let ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    let spans = ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should have one math span");

    let (start, end) = (spans[0].byte_offset, spans[0].byte_end);

    let inside = [
        (start, "Start position should be in span"),
        (start + 1, "Position after start should be in span"),
        (end - 1, "Position at end-1 should be in span"),
    ];
    for (offset, msg) in inside {
        assert!(ctx.is_in_math_span(offset), "{msg}");
    }

    assert!(
        !ctx.is_in_math_span(end),
        "Position at byte_end should NOT be in span (exclusive)"
    );
}
5897
/// A math span that opens the document has `byte_offset` 0.
#[test]
fn test_math_spans_at_document_start() {
    let ctx = LintContext::new("$x$ text", MarkdownFlavor::Standard, None);

    let spans = ctx.math_spans();
    assert_eq!(spans.len(), 1);

    let leading = &spans[0];
    assert_eq!(leading.byte_offset, 0, "Math should start at byte 0");
}
5907
/// A math span that closes the document has `byte_end` equal to the document length.
#[test]
fn test_math_spans_at_document_end() {
    let text = "text $x$";
    let ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    let spans = ctx.math_spans();
    assert_eq!(spans.len(), 1);

    let trailing = &spans[0];
    assert_eq!(trailing.byte_end, text.len(), "Math should end at document end");
}
5917
/// For back-to-back math (`$a$$b$`) at least one span is reported and every
/// byte offset of the input is inside some math span.
#[test]
fn test_math_spans_consecutive() {
    let text = "$a$$b$";
    let ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    assert!(!ctx.math_spans().is_empty(), "Should detect at least one math span");

    (0..text.len()).for_each(|i| {
        assert!(ctx.is_in_math_span(i), "Position {i} should be in a math span");
    });
}
5932
/// A lone `$` used as a currency sign must not produce a math span whose
/// content contains the amount.
#[test]
fn test_math_spans_currency_not_math() {
    let text = "Price is $100";
    let ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    // `all` is vacuously true for an empty list, which covers the "no spans" case.
    let amount_never_captured = ctx
        .math_spans()
        .iter()
        .all(|span| !span.content.contains("100"));
    assert!(
        amount_never_captured,
        "Unbalanced $ should not create math span containing 100"
    );
}
5947
/// `get_reference_url` resolves ids regardless of letter case and returns
/// `None` for an id that was never defined.
#[test]
fn test_reference_lookup_o1_basic() {
    let content = r#"[ref1]: /url1
[REF2]: /url2 "Title"
[Ref3]: /url3

Use [link][ref1] and [link][REF2]."#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    assert_eq!(ctx.reference_defs.len(), 3);

    // (looked-up id, expected URL), checked in this order.
    let lookups = [
        ("ref1", Some("/url1")),
        ("REF1", Some("/url1")),
        ("Ref1", Some("/url1")),
        ("ref2", Some("/url2")),
        ("REF2", Some("/url2")),
        ("ref3", Some("/url3")),
        ("nonexistent", None),
    ];
    for (id, expected_url) in lookups {
        assert_eq!(ctx.get_reference_url(id), expected_url);
    }
}
5973
/// `get_reference_def` returns the definition (URL and title) for an id in
/// either letter case and `None` for an unknown id.
#[test]
fn test_reference_lookup_o1_get_reference_def() {
    let content = r#"[myref]: https://example.com "My Title"
"#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    let lower = ctx.get_reference_def("myref").expect("Should find myref");
    assert_eq!(lower.url, "https://example.com");
    assert_eq!(lower.title.as_deref(), Some("My Title"));

    // Same definition, looked up with an upper-case id.
    let upper = ctx.get_reference_def("MYREF").expect("Should find MYREF");
    assert_eq!(upper.url, "https://example.com");

    assert!(ctx.get_reference_def("nonexistent").is_none());
}
5992
/// `has_reference_def` is true for defined ids in any letter case and false
/// for an id that was never defined.
#[test]
fn test_reference_lookup_o1_has_reference_def() {
    let content = r#"[foo]: /foo
[BAR]: /bar
"#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    for defined in ["foo", "FOO", "bar", "Bar"] {
        assert!(ctx.has_reference_def(defined));
    }
    assert!(!ctx.has_reference_def("baz"));
}
6007
/// With no reference definitions in the document, every lookup API reports
/// "not found".
#[test]
fn test_reference_lookup_o1_empty_content() {
    let ctx = LintContext::new("No references here.", MarkdownFlavor::Standard, None);
    let id = "anything";

    assert!(ctx.reference_defs.is_empty());
    assert_eq!(ctx.get_reference_url(id), None);
    assert!(ctx.get_reference_def(id).is_none());
    assert!(!ctx.has_reference_def(id));
}
6018
/// Ids containing dashes, underscores or dots resolve to their URLs.
#[test]
fn test_reference_lookup_o1_special_characters_in_id() {
    let content = r#"[ref-with-dash]: /url1
[ref_with_underscore]: /url2
[ref.with.dots]: /url3
"#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    let lookups = [
        ("ref-with-dash", "/url1"),
        ("ref_with_underscore", "/url2"),
        ("ref.with.dots", "/url3"),
    ];
    for (id, url) in lookups {
        assert_eq!(ctx.get_reference_url(id), Some(url));
    }
}
6031
/// Non-ASCII ids resolve to their URLs, including an upper-cased spelling of
/// an id that contains an accented letter.
#[test]
fn test_reference_lookup_o1_unicode_id() {
    let content = r#"[日本語]: /japanese
[émoji]: /emoji
"#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    let lookups = [
        ("日本語", "/japanese"),
        ("émoji", "/emoji"),
        ("ÉMOJI", "/emoji"),
    ];
    for (id, url) in lookups {
        assert_eq!(ctx.get_reference_url(id), Some(url));
    }
}
6043}