1use crate::config::MarkdownFlavor;
2use crate::inline_config::InlineConfig;
3use crate::rules::front_matter_utils::FrontMatterUtils;
4use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
5use crate::utils::element_cache::ElementCache;
6use crate::utils::regex_cache::URL_SIMPLE_REGEX;
7use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
8use regex::Regex;
9use std::borrow::Cow;
10use std::collections::HashMap;
11use std::path::PathBuf;
12use std::sync::LazyLock;
13
// Evaluates `$code` exactly once and yields its value. On native targets the
// evaluation is timed and, when `$profile` is true, the elapsed time is printed
// to stderr as `[PROFILE] <name>: <duration>`.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let timer = std::time::Instant::now();
        let value = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, timer.elapsed());
        }
        value
    }};
}

// wasm32 variant: no timing at all, the section is simply evaluated.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
31
/// Lazily compiled regex matching inline links (`[text](url "title")`) and
/// full reference links (`[text][id]`).
///
/// Capture groups: 1 = link text, 2 = URL written in angle brackets,
/// 3 = bare URL, 4 / 5 = double- / single-quoted title, 6 = reference ID.
/// The pattern is compiled on first use; `unwrap` can only fail if the
/// literal itself is invalid.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\] # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
        \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
        |
        \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
45
/// Lazily compiled regex matching inline images (`![alt](url "title")`) and
/// full reference images (`![alt][id]`).
///
/// Capture groups: 1 = alt text, 2 = URL written in angle brackets,
/// 3 = bare URL, 4 / 5 = double- / single-quoted title, 6 = reference ID.
/// Identical to the link pattern apart from the leading `!`.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\] # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
        \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
        |
        \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
59
/// Lazily compiled multi-line regex for reference definitions of the form
/// `[id]: url "title"`, allowing up to three leading spaces.
///
/// Capture groups: 1 = reference ID, 2 = URL (a run of non-whitespace),
/// 3 / 4 = optional double- / single-quoted title.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
63
/// Lazily compiled regex matching a simple e-mail address shape:
/// `local@domain.tld`, where the TLD is at least two ASCII letters.
/// The pattern is unanchored, so it finds addresses anywhere in a haystack.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
69
/// Lazily compiled regex capturing (group 1) a leading blockquote prefix:
/// optional whitespace, one or more `>` characters, then optional whitespace.
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
72
/// Per-line facts computed once while building a `LintContext`.
///
/// The `in_*` flags record which surrounding construct a line belongs to;
/// they are filled in by the detection passes run during context construction.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset of the start of this line within the source text.
    pub byte_offset: usize,
    /// Length of the line in bytes (`content` slices `byte_offset..byte_offset + byte_len`).
    pub byte_len: usize,
    /// Indentation of the line. NOTE(review): unit (bytes vs. chars) not visible here - confirm.
    pub indent: usize,
    /// Indentation as displayed. NOTE(review): presumably tab-expanded - confirm.
    pub visual_indent: usize,
    /// Whether the line is blank.
    pub is_blank: bool,
    /// Whether the line is inside a code block.
    pub in_code_block: bool,
    /// Whether the line is inside front matter.
    pub in_front_matter: bool,
    /// Whether the line is inside an HTML block.
    pub in_html_block: bool,
    /// Whether the line is inside an HTML comment.
    pub in_html_comment: bool,
    /// List item details when this line starts a list item.
    pub list_item: Option<ListItemInfo>,
    /// Heading details when this line is a heading.
    pub heading: Option<HeadingInfo>,
    /// Blockquote details when this line is part of a blockquote.
    pub blockquote: Option<BlockquoteInfo>,
    /// Whether the line is inside a mkdocstrings block.
    pub in_mkdocstrings: bool,
    /// Whether the line is inside an ESM (import/export) block.
    pub in_esm_block: bool,
    /// Set for every line after the first of a multi-line code span.
    pub in_code_span_continuation: bool,
    /// Whether the line is a horizontal rule.
    pub is_horizontal_rule: bool,
    /// Whether the line is inside a math block.
    pub in_math_block: bool,
    /// Whether the line is inside a Quarto div.
    pub in_quarto_div: bool,
    /// Whether the line is inside a JSX expression.
    pub in_jsx_expression: bool,
    /// Whether the line is inside an MDX comment.
    pub in_mdx_comment: bool,
    /// Whether the line is inside a JSX component.
    pub in_jsx_component: bool,
    /// Whether the line is inside a JSX fragment.
    pub in_jsx_fragment: bool,
    /// Whether the line is inside an admonition (see `in_mkdocs_container`).
    pub in_admonition: bool,
    /// Whether the line is inside a content tab (see `in_mkdocs_container`).
    pub in_content_tab: bool,
    /// Whether the line is inside a definition list.
    pub in_definition_list: bool,
    /// Whether the line is inside an Obsidian comment.
    pub in_obsidian_comment: bool,
}
132
133impl LineInfo {
134 pub fn content<'a>(&self, source: &'a str) -> &'a str {
136 &source[self.byte_offset..self.byte_offset + self.byte_len]
137 }
138
139 #[inline]
143 pub fn in_mkdocs_container(&self) -> bool {
144 self.in_admonition || self.in_content_tab
145 }
146}
147
/// Details of a list item marker found on a line.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker text as written in the source.
    pub marker: String,
    /// Whether the item belongs to an ordered list.
    pub is_ordered: bool,
    /// Item number. NOTE(review): presumably only set for ordered items - confirm.
    pub number: Option<usize>,
    /// Column of the list marker. NOTE(review): indexing base not visible here - confirm.
    pub marker_column: usize,
    /// Column where the item's content begins.
    pub content_column: usize,
}
162
/// Syntactic style of a heading.
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    /// ATX heading (`#`-prefixed).
    ATX,
    /// Setext heading, first underline variant. NOTE(review): presumably `=` underline (level 1) - confirm.
    Setext1,
    /// Setext heading, second underline variant. NOTE(review): presumably `-` underline (level 2) - confirm.
    Setext2,
}
173
/// A link found in the document, with its position and target.
///
/// Text fields are `Cow` so they can borrow from the source when possible.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// Line on which the link starts.
    pub line: usize,
    /// Start column of the link on that line.
    pub start_col: usize,
    /// End column of the link.
    pub end_col: usize,
    /// Byte offset of the link's start in the source.
    pub byte_offset: usize,
    /// Byte offset of the link's end in the source.
    pub byte_end: usize,
    /// The link text.
    pub text: Cow<'a, str>,
    /// The link destination.
    pub url: Cow<'a, str>,
    /// Whether this is a reference-style link.
    pub is_reference: bool,
    /// Reference ID for reference-style links.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link classification as reported by `pulldown_cmark`.
    pub link_type: LinkType,
}
198
/// A reference that the markdown parser reported through its broken-link
/// callback (i.e. it could not be resolved by the parser).
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text reported by the parser.
    pub reference: String,
    /// Byte range of the broken link in the source, as reported by the parser.
    pub span: std::ops::Range<usize>,
}
207
/// A footnote reference found in the document.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// Footnote identifier.
    pub id: String,
    /// Line on which the reference appears.
    pub line: usize,
    /// Byte offset of the reference's start in the source.
    pub byte_offset: usize,
    /// Byte offset of the reference's end in the source.
    pub byte_end: usize,
}
220
/// An image found in the document, with its position and target.
///
/// Mirrors `ParsedLink`, with `alt_text` in place of the link text.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// Line on which the image starts.
    pub line: usize,
    /// Start column of the image on that line.
    pub start_col: usize,
    /// End column of the image.
    pub end_col: usize,
    /// Byte offset of the image's start in the source.
    pub byte_offset: usize,
    /// Byte offset of the image's end in the source.
    pub byte_end: usize,
    /// The image's alt text.
    pub alt_text: Cow<'a, str>,
    /// The image source URL.
    pub url: Cow<'a, str>,
    /// Whether this is a reference-style image.
    pub is_reference: bool,
    /// Reference ID for reference-style images.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link classification as reported by `pulldown_cmark`.
    pub link_type: LinkType,
}
245
/// A reference definition (`[id]: url "title"`).
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line on which the definition appears.
    pub line: usize,
    /// Reference ID; lookups on the context compare it lowercased.
    pub id: String,
    /// Destination URL.
    pub url: String,
    /// Optional title.
    pub title: Option<String>,
    /// Start byte of the definition (inclusive, per `is_in_reference_def`).
    pub byte_offset: usize,
    /// End byte of the definition (exclusive, per `is_in_reference_def`).
    pub byte_end: usize,
    /// Start byte of the title, when present (inclusive, per `is_in_link_title`).
    pub title_byte_start: Option<usize>,
    /// End byte of the title, when present (exclusive, per `is_in_link_title`).
    pub title_byte_end: Option<usize>,
}
266
/// An inline code span, which may extend over several lines.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// 1-indexed line on which the span starts.
    pub line: usize,
    /// 1-indexed line on which the span ends (equal to `line` for single-line spans).
    pub end_line: usize,
    /// 0-indexed start column on the first line (inclusive, per `is_in_code_span`).
    pub start_col: usize,
    /// 0-indexed end column on the last line (exclusive, per `is_in_code_span`).
    pub end_col: usize,
    /// Start byte of the span in the source (inclusive).
    pub byte_offset: usize,
    /// End byte of the span in the source (exclusive).
    pub byte_end: usize,
    /// Number of backticks in the delimiter.
    pub backtick_count: usize,
    /// The span's content. NOTE(review): presumably without the backtick delimiters - confirm.
    pub content: String,
}
287
/// A math span, which may extend over several lines.
#[derive(Debug, Clone)]
pub struct MathSpan {
    /// Line on which the span starts.
    pub line: usize,
    /// Line on which the span ends.
    pub end_line: usize,
    /// Start column on the first line.
    pub start_col: usize,
    /// End column on the last line.
    pub end_col: usize,
    /// Start byte of the span in the source (inclusive, per `is_in_math_span`).
    pub byte_offset: usize,
    /// End byte of the span in the source (exclusive, per `is_in_math_span`).
    pub byte_end: usize,
    /// Whether this is display math. NOTE(review): presumably `$$...$$` vs. inline `$...$` - confirm.
    pub is_display: bool,
    /// The span's content.
    pub content: String,
}
308
/// Details of a heading found on a line.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level.
    pub level: u8,
    /// Syntactic style of the heading.
    pub style: HeadingStyle,
    /// The heading marker as written in the source.
    pub marker: String,
    /// Column of the marker.
    pub marker_column: usize,
    /// Column where the heading text begins.
    pub content_column: usize,
    /// Heading text. NOTE(review): presumably with custom ID / closing sequence removed - confirm.
    pub text: String,
    /// Custom ID attached to the heading, if any.
    pub custom_id: Option<String>,
    /// Heading text as written. NOTE(review): exact difference from `text` not visible here - confirm.
    pub raw_text: String,
    /// Whether the heading has a closing sequence.
    pub has_closing_sequence: bool,
    /// The closing sequence text.
    pub closing_sequence: String,
    /// Whether the heading is considered valid; `ValidHeadingsIter` skips headings where this is false.
    pub is_valid: bool,
}
336
/// A valid heading together with its location, as yielded by `ValidHeadingsIter`.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// 1-indexed line number of the heading.
    pub line_num: usize,
    /// The heading's details (`is_valid` is always true here).
    pub heading: &'a HeadingInfo,
    /// The full line info the heading was taken from.
    pub line_info: &'a LineInfo,
}
350
/// Iterator over the lines of a document that carry a valid heading.
pub struct ValidHeadingsIter<'a> {
    /// All lines of the document being scanned.
    lines: &'a [LineInfo],
    /// Index (0-based) of the next line to examine.
    current_index: usize,
}
359
360impl<'a> ValidHeadingsIter<'a> {
361 fn new(lines: &'a [LineInfo]) -> Self {
362 Self {
363 lines,
364 current_index: 0,
365 }
366 }
367}
368
369impl<'a> Iterator for ValidHeadingsIter<'a> {
370 type Item = ValidHeading<'a>;
371
372 fn next(&mut self) -> Option<Self::Item> {
373 while self.current_index < self.lines.len() {
374 let idx = self.current_index;
375 self.current_index += 1;
376
377 let line_info = &self.lines[idx];
378 if let Some(heading) = &line_info.heading
379 && heading.is_valid
380 {
381 return Some(ValidHeading {
382 line_num: idx + 1, heading,
384 line_info,
385 });
386 }
387 }
388 None
389 }
390}
391
/// Details of a blockquote line.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting depth. NOTE(review): presumably the number of `>` markers - confirm.
    pub nesting_level: usize,
    /// Whitespace preceding the first marker.
    pub indent: String,
    /// Column of the first marker.
    pub marker_column: usize,
    /// The blockquote prefix; `blockquote_prefix_for_blank_line` returns it with trailing whitespace trimmed.
    pub prefix: String,
    /// Line content following the prefix.
    pub content: String,
    /// Whether no space follows the marker.
    pub has_no_space_after_marker: bool,
    /// Whether more than one space follows the marker.
    pub has_multiple_spaces_after_marker: bool,
    /// Whether the line needs the MD028 fix. NOTE(review): rule semantics not visible here - confirm.
    pub needs_md028_fix: bool,
}
412
/// A contiguous block of list items.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// First line of the block (inclusive, per `is_in_list_block`).
    pub start_line: usize,
    /// Last line of the block (inclusive, per `is_in_list_block`).
    pub end_line: usize,
    /// Whether the block is an ordered list.
    pub is_ordered: bool,
    /// Marker used by the block. NOTE(review): presumably `None` when markers are mixed or ordered - confirm.
    pub marker: Option<String>,
    /// Blockquote prefix in front of the list, empty when not quoted.
    pub blockquote_prefix: String,
    /// Line numbers of the items in this block.
    pub item_lines: Vec<usize>,
    /// Nesting depth of the block.
    pub nesting_level: usize,
    /// Width of the widest marker in the block.
    pub max_marker_width: usize,
}
433
434use std::sync::{Arc, OnceLock};
435
/// Map from a `usize` key to a list-item tuple.
/// NOTE(review): presumably keyed by line index with
/// `(is_ordered, marker, marker_column, content_column, number)` - confirm against the producer.
type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;
438
/// List of `(start, end)` byte ranges.
type ByteRanges = Vec<(usize, usize)>;
441
/// Occurrence counts of markdown-significant characters in the document.
///
/// `has_char` / `char_count` answer from these counters for the characters
/// listed below instead of rescanning the content.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count of `#`.
    pub hash_count: usize,
    /// Count of `*`.
    pub asterisk_count: usize,
    /// Count of `_`.
    pub underscore_count: usize,
    /// Count of `-`.
    pub hyphen_count: usize,
    /// Count of `+`.
    pub plus_count: usize,
    /// Count of `>`.
    pub gt_count: usize,
    /// Count of `|`.
    pub pipe_count: usize,
    /// Count reported for `[`.
    pub bracket_count: usize,
    /// Count of `` ` ``.
    pub backtick_count: usize,
    /// Count of `<`.
    pub lt_count: usize,
    /// Count of `!`.
    pub exclamation_count: usize,
    /// Count of newline characters.
    pub newline_count: usize,
}
470
/// An HTML tag found in the document.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line on which the tag appears (compared against `line_num` in `html_tags_on_line`).
    pub line: usize,
    /// Start column of the tag.
    pub start_col: usize,
    /// End column of the tag.
    pub end_col: usize,
    /// Start byte of the tag (inclusive, per `is_in_html_tag`).
    pub byte_offset: usize,
    /// End byte of the tag (exclusive, per `is_in_html_tag`).
    pub byte_end: usize,
    /// Name of the tag.
    pub tag_name: String,
    /// Whether this is a closing tag.
    pub is_closing: bool,
    /// Whether the tag is self-closing.
    pub is_self_closing: bool,
    /// The tag text as written in the source.
    pub raw_content: String,
}
493
/// An emphasis span found in the document.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line on which the span appears (compared against `line_num` in `emphasis_spans_on_line`).
    pub line: usize,
    /// Start column of the span.
    pub start_col: usize,
    /// End column of the span.
    pub end_col: usize,
    /// Byte offset of the span's start in the source.
    pub byte_offset: usize,
    /// Byte offset of the span's end in the source.
    pub byte_end: usize,
    /// The emphasis marker character.
    pub marker: char,
    /// How many marker characters delimit the span.
    pub marker_count: usize,
    /// The emphasized content.
    pub content: String,
}
514
/// A table row found in the document.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line on which the row appears (compared against `line_num` in `table_rows_on_line`).
    pub line: usize,
    /// Whether this is the header separator row.
    pub is_separator: bool,
    /// Number of columns in the row.
    pub column_count: usize,
    /// Column alignments. NOTE(review): string values and when they are populated are not visible here - confirm.
    pub column_alignments: Vec<String>,
}
527
/// A bare URL (or similar auto-linkable text) found in the document.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line on which the URL appears (compared against `line_num` in `bare_urls_on_line`).
    pub line: usize,
    /// Start column of the URL.
    pub start_col: usize,
    /// End column of the URL.
    pub end_col: usize,
    /// Byte offset of the URL's start in the source.
    pub byte_offset: usize,
    /// Byte offset of the URL's end in the source.
    pub byte_end: usize,
    /// The URL text.
    pub url: String,
    /// Kind of URL. NOTE(review): set of possible values not visible here - confirm.
    pub url_type: String,
}
546
/// Pre-computed view of a markdown document shared by lint rules.
///
/// Everything that is not behind a `OnceLock` is computed eagerly by
/// `LintContext::new`; the `*_cache` fields are filled on first access
/// (or pre-seeded during construction where noted).
pub struct LintContext<'a> {
    /// The document text.
    pub content: &'a str,
    /// Byte offset of the start of every line (first entry is 0).
    pub line_offsets: Vec<usize>,
    /// Byte ranges of code blocks.
    pub code_blocks: Vec<(usize, usize)>,
    /// Per-line information, index 0 = line 1.
    pub lines: Vec<LineInfo>,
    /// Links found in the document.
    pub links: Vec<ParsedLink<'a>>,
    /// Images found in the document.
    pub images: Vec<ParsedImage<'a>>,
    /// References the parser reported as broken.
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Footnote references found in the document.
    pub footnote_refs: Vec<FootnoteRef>,
    /// Reference definitions found in the document.
    pub reference_defs: Vec<ReferenceDef>,
    /// Lowercased reference ID -> index into `reference_defs`.
    reference_defs_map: HashMap<String, usize>,
    /// Code spans; pre-seeded during construction.
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>,
    /// Math spans; computed on first access.
    math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>,
    /// List blocks found in the document.
    pub list_blocks: Vec<ListBlock>,
    /// Counts of markdown-significant characters.
    pub char_frequency: CharFrequency,
    /// HTML tags; computed on first access.
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>,
    /// Emphasis spans; pre-seeded during construction.
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>,
    /// Table rows; computed on first access.
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>,
    /// Bare URLs; computed on first access.
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>,
    /// Result of the mixed-list-nesting check; computed on first access.
    has_mixed_list_nesting_cache: OnceLock<bool>,
    /// Byte ranges of HTML comments.
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Table blocks found in the document.
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Line index used for line/column <-> byte conversions.
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    /// Byte ranges of Jinja constructs.
    jinja_ranges: Vec<(usize, usize)>,
    /// Markdown flavor the document is linted as.
    pub flavor: MarkdownFlavor,
    /// Path of the source file, when known.
    pub source_file: Option<PathBuf>,
    /// Byte ranges of JSX expressions.
    jsx_expression_ranges: Vec<(usize, usize)>,
    /// Byte ranges of MDX comments.
    mdx_comment_ranges: Vec<(usize, usize)>,
    /// Byte ranges of citations (only populated for the Quarto flavor).
    citation_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Byte ranges of Hugo shortcodes.
    shortcode_ranges: Vec<(usize, usize)>,
    /// Inline configuration parsed from the document.
    inline_config: InlineConfig,
    /// Byte ranges of Obsidian comments.
    obsidian_comment_ranges: Vec<(usize, usize)>,
}
580
/// The four consecutive pieces of a blockquote line, all borrowed from it.
struct BlockquoteComponents<'a> {
    /// Leading spaces/tabs before the first `>`.
    indent: &'a str,
    /// The run of consecutive `>` characters.
    markers: &'a str,
    /// Spaces/tabs directly after the markers.
    spaces_after: &'a str,
    /// Everything that follows.
    content: &'a str,
}

/// Splits `line` into indent, `>` markers, whitespace after the markers and
/// remaining content.
///
/// Returns `None` when the first character after the leading spaces/tabs is
/// not `>` (including empty and whitespace-only lines). Only an unbroken run
/// of `>` counts as markers; `"> > x"` yields markers `">"` and content `"> x"`.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    let is_space_or_tab = |c: char| c == ' ' || c == '\t';

    let after_indent = line.trim_start_matches(is_space_or_tab);
    if !after_indent.starts_with('>') {
        return None;
    }
    let after_markers = after_indent.trim_start_matches('>');
    let content = after_markers.trim_start_matches(is_space_or_tab);

    // Each piece is the prefix that the corresponding trim removed.
    let indent_len = line.len() - after_indent.len();
    let markers_len = after_indent.len() - after_markers.len();
    let spaces_len = after_markers.len() - content.len();

    Some(BlockquoteComponents {
        indent: &line[..indent_len],
        markers: &after_indent[..markers_len],
        spaces_after: &after_markers[..spaces_len],
        content,
    })
}
625
626impl<'a> LintContext<'a> {
627 pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
628 #[cfg(not(target_arch = "wasm32"))]
629 let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
630 #[cfg(target_arch = "wasm32")]
631 let profile = false;
632
633 let line_offsets = profile_section!("Line offsets", profile, {
634 let mut offsets = vec![0];
635 for (i, c) in content.char_indices() {
636 if c == '\n' {
637 offsets.push(i + 1);
638 }
639 }
640 offsets
641 });
642
643 let (code_blocks, code_span_ranges) = profile_section!(
645 "Code blocks",
646 profile,
647 CodeBlockUtils::detect_code_blocks_and_spans(content)
648 );
649
650 let html_comment_ranges = profile_section!(
652 "HTML comment ranges",
653 profile,
654 crate::utils::skip_context::compute_html_comment_ranges(content)
655 );
656
657 let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
659 if flavor == MarkdownFlavor::MkDocs {
660 crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
661 } else {
662 Vec::new()
663 }
664 });
665
666 let quarto_div_ranges = profile_section!("Quarto div ranges", profile, {
668 if flavor == MarkdownFlavor::Quarto {
669 crate::utils::quarto_divs::detect_div_block_ranges(content)
670 } else {
671 Vec::new()
672 }
673 });
674
675 let (mut lines, emphasis_spans) = profile_section!(
678 "Basic line info",
679 profile,
680 Self::compute_basic_line_info(
681 content,
682 &line_offsets,
683 &code_blocks,
684 flavor,
685 &html_comment_ranges,
686 &autodoc_ranges,
687 &quarto_div_ranges,
688 )
689 );
690
691 profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));
693
694 profile_section!(
696 "ESM blocks",
697 profile,
698 Self::detect_esm_blocks(content, &mut lines, flavor)
699 );
700
701 let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
703 "JSX/MDX detection",
704 profile,
705 Self::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
706 );
707
708 profile_section!(
710 "MkDocs constructs",
711 profile,
712 Self::detect_mkdocs_line_info(content, &mut lines, flavor)
713 );
714
715 let obsidian_comment_ranges = profile_section!(
717 "Obsidian comments",
718 profile,
719 Self::detect_obsidian_comments(content, &mut lines, flavor, &code_span_ranges)
720 );
721
722 let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));
724
725 profile_section!(
727 "Headings & blockquotes",
728 profile,
729 Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
730 );
731
732 let code_spans = profile_section!(
734 "Code spans",
735 profile,
736 Self::build_code_spans_from_ranges(content, &lines, &code_span_ranges)
737 );
738
739 for span in &code_spans {
742 if span.end_line > span.line {
743 for line_num in (span.line + 1)..=span.end_line {
745 if let Some(line_info) = lines.get_mut(line_num - 1) {
746 line_info.in_code_span_continuation = true;
747 }
748 }
749 }
750 }
751
752 let (links, broken_links, footnote_refs) = profile_section!(
754 "Links",
755 profile,
756 Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
757 );
758
759 let images = profile_section!(
760 "Images",
761 profile,
762 Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
763 );
764
765 let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));
766
767 let reference_defs_map: HashMap<String, usize> = reference_defs
769 .iter()
770 .enumerate()
771 .map(|(idx, def)| (def.id.to_lowercase(), idx))
772 .collect();
773
774 let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));
775
776 let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));
778
779 let table_blocks = profile_section!(
781 "Table blocks",
782 profile,
783 crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
784 content,
785 &code_blocks,
786 &code_spans,
787 &html_comment_ranges,
788 )
789 );
790
791 let line_index = profile_section!(
793 "Line index",
794 profile,
795 crate::utils::range_utils::LineIndex::new(content)
796 );
797
798 let jinja_ranges = profile_section!(
800 "Jinja ranges",
801 profile,
802 crate::utils::jinja_utils::find_jinja_ranges(content)
803 );
804
805 let citation_ranges = profile_section!("Citation ranges", profile, {
807 if flavor == MarkdownFlavor::Quarto {
808 crate::utils::quarto_divs::find_citation_ranges(content)
809 } else {
810 Vec::new()
811 }
812 });
813
814 let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
816 use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
817 let mut ranges = Vec::new();
818 for mat in HUGO_SHORTCODE_REGEX.find_iter(content).flatten() {
819 ranges.push((mat.start(), mat.end()));
820 }
821 ranges
822 });
823
824 let inline_config = InlineConfig::from_content_with_code_blocks(content, &code_blocks);
825
826 Self {
827 content,
828 line_offsets,
829 code_blocks,
830 lines,
831 links,
832 images,
833 broken_links,
834 footnote_refs,
835 reference_defs,
836 reference_defs_map,
837 code_spans_cache: OnceLock::from(Arc::new(code_spans)),
838 math_spans_cache: OnceLock::new(), list_blocks,
840 char_frequency,
841 html_tags_cache: OnceLock::new(),
842 emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
843 table_rows_cache: OnceLock::new(),
844 bare_urls_cache: OnceLock::new(),
845 has_mixed_list_nesting_cache: OnceLock::new(),
846 html_comment_ranges,
847 table_blocks,
848 line_index,
849 jinja_ranges,
850 flavor,
851 source_file,
852 jsx_expression_ranges,
853 mdx_comment_ranges,
854 citation_ranges,
855 shortcode_ranges,
856 inline_config,
857 obsidian_comment_ranges,
858 }
859 }
860
861 pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
866 self.inline_config.is_rule_disabled(rule_name, line_number)
867 }
868
869 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
871 Arc::clone(
872 self.code_spans_cache
873 .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
874 )
875 }
876
877 pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
879 Arc::clone(
880 self.math_spans_cache
881 .get_or_init(|| Arc::new(Self::parse_math_spans(self.content, &self.lines))),
882 )
883 }
884
885 pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
887 let math_spans = self.math_spans();
888 math_spans
889 .iter()
890 .any(|span| byte_pos >= span.byte_offset && byte_pos < span.byte_end)
891 }
892
893 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
895 &self.html_comment_ranges
896 }
897
898 pub fn obsidian_comment_ranges(&self) -> &[(usize, usize)] {
901 &self.obsidian_comment_ranges
902 }
903
904 pub fn is_in_obsidian_comment(&self, byte_pos: usize) -> bool {
908 self.obsidian_comment_ranges
909 .iter()
910 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
911 }
912
913 pub fn is_position_in_obsidian_comment(&self, line_num: usize, col: usize) -> bool {
918 if self.obsidian_comment_ranges.is_empty() {
919 return false;
920 }
921
922 let byte_pos = self.line_index.line_col_to_byte_range(line_num, col).start;
924 self.is_in_obsidian_comment(byte_pos)
925 }
926
927 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
929 Arc::clone(self.html_tags_cache.get_or_init(|| {
930 Arc::new(Self::parse_html_tags(
931 self.content,
932 &self.lines,
933 &self.code_blocks,
934 self.flavor,
935 ))
936 }))
937 }
938
939 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
941 Arc::clone(
942 self.emphasis_spans_cache
943 .get()
944 .expect("emphasis_spans_cache initialized during construction"),
945 )
946 }
947
948 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
950 Arc::clone(
951 self.table_rows_cache
952 .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
953 )
954 }
955
956 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
958 Arc::clone(
959 self.bare_urls_cache
960 .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
961 )
962 }
963
964 pub fn has_mixed_list_nesting(&self) -> bool {
968 *self
969 .has_mixed_list_nesting_cache
970 .get_or_init(|| self.compute_mixed_list_nesting())
971 }
972
973 fn compute_mixed_list_nesting(&self) -> bool {
975 let mut stack: Vec<(usize, bool)> = Vec::new();
980 let mut last_was_blank = false;
981
982 for line_info in &self.lines {
983 if line_info.in_code_block
985 || line_info.in_front_matter
986 || line_info.in_mkdocstrings
987 || line_info.in_html_comment
988 || line_info.in_esm_block
989 {
990 continue;
991 }
992
993 if line_info.is_blank {
995 last_was_blank = true;
996 continue;
997 }
998
999 if let Some(list_item) = &line_info.list_item {
1000 let current_pos = if list_item.marker_column == 1 {
1002 0
1003 } else {
1004 list_item.marker_column
1005 };
1006
1007 if last_was_blank && current_pos == 0 {
1009 stack.clear();
1010 }
1011 last_was_blank = false;
1012
1013 while let Some(&(pos, _)) = stack.last() {
1015 if pos >= current_pos {
1016 stack.pop();
1017 } else {
1018 break;
1019 }
1020 }
1021
1022 if let Some(&(_, parent_is_ordered)) = stack.last()
1024 && parent_is_ordered != list_item.is_ordered
1025 {
1026 return true; }
1028
1029 stack.push((current_pos, list_item.is_ordered));
1030 } else {
1031 last_was_blank = false;
1033 }
1034 }
1035
1036 false
1037 }
1038
1039 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
1041 match self.line_offsets.binary_search(&offset) {
1042 Ok(line) => (line + 1, 1),
1043 Err(line) => {
1044 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
1045 (line, offset - line_start + 1)
1046 }
1047 }
1048 }
1049
1050 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
1052 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
1054 return true;
1055 }
1056
1057 self.code_spans()
1059 .iter()
1060 .any(|span| pos >= span.byte_offset && pos < span.byte_end)
1061 }
1062
1063 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
1065 if line_num > 0 {
1066 self.lines.get(line_num - 1)
1067 } else {
1068 None
1069 }
1070 }
1071
1072 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
1074 self.line_info(line_num).map(|info| info.byte_offset)
1075 }
1076
1077 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
1079 let normalized_id = ref_id.to_lowercase();
1080 self.reference_defs_map
1081 .get(&normalized_id)
1082 .map(|&idx| self.reference_defs[idx].url.as_str())
1083 }
1084
1085 pub fn get_reference_def(&self, ref_id: &str) -> Option<&ReferenceDef> {
1087 let normalized_id = ref_id.to_lowercase();
1088 self.reference_defs_map
1089 .get(&normalized_id)
1090 .map(|&idx| &self.reference_defs[idx])
1091 }
1092
1093 pub fn has_reference_def(&self, ref_id: &str) -> bool {
1095 let normalized_id = ref_id.to_lowercase();
1096 self.reference_defs_map.contains_key(&normalized_id)
1097 }
1098
1099 pub fn is_in_list_block(&self, line_num: usize) -> bool {
1101 self.list_blocks
1102 .iter()
1103 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
1104 }
1105
1106 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
1108 self.list_blocks
1109 .iter()
1110 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
1111 }
1112
1113 pub fn is_in_code_block(&self, line_num: usize) -> bool {
1117 if line_num == 0 || line_num > self.lines.len() {
1118 return false;
1119 }
1120 self.lines[line_num - 1].in_code_block
1121 }
1122
1123 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
1125 if line_num == 0 || line_num > self.lines.len() {
1126 return false;
1127 }
1128 self.lines[line_num - 1].in_front_matter
1129 }
1130
1131 pub fn is_in_html_block(&self, line_num: usize) -> bool {
1133 if line_num == 0 || line_num > self.lines.len() {
1134 return false;
1135 }
1136 self.lines[line_num - 1].in_html_block
1137 }
1138
1139 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
1141 if line_num == 0 || line_num > self.lines.len() {
1142 return false;
1143 }
1144
1145 let col_0indexed = if col > 0 { col - 1 } else { 0 };
1149 let code_spans = self.code_spans();
1150 code_spans.iter().any(|span| {
1151 if line_num < span.line || line_num > span.end_line {
1153 return false;
1154 }
1155
1156 if span.line == span.end_line {
1157 col_0indexed >= span.start_col && col_0indexed < span.end_col
1159 } else if line_num == span.line {
1160 col_0indexed >= span.start_col
1162 } else if line_num == span.end_line {
1163 col_0indexed < span.end_col
1165 } else {
1166 true
1168 }
1169 })
1170 }
1171
1172 #[inline]
1174 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
1175 let code_spans = self.code_spans();
1176 code_spans
1177 .iter()
1178 .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
1179 }
1180
1181 #[inline]
1184 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
1185 self.reference_defs
1186 .iter()
1187 .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
1188 }
1189
1190 #[inline]
1194 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
1195 self.html_comment_ranges
1196 .iter()
1197 .any(|range| byte_pos >= range.start && byte_pos < range.end)
1198 }
1199
1200 #[inline]
1203 pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1204 self.html_tags()
1205 .iter()
1206 .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
1207 }
1208
1209 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1211 self.jinja_ranges
1212 .iter()
1213 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1214 }
1215
1216 #[inline]
1218 pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
1219 self.jsx_expression_ranges
1220 .iter()
1221 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1222 }
1223
1224 #[inline]
1226 pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
1227 self.mdx_comment_ranges
1228 .iter()
1229 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1230 }
1231
1232 pub fn jsx_expression_ranges(&self) -> &[(usize, usize)] {
1234 &self.jsx_expression_ranges
1235 }
1236
1237 pub fn mdx_comment_ranges(&self) -> &[(usize, usize)] {
1239 &self.mdx_comment_ranges
1240 }
1241
1242 #[inline]
1245 pub fn is_in_citation(&self, byte_pos: usize) -> bool {
1246 self.citation_ranges
1247 .iter()
1248 .any(|range| byte_pos >= range.start && byte_pos < range.end)
1249 }
1250
1251 pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
1253 &self.citation_ranges
1254 }
1255
1256 #[inline]
1258 pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
1259 self.shortcode_ranges
1260 .iter()
1261 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1262 }
1263
1264 pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
1266 &self.shortcode_ranges
1267 }
1268
1269 pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1271 self.reference_defs.iter().any(|def| {
1272 if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1273 byte_pos >= start && byte_pos < end
1274 } else {
1275 false
1276 }
1277 })
1278 }
1279
1280 pub fn has_char(&self, ch: char) -> bool {
1282 match ch {
1283 '#' => self.char_frequency.hash_count > 0,
1284 '*' => self.char_frequency.asterisk_count > 0,
1285 '_' => self.char_frequency.underscore_count > 0,
1286 '-' => self.char_frequency.hyphen_count > 0,
1287 '+' => self.char_frequency.plus_count > 0,
1288 '>' => self.char_frequency.gt_count > 0,
1289 '|' => self.char_frequency.pipe_count > 0,
1290 '[' => self.char_frequency.bracket_count > 0,
1291 '`' => self.char_frequency.backtick_count > 0,
1292 '<' => self.char_frequency.lt_count > 0,
1293 '!' => self.char_frequency.exclamation_count > 0,
1294 '\n' => self.char_frequency.newline_count > 0,
1295 _ => self.content.contains(ch), }
1297 }
1298
1299 pub fn char_count(&self, ch: char) -> usize {
1301 match ch {
1302 '#' => self.char_frequency.hash_count,
1303 '*' => self.char_frequency.asterisk_count,
1304 '_' => self.char_frequency.underscore_count,
1305 '-' => self.char_frequency.hyphen_count,
1306 '+' => self.char_frequency.plus_count,
1307 '>' => self.char_frequency.gt_count,
1308 '|' => self.char_frequency.pipe_count,
1309 '[' => self.char_frequency.bracket_count,
1310 '`' => self.char_frequency.backtick_count,
1311 '<' => self.char_frequency.lt_count,
1312 '!' => self.char_frequency.exclamation_count,
1313 '\n' => self.char_frequency.newline_count,
1314 _ => self.content.matches(ch).count(), }
1316 }
1317
1318 pub fn likely_has_headings(&self) -> bool {
1320 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
1322
1323 pub fn likely_has_lists(&self) -> bool {
1325 self.char_frequency.asterisk_count > 0
1326 || self.char_frequency.hyphen_count > 0
1327 || self.char_frequency.plus_count > 0
1328 }
1329
1330 pub fn likely_has_emphasis(&self) -> bool {
1332 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1333 }
1334
1335 pub fn likely_has_tables(&self) -> bool {
1337 self.char_frequency.pipe_count > 2
1338 }
1339
1340 pub fn likely_has_blockquotes(&self) -> bool {
1342 self.char_frequency.gt_count > 0
1343 }
1344
1345 pub fn likely_has_code(&self) -> bool {
1347 self.char_frequency.backtick_count > 0
1348 }
1349
1350 pub fn likely_has_links_or_images(&self) -> bool {
1352 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1353 }
1354
1355 pub fn likely_has_html(&self) -> bool {
1357 self.char_frequency.lt_count > 0
1358 }
1359
1360 pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1365 if let Some(line_info) = self.lines.get(line_idx)
1366 && let Some(ref bq) = line_info.blockquote
1367 {
1368 bq.prefix.trim_end().to_string()
1369 } else {
1370 String::new()
1371 }
1372 }
1373
1374 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1376 self.html_tags()
1377 .iter()
1378 .filter(|tag| tag.line == line_num)
1379 .cloned()
1380 .collect()
1381 }
1382
1383 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1385 self.emphasis_spans()
1386 .iter()
1387 .filter(|span| span.line == line_num)
1388 .cloned()
1389 .collect()
1390 }
1391
1392 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1394 self.table_rows()
1395 .iter()
1396 .filter(|row| row.line == line_num)
1397 .cloned()
1398 .collect()
1399 }
1400
1401 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1403 self.bare_urls()
1404 .iter()
1405 .filter(|url| url.line == line_num)
1406 .cloned()
1407 .collect()
1408 }
1409
1410 #[inline]
1416 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1417 let idx = match lines.binary_search_by(|line| {
1419 if byte_offset < line.byte_offset {
1420 std::cmp::Ordering::Greater
1421 } else if byte_offset > line.byte_offset + line.byte_len {
1422 std::cmp::Ordering::Less
1423 } else {
1424 std::cmp::Ordering::Equal
1425 }
1426 }) {
1427 Ok(idx) => idx,
1428 Err(idx) => idx.saturating_sub(1),
1429 };
1430
1431 let line = &lines[idx];
1432 let line_num = idx + 1;
1433 let col = byte_offset.saturating_sub(line.byte_offset);
1434
1435 (idx, line_num, col)
1436 }
1437
1438 #[inline]
1440 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1441 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1443
1444 if idx > 0 {
1446 let span = &code_spans[idx - 1];
1447 if offset >= span.byte_offset && offset < span.byte_end {
1448 return true;
1449 }
1450 }
1451
1452 false
1453 }
1454
1455 fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1459 use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1460
1461 let mut link_ranges = Vec::new();
1462 let mut options = Options::empty();
1463 options.insert(Options::ENABLE_WIKILINKS);
1464 options.insert(Options::ENABLE_FOOTNOTES);
1465
1466 let parser = Parser::new_ext(content, options).into_offset_iter();
1467 let mut link_stack: Vec<usize> = Vec::new();
1468
1469 for (event, range) in parser {
1470 match event {
1471 Event::Start(Tag::Link { .. }) => {
1472 link_stack.push(range.start);
1473 }
1474 Event::End(TagEnd::Link) => {
1475 if let Some(start_pos) = link_stack.pop() {
1476 link_ranges.push((start_pos, range.end));
1477 }
1478 }
1479 _ => {}
1480 }
1481 }
1482
1483 link_ranges
1484 }
1485
/// Extracts links, broken-link reports, and footnote references from `content`.
///
/// Works in two passes:
/// 1. A pulldown-cmark pass (wikilinks and footnotes enabled) that records every
///    link the parser recognises, every broken reference reported through the
///    broken-link callback, and every footnote reference.
/// 2. A `LINK_PATTERN` regex pass that adds reference-style matches
///    (`[text][ref]`) whose start offset was not already recorded by pass 1 and
///    that are not escaped, not images, and not inside code blocks, code spans,
///    HTML comments, or MkDocs snippet lines.
///
/// Returns `(links, broken_links, footnote_refs)`.
fn parse_links(
    content: &'a str,
    lines: &[LineInfo],
    code_blocks: &[(usize, usize)],
    code_spans: &[CodeSpan],
    flavor: MarkdownFlavor,
    html_comment_ranges: &[crate::utils::skip_context::ByteRange],
) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
    use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
    use std::collections::HashSet;

    let mut links = Vec::with_capacity(content.len() / 500);
    let mut broken_links = Vec::new();
    let mut footnote_refs = Vec::new();

    // Start offsets of links recorded by the parser pass; the regex pass skips these.
    let mut found_positions = HashSet::new();

    let mut options = Options::empty();
    options.insert(Options::ENABLE_WIKILINKS);
    options.insert(Options::ENABLE_FOOTNOTES);

    // The callback records each broken reference and returns `None`, i.e. it
    // supplies no replacement destination for the link.
    let parser = Parser::new_with_broken_link_callback(
        content,
        options,
        Some(|link: BrokenLink<'_>| {
            broken_links.push(BrokenLinkInfo {
                reference: link.reference.to_string(),
                span: link.span.clone(),
            });
            None
        }),
    )
    .into_offset_iter();

    // Open links: (start offset, end of the Start event's range, destination URL, link type, reference id).
    let mut link_stack: Vec<(
        usize,
        usize,
        pulldown_cmark::CowStr<'a>,
        LinkType,
        pulldown_cmark::CowStr<'a>,
    )> = Vec::new();
    // Text/code fragments seen inside the current link: (text, start, end).
    // Only the text component is read, and only for wikilinks.
    let mut text_chunks: Vec<(String, usize, usize)> = Vec::new();

    for (event, range) in parser {
        match event {
            Event::Start(Tag::Link {
                link_type,
                dest_url,
                id,
                ..
            }) => {
                link_stack.push((range.start, range.end, dest_url, link_type, id));
                text_chunks.clear();
            }
            Event::Text(text) if !link_stack.is_empty() => {
                text_chunks.push((text.to_string(), range.start, range.end));
            }
            Event::Code(code) if !link_stack.is_empty() => {
                // Re-wrap inline code in backticks so the collected text keeps them.
                let code_text = format!("`{code}`");
                text_chunks.push((code_text, range.start, range.end));
            }
            Event::End(TagEnd::Link) => {
                if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
                    // Links that start inside an HTML comment are dropped.
                    if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
                        text_chunks.clear();
                        continue;
                    }

                    let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);

                    // Links on MkDocs snippet lines are dropped.
                    if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
                        text_chunks.clear();
                        continue;
                    }

                    let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);

                    let is_reference = matches!(
                        link_type,
                        LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
                    );

                    let link_text = if matches!(link_type, LinkType::WikiLink { .. }) {
                        // Wikilinks: use the concatenated parser text, or the URL
                        // when the parser emitted no text for the link.
                        if !text_chunks.is_empty() {
                            let text: String = text_chunks.iter().map(|(t, _, _)| t.as_str()).collect();
                            Cow::Owned(text)
                        } else {
                            Cow::Owned(url.to_string())
                        }
                    } else if start_pos < content.len() {
                        // Other links: borrow the text straight from the source by
                        // scanning the raw bytes for the `]` that closes the text.
                        let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];

                        let mut close_pos = None;
                        // Nesting depth of unescaped `[` seen inside the link text.
                        let mut depth = 0;
                        let mut in_code_span = false;

                        // Byte 0 is skipped (presumably the opening `[`).
                        for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
                            // A byte is escaped when preceded by an odd number of backslashes.
                            let mut backslash_count = 0;
                            let mut j = i;
                            while j > 0 && link_bytes[j - 1] == b'\\' {
                                backslash_count += 1;
                                j -= 1;
                            }
                            let is_escaped = backslash_count % 2 != 0;

                            // Each unescaped backtick toggles code-span state.
                            if byte == b'`' && !is_escaped {
                                in_code_span = !in_code_span;
                            }

                            // Brackets only count outside code spans and when unescaped.
                            if !is_escaped && !in_code_span {
                                if byte == b'[' {
                                    depth += 1;
                                } else if byte == b']' {
                                    if depth == 0 {
                                        close_pos = Some(i);
                                        break;
                                    } else {
                                        depth -= 1;
                                    }
                                }
                            }
                        }

                        // Text is everything between byte 0 and the closing `]`;
                        // empty if no closing bracket was found or the slice is not valid UTF-8.
                        if let Some(pos) = close_pos {
                            Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
                        } else {
                            Cow::Borrowed("")
                        }
                    } else {
                        Cow::Borrowed("")
                    };

                    // Reference links: lowercased explicit id, or the lowercased
                    // link text when the parser supplied an empty id.
                    let reference_id = if is_reference && !ref_id.is_empty() {
                        Some(Cow::Owned(ref_id.to_lowercase()))
                    } else if is_reference {
                        Some(Cow::Owned(link_text.to_lowercase()))
                    } else {
                        None
                    };

                    found_positions.insert(start_pos);

                    links.push(ParsedLink {
                        line: line_num,
                        start_col: col_start,
                        end_col: col_end,
                        byte_offset: start_pos,
                        byte_end: range.end,
                        text: link_text,
                        url: Cow::Owned(url.to_string()),
                        is_reference,
                        reference_id,
                        link_type,
                    });

                    text_chunks.clear();
                }
            }
            Event::FootnoteReference(footnote_id) => {
                // Footnote references inside HTML comments are ignored.
                if is_in_html_comment_ranges(html_comment_ranges, range.start) {
                    continue;
                }

                let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
                footnote_refs.push(FootnoteRef {
                    id: footnote_id.to_string(),
                    line: line_num,
                    byte_offset: range.start,
                    byte_end: range.end,
                });
            }
            _ => {}
        }
    }

    // Regex pass: pick up reference-style links the parser pass did not record.
    for cap in LINK_PATTERN.captures_iter(content) {
        let full_match = cap.get(0).unwrap();
        let match_start = full_match.start();
        let match_end = full_match.end();

        // Already recorded by the parser pass.
        if found_positions.contains(&match_start) {
            continue;
        }

        // Preceded by a backslash: treated as an escaped bracket.
        if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
            continue;
        }

        // Preceded by `!`: this is an image, not a link.
        if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
            continue;
        }

        if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
            continue;
        }

        if Self::is_offset_in_code_span(code_spans, match_start) {
            continue;
        }

        if is_in_html_comment_ranges(html_comment_ranges, match_start) {
            continue;
        }

        let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);

        if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
            continue;
        }

        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);

        let text = cap.get(1).map_or("", |m| m.as_str());

        // Only matches with a reference part (group 6) are recorded here;
        // inline-style matches are not pushed by this pass.
        if let Some(ref_id) = cap.get(6) {
            let ref_id_str = ref_id.as_str();
            // `[text][]` uses the link text as the reference id.
            let normalized_ref = if ref_id_str.is_empty() {
                Cow::Owned(text.to_lowercase())
            } else {
                Cow::Owned(ref_id_str.to_lowercase())
            };

            links.push(ParsedLink {
                line: line_num,
                start_col: col_start,
                end_col: col_end,
                byte_offset: match_start,
                byte_end: match_end,
                text: Cow::Borrowed(text),
                url: Cow::Borrowed(""),
                is_reference: true,
                reference_id: Some(normalized_ref),
                link_type: LinkType::Reference,
            });
        }
    }

    (links, broken_links, footnote_refs)
}
1773
1774 fn parse_images(
1776 content: &'a str,
1777 lines: &[LineInfo],
1778 code_blocks: &[(usize, usize)],
1779 code_spans: &[CodeSpan],
1780 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1781 ) -> Vec<ParsedImage<'a>> {
1782 use crate::utils::skip_context::is_in_html_comment_ranges;
1783 use std::collections::HashSet;
1784
1785 let mut images = Vec::with_capacity(content.len() / 1000);
1787 let mut found_positions = HashSet::new();
1788
1789 let parser = Parser::new(content).into_offset_iter();
1791 let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1792 Vec::new();
1793 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1796 match event {
1797 Event::Start(Tag::Image {
1798 link_type,
1799 dest_url,
1800 id,
1801 ..
1802 }) => {
1803 image_stack.push((range.start, dest_url, link_type, id));
1804 text_chunks.clear();
1805 }
1806 Event::Text(text) if !image_stack.is_empty() => {
1807 text_chunks.push((text.to_string(), range.start, range.end));
1808 }
1809 Event::Code(code) if !image_stack.is_empty() => {
1810 let code_text = format!("`{code}`");
1811 text_chunks.push((code_text, range.start, range.end));
1812 }
1813 Event::End(TagEnd::Image) => {
1814 if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1815 if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1817 continue;
1818 }
1819
1820 if Self::is_offset_in_code_span(code_spans, start_pos) {
1822 continue;
1823 }
1824
1825 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1827 continue;
1828 }
1829
1830 let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1832 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1833
1834 let is_reference = matches!(
1835 link_type,
1836 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1837 );
1838
1839 let alt_text = if start_pos < content.len() {
1842 let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1843
1844 let mut close_pos = None;
1847 let mut depth = 0;
1848
1849 if image_bytes.len() > 2 {
1850 for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1851 let mut backslash_count = 0;
1853 let mut j = i;
1854 while j > 0 && image_bytes[j - 1] == b'\\' {
1855 backslash_count += 1;
1856 j -= 1;
1857 }
1858 let is_escaped = backslash_count % 2 != 0;
1859
1860 if !is_escaped {
1861 if byte == b'[' {
1862 depth += 1;
1863 } else if byte == b']' {
1864 if depth == 0 {
1865 close_pos = Some(i);
1867 break;
1868 } else {
1869 depth -= 1;
1870 }
1871 }
1872 }
1873 }
1874 }
1875
1876 if let Some(pos) = close_pos {
1877 Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1878 } else {
1879 Cow::Borrowed("")
1880 }
1881 } else {
1882 Cow::Borrowed("")
1883 };
1884
1885 let reference_id = if is_reference && !ref_id.is_empty() {
1886 Some(Cow::Owned(ref_id.to_lowercase()))
1887 } else if is_reference {
1888 Some(Cow::Owned(alt_text.to_lowercase())) } else {
1890 None
1891 };
1892
1893 found_positions.insert(start_pos);
1894 images.push(ParsedImage {
1895 line: line_num,
1896 start_col: col_start,
1897 end_col: col_end,
1898 byte_offset: start_pos,
1899 byte_end: range.end,
1900 alt_text,
1901 url: Cow::Owned(url.to_string()),
1902 is_reference,
1903 reference_id,
1904 link_type,
1905 });
1906 }
1907 }
1908 _ => {}
1909 }
1910 }
1911
1912 for cap in IMAGE_PATTERN.captures_iter(content) {
1914 let full_match = cap.get(0).unwrap();
1915 let match_start = full_match.start();
1916 let match_end = full_match.end();
1917
1918 if found_positions.contains(&match_start) {
1920 continue;
1921 }
1922
1923 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1925 continue;
1926 }
1927
1928 if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1930 || Self::is_offset_in_code_span(code_spans, match_start)
1931 || is_in_html_comment_ranges(html_comment_ranges, match_start)
1932 {
1933 continue;
1934 }
1935
1936 if let Some(ref_id) = cap.get(6) {
1938 let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1939 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1940 let alt_text = cap.get(1).map_or("", |m| m.as_str());
1941 let ref_id_str = ref_id.as_str();
1942 let normalized_ref = if ref_id_str.is_empty() {
1943 Cow::Owned(alt_text.to_lowercase())
1944 } else {
1945 Cow::Owned(ref_id_str.to_lowercase())
1946 };
1947
1948 images.push(ParsedImage {
1949 line: line_num,
1950 start_col: col_start,
1951 end_col: col_end,
1952 byte_offset: match_start,
1953 byte_end: match_end,
1954 alt_text: Cow::Borrowed(alt_text),
1955 url: Cow::Borrowed(""),
1956 is_reference: true,
1957 reference_id: Some(normalized_ref),
1958 link_type: LinkType::Reference, });
1960 }
1961 }
1962
1963 images
1964 }
1965
1966 fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1968 let mut refs = Vec::with_capacity(lines.len() / 20); for (line_idx, line_info) in lines.iter().enumerate() {
1972 if line_info.in_code_block {
1974 continue;
1975 }
1976
1977 let line = line_info.content(content);
1978 let line_num = line_idx + 1;
1979
1980 if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1981 let id_raw = cap.get(1).unwrap().as_str();
1982
1983 if id_raw.starts_with('^') {
1986 continue;
1987 }
1988
1989 let id = id_raw.to_lowercase();
1990 let url = cap.get(2).unwrap().as_str().to_string();
1991 let title_match = cap.get(3).or_else(|| cap.get(4));
1992 let title = title_match.map(|m| m.as_str().to_string());
1993
1994 let match_obj = cap.get(0).unwrap();
1997 let byte_offset = line_info.byte_offset + match_obj.start();
1998 let byte_end = line_info.byte_offset + match_obj.end();
1999
2000 let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
2002 let start = line_info.byte_offset + m.start().saturating_sub(1);
2004 let end = line_info.byte_offset + m.end() + 1; (Some(start), Some(end))
2006 } else {
2007 (None, None)
2008 };
2009
2010 refs.push(ReferenceDef {
2011 line: line_num,
2012 id,
2013 url,
2014 title,
2015 byte_offset,
2016 byte_end,
2017 title_byte_start,
2018 title_byte_end,
2019 });
2020 }
2021 }
2022
2023 refs
2024 }
2025
/// Splits a line into its blockquote prefix and the remaining content.
///
/// The prefix covers every leading `>` marker, the whitespace in front of each
/// marker, and at most one space or tab directly after each marker. Returns
/// `None` when the line (ignoring leading whitespace) does not start with `>`.
#[inline]
fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
    let mut rest = line;
    let mut consumed = 0usize;

    while let Some(after_marker) = rest.trim_start().strip_prefix('>') {
        // Leading whitespace plus the `>` itself.
        consumed += rest.len() - after_marker.len();

        // Swallow a single separator (space or tab) after the marker, if any.
        rest = match after_marker.strip_prefix([' ', '\t']) {
            Some(tail) => {
                consumed += 1;
                tail
            }
            None => after_marker,
        };
    }

    // Nothing consumed means no marker was found at all.
    if consumed == 0 {
        None
    } else {
        Some((&line[..consumed], rest))
    }
}
2066
2067 fn detect_list_items_and_emphasis_with_pulldown(
2091 content: &str,
2092 line_offsets: &[usize],
2093 flavor: MarkdownFlavor,
2094 front_matter_end: usize,
2095 code_blocks: &[(usize, usize)],
2096 ) -> (ListItemMap, Vec<EmphasisSpan>) {
2097 use std::collections::HashMap;
2098
2099 let mut list_items = HashMap::new();
2100 let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
2101
2102 let mut options = Options::empty();
2103 options.insert(Options::ENABLE_TABLES);
2104 options.insert(Options::ENABLE_FOOTNOTES);
2105 options.insert(Options::ENABLE_STRIKETHROUGH);
2106 options.insert(Options::ENABLE_TASKLISTS);
2107 options.insert(Options::ENABLE_GFM);
2109
2110 let _ = flavor;
2112
2113 let parser = Parser::new_ext(content, options).into_offset_iter();
2114 let mut list_depth: usize = 0;
2115 let mut list_stack: Vec<bool> = Vec::new();
2116
2117 for (event, range) in parser {
2118 match event {
2119 Event::Start(Tag::Emphasis) | Event::Start(Tag::Strong) => {
2121 let marker_count = if matches!(event, Event::Start(Tag::Strong)) {
2122 2
2123 } else {
2124 1
2125 };
2126 let match_start = range.start;
2127 let match_end = range.end;
2128
2129 if !CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2131 let marker = content[match_start..].chars().next().unwrap_or('*');
2133 if marker == '*' || marker == '_' {
2134 let content_start = match_start + marker_count;
2136 let content_end = if match_end >= marker_count {
2137 match_end - marker_count
2138 } else {
2139 match_end
2140 };
2141 let content_part = if content_start < content_end && content_end <= content.len() {
2142 &content[content_start..content_end]
2143 } else {
2144 ""
2145 };
2146
2147 let line_idx = match line_offsets.binary_search(&match_start) {
2149 Ok(idx) => idx,
2150 Err(idx) => idx.saturating_sub(1),
2151 };
2152 let line_num = line_idx + 1;
2153 let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
2154 let col_start = match_start - line_start;
2155 let col_end = match_end - line_start;
2156
2157 emphasis_spans.push(EmphasisSpan {
2158 line: line_num,
2159 start_col: col_start,
2160 end_col: col_end,
2161 byte_offset: match_start,
2162 byte_end: match_end,
2163 marker,
2164 marker_count,
2165 content: content_part.to_string(),
2166 });
2167 }
2168 }
2169 }
2170 Event::Start(Tag::List(start_number)) => {
2171 list_depth += 1;
2172 list_stack.push(start_number.is_some());
2173 }
2174 Event::End(TagEnd::List(_)) => {
2175 list_depth = list_depth.saturating_sub(1);
2176 list_stack.pop();
2177 }
2178 Event::Start(Tag::Item) if list_depth > 0 => {
2179 let current_list_is_ordered = list_stack.last().copied().unwrap_or(false);
2181 let item_start = range.start;
2183
2184 let mut line_idx = match line_offsets.binary_search(&item_start) {
2186 Ok(idx) => idx,
2187 Err(idx) => idx.saturating_sub(1),
2188 };
2189
2190 if item_start < content.len() && content.as_bytes()[item_start] == b'\n' {
2194 line_idx += 1;
2195 }
2196
2197 if front_matter_end > 0 && line_idx < front_matter_end {
2199 continue;
2200 }
2201
2202 if line_idx < line_offsets.len() {
2203 let line_start_byte = line_offsets[line_idx];
2204 let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
2205 let line = &content[line_start_byte..line_end.min(content.len())];
2206
2207 let line = line
2209 .strip_suffix('\n')
2210 .or_else(|| line.strip_suffix("\r\n"))
2211 .unwrap_or(line);
2212
2213 let blockquote_parse = Self::parse_blockquote_prefix(line);
2215 let (blockquote_prefix_len, line_to_parse) = if let Some((prefix, content)) = blockquote_parse {
2216 (prefix.len(), content)
2217 } else {
2218 (0, line)
2219 };
2220
2221 if current_list_is_ordered {
2223 if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
2224 Self::parse_ordered_list(line_to_parse)
2225 {
2226 let marker = format!("{number_str}{delimiter}");
2227 let marker_column = blockquote_prefix_len + leading_spaces.len();
2228 let content_column = marker_column + marker.len() + spacing.len();
2229 let number = number_str.parse().ok();
2230
2231 list_items.entry(line_start_byte).or_insert((
2232 true,
2233 marker,
2234 marker_column,
2235 content_column,
2236 number,
2237 ));
2238 }
2239 } else if let Some((leading_spaces, marker, spacing, _content)) =
2240 Self::parse_unordered_list(line_to_parse)
2241 {
2242 let marker_column = blockquote_prefix_len + leading_spaces.len();
2243 let content_column = marker_column + 1 + spacing.len();
2244
2245 list_items.entry(line_start_byte).or_insert((
2246 false,
2247 marker.to_string(),
2248 marker_column,
2249 content_column,
2250 None,
2251 ));
2252 }
2253 }
2254 }
2255 _ => {}
2256 }
2257 }
2258
2259 (list_items, emphasis_spans)
2260 }
2261
/// Splits an unordered list line into `(indent, marker, spacing, content)`.
///
/// `indent` and `spacing` are runs of spaces/tabs before and after the marker.
/// The marker must be `-`, `*`, or `+`; otherwise `None` is returned. Spacing
/// after the marker may be empty.
#[inline]
fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
    let is_blank = |c: char| c == ' ' || c == '\t';

    let after_indent = line.trim_start_matches(is_blank);
    let indent = &line[..line.len() - after_indent.len()];

    // The first non-blank character must be one of the bullet markers.
    let marker = after_indent
        .chars()
        .next()
        .filter(|c| matches!(c, '-' | '*' | '+'))?;

    // Markers are single-byte ASCII, so slicing at 1 is on a char boundary.
    let after_marker = &after_indent[1..];
    let content = after_marker.trim_start_matches(is_blank);
    let spacing = &after_marker[..after_marker.len() - content.len()];

    Some((indent, marker, spacing, content))
}
2294
/// Splits an ordered list line into `(indent, number, delimiter, spacing, content)`.
///
/// `indent` and `spacing` are runs of spaces/tabs. `number` is one or more
/// ASCII digits and `delimiter` is `.` or `)`. Returns `None` when the digits or
/// the delimiter are missing. Spacing after the delimiter may be empty.
#[inline]
fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
    let is_blank = |c: char| c == ' ' || c == '\t';

    let after_indent = line.trim_start_matches(is_blank);
    let indent = &line[..line.len() - after_indent.len()];

    let after_digits = after_indent.trim_start_matches(|c: char| c.is_ascii_digit());
    let digits = &after_indent[..after_indent.len() - after_digits.len()];
    if digits.is_empty() {
        return None;
    }

    // The character right after the number must be a list delimiter.
    let delimiter = after_digits
        .chars()
        .next()
        .filter(|c| matches!(c, '.' | ')'))?;

    // Delimiters are single-byte ASCII, so slicing at 1 is on a char boundary.
    let after_delimiter = &after_digits[1..];
    let content = after_delimiter.trim_start_matches(is_blank);
    let spacing = &after_delimiter[..after_delimiter.len() - content.len()];

    Some((indent, digits, delimiter, spacing, content))
}
2342
/// Builds a per-line flag vector marking lines covered by any code block.
///
/// `line_offsets` holds the starting byte offset of each line and is expected
/// to be ascending. For each `(start, end)` block, the line containing `start`
/// through the last line that begins before `end` are flagged. Offsets that fall
/// inside a multi-byte character are snapped outward (start backward, end
/// forward), and `end` is clamped to the content length.
fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
    let mut flags = vec![false; line_offsets.len()];

    for &(start, end) in code_blocks {
        // Snap the start back to the nearest char boundary.
        let mut block_start = start;
        while block_start > 0 && !content.is_char_boundary(block_start) {
            block_start -= 1;
        }

        // Snap the end forward to the nearest char boundary, never past the content.
        let mut block_end = end;
        while block_end < content.len() && !content.is_char_boundary(block_end) {
            block_end += 1;
        }
        let block_end = block_end.min(content.len());

        // Index of the line containing `block_start`.
        let first = line_offsets
            .partition_point(|&offset| offset <= block_start)
            .saturating_sub(1);
        // One past the last line that begins before `block_end`.
        let past_last = line_offsets.partition_point(|&offset| offset < block_end);

        // An inverted range (first > past_last) yields `None` and flags nothing.
        if let Some(covered) = flags.get_mut(first..past_last) {
            covered.fill(true);
        }
    }

    flags
}
2402
/// Builds a per-line flag vector marking lines that belong to a `$$ … $$` block.
///
/// A line whose trimmed text is exactly `$$` toggles the math state and is
/// itself flagged; every line seen while the state is open is flagged too.
/// Lines marked in `code_block_map` are never flagged and never toggle the state.
fn compute_math_block_line_map(content: &str, code_block_map: &[bool]) -> Vec<bool> {
    let mut math_open = false;

    content
        .lines()
        .enumerate()
        .map(|(idx, text)| {
            // Code-block lines are ignored entirely, including `$$` inside them.
            if code_block_map.get(idx).copied().unwrap_or(false) {
                return false;
            }

            if text.trim() == "$$" {
                // Both the opening and the closing delimiter belong to the block.
                math_open = !math_open;
                true
            } else {
                math_open
            }
        })
        .collect()
}
2440
2441 fn compute_basic_line_info(
2444 content: &str,
2445 line_offsets: &[usize],
2446 code_blocks: &[(usize, usize)],
2447 flavor: MarkdownFlavor,
2448 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2449 autodoc_ranges: &[crate::utils::skip_context::ByteRange],
2450 quarto_div_ranges: &[crate::utils::skip_context::ByteRange],
2451 ) -> (Vec<LineInfo>, Vec<EmphasisSpan>) {
2452 let content_lines: Vec<&str> = content.lines().collect();
2453 let mut lines = Vec::with_capacity(content_lines.len());
2454
2455 let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
2457
2458 let math_block_map = Self::compute_math_block_line_map(content, &code_block_map);
2460
2461 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2464
2465 let (list_item_map, emphasis_spans) = Self::detect_list_items_and_emphasis_with_pulldown(
2468 content,
2469 line_offsets,
2470 flavor,
2471 front_matter_end,
2472 code_blocks,
2473 );
2474
2475 for (i, line) in content_lines.iter().enumerate() {
2476 let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
2477 let indent = line.len() - line.trim_start().len();
2478 let visual_indent = ElementCache::calculate_indentation_width_default(line);
2480
2481 let blockquote_parse = Self::parse_blockquote_prefix(line);
2483
2484 let is_blank = if let Some((_, content)) = blockquote_parse {
2486 content.trim().is_empty()
2488 } else {
2489 line.trim().is_empty()
2490 };
2491
2492 let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
2494
2495 let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
2497 && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
2498 let line_end_offset = byte_offset + line.len();
2501 let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
2502 html_comment_ranges,
2503 byte_offset,
2504 line_end_offset,
2505 );
2506 let list_item =
2509 list_item_map
2510 .get(&byte_offset)
2511 .map(
2512 |(is_ordered, marker, marker_column, content_column, number)| ListItemInfo {
2513 marker: marker.clone(),
2514 is_ordered: *is_ordered,
2515 number: *number,
2516 marker_column: *marker_column,
2517 content_column: *content_column,
2518 },
2519 );
2520
2521 let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2524 let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2525
2526 let in_math_block = math_block_map.get(i).copied().unwrap_or(false);
2528
2529 let in_quarto_div = flavor == MarkdownFlavor::Quarto
2531 && crate::utils::quarto_divs::is_within_div_block_ranges(quarto_div_ranges, byte_offset);
2532
2533 lines.push(LineInfo {
2534 byte_offset,
2535 byte_len: line.len(),
2536 indent,
2537 visual_indent,
2538 is_blank,
2539 in_code_block,
2540 in_front_matter,
2541 in_html_block: false, in_html_comment,
2543 list_item,
2544 heading: None, blockquote: None, in_mkdocstrings,
2547 in_esm_block: false, in_code_span_continuation: false, is_horizontal_rule: is_hr,
2550 in_math_block,
2551 in_quarto_div,
2552 in_jsx_expression: false, in_mdx_comment: false, in_jsx_component: false, in_jsx_fragment: false, in_admonition: false, in_content_tab: false, in_definition_list: false, in_obsidian_comment: false, });
2561 }
2562
2563 (lines, emphasis_spans)
2564 }
2565
/// Fills in `blockquote` and `heading` (and, for quoted rules,
/// `is_horizontal_rule`) on each entry of `lines`.
///
/// For every line outside the front matter, blockquote details are recorded
/// first (even for lines in code blocks). Heading detection then runs only on
/// lines that are not in a code block, front matter, or HTML block and are not
/// blank:
/// * ATX headings (`# Title`), skipped for MkDocs snippet markers, for lines
///   starting in an HTML comment, and for lines whose start falls strictly
///   inside one of `link_byte_ranges`.
/// * Setext headings (text followed by a `===` / `---` underline), with a
///   series of checks that reject lines looking like list items, thematic
///   breaks, blockquotes, fences, or HTML.
///
/// Indexes `content.lines()` by the indices of `lines`, so `lines` must not be
/// longer than the number of lines in `content`.
fn detect_headings_and_blockquotes(
    content: &str,
    lines: &mut [LineInfo],
    flavor: MarkdownFlavor,
    html_comment_ranges: &[crate::utils::skip_context::ByteRange],
    link_byte_ranges: &[(usize, usize)],
) {
    // Groups: 1 = leading whitespace, 2 = 1-6 hashes, 3 = whitespace after the hashes, 4 = rest of line.
    static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
        LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
    // A line consisting only of `=` or only of `-`, with optional surrounding whitespace.
    static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
        LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());

    let content_lines: Vec<&str> = content.lines().collect();

    let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);

    for i in 0..lines.len() {
        let line = content_lines[i];

        // Blockquote detection runs before the skip checks below, so it also
        // applies to lines inside code blocks (but not front matter).
        if !(front_matter_end > 0 && i < front_matter_end)
            && let Some(bq) = parse_blockquote_detailed(line)
        {
            let nesting_level = bq.markers.len();
            let marker_column = bq.indent.len();
            let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
            let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
            let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
            // A bare marker with neither spacing nor content.
            let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();

            lines[i].blockquote = Some(BlockquoteInfo {
                nesting_level,
                indent: bq.indent.to_string(),
                marker_column,
                prefix,
                content: bq.content.to_string(),
                has_no_space_after_marker: has_no_space,
                has_multiple_spaces_after_marker: has_multiple_spaces,
                needs_md028_fix,
            });

            // A horizontal rule inside a blockquote still counts as one.
            if !lines[i].in_code_block && is_horizontal_rule_content(bq.content.trim()) {
                lines[i].is_horizontal_rule = true;
            }
        }

        // No heading detection inside code blocks.
        if lines[i].in_code_block {
            continue;
        }

        // No heading detection inside front matter.
        if front_matter_end > 0 && i < front_matter_end {
            continue;
        }

        // No heading detection inside HTML blocks.
        if lines[i].in_html_block {
            continue;
        }

        // Blank lines cannot be headings.
        if lines[i].is_blank {
            continue;
        }

        // MkDocs snippet section markers are not treated as ATX headings.
        let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
            crate::utils::mkdocs_snippets::is_snippet_section_start(line)
                || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
        } else {
            false
        };

        if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
            if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
                continue;
            }
            // Skip lines that begin strictly inside a link's byte range.
            let line_offset = lines[i].byte_offset;
            if link_byte_ranges
                .iter()
                .any(|&(start, end)| line_offset > start && line_offset < end)
            {
                continue;
            }
            let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
            let hashes = caps.get(2).map_or("", |m| m.as_str());
            let spaces_after = caps.get(3).map_or("", |m| m.as_str());
            let rest = caps.get(4).map_or("", |m| m.as_str());

            let level = hashes.len() as u8;
            let marker_column = leading_spaces.len();

            // Split the rest of the line into heading text and an optional
            // closing hash sequence, keeping a trailing ` {#id}` with the text.
            let (text, has_closing, closing_seq) = {
                // Separate a trailing ` {#...}` part (only if it ends with `}`).
                let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
                    if rest[id_start..].trim_end().ends_with('}') {
                        (&rest[..id_start], &rest[id_start..])
                    } else {
                        (rest, "")
                    }
                } else {
                    (rest, "")
                };

                let trimmed_rest = rest_without_id.trim_end();
                if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
                    // Work on (byte position, char) pairs so stepping backwards is UTF-8 safe.
                    let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();

                    let last_hash_char_idx = char_positions
                        .iter()
                        .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);

                    if let Some(mut char_idx) = last_hash_char_idx {
                        // Walk back to the first `#` of the contiguous run.
                        while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
                            char_idx -= 1;
                        }

                        let start_of_hashes = char_positions[char_idx].0;

                        // The run must start the text or follow whitespace.
                        let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();

                        // Everything from the run to the end must be hashes,
                        // i.e. the run has to sit at the very end of the text.
                        let potential_closing = &trimmed_rest[start_of_hashes..];
                        let is_all_hashes = potential_closing.chars().all(|c| c == '#');

                        if is_all_hashes && has_space_before {
                            let closing_hashes = potential_closing.to_string();
                            // Re-attach the custom id part after removing the closing hashes.
                            let text_part = if !custom_id_part.is_empty() {
                                format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
                            } else {
                                trimmed_rest[..start_of_hashes].trim_end().to_string()
                            };
                            (text_part, true, closing_hashes)
                        } else {
                            (rest.to_string(), false, String::new())
                        }
                    } else {
                        (rest.to_string(), false, String::new())
                    }
                } else {
                    (rest.to_string(), false, String::new())
                }
            };

            let content_column = marker_column + hashes.len() + spaces_after.len();

            let raw_text = text.trim().to_string();
            let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);

            // No inline id: look for a standalone attribute list on the next line.
            if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
                let next_line = content_lines[i + 1];
                if !lines[i + 1].in_code_block
                    && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
                    && let Some(next_line_id) =
                        crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
                {
                    custom_id = Some(next_line_id);
                }
            }

            // Valid when there is whitespace after the hashes, nothing follows
            // them, the level is above 1, or the text starts with an uppercase
            // character. Only `#lowercase…` (level 1, no space) is invalid.
            let is_valid = !spaces_after.is_empty()
                || rest.is_empty()
                || level > 1
                || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());

            lines[i].heading = Some(HeadingInfo {
                level,
                style: HeadingStyle::ATX,
                marker: hashes.to_string(),
                marker_column,
                content_column,
                text: clean_text,
                custom_id,
                raw_text,
                has_closing_sequence: has_closing,
                closing_sequence: closing_seq,
                is_valid,
            });
        } else if i + 1 < content_lines.len() && i + 1 < lines.len() {
            // Setext heading candidate: the next line must be an underline outside a code block.
            let next_line = content_lines[i + 1];
            if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
                if front_matter_end > 0 && i < front_matter_end {
                    continue;
                }

                if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
                {
                    continue;
                }

                let content_line = line.trim();

                // Lines starting with a bullet character are not heading text.
                if content_line.starts_with('-') || content_line.starts_with('*') || content_line.starts_with('+') {
                    continue;
                }

                // Three or more underscores (ignoring whitespace) form a thematic break.
                if content_line.starts_with('_') {
                    let non_ws: String = content_line.chars().filter(|c| !c.is_whitespace()).collect();
                    if non_ws.len() >= 3 && non_ws.chars().all(|c| c == '_') {
                        continue;
                    }
                }

                // Digits followed by `.` or `)` look like an ordered list item.
                if let Some(first_char) = content_line.chars().next()
                    && first_char.is_ascii_digit()
                {
                    let num_end = content_line.chars().take_while(|c| c.is_ascii_digit()).count();
                    if num_end < content_line.len() {
                        let next = content_line.chars().nth(num_end);
                        if next == Some('.') || next == Some(')') {
                            continue;
                        }
                    }
                }

                // Lines matching the ATX pattern are not Setext heading text.
                if ATX_HEADING_REGEX.is_match(line) {
                    continue;
                }

                // Blockquote lines are not heading text.
                if content_line.starts_with('>') {
                    continue;
                }

                // Fence openers are not heading text.
                let trimmed_start = line.trim_start();
                if trimmed_start.len() >= 3 {
                    let first_three: String = trimmed_start.chars().take(3).collect();
                    if first_three == "```" || first_three == "~~~" {
                        continue;
                    }
                }

                // Lines starting with `<` are treated as HTML, not heading text.
                if content_line.starts_with('<') {
                    continue;
                }

                let underline = next_line.trim();

                // `=` underline gives level 1, `-` underline level 2.
                let level = if underline.starts_with('=') { 1 } else { 2 };
                let style = if level == 1 {
                    HeadingStyle::Setext1
                } else {
                    HeadingStyle::Setext2
                };

                let raw_text = line.trim().to_string();
                let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);

                // No inline id: look for a standalone attribute list after the underline.
                if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
                    let attr_line = content_lines[i + 2];
                    if !lines[i + 2].in_code_block
                        && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
                        && let Some(attr_line_id) =
                            crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
                    {
                        custom_id = Some(attr_line_id);
                    }
                }

                // For Setext headings the marker is the underline text and
                // `marker_column` is the underline's indentation.
                lines[i].heading = Some(HeadingInfo {
                    level,
                    style,
                    marker: underline.to_string(),
                    marker_column: next_line.len() - next_line.trim_start().len(),
                    content_column: lines[i].indent,
                    text: clean_text,
                    custom_id,
                    raw_text,
                    has_closing_sequence: false,
                    closing_sequence: String::new(),
                    is_valid: true,
                });
            }
        }
    }
}
2899
2900 fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2902 const BLOCK_ELEMENTS: &[&str] = &[
2905 "address",
2906 "article",
2907 "aside",
2908 "audio",
2909 "blockquote",
2910 "canvas",
2911 "details",
2912 "dialog",
2913 "dd",
2914 "div",
2915 "dl",
2916 "dt",
2917 "embed",
2918 "fieldset",
2919 "figcaption",
2920 "figure",
2921 "footer",
2922 "form",
2923 "h1",
2924 "h2",
2925 "h3",
2926 "h4",
2927 "h5",
2928 "h6",
2929 "header",
2930 "hr",
2931 "iframe",
2932 "li",
2933 "main",
2934 "menu",
2935 "nav",
2936 "noscript",
2937 "object",
2938 "ol",
2939 "p",
2940 "picture",
2941 "pre",
2942 "script",
2943 "search",
2944 "section",
2945 "source",
2946 "style",
2947 "summary",
2948 "svg",
2949 "table",
2950 "tbody",
2951 "td",
2952 "template",
2953 "textarea",
2954 "tfoot",
2955 "th",
2956 "thead",
2957 "tr",
2958 "track",
2959 "ul",
2960 "video",
2961 ];
2962
2963 let mut i = 0;
2964 while i < lines.len() {
2965 if lines[i].in_code_block || lines[i].in_front_matter {
2967 i += 1;
2968 continue;
2969 }
2970
2971 let trimmed = lines[i].content(content).trim_start();
2972
2973 if trimmed.starts_with('<') && trimmed.len() > 1 {
2975 let after_bracket = &trimmed[1..];
2977 let is_closing = after_bracket.starts_with('/');
2978 let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2979
2980 let tag_name = tag_start
2982 .chars()
2983 .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2984 .collect::<String>()
2985 .to_lowercase();
2986
2987 if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2989 lines[i].in_html_block = true;
2991
2992 if !is_closing {
2997 let closing_tag = format!("</{tag_name}>");
2998
2999 let same_line_close = lines[i].content(content).contains(&closing_tag);
3002
3003 if !same_line_close {
3005 let allow_blank_lines = tag_name == "style" || tag_name == "script";
3007 let mut j = i + 1;
3008 let mut found_closing_tag = false;
3009 while j < lines.len() && j < i + 100 {
3010 if !allow_blank_lines && lines[j].is_blank {
3013 break;
3014 }
3015
3016 lines[j].in_html_block = true;
3017
3018 if lines[j].content(content).contains(&closing_tag) {
3020 found_closing_tag = true;
3021 }
3022
3023 if found_closing_tag {
3026 j += 1;
3027 while j < lines.len() && j < i + 100 {
3029 if lines[j].is_blank {
3030 break;
3031 }
3032 lines[j].in_html_block = true;
3033 j += 1;
3034 }
3035 break;
3036 }
3037 j += 1;
3038 }
3039 }
3040 }
3041 }
3042 }
3043
3044 i += 1;
3045 }
3046 }
3047
3048 fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
3051 if !flavor.supports_esm_blocks() {
3053 return;
3054 }
3055
3056 let mut in_multiline_import = false;
3057
3058 for line in lines.iter_mut() {
3059 if line.in_code_block || line.in_front_matter || line.in_html_comment {
3061 in_multiline_import = false;
3062 continue;
3063 }
3064
3065 let line_content = line.content(content);
3066 let trimmed = line_content.trim();
3067
3068 if in_multiline_import {
3070 line.in_esm_block = true;
3071 if trimmed.ends_with('\'')
3074 || trimmed.ends_with('"')
3075 || trimmed.ends_with("';")
3076 || trimmed.ends_with("\";")
3077 || line_content.contains(';')
3078 {
3079 in_multiline_import = false;
3080 }
3081 continue;
3082 }
3083
3084 if line.is_blank {
3086 continue;
3087 }
3088
3089 if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
3091 line.in_esm_block = true;
3092
3093 let is_import = trimmed.starts_with("import ");
3101
3102 let is_complete =
3104 trimmed.ends_with(';')
3106 || (trimmed.contains(" from ") && (trimmed.ends_with('\'') || trimmed.ends_with('"')))
3108 || (!is_import && !trimmed.contains(" from ") && (
3110 trimmed.starts_with("export const ")
3111 || trimmed.starts_with("export let ")
3112 || trimmed.starts_with("export var ")
3113 || trimmed.starts_with("export function ")
3114 || trimmed.starts_with("export class ")
3115 || trimmed.starts_with("export default ")
3116 ));
3117
3118 if !is_complete && is_import {
3119 if trimmed.contains('{') && !trimmed.contains('}') {
3123 in_multiline_import = true;
3124 }
3125 }
3126 }
3127 }
3128 }
3129
3130 fn detect_jsx_and_mdx_comments(
3133 content: &str,
3134 lines: &mut [LineInfo],
3135 flavor: MarkdownFlavor,
3136 code_blocks: &[(usize, usize)],
3137 ) -> (ByteRanges, ByteRanges) {
3138 if !flavor.supports_jsx() {
3140 return (Vec::new(), Vec::new());
3141 }
3142
3143 let mut jsx_expression_ranges: Vec<(usize, usize)> = Vec::new();
3144 let mut mdx_comment_ranges: Vec<(usize, usize)> = Vec::new();
3145
3146 if !content.contains('{') {
3148 return (jsx_expression_ranges, mdx_comment_ranges);
3149 }
3150
3151 let bytes = content.as_bytes();
3152 let mut i = 0;
3153
3154 while i < bytes.len() {
3155 if bytes[i] == b'{' {
3156 if code_blocks.iter().any(|(start, end)| i >= *start && i < *end) {
3158 i += 1;
3159 continue;
3160 }
3161
3162 let start = i;
3163
3164 if i + 2 < bytes.len() && &bytes[i + 1..i + 3] == b"/*" {
3166 let mut j = i + 3;
3168 while j + 2 < bytes.len() {
3169 if &bytes[j..j + 2] == b"*/" && j + 2 < bytes.len() && bytes[j + 2] == b'}' {
3170 let end = j + 3;
3171 mdx_comment_ranges.push((start, end));
3172
3173 Self::mark_lines_in_range(lines, content, start, end, |line| {
3175 line.in_mdx_comment = true;
3176 });
3177
3178 i = end;
3179 break;
3180 }
3181 j += 1;
3182 }
3183 if j + 2 >= bytes.len() {
3184 mdx_comment_ranges.push((start, bytes.len()));
3186 Self::mark_lines_in_range(lines, content, start, bytes.len(), |line| {
3187 line.in_mdx_comment = true;
3188 });
3189 break;
3190 }
3191 } else {
3192 let mut brace_depth = 1;
3195 let mut j = i + 1;
3196 let mut in_string = false;
3197 let mut string_char = b'"';
3198
3199 while j < bytes.len() && brace_depth > 0 {
3200 let c = bytes[j];
3201
3202 if !in_string && (c == b'"' || c == b'\'' || c == b'`') {
3204 in_string = true;
3205 string_char = c;
3206 } else if in_string && c == string_char && (j == 0 || bytes[j - 1] != b'\\') {
3207 in_string = false;
3208 } else if !in_string {
3209 if c == b'{' {
3210 brace_depth += 1;
3211 } else if c == b'}' {
3212 brace_depth -= 1;
3213 }
3214 }
3215 j += 1;
3216 }
3217
3218 if brace_depth == 0 {
3219 let end = j;
3220 jsx_expression_ranges.push((start, end));
3221
3222 Self::mark_lines_in_range(lines, content, start, end, |line| {
3224 line.in_jsx_expression = true;
3225 });
3226
3227 i = end;
3228 } else {
3229 i += 1;
3230 }
3231 }
3232 } else {
3233 i += 1;
3234 }
3235 }
3236
3237 (jsx_expression_ranges, mdx_comment_ranges)
3238 }
3239
3240 fn detect_mkdocs_line_info(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
3243 if flavor != MarkdownFlavor::MkDocs {
3244 return;
3245 }
3246
3247 use crate::utils::mkdocs_admonitions;
3248 use crate::utils::mkdocs_definition_lists;
3249 use crate::utils::mkdocs_tabs;
3250
3251 let content_lines: Vec<&str> = content.lines().collect();
3252
3253 let mut in_admonition = false;
3255 let mut admonition_indent = 0;
3256
3257 let mut in_tab = false;
3259 let mut tab_indent = 0;
3260
3261 let mut in_definition = false;
3263
3264 for (i, line) in content_lines.iter().enumerate() {
3265 if i >= lines.len() {
3266 break;
3267 }
3268
3269 if mkdocs_admonitions::is_admonition_start(line) {
3273 in_admonition = true;
3274 admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3275 lines[i].in_admonition = true;
3276 } else if in_admonition {
3277 if line.trim().is_empty() {
3279 lines[i].in_admonition = true;
3281 lines[i].in_code_block = false;
3283 } else if mkdocs_admonitions::is_admonition_content(line, admonition_indent) {
3284 lines[i].in_admonition = true;
3285 lines[i].in_code_block = false;
3287 } else {
3288 in_admonition = false;
3290 if mkdocs_admonitions::is_admonition_start(line) {
3292 in_admonition = true;
3293 admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3294 lines[i].in_admonition = true;
3295 }
3296 }
3297 }
3298
3299 if mkdocs_tabs::is_tab_marker(line) {
3302 in_tab = true;
3303 tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3304 lines[i].in_content_tab = true;
3305 } else if in_tab {
3306 if line.trim().is_empty() {
3308 lines[i].in_content_tab = true;
3310 lines[i].in_code_block = false;
3311 } else if mkdocs_tabs::is_tab_content(line, tab_indent) {
3312 lines[i].in_content_tab = true;
3313 lines[i].in_code_block = false;
3315 } else {
3316 in_tab = false;
3318 if mkdocs_tabs::is_tab_marker(line) {
3320 in_tab = true;
3321 tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3322 lines[i].in_content_tab = true;
3323 }
3324 }
3325 }
3326
3327 if lines[i].in_code_block {
3329 continue;
3330 }
3331
3332 if mkdocs_definition_lists::is_definition_line(line) {
3334 in_definition = true;
3335 lines[i].in_definition_list = true;
3336 } else if in_definition {
3337 if mkdocs_definition_lists::is_definition_continuation(line) {
3339 lines[i].in_definition_list = true;
3340 } else if line.trim().is_empty() {
3341 lines[i].in_definition_list = true;
3343 } else if mkdocs_definition_lists::could_be_term_line(line) {
3344 if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1])
3346 {
3347 lines[i].in_definition_list = true;
3348 } else {
3349 in_definition = false;
3350 }
3351 } else {
3352 in_definition = false;
3353 }
3354 } else if mkdocs_definition_lists::could_be_term_line(line) {
3355 if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1]) {
3357 lines[i].in_definition_list = true;
3358 in_definition = true;
3359 }
3360 }
3361 }
3362 }
3363
3364 fn detect_obsidian_comments(
3375 content: &str,
3376 lines: &mut [LineInfo],
3377 flavor: MarkdownFlavor,
3378 code_span_ranges: &[(usize, usize)],
3379 ) -> Vec<(usize, usize)> {
3380 if flavor != MarkdownFlavor::Obsidian {
3382 return Vec::new();
3383 }
3384
3385 let comment_ranges = Self::compute_obsidian_comment_ranges(content, lines, code_span_ranges);
3387
3388 for range in &comment_ranges {
3390 for line in lines.iter_mut() {
3391 if line.in_code_block || line.in_html_comment {
3393 continue;
3394 }
3395
3396 let line_start = line.byte_offset;
3397 let line_end = line.byte_offset + line.byte_len;
3398
3399 if line_start >= range.0 && line_end <= range.1 {
3403 line.in_obsidian_comment = true;
3404 } else if line_start < range.1 && line_end > range.0 {
3405 let line_content_start = line_start;
3413 let line_content_end = line_end;
3414
3415 if line_content_start >= range.0 && line_content_end <= range.1 {
3416 line.in_obsidian_comment = true;
3417 }
3418 }
3419 }
3420 }
3421
3422 comment_ranges
3423 }
3424
3425 fn compute_obsidian_comment_ranges(
3430 content: &str,
3431 lines: &[LineInfo],
3432 code_span_ranges: &[(usize, usize)],
3433 ) -> Vec<(usize, usize)> {
3434 let mut ranges = Vec::new();
3435
3436 if !content.contains("%%") {
3438 return ranges;
3439 }
3440
3441 let mut skip_ranges: Vec<(usize, usize)> = Vec::new();
3444 for line in lines {
3445 if line.in_code_block || line.in_html_comment {
3446 skip_ranges.push((line.byte_offset, line.byte_offset + line.byte_len));
3447 }
3448 }
3449 skip_ranges.extend(code_span_ranges.iter().copied());
3450
3451 if !skip_ranges.is_empty() {
3452 skip_ranges.sort_by_key(|(start, _)| *start);
3454 let mut merged: Vec<(usize, usize)> = Vec::with_capacity(skip_ranges.len());
3455 for (start, end) in skip_ranges {
3456 if let Some((_, last_end)) = merged.last_mut()
3457 && start <= *last_end
3458 {
3459 *last_end = (*last_end).max(end);
3460 continue;
3461 }
3462 merged.push((start, end));
3463 }
3464 skip_ranges = merged;
3465 }
3466
3467 let content_bytes = content.as_bytes();
3468 let len = content.len();
3469 let mut i = 0;
3470 let mut in_comment = false;
3471 let mut comment_start = 0;
3472 let mut skip_idx = 0;
3473
3474 while i < len.saturating_sub(1) {
3475 if skip_idx < skip_ranges.len() {
3477 let (skip_start, skip_end) = skip_ranges[skip_idx];
3478 if i >= skip_end {
3479 skip_idx += 1;
3480 continue;
3481 }
3482 if i >= skip_start {
3483 i = skip_end;
3484 continue;
3485 }
3486 }
3487
3488 if content_bytes[i] == b'%' && content_bytes[i + 1] == b'%' {
3490 if !in_comment {
3491 in_comment = true;
3493 comment_start = i;
3494 i += 2;
3495 } else {
3496 let comment_end = i + 2;
3498 ranges.push((comment_start, comment_end));
3499 in_comment = false;
3500 i += 2;
3501 }
3502 } else {
3503 i += 1;
3504 }
3505 }
3506
3507 if in_comment {
3509 ranges.push((comment_start, len));
3510 }
3511
3512 ranges
3513 }
3514
3515 fn mark_lines_in_range<F>(lines: &mut [LineInfo], content: &str, start: usize, end: usize, mut f: F)
3517 where
3518 F: FnMut(&mut LineInfo),
3519 {
3520 for line in lines.iter_mut() {
3522 let line_start = line.byte_offset;
3523 let line_end = line.byte_offset + line.byte_len;
3524
3525 if line_start < end && line_end > start {
3527 f(line);
3528 }
3529 }
3530
3531 let _ = content;
3533 }
3534
3535 fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
3537 if !content.contains('`') {
3539 return Vec::new();
3540 }
3541
3542 let parser = Parser::new(content).into_offset_iter();
3544 let mut ranges = Vec::new();
3545
3546 for (event, range) in parser {
3547 if let Event::Code(_) = event {
3548 ranges.push((range.start, range.end));
3549 }
3550 }
3551
3552 Self::build_code_spans_from_ranges(content, lines, &ranges)
3553 }
3554
3555 fn build_code_spans_from_ranges(content: &str, lines: &[LineInfo], ranges: &[(usize, usize)]) -> Vec<CodeSpan> {
3556 let mut code_spans = Vec::new();
3557 if ranges.is_empty() {
3558 return code_spans;
3559 }
3560
3561 for &(start_pos, end_pos) in ranges {
3562 let full_span = &content[start_pos..end_pos];
3564 let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
3565
3566 let content_start = start_pos + backtick_count;
3568 let content_end = end_pos - backtick_count;
3569 let span_content = if content_start < content_end {
3570 content[content_start..content_end].to_string()
3571 } else {
3572 String::new()
3573 };
3574
3575 let line_idx = lines
3578 .partition_point(|line| line.byte_offset <= start_pos)
3579 .saturating_sub(1);
3580 let line_num = line_idx + 1;
3581 let byte_col_start = start_pos - lines[line_idx].byte_offset;
3582
3583 let end_line_idx = lines
3585 .partition_point(|line| line.byte_offset <= end_pos)
3586 .saturating_sub(1);
3587 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3588
3589 let line_content = lines[line_idx].content(content);
3592 let col_start = if byte_col_start <= line_content.len() {
3593 line_content[..byte_col_start].chars().count()
3594 } else {
3595 line_content.chars().count()
3596 };
3597
3598 let end_line_content = lines[end_line_idx].content(content);
3599 let col_end = if byte_col_end <= end_line_content.len() {
3600 end_line_content[..byte_col_end].chars().count()
3601 } else {
3602 end_line_content.chars().count()
3603 };
3604
3605 code_spans.push(CodeSpan {
3606 line: line_num,
3607 end_line: end_line_idx + 1,
3608 start_col: col_start,
3609 end_col: col_end,
3610 byte_offset: start_pos,
3611 byte_end: end_pos,
3612 backtick_count,
3613 content: span_content,
3614 });
3615 }
3616
3617 code_spans.sort_by_key(|span| span.byte_offset);
3619
3620 code_spans
3621 }
3622
3623 fn parse_math_spans(content: &str, lines: &[LineInfo]) -> Vec<MathSpan> {
3625 let mut math_spans = Vec::new();
3626
3627 if !content.contains('$') {
3629 return math_spans;
3630 }
3631
3632 let mut options = Options::empty();
3634 options.insert(Options::ENABLE_MATH);
3635 let parser = Parser::new_ext(content, options).into_offset_iter();
3636
3637 for (event, range) in parser {
3638 let (is_display, math_content) = match &event {
3639 Event::InlineMath(text) => (false, text.as_ref()),
3640 Event::DisplayMath(text) => (true, text.as_ref()),
3641 _ => continue,
3642 };
3643
3644 let start_pos = range.start;
3645 let end_pos = range.end;
3646
3647 let line_idx = lines
3649 .partition_point(|line| line.byte_offset <= start_pos)
3650 .saturating_sub(1);
3651 let line_num = line_idx + 1;
3652 let byte_col_start = start_pos - lines[line_idx].byte_offset;
3653
3654 let end_line_idx = lines
3656 .partition_point(|line| line.byte_offset <= end_pos)
3657 .saturating_sub(1);
3658 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3659
3660 let line_content = lines[line_idx].content(content);
3662 let col_start = if byte_col_start <= line_content.len() {
3663 line_content[..byte_col_start].chars().count()
3664 } else {
3665 line_content.chars().count()
3666 };
3667
3668 let end_line_content = lines[end_line_idx].content(content);
3669 let col_end = if byte_col_end <= end_line_content.len() {
3670 end_line_content[..byte_col_end].chars().count()
3671 } else {
3672 end_line_content.chars().count()
3673 };
3674
3675 math_spans.push(MathSpan {
3676 line: line_num,
3677 end_line: end_line_idx + 1,
3678 start_col: col_start,
3679 end_col: col_end,
3680 byte_offset: start_pos,
3681 byte_end: end_pos,
3682 is_display,
3683 content: math_content.to_string(),
3684 });
3685 }
3686
3687 math_spans.sort_by_key(|span| span.byte_offset);
3689
3690 math_spans
3691 }
3692
3693 fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
3704 const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
3706
3707 #[inline]
3710 fn reset_tracking_state(
3711 list_item: &ListItemInfo,
3712 has_list_breaking_content: &mut bool,
3713 min_continuation: &mut usize,
3714 ) {
3715 *has_list_breaking_content = false;
3716 let marker_width = if list_item.is_ordered {
3717 list_item.marker.len() + 1 } else {
3719 list_item.marker.len()
3720 };
3721 *min_continuation = if list_item.is_ordered {
3722 marker_width
3723 } else {
3724 UNORDERED_LIST_MIN_CONTINUATION_INDENT
3725 };
3726 }
3727
3728 let mut list_blocks = Vec::with_capacity(lines.len() / 10); let mut current_block: Option<ListBlock> = None;
3731 let mut last_list_item_line = 0;
3732 let mut current_indent_level = 0;
3733 let mut last_marker_width = 0;
3734
3735 let mut has_list_breaking_content_since_last_item = false;
3737 let mut min_continuation_for_tracking = 0;
3738
3739 for (line_idx, line_info) in lines.iter().enumerate() {
3740 let line_num = line_idx + 1;
3741
3742 if line_info.in_code_block {
3744 if let Some(ref mut block) = current_block {
3745 let min_continuation_indent =
3747 CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
3748
3749 let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
3751
3752 match context {
3753 CodeBlockContext::Indented => {
3754 block.end_line = line_num;
3756 continue;
3757 }
3758 CodeBlockContext::Standalone => {
3759 let completed_block = current_block.take().unwrap();
3761 list_blocks.push(completed_block);
3762 continue;
3763 }
3764 CodeBlockContext::Adjacent => {
3765 block.end_line = line_num;
3767 continue;
3768 }
3769 }
3770 } else {
3771 continue;
3773 }
3774 }
3775
3776 let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
3778 caps.get(0).unwrap().as_str().to_string()
3779 } else {
3780 String::new()
3781 };
3782
3783 if let Some(ref block) = current_block
3786 && line_info.list_item.is_none()
3787 && !line_info.is_blank
3788 && !line_info.in_code_span_continuation
3789 {
3790 let line_content = line_info.content(content).trim();
3791
3792 let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
3797
3798 let blockquote_prefix_changes = blockquote_prefix.trim() != block.blockquote_prefix.trim();
3801
3802 let breaks_list = line_info.heading.is_some()
3803 || line_content.starts_with("---")
3804 || line_content.starts_with("***")
3805 || line_content.starts_with("___")
3806 || crate::utils::skip_context::is_table_line(line_content)
3807 || blockquote_prefix_changes
3808 || (line_info.indent > 0
3809 && line_info.indent < min_continuation_for_tracking
3810 && !is_lazy_continuation);
3811
3812 if breaks_list {
3813 has_list_breaking_content_since_last_item = true;
3814 }
3815 }
3816
3817 if line_info.in_code_span_continuation
3820 && line_info.list_item.is_none()
3821 && let Some(ref mut block) = current_block
3822 {
3823 block.end_line = line_num;
3824 }
3825
3826 let effective_continuation_indent = if let Some(ref block) = current_block {
3832 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3833 let line_content = line_info.content(content);
3834 let line_bq_level = line_content
3835 .chars()
3836 .take_while(|c| *c == '>' || c.is_whitespace())
3837 .filter(|&c| c == '>')
3838 .count();
3839 if line_bq_level > 0 && line_bq_level == block_bq_level {
3840 let mut pos = 0;
3842 let mut found_markers = 0;
3843 for c in line_content.chars() {
3844 pos += c.len_utf8();
3845 if c == '>' {
3846 found_markers += 1;
3847 if found_markers == line_bq_level {
3848 if line_content.get(pos..pos + 1) == Some(" ") {
3849 pos += 1;
3850 }
3851 break;
3852 }
3853 }
3854 }
3855 let after_bq = &line_content[pos..];
3856 after_bq.len() - after_bq.trim_start().len()
3857 } else {
3858 line_info.indent
3859 }
3860 } else {
3861 line_info.indent
3862 };
3863 let adjusted_min_continuation_for_tracking = if let Some(ref block) = current_block {
3864 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3865 if block_bq_level > 0 {
3866 if block.is_ordered { last_marker_width } else { 2 }
3867 } else {
3868 min_continuation_for_tracking
3869 }
3870 } else {
3871 min_continuation_for_tracking
3872 };
3873 let is_structural_element = line_info.heading.is_some()
3876 || line_info.content(content).trim().starts_with("```")
3877 || line_info.content(content).trim().starts_with("~~~");
3878 let is_valid_continuation = effective_continuation_indent >= adjusted_min_continuation_for_tracking
3879 || (line_info.indent == 0 && !line_info.is_blank && !is_structural_element);
3880
3881 if std::env::var("RUMDL_DEBUG_LIST").is_ok() && line_info.list_item.is_none() && !line_info.is_blank {
3882 eprintln!(
3883 "[DEBUG] Line {}: checking continuation - indent={}, min_cont={}, is_valid={}, in_code_span={}, in_code_block={}, has_block={}",
3884 line_num,
3885 effective_continuation_indent,
3886 adjusted_min_continuation_for_tracking,
3887 is_valid_continuation,
3888 line_info.in_code_span_continuation,
3889 line_info.in_code_block,
3890 current_block.is_some()
3891 );
3892 }
3893
3894 if !line_info.in_code_span_continuation
3895 && line_info.list_item.is_none()
3896 && !line_info.is_blank
3897 && !line_info.in_code_block
3898 && is_valid_continuation
3899 && let Some(ref mut block) = current_block
3900 {
3901 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3902 eprintln!(
3903 "[DEBUG] Line {}: extending block.end_line from {} to {}",
3904 line_num, block.end_line, line_num
3905 );
3906 }
3907 block.end_line = line_num;
3908 }
3909
3910 if let Some(list_item) = &line_info.list_item {
3912 let item_indent = list_item.marker_column;
3914 let nesting = item_indent / 2; if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3917 eprintln!(
3918 "[DEBUG] Line {}: list item found, marker={:?}, indent={}",
3919 line_num, list_item.marker, item_indent
3920 );
3921 }
3922
3923 if let Some(ref mut block) = current_block {
3924 let is_nested = nesting > block.nesting_level;
3928 let same_type =
3929 (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
3930 let same_context = block.blockquote_prefix == blockquote_prefix;
3931 let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
3933
3934 let marker_compatible =
3936 block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
3937
3938 let has_non_list_content = has_list_breaking_content_since_last_item;
3941
3942 let mut continues_list = if is_nested {
3946 same_context && reasonable_distance && !has_non_list_content
3948 } else {
3949 same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
3951 };
3952
3953 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3954 eprintln!(
3955 "[DEBUG] Line {}: continues_list={}, is_nested={}, same_type={}, same_context={}, reasonable_distance={}, marker_compatible={}, has_non_list_content={}, last_item={}, block.end_line={}",
3956 line_num,
3957 continues_list,
3958 is_nested,
3959 same_type,
3960 same_context,
3961 reasonable_distance,
3962 marker_compatible,
3963 has_non_list_content,
3964 last_list_item_line,
3965 block.end_line
3966 );
3967 }
3968
3969 if !continues_list
3973 && (is_nested || same_type)
3974 && reasonable_distance
3975 && line_num > 0
3976 && block.end_line == line_num - 1
3977 {
3978 if block.item_lines.contains(&(line_num - 1)) {
3981 continues_list = true;
3983 } else {
3984 continues_list = true;
3988 }
3989 }
3990
3991 if continues_list {
3992 block.end_line = line_num;
3994 block.item_lines.push(line_num);
3995
3996 block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
3998 list_item.marker.len() + 1
3999 } else {
4000 list_item.marker.len()
4001 });
4002
4003 if !block.is_ordered
4005 && block.marker.is_some()
4006 && block.marker.as_ref() != Some(&list_item.marker)
4007 {
4008 block.marker = None;
4010 }
4011
4012 reset_tracking_state(
4014 list_item,
4015 &mut has_list_breaking_content_since_last_item,
4016 &mut min_continuation_for_tracking,
4017 );
4018 } else {
4019 if !same_type
4024 && !is_nested
4025 && let Some(&last_item) = block.item_lines.last()
4026 {
4027 block.end_line = last_item;
4028 }
4029
4030 list_blocks.push(block.clone());
4031
4032 *block = ListBlock {
4033 start_line: line_num,
4034 end_line: line_num,
4035 is_ordered: list_item.is_ordered,
4036 marker: if list_item.is_ordered {
4037 None
4038 } else {
4039 Some(list_item.marker.clone())
4040 },
4041 blockquote_prefix: blockquote_prefix.clone(),
4042 item_lines: vec![line_num],
4043 nesting_level: nesting,
4044 max_marker_width: if list_item.is_ordered {
4045 list_item.marker.len() + 1
4046 } else {
4047 list_item.marker.len()
4048 },
4049 };
4050
4051 reset_tracking_state(
4053 list_item,
4054 &mut has_list_breaking_content_since_last_item,
4055 &mut min_continuation_for_tracking,
4056 );
4057 }
4058 } else {
4059 current_block = Some(ListBlock {
4061 start_line: line_num,
4062 end_line: line_num,
4063 is_ordered: list_item.is_ordered,
4064 marker: if list_item.is_ordered {
4065 None
4066 } else {
4067 Some(list_item.marker.clone())
4068 },
4069 blockquote_prefix,
4070 item_lines: vec![line_num],
4071 nesting_level: nesting,
4072 max_marker_width: list_item.marker.len(),
4073 });
4074
4075 reset_tracking_state(
4077 list_item,
4078 &mut has_list_breaking_content_since_last_item,
4079 &mut min_continuation_for_tracking,
4080 );
4081 }
4082
4083 last_list_item_line = line_num;
4084 current_indent_level = item_indent;
4085 last_marker_width = if list_item.is_ordered {
4086 list_item.marker.len() + 1 } else {
4088 list_item.marker.len()
4089 };
4090 } else if let Some(ref mut block) = current_block {
4091 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4093 eprintln!(
4094 "[DEBUG] Line {}: non-list-item, is_blank={}, block exists",
4095 line_num, line_info.is_blank
4096 );
4097 }
4098
4099 let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
4107 lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
4108 } else {
4109 false
4110 };
4111
4112 let min_continuation_indent = if block.is_ordered {
4116 current_indent_level + last_marker_width
4117 } else {
4118 current_indent_level + 2 };
4120
4121 if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
4122 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4124 eprintln!(
4125 "[DEBUG] Line {}: indented continuation (indent={}, min={})",
4126 line_num, line_info.indent, min_continuation_indent
4127 );
4128 }
4129 block.end_line = line_num;
4130 } else if line_info.is_blank {
4131 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4134 eprintln!("[DEBUG] Line {line_num}: entering blank line handling");
4135 }
4136 let mut check_idx = line_idx + 1;
4137 let mut found_continuation = false;
4138
4139 while check_idx < lines.len() && lines[check_idx].is_blank {
4141 check_idx += 1;
4142 }
4143
4144 if check_idx < lines.len() {
4145 let next_line = &lines[check_idx];
4146 let next_content = next_line.content(content);
4148 let block_bq_level_for_indent = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4151 let next_bq_level_for_indent = next_content
4152 .chars()
4153 .take_while(|c| *c == '>' || c.is_whitespace())
4154 .filter(|&c| c == '>')
4155 .count();
4156 let effective_indent =
4157 if next_bq_level_for_indent > 0 && next_bq_level_for_indent == block_bq_level_for_indent {
4158 let mut pos = 0;
4161 let mut found_markers = 0;
4162 for c in next_content.chars() {
4163 pos += c.len_utf8();
4164 if c == '>' {
4165 found_markers += 1;
4166 if found_markers == next_bq_level_for_indent {
4167 if next_content.get(pos..pos + 1) == Some(" ") {
4169 pos += 1;
4170 }
4171 break;
4172 }
4173 }
4174 }
4175 let after_blockquote_marker = &next_content[pos..];
4176 after_blockquote_marker.len() - after_blockquote_marker.trim_start().len()
4177 } else {
4178 next_line.indent
4179 };
4180 let adjusted_min_continuation = if block_bq_level_for_indent > 0 {
4183 if block.is_ordered { last_marker_width } else { 2 }
4186 } else {
4187 min_continuation_indent
4188 };
4189 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4191 eprintln!(
4192 "[DEBUG] Blank line {} checking next line {}: effective_indent={}, adjusted_min={}, next_is_list={}, in_code_block={}",
4193 line_num,
4194 check_idx + 1,
4195 effective_indent,
4196 adjusted_min_continuation,
4197 next_line.list_item.is_some(),
4198 next_line.in_code_block
4199 );
4200 }
4201 if !next_line.in_code_block && effective_indent >= adjusted_min_continuation {
4202 found_continuation = true;
4203 }
4204 else if !next_line.in_code_block
4206 && next_line.list_item.is_some()
4207 && let Some(item) = &next_line.list_item
4208 {
4209 let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
4210 .find(next_line.content(content))
4211 .map_or(String::new(), |m| m.as_str().to_string());
4212 if item.marker_column == current_indent_level
4213 && item.is_ordered == block.is_ordered
4214 && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
4215 {
4216 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4220 let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
4221 if let Some(between_line) = lines.get(idx) {
4222 let between_content = between_line.content(content);
4223 let trimmed = between_content.trim();
4224 if trimmed.is_empty() {
4226 return false;
4227 }
4228 let line_indent = between_content.len() - between_content.trim_start().len();
4230
4231 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4233 .find(between_content)
4234 .map_or(String::new(), |m| m.as_str().to_string());
4235 let between_bq_level = between_bq_prefix.chars().filter(|&c| c == '>').count();
4236 let blockquote_level_changed =
4237 trimmed.starts_with(">") && between_bq_level != block_bq_level;
4238
4239 if trimmed.starts_with("```")
4241 || trimmed.starts_with("~~~")
4242 || trimmed.starts_with("---")
4243 || trimmed.starts_with("***")
4244 || trimmed.starts_with("___")
4245 || blockquote_level_changed
4246 || crate::utils::skip_context::is_table_line(trimmed)
4247 || between_line.heading.is_some()
4248 {
4249 return true; }
4251
4252 line_indent >= min_continuation_indent
4254 } else {
4255 false
4256 }
4257 });
4258
4259 if block.is_ordered {
4260 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4263 if let Some(between_line) = lines.get(idx) {
4264 let between_content = between_line.content(content);
4265 let trimmed = between_content.trim();
4266 if trimmed.is_empty() {
4267 return false;
4268 }
4269 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4271 .find(between_content)
4272 .map_or(String::new(), |m| m.as_str().to_string());
4273 let between_bq_level =
4274 between_bq_prefix.chars().filter(|&c| c == '>').count();
4275 let blockquote_level_changed =
4276 trimmed.starts_with(">") && between_bq_level != block_bq_level;
4277 trimmed.starts_with("```")
4279 || trimmed.starts_with("~~~")
4280 || trimmed.starts_with("---")
4281 || trimmed.starts_with("***")
4282 || trimmed.starts_with("___")
4283 || blockquote_level_changed
4284 || crate::utils::skip_context::is_table_line(trimmed)
4285 || between_line.heading.is_some()
4286 } else {
4287 false
4288 }
4289 });
4290 found_continuation = !has_structural_separators;
4291 } else {
4292 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4294 if let Some(between_line) = lines.get(idx) {
4295 let between_content = between_line.content(content);
4296 let trimmed = between_content.trim();
4297 if trimmed.is_empty() {
4298 return false;
4299 }
4300 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4302 .find(between_content)
4303 .map_or(String::new(), |m| m.as_str().to_string());
4304 let between_bq_level =
4305 between_bq_prefix.chars().filter(|&c| c == '>').count();
4306 let blockquote_level_changed =
4307 trimmed.starts_with(">") && between_bq_level != block_bq_level;
4308 trimmed.starts_with("```")
4310 || trimmed.starts_with("~~~")
4311 || trimmed.starts_with("---")
4312 || trimmed.starts_with("***")
4313 || trimmed.starts_with("___")
4314 || blockquote_level_changed
4315 || crate::utils::skip_context::is_table_line(trimmed)
4316 || between_line.heading.is_some()
4317 } else {
4318 false
4319 }
4320 });
4321 found_continuation = !has_structural_separators;
4322 }
4323 }
4324 }
4325 }
4326
4327 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4328 eprintln!("[DEBUG] Blank line {line_num} final: found_continuation={found_continuation}");
4329 }
4330 if found_continuation {
4331 block.end_line = line_num;
4333 } else {
4334 list_blocks.push(block.clone());
4336 current_block = None;
4337 }
4338 } else {
4339 let min_required_indent = if block.is_ordered {
4342 current_indent_level + last_marker_width
4343 } else {
4344 current_indent_level + 2
4345 };
4346
4347 let line_content = line_info.content(content).trim();
4352
4353 let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
4355
4356 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4359 let current_bq_level = blockquote_prefix.chars().filter(|&c| c == '>').count();
4360 let blockquote_level_changed = line_content.starts_with(">") && current_bq_level != block_bq_level;
4361
4362 let is_structural_separator = line_info.heading.is_some()
4363 || line_content.starts_with("```")
4364 || line_content.starts_with("~~~")
4365 || line_content.starts_with("---")
4366 || line_content.starts_with("***")
4367 || line_content.starts_with("___")
4368 || blockquote_level_changed
4369 || looks_like_table;
4370
4371 let is_lazy_continuation = !is_structural_separator
4375 && !line_info.is_blank
4376 && (line_info.indent == 0
4377 || line_info.indent >= min_required_indent
4378 || line_info.in_code_span_continuation);
4379
4380 if is_lazy_continuation {
4381 block.end_line = line_num;
4384 } else {
4385 list_blocks.push(block.clone());
4387 current_block = None;
4388 }
4389 }
4390 }
4391 }
4392
4393 if let Some(block) = current_block {
4395 list_blocks.push(block);
4396 }
4397
4398 merge_adjacent_list_blocks(content, &mut list_blocks, lines);
4400
4401 list_blocks
4402 }
4403
4404 fn compute_char_frequency(content: &str) -> CharFrequency {
4406 let mut frequency = CharFrequency::default();
4407
4408 for ch in content.chars() {
4409 match ch {
4410 '#' => frequency.hash_count += 1,
4411 '*' => frequency.asterisk_count += 1,
4412 '_' => frequency.underscore_count += 1,
4413 '-' => frequency.hyphen_count += 1,
4414 '+' => frequency.plus_count += 1,
4415 '>' => frequency.gt_count += 1,
4416 '|' => frequency.pipe_count += 1,
4417 '[' => frequency.bracket_count += 1,
4418 '`' => frequency.backtick_count += 1,
4419 '<' => frequency.lt_count += 1,
4420 '!' => frequency.exclamation_count += 1,
4421 '\n' => frequency.newline_count += 1,
4422 _ => {}
4423 }
4424 }
4425
4426 frequency
4427 }
4428
4429 fn parse_html_tags(
4431 content: &str,
4432 lines: &[LineInfo],
4433 code_blocks: &[(usize, usize)],
4434 flavor: MarkdownFlavor,
4435 ) -> Vec<HtmlTag> {
4436 static HTML_TAG_REGEX: LazyLock<regex::Regex> =
4437 LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
4438
4439 let mut html_tags = Vec::with_capacity(content.matches('<').count());
4440
4441 for cap in HTML_TAG_REGEX.captures_iter(content) {
4442 let full_match = cap.get(0).unwrap();
4443 let match_start = full_match.start();
4444 let match_end = full_match.end();
4445
4446 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4448 continue;
4449 }
4450
4451 let is_closing = !cap.get(1).unwrap().as_str().is_empty();
4452 let tag_name_original = cap.get(2).unwrap().as_str();
4453 let tag_name = tag_name_original.to_lowercase();
4454 let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
4455
4456 if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
4459 continue;
4460 }
4461
4462 let mut line_num = 1;
4464 let mut col_start = match_start;
4465 let mut col_end = match_end;
4466 for (idx, line_info) in lines.iter().enumerate() {
4467 if match_start >= line_info.byte_offset {
4468 line_num = idx + 1;
4469 col_start = match_start - line_info.byte_offset;
4470 col_end = match_end - line_info.byte_offset;
4471 } else {
4472 break;
4473 }
4474 }
4475
4476 html_tags.push(HtmlTag {
4477 line: line_num,
4478 start_col: col_start,
4479 end_col: col_end,
4480 byte_offset: match_start,
4481 byte_end: match_end,
4482 tag_name,
4483 is_closing,
4484 is_self_closing,
4485 raw_content: full_match.as_str().to_string(),
4486 });
4487 }
4488
4489 html_tags
4490 }
4491
4492 fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
4494 let mut table_rows = Vec::with_capacity(lines.len() / 20);
4495
4496 for (line_idx, line_info) in lines.iter().enumerate() {
4497 if line_info.in_code_block || line_info.is_blank {
4499 continue;
4500 }
4501
4502 let line = line_info.content(content);
4503 let line_num = line_idx + 1;
4504
4505 if !line.contains('|') {
4507 continue;
4508 }
4509
4510 let parts: Vec<&str> = line.split('|').collect();
4512 let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
4513
4514 let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
4516 let mut column_alignments = Vec::new();
4517
4518 if is_separator {
4519 for part in &parts[1..parts.len() - 1] {
4520 let trimmed = part.trim();
4522 let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
4523 "center".to_string()
4524 } else if trimmed.ends_with(':') {
4525 "right".to_string()
4526 } else if trimmed.starts_with(':') {
4527 "left".to_string()
4528 } else {
4529 "none".to_string()
4530 };
4531 column_alignments.push(alignment);
4532 }
4533 }
4534
4535 table_rows.push(TableRow {
4536 line: line_num,
4537 is_separator,
4538 column_count,
4539 column_alignments,
4540 });
4541 }
4542
4543 table_rows
4544 }
4545
4546 fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
4548 let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
4549
4550 for cap in URL_SIMPLE_REGEX.captures_iter(content) {
4552 let full_match = cap.get(0).unwrap();
4553 let match_start = full_match.start();
4554 let match_end = full_match.end();
4555
4556 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4558 continue;
4559 }
4560
4561 let preceding_char = if match_start > 0 {
4563 content.chars().nth(match_start - 1)
4564 } else {
4565 None
4566 };
4567 let following_char = content.chars().nth(match_end);
4568
4569 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4570 continue;
4571 }
4572 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4573 continue;
4574 }
4575
4576 let url = full_match.as_str();
4577 let url_type = if url.starts_with("https://") {
4578 "https"
4579 } else if url.starts_with("http://") {
4580 "http"
4581 } else if url.starts_with("ftp://") {
4582 "ftp"
4583 } else {
4584 "other"
4585 };
4586
4587 let mut line_num = 1;
4589 let mut col_start = match_start;
4590 let mut col_end = match_end;
4591 for (idx, line_info) in lines.iter().enumerate() {
4592 if match_start >= line_info.byte_offset {
4593 line_num = idx + 1;
4594 col_start = match_start - line_info.byte_offset;
4595 col_end = match_end - line_info.byte_offset;
4596 } else {
4597 break;
4598 }
4599 }
4600
4601 bare_urls.push(BareUrl {
4602 line: line_num,
4603 start_col: col_start,
4604 end_col: col_end,
4605 byte_offset: match_start,
4606 byte_end: match_end,
4607 url: url.to_string(),
4608 url_type: url_type.to_string(),
4609 });
4610 }
4611
4612 for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
4614 let full_match = cap.get(0).unwrap();
4615 let match_start = full_match.start();
4616 let match_end = full_match.end();
4617
4618 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4620 continue;
4621 }
4622
4623 let preceding_char = if match_start > 0 {
4625 content.chars().nth(match_start - 1)
4626 } else {
4627 None
4628 };
4629 let following_char = content.chars().nth(match_end);
4630
4631 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4632 continue;
4633 }
4634 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4635 continue;
4636 }
4637
4638 let email = full_match.as_str();
4639
4640 let mut line_num = 1;
4642 let mut col_start = match_start;
4643 let mut col_end = match_end;
4644 for (idx, line_info) in lines.iter().enumerate() {
4645 if match_start >= line_info.byte_offset {
4646 line_num = idx + 1;
4647 col_start = match_start - line_info.byte_offset;
4648 col_end = match_end - line_info.byte_offset;
4649 } else {
4650 break;
4651 }
4652 }
4653
4654 bare_urls.push(BareUrl {
4655 line: line_num,
4656 start_col: col_start,
4657 end_col: col_end,
4658 byte_offset: match_start,
4659 byte_end: match_end,
4660 url: email.to_string(),
4661 url_type: "email".to_string(),
4662 });
4663 }
4664
4665 bare_urls
4666 }
4667
    /// Builds a [`ValidHeadingsIter`] over this context's lines.
    ///
    /// NOTE(review): presumably the iterator yields only headings whose `is_valid`
    /// flag is set; it is defined outside this view, so confirm there.
    #[must_use]
    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
        ValidHeadingsIter::new(&self.lines)
    }
4691
4692 #[must_use]
4696 pub fn has_valid_headings(&self) -> bool {
4697 self.lines
4698 .iter()
4699 .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
4700 }
4701}
4702
4703fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
4705 if list_blocks.len() < 2 {
4706 return;
4707 }
4708
4709 let mut merger = ListBlockMerger::new(content, lines);
4710 *list_blocks = merger.merge(list_blocks);
4711}
4712
/// Borrowed view of the document used while deciding which adjacent list blocks to merge.
struct ListBlockMerger<'a> {
    /// Document text, handed to `LineInfo::content` whenever a line's text is needed.
    content: &'a str,
    /// Per-line metadata; looked up by 1-based line number minus one.
    lines: &'a [LineInfo],
}
4718
4719impl<'a> ListBlockMerger<'a> {
4720 fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
4721 Self { content, lines }
4722 }
4723
4724 fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
4725 let mut merged = Vec::with_capacity(list_blocks.len());
4726 let mut current = list_blocks[0].clone();
4727
4728 for next in list_blocks.iter().skip(1) {
4729 if self.should_merge_blocks(¤t, next) {
4730 current = self.merge_two_blocks(current, next);
4731 } else {
4732 merged.push(current);
4733 current = next.clone();
4734 }
4735 }
4736
4737 merged.push(current);
4738 merged
4739 }
4740
4741 fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
4743 if !self.blocks_are_compatible(current, next) {
4745 return false;
4746 }
4747
4748 let spacing = self.analyze_spacing_between(current, next);
4750 match spacing {
4751 BlockSpacing::Consecutive => true,
4752 BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
4753 BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
4754 self.can_merge_with_content_between(current, next)
4755 }
4756 }
4757 }
4758
4759 fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
4761 current.is_ordered == next.is_ordered
4762 && current.blockquote_prefix == next.blockquote_prefix
4763 && current.nesting_level == next.nesting_level
4764 }
4765
4766 fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
4768 let gap = next.start_line - current.end_line;
4769
4770 match gap {
4771 1 => BlockSpacing::Consecutive,
4772 2 => BlockSpacing::SingleBlank,
4773 _ if gap > 2 => {
4774 if self.has_only_blank_lines_between(current, next) {
4775 BlockSpacing::MultipleBlanks
4776 } else {
4777 BlockSpacing::ContentBetween
4778 }
4779 }
4780 _ => BlockSpacing::Consecutive, }
4782 }
4783
4784 fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4786 if has_meaningful_content_between(self.content, current, next, self.lines) {
4789 return false; }
4791
4792 !current.is_ordered && current.marker == next.marker
4794 }
4795
4796 fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4798 if has_meaningful_content_between(self.content, current, next, self.lines) {
4800 return false; }
4802
4803 current.is_ordered && next.is_ordered
4805 }
4806
4807 fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4809 for line_num in (current.end_line + 1)..next.start_line {
4810 if let Some(line_info) = self.lines.get(line_num - 1)
4811 && !line_info.content(self.content).trim().is_empty()
4812 {
4813 return false;
4814 }
4815 }
4816 true
4817 }
4818
4819 fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
4821 current.end_line = next.end_line;
4822 current.item_lines.extend_from_slice(&next.item_lines);
4823
4824 current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
4826
4827 if !current.is_ordered && self.markers_differ(¤t, next) {
4829 current.marker = None; }
4831
4832 current
4833 }
4834
4835 fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
4837 current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
4838 }
4839}
4840
/// How two neighbouring list blocks are separated, as classified by
/// `ListBlockMerger::analyze_spacing_between` from the difference between the
/// next block's start line and the current block's end line.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// Line difference of 1 (also used as the fallback for a difference of 0).
    Consecutive,
    /// Line difference of exactly 2.
    SingleBlank,
    /// Line difference above 2 with only blank lines in between.
    MultipleBlanks,
    /// Line difference above 2 with at least one non-blank line in between.
    ContentBetween,
}
4849
4850fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
4852 for line_num in (current.end_line + 1)..next.start_line {
4854 if let Some(line_info) = lines.get(line_num - 1) {
4855 let trimmed = line_info.content(content).trim();
4857
4858 if trimmed.is_empty() {
4860 continue;
4861 }
4862
4863 if line_info.heading.is_some() {
4867 return true; }
4869
4870 if is_horizontal_rule(trimmed) {
4872 return true; }
4874
4875 if crate::utils::skip_context::is_table_line(trimmed) {
4877 return true; }
4879
4880 if trimmed.starts_with('>') {
4882 return true; }
4884
4885 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
4887 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4888
4889 let min_continuation_indent = if current.is_ordered {
4891 current.nesting_level + current.max_marker_width + 1 } else {
4893 current.nesting_level + 2
4894 };
4895
4896 if line_indent < min_continuation_indent {
4897 return true; }
4900 }
4901
4902 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4904
4905 let min_indent = if current.is_ordered {
4907 current.nesting_level + current.max_marker_width
4908 } else {
4909 current.nesting_level + 2
4910 };
4911
4912 if line_indent < min_indent {
4914 return true; }
4916
4917 }
4920 }
4921
4922 false
4924}
4925
4926pub fn is_horizontal_rule_line(line: &str) -> bool {
4933 let leading_spaces = line.len() - line.trim_start_matches(' ').len();
4935 if leading_spaces > 3 || line.starts_with('\t') {
4936 return false;
4937 }
4938
4939 is_horizontal_rule_content(line.trim())
4940}
4941
/// Checks whether already-trimmed text forms a horizontal rule.
///
/// The text must be at least three bytes long, start with `-`, `*` or `_`,
/// contain that marker at least three times, and otherwise consist only of
/// spaces and tabs.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    if trimmed.len() < 3 {
        return false;
    }

    let Some(marker) = trimmed.chars().next().filter(|c| matches!(c, '-' | '*' | '_')) else {
        return false;
    };

    // Walk the characters directly; no intermediate Vec<char> is needed.
    let mut marker_count = 0usize;
    for ch in trimmed.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' && ch != '\t' {
            // Anything other than the marker or blank padding disqualifies the line.
            return false;
        }
    }

    marker_count >= 3
}
4966
/// Forwards to `is_horizontal_rule_content`; `trimmed` is passed through unchanged.
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}
4971
4972#[cfg(test)]
4974mod tests {
4975 use super::*;
4976
4977 #[test]
4978 fn test_empty_content() {
4979 let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
4980 assert_eq!(ctx.content, "");
4981 assert_eq!(ctx.line_offsets, vec![0]);
4982 assert_eq!(ctx.offset_to_line_col(0), (1, 1));
4983 assert_eq!(ctx.lines.len(), 0);
4984 }
4985
4986 #[test]
4987 fn test_single_line() {
4988 let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
4989 assert_eq!(ctx.content, "# Hello");
4990 assert_eq!(ctx.line_offsets, vec![0]);
4991 assert_eq!(ctx.offset_to_line_col(0), (1, 1));
4992 assert_eq!(ctx.offset_to_line_col(3), (1, 4));
4993 }
4994
4995 #[test]
4996 fn test_multi_line() {
4997 let content = "# Title\n\nSecond line\nThird line";
4998 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4999 assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
5000 assert_eq!(ctx.offset_to_line_col(0), (1, 1)); assert_eq!(ctx.offset_to_line_col(8), (2, 1)); assert_eq!(ctx.offset_to_line_col(9), (3, 1)); assert_eq!(ctx.offset_to_line_col(15), (3, 7)); assert_eq!(ctx.offset_to_line_col(21), (4, 1)); }
5007
5008 #[test]
5009 fn test_line_info() {
5010 let content = "# Title\n indented\n\ncode:\n```rust\nfn main() {}\n```";
5011 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5012
5013 assert_eq!(ctx.lines.len(), 7);
5015
5016 let line1 = &ctx.lines[0];
5018 assert_eq!(line1.content(ctx.content), "# Title");
5019 assert_eq!(line1.byte_offset, 0);
5020 assert_eq!(line1.indent, 0);
5021 assert!(!line1.is_blank);
5022 assert!(!line1.in_code_block);
5023 assert!(line1.list_item.is_none());
5024
5025 let line2 = &ctx.lines[1];
5027 assert_eq!(line2.content(ctx.content), " indented");
5028 assert_eq!(line2.byte_offset, 8);
5029 assert_eq!(line2.indent, 4);
5030 assert!(!line2.is_blank);
5031
5032 let line3 = &ctx.lines[2];
5034 assert_eq!(line3.content(ctx.content), "");
5035 assert!(line3.is_blank);
5036
5037 assert_eq!(ctx.line_to_byte_offset(1), Some(0));
5039 assert_eq!(ctx.line_to_byte_offset(2), Some(8));
5040 assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
5041 assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
5042 }
5043
5044 #[test]
5045 fn test_list_item_detection() {
5046 let content = "- Unordered item\n * Nested item\n1. Ordered item\n 2) Nested ordered\n\nNot a list";
5047 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5048
5049 let line1 = &ctx.lines[0];
5051 assert!(line1.list_item.is_some());
5052 let list1 = line1.list_item.as_ref().unwrap();
5053 assert_eq!(list1.marker, "-");
5054 assert!(!list1.is_ordered);
5055 assert_eq!(list1.marker_column, 0);
5056 assert_eq!(list1.content_column, 2);
5057
5058 let line2 = &ctx.lines[1];
5060 assert!(line2.list_item.is_some());
5061 let list2 = line2.list_item.as_ref().unwrap();
5062 assert_eq!(list2.marker, "*");
5063 assert_eq!(list2.marker_column, 2);
5064
5065 let line3 = &ctx.lines[2];
5067 assert!(line3.list_item.is_some());
5068 let list3 = line3.list_item.as_ref().unwrap();
5069 assert_eq!(list3.marker, "1.");
5070 assert!(list3.is_ordered);
5071 assert_eq!(list3.number, Some(1));
5072
5073 let line6 = &ctx.lines[5];
5075 assert!(line6.list_item.is_none());
5076 }
5077
5078 #[test]
5079 fn test_offset_to_line_col_edge_cases() {
5080 let content = "a\nb\nc";
5081 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5082 assert_eq!(ctx.offset_to_line_col(0), (1, 1)); assert_eq!(ctx.offset_to_line_col(1), (1, 2)); assert_eq!(ctx.offset_to_line_col(2), (2, 1)); assert_eq!(ctx.offset_to_line_col(3), (2, 2)); assert_eq!(ctx.offset_to_line_col(4), (3, 1)); assert_eq!(ctx.offset_to_line_col(5), (3, 2)); }
5090
5091 #[test]
5092 fn test_mdx_esm_blocks() {
5093 let content = r##"import {Chart} from './snowfall.js'
5094export const year = 2023
5095
5096# Last year's snowfall
5097
5098In {year}, the snowfall was above average.
5099It was followed by a warm spring which caused
5100flood conditions in many of the nearby rivers.
5101
5102<Chart color="#fcb32c" year={year} />
5103"##;
5104
5105 let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);
5106
5107 assert_eq!(ctx.lines.len(), 10);
5109 assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
5110 assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
5111 assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
5112 assert!(
5113 !ctx.lines[3].in_esm_block,
5114 "Line 4 (heading) should NOT be in_esm_block"
5115 );
5116 assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
5117 assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
5118 }
5119
5120 #[test]
5121 fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
5122 let content = r#"import {Chart} from './snowfall.js'
5123export const year = 2023
5124
5125# Last year's snowfall
5126"#;
5127
5128 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5129
5130 assert!(
5132 !ctx.lines[0].in_esm_block,
5133 "Line 1 should NOT be in_esm_block in Standard flavor"
5134 );
5135 assert!(
5136 !ctx.lines[1].in_esm_block,
5137 "Line 2 should NOT be in_esm_block in Standard flavor"
5138 );
5139 }
5140
5141 #[test]
5142 fn test_blockquote_with_indented_content() {
5143 let content = r#"# Heading
5147
5148> -S socket-path
5149> More text
5150"#;
5151 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5152
5153 assert!(
5155 ctx.lines.get(2).is_some_and(|l| l.blockquote.is_some()),
5156 "Line 3 should be a blockquote"
5157 );
5158 assert!(
5160 ctx.lines.get(3).is_some_and(|l| l.blockquote.is_some()),
5161 "Line 4 should be a blockquote"
5162 );
5163
5164 let bq3 = ctx.lines.get(2).unwrap().blockquote.as_ref().unwrap();
5167 assert_eq!(bq3.content, "-S socket-path");
5168 assert_eq!(bq3.nesting_level, 1);
5169 assert!(bq3.has_multiple_spaces_after_marker);
5171
5172 let bq4 = ctx.lines.get(3).unwrap().blockquote.as_ref().unwrap();
5173 assert_eq!(bq4.content, "More text");
5174 assert_eq!(bq4.nesting_level, 1);
5175 }
5176
5177 #[test]
5178 fn test_footnote_definitions_not_parsed_as_reference_defs() {
5179 let content = r#"# Title
5181
5182A footnote[^1].
5183
5184[^1]: This is the footnote content.
5185
5186[^note]: Another footnote with [link](https://example.com).
5187
5188[regular]: ./path.md "A real reference definition"
5189"#;
5190 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5191
5192 assert_eq!(
5194 ctx.reference_defs.len(),
5195 1,
5196 "Footnotes should not be parsed as reference definitions"
5197 );
5198
5199 assert_eq!(ctx.reference_defs[0].id, "regular");
5201 assert_eq!(ctx.reference_defs[0].url, "./path.md");
5202 assert_eq!(
5203 ctx.reference_defs[0].title,
5204 Some("A real reference definition".to_string())
5205 );
5206 }
5207
5208 #[test]
5209 fn test_footnote_with_inline_link_not_misidentified() {
5210 let content = r#"# Title
5213
5214A footnote[^1].
5215
5216[^1]: [link](https://www.google.com).
5217"#;
5218 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5219
5220 assert!(
5222 ctx.reference_defs.is_empty(),
5223 "Footnote with inline link should not create a reference definition"
5224 );
5225 }
5226
5227 #[test]
5228 fn test_various_footnote_formats_excluded() {
5229 let content = r#"[^1]: Numeric footnote
5231[^note]: Named footnote
5232[^a]: Single char footnote
5233[^long-footnote-name]: Long named footnote
5234[^123abc]: Mixed alphanumeric
5235
5236[ref1]: ./file1.md
5237[ref2]: ./file2.md
5238"#;
5239 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5240
5241 assert_eq!(
5243 ctx.reference_defs.len(),
5244 2,
5245 "Only regular reference definitions should be parsed"
5246 );
5247
5248 let ids: Vec<&str> = ctx.reference_defs.iter().map(|r| r.id.as_str()).collect();
5249 assert!(ids.contains(&"ref1"));
5250 assert!(ids.contains(&"ref2"));
5251 assert!(!ids.iter().any(|id| id.starts_with('^')));
5252 }
5253
5254 #[test]
5259 fn test_has_char_tracked_characters() {
5260 let content = "# Heading\n* list item\n_emphasis_ and -hyphen-\n+ plus\n> quote\n| table |\n[link]\n`code`\n<html>\n!image";
5262 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5263
5264 assert!(ctx.has_char('#'), "Should detect hash");
5266 assert!(ctx.has_char('*'), "Should detect asterisk");
5267 assert!(ctx.has_char('_'), "Should detect underscore");
5268 assert!(ctx.has_char('-'), "Should detect hyphen");
5269 assert!(ctx.has_char('+'), "Should detect plus");
5270 assert!(ctx.has_char('>'), "Should detect gt");
5271 assert!(ctx.has_char('|'), "Should detect pipe");
5272 assert!(ctx.has_char('['), "Should detect bracket");
5273 assert!(ctx.has_char('`'), "Should detect backtick");
5274 assert!(ctx.has_char('<'), "Should detect lt");
5275 assert!(ctx.has_char('!'), "Should detect exclamation");
5276 assert!(ctx.has_char('\n'), "Should detect newline");
5277 }
5278
5279 #[test]
5280 fn test_has_char_absent_characters() {
5281 let content = "Simple text without special chars";
5282 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5283
5284 assert!(!ctx.has_char('#'), "Should not detect hash");
5286 assert!(!ctx.has_char('*'), "Should not detect asterisk");
5287 assert!(!ctx.has_char('_'), "Should not detect underscore");
5288 assert!(!ctx.has_char('-'), "Should not detect hyphen");
5289 assert!(!ctx.has_char('+'), "Should not detect plus");
5290 assert!(!ctx.has_char('>'), "Should not detect gt");
5291 assert!(!ctx.has_char('|'), "Should not detect pipe");
5292 assert!(!ctx.has_char('['), "Should not detect bracket");
5293 assert!(!ctx.has_char('`'), "Should not detect backtick");
5294 assert!(!ctx.has_char('<'), "Should not detect lt");
5295 assert!(!ctx.has_char('!'), "Should not detect exclamation");
5296 assert!(!ctx.has_char('\n'), "Should not detect newline in single line");
5298 }
5299
5300 #[test]
5301 fn test_has_char_fallback_for_untracked() {
5302 let content = "Text with @mention and $dollar and %percent";
5303 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5304
5305 assert!(ctx.has_char('@'), "Should detect @ via fallback");
5307 assert!(ctx.has_char('$'), "Should detect $ via fallback");
5308 assert!(ctx.has_char('%'), "Should detect % via fallback");
5309 assert!(!ctx.has_char('^'), "Should not detect absent ^ via fallback");
5310 }
5311
5312 #[test]
5313 fn test_char_count_tracked_characters() {
5314 let content = "## Heading ##\n***bold***\n__emphasis__\n---\n+++\n>> nested\n|| table ||\n[[link]]\n``code``\n<<html>>\n!!";
5315 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5316
5317 assert_eq!(ctx.char_count('#'), 4, "Should count 4 hashes");
5319 assert_eq!(ctx.char_count('*'), 6, "Should count 6 asterisks");
5320 assert_eq!(ctx.char_count('_'), 4, "Should count 4 underscores");
5321 assert_eq!(ctx.char_count('-'), 3, "Should count 3 hyphens");
5322 assert_eq!(ctx.char_count('+'), 3, "Should count 3 pluses");
5323 assert_eq!(ctx.char_count('>'), 4, "Should count 4 gt (2 nested + 2 in <<html>>)");
5324 assert_eq!(ctx.char_count('|'), 4, "Should count 4 pipes");
5325 assert_eq!(ctx.char_count('['), 2, "Should count 2 brackets");
5326 assert_eq!(ctx.char_count('`'), 4, "Should count 4 backticks");
5327 assert_eq!(ctx.char_count('<'), 2, "Should count 2 lt");
5328 assert_eq!(ctx.char_count('!'), 2, "Should count 2 exclamations");
5329 assert_eq!(ctx.char_count('\n'), 10, "Should count 10 newlines");
5330 }
5331
5332 #[test]
5333 fn test_char_count_zero_for_absent() {
5334 let content = "Plain text";
5335 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5336
5337 assert_eq!(ctx.char_count('#'), 0);
5338 assert_eq!(ctx.char_count('*'), 0);
5339 assert_eq!(ctx.char_count('_'), 0);
5340 assert_eq!(ctx.char_count('\n'), 0);
5341 }
5342
5343 #[test]
5344 fn test_char_count_fallback_for_untracked() {
5345 let content = "@@@ $$ %%%";
5346 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5347
5348 assert_eq!(ctx.char_count('@'), 3, "Should count 3 @ via fallback");
5349 assert_eq!(ctx.char_count('$'), 2, "Should count 2 $ via fallback");
5350 assert_eq!(ctx.char_count('%'), 3, "Should count 3 % via fallback");
5351 assert_eq!(ctx.char_count('^'), 0, "Should count 0 for absent char");
5352 }
5353
5354 #[test]
5355 fn test_char_count_empty_content() {
5356 let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
5357
5358 assert_eq!(ctx.char_count('#'), 0);
5359 assert_eq!(ctx.char_count('*'), 0);
5360 assert_eq!(ctx.char_count('@'), 0);
5361 assert!(!ctx.has_char('#'));
5362 assert!(!ctx.has_char('@'));
5363 }
5364
/// Verifies `is_in_html_tag` for a simple element: bytes inside the opening
/// and closing tags are reported as in-tag, the text between them is not.
#[test]
fn test_is_in_html_tag_simple() {
    // Byte layout: `<div>` = 0..=4, `content` = 5..=11, `</div>` = 12..=17.
    let content = "<div>content</div>";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // Opening tag, including both angle brackets.
    assert!(ctx.is_in_html_tag(0), "Position 0 (<) should be in tag");
    assert!(ctx.is_in_html_tag(1), "Position 1 (d) should be in tag");
    assert!(ctx.is_in_html_tag(4), "Position 4 (>) should be in tag");

    // Text between the tags, including the last byte right before `</div>`.
    assert!(!ctx.is_in_html_tag(5), "Position 5 (c) should not be in tag");
    assert!(!ctx.is_in_html_tag(10), "Position 10 (n) should not be in tag");
    assert!(!ctx.is_in_html_tag(11), "Position 11 (t) should not be in tag");

    // Closing tag.
    assert!(ctx.is_in_html_tag(12), "Position 12 (<) should be in tag");
    assert!(ctx.is_in_html_tag(17), "Position 17 (>) should be in tag");
}
5387
/// Checks `is_in_html_tag` around a self-closing `<br/>` embedded in text.
#[test]
fn test_is_in_html_tag_self_closing() {
    let markdown = "Text <br/> more text";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    // (byte position, expected result, failure message)
    let cases = [
        (0, false, "Position 0 should not be in tag"),
        (4, false, "Position 4 (space) should not be in tag"),
        (5, true, "Position 5 (<) should be in tag"),
        (8, true, "Position 8 (/) should be in tag"),
        (9, true, "Position 9 (>) should be in tag"),
        (10, false, "Position 10 (space) should not be in tag"),
    ];
    for (pos, expected, msg) in cases {
        assert!(lint_ctx.is_in_html_tag(pos) == expected, "{msg}");
    }
}
5405
/// Attribute text inside an opening tag counts as "in tag"; the element's
/// text content does not.
#[test]
fn test_is_in_html_tag_with_attributes() {
    let markdown = r#"<a href="url" class="link">text</a>"#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    // (byte position, expected result, failure message)
    let cases = [
        (0, true, "Start of tag"),
        (10, true, "Inside href attribute"),
        (20, true, "Inside class attribute"),
        (26, true, "End of opening tag"),
        (27, false, "Start of content"),
        (30, false, "End of content"),
        (31, true, "Start of closing tag"),
    ];
    for (pos, expected, msg) in cases {
        assert!(lint_ctx.is_in_html_tag(pos) == expected, "{msg}");
    }
}
5424
/// An opening tag that spans several lines is treated as one tag range.
#[test]
fn test_is_in_html_tag_multiline() {
    let markdown = "<div\n class=\"test\"\n>\ncontent\n</div>";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    for (pos, msg) in [
        (0, "Start of multiline tag"),
        (5, "After first newline in tag"),
        (15, "Inside attribute"),
    ] {
        assert!(lint_ctx.is_in_html_tag(pos), "{msg}");
    }

    // Step over the `>` that ends the opening tag and the newline after it.
    let after_opening_tag = markdown.find(">\n").unwrap() + 2;
    assert!(!lint_ctx.is_in_html_tag(after_opening_tag), "Content after tag");
}
5439
/// With no HTML in the document, no byte offset may be reported as in a tag.
#[test]
fn test_is_in_html_tag_no_tags() {
    let markdown = "Plain text without any HTML";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    (0..markdown.len()).for_each(|i| {
        assert!(!lint_ctx.is_in_html_tag(i), "Position {i} should not be in tag");
    });
}
5450
/// A `{{ ... }}` expression is a Jinja range, delimiters included; the
/// surrounding text is not.
#[test]
fn test_is_in_jinja_range_expression() {
    let markdown = "Hello {{ name }}!";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    // (byte position, expected result, failure message)
    let cases = [
        (0, false, "H should not be in Jinja"),
        (5, false, "Space before Jinja should not be in Jinja"),
        (6, true, "First brace should be in Jinja"),
        (7, true, "Second brace should be in Jinja"),
        (10, true, "name should be in Jinja"),
        (14, true, "Closing brace should be in Jinja"),
        (15, true, "Second closing brace should be in Jinja"),
        (16, false, "! should not be in Jinja"),
    ];
    for (pos, expected, msg) in cases {
        assert!(lint_ctx.is_in_jinja_range(pos) == expected, "{msg}");
    }
}
5474
/// `{% ... %}` statements are Jinja ranges; the text between two statements
/// is not.
#[test]
fn test_is_in_jinja_range_statement() {
    let markdown = "{% if condition %}content{% endif %}";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    // (byte position, expected result, failure message)
    let cases = [
        (0, true, "Start of Jinja statement"),
        (5, true, "condition should be in Jinja"),
        (17, true, "End of opening statement"),
        (18, false, "content should not be in Jinja"),
        (25, true, "Start of endif"),
        (32, true, "endif should be in Jinja"),
    ];
    for (pos, expected, msg) in cases {
        assert!(lint_ctx.is_in_jinja_range(pos) == expected, "{msg}");
    }
}
5492
/// Two separate expressions on one line produce two independent Jinja
/// ranges, with a gap between them.
#[test]
fn test_is_in_jinja_range_multiple() {
    let markdown = "{{ a }} and {{ b }}";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    // First expression occupies bytes 0..=6.
    for inside in [0, 3, 6] {
        assert!(lint_ctx.is_in_jinja_range(inside));
    }

    // The " and " separator lies between the two expressions.
    for outside in [8, 11] {
        assert!(!lint_ctx.is_in_jinja_range(outside));
    }

    // Second expression occupies bytes 12..=18.
    for inside in [12, 15, 18] {
        assert!(lint_ctx.is_in_jinja_range(inside));
    }
}
5512
/// Text that contains only single braces must never be reported as being
/// inside a Jinja range.
///
/// The input includes `{single braces}` so that the near-miss case (one
/// brace instead of a doubled `{{`/`}}` delimiter) is actually exercised.
#[test]
fn test_is_in_jinja_range_no_jinja() {
    let content = "Plain text with {single braces} but not Jinja";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // Every byte offset, including the braces themselves, is outside Jinja.
    for i in 0..content.len() {
        assert!(!ctx.is_in_jinja_range(i), "Position {i} should not be in Jinja");
    }
}
5523
/// A reference definition with a double-quoted title records title byte
/// bounds, and `is_in_link_title` is true exactly within `[start, end)`.
#[test]
fn test_is_in_link_title_with_title() {
    let markdown = r#"[ref]: https://example.com "Title text"

Some content."#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    assert_eq!(lint_ctx.reference_defs.len(), 1);
    let reference = &lint_ctx.reference_defs[0];
    assert!(reference.title_byte_start.is_some());
    assert!(reference.title_byte_end.is_some());

    let (title_start, title_end) = (
        reference.title_byte_start.unwrap(),
        reference.title_byte_end.unwrap(),
    );

    // Byte 10 falls inside the URL, before the title.
    assert!(!lint_ctx.is_in_link_title(10), "URL should not be in title");

    for (pos, msg) in [
        (title_start, "Title start should be in title"),
        (title_start + 5, "Middle of title should be in title"),
        (title_end - 1, "End of title should be in title"),
    ] {
        assert!(lint_ctx.is_in_link_title(pos), "{msg}");
    }

    // The end bound is exclusive.
    assert!(
        !lint_ctx.is_in_link_title(title_end),
        "After title end should not be in title"
    );
}
5561
/// A reference definition without a title records no title bounds, and no
/// byte offset is reported as being in a link title.
#[test]
fn test_is_in_link_title_without_title() {
    let markdown = "[ref]: https://example.com\n\nSome content.";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    assert_eq!(lint_ctx.reference_defs.len(), 1);
    let reference = &lint_ctx.reference_defs[0];
    assert!(reference.title_byte_start.is_none());
    assert!(reference.title_byte_end.is_none());

    (0..markdown.len()).for_each(|i| {
        assert!(!lint_ctx.is_in_link_title(i), "Position {i} should not be in title");
    });
}
5578
/// With several reference definitions, title bounds are tracked per
/// definition: `ref1` and `ref3` have titles, `ref2` does not.
#[test]
fn test_is_in_link_title_multiple_refs() {
    let markdown = r#"[ref1]: /url1 "Title One"
[ref2]: /url2
[ref3]: /url3 "Title Three"
"#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    assert_eq!(lint_ctx.reference_defs.len(), 3);

    let first = lint_ctx.reference_defs.iter().find(|def| def.id == "ref1").unwrap();
    assert!(first.title_byte_start.is_some());

    let second = lint_ctx.reference_defs.iter().find(|def| def.id == "ref2").unwrap();
    assert!(second.title_byte_start.is_none());

    let third = lint_ctx.reference_defs.iter().find(|def| def.id == "ref3").unwrap();
    assert!(third.title_byte_start.is_some());

    // Position checks only run when both bounds of a title are present.
    if let Some((start, end)) = first.title_byte_start.zip(first.title_byte_end) {
        assert!(lint_ctx.is_in_link_title(start + 1));
        assert!(!lint_ctx.is_in_link_title(end + 5));
    }

    if let Some((start, _)) = third.title_byte_start.zip(third.title_byte_end) {
        assert!(lint_ctx.is_in_link_title(start + 1));
    }
}
5613
/// Single-quoted titles: when title bounds are recorded, `is_in_link_title`
/// must agree with them (start inclusive, end exclusive).
#[test]
fn test_is_in_link_title_single_quotes() {
    let markdown = "[ref]: /url 'Single quoted title'\n";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    assert_eq!(lint_ctx.reference_defs.len(), 1);
    let reference = &lint_ctx.reference_defs[0];

    // Position checks are skipped unless both bounds are present.
    if let Some((start, end)) = reference.title_byte_start.zip(reference.title_byte_end) {
        assert!(lint_ctx.is_in_link_title(start));
        assert!(lint_ctx.is_in_link_title(start + 5));
        assert!(!lint_ctx.is_in_link_title(end));
    }
}
5628
/// Parenthesized titles: the test accepts either outcome. If title bounds
/// are recorded for the first definition, `is_in_link_title` must agree with
/// them; otherwise no byte offset may be reported as being in a title.
#[test]
fn test_is_in_link_title_parentheses() {
    let markdown = "[ref]: /url (Parenthesized title)\n";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    // `None` covers both "no definition parsed" and "definition without
    // complete title bounds".
    let title_bounds = lint_ctx
        .reference_defs
        .first()
        .and_then(|def| def.title_byte_start.zip(def.title_byte_end));

    match title_bounds {
        Some((start, end)) => {
            assert!(lint_ctx.is_in_link_title(start));
            assert!(lint_ctx.is_in_link_title(start + 5));
            assert!(!lint_ctx.is_in_link_title(end));
        }
        None => {
            for i in 0..markdown.len() {
                assert!(!lint_ctx.is_in_link_title(i));
            }
        }
    }
}
5657
/// A document without reference definitions has no link titles anywhere.
#[test]
fn test_is_in_link_title_no_refs() {
    let markdown = "Just plain text without any reference definitions.";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    assert!(lint_ctx.reference_defs.is_empty());

    (0..markdown.len()).for_each(|i| {
        assert!(!lint_ctx.is_in_link_title(i));
    });
}
5669
/// A single `$...$` run yields one non-display math span whose content is
/// the text between the dollar signs.
#[test]
fn test_math_spans_inline() {
    let markdown = "Text with inline math $[f](x)$ in it.";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should detect one inline math span");

    let inline = &spans[0];
    assert!(!inline.is_display, "Should be inline math, not display");
    assert_eq!(inline.content, "[f](x)", "Content should be extracted correctly");
}
5686
/// A `$$...$$` run on one line yields a single display math span that
/// retains the link-like `[x](\zeta)` text in its content.
#[test]
fn test_math_spans_display_single_line() {
    let markdown = "$$X(\\zeta) = \\mathcal Z [x](\\zeta)$$";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should detect one display math span");

    let display = &spans[0];
    assert!(display.is_display, "Should be display math");

    let has_link_like_text = display.content.contains("[x](\\zeta)");
    assert!(has_link_like_text, "Content should contain the link-like pattern");
}
5702
/// A `$$` block spread over several lines is detected as one display span.
#[test]
fn test_math_spans_display_multiline() {
    let markdown = "Before\n\n$$\n[x](\\zeta) = \\sum_k x(k)\n$$\n\nAfter";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should detect one display math span");
    assert!(spans[0].is_display, "Should be display math");
}
5714
/// `is_in_math_span` is true for offsets inside `$...$` and false for
/// offsets before and after it.
#[test]
fn test_is_in_math_span() {
    let markdown = "Text $[f](x)$ more text";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let opening_dollar = markdown.find('$').unwrap();
    // One past the closing `$`.
    let past_closing_dollar = markdown.rfind('$').unwrap() + 1;

    for inside in [opening_dollar + 1, opening_dollar + 3] {
        assert!(
            lint_ctx.is_in_math_span(inside),
            "Position inside math span should return true"
        );
    }

    assert!(
        !lint_ctx.is_in_math_span(0),
        "Position before math span should return false"
    );
    assert!(
        !lint_ctx.is_in_math_span(past_closing_dollar + 1),
        "Position after math span should return false"
    );
}
5740
/// Math spans and code spans are tracked separately when both appear on
/// the same line.
#[test]
fn test_math_spans_mixed_with_code() {
    let markdown = "Math $[f](x)$ and code `[g](y)` mixed";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let math = lint_ctx.math_spans();
    let code = lint_ctx.code_spans();

    assert_eq!(math.len(), 1, "Should have one math span");
    assert_eq!(code.len(), 1, "Should have one code span");

    // Each span carries only its own inner text.
    assert_eq!(math[0].content, "[f](x)");
    assert_eq!(code[0].content, "[g](y)");
}
5757
/// Plain prose produces no math spans.
#[test]
fn test_math_spans_no_math() {
    let lint_ctx = LintContext::new(
        "Regular text without any math at all.",
        MarkdownFlavor::Standard,
        None,
    );

    assert!(lint_ctx.math_spans().is_empty(), "Should have no math spans");
}
5766
/// Two inline spans and one display span on the same line are all
/// detected and classified correctly.
#[test]
fn test_math_spans_multiple() {
    let markdown = "First $a$ and second $b$ and display $$c$$";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 3, "Should detect three math spans");

    // Split the spans by kind and compare the group sizes.
    let (display, inline): (Vec<_>, Vec<_>) = spans.iter().partition(|span| span.is_display);

    assert_eq!(inline.len(), 2, "Should have two inline math spans");
    assert_eq!(display.len(), 1, "Should have one display math span");
}
5782
/// Boundary semantics of a math span: `byte_offset` is inclusive and
/// `byte_end` is exclusive.
#[test]
fn test_is_in_math_span_boundary_positions() {
    let markdown = "$[f](x)$";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should have one math span");

    let only = &spans[0];

    // (byte position, expected result, failure message)
    let cases = [
        (only.byte_offset, true, "Start position should be in span"),
        (only.byte_offset + 1, true, "Position after start should be in span"),
        (only.byte_end - 1, true, "Position at end-1 should be in span"),
        (
            only.byte_end,
            false,
            "Position at byte_end should NOT be in span (exclusive)",
        ),
    ];
    for (pos, expected, msg) in cases {
        assert!(lint_ctx.is_in_math_span(pos) == expected, "{msg}");
    }
}
5819
/// A math span that opens the document starts at byte offset 0.
#[test]
fn test_math_spans_at_document_start() {
    let lint_ctx = LintContext::new("$x$ text", MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1);

    let leading = &spans[0];
    assert_eq!(leading.byte_offset, 0, "Math should start at byte 0");
}
5829
/// A math span that closes the document ends exactly at the document length.
#[test]
fn test_math_spans_at_document_end() {
    let markdown = "text $x$";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1);

    let trailing = &spans[0];
    assert_eq!(trailing.byte_end, markdown.len(), "Math should end at document end");
}
5839
/// For back-to-back math (`$a$$b$`) at least one span is detected and
/// every byte of the input is covered by some math span.
#[test]
fn test_math_spans_consecutive() {
    let markdown = "$a$$b$";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert!(!spans.is_empty(), "Should detect at least one math span");

    (0..markdown.len()).for_each(|i| {
        assert!(lint_ctx.is_in_math_span(i), "Position {i} should be in a math span");
    });
}
5854
/// A lone `$` used as a currency sign must not produce a math span that
/// contains the amount.
#[test]
fn test_math_spans_currency_not_math() {
    let markdown = "Price is $100";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();

    // `all` is vacuously true for an empty list, which covers the
    // "no spans at all" outcome as well.
    let no_span_holds_amount = spans.iter().all(|span| !span.content.contains("100"));
    assert!(
        no_span_holds_amount,
        "Unbalanced $ should not create math span containing 100"
    );
}
5869
/// `get_reference_url` resolves definitions regardless of the letter case
/// used in the definition or in the lookup key, and returns `None` for
/// unknown ids.
#[test]
fn test_reference_lookup_o1_basic() {
    let markdown = r#"[ref1]: /url1
[REF2]: /url2 "Title"
[Ref3]: /url3

Use [link][ref1] and [link][REF2]."#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    assert_eq!(lint_ctx.reference_defs.len(), 3);

    // (lookup key, expected URL)
    let lookups = [
        ("ref1", Some("/url1")),
        ("REF1", Some("/url1")),
        ("Ref1", Some("/url1")),
        ("ref2", Some("/url2")),
        ("REF2", Some("/url2")),
        ("ref3", Some("/url3")),
        ("nonexistent", None),
    ];
    for (key, expected_url) in lookups {
        assert_eq!(lint_ctx.get_reference_url(key), expected_url);
    }
}
5895
/// `get_reference_def` returns the full definition (URL and title), accepts
/// a differently-cased key, and yields `None` for unknown ids.
#[test]
fn test_reference_lookup_o1_get_reference_def() {
    let markdown = r#"[myref]: https://example.com "My Title"
"#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    // Lookup with the key exactly as written in the definition.
    let exact = lint_ctx.get_reference_def("myref").expect("Should find myref");
    assert_eq!(exact.url, "https://example.com");
    assert_eq!(exact.title.as_deref(), Some("My Title"));

    // Lookup with an upper-cased key.
    let upper = lint_ctx.get_reference_def("MYREF").expect("Should find MYREF");
    assert_eq!(upper.url, "https://example.com");

    let missing = lint_ctx.get_reference_def("nonexistent");
    assert!(missing.is_none());
}
5914
/// `has_reference_def` matches ids regardless of letter case and rejects
/// ids that were never defined.
#[test]
fn test_reference_lookup_o1_has_reference_def() {
    let markdown = "[foo]: /foo\n[BAR]: /bar\n";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    for defined in ["foo", "FOO", "bar", "Bar"] {
        assert!(lint_ctx.has_reference_def(defined));
    }
    assert!(!lint_ctx.has_reference_def("baz"));
}
5929
/// With no reference definitions in the document, every lookup API reports
/// "not found".
#[test]
fn test_reference_lookup_o1_empty_content() {
    let lint_ctx = LintContext::new("No references here.", MarkdownFlavor::Standard, None);
    let key = "anything";

    assert!(lint_ctx.reference_defs.is_empty());
    assert_eq!(lint_ctx.get_reference_url(key), None);
    assert!(lint_ctx.get_reference_def(key).is_none());
    assert!(!lint_ctx.has_reference_def(key));
}
5940
/// Reference ids containing dashes, underscores or dots resolve to their URLs.
#[test]
fn test_reference_lookup_o1_special_characters_in_id() {
    let markdown = "[ref-with-dash]: /url1\n[ref_with_underscore]: /url2\n[ref.with.dots]: /url3\n";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    // (lookup key, expected URL)
    for (key, expected_url) in [
        ("ref-with-dash", Some("/url1")),
        ("ref_with_underscore", Some("/url2")),
        ("ref.with.dots", Some("/url3")),
    ] {
        assert_eq!(lint_ctx.get_reference_url(key), expected_url);
    }
}
5953
/// Non-ASCII reference ids resolve, and the lookup is expected to match
/// `ÉMOJI` against a definition written as `émoji`.
#[test]
fn test_reference_lookup_o1_unicode_id() {
    let markdown = r#"[日本語]: /japanese
[émoji]: /emoji
"#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    // (lookup key, expected URL)
    for (key, expected_url) in [
        ("日本語", Some("/japanese")),
        ("émoji", Some("/emoji")),
        ("ÉMOJI", Some("/emoji")),
    ] {
        assert_eq!(lint_ctx.get_reference_url(key), expected_url);
    }
}
5965}