1use crate::config::MarkdownFlavor;
2use crate::inline_config::InlineConfig;
3use crate::rules::front_matter_utils::FrontMatterUtils;
4use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
5use crate::utils::element_cache::ElementCache;
6use crate::utils::regex_cache::URL_SIMPLE_REGEX;
7use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
8use regex::Regex;
9use std::borrow::Cow;
10use std::collections::HashMap;
11use std::path::PathBuf;
12use std::sync::LazyLock;
13
/// Evaluates `$code`, returning its value; when `$profile` is true the elapsed
/// wall-clock time is written to stderr tagged with `$name`.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let timer = std::time::Instant::now();
        let outcome = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, timer.elapsed());
        }
        outcome
    }};
}

/// wasm32 variant: no timing is taken, the macro simply expands to `$code`.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        $code
    }};
}
31
/// Lazily compiled regex for inline and reference-style links.
///
/// Capture groups (see the inline pattern comments): 1 = link text,
/// 2 = angle-bracketed URL, 3 = bare URL, 4/5 = double/single-quoted title,
/// 6 = reference ID. Compilation happens on first use; `unwrap` panics only
/// if the pattern itself is invalid.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
 \[((?:[^\[\]\\]|\\.)*)\] # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
 (?:
 \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
 |
 \[([^\]]*)\] # Reference ID in group 6
 )"#
    ).unwrap()
});
45
/// Lazily compiled regex for inline and reference-style images.
///
/// Same shape as the link pattern with a leading `!`. Capture groups:
/// 1 = alt text, 2 = angle-bracketed URL, 3 = bare URL, 4/5 = double/single
/// quoted title, 6 = reference ID.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
 !\[((?:[^\[\]\\]|\\.)*)\] # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
 (?:
 \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
 |
 \[([^\]]*)\] # Reference ID in group 6
 )"#
    ).unwrap()
});
59
/// Lazily compiled multi-line regex for reference definitions (`[id]: url "title"`).
///
/// Allows up to three leading spaces. Capture groups: 1 = ID, 2 = URL
/// (a run of non-whitespace), 3/4 = optional double/single-quoted title.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
63
/// Lazily compiled regex matching an e-mail-shaped token:
/// local part, `@`, domain, and a final label of at least two ASCII letters.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
69
/// Lazily compiled regex capturing a leading blockquote prefix in group 1:
/// optional whitespace, one or more `>`, then optional whitespace.
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
72
/// Per-line facts gathered while a [`LintContext`] is being built.
///
/// `byte_offset` and `byte_len` locate the line inside the source buffer
/// (see [`LineInfo::content`]). The `in_*` flags record which enclosing
/// construct the line was found in; most are filled in by detector passes
/// whose bodies are outside this part of the file.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte position in the source where this line starts.
    pub byte_offset: usize,
    /// Length of the line in bytes, as sliced by [`LineInfo::content`].
    pub byte_len: usize,
    /// Leading indentation of the line.
    /// NOTE(review): unit (bytes vs. chars) is not visible here — confirm.
    pub indent: usize,
    /// NOTE(review): presumably `indent` with tabs expanded — confirm against the producer.
    pub visual_indent: usize,
    pub is_blank: bool,
    pub in_code_block: bool,
    pub in_front_matter: bool,
    pub in_html_block: bool,
    pub in_html_comment: bool,
    /// List-item details when this line carries a list marker.
    pub list_item: Option<ListItemInfo>,
    /// Heading details when this line is (or was parsed as) a heading.
    pub heading: Option<HeadingInfo>,
    /// Blockquote details when this line is part of a blockquote.
    pub blockquote: Option<BlockquoteInfo>,
    pub in_mkdocstrings: bool,
    pub in_esm_block: bool,
    /// Set by `LintContext::new` on every line after the first line of a
    /// code span that stretches over several lines.
    pub in_code_span_continuation: bool,
    pub is_horizontal_rule: bool,
    pub in_math_block: bool,
    pub in_quarto_div: bool,
    pub in_jsx_expression: bool,
    pub in_mdx_comment: bool,
    pub in_jsx_component: bool,
    pub in_jsx_fragment: bool,
    /// Together with `in_content_tab`, feeds [`LineInfo::in_mkdocs_container`].
    pub in_admonition: bool,
    pub in_content_tab: bool,
    pub in_definition_list: bool,
    pub in_obsidian_comment: bool,
}
132
133impl LineInfo {
134 pub fn content<'a>(&self, source: &'a str) -> &'a str {
136 &source[self.byte_offset..self.byte_offset + self.byte_len]
137 }
138
139 #[inline]
143 pub fn in_mkdocs_container(&self) -> bool {
144 self.in_admonition || self.in_content_tab
145 }
146}
147
/// Details of the list marker found on a line.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker text as it appears in the source.
    pub marker: String,
    /// Whether the item belongs to an ordered list.
    pub is_ordered: bool,
    /// Item number, when one is available.
    /// NOTE(review): presumably `Some` only for ordered items — confirm.
    pub number: Option<usize>,
    /// Column of the marker. `compute_mixed_list_nesting` treats a value of 1
    /// the same as 0, so both conventions appear to occur.
    pub marker_column: usize,
    /// Column where the item's content starts.
    pub content_column: usize,
}
162
/// Syntactic form a heading was written in.
///
/// Derives `Eq` in addition to `PartialEq`: equality on a fieldless enum is
/// total, and `Eq` lets the type be used where full equivalence is required.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HeadingStyle {
    /// `#`-prefixed heading.
    ATX,
    /// Setext heading, first underline form.
    /// NOTE(review): presumably the `=` underline (level 1) — confirm.
    Setext1,
    /// Setext heading, second underline form.
    /// NOTE(review): presumably the `-` underline (level 2) — confirm.
    Setext2,
}
173
/// A link found in the document, as produced by `LintContext::parse_links`.
///
/// Text fields are `Cow` so they can borrow from the source buffer when no
/// allocation is needed.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// Line the link starts on.
    pub line: usize,
    /// Start column within `line`.
    /// NOTE(review): indexing base and unit are not visible here — confirm.
    pub start_col: usize,
    /// End column within the line.
    pub end_col: usize,
    /// Byte position in the source where the link starts.
    pub byte_offset: usize,
    /// Byte position in the source where the link ends.
    pub byte_end: usize,
    /// The link text.
    pub text: Cow<'a, str>,
    /// The link destination.
    pub url: Cow<'a, str>,
    /// Whether the link is reference-style.
    pub is_reference: bool,
    /// Reference ID for reference-style links.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link classification reported by `pulldown_cmark`.
    pub link_type: LinkType,
}
198
/// A reference the Markdown parser could not resolve.
///
/// Recorded from `pulldown_cmark`'s broken-link callback in `parse_links`.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The unresolved reference text reported by the parser.
    pub reference: String,
    /// Byte span of the broken link, as reported by the parser.
    pub span: std::ops::Range<usize>,
}
207
/// A footnote reference found in the document (third element of the tuple
/// returned by `LintContext::parse_links`).
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// Footnote identifier.
    pub id: String,
    /// Line the reference appears on.
    pub line: usize,
    /// Byte position in the source where the reference starts.
    pub byte_offset: usize,
    /// Byte position in the source where the reference ends.
    pub byte_end: usize,
}
220
/// An image found in the document, as produced by `LintContext::parse_images`.
///
/// Mirrors [`ParsedLink`], with `alt_text` in place of link text.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// Line the image starts on.
    pub line: usize,
    /// Start column within `line`.
    /// NOTE(review): indexing base and unit are not visible here — confirm.
    pub start_col: usize,
    /// End column within the line.
    pub end_col: usize,
    /// Byte position in the source where the image starts.
    pub byte_offset: usize,
    /// Byte position in the source where the image ends.
    pub byte_end: usize,
    /// The image's alt text.
    pub alt_text: Cow<'a, str>,
    /// The image destination.
    pub url: Cow<'a, str>,
    /// Whether the image is reference-style.
    pub is_reference: bool,
    /// Reference ID for reference-style images.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link classification reported by `pulldown_cmark`.
    pub link_type: LinkType,
}
245
/// A reference definition (`[id]: url "title"`) found in the document.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line the definition appears on.
    pub line: usize,
    /// Definition ID; `LintContext` lowercases it when building its lookup map.
    pub id: String,
    /// Destination URL.
    pub url: String,
    /// Optional title.
    pub title: Option<String>,
    /// Start of the definition; `is_in_reference_def` treats
    /// `byte_offset..byte_end` as a half-open range.
    pub byte_offset: usize,
    /// End of the definition (exclusive).
    pub byte_end: usize,
    /// Start of the title; `is_in_link_title` treats
    /// `title_byte_start..title_byte_end` as a half-open range.
    pub title_byte_start: Option<usize>,
    /// End of the title (exclusive).
    pub title_byte_end: Option<usize>,
}
266
/// An inline code span, possibly covering several lines.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// Line the span starts on (1-based: `LintContext::new` uses `line_num - 1`
    /// as an index into `lines`).
    pub line: usize,
    /// Line the span ends on; greater than `line` for multi-line spans.
    pub end_line: usize,
    /// Start column on `line`; `is_in_code_span` compares it against a
    /// 0-based column.
    pub start_col: usize,
    /// End column on `end_line` (exclusive in `is_in_code_span`).
    pub end_col: usize,
    /// Start of the span in bytes; lookups treat `byte_offset..byte_end`
    /// as a half-open range.
    pub byte_offset: usize,
    /// End of the span in bytes (exclusive).
    pub byte_end: usize,
    /// Number of backticks in the delimiter.
    pub backtick_count: usize,
    /// Text of the span.
    /// NOTE(review): presumably without the delimiters — confirm.
    pub content: String,
}
287
/// A math span found in the document.
#[derive(Debug, Clone)]
pub struct MathSpan {
    /// Line the span starts on.
    pub line: usize,
    /// Line the span ends on.
    pub end_line: usize,
    /// Start column on `line`.
    pub start_col: usize,
    /// End column on `end_line`.
    pub end_col: usize,
    /// Start of the span in bytes; `is_in_math_span` treats
    /// `byte_offset..byte_end` as a half-open range.
    pub byte_offset: usize,
    /// End of the span in bytes (exclusive).
    pub byte_end: usize,
    /// Whether this is display math rather than inline math.
    pub is_display: bool,
    /// Text of the span.
    pub content: String,
}
308
/// Details of a heading found on a line.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level.
    pub level: u8,
    /// Syntactic form of the heading.
    pub style: HeadingStyle,
    /// The marker text as written in the source.
    pub marker: String,
    /// Column of the marker.
    pub marker_column: usize,
    /// Column where the heading text starts.
    pub content_column: usize,
    /// Heading text.
    /// NOTE(review): how this differs from `raw_text` is not visible here — confirm.
    pub text: String,
    /// Custom anchor ID attached to the heading, if any.
    pub custom_id: Option<String>,
    /// Unprocessed heading text.
    pub raw_text: String,
    /// Whether the heading has a closing sequence.
    pub has_closing_sequence: bool,
    /// The closing sequence text.
    pub closing_sequence: String,
    /// Headings with `is_valid == false` are skipped by [`ValidHeadingsIter`].
    pub is_valid: bool,
}
336
/// Item yielded by [`ValidHeadingsIter`]: a heading whose `is_valid` flag is set,
/// together with the line it sits on.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// 1-based line number (slice index + 1).
    pub line_num: usize,
    /// The heading stored on that line.
    pub heading: &'a HeadingInfo,
    /// The full line record the heading belongs to.
    pub line_info: &'a LineInfo,
}
350
/// Iterator over the lines of a document that carry a valid heading.
pub struct ValidHeadingsIter<'a> {
    /// All line records being scanned.
    lines: &'a [LineInfo],
    /// Index of the next line to inspect.
    current_index: usize,
}
359
360impl<'a> ValidHeadingsIter<'a> {
361 fn new(lines: &'a [LineInfo]) -> Self {
362 Self {
363 lines,
364 current_index: 0,
365 }
366 }
367}
368
369impl<'a> Iterator for ValidHeadingsIter<'a> {
370 type Item = ValidHeading<'a>;
371
372 fn next(&mut self) -> Option<Self::Item> {
373 while self.current_index < self.lines.len() {
374 let idx = self.current_index;
375 self.current_index += 1;
376
377 let line_info = &self.lines[idx];
378 if let Some(heading) = &line_info.heading
379 && heading.is_valid
380 {
381 return Some(ValidHeading {
382 line_num: idx + 1, heading,
384 line_info,
385 });
386 }
387 }
388 None
389 }
390}
391
/// Details of the blockquote a line belongs to.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Blockquote nesting depth.
    pub nesting_level: usize,
    /// Whitespace preceding the first `>` marker.
    pub indent: String,
    /// Column of the marker.
    pub marker_column: usize,
    /// Blockquote prefix; `blockquote_prefix_for_blank_line` returns it with
    /// trailing whitespace trimmed.
    pub prefix: String,
    /// Line content after the prefix.
    pub content: String,
    /// Whether the marker is immediately followed by content.
    pub has_no_space_after_marker: bool,
    /// Whether more than one space follows the marker.
    pub has_multiple_spaces_after_marker: bool,
    /// NOTE(review): presumably marks lines the MD028 rule should fix — confirm.
    pub needs_md028_fix: bool,
}
412
/// A contiguous list in the document.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// First line of the block; `is_in_list_block` treats
    /// `start_line..=end_line` as inclusive.
    pub start_line: usize,
    /// Last line of the block (inclusive).
    pub end_line: usize,
    /// Whether the list is ordered.
    pub is_ordered: bool,
    /// Marker used by the list, when there is a single one to report.
    /// NOTE(review): the condition for `None` is not visible here — confirm.
    pub marker: Option<String>,
    /// Blockquote prefix the list sits under; empty when not quoted (assumed — confirm).
    pub blockquote_prefix: String,
    /// Line numbers of the items in this block.
    pub item_lines: Vec<usize>,
    /// Nesting depth of the block.
    pub nesting_level: usize,
    /// Widest marker seen in the block.
    pub max_marker_width: usize,
}
433
434use std::sync::{Arc, OnceLock};
435
/// Map keyed by `usize` holding per-item list data as a tuple.
/// NOTE(review): the tuple layout appears to mirror `ListItemInfo`
/// (is_ordered, marker, marker_column, content_column, number) and the key is
/// presumably a line index — confirm against the code that fills it.
type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;
438
/// List of `(start, end)` byte positions.
type ByteRanges = Vec<(usize, usize)>;
441
/// Precomputed counts of Markdown-significant characters in the document.
///
/// `LintContext::has_char` and `LintContext::char_count` read these instead
/// of rescanning the content; the `likely_has_*` helpers use them as cheap
/// pre-filters.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count reported for `#`.
    pub hash_count: usize,
    /// Count reported for `*`.
    pub asterisk_count: usize,
    /// Count reported for `_`.
    pub underscore_count: usize,
    /// Count reported for `-`.
    pub hyphen_count: usize,
    /// Count reported for `+`.
    pub plus_count: usize,
    /// Count reported for `>`.
    pub gt_count: usize,
    /// Count reported for `|`.
    pub pipe_count: usize,
    /// Count reported for `[`.
    /// NOTE(review): whether `]` is also counted is decided by the producer — confirm.
    pub bracket_count: usize,
    /// Count reported for a backtick.
    pub backtick_count: usize,
    /// Count reported for `<`.
    pub lt_count: usize,
    /// Count reported for `!`.
    pub exclamation_count: usize,
    /// Count reported for `\n`.
    pub newline_count: usize,
}
470
/// An HTML tag found in the document.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line the tag is on; `html_tags_on_line` filters on this value.
    pub line: usize,
    /// Start column within the line.
    pub start_col: usize,
    /// End column within the line.
    pub end_col: usize,
    /// Start of the tag in bytes; `is_in_html_tag` treats
    /// `byte_offset..byte_end` as a half-open range.
    pub byte_offset: usize,
    /// End of the tag in bytes (exclusive).
    pub byte_end: usize,
    /// Name of the tag.
    pub tag_name: String,
    /// Whether this is a closing tag.
    pub is_closing: bool,
    /// Whether the tag is self-closing.
    pub is_self_closing: bool,
    /// The tag text as written in the source.
    pub raw_content: String,
}
493
/// An emphasis span found in the document.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line the span is on; `emphasis_spans_on_line` filters on this value.
    pub line: usize,
    /// Start column within the line.
    pub start_col: usize,
    /// End column within the line.
    pub end_col: usize,
    /// Start of the span in bytes.
    pub byte_offset: usize,
    /// End of the span in bytes.
    pub byte_end: usize,
    /// Emphasis marker character.
    pub marker: char,
    /// Number of marker characters in the delimiter.
    pub marker_count: usize,
    /// Emphasized text.
    pub content: String,
}
514
/// A table row found in the document.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line the row is on; `table_rows_on_line` filters on this value.
    pub line: usize,
    /// Whether the row is the header/body separator row.
    pub is_separator: bool,
    /// Number of columns in the row.
    pub column_count: usize,
    /// Column alignments, one string per column.
    /// NOTE(review): the set of values used is not visible here — confirm.
    pub column_alignments: Vec<String>,
}
527
/// A bare URL (or similar bare address) found in the document.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line the URL is on; `bare_urls_on_line` filters on this value.
    pub line: usize,
    /// Start column within the line.
    pub start_col: usize,
    /// End column within the line.
    pub end_col: usize,
    /// Start of the URL in bytes.
    pub byte_offset: usize,
    /// End of the URL in bytes.
    pub byte_end: usize,
    /// The matched text.
    pub url: String,
    /// Kind of match, as a string.
    /// NOTE(review): the set of values used is not visible here — confirm.
    pub url_type: String,
}
546
/// Shared, pre-parsed view of one Markdown document.
///
/// Built once by `LintContext::new`. Some analyses are computed eagerly there;
/// the `OnceLock` caches hold results that are either seeded during
/// construction or computed on first access.
pub struct LintContext<'a> {
    /// The Markdown source this context borrows.
    pub content: &'a str,
    /// Byte offset of the start of every line (`0`, then one past each `\n`).
    pub line_offsets: Vec<usize>,
    /// Code-block byte ranges from `CodeBlockUtils::detect_code_blocks_and_spans`.
    pub code_blocks: Vec<(usize, usize)>,
    /// One record per line; line number `n` is at index `n - 1`.
    pub lines: Vec<LineInfo>,
    /// Links found by `parse_links`.
    pub links: Vec<ParsedLink<'a>>,
    /// Images found by `parse_images`.
    pub images: Vec<ParsedImage<'a>>,
    /// References the Markdown parser could not resolve.
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Footnote references found by `parse_links`.
    pub footnote_refs: Vec<FootnoteRef>,
    /// Reference definitions found by `parse_reference_defs`.
    pub reference_defs: Vec<ReferenceDef>,
    /// Lowercased definition ID -> index into `reference_defs`.
    reference_defs_map: HashMap<String, usize>,
    /// Code spans; seeded during construction.
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>,
    /// Math spans; computed on first access.
    math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>,
    /// Lists found by `parse_list_blocks`.
    pub list_blocks: Vec<ListBlock>,
    /// Character counts from `compute_char_frequency`.
    pub char_frequency: CharFrequency,
    /// HTML tags; computed on first access.
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>,
    /// Emphasis spans; seeded during construction.
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>,
    /// Table rows; computed on first access.
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>,
    /// Bare URLs; computed on first access.
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>,
    /// Result of the mixed ordered/unordered nesting check; computed on first access.
    has_mixed_list_nesting_cache: OnceLock<bool>,
    /// Byte ranges of HTML comments.
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Tables found by `TableUtils::find_table_blocks_with_code_info`.
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Line/column to byte-range index over `content`.
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    /// Byte ranges found by `jinja_utils::find_jinja_ranges`.
    jinja_ranges: Vec<(usize, usize)>,
    /// Markdown flavor the document is analyzed under.
    pub flavor: MarkdownFlavor,
    /// Path passed to `new`, stored unchanged.
    pub source_file: Option<PathBuf>,
    /// Byte ranges of JSX expressions.
    jsx_expression_ranges: Vec<(usize, usize)>,
    /// Byte ranges of MDX comments.
    mdx_comment_ranges: Vec<(usize, usize)>,
    /// Citation byte ranges; only populated for the Quarto flavor.
    citation_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Byte ranges matched by `HUGO_SHORTCODE_REGEX`.
    shortcode_ranges: Vec<(usize, usize)>,
    /// Inline configuration parsed from the content; backs `is_rule_disabled`.
    inline_config: InlineConfig,
    /// Byte ranges of Obsidian comments.
    obsidian_comment_ranges: Vec<(usize, usize)>,
}
580
/// The four consecutive pieces of a blockquote line, each borrowed from the
/// original line.
struct BlockquoteComponents<'a> {
    /// Spaces/tabs before the first `>`.
    indent: &'a str,
    /// The unbroken run of `>` characters.
    markers: &'a str,
    /// Spaces/tabs directly after the markers.
    spaces_after: &'a str,
    /// Everything that follows.
    content: &'a str,
}

/// Splits `line` into indent, `>` markers, trailing whitespace and content.
///
/// Returns `None` when the first character after leading spaces/tabs is not
/// `>` (this includes empty and whitespace-only lines). Only a contiguous run
/// of `>` counts as markers; a later `>` separated by whitespace ends up in
/// `content`.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    const BLANKS: [char; 2] = [' ', '\t'];

    let after_indent = line.trim_start_matches(BLANKS);
    if !after_indent.starts_with('>') {
        return None;
    }

    let after_markers = after_indent.trim_start_matches('>');
    let content = after_markers.trim_start_matches(BLANKS);

    // Every remainder is a suffix of `line`, so the lengths give the cut points.
    let indent_end = line.len() - after_indent.len();
    let markers_end = line.len() - after_markers.len();
    let spaces_end = line.len() - content.len();

    Some(BlockquoteComponents {
        indent: &line[..indent_end],
        markers: &line[indent_end..markers_end],
        spaces_after: &line[markers_end..spaces_end],
        content,
    })
}
625
626impl<'a> LintContext<'a> {
627 pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
628 #[cfg(not(target_arch = "wasm32"))]
629 let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
630 #[cfg(target_arch = "wasm32")]
631 let profile = false;
632
633 let line_offsets = profile_section!("Line offsets", profile, {
634 let mut offsets = vec![0];
635 for (i, c) in content.char_indices() {
636 if c == '\n' {
637 offsets.push(i + 1);
638 }
639 }
640 offsets
641 });
642
643 let (code_blocks, code_span_ranges) = profile_section!(
645 "Code blocks",
646 profile,
647 CodeBlockUtils::detect_code_blocks_and_spans(content)
648 );
649
650 let html_comment_ranges = profile_section!(
652 "HTML comment ranges",
653 profile,
654 crate::utils::skip_context::compute_html_comment_ranges(content)
655 );
656
657 let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
659 if flavor == MarkdownFlavor::MkDocs {
660 crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
661 } else {
662 Vec::new()
663 }
664 });
665
666 let quarto_div_ranges = profile_section!("Quarto div ranges", profile, {
668 if flavor == MarkdownFlavor::Quarto {
669 crate::utils::quarto_divs::detect_div_block_ranges(content)
670 } else {
671 Vec::new()
672 }
673 });
674
675 let (mut lines, emphasis_spans) = profile_section!(
678 "Basic line info",
679 profile,
680 Self::compute_basic_line_info(
681 content,
682 &line_offsets,
683 &code_blocks,
684 flavor,
685 &html_comment_ranges,
686 &autodoc_ranges,
687 &quarto_div_ranges,
688 )
689 );
690
691 profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));
693
694 profile_section!(
696 "ESM blocks",
697 profile,
698 Self::detect_esm_blocks(content, &mut lines, flavor)
699 );
700
701 let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
703 "JSX/MDX detection",
704 profile,
705 Self::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
706 );
707
708 profile_section!(
710 "MkDocs constructs",
711 profile,
712 Self::detect_mkdocs_line_info(content, &mut lines, flavor)
713 );
714
715 let obsidian_comment_ranges = profile_section!(
717 "Obsidian comments",
718 profile,
719 Self::detect_obsidian_comments(content, &mut lines, flavor, &code_span_ranges)
720 );
721
722 let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));
724
725 profile_section!(
727 "Headings & blockquotes",
728 profile,
729 Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
730 );
731
732 let code_spans = profile_section!(
734 "Code spans",
735 profile,
736 Self::build_code_spans_from_ranges(content, &lines, &code_span_ranges)
737 );
738
739 for span in &code_spans {
742 if span.end_line > span.line {
743 for line_num in (span.line + 1)..=span.end_line {
745 if let Some(line_info) = lines.get_mut(line_num - 1) {
746 line_info.in_code_span_continuation = true;
747 }
748 }
749 }
750 }
751
752 let (links, broken_links, footnote_refs) = profile_section!(
754 "Links",
755 profile,
756 Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
757 );
758
759 let images = profile_section!(
760 "Images",
761 profile,
762 Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
763 );
764
765 let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));
766
767 let reference_defs_map: HashMap<String, usize> = reference_defs
769 .iter()
770 .enumerate()
771 .map(|(idx, def)| (def.id.to_lowercase(), idx))
772 .collect();
773
774 let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));
775
776 let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));
778
779 let table_blocks = profile_section!(
781 "Table blocks",
782 profile,
783 crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
784 content,
785 &code_blocks,
786 &code_spans,
787 &html_comment_ranges,
788 )
789 );
790
791 let line_index = profile_section!(
793 "Line index",
794 profile,
795 crate::utils::range_utils::LineIndex::new(content)
796 );
797
798 let jinja_ranges = profile_section!(
800 "Jinja ranges",
801 profile,
802 crate::utils::jinja_utils::find_jinja_ranges(content)
803 );
804
805 let citation_ranges = profile_section!("Citation ranges", profile, {
807 if flavor == MarkdownFlavor::Quarto {
808 crate::utils::quarto_divs::find_citation_ranges(content)
809 } else {
810 Vec::new()
811 }
812 });
813
814 let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
816 use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
817 let mut ranges = Vec::new();
818 for mat in HUGO_SHORTCODE_REGEX.find_iter(content).flatten() {
819 ranges.push((mat.start(), mat.end()));
820 }
821 ranges
822 });
823
824 let inline_config = InlineConfig::from_content_with_code_blocks(content, &code_blocks);
825
826 Self {
827 content,
828 line_offsets,
829 code_blocks,
830 lines,
831 links,
832 images,
833 broken_links,
834 footnote_refs,
835 reference_defs,
836 reference_defs_map,
837 code_spans_cache: OnceLock::from(Arc::new(code_spans)),
838 math_spans_cache: OnceLock::new(), list_blocks,
840 char_frequency,
841 html_tags_cache: OnceLock::new(),
842 emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
843 table_rows_cache: OnceLock::new(),
844 bare_urls_cache: OnceLock::new(),
845 has_mixed_list_nesting_cache: OnceLock::new(),
846 html_comment_ranges,
847 table_blocks,
848 line_index,
849 jinja_ranges,
850 flavor,
851 source_file,
852 jsx_expression_ranges,
853 mdx_comment_ranges,
854 citation_ranges,
855 shortcode_ranges,
856 inline_config,
857 obsidian_comment_ranges,
858 }
859 }
860
861 pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
866 self.inline_config.is_rule_disabled(rule_name, line_number)
867 }
868
869 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
871 Arc::clone(
872 self.code_spans_cache
873 .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
874 )
875 }
876
877 pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
879 Arc::clone(
880 self.math_spans_cache
881 .get_or_init(|| Arc::new(Self::parse_math_spans(self.content, &self.lines))),
882 )
883 }
884
885 pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
887 let math_spans = self.math_spans();
888 math_spans
889 .iter()
890 .any(|span| byte_pos >= span.byte_offset && byte_pos < span.byte_end)
891 }
892
893 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
895 &self.html_comment_ranges
896 }
897
898 pub fn obsidian_comment_ranges(&self) -> &[(usize, usize)] {
901 &self.obsidian_comment_ranges
902 }
903
904 pub fn is_in_obsidian_comment(&self, byte_pos: usize) -> bool {
908 self.obsidian_comment_ranges
909 .iter()
910 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
911 }
912
913 pub fn is_position_in_obsidian_comment(&self, line_num: usize, col: usize) -> bool {
918 if self.obsidian_comment_ranges.is_empty() {
919 return false;
920 }
921
922 let byte_pos = self.line_index.line_col_to_byte_range(line_num, col).start;
924 self.is_in_obsidian_comment(byte_pos)
925 }
926
927 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
929 Arc::clone(self.html_tags_cache.get_or_init(|| {
930 Arc::new(Self::parse_html_tags(
931 self.content,
932 &self.lines,
933 &self.code_blocks,
934 self.flavor,
935 ))
936 }))
937 }
938
939 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
941 Arc::clone(
942 self.emphasis_spans_cache
943 .get()
944 .expect("emphasis_spans_cache initialized during construction"),
945 )
946 }
947
948 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
950 Arc::clone(
951 self.table_rows_cache
952 .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
953 )
954 }
955
956 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
958 Arc::clone(
959 self.bare_urls_cache
960 .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
961 )
962 }
963
964 pub fn has_mixed_list_nesting(&self) -> bool {
968 *self
969 .has_mixed_list_nesting_cache
970 .get_or_init(|| self.compute_mixed_list_nesting())
971 }
972
973 fn compute_mixed_list_nesting(&self) -> bool {
975 let mut stack: Vec<(usize, bool)> = Vec::new();
980 let mut last_was_blank = false;
981
982 for line_info in &self.lines {
983 if line_info.in_code_block
985 || line_info.in_front_matter
986 || line_info.in_mkdocstrings
987 || line_info.in_html_comment
988 || line_info.in_esm_block
989 {
990 continue;
991 }
992
993 if line_info.is_blank {
995 last_was_blank = true;
996 continue;
997 }
998
999 if let Some(list_item) = &line_info.list_item {
1000 let current_pos = if list_item.marker_column == 1 {
1002 0
1003 } else {
1004 list_item.marker_column
1005 };
1006
1007 if last_was_blank && current_pos == 0 {
1009 stack.clear();
1010 }
1011 last_was_blank = false;
1012
1013 while let Some(&(pos, _)) = stack.last() {
1015 if pos >= current_pos {
1016 stack.pop();
1017 } else {
1018 break;
1019 }
1020 }
1021
1022 if let Some(&(_, parent_is_ordered)) = stack.last()
1024 && parent_is_ordered != list_item.is_ordered
1025 {
1026 return true; }
1028
1029 stack.push((current_pos, list_item.is_ordered));
1030 } else {
1031 last_was_blank = false;
1033 }
1034 }
1035
1036 false
1037 }
1038
1039 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
1041 match self.line_offsets.binary_search(&offset) {
1042 Ok(line) => (line + 1, 1),
1043 Err(line) => {
1044 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
1045 (line, offset - line_start + 1)
1046 }
1047 }
1048 }
1049
1050 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
1052 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
1054 return true;
1055 }
1056
1057 self.code_spans()
1059 .iter()
1060 .any(|span| pos >= span.byte_offset && pos < span.byte_end)
1061 }
1062
1063 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
1065 if line_num > 0 {
1066 self.lines.get(line_num - 1)
1067 } else {
1068 None
1069 }
1070 }
1071
1072 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
1074 self.line_info(line_num).map(|info| info.byte_offset)
1075 }
1076
1077 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
1079 let normalized_id = ref_id.to_lowercase();
1080 self.reference_defs_map
1081 .get(&normalized_id)
1082 .map(|&idx| self.reference_defs[idx].url.as_str())
1083 }
1084
1085 pub fn get_reference_def(&self, ref_id: &str) -> Option<&ReferenceDef> {
1087 let normalized_id = ref_id.to_lowercase();
1088 self.reference_defs_map
1089 .get(&normalized_id)
1090 .map(|&idx| &self.reference_defs[idx])
1091 }
1092
1093 pub fn has_reference_def(&self, ref_id: &str) -> bool {
1095 let normalized_id = ref_id.to_lowercase();
1096 self.reference_defs_map.contains_key(&normalized_id)
1097 }
1098
1099 pub fn is_in_list_block(&self, line_num: usize) -> bool {
1101 self.list_blocks
1102 .iter()
1103 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
1104 }
1105
1106 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
1108 self.list_blocks
1109 .iter()
1110 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
1111 }
1112
1113 pub fn is_in_code_block(&self, line_num: usize) -> bool {
1117 if line_num == 0 || line_num > self.lines.len() {
1118 return false;
1119 }
1120 self.lines[line_num - 1].in_code_block
1121 }
1122
1123 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
1125 if line_num == 0 || line_num > self.lines.len() {
1126 return false;
1127 }
1128 self.lines[line_num - 1].in_front_matter
1129 }
1130
1131 pub fn is_in_html_block(&self, line_num: usize) -> bool {
1133 if line_num == 0 || line_num > self.lines.len() {
1134 return false;
1135 }
1136 self.lines[line_num - 1].in_html_block
1137 }
1138
1139 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
1141 if line_num == 0 || line_num > self.lines.len() {
1142 return false;
1143 }
1144
1145 let col_0indexed = if col > 0 { col - 1 } else { 0 };
1149 let code_spans = self.code_spans();
1150 code_spans.iter().any(|span| {
1151 if line_num < span.line || line_num > span.end_line {
1153 return false;
1154 }
1155
1156 if span.line == span.end_line {
1157 col_0indexed >= span.start_col && col_0indexed < span.end_col
1159 } else if line_num == span.line {
1160 col_0indexed >= span.start_col
1162 } else if line_num == span.end_line {
1163 col_0indexed < span.end_col
1165 } else {
1166 true
1168 }
1169 })
1170 }
1171
1172 #[inline]
1174 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
1175 let code_spans = self.code_spans();
1176 code_spans
1177 .iter()
1178 .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
1179 }
1180
1181 #[inline]
1184 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
1185 self.reference_defs
1186 .iter()
1187 .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
1188 }
1189
1190 #[inline]
1194 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
1195 self.html_comment_ranges
1196 .iter()
1197 .any(|range| byte_pos >= range.start && byte_pos < range.end)
1198 }
1199
1200 #[inline]
1203 pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1204 self.html_tags()
1205 .iter()
1206 .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
1207 }
1208
1209 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1211 self.jinja_ranges
1212 .iter()
1213 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1214 }
1215
1216 #[inline]
1218 pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
1219 self.jsx_expression_ranges
1220 .iter()
1221 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1222 }
1223
1224 #[inline]
1226 pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
1227 self.mdx_comment_ranges
1228 .iter()
1229 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1230 }
1231
1232 pub fn jsx_expression_ranges(&self) -> &[(usize, usize)] {
1234 &self.jsx_expression_ranges
1235 }
1236
1237 pub fn mdx_comment_ranges(&self) -> &[(usize, usize)] {
1239 &self.mdx_comment_ranges
1240 }
1241
1242 #[inline]
1245 pub fn is_in_citation(&self, byte_pos: usize) -> bool {
1246 self.citation_ranges
1247 .iter()
1248 .any(|range| byte_pos >= range.start && byte_pos < range.end)
1249 }
1250
1251 pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
1253 &self.citation_ranges
1254 }
1255
1256 #[inline]
1258 pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
1259 self.shortcode_ranges
1260 .iter()
1261 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1262 }
1263
1264 pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
1266 &self.shortcode_ranges
1267 }
1268
1269 pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1271 self.reference_defs.iter().any(|def| {
1272 if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1273 byte_pos >= start && byte_pos < end
1274 } else {
1275 false
1276 }
1277 })
1278 }
1279
1280 pub fn has_char(&self, ch: char) -> bool {
1282 match ch {
1283 '#' => self.char_frequency.hash_count > 0,
1284 '*' => self.char_frequency.asterisk_count > 0,
1285 '_' => self.char_frequency.underscore_count > 0,
1286 '-' => self.char_frequency.hyphen_count > 0,
1287 '+' => self.char_frequency.plus_count > 0,
1288 '>' => self.char_frequency.gt_count > 0,
1289 '|' => self.char_frequency.pipe_count > 0,
1290 '[' => self.char_frequency.bracket_count > 0,
1291 '`' => self.char_frequency.backtick_count > 0,
1292 '<' => self.char_frequency.lt_count > 0,
1293 '!' => self.char_frequency.exclamation_count > 0,
1294 '\n' => self.char_frequency.newline_count > 0,
1295 _ => self.content.contains(ch), }
1297 }
1298
1299 pub fn char_count(&self, ch: char) -> usize {
1301 match ch {
1302 '#' => self.char_frequency.hash_count,
1303 '*' => self.char_frequency.asterisk_count,
1304 '_' => self.char_frequency.underscore_count,
1305 '-' => self.char_frequency.hyphen_count,
1306 '+' => self.char_frequency.plus_count,
1307 '>' => self.char_frequency.gt_count,
1308 '|' => self.char_frequency.pipe_count,
1309 '[' => self.char_frequency.bracket_count,
1310 '`' => self.char_frequency.backtick_count,
1311 '<' => self.char_frequency.lt_count,
1312 '!' => self.char_frequency.exclamation_count,
1313 '\n' => self.char_frequency.newline_count,
1314 _ => self.content.matches(ch).count(), }
1316 }
1317
1318 pub fn likely_has_headings(&self) -> bool {
1320 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
1322
1323 pub fn likely_has_lists(&self) -> bool {
1325 self.char_frequency.asterisk_count > 0
1326 || self.char_frequency.hyphen_count > 0
1327 || self.char_frequency.plus_count > 0
1328 }
1329
1330 pub fn likely_has_emphasis(&self) -> bool {
1332 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1333 }
1334
1335 pub fn likely_has_tables(&self) -> bool {
1337 self.char_frequency.pipe_count > 2
1338 }
1339
1340 pub fn likely_has_blockquotes(&self) -> bool {
1342 self.char_frequency.gt_count > 0
1343 }
1344
1345 pub fn likely_has_code(&self) -> bool {
1347 self.char_frequency.backtick_count > 0
1348 }
1349
1350 pub fn likely_has_links_or_images(&self) -> bool {
1352 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1353 }
1354
1355 pub fn likely_has_html(&self) -> bool {
1357 self.char_frequency.lt_count > 0
1358 }
1359
1360 pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1365 if let Some(line_info) = self.lines.get(line_idx)
1366 && let Some(ref bq) = line_info.blockquote
1367 {
1368 bq.prefix.trim_end().to_string()
1369 } else {
1370 String::new()
1371 }
1372 }
1373
1374 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1376 self.html_tags()
1377 .iter()
1378 .filter(|tag| tag.line == line_num)
1379 .cloned()
1380 .collect()
1381 }
1382
1383 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1385 self.emphasis_spans()
1386 .iter()
1387 .filter(|span| span.line == line_num)
1388 .cloned()
1389 .collect()
1390 }
1391
1392 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1394 self.table_rows()
1395 .iter()
1396 .filter(|row| row.line == line_num)
1397 .cloned()
1398 .collect()
1399 }
1400
1401 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1403 self.bare_urls()
1404 .iter()
1405 .filter(|url| url.line == line_num)
1406 .cloned()
1407 .collect()
1408 }
1409
1410 #[inline]
1416 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1417 let idx = match lines.binary_search_by(|line| {
1419 if byte_offset < line.byte_offset {
1420 std::cmp::Ordering::Greater
1421 } else if byte_offset > line.byte_offset + line.byte_len {
1422 std::cmp::Ordering::Less
1423 } else {
1424 std::cmp::Ordering::Equal
1425 }
1426 }) {
1427 Ok(idx) => idx,
1428 Err(idx) => idx.saturating_sub(1),
1429 };
1430
1431 let line = &lines[idx];
1432 let line_num = idx + 1;
1433 let col = byte_offset.saturating_sub(line.byte_offset);
1434
1435 (idx, line_num, col)
1436 }
1437
1438 #[inline]
1440 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1441 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1443
1444 if idx > 0 {
1446 let span = &code_spans[idx - 1];
1447 if offset >= span.byte_offset && offset < span.byte_end {
1448 return true;
1449 }
1450 }
1451
1452 false
1453 }
1454
1455 fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1459 use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1460
1461 let mut link_ranges = Vec::new();
1462 let mut options = Options::empty();
1463 options.insert(Options::ENABLE_WIKILINKS);
1464 options.insert(Options::ENABLE_FOOTNOTES);
1465
1466 let parser = Parser::new_ext(content, options).into_offset_iter();
1467 let mut link_stack: Vec<usize> = Vec::new();
1468
1469 for (event, range) in parser {
1470 match event {
1471 Event::Start(Tag::Link { .. }) => {
1472 link_stack.push(range.start);
1473 }
1474 Event::End(TagEnd::Link) => {
1475 if let Some(start_pos) = link_stack.pop() {
1476 link_ranges.push((start_pos, range.end));
1477 }
1478 }
1479 _ => {}
1480 }
1481 }
1482
1483 link_ranges
1484 }
1485
1486 fn parse_links(
1488 content: &'a str,
1489 lines: &[LineInfo],
1490 code_blocks: &[(usize, usize)],
1491 code_spans: &[CodeSpan],
1492 flavor: MarkdownFlavor,
1493 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1494 ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1495 use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1496 use std::collections::HashSet;
1497
1498 let mut links = Vec::with_capacity(content.len() / 500);
1499 let mut broken_links = Vec::new();
1500 let mut footnote_refs = Vec::new();
1501
1502 let mut found_positions = HashSet::new();
1504
1505 let mut options = Options::empty();
1515 options.insert(Options::ENABLE_WIKILINKS);
1516 options.insert(Options::ENABLE_FOOTNOTES);
1517
1518 let parser = Parser::new_with_broken_link_callback(
1519 content,
1520 options,
1521 Some(|link: BrokenLink<'_>| {
1522 broken_links.push(BrokenLinkInfo {
1523 reference: link.reference.to_string(),
1524 span: link.span.clone(),
1525 });
1526 None
1527 }),
1528 )
1529 .into_offset_iter();
1530
1531 let mut link_stack: Vec<(
1532 usize,
1533 usize,
1534 pulldown_cmark::CowStr<'a>,
1535 LinkType,
1536 pulldown_cmark::CowStr<'a>,
1537 )> = Vec::new();
1538 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1541 match event {
1542 Event::Start(Tag::Link {
1543 link_type,
1544 dest_url,
1545 id,
1546 ..
1547 }) => {
1548 link_stack.push((range.start, range.end, dest_url, link_type, id));
1550 text_chunks.clear();
1551 }
1552 Event::Text(text) if !link_stack.is_empty() => {
1553 text_chunks.push((text.to_string(), range.start, range.end));
1555 }
1556 Event::Code(code) if !link_stack.is_empty() => {
1557 let code_text = format!("`{code}`");
1559 text_chunks.push((code_text, range.start, range.end));
1560 }
1561 Event::End(TagEnd::Link) => {
1562 if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1563 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1565 text_chunks.clear();
1566 continue;
1567 }
1568
1569 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1571
1572 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1574 text_chunks.clear();
1575 continue;
1576 }
1577
1578 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1579
1580 let is_reference = matches!(
1581 link_type,
1582 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1583 );
1584
1585 let link_text = if start_pos < content.len() {
1588 let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1589
1590 let mut close_pos = None;
1594 let mut depth = 0;
1595 let mut in_code_span = false;
1596
1597 for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1598 let mut backslash_count = 0;
1600 let mut j = i;
1601 while j > 0 && link_bytes[j - 1] == b'\\' {
1602 backslash_count += 1;
1603 j -= 1;
1604 }
1605 let is_escaped = backslash_count % 2 != 0;
1606
1607 if byte == b'`' && !is_escaped {
1609 in_code_span = !in_code_span;
1610 }
1611
1612 if !is_escaped && !in_code_span {
1614 if byte == b'[' {
1615 depth += 1;
1616 } else if byte == b']' {
1617 if depth == 0 {
1618 close_pos = Some(i);
1620 break;
1621 } else {
1622 depth -= 1;
1623 }
1624 }
1625 }
1626 }
1627
1628 if let Some(pos) = close_pos {
1629 Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1630 } else {
1631 Cow::Borrowed("")
1632 }
1633 } else {
1634 Cow::Borrowed("")
1635 };
1636
1637 let reference_id = if is_reference && !ref_id.is_empty() {
1639 Some(Cow::Owned(ref_id.to_lowercase()))
1640 } else if is_reference {
1641 Some(Cow::Owned(link_text.to_lowercase()))
1643 } else {
1644 None
1645 };
1646
1647 found_positions.insert(start_pos);
1649
1650 links.push(ParsedLink {
1651 line: line_num,
1652 start_col: col_start,
1653 end_col: col_end,
1654 byte_offset: start_pos,
1655 byte_end: range.end,
1656 text: link_text,
1657 url: Cow::Owned(url.to_string()),
1658 is_reference,
1659 reference_id,
1660 link_type,
1661 });
1662
1663 text_chunks.clear();
1664 }
1665 }
1666 Event::FootnoteReference(footnote_id) => {
1667 if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1670 continue;
1671 }
1672
1673 let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1674 footnote_refs.push(FootnoteRef {
1675 id: footnote_id.to_string(),
1676 line: line_num,
1677 byte_offset: range.start,
1678 byte_end: range.end,
1679 });
1680 }
1681 _ => {}
1682 }
1683 }
1684
1685 for cap in LINK_PATTERN.captures_iter(content) {
1689 let full_match = cap.get(0).unwrap();
1690 let match_start = full_match.start();
1691 let match_end = full_match.end();
1692
1693 if found_positions.contains(&match_start) {
1695 continue;
1696 }
1697
1698 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1700 continue;
1701 }
1702
1703 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1705 continue;
1706 }
1707
1708 if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1710 continue;
1711 }
1712
1713 if Self::is_offset_in_code_span(code_spans, match_start) {
1715 continue;
1716 }
1717
1718 if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1720 continue;
1721 }
1722
1723 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1725
1726 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1728 continue;
1729 }
1730
1731 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1732
1733 let text = cap.get(1).map_or("", |m| m.as_str());
1734
1735 if let Some(ref_id) = cap.get(6) {
1737 let ref_id_str = ref_id.as_str();
1738 let normalized_ref = if ref_id_str.is_empty() {
1739 Cow::Owned(text.to_lowercase()) } else {
1741 Cow::Owned(ref_id_str.to_lowercase())
1742 };
1743
1744 links.push(ParsedLink {
1746 line: line_num,
1747 start_col: col_start,
1748 end_col: col_end,
1749 byte_offset: match_start,
1750 byte_end: match_end,
1751 text: Cow::Borrowed(text),
1752 url: Cow::Borrowed(""), is_reference: true,
1754 reference_id: Some(normalized_ref),
1755 link_type: LinkType::Reference, });
1757 }
1758 }
1759
1760 (links, broken_links, footnote_refs)
1761 }
1762
1763 fn parse_images(
1765 content: &'a str,
1766 lines: &[LineInfo],
1767 code_blocks: &[(usize, usize)],
1768 code_spans: &[CodeSpan],
1769 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1770 ) -> Vec<ParsedImage<'a>> {
1771 use crate::utils::skip_context::is_in_html_comment_ranges;
1772 use std::collections::HashSet;
1773
1774 let mut images = Vec::with_capacity(content.len() / 1000);
1776 let mut found_positions = HashSet::new();
1777
1778 let parser = Parser::new(content).into_offset_iter();
1780 let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1781 Vec::new();
1782 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1785 match event {
1786 Event::Start(Tag::Image {
1787 link_type,
1788 dest_url,
1789 id,
1790 ..
1791 }) => {
1792 image_stack.push((range.start, dest_url, link_type, id));
1793 text_chunks.clear();
1794 }
1795 Event::Text(text) if !image_stack.is_empty() => {
1796 text_chunks.push((text.to_string(), range.start, range.end));
1797 }
1798 Event::Code(code) if !image_stack.is_empty() => {
1799 let code_text = format!("`{code}`");
1800 text_chunks.push((code_text, range.start, range.end));
1801 }
1802 Event::End(TagEnd::Image) => {
1803 if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1804 if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1806 continue;
1807 }
1808
1809 if Self::is_offset_in_code_span(code_spans, start_pos) {
1811 continue;
1812 }
1813
1814 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1816 continue;
1817 }
1818
1819 let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1821 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1822
1823 let is_reference = matches!(
1824 link_type,
1825 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1826 );
1827
1828 let alt_text = if start_pos < content.len() {
1831 let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1832
1833 let mut close_pos = None;
1836 let mut depth = 0;
1837
1838 if image_bytes.len() > 2 {
1839 for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1840 let mut backslash_count = 0;
1842 let mut j = i;
1843 while j > 0 && image_bytes[j - 1] == b'\\' {
1844 backslash_count += 1;
1845 j -= 1;
1846 }
1847 let is_escaped = backslash_count % 2 != 0;
1848
1849 if !is_escaped {
1850 if byte == b'[' {
1851 depth += 1;
1852 } else if byte == b']' {
1853 if depth == 0 {
1854 close_pos = Some(i);
1856 break;
1857 } else {
1858 depth -= 1;
1859 }
1860 }
1861 }
1862 }
1863 }
1864
1865 if let Some(pos) = close_pos {
1866 Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1867 } else {
1868 Cow::Borrowed("")
1869 }
1870 } else {
1871 Cow::Borrowed("")
1872 };
1873
1874 let reference_id = if is_reference && !ref_id.is_empty() {
1875 Some(Cow::Owned(ref_id.to_lowercase()))
1876 } else if is_reference {
1877 Some(Cow::Owned(alt_text.to_lowercase())) } else {
1879 None
1880 };
1881
1882 found_positions.insert(start_pos);
1883 images.push(ParsedImage {
1884 line: line_num,
1885 start_col: col_start,
1886 end_col: col_end,
1887 byte_offset: start_pos,
1888 byte_end: range.end,
1889 alt_text,
1890 url: Cow::Owned(url.to_string()),
1891 is_reference,
1892 reference_id,
1893 link_type,
1894 });
1895 }
1896 }
1897 _ => {}
1898 }
1899 }
1900
1901 for cap in IMAGE_PATTERN.captures_iter(content) {
1903 let full_match = cap.get(0).unwrap();
1904 let match_start = full_match.start();
1905 let match_end = full_match.end();
1906
1907 if found_positions.contains(&match_start) {
1909 continue;
1910 }
1911
1912 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1914 continue;
1915 }
1916
1917 if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1919 || Self::is_offset_in_code_span(code_spans, match_start)
1920 || is_in_html_comment_ranges(html_comment_ranges, match_start)
1921 {
1922 continue;
1923 }
1924
1925 if let Some(ref_id) = cap.get(6) {
1927 let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1928 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1929 let alt_text = cap.get(1).map_or("", |m| m.as_str());
1930 let ref_id_str = ref_id.as_str();
1931 let normalized_ref = if ref_id_str.is_empty() {
1932 Cow::Owned(alt_text.to_lowercase())
1933 } else {
1934 Cow::Owned(ref_id_str.to_lowercase())
1935 };
1936
1937 images.push(ParsedImage {
1938 line: line_num,
1939 start_col: col_start,
1940 end_col: col_end,
1941 byte_offset: match_start,
1942 byte_end: match_end,
1943 alt_text: Cow::Borrowed(alt_text),
1944 url: Cow::Borrowed(""),
1945 is_reference: true,
1946 reference_id: Some(normalized_ref),
1947 link_type: LinkType::Reference, });
1949 }
1950 }
1951
1952 images
1953 }
1954
1955 fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1957 let mut refs = Vec::with_capacity(lines.len() / 20); for (line_idx, line_info) in lines.iter().enumerate() {
1961 if line_info.in_code_block {
1963 continue;
1964 }
1965
1966 let line = line_info.content(content);
1967 let line_num = line_idx + 1;
1968
1969 if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1970 let id_raw = cap.get(1).unwrap().as_str();
1971
1972 if id_raw.starts_with('^') {
1975 continue;
1976 }
1977
1978 let id = id_raw.to_lowercase();
1979 let url = cap.get(2).unwrap().as_str().to_string();
1980 let title_match = cap.get(3).or_else(|| cap.get(4));
1981 let title = title_match.map(|m| m.as_str().to_string());
1982
1983 let match_obj = cap.get(0).unwrap();
1986 let byte_offset = line_info.byte_offset + match_obj.start();
1987 let byte_end = line_info.byte_offset + match_obj.end();
1988
1989 let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1991 let start = line_info.byte_offset + m.start().saturating_sub(1);
1993 let end = line_info.byte_offset + m.end() + 1; (Some(start), Some(end))
1995 } else {
1996 (None, None)
1997 };
1998
1999 refs.push(ReferenceDef {
2000 line: line_num,
2001 id,
2002 url,
2003 title,
2004 byte_offset,
2005 byte_end,
2006 title_byte_start,
2007 title_byte_end,
2008 });
2009 }
2010 }
2011
2012 refs
2013 }
2014
2015 #[inline]
2019 fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
2020 let trimmed_start = line.trim_start();
2021 if !trimmed_start.starts_with('>') {
2022 return None;
2023 }
2024
2025 let mut remaining = line;
2027 let mut total_prefix_len = 0;
2028
2029 loop {
2030 let trimmed = remaining.trim_start();
2031 if !trimmed.starts_with('>') {
2032 break;
2033 }
2034
2035 let leading_ws_len = remaining.len() - trimmed.len();
2037 total_prefix_len += leading_ws_len + 1;
2038
2039 let after_gt = &trimmed[1..];
2040
2041 if let Some(stripped) = after_gt.strip_prefix(' ') {
2043 total_prefix_len += 1;
2044 remaining = stripped;
2045 } else if let Some(stripped) = after_gt.strip_prefix('\t') {
2046 total_prefix_len += 1;
2047 remaining = stripped;
2048 } else {
2049 remaining = after_gt;
2050 }
2051 }
2052
2053 Some((&line[..total_prefix_len], remaining))
2054 }
2055
2056 fn detect_list_items_and_emphasis_with_pulldown(
2080 content: &str,
2081 line_offsets: &[usize],
2082 flavor: MarkdownFlavor,
2083 front_matter_end: usize,
2084 code_blocks: &[(usize, usize)],
2085 ) -> (ListItemMap, Vec<EmphasisSpan>) {
2086 use std::collections::HashMap;
2087
2088 let mut list_items = HashMap::new();
2089 let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
2090
2091 let mut options = Options::empty();
2092 options.insert(Options::ENABLE_TABLES);
2093 options.insert(Options::ENABLE_FOOTNOTES);
2094 options.insert(Options::ENABLE_STRIKETHROUGH);
2095 options.insert(Options::ENABLE_TASKLISTS);
2096 options.insert(Options::ENABLE_GFM);
2098
2099 let _ = flavor;
2101
2102 let parser = Parser::new_ext(content, options).into_offset_iter();
2103 let mut list_depth: usize = 0;
2104 let mut list_stack: Vec<bool> = Vec::new();
2105
2106 for (event, range) in parser {
2107 match event {
2108 Event::Start(Tag::Emphasis) | Event::Start(Tag::Strong) => {
2110 let marker_count = if matches!(event, Event::Start(Tag::Strong)) {
2111 2
2112 } else {
2113 1
2114 };
2115 let match_start = range.start;
2116 let match_end = range.end;
2117
2118 if !CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2120 let marker = content[match_start..].chars().next().unwrap_or('*');
2122 if marker == '*' || marker == '_' {
2123 let content_start = match_start + marker_count;
2125 let content_end = if match_end >= marker_count {
2126 match_end - marker_count
2127 } else {
2128 match_end
2129 };
2130 let content_part = if content_start < content_end && content_end <= content.len() {
2131 &content[content_start..content_end]
2132 } else {
2133 ""
2134 };
2135
2136 let line_idx = match line_offsets.binary_search(&match_start) {
2138 Ok(idx) => idx,
2139 Err(idx) => idx.saturating_sub(1),
2140 };
2141 let line_num = line_idx + 1;
2142 let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
2143 let col_start = match_start - line_start;
2144 let col_end = match_end - line_start;
2145
2146 emphasis_spans.push(EmphasisSpan {
2147 line: line_num,
2148 start_col: col_start,
2149 end_col: col_end,
2150 byte_offset: match_start,
2151 byte_end: match_end,
2152 marker,
2153 marker_count,
2154 content: content_part.to_string(),
2155 });
2156 }
2157 }
2158 }
2159 Event::Start(Tag::List(start_number)) => {
2160 list_depth += 1;
2161 list_stack.push(start_number.is_some());
2162 }
2163 Event::End(TagEnd::List(_)) => {
2164 list_depth = list_depth.saturating_sub(1);
2165 list_stack.pop();
2166 }
2167 Event::Start(Tag::Item) if list_depth > 0 => {
2168 let current_list_is_ordered = list_stack.last().copied().unwrap_or(false);
2170 let item_start = range.start;
2172
2173 let mut line_idx = match line_offsets.binary_search(&item_start) {
2175 Ok(idx) => idx,
2176 Err(idx) => idx.saturating_sub(1),
2177 };
2178
2179 if item_start < content.len() && content.as_bytes()[item_start] == b'\n' {
2183 line_idx += 1;
2184 }
2185
2186 if front_matter_end > 0 && line_idx < front_matter_end {
2188 continue;
2189 }
2190
2191 if line_idx < line_offsets.len() {
2192 let line_start_byte = line_offsets[line_idx];
2193 let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
2194 let line = &content[line_start_byte..line_end.min(content.len())];
2195
2196 let line = line
2198 .strip_suffix('\n')
2199 .or_else(|| line.strip_suffix("\r\n"))
2200 .unwrap_or(line);
2201
2202 let blockquote_parse = Self::parse_blockquote_prefix(line);
2204 let (blockquote_prefix_len, line_to_parse) = if let Some((prefix, content)) = blockquote_parse {
2205 (prefix.len(), content)
2206 } else {
2207 (0, line)
2208 };
2209
2210 if current_list_is_ordered {
2212 if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
2213 Self::parse_ordered_list(line_to_parse)
2214 {
2215 let marker = format!("{number_str}{delimiter}");
2216 let marker_column = blockquote_prefix_len + leading_spaces.len();
2217 let content_column = marker_column + marker.len() + spacing.len();
2218 let number = number_str.parse().ok();
2219
2220 list_items.entry(line_start_byte).or_insert((
2221 true,
2222 marker,
2223 marker_column,
2224 content_column,
2225 number,
2226 ));
2227 }
2228 } else if let Some((leading_spaces, marker, spacing, _content)) =
2229 Self::parse_unordered_list(line_to_parse)
2230 {
2231 let marker_column = blockquote_prefix_len + leading_spaces.len();
2232 let content_column = marker_column + 1 + spacing.len();
2233
2234 list_items.entry(line_start_byte).or_insert((
2235 false,
2236 marker.to_string(),
2237 marker_column,
2238 content_column,
2239 None,
2240 ));
2241 }
2242 }
2243 }
2244 _ => {}
2245 }
2246 }
2247
2248 (list_items, emphasis_spans)
2249 }
2250
2251 #[inline]
2255 fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
2256 let bytes = line.as_bytes();
2257 let mut i = 0;
2258
2259 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2261 i += 1;
2262 }
2263
2264 if i >= bytes.len() {
2266 return None;
2267 }
2268 let marker = bytes[i] as char;
2269 if marker != '-' && marker != '*' && marker != '+' {
2270 return None;
2271 }
2272 let marker_pos = i;
2273 i += 1;
2274
2275 let spacing_start = i;
2277 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2278 i += 1;
2279 }
2280
2281 Some((&line[..marker_pos], marker, &line[spacing_start..i], &line[i..]))
2282 }
2283
2284 #[inline]
2288 fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
2289 let bytes = line.as_bytes();
2290 let mut i = 0;
2291
2292 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2294 i += 1;
2295 }
2296
2297 let number_start = i;
2299 while i < bytes.len() && bytes[i].is_ascii_digit() {
2300 i += 1;
2301 }
2302 if i == number_start {
2303 return None; }
2305
2306 if i >= bytes.len() {
2308 return None;
2309 }
2310 let delimiter = bytes[i] as char;
2311 if delimiter != '.' && delimiter != ')' {
2312 return None;
2313 }
2314 let delimiter_pos = i;
2315 i += 1;
2316
2317 let spacing_start = i;
2319 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2320 i += 1;
2321 }
2322
2323 Some((
2324 &line[..number_start],
2325 &line[number_start..delimiter_pos],
2326 delimiter,
2327 &line[spacing_start..i],
2328 &line[i..],
2329 ))
2330 }
2331
2332 fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
2335 let num_lines = line_offsets.len();
2336 let mut in_code_block = vec![false; num_lines];
2337
2338 for &(start, end) in code_blocks {
2340 let safe_start = if start > 0 && !content.is_char_boundary(start) {
2342 let mut boundary = start;
2343 while boundary > 0 && !content.is_char_boundary(boundary) {
2344 boundary -= 1;
2345 }
2346 boundary
2347 } else {
2348 start
2349 };
2350
2351 let safe_end = if end < content.len() && !content.is_char_boundary(end) {
2352 let mut boundary = end;
2353 while boundary < content.len() && !content.is_char_boundary(boundary) {
2354 boundary += 1;
2355 }
2356 boundary
2357 } else {
2358 end.min(content.len())
2359 };
2360
2361 let first_line_after = line_offsets.partition_point(|&offset| offset <= safe_start);
2380 let first_line = first_line_after.saturating_sub(1);
2381 let last_line = line_offsets.partition_point(|&offset| offset < safe_end);
2382
2383 for flag in in_code_block.iter_mut().take(last_line).skip(first_line) {
2385 *flag = true;
2386 }
2387 }
2388
2389 in_code_block
2390 }
2391
2392 fn compute_math_block_line_map(content: &str, code_block_map: &[bool]) -> Vec<bool> {
2395 let content_lines: Vec<&str> = content.lines().collect();
2396 let num_lines = content_lines.len();
2397 let mut in_math_block = vec![false; num_lines];
2398
2399 let mut inside_math = false;
2400
2401 for (i, line) in content_lines.iter().enumerate() {
2402 if code_block_map.get(i).copied().unwrap_or(false) {
2404 continue;
2405 }
2406
2407 let trimmed = line.trim();
2408
2409 if trimmed == "$$" {
2412 if inside_math {
2413 in_math_block[i] = true;
2415 inside_math = false;
2416 } else {
2417 in_math_block[i] = true;
2419 inside_math = true;
2420 }
2421 } else if inside_math {
2422 in_math_block[i] = true;
2424 }
2425 }
2426
2427 in_math_block
2428 }
2429
2430 fn compute_basic_line_info(
2433 content: &str,
2434 line_offsets: &[usize],
2435 code_blocks: &[(usize, usize)],
2436 flavor: MarkdownFlavor,
2437 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2438 autodoc_ranges: &[crate::utils::skip_context::ByteRange],
2439 quarto_div_ranges: &[crate::utils::skip_context::ByteRange],
2440 ) -> (Vec<LineInfo>, Vec<EmphasisSpan>) {
2441 let content_lines: Vec<&str> = content.lines().collect();
2442 let mut lines = Vec::with_capacity(content_lines.len());
2443
2444 let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
2446
2447 let math_block_map = Self::compute_math_block_line_map(content, &code_block_map);
2449
2450 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2453
2454 let (list_item_map, emphasis_spans) = Self::detect_list_items_and_emphasis_with_pulldown(
2457 content,
2458 line_offsets,
2459 flavor,
2460 front_matter_end,
2461 code_blocks,
2462 );
2463
2464 for (i, line) in content_lines.iter().enumerate() {
2465 let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
2466 let indent = line.len() - line.trim_start().len();
2467 let visual_indent = ElementCache::calculate_indentation_width_default(line);
2469
2470 let blockquote_parse = Self::parse_blockquote_prefix(line);
2472
2473 let is_blank = if let Some((_, content)) = blockquote_parse {
2475 content.trim().is_empty()
2477 } else {
2478 line.trim().is_empty()
2479 };
2480
2481 let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
2483
2484 let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
2486 && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
2487 let line_end_offset = byte_offset + line.len();
2490 let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
2491 html_comment_ranges,
2492 byte_offset,
2493 line_end_offset,
2494 );
2495 let list_item =
2498 list_item_map
2499 .get(&byte_offset)
2500 .map(
2501 |(is_ordered, marker, marker_column, content_column, number)| ListItemInfo {
2502 marker: marker.clone(),
2503 is_ordered: *is_ordered,
2504 number: *number,
2505 marker_column: *marker_column,
2506 content_column: *content_column,
2507 },
2508 );
2509
2510 let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2513 let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2514
2515 let in_math_block = math_block_map.get(i).copied().unwrap_or(false);
2517
2518 let in_quarto_div = flavor == MarkdownFlavor::Quarto
2520 && crate::utils::quarto_divs::is_within_div_block_ranges(quarto_div_ranges, byte_offset);
2521
2522 lines.push(LineInfo {
2523 byte_offset,
2524 byte_len: line.len(),
2525 indent,
2526 visual_indent,
2527 is_blank,
2528 in_code_block,
2529 in_front_matter,
2530 in_html_block: false, in_html_comment,
2532 list_item,
2533 heading: None, blockquote: None, in_mkdocstrings,
2536 in_esm_block: false, in_code_span_continuation: false, is_horizontal_rule: is_hr,
2539 in_math_block,
2540 in_quarto_div,
2541 in_jsx_expression: false, in_mdx_comment: false, in_jsx_component: false, in_jsx_fragment: false, in_admonition: false, in_content_tab: false, in_definition_list: false, in_obsidian_comment: false, });
2550 }
2551
2552 (lines, emphasis_spans)
2553 }
2554
    /// Fills in `blockquote` and `heading` (and, for blockquoted rules,
    /// `is_horizontal_rule`) on each entry of `lines`.
    ///
    /// * `content` - full document text; `lines[i]` is expected to describe
    ///   `content.lines().nth(i)` (indexing panics if `lines` is longer).
    /// * `flavor` - only consulted to skip MkDocs snippet section markers.
    /// * `html_comment_ranges` - headings whose line starts inside one are ignored.
    /// * `link_byte_ranges` - ATX candidates whose line starts strictly inside
    ///   a link range are ignored.
    ///
    /// Blockquote info is recorded for every non-front-matter line, including
    /// lines inside code blocks. Heading detection skips code blocks, front
    /// matter, HTML blocks and blank lines.
    fn detect_headings_and_blockquotes(
        content: &str,
        lines: &mut [LineInfo],
        flavor: MarkdownFlavor,
        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
        link_byte_ranges: &[(usize, usize)],
    ) {
        // Groups: 1 = leading whitespace, 2 = one to six `#`, 3 = whitespace
        // after the hashes (may be empty), 4 = rest of the line.
        static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
            LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
        // A line made only of `=` or only of `-`, with optional surrounding whitespace.
        static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
            LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());

        let content_lines: Vec<&str> = content.lines().collect();

        // Number of leading lines that belong to front matter (0 when there is none).
        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);

        for i in 0..lines.len() {
            let line = content_lines[i];

            // Blockquote detection runs before the code-block check below, so
            // it also applies to lines inside code blocks.
            if !(front_matter_end > 0 && i < front_matter_end)
                && let Some(bq) = parse_blockquote_detailed(line)
            {
                let nesting_level = bq.markers.len();
                let marker_column = bq.indent.len();
                let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
                let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
                let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
                // A bare `>` with nothing after it.
                let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();

                lines[i].blockquote = Some(BlockquoteInfo {
                    nesting_level,
                    indent: bq.indent.to_string(),
                    marker_column,
                    prefix,
                    content: bq.content.to_string(),
                    has_no_space_after_marker: has_no_space,
                    has_multiple_spaces_after_marker: has_multiple_spaces,
                    needs_md028_fix,
                });

                // A horizontal rule written inside a blockquote (`> ---`).
                if !lines[i].in_code_block && is_horizontal_rule_content(bq.content.trim()) {
                    lines[i].is_horizontal_rule = true;
                }
            }

            // Everything below is heading detection; none of these line kinds
            // can carry a heading.
            if lines[i].in_code_block {
                continue;
            }

            if front_matter_end > 0 && i < front_matter_end {
                continue;
            }

            if lines[i].in_html_block {
                continue;
            }

            if lines[i].is_blank {
                continue;
            }

            // MkDocs snippet section markers are excluded from ATX matching.
            let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
                crate::utils::mkdocs_snippets::is_snippet_section_start(line)
                    || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
            } else {
                false
            };

            if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
                if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
                    continue;
                }
                // Skip when the line starts strictly inside a link's byte range
                // (presumably a multi-line link whose continuation starts with `#`).
                let line_offset = lines[i].byte_offset;
                if link_byte_ranges
                    .iter()
                    .any(|&(start, end)| line_offset > start && line_offset < end)
                {
                    continue;
                }
                let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
                let hashes = caps.get(2).map_or("", |m| m.as_str());
                let spaces_after = caps.get(3).map_or("", |m| m.as_str());
                let rest = caps.get(4).map_or("", |m| m.as_str());

                let level = hashes.len() as u8;
                let marker_column = leading_spaces.len();

                // Split `rest` into heading text and an optional closing `#` run.
                let (text, has_closing, closing_seq) = {
                    // Set a trailing ` {#id}` aside so it is not mistaken for,
                    // and does not hide, a closing sequence.
                    let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
                        if rest[id_start..].trim_end().ends_with('}') {
                            (&rest[..id_start], &rest[id_start..])
                        } else {
                            (rest, "")
                        }
                    } else {
                        (rest, "")
                    };

                    let trimmed_rest = rest_without_id.trim_end();
                    if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
                        // Work on (byte position, char) pairs so stepping back
                        // one character is safe with multi-byte text.
                        let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();

                        let last_hash_char_idx = char_positions
                            .iter()
                            .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);

                        if let Some(mut char_idx) = last_hash_char_idx {
                            // Walk back to the first `#` of the contiguous run.
                            while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
                                char_idx -= 1;
                            }

                            let start_of_hashes = char_positions[char_idx].0;

                            let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();

                            // The run only closes the heading if nothing but
                            // `#` follows it and whitespace (or nothing) precedes it.
                            let potential_closing = &trimmed_rest[start_of_hashes..];
                            let is_all_hashes = potential_closing.chars().all(|c| c == '#');

                            if is_all_hashes && has_space_before {
                                let closing_hashes = potential_closing.to_string();
                                // Re-attach the custom id so it can be extracted below.
                                let text_part = if !custom_id_part.is_empty() {
                                    format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
                                } else {
                                    trimmed_rest[..start_of_hashes].trim_end().to_string()
                                };
                                (text_part, true, closing_hashes)
                            } else {
                                (rest.to_string(), false, String::new())
                            }
                        } else {
                            (rest.to_string(), false, String::new())
                        }
                    } else {
                        (rest.to_string(), false, String::new())
                    }
                };

                let content_column = marker_column + hashes.len() + spaces_after.len();

                let raw_text = text.trim().to_string();
                let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);

                // No inline id: accept a standalone attribute list on the next line.
                if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
                    let next_line = content_lines[i + 1];
                    if !lines[i + 1].in_code_block
                        && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
                        && let Some(next_line_id) =
                            crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
                    {
                        custom_id = Some(next_line_id);
                    }
                }

                // Valid when there is whitespace after the hashes, nothing
                // follows them, the level is above 1, or the text starts with
                // an uppercase character. NOTE(review): the last two look like
                // a heuristic for `#tag`-style text; confirm the intent.
                let is_valid = !spaces_after.is_empty()
                    || rest.is_empty()
                    || level > 1
                    || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());

                lines[i].heading = Some(HeadingInfo {
                    level,
                    style: HeadingStyle::ATX,
                    marker: hashes.to_string(),
                    marker_column,
                    content_column,
                    text: clean_text,
                    custom_id,
                    raw_text,
                    has_closing_sequence: has_closing,
                    closing_sequence: closing_seq,
                    is_valid,
                });
            }
            // Not an ATX heading: check whether the next line underlines this one (setext).
            else if i + 1 < content_lines.len() && i + 1 < lines.len() {
                let next_line = content_lines[i + 1];
                if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
                    // NOTE(review): redundant, front-matter lines already hit
                    // the `continue` near the top of the loop.
                    if front_matter_end > 0 && i < front_matter_end {
                        continue;
                    }

                    if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
                    {
                        continue;
                    }

                    let content_line = line.trim();

                    // The checks below reject lines that start other
                    // constructs and so are not treated as setext heading text.

                    // Starts with an unordered list marker character.
                    if content_line.starts_with('-') || content_line.starts_with('*') || content_line.starts_with('+') {
                        continue;
                    }

                    // Three or more underscores and nothing else (ignoring whitespace).
                    if content_line.starts_with('_') {
                        let non_ws: String = content_line.chars().filter(|c| !c.is_whitespace()).collect();
                        if non_ws.len() >= 3 && non_ws.chars().all(|c| c == '_') {
                            continue;
                        }
                    }

                    // Digits followed by `.` or `)`: an ordered list item.
                    if let Some(first_char) = content_line.chars().next()
                        && first_char.is_ascii_digit()
                    {
                        let num_end = content_line.chars().take_while(|c| c.is_ascii_digit()).count();
                        if num_end < content_line.len() {
                            let next = content_line.chars().nth(num_end);
                            if next == Some('.') || next == Some(')') {
                                continue;
                            }
                        }
                    }

                    // Matches the ATX pattern (reachable for MkDocs snippet
                    // lines, which bypass the ATX branch above).
                    if ATX_HEADING_REGEX.is_match(line) {
                        continue;
                    }

                    // Blockquote line.
                    if content_line.starts_with('>') {
                        continue;
                    }

                    // Fence opener or closer.
                    let trimmed_start = line.trim_start();
                    if trimmed_start.len() >= 3 {
                        let first_three: String = trimmed_start.chars().take(3).collect();
                        if first_three == "```" || first_three == "~~~" {
                            continue;
                        }
                    }

                    // Starts with `<` (looks like HTML).
                    if content_line.starts_with('<') {
                        continue;
                    }

                    let underline = next_line.trim();

                    // `=` underline is level 1, `-` underline is level 2.
                    let level = if underline.starts_with('=') { 1 } else { 2 };
                    let style = if level == 1 {
                        HeadingStyle::Setext1
                    } else {
                        HeadingStyle::Setext2
                    };

                    let raw_text = line.trim().to_string();
                    let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);

                    // No inline id: accept a standalone attribute list on the
                    // line after the underline.
                    if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
                        let attr_line = content_lines[i + 2];
                        if !lines[i + 2].in_code_block
                            && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
                            && let Some(attr_line_id) =
                                crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
                        {
                            custom_id = Some(attr_line_id);
                        }
                    }

                    // For setext headings the marker is the underline text and
                    // `marker_column` is the underline's indentation.
                    lines[i].heading = Some(HeadingInfo {
                        level,
                        style,
                        marker: underline.to_string(),
                        marker_column: next_line.len() - next_line.trim_start().len(),
                        content_column: lines[i].indent,
                        text: clean_text,
                        custom_id,
                        raw_text,
                        has_closing_sequence: false,
                        closing_sequence: String::new(),
                        is_valid: true,
                    });
                }
            }
        }
    }
2888
2889 fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2891 const BLOCK_ELEMENTS: &[&str] = &[
2894 "address",
2895 "article",
2896 "aside",
2897 "audio",
2898 "blockquote",
2899 "canvas",
2900 "details",
2901 "dialog",
2902 "dd",
2903 "div",
2904 "dl",
2905 "dt",
2906 "embed",
2907 "fieldset",
2908 "figcaption",
2909 "figure",
2910 "footer",
2911 "form",
2912 "h1",
2913 "h2",
2914 "h3",
2915 "h4",
2916 "h5",
2917 "h6",
2918 "header",
2919 "hr",
2920 "iframe",
2921 "li",
2922 "main",
2923 "menu",
2924 "nav",
2925 "noscript",
2926 "object",
2927 "ol",
2928 "p",
2929 "picture",
2930 "pre",
2931 "script",
2932 "search",
2933 "section",
2934 "source",
2935 "style",
2936 "summary",
2937 "svg",
2938 "table",
2939 "tbody",
2940 "td",
2941 "template",
2942 "textarea",
2943 "tfoot",
2944 "th",
2945 "thead",
2946 "tr",
2947 "track",
2948 "ul",
2949 "video",
2950 ];
2951
2952 let mut i = 0;
2953 while i < lines.len() {
2954 if lines[i].in_code_block || lines[i].in_front_matter {
2956 i += 1;
2957 continue;
2958 }
2959
2960 let trimmed = lines[i].content(content).trim_start();
2961
2962 if trimmed.starts_with('<') && trimmed.len() > 1 {
2964 let after_bracket = &trimmed[1..];
2966 let is_closing = after_bracket.starts_with('/');
2967 let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2968
2969 let tag_name = tag_start
2971 .chars()
2972 .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2973 .collect::<String>()
2974 .to_lowercase();
2975
2976 if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2978 lines[i].in_html_block = true;
2980
2981 if !is_closing {
2986 let closing_tag = format!("</{tag_name}>");
2987
2988 let same_line_close = lines[i].content(content).contains(&closing_tag);
2991
2992 if !same_line_close {
2994 let allow_blank_lines = tag_name == "style" || tag_name == "script";
2996 let mut j = i + 1;
2997 let mut found_closing_tag = false;
2998 while j < lines.len() && j < i + 100 {
2999 if !allow_blank_lines && lines[j].is_blank {
3002 break;
3003 }
3004
3005 lines[j].in_html_block = true;
3006
3007 if lines[j].content(content).contains(&closing_tag) {
3009 found_closing_tag = true;
3010 }
3011
3012 if found_closing_tag {
3015 j += 1;
3016 while j < lines.len() && j < i + 100 {
3018 if lines[j].is_blank {
3019 break;
3020 }
3021 lines[j].in_html_block = true;
3022 j += 1;
3023 }
3024 break;
3025 }
3026 j += 1;
3027 }
3028 }
3029 }
3030 }
3031 }
3032
3033 i += 1;
3034 }
3035 }
3036
3037 fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
3040 if !flavor.supports_esm_blocks() {
3042 return;
3043 }
3044
3045 let mut in_multiline_import = false;
3046
3047 for line in lines.iter_mut() {
3048 if line.in_code_block || line.in_front_matter || line.in_html_comment {
3050 in_multiline_import = false;
3051 continue;
3052 }
3053
3054 let line_content = line.content(content);
3055 let trimmed = line_content.trim();
3056
3057 if in_multiline_import {
3059 line.in_esm_block = true;
3060 if trimmed.ends_with('\'')
3063 || trimmed.ends_with('"')
3064 || trimmed.ends_with("';")
3065 || trimmed.ends_with("\";")
3066 || line_content.contains(';')
3067 {
3068 in_multiline_import = false;
3069 }
3070 continue;
3071 }
3072
3073 if line.is_blank {
3075 continue;
3076 }
3077
3078 if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
3080 line.in_esm_block = true;
3081
3082 let is_import = trimmed.starts_with("import ");
3090
3091 let is_complete =
3093 trimmed.ends_with(';')
3095 || (trimmed.contains(" from ") && (trimmed.ends_with('\'') || trimmed.ends_with('"')))
3097 || (!is_import && !trimmed.contains(" from ") && (
3099 trimmed.starts_with("export const ")
3100 || trimmed.starts_with("export let ")
3101 || trimmed.starts_with("export var ")
3102 || trimmed.starts_with("export function ")
3103 || trimmed.starts_with("export class ")
3104 || trimmed.starts_with("export default ")
3105 ));
3106
3107 if !is_complete && is_import {
3108 if trimmed.contains('{') && !trimmed.contains('}') {
3112 in_multiline_import = true;
3113 }
3114 }
3115 }
3116 }
3117 }
3118
3119 fn detect_jsx_and_mdx_comments(
3122 content: &str,
3123 lines: &mut [LineInfo],
3124 flavor: MarkdownFlavor,
3125 code_blocks: &[(usize, usize)],
3126 ) -> (ByteRanges, ByteRanges) {
3127 if !flavor.supports_jsx() {
3129 return (Vec::new(), Vec::new());
3130 }
3131
3132 let mut jsx_expression_ranges: Vec<(usize, usize)> = Vec::new();
3133 let mut mdx_comment_ranges: Vec<(usize, usize)> = Vec::new();
3134
3135 if !content.contains('{') {
3137 return (jsx_expression_ranges, mdx_comment_ranges);
3138 }
3139
3140 let bytes = content.as_bytes();
3141 let mut i = 0;
3142
3143 while i < bytes.len() {
3144 if bytes[i] == b'{' {
3145 if code_blocks.iter().any(|(start, end)| i >= *start && i < *end) {
3147 i += 1;
3148 continue;
3149 }
3150
3151 let start = i;
3152
3153 if i + 2 < bytes.len() && &bytes[i + 1..i + 3] == b"/*" {
3155 let mut j = i + 3;
3157 while j + 2 < bytes.len() {
3158 if &bytes[j..j + 2] == b"*/" && j + 2 < bytes.len() && bytes[j + 2] == b'}' {
3159 let end = j + 3;
3160 mdx_comment_ranges.push((start, end));
3161
3162 Self::mark_lines_in_range(lines, content, start, end, |line| {
3164 line.in_mdx_comment = true;
3165 });
3166
3167 i = end;
3168 break;
3169 }
3170 j += 1;
3171 }
3172 if j + 2 >= bytes.len() {
3173 mdx_comment_ranges.push((start, bytes.len()));
3175 Self::mark_lines_in_range(lines, content, start, bytes.len(), |line| {
3176 line.in_mdx_comment = true;
3177 });
3178 break;
3179 }
3180 } else {
3181 let mut brace_depth = 1;
3184 let mut j = i + 1;
3185 let mut in_string = false;
3186 let mut string_char = b'"';
3187
3188 while j < bytes.len() && brace_depth > 0 {
3189 let c = bytes[j];
3190
3191 if !in_string && (c == b'"' || c == b'\'' || c == b'`') {
3193 in_string = true;
3194 string_char = c;
3195 } else if in_string && c == string_char && (j == 0 || bytes[j - 1] != b'\\') {
3196 in_string = false;
3197 } else if !in_string {
3198 if c == b'{' {
3199 brace_depth += 1;
3200 } else if c == b'}' {
3201 brace_depth -= 1;
3202 }
3203 }
3204 j += 1;
3205 }
3206
3207 if brace_depth == 0 {
3208 let end = j;
3209 jsx_expression_ranges.push((start, end));
3210
3211 Self::mark_lines_in_range(lines, content, start, end, |line| {
3213 line.in_jsx_expression = true;
3214 });
3215
3216 i = end;
3217 } else {
3218 i += 1;
3219 }
3220 }
3221 } else {
3222 i += 1;
3223 }
3224 }
3225
3226 (jsx_expression_ranges, mdx_comment_ranges)
3227 }
3228
3229 fn detect_mkdocs_line_info(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
3232 if flavor != MarkdownFlavor::MkDocs {
3233 return;
3234 }
3235
3236 use crate::utils::mkdocs_admonitions;
3237 use crate::utils::mkdocs_definition_lists;
3238 use crate::utils::mkdocs_tabs;
3239
3240 let content_lines: Vec<&str> = content.lines().collect();
3241
3242 let mut in_admonition = false;
3244 let mut admonition_indent = 0;
3245
3246 let mut in_tab = false;
3248 let mut tab_indent = 0;
3249
3250 let mut in_definition = false;
3252
3253 for (i, line) in content_lines.iter().enumerate() {
3254 if i >= lines.len() {
3255 break;
3256 }
3257
3258 if mkdocs_admonitions::is_admonition_start(line) {
3262 in_admonition = true;
3263 admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3264 lines[i].in_admonition = true;
3265 } else if in_admonition {
3266 if line.trim().is_empty() {
3268 lines[i].in_admonition = true;
3270 lines[i].in_code_block = false;
3272 } else if mkdocs_admonitions::is_admonition_content(line, admonition_indent) {
3273 lines[i].in_admonition = true;
3274 lines[i].in_code_block = false;
3276 } else {
3277 in_admonition = false;
3279 if mkdocs_admonitions::is_admonition_start(line) {
3281 in_admonition = true;
3282 admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3283 lines[i].in_admonition = true;
3284 }
3285 }
3286 }
3287
3288 if mkdocs_tabs::is_tab_marker(line) {
3291 in_tab = true;
3292 tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3293 lines[i].in_content_tab = true;
3294 } else if in_tab {
3295 if line.trim().is_empty() {
3297 lines[i].in_content_tab = true;
3299 lines[i].in_code_block = false;
3300 } else if mkdocs_tabs::is_tab_content(line, tab_indent) {
3301 lines[i].in_content_tab = true;
3302 lines[i].in_code_block = false;
3304 } else {
3305 in_tab = false;
3307 if mkdocs_tabs::is_tab_marker(line) {
3309 in_tab = true;
3310 tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3311 lines[i].in_content_tab = true;
3312 }
3313 }
3314 }
3315
3316 if lines[i].in_code_block {
3318 continue;
3319 }
3320
3321 if mkdocs_definition_lists::is_definition_line(line) {
3323 in_definition = true;
3324 lines[i].in_definition_list = true;
3325 } else if in_definition {
3326 if mkdocs_definition_lists::is_definition_continuation(line) {
3328 lines[i].in_definition_list = true;
3329 } else if line.trim().is_empty() {
3330 lines[i].in_definition_list = true;
3332 } else if mkdocs_definition_lists::could_be_term_line(line) {
3333 if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1])
3335 {
3336 lines[i].in_definition_list = true;
3337 } else {
3338 in_definition = false;
3339 }
3340 } else {
3341 in_definition = false;
3342 }
3343 } else if mkdocs_definition_lists::could_be_term_line(line) {
3344 if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1]) {
3346 lines[i].in_definition_list = true;
3347 in_definition = true;
3348 }
3349 }
3350 }
3351 }
3352
3353 fn detect_obsidian_comments(
3364 content: &str,
3365 lines: &mut [LineInfo],
3366 flavor: MarkdownFlavor,
3367 code_span_ranges: &[(usize, usize)],
3368 ) -> Vec<(usize, usize)> {
3369 if flavor != MarkdownFlavor::Obsidian {
3371 return Vec::new();
3372 }
3373
3374 let comment_ranges = Self::compute_obsidian_comment_ranges(content, lines, code_span_ranges);
3376
3377 for range in &comment_ranges {
3379 for line in lines.iter_mut() {
3380 if line.in_code_block || line.in_html_comment {
3382 continue;
3383 }
3384
3385 let line_start = line.byte_offset;
3386 let line_end = line.byte_offset + line.byte_len;
3387
3388 if line_start >= range.0 && line_end <= range.1 {
3392 line.in_obsidian_comment = true;
3393 } else if line_start < range.1 && line_end > range.0 {
3394 let line_content_start = line_start;
3402 let line_content_end = line_end;
3403
3404 if line_content_start >= range.0 && line_content_end <= range.1 {
3405 line.in_obsidian_comment = true;
3406 }
3407 }
3408 }
3409 }
3410
3411 comment_ranges
3412 }
3413
3414 fn compute_obsidian_comment_ranges(
3419 content: &str,
3420 lines: &[LineInfo],
3421 code_span_ranges: &[(usize, usize)],
3422 ) -> Vec<(usize, usize)> {
3423 let mut ranges = Vec::new();
3424
3425 if !content.contains("%%") {
3427 return ranges;
3428 }
3429
3430 let mut skip_ranges: Vec<(usize, usize)> = Vec::new();
3433 for line in lines {
3434 if line.in_code_block || line.in_html_comment {
3435 skip_ranges.push((line.byte_offset, line.byte_offset + line.byte_len));
3436 }
3437 }
3438 skip_ranges.extend(code_span_ranges.iter().copied());
3439
3440 if !skip_ranges.is_empty() {
3441 skip_ranges.sort_by_key(|(start, _)| *start);
3443 let mut merged: Vec<(usize, usize)> = Vec::with_capacity(skip_ranges.len());
3444 for (start, end) in skip_ranges {
3445 if let Some((_, last_end)) = merged.last_mut()
3446 && start <= *last_end
3447 {
3448 *last_end = (*last_end).max(end);
3449 continue;
3450 }
3451 merged.push((start, end));
3452 }
3453 skip_ranges = merged;
3454 }
3455
3456 let content_bytes = content.as_bytes();
3457 let len = content.len();
3458 let mut i = 0;
3459 let mut in_comment = false;
3460 let mut comment_start = 0;
3461 let mut skip_idx = 0;
3462
3463 while i < len.saturating_sub(1) {
3464 if skip_idx < skip_ranges.len() {
3466 let (skip_start, skip_end) = skip_ranges[skip_idx];
3467 if i >= skip_end {
3468 skip_idx += 1;
3469 continue;
3470 }
3471 if i >= skip_start {
3472 i = skip_end;
3473 continue;
3474 }
3475 }
3476
3477 if content_bytes[i] == b'%' && content_bytes[i + 1] == b'%' {
3479 if !in_comment {
3480 in_comment = true;
3482 comment_start = i;
3483 i += 2;
3484 } else {
3485 let comment_end = i + 2;
3487 ranges.push((comment_start, comment_end));
3488 in_comment = false;
3489 i += 2;
3490 }
3491 } else {
3492 i += 1;
3493 }
3494 }
3495
3496 if in_comment {
3498 ranges.push((comment_start, len));
3499 }
3500
3501 ranges
3502 }
3503
3504 fn mark_lines_in_range<F>(lines: &mut [LineInfo], content: &str, start: usize, end: usize, mut f: F)
3506 where
3507 F: FnMut(&mut LineInfo),
3508 {
3509 for line in lines.iter_mut() {
3511 let line_start = line.byte_offset;
3512 let line_end = line.byte_offset + line.byte_len;
3513
3514 if line_start < end && line_end > start {
3516 f(line);
3517 }
3518 }
3519
3520 let _ = content;
3522 }
3523
3524 fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
3526 if !content.contains('`') {
3528 return Vec::new();
3529 }
3530
3531 let parser = Parser::new(content).into_offset_iter();
3533 let mut ranges = Vec::new();
3534
3535 for (event, range) in parser {
3536 if let Event::Code(_) = event {
3537 ranges.push((range.start, range.end));
3538 }
3539 }
3540
3541 Self::build_code_spans_from_ranges(content, lines, &ranges)
3542 }
3543
3544 fn build_code_spans_from_ranges(content: &str, lines: &[LineInfo], ranges: &[(usize, usize)]) -> Vec<CodeSpan> {
3545 let mut code_spans = Vec::new();
3546 if ranges.is_empty() {
3547 return code_spans;
3548 }
3549
3550 for &(start_pos, end_pos) in ranges {
3551 let full_span = &content[start_pos..end_pos];
3553 let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
3554
3555 let content_start = start_pos + backtick_count;
3557 let content_end = end_pos - backtick_count;
3558 let span_content = if content_start < content_end {
3559 content[content_start..content_end].to_string()
3560 } else {
3561 String::new()
3562 };
3563
3564 let line_idx = lines
3567 .partition_point(|line| line.byte_offset <= start_pos)
3568 .saturating_sub(1);
3569 let line_num = line_idx + 1;
3570 let byte_col_start = start_pos - lines[line_idx].byte_offset;
3571
3572 let end_line_idx = lines
3574 .partition_point(|line| line.byte_offset <= end_pos)
3575 .saturating_sub(1);
3576 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3577
3578 let line_content = lines[line_idx].content(content);
3581 let col_start = if byte_col_start <= line_content.len() {
3582 line_content[..byte_col_start].chars().count()
3583 } else {
3584 line_content.chars().count()
3585 };
3586
3587 let end_line_content = lines[end_line_idx].content(content);
3588 let col_end = if byte_col_end <= end_line_content.len() {
3589 end_line_content[..byte_col_end].chars().count()
3590 } else {
3591 end_line_content.chars().count()
3592 };
3593
3594 code_spans.push(CodeSpan {
3595 line: line_num,
3596 end_line: end_line_idx + 1,
3597 start_col: col_start,
3598 end_col: col_end,
3599 byte_offset: start_pos,
3600 byte_end: end_pos,
3601 backtick_count,
3602 content: span_content,
3603 });
3604 }
3605
3606 code_spans.sort_by_key(|span| span.byte_offset);
3608
3609 code_spans
3610 }
3611
3612 fn parse_math_spans(content: &str, lines: &[LineInfo]) -> Vec<MathSpan> {
3614 let mut math_spans = Vec::new();
3615
3616 if !content.contains('$') {
3618 return math_spans;
3619 }
3620
3621 let mut options = Options::empty();
3623 options.insert(Options::ENABLE_MATH);
3624 let parser = Parser::new_ext(content, options).into_offset_iter();
3625
3626 for (event, range) in parser {
3627 let (is_display, math_content) = match &event {
3628 Event::InlineMath(text) => (false, text.as_ref()),
3629 Event::DisplayMath(text) => (true, text.as_ref()),
3630 _ => continue,
3631 };
3632
3633 let start_pos = range.start;
3634 let end_pos = range.end;
3635
3636 let line_idx = lines
3638 .partition_point(|line| line.byte_offset <= start_pos)
3639 .saturating_sub(1);
3640 let line_num = line_idx + 1;
3641 let byte_col_start = start_pos - lines[line_idx].byte_offset;
3642
3643 let end_line_idx = lines
3645 .partition_point(|line| line.byte_offset <= end_pos)
3646 .saturating_sub(1);
3647 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3648
3649 let line_content = lines[line_idx].content(content);
3651 let col_start = if byte_col_start <= line_content.len() {
3652 line_content[..byte_col_start].chars().count()
3653 } else {
3654 line_content.chars().count()
3655 };
3656
3657 let end_line_content = lines[end_line_idx].content(content);
3658 let col_end = if byte_col_end <= end_line_content.len() {
3659 end_line_content[..byte_col_end].chars().count()
3660 } else {
3661 end_line_content.chars().count()
3662 };
3663
3664 math_spans.push(MathSpan {
3665 line: line_num,
3666 end_line: end_line_idx + 1,
3667 start_col: col_start,
3668 end_col: col_end,
3669 byte_offset: start_pos,
3670 byte_end: end_pos,
3671 is_display,
3672 content: math_content.to_string(),
3673 });
3674 }
3675
3676 math_spans.sort_by_key(|span| span.byte_offset);
3678
3679 math_spans
3680 }
3681
3682 fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
3693 const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
3695
3696 #[inline]
3699 fn reset_tracking_state(
3700 list_item: &ListItemInfo,
3701 has_list_breaking_content: &mut bool,
3702 min_continuation: &mut usize,
3703 ) {
3704 *has_list_breaking_content = false;
3705 let marker_width = if list_item.is_ordered {
3706 list_item.marker.len() + 1 } else {
3708 list_item.marker.len()
3709 };
3710 *min_continuation = if list_item.is_ordered {
3711 marker_width
3712 } else {
3713 UNORDERED_LIST_MIN_CONTINUATION_INDENT
3714 };
3715 }
3716
3717 let mut list_blocks = Vec::with_capacity(lines.len() / 10); let mut current_block: Option<ListBlock> = None;
3720 let mut last_list_item_line = 0;
3721 let mut current_indent_level = 0;
3722 let mut last_marker_width = 0;
3723
3724 let mut has_list_breaking_content_since_last_item = false;
3726 let mut min_continuation_for_tracking = 0;
3727
3728 for (line_idx, line_info) in lines.iter().enumerate() {
3729 let line_num = line_idx + 1;
3730
3731 if line_info.in_code_block {
3733 if let Some(ref mut block) = current_block {
3734 let min_continuation_indent =
3736 CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
3737
3738 let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
3740
3741 match context {
3742 CodeBlockContext::Indented => {
3743 block.end_line = line_num;
3745 continue;
3746 }
3747 CodeBlockContext::Standalone => {
3748 let completed_block = current_block.take().unwrap();
3750 list_blocks.push(completed_block);
3751 continue;
3752 }
3753 CodeBlockContext::Adjacent => {
3754 block.end_line = line_num;
3756 continue;
3757 }
3758 }
3759 } else {
3760 continue;
3762 }
3763 }
3764
3765 let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
3767 caps.get(0).unwrap().as_str().to_string()
3768 } else {
3769 String::new()
3770 };
3771
3772 if let Some(ref block) = current_block
3775 && line_info.list_item.is_none()
3776 && !line_info.is_blank
3777 && !line_info.in_code_span_continuation
3778 {
3779 let line_content = line_info.content(content).trim();
3780
3781 let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
3786
3787 let blockquote_prefix_changes = blockquote_prefix.trim() != block.blockquote_prefix.trim();
3790
3791 let breaks_list = line_info.heading.is_some()
3792 || line_content.starts_with("---")
3793 || line_content.starts_with("***")
3794 || line_content.starts_with("___")
3795 || crate::utils::skip_context::is_table_line(line_content)
3796 || blockquote_prefix_changes
3797 || (line_info.indent > 0
3798 && line_info.indent < min_continuation_for_tracking
3799 && !is_lazy_continuation);
3800
3801 if breaks_list {
3802 has_list_breaking_content_since_last_item = true;
3803 }
3804 }
3805
3806 if line_info.in_code_span_continuation
3809 && line_info.list_item.is_none()
3810 && let Some(ref mut block) = current_block
3811 {
3812 block.end_line = line_num;
3813 }
3814
3815 let effective_continuation_indent = if let Some(ref block) = current_block {
3821 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3822 let line_content = line_info.content(content);
3823 let line_bq_level = line_content
3824 .chars()
3825 .take_while(|c| *c == '>' || c.is_whitespace())
3826 .filter(|&c| c == '>')
3827 .count();
3828 if line_bq_level > 0 && line_bq_level == block_bq_level {
3829 let mut pos = 0;
3831 let mut found_markers = 0;
3832 for c in line_content.chars() {
3833 pos += c.len_utf8();
3834 if c == '>' {
3835 found_markers += 1;
3836 if found_markers == line_bq_level {
3837 if line_content.get(pos..pos + 1) == Some(" ") {
3838 pos += 1;
3839 }
3840 break;
3841 }
3842 }
3843 }
3844 let after_bq = &line_content[pos..];
3845 after_bq.len() - after_bq.trim_start().len()
3846 } else {
3847 line_info.indent
3848 }
3849 } else {
3850 line_info.indent
3851 };
3852 let adjusted_min_continuation_for_tracking = if let Some(ref block) = current_block {
3853 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3854 if block_bq_level > 0 {
3855 if block.is_ordered { last_marker_width } else { 2 }
3856 } else {
3857 min_continuation_for_tracking
3858 }
3859 } else {
3860 min_continuation_for_tracking
3861 };
3862 let is_structural_element = line_info.heading.is_some()
3865 || line_info.content(content).trim().starts_with("```")
3866 || line_info.content(content).trim().starts_with("~~~");
3867 let is_valid_continuation = effective_continuation_indent >= adjusted_min_continuation_for_tracking
3868 || (line_info.indent == 0 && !line_info.is_blank && !is_structural_element);
3869
3870 if std::env::var("RUMDL_DEBUG_LIST").is_ok() && line_info.list_item.is_none() && !line_info.is_blank {
3871 eprintln!(
3872 "[DEBUG] Line {}: checking continuation - indent={}, min_cont={}, is_valid={}, in_code_span={}, in_code_block={}, has_block={}",
3873 line_num,
3874 effective_continuation_indent,
3875 adjusted_min_continuation_for_tracking,
3876 is_valid_continuation,
3877 line_info.in_code_span_continuation,
3878 line_info.in_code_block,
3879 current_block.is_some()
3880 );
3881 }
3882
3883 if !line_info.in_code_span_continuation
3884 && line_info.list_item.is_none()
3885 && !line_info.is_blank
3886 && !line_info.in_code_block
3887 && is_valid_continuation
3888 && let Some(ref mut block) = current_block
3889 {
3890 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3891 eprintln!(
3892 "[DEBUG] Line {}: extending block.end_line from {} to {}",
3893 line_num, block.end_line, line_num
3894 );
3895 }
3896 block.end_line = line_num;
3897 }
3898
3899 if let Some(list_item) = &line_info.list_item {
3901 let item_indent = list_item.marker_column;
3903 let nesting = item_indent / 2; if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3906 eprintln!(
3907 "[DEBUG] Line {}: list item found, marker={:?}, indent={}",
3908 line_num, list_item.marker, item_indent
3909 );
3910 }
3911
3912 if let Some(ref mut block) = current_block {
3913 let is_nested = nesting > block.nesting_level;
3917 let same_type =
3918 (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
3919 let same_context = block.blockquote_prefix == blockquote_prefix;
3920 let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
3922
3923 let marker_compatible =
3925 block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
3926
3927 let has_non_list_content = has_list_breaking_content_since_last_item;
3930
3931 let mut continues_list = if is_nested {
3935 same_context && reasonable_distance && !has_non_list_content
3937 } else {
3938 same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
3940 };
3941
3942 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3943 eprintln!(
3944 "[DEBUG] Line {}: continues_list={}, is_nested={}, same_type={}, same_context={}, reasonable_distance={}, marker_compatible={}, has_non_list_content={}, last_item={}, block.end_line={}",
3945 line_num,
3946 continues_list,
3947 is_nested,
3948 same_type,
3949 same_context,
3950 reasonable_distance,
3951 marker_compatible,
3952 has_non_list_content,
3953 last_list_item_line,
3954 block.end_line
3955 );
3956 }
3957
3958 if !continues_list
3962 && (is_nested || same_type)
3963 && reasonable_distance
3964 && line_num > 0
3965 && block.end_line == line_num - 1
3966 {
3967 if block.item_lines.contains(&(line_num - 1)) {
3970 continues_list = true;
3972 } else {
3973 continues_list = true;
3977 }
3978 }
3979
3980 if continues_list {
3981 block.end_line = line_num;
3983 block.item_lines.push(line_num);
3984
3985 block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
3987 list_item.marker.len() + 1
3988 } else {
3989 list_item.marker.len()
3990 });
3991
3992 if !block.is_ordered
3994 && block.marker.is_some()
3995 && block.marker.as_ref() != Some(&list_item.marker)
3996 {
3997 block.marker = None;
3999 }
4000
4001 reset_tracking_state(
4003 list_item,
4004 &mut has_list_breaking_content_since_last_item,
4005 &mut min_continuation_for_tracking,
4006 );
4007 } else {
4008 if !same_type
4013 && !is_nested
4014 && let Some(&last_item) = block.item_lines.last()
4015 {
4016 block.end_line = last_item;
4017 }
4018
4019 list_blocks.push(block.clone());
4020
4021 *block = ListBlock {
4022 start_line: line_num,
4023 end_line: line_num,
4024 is_ordered: list_item.is_ordered,
4025 marker: if list_item.is_ordered {
4026 None
4027 } else {
4028 Some(list_item.marker.clone())
4029 },
4030 blockquote_prefix: blockquote_prefix.clone(),
4031 item_lines: vec![line_num],
4032 nesting_level: nesting,
4033 max_marker_width: if list_item.is_ordered {
4034 list_item.marker.len() + 1
4035 } else {
4036 list_item.marker.len()
4037 },
4038 };
4039
4040 reset_tracking_state(
4042 list_item,
4043 &mut has_list_breaking_content_since_last_item,
4044 &mut min_continuation_for_tracking,
4045 );
4046 }
4047 } else {
4048 current_block = Some(ListBlock {
4050 start_line: line_num,
4051 end_line: line_num,
4052 is_ordered: list_item.is_ordered,
4053 marker: if list_item.is_ordered {
4054 None
4055 } else {
4056 Some(list_item.marker.clone())
4057 },
4058 blockquote_prefix,
4059 item_lines: vec![line_num],
4060 nesting_level: nesting,
4061 max_marker_width: list_item.marker.len(),
4062 });
4063
4064 reset_tracking_state(
4066 list_item,
4067 &mut has_list_breaking_content_since_last_item,
4068 &mut min_continuation_for_tracking,
4069 );
4070 }
4071
4072 last_list_item_line = line_num;
4073 current_indent_level = item_indent;
4074 last_marker_width = if list_item.is_ordered {
4075 list_item.marker.len() + 1 } else {
4077 list_item.marker.len()
4078 };
4079 } else if let Some(ref mut block) = current_block {
4080 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4082 eprintln!(
4083 "[DEBUG] Line {}: non-list-item, is_blank={}, block exists",
4084 line_num, line_info.is_blank
4085 );
4086 }
4087
4088 let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
4096 lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
4097 } else {
4098 false
4099 };
4100
4101 let min_continuation_indent = if block.is_ordered {
4105 current_indent_level + last_marker_width
4106 } else {
4107 current_indent_level + 2 };
4109
4110 if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
4111 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4113 eprintln!(
4114 "[DEBUG] Line {}: indented continuation (indent={}, min={})",
4115 line_num, line_info.indent, min_continuation_indent
4116 );
4117 }
4118 block.end_line = line_num;
4119 } else if line_info.is_blank {
4120 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4123 eprintln!("[DEBUG] Line {line_num}: entering blank line handling");
4124 }
4125 let mut check_idx = line_idx + 1;
4126 let mut found_continuation = false;
4127
4128 while check_idx < lines.len() && lines[check_idx].is_blank {
4130 check_idx += 1;
4131 }
4132
4133 if check_idx < lines.len() {
4134 let next_line = &lines[check_idx];
4135 let next_content = next_line.content(content);
4137 let block_bq_level_for_indent = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4140 let next_bq_level_for_indent = next_content
4141 .chars()
4142 .take_while(|c| *c == '>' || c.is_whitespace())
4143 .filter(|&c| c == '>')
4144 .count();
4145 let effective_indent =
4146 if next_bq_level_for_indent > 0 && next_bq_level_for_indent == block_bq_level_for_indent {
4147 let mut pos = 0;
4150 let mut found_markers = 0;
4151 for c in next_content.chars() {
4152 pos += c.len_utf8();
4153 if c == '>' {
4154 found_markers += 1;
4155 if found_markers == next_bq_level_for_indent {
4156 if next_content.get(pos..pos + 1) == Some(" ") {
4158 pos += 1;
4159 }
4160 break;
4161 }
4162 }
4163 }
4164 let after_blockquote_marker = &next_content[pos..];
4165 after_blockquote_marker.len() - after_blockquote_marker.trim_start().len()
4166 } else {
4167 next_line.indent
4168 };
4169 let adjusted_min_continuation = if block_bq_level_for_indent > 0 {
4172 if block.is_ordered { last_marker_width } else { 2 }
4175 } else {
4176 min_continuation_indent
4177 };
4178 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4180 eprintln!(
4181 "[DEBUG] Blank line {} checking next line {}: effective_indent={}, adjusted_min={}, next_is_list={}, in_code_block={}",
4182 line_num,
4183 check_idx + 1,
4184 effective_indent,
4185 adjusted_min_continuation,
4186 next_line.list_item.is_some(),
4187 next_line.in_code_block
4188 );
4189 }
4190 if !next_line.in_code_block && effective_indent >= adjusted_min_continuation {
4191 found_continuation = true;
4192 }
4193 else if !next_line.in_code_block
4195 && next_line.list_item.is_some()
4196 && let Some(item) = &next_line.list_item
4197 {
4198 let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
4199 .find(next_line.content(content))
4200 .map_or(String::new(), |m| m.as_str().to_string());
4201 if item.marker_column == current_indent_level
4202 && item.is_ordered == block.is_ordered
4203 && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
4204 {
4205 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4209 let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
4210 if let Some(between_line) = lines.get(idx) {
4211 let between_content = between_line.content(content);
4212 let trimmed = between_content.trim();
4213 if trimmed.is_empty() {
4215 return false;
4216 }
4217 let line_indent = between_content.len() - between_content.trim_start().len();
4219
4220 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4222 .find(between_content)
4223 .map_or(String::new(), |m| m.as_str().to_string());
4224 let between_bq_level = between_bq_prefix.chars().filter(|&c| c == '>').count();
4225 let blockquote_level_changed =
4226 trimmed.starts_with(">") && between_bq_level != block_bq_level;
4227
4228 if trimmed.starts_with("```")
4230 || trimmed.starts_with("~~~")
4231 || trimmed.starts_with("---")
4232 || trimmed.starts_with("***")
4233 || trimmed.starts_with("___")
4234 || blockquote_level_changed
4235 || crate::utils::skip_context::is_table_line(trimmed)
4236 || between_line.heading.is_some()
4237 {
4238 return true; }
4240
4241 line_indent >= min_continuation_indent
4243 } else {
4244 false
4245 }
4246 });
4247
4248 if block.is_ordered {
4249 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4252 if let Some(between_line) = lines.get(idx) {
4253 let between_content = between_line.content(content);
4254 let trimmed = between_content.trim();
4255 if trimmed.is_empty() {
4256 return false;
4257 }
4258 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4260 .find(between_content)
4261 .map_or(String::new(), |m| m.as_str().to_string());
4262 let between_bq_level =
4263 between_bq_prefix.chars().filter(|&c| c == '>').count();
4264 let blockquote_level_changed =
4265 trimmed.starts_with(">") && between_bq_level != block_bq_level;
4266 trimmed.starts_with("```")
4268 || trimmed.starts_with("~~~")
4269 || trimmed.starts_with("---")
4270 || trimmed.starts_with("***")
4271 || trimmed.starts_with("___")
4272 || blockquote_level_changed
4273 || crate::utils::skip_context::is_table_line(trimmed)
4274 || between_line.heading.is_some()
4275 } else {
4276 false
4277 }
4278 });
4279 found_continuation = !has_structural_separators;
4280 } else {
4281 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4283 if let Some(between_line) = lines.get(idx) {
4284 let between_content = between_line.content(content);
4285 let trimmed = between_content.trim();
4286 if trimmed.is_empty() {
4287 return false;
4288 }
4289 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4291 .find(between_content)
4292 .map_or(String::new(), |m| m.as_str().to_string());
4293 let between_bq_level =
4294 between_bq_prefix.chars().filter(|&c| c == '>').count();
4295 let blockquote_level_changed =
4296 trimmed.starts_with(">") && between_bq_level != block_bq_level;
4297 trimmed.starts_with("```")
4299 || trimmed.starts_with("~~~")
4300 || trimmed.starts_with("---")
4301 || trimmed.starts_with("***")
4302 || trimmed.starts_with("___")
4303 || blockquote_level_changed
4304 || crate::utils::skip_context::is_table_line(trimmed)
4305 || between_line.heading.is_some()
4306 } else {
4307 false
4308 }
4309 });
4310 found_continuation = !has_structural_separators;
4311 }
4312 }
4313 }
4314 }
4315
4316 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4317 eprintln!("[DEBUG] Blank line {line_num} final: found_continuation={found_continuation}");
4318 }
4319 if found_continuation {
4320 block.end_line = line_num;
4322 } else {
4323 list_blocks.push(block.clone());
4325 current_block = None;
4326 }
4327 } else {
4328 let min_required_indent = if block.is_ordered {
4331 current_indent_level + last_marker_width
4332 } else {
4333 current_indent_level + 2
4334 };
4335
4336 let line_content = line_info.content(content).trim();
4341
4342 let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
4344
4345 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4348 let current_bq_level = blockquote_prefix.chars().filter(|&c| c == '>').count();
4349 let blockquote_level_changed = line_content.starts_with(">") && current_bq_level != block_bq_level;
4350
4351 let is_structural_separator = line_info.heading.is_some()
4352 || line_content.starts_with("```")
4353 || line_content.starts_with("~~~")
4354 || line_content.starts_with("---")
4355 || line_content.starts_with("***")
4356 || line_content.starts_with("___")
4357 || blockquote_level_changed
4358 || looks_like_table;
4359
4360 let is_lazy_continuation = !is_structural_separator
4364 && !line_info.is_blank
4365 && (line_info.indent == 0
4366 || line_info.indent >= min_required_indent
4367 || line_info.in_code_span_continuation);
4368
4369 if is_lazy_continuation {
4370 block.end_line = line_num;
4373 } else {
4374 list_blocks.push(block.clone());
4376 current_block = None;
4377 }
4378 }
4379 }
4380 }
4381
4382 if let Some(block) = current_block {
4384 list_blocks.push(block);
4385 }
4386
4387 merge_adjacent_list_blocks(content, &mut list_blocks, lines);
4389
4390 list_blocks
4391 }
4392
4393 fn compute_char_frequency(content: &str) -> CharFrequency {
4395 let mut frequency = CharFrequency::default();
4396
4397 for ch in content.chars() {
4398 match ch {
4399 '#' => frequency.hash_count += 1,
4400 '*' => frequency.asterisk_count += 1,
4401 '_' => frequency.underscore_count += 1,
4402 '-' => frequency.hyphen_count += 1,
4403 '+' => frequency.plus_count += 1,
4404 '>' => frequency.gt_count += 1,
4405 '|' => frequency.pipe_count += 1,
4406 '[' => frequency.bracket_count += 1,
4407 '`' => frequency.backtick_count += 1,
4408 '<' => frequency.lt_count += 1,
4409 '!' => frequency.exclamation_count += 1,
4410 '\n' => frequency.newline_count += 1,
4411 _ => {}
4412 }
4413 }
4414
4415 frequency
4416 }
4417
4418 fn parse_html_tags(
4420 content: &str,
4421 lines: &[LineInfo],
4422 code_blocks: &[(usize, usize)],
4423 flavor: MarkdownFlavor,
4424 ) -> Vec<HtmlTag> {
4425 static HTML_TAG_REGEX: LazyLock<regex::Regex> =
4426 LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
4427
4428 let mut html_tags = Vec::with_capacity(content.matches('<').count());
4429
4430 for cap in HTML_TAG_REGEX.captures_iter(content) {
4431 let full_match = cap.get(0).unwrap();
4432 let match_start = full_match.start();
4433 let match_end = full_match.end();
4434
4435 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4437 continue;
4438 }
4439
4440 let is_closing = !cap.get(1).unwrap().as_str().is_empty();
4441 let tag_name_original = cap.get(2).unwrap().as_str();
4442 let tag_name = tag_name_original.to_lowercase();
4443 let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
4444
4445 if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
4448 continue;
4449 }
4450
4451 let mut line_num = 1;
4453 let mut col_start = match_start;
4454 let mut col_end = match_end;
4455 for (idx, line_info) in lines.iter().enumerate() {
4456 if match_start >= line_info.byte_offset {
4457 line_num = idx + 1;
4458 col_start = match_start - line_info.byte_offset;
4459 col_end = match_end - line_info.byte_offset;
4460 } else {
4461 break;
4462 }
4463 }
4464
4465 html_tags.push(HtmlTag {
4466 line: line_num,
4467 start_col: col_start,
4468 end_col: col_end,
4469 byte_offset: match_start,
4470 byte_end: match_end,
4471 tag_name,
4472 is_closing,
4473 is_self_closing,
4474 raw_content: full_match.as_str().to_string(),
4475 });
4476 }
4477
4478 html_tags
4479 }
4480
4481 fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
4483 let mut table_rows = Vec::with_capacity(lines.len() / 20);
4484
4485 for (line_idx, line_info) in lines.iter().enumerate() {
4486 if line_info.in_code_block || line_info.is_blank {
4488 continue;
4489 }
4490
4491 let line = line_info.content(content);
4492 let line_num = line_idx + 1;
4493
4494 if !line.contains('|') {
4496 continue;
4497 }
4498
4499 let parts: Vec<&str> = line.split('|').collect();
4501 let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
4502
4503 let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
4505 let mut column_alignments = Vec::new();
4506
4507 if is_separator {
4508 for part in &parts[1..parts.len() - 1] {
4509 let trimmed = part.trim();
4511 let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
4512 "center".to_string()
4513 } else if trimmed.ends_with(':') {
4514 "right".to_string()
4515 } else if trimmed.starts_with(':') {
4516 "left".to_string()
4517 } else {
4518 "none".to_string()
4519 };
4520 column_alignments.push(alignment);
4521 }
4522 }
4523
4524 table_rows.push(TableRow {
4525 line: line_num,
4526 is_separator,
4527 column_count,
4528 column_alignments,
4529 });
4530 }
4531
4532 table_rows
4533 }
4534
4535 fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
4537 let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
4538
4539 for cap in URL_SIMPLE_REGEX.captures_iter(content) {
4541 let full_match = cap.get(0).unwrap();
4542 let match_start = full_match.start();
4543 let match_end = full_match.end();
4544
4545 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4547 continue;
4548 }
4549
4550 let preceding_char = if match_start > 0 {
4552 content.chars().nth(match_start - 1)
4553 } else {
4554 None
4555 };
4556 let following_char = content.chars().nth(match_end);
4557
4558 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4559 continue;
4560 }
4561 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4562 continue;
4563 }
4564
4565 let url = full_match.as_str();
4566 let url_type = if url.starts_with("https://") {
4567 "https"
4568 } else if url.starts_with("http://") {
4569 "http"
4570 } else if url.starts_with("ftp://") {
4571 "ftp"
4572 } else {
4573 "other"
4574 };
4575
4576 let mut line_num = 1;
4578 let mut col_start = match_start;
4579 let mut col_end = match_end;
4580 for (idx, line_info) in lines.iter().enumerate() {
4581 if match_start >= line_info.byte_offset {
4582 line_num = idx + 1;
4583 col_start = match_start - line_info.byte_offset;
4584 col_end = match_end - line_info.byte_offset;
4585 } else {
4586 break;
4587 }
4588 }
4589
4590 bare_urls.push(BareUrl {
4591 line: line_num,
4592 start_col: col_start,
4593 end_col: col_end,
4594 byte_offset: match_start,
4595 byte_end: match_end,
4596 url: url.to_string(),
4597 url_type: url_type.to_string(),
4598 });
4599 }
4600
4601 for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
4603 let full_match = cap.get(0).unwrap();
4604 let match_start = full_match.start();
4605 let match_end = full_match.end();
4606
4607 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4609 continue;
4610 }
4611
4612 let preceding_char = if match_start > 0 {
4614 content.chars().nth(match_start - 1)
4615 } else {
4616 None
4617 };
4618 let following_char = content.chars().nth(match_end);
4619
4620 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4621 continue;
4622 }
4623 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4624 continue;
4625 }
4626
4627 let email = full_match.as_str();
4628
4629 let mut line_num = 1;
4631 let mut col_start = match_start;
4632 let mut col_end = match_end;
4633 for (idx, line_info) in lines.iter().enumerate() {
4634 if match_start >= line_info.byte_offset {
4635 line_num = idx + 1;
4636 col_start = match_start - line_info.byte_offset;
4637 col_end = match_end - line_info.byte_offset;
4638 } else {
4639 break;
4640 }
4641 }
4642
4643 bare_urls.push(BareUrl {
4644 line: line_num,
4645 start_col: col_start,
4646 end_col: col_end,
4647 byte_offset: match_start,
4648 byte_end: match_end,
4649 url: email.to_string(),
4650 url_type: "email".to_string(),
4651 });
4652 }
4653
4654 bare_urls
4655 }
4656
/// Returns a [`ValidHeadingsIter`] constructed over this context's `lines`.
///
/// NOTE(review): presumably yields only headings whose `is_valid` flag is set;
/// confirm against the `ValidHeadingsIter` implementation.
#[must_use]
pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
    ValidHeadingsIter::new(&self.lines)
}
4680
4681 #[must_use]
4685 pub fn has_valid_headings(&self) -> bool {
4686 self.lines
4687 .iter()
4688 .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
4689 }
4690}
4691
4692fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
4694 if list_blocks.len() < 2 {
4695 return;
4696 }
4697
4698 let mut merger = ListBlockMerger::new(content, lines);
4699 *list_blocks = merger.merge(list_blocks);
4700}
4701
4702struct ListBlockMerger<'a> {
4704 content: &'a str,
4705 lines: &'a [LineInfo],
4706}
4707
4708impl<'a> ListBlockMerger<'a> {
4709 fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
4710 Self { content, lines }
4711 }
4712
4713 fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
4714 let mut merged = Vec::with_capacity(list_blocks.len());
4715 let mut current = list_blocks[0].clone();
4716
4717 for next in list_blocks.iter().skip(1) {
4718 if self.should_merge_blocks(¤t, next) {
4719 current = self.merge_two_blocks(current, next);
4720 } else {
4721 merged.push(current);
4722 current = next.clone();
4723 }
4724 }
4725
4726 merged.push(current);
4727 merged
4728 }
4729
4730 fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
4732 if !self.blocks_are_compatible(current, next) {
4734 return false;
4735 }
4736
4737 let spacing = self.analyze_spacing_between(current, next);
4739 match spacing {
4740 BlockSpacing::Consecutive => true,
4741 BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
4742 BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
4743 self.can_merge_with_content_between(current, next)
4744 }
4745 }
4746 }
4747
4748 fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
4750 current.is_ordered == next.is_ordered
4751 && current.blockquote_prefix == next.blockquote_prefix
4752 && current.nesting_level == next.nesting_level
4753 }
4754
4755 fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
4757 let gap = next.start_line - current.end_line;
4758
4759 match gap {
4760 1 => BlockSpacing::Consecutive,
4761 2 => BlockSpacing::SingleBlank,
4762 _ if gap > 2 => {
4763 if self.has_only_blank_lines_between(current, next) {
4764 BlockSpacing::MultipleBlanks
4765 } else {
4766 BlockSpacing::ContentBetween
4767 }
4768 }
4769 _ => BlockSpacing::Consecutive, }
4771 }
4772
4773 fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4775 if has_meaningful_content_between(self.content, current, next, self.lines) {
4778 return false; }
4780
4781 !current.is_ordered && current.marker == next.marker
4783 }
4784
4785 fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4787 if has_meaningful_content_between(self.content, current, next, self.lines) {
4789 return false; }
4791
4792 current.is_ordered && next.is_ordered
4794 }
4795
4796 fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4798 for line_num in (current.end_line + 1)..next.start_line {
4799 if let Some(line_info) = self.lines.get(line_num - 1)
4800 && !line_info.content(self.content).trim().is_empty()
4801 {
4802 return false;
4803 }
4804 }
4805 true
4806 }
4807
4808 fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
4810 current.end_line = next.end_line;
4811 current.item_lines.extend_from_slice(&next.item_lines);
4812
4813 current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
4815
4816 if !current.is_ordered && self.markers_differ(¤t, next) {
4818 current.marker = None; }
4820
4821 current
4822 }
4823
4824 fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
4826 current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
4827 }
4828}
4829
/// How two list blocks are separated in the document.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// The second block starts on the line right after the first one ends.
    Consecutive,
    /// Exactly one line lies between the two blocks.
    SingleBlank,
    /// Several lines lie between the blocks and all of them are blank.
    MultipleBlanks,
    /// Several lines lie between the blocks and at least one is not blank.
    ContentBetween,
}
4838
4839fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
4841 for line_num in (current.end_line + 1)..next.start_line {
4843 if let Some(line_info) = lines.get(line_num - 1) {
4844 let trimmed = line_info.content(content).trim();
4846
4847 if trimmed.is_empty() {
4849 continue;
4850 }
4851
4852 if line_info.heading.is_some() {
4856 return true; }
4858
4859 if is_horizontal_rule(trimmed) {
4861 return true; }
4863
4864 if crate::utils::skip_context::is_table_line(trimmed) {
4866 return true; }
4868
4869 if trimmed.starts_with('>') {
4871 return true; }
4873
4874 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
4876 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4877
4878 let min_continuation_indent = if current.is_ordered {
4880 current.nesting_level + current.max_marker_width + 1 } else {
4882 current.nesting_level + 2
4883 };
4884
4885 if line_indent < min_continuation_indent {
4886 return true; }
4889 }
4890
4891 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4893
4894 let min_indent = if current.is_ordered {
4896 current.nesting_level + current.max_marker_width
4897 } else {
4898 current.nesting_level + 2
4899 };
4900
4901 if line_indent < min_indent {
4903 return true; }
4905
4906 }
4909 }
4910
4911 false
4913}
4914
4915pub fn is_horizontal_rule_line(line: &str) -> bool {
4922 let leading_spaces = line.len() - line.trim_start_matches(' ').len();
4924 if leading_spaces > 3 || line.starts_with('\t') {
4925 return false;
4926 }
4927
4928 is_horizontal_rule_content(line.trim())
4929}
4930
/// Checks whether already-trimmed text forms a horizontal rule.
///
/// The text must start with `-`, `*` or `_`, contain that same character at
/// least three times, and contain nothing else except spaces and tabs.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    if trimmed.len() < 3 {
        return false;
    }

    let marker = match trimmed.chars().next() {
        Some(c @ ('-' | '*' | '_')) => c,
        _ => return false,
    };

    let mut marker_count = 0usize;
    for ch in trimmed.chars() {
        if ch == marker {
            marker_count += 1;
        } else if !matches!(ch, ' ' | '\t') {
            // Any other character disqualifies the line.
            return false;
        }
    }

    marker_count >= 3
}
4955
/// Alias for [`is_horizontal_rule_content`]: forwards `trimmed` unchanged and
/// returns its result.
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}
4960
4961#[cfg(test)]
4963mod tests {
4964 use super::*;
4965
/// An empty document keeps a single line offset (0), maps offset 0 to
/// line 1 / column 1, and produces no `lines` entries.
#[test]
fn test_empty_content() {
    let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
    assert_eq!(ctx.content, "");
    assert_eq!(ctx.line_offsets, vec![0]);
    assert_eq!(ctx.offset_to_line_col(0), (1, 1));
    assert_eq!(ctx.lines.len(), 0);
}
4974
/// A one-line document has a single line offset, and offsets within it map to
/// line 1 with 1-based columns.
#[test]
fn test_single_line() {
    let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
    assert_eq!(ctx.content, "# Hello");
    assert_eq!(ctx.line_offsets, vec![0]);
    assert_eq!(ctx.offset_to_line_col(0), (1, 1));
    assert_eq!(ctx.offset_to_line_col(3), (1, 4));
}
4983
/// Line offsets and offset-to-position mapping across a four-line document
/// that includes a blank line.
#[test]
fn test_multi_line() {
    let content = "# Title\n\nSecond line\nThird line";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);

    // (byte offset, expected (line, column))
    let expectations = [
        (0, (1, 1)),
        (8, (2, 1)),
        (9, (3, 1)),
        (15, (3, 7)),
        (21, (4, 1)),
    ];
    for (offset, line_col) in expectations {
        assert_eq!(ctx.offset_to_line_col(offset), line_col);
    }
}
4996
/// Per-line metadata (content, byte offset, indent, blank/code flags) and the
/// line-number lookup helpers on a seven-line document.
///
/// NOTE(review): the fixture's second line shows a single leading space while
/// the assertions expect an indent of 4 — confirm the literal's whitespace is
/// intact.
#[test]
fn test_line_info() {
    let content = "# Title\n indented\n\ncode:\n```rust\nfn main() {}\n```";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    assert_eq!(ctx.lines.len(), 7);

    // Line 1: the heading, with no indent and no list item.
    let line1 = &ctx.lines[0];
    assert_eq!(line1.content(ctx.content), "# Title");
    assert_eq!(line1.byte_offset, 0);
    assert_eq!(line1.indent, 0);
    assert!(!line1.is_blank);
    assert!(!line1.in_code_block);
    assert!(line1.list_item.is_none());

    // Line 2: indented text starting right after the first newline (offset 8).
    let line2 = &ctx.lines[1];
    assert_eq!(line2.content(ctx.content), " indented");
    assert_eq!(line2.byte_offset, 8);
    assert_eq!(line2.indent, 4);
    assert!(!line2.is_blank);

    // Line 3: the empty line.
    let line3 = &ctx.lines[2];
    assert_eq!(line3.content(ctx.content), "");
    assert!(line3.is_blank);

    // Lookup helpers take 1-based line numbers.
    assert_eq!(ctx.line_to_byte_offset(1), Some(0));
    assert_eq!(ctx.line_to_byte_offset(2), Some(8));
    assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
    assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
}
5032
/// List markers are detected for unordered and ordered items, and plain text
/// is not treated as a list item.
///
/// NOTE(review): the nested item in the fixture shows a single leading space
/// while `marker_column` is expected to be 2 — confirm the literal's
/// whitespace is intact.
#[test]
fn test_list_item_detection() {
    let content = "- Unordered item\n * Nested item\n1. Ordered item\n 2) Nested ordered\n\nNot a list";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // Line 1: top-level unordered item using "-".
    let line1 = &ctx.lines[0];
    assert!(line1.list_item.is_some());
    let list1 = line1.list_item.as_ref().unwrap();
    assert_eq!(list1.marker, "-");
    assert!(!list1.is_ordered);
    assert_eq!(list1.marker_column, 0);
    assert_eq!(list1.content_column, 2);

    // Line 2: nested unordered item using "*".
    let line2 = &ctx.lines[1];
    assert!(line2.list_item.is_some());
    let list2 = line2.list_item.as_ref().unwrap();
    assert_eq!(list2.marker, "*");
    assert_eq!(list2.marker_column, 2);

    // Line 3: ordered item; the parsed number is exposed.
    let line3 = &ctx.lines[2];
    assert!(line3.list_item.is_some());
    let list3 = line3.list_item.as_ref().unwrap();
    assert_eq!(list3.marker, "1.");
    assert!(list3.is_ordered);
    assert_eq!(list3.number, Some(1));

    // Line 6: ordinary paragraph text.
    let line6 = &ctx.lines[5];
    assert!(line6.list_item.is_none());
}
5066
/// Offset-to-position mapping around newline boundaries, including the offset
/// one past the final character.
#[test]
fn test_offset_to_line_col_edge_cases() {
    let content = "a\nb\nc";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // (byte offset, expected (line, column))
    let expectations = [
        (0, (1, 1)),
        (1, (1, 2)),
        (2, (2, 1)),
        (3, (2, 2)),
        (4, (3, 1)),
        (5, (3, 2)),
    ];
    for (offset, line_col) in expectations {
        assert_eq!(ctx.offset_to_line_col(offset), line_col);
    }
}
5079
/// In the MDX flavor, leading `import`/`export` lines are flagged as ESM
/// block lines, while the blank, heading and text lines after them are not.
#[test]
fn test_mdx_esm_blocks() {
    let content = r##"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall

In {year}, the snowfall was above average.
It was followed by a warm spring which caused
flood conditions in many of the nearby rivers.

<Chart color="#fcb32c" year={year} />
"##;

    let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);

    assert_eq!(ctx.lines.len(), 10);
    assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
    assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
    assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
    assert!(
        !ctx.lines[3].in_esm_block,
        "Line 4 (heading) should NOT be in_esm_block"
    );
    assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
    assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
}
5108
/// The same `import`/`export` lines are not flagged as ESM block lines when
/// the document is parsed with the Standard flavor.
#[test]
fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
    let content = r#"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall
"#;

    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    assert!(
        !ctx.lines[0].in_esm_block,
        "Line 1 should NOT be in_esm_block in Standard flavor"
    );
    assert!(
        !ctx.lines[1].in_esm_block,
        "Line 2 should NOT be in_esm_block in Standard flavor"
    );
}
5129
/// Blockquote lines are recognised as blockquotes, and their content and
/// nesting level are extracted.
///
/// NOTE(review): the first quoted line shows a single space after `>` while
/// `has_multiple_spaces_after_marker` is expected to be true — confirm the
/// fixture's whitespace is intact.
#[test]
fn test_blockquote_with_indented_content() {
    let content = r#"# Heading

> -S socket-path
> More text
"#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // Both quoted lines (indices 2 and 3) must carry blockquote metadata.
    assert!(
        ctx.lines.get(2).is_some_and(|l| l.blockquote.is_some()),
        "Line 3 should be a blockquote"
    );
    assert!(
        ctx.lines.get(3).is_some_and(|l| l.blockquote.is_some()),
        "Line 4 should be a blockquote"
    );

    let bq3 = ctx.lines.get(2).unwrap().blockquote.as_ref().unwrap();
    assert_eq!(bq3.content, "-S socket-path");
    assert_eq!(bq3.nesting_level, 1);
    assert!(bq3.has_multiple_spaces_after_marker);

    let bq4 = ctx.lines.get(3).unwrap().blockquote.as_ref().unwrap();
    assert_eq!(bq4.content, "More text");
    assert_eq!(bq4.nesting_level, 1);
}
5165
/// Footnote definitions (`[^id]: ...`) are not reported as reference
/// definitions; only the regular `[regular]: ...` definition is, with its URL
/// and title.
#[test]
fn test_footnote_definitions_not_parsed_as_reference_defs() {
    let content = r#"# Title

A footnote[^1].

[^1]: This is the footnote content.

[^note]: Another footnote with [link](https://example.com).

[regular]: ./path.md "A real reference definition"
"#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    assert_eq!(
        ctx.reference_defs.len(),
        1,
        "Footnotes should not be parsed as reference definitions"
    );

    // The single surviving definition is the regular one.
    assert_eq!(ctx.reference_defs[0].id, "regular");
    assert_eq!(ctx.reference_defs[0].url, "./path.md");
    assert_eq!(
        ctx.reference_defs[0].title,
        Some("A real reference definition".to_string())
    );
}
5196
/// A footnote whose body starts with an inline link must not produce a
/// reference definition.
#[test]
fn test_footnote_with_inline_link_not_misidentified() {
    let content = r#"# Title

A footnote[^1].

[^1]: [link](https://www.google.com).
"#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    assert!(
        ctx.reference_defs.is_empty(),
        "Footnote with inline link should not create a reference definition"
    );
}
5215
/// Footnote labels of several shapes (numeric, named, single character,
/// hyphenated, mixed) are all excluded from the reference definitions, leaving
/// only `ref1` and `ref2`.
#[test]
fn test_various_footnote_formats_excluded() {
    let content = r#"[^1]: Numeric footnote
[^note]: Named footnote
[^a]: Single char footnote
[^long-footnote-name]: Long named footnote
[^123abc]: Mixed alphanumeric

[ref1]: ./file1.md
[ref2]: ./file2.md
"#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    assert_eq!(
        ctx.reference_defs.len(),
        2,
        "Only regular reference definitions should be parsed"
    );

    let ids: Vec<&str> = ctx.reference_defs.iter().map(|r| r.id.as_str()).collect();
    assert!(ids.contains(&"ref1"));
    assert!(ids.contains(&"ref2"));
    // No collected id may look like a footnote label.
    assert!(!ids.iter().any(|id| id.starts_with('^')));
}
5242
/// `has_char` reports every tracked character that appears in the content.
#[test]
fn test_has_char_tracked_characters() {
    let content = "# Heading\n* list item\n_emphasis_ and -hyphen-\n+ plus\n> quote\n| table |\n[link]\n`code`\n<html>\n!image";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    let present = [
        ('#', "Should detect hash"),
        ('*', "Should detect asterisk"),
        ('_', "Should detect underscore"),
        ('-', "Should detect hyphen"),
        ('+', "Should detect plus"),
        ('>', "Should detect gt"),
        ('|', "Should detect pipe"),
        ('[', "Should detect bracket"),
        ('`', "Should detect backtick"),
        ('<', "Should detect lt"),
        ('!', "Should detect exclamation"),
        ('\n', "Should detect newline"),
    ];
    for (ch, message) in present {
        assert!(ctx.has_char(ch), "{message}");
    }
}
5267
/// `has_char` reports no tracked character for plain single-line text.
#[test]
fn test_has_char_absent_characters() {
    let content = "Simple text without special chars";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    let absent = [
        ('#', "Should not detect hash"),
        ('*', "Should not detect asterisk"),
        ('_', "Should not detect underscore"),
        ('-', "Should not detect hyphen"),
        ('+', "Should not detect plus"),
        ('>', "Should not detect gt"),
        ('|', "Should not detect pipe"),
        ('[', "Should not detect bracket"),
        ('`', "Should not detect backtick"),
        ('<', "Should not detect lt"),
        ('!', "Should not detect exclamation"),
        ('\n', "Should not detect newline in single line"),
    ];
    for (ch, message) in absent {
        assert!(!ctx.has_char(ch), "{message}");
    }
}
5288
/// `has_char` also answers correctly for characters outside the tracked set.
#[test]
fn test_has_char_fallback_for_untracked() {
    let content = "Text with @mention and $dollar and %percent";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    let present = [
        ('@', "Should detect @ via fallback"),
        ('$', "Should detect $ via fallback"),
        ('%', "Should detect % via fallback"),
    ];
    for (ch, message) in present {
        assert!(ctx.has_char(ch), "{message}");
    }

    assert!(!ctx.has_char('^'), "Should not detect absent ^ via fallback");
}
5300
/// `char_count` returns the exact number of occurrences of each tracked
/// character.
#[test]
fn test_char_count_tracked_characters() {
    let content = "## Heading ##\n***bold***\n__emphasis__\n---\n+++\n>> nested\n|| table ||\n[[link]]\n``code``\n<<html>>\n!!";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // (character, expected count, failure message)
    let expectations = [
        ('#', 4, "Should count 4 hashes"),
        ('*', 6, "Should count 6 asterisks"),
        ('_', 4, "Should count 4 underscores"),
        ('-', 3, "Should count 3 hyphens"),
        ('+', 3, "Should count 3 pluses"),
        ('>', 4, "Should count 4 gt (2 nested + 2 in <<html>>)"),
        ('|', 4, "Should count 4 pipes"),
        ('[', 2, "Should count 2 brackets"),
        ('`', 4, "Should count 4 backticks"),
        ('<', 2, "Should count 2 lt"),
        ('!', 2, "Should count 2 exclamations"),
        ('\n', 10, "Should count 10 newlines"),
    ];
    for (ch, expected, message) in expectations {
        assert_eq!(ctx.char_count(ch), expected, "{message}");
    }
}
5320
/// `char_count` is zero for tracked characters that never occur.
#[test]
fn test_char_count_zero_for_absent() {
    let content = "Plain text";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    for ch in ['#', '*', '_', '\n'] {
        assert_eq!(ctx.char_count(ch), 0);
    }
}
5331
/// `char_count` also counts characters outside the tracked set.
#[test]
fn test_char_count_fallback_for_untracked() {
    let content = "@@@ $$ %%%";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // (character, expected count, failure message)
    let expectations = [
        ('@', 3, "Should count 3 @ via fallback"),
        ('$', 2, "Should count 2 $ via fallback"),
        ('%', 3, "Should count 3 % via fallback"),
        ('^', 0, "Should count 0 for absent char"),
    ];
    for (ch, expected, message) in expectations {
        assert_eq!(ctx.char_count(ch), expected, "{message}");
    }
}
5342
/// On an empty document every count is zero and no character is present.
#[test]
fn test_char_count_empty_content() {
    let ctx = LintContext::new("", MarkdownFlavor::Standard, None);

    for ch in ['#', '*', '@'] {
        assert_eq!(ctx.char_count(ch), 0);
    }
    for ch in ['#', '@'] {
        assert!(!ctx.has_char(ch));
    }
}
5353
/// Positions inside `<div>` and `</div>` count as inside a tag; the text
/// between them does not.
#[test]
fn test_is_in_html_tag_simple() {
    let content = "<div>content</div>";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // (byte position, expected result, failure message)
    let cases = [
        (0, true, "Position 0 (<) should be in tag"),
        (1, true, "Position 1 (d) should be in tag"),
        (4, true, "Position 4 (>) should be in tag"),
        (5, false, "Position 5 (c) should not be in tag"),
        (10, false, "Position 10 (t) should not be in tag"),
        (12, true, "Position 12 (<) should be in tag"),
        (17, true, "Position 17 (>) should be in tag"),
    ];
    for (pos, inside, message) in cases {
        assert!(ctx.is_in_html_tag(pos) == inside, "{message}");
    }
}
5376
/// A self-closing `<br/>` covers exactly its own bytes; the surrounding text
/// is outside the tag.
#[test]
fn test_is_in_html_tag_self_closing() {
    let content = "Text <br/> more text";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // (byte position, expected result, failure message)
    let cases = [
        (0, false, "Position 0 should not be in tag"),
        (4, false, "Position 4 (space) should not be in tag"),
        (5, true, "Position 5 (<) should be in tag"),
        (8, true, "Position 8 (/) should be in tag"),
        (9, true, "Position 9 (>) should be in tag"),
        (10, false, "Position 10 (space) should not be in tag"),
    ];
    for (pos, inside, message) in cases {
        assert!(ctx.is_in_html_tag(pos) == inside, "{message}");
    }
}
5394
/// Attributes belong to the opening tag; the link text between the tags does not.
#[test]
fn test_is_in_html_tag_with_attributes() {
    let content = r#"<a href="url" class="link">text</a>"#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // Opening tag spans bytes 0..=26, "text" is 27..=30, closing tag starts at 31.
    let cases = [
        (0, true, "Start of tag"),
        (10, true, "Inside href attribute"),
        (20, true, "Inside class attribute"),
        (26, true, "End of opening tag"),
        (27, false, "Start of content"),
        (30, false, "End of content"),
        (31, true, "Start of closing tag"),
    ];

    for (pos, expected, msg) in cases {
        assert!(ctx.is_in_html_tag(pos) == expected, "{msg}");
    }
}
5413
/// An opening tag that spans several lines is still treated as one tag.
#[test]
fn test_is_in_html_tag_multiline() {
    let content = "<div\n class=\"test\"\n>\ncontent\n</div>";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    for (pos, msg) in [
        (0, "Start of multiline tag"),
        (5, "After first newline in tag"),
        (15, "Inside attribute"),
    ] {
        assert!(ctx.is_in_html_tag(pos), "{msg}");
    }

    // `>` followed by a newline ends the opening tag; two bytes later is the first
    // byte of the text content.
    let tag_close = content.find(">\n").unwrap();
    assert!(!ctx.is_in_html_tag(tag_close + 2), "Content after tag");
}
5428
/// With no HTML in the document, no byte position may be reported as inside a tag.
#[test]
fn test_is_in_html_tag_no_tags() {
    let content = "Plain text without any HTML";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // Probe every byte offset of the content.
    (0..content.len()).for_each(|i| {
        assert!(!ctx.is_in_html_tag(i), "Position {i} should not be in tag");
    });
}
5439
/// A `{{ ... }}` expression: the range covers both brace pairs and everything between.
#[test]
fn test_is_in_jinja_range_expression() {
    let ctx = LintContext::new("Hello {{ name }}!", MarkdownFlavor::Standard, None);

    // The expression occupies bytes 6..=15.
    // (byte position, expected result, failure message)
    let cases = [
        (0, false, "H should not be in Jinja"),
        (5, false, "Space before Jinja should not be in Jinja"),
        (6, true, "First brace should be in Jinja"),
        (7, true, "Second brace should be in Jinja"),
        (10, true, "name should be in Jinja"),
        (14, true, "Closing brace should be in Jinja"),
        (15, true, "Second closing brace should be in Jinja"),
        (16, false, "! should not be in Jinja"),
    ];

    for (pos, expected, msg) in cases {
        assert!(ctx.is_in_jinja_range(pos) == expected, "{msg}");
    }
}
5463
/// `{% ... %}` statements are Jinja ranges; the text between two statements is not.
#[test]
fn test_is_in_jinja_range_statement() {
    let ctx = LintContext::new(
        "{% if condition %}content{% endif %}",
        MarkdownFlavor::Standard,
        None,
    );

    // Opening statement: bytes 0..=17; "content": 18..=24; `{% endif %}` starts at 25.
    let cases = [
        (0, true, "Start of Jinja statement"),
        (5, true, "condition should be in Jinja"),
        (17, true, "End of opening statement"),
        (18, false, "content should not be in Jinja"),
        (25, true, "Start of endif"),
        (32, true, "endif should be in Jinja"),
    ];

    for (pos, expected, msg) in cases {
        assert!(ctx.is_in_jinja_range(pos) == expected, "{msg}");
    }
}
5481
/// Two separate `{{ ... }}` expressions on one line are each detected as their own
/// range, and the text between them is outside both.
#[test]
fn test_is_in_jinja_range_multiple() {
    let content = "{{ a }} and {{ b }}";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // First expression: bytes 0..=6; " and ": 7..=11; second expression: 12..=18.
    // (byte position, expected result)
    let cases = [
        (0, true),
        (3, true),
        (6, true),
        (8, false),
        (11, false),
        (12, true),
        (15, true),
        (18, true),
    ];

    for (pos, expected) in cases {
        // Name the position in the message so a failure pinpoints the offending byte.
        assert_eq!(
            ctx.is_in_jinja_range(pos),
            expected,
            "Position {pos}: unexpected is_in_jinja_range result"
        );
    }
}
5501
/// Content without any Jinja delimiters yields no Jinja range at any byte offset.
#[test]
fn test_is_in_jinja_range_no_jinja() {
    let content = "Plain text with single braces but not Jinja";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // Probe every byte offset of the content.
    (0..content.len()).for_each(|i| {
        assert!(!ctx.is_in_jinja_range(i), "Position {i} should not be in Jinja");
    });
}
5512
/// A reference definition with a double-quoted title records the title's byte range,
/// and `is_in_link_title` treats that range as half-open (`title_byte_end` excluded).
#[test]
fn test_is_in_link_title_with_title() {
    let content = r#"[ref]: https://example.com "Title text"

Some content."#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    assert_eq!(ctx.reference_defs.len(), 1);

    let reference = &ctx.reference_defs[0];
    assert!(reference.title_byte_start.is_some());
    assert!(reference.title_byte_end.is_some());

    let (title_start, title_end) = (
        reference.title_byte_start.unwrap(),
        reference.title_byte_end.unwrap(),
    );

    // Byte 10 lies inside the URL, before the title.
    assert!(!ctx.is_in_link_title(10), "URL should not be in title");

    // Start, interior and last byte of the title range are all inside.
    assert!(ctx.is_in_link_title(title_start), "Title start should be in title");
    assert!(
        ctx.is_in_link_title(title_start + 5),
        "Middle of title should be in title"
    );
    assert!(ctx.is_in_link_title(title_end - 1), "End of title should be in title");

    // The end offset itself is already outside.
    assert!(
        !ctx.is_in_link_title(title_end),
        "After title end should not be in title"
    );
}
5550
/// A reference definition without a title has no title byte range, so no position in
/// the document is reported as being inside a link title.
#[test]
fn test_is_in_link_title_without_title() {
    let content = "[ref]: https://example.com\n\nSome content.";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    assert_eq!(ctx.reference_defs.len(), 1);

    let reference = &ctx.reference_defs[0];
    assert!(reference.title_byte_start.is_none());
    assert!(reference.title_byte_end.is_none());

    // Probe every byte offset of the content.
    (0..content.len()).for_each(|i| {
        assert!(!ctx.is_in_link_title(i), "Position {i} should not be in title");
    });
}
5567
/// Three reference definitions, two titled and one untitled: title ranges are tracked
/// per definition.
#[test]
fn test_is_in_link_title_multiple_refs() {
    let content = r#"[ref1]: /url1 "Title One"
[ref2]: /url2
[ref3]: /url3 "Title Three"
"#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    assert_eq!(ctx.reference_defs.len(), 3);

    let first = ctx.reference_defs.iter().find(|def| def.id == "ref1").unwrap();
    assert!(first.title_byte_start.is_some());

    let second = ctx.reference_defs.iter().find(|def| def.id == "ref2").unwrap();
    assert!(second.title_byte_start.is_none());

    let third = ctx.reference_defs.iter().find(|def| def.id == "ref3").unwrap();
    assert!(third.title_byte_start.is_some());

    // Position checks only run when both ends of the title range are known.
    if let Some((start, end)) = first.title_byte_start.zip(first.title_byte_end) {
        assert!(ctx.is_in_link_title(start + 1));
        assert!(!ctx.is_in_link_title(end + 5));
    }

    if let Some((start, _)) = third.title_byte_start.zip(third.title_byte_end) {
        assert!(ctx.is_in_link_title(start + 1));
    }
}
5602
/// Single-quoted reference titles: when a title range is recorded, it is half-open.
#[test]
fn test_is_in_link_title_single_quotes() {
    let ctx = LintContext::new(
        "[ref]: /url 'Single quoted title'\n",
        MarkdownFlavor::Standard,
        None,
    );

    assert_eq!(ctx.reference_defs.len(), 1);
    let reference = &ctx.reference_defs[0];

    // Position checks only run when both ends of the title range are recorded.
    if let Some((start, end)) = reference.title_byte_start.zip(reference.title_byte_end) {
        assert!(ctx.is_in_link_title(start));
        assert!(ctx.is_in_link_title(start + 5));
        assert!(!ctx.is_in_link_title(end));
    }
}
5617
/// Parenthesized reference titles: the test accepts either outcome. If a title range is
/// recorded it must behave as a half-open range; if no definition or no title range is
/// recorded, no position may be reported as inside a title.
#[test]
fn test_is_in_link_title_parentheses() {
    let content = "[ref]: /url (Parenthesized title)\n";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // Title range of the first definition, if there is a definition with a full range.
    let title_range = ctx
        .reference_defs
        .first()
        .and_then(|def| def.title_byte_start.zip(def.title_byte_end));

    match title_range {
        Some((start, end)) => {
            assert!(ctx.is_in_link_title(start));
            assert!(ctx.is_in_link_title(start + 5));
            assert!(!ctx.is_in_link_title(end));
        }
        None => {
            for i in 0..content.len() {
                assert!(!ctx.is_in_link_title(i));
            }
        }
    }
}
5646
/// Without any reference definitions there are no link titles anywhere in the document.
#[test]
fn test_is_in_link_title_no_refs() {
    let content = "Just plain text without any reference definitions.";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    assert!(ctx.reference_defs.is_empty());

    // Probe every byte offset of the content.
    (0..content.len()).for_each(|i| {
        assert!(!ctx.is_in_link_title(i));
    });
}
5658
/// A single `$...$` span is reported as inline math, with the delimiters stripped from
/// its `content`.
#[test]
fn test_math_spans_inline() {
    let ctx = LintContext::new(
        "Text with inline math $[f](x)$ in it.",
        MarkdownFlavor::Standard,
        None,
    );

    let spans = ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should detect one inline math span");

    let inline = &spans[0];
    assert!(!inline.is_display, "Should be inline math, not display");
    assert_eq!(inline.content, "[f](x)", "Content should be extracted correctly");
}
5675
/// A `$$...$$` block on a single line is reported as one display math span whose
/// content retains the link-like `[x](\zeta)` text.
#[test]
fn test_math_spans_display_single_line() {
    let ctx = LintContext::new(
        "$$X(\\zeta) = \\mathcal Z [x](\\zeta)$$",
        MarkdownFlavor::Standard,
        None,
    );

    let spans = ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should detect one display math span");

    let display = &spans[0];
    assert!(display.is_display, "Should be display math");
    assert!(
        display.content.contains("[x](\\zeta)"),
        "Content should contain the link-like pattern"
    );
}
5691
/// A `$$` block whose delimiters sit on their own lines is one display math span.
#[test]
fn test_math_spans_display_multiline() {
    let ctx = LintContext::new(
        "Before\n\n$$\n[x](\\zeta) = \\sum_k x(k)\n$$\n\nAfter",
        MarkdownFlavor::Standard,
        None,
    );

    let spans = ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should detect one display math span");

    let display = &spans[0];
    assert!(display.is_display, "Should be display math");
}
5703
/// `is_in_math_span` is true for positions inside `$...$` and false before and after it.
#[test]
fn test_is_in_math_span() {
    let content = "Text $[f](x)$ more text";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // Offsets of the opening `$` and of the byte just past the closing `$`.
    let opening = content.find('$').unwrap();
    let past_closing = content.rfind('$').unwrap() + 1;

    assert!(
        ctx.is_in_math_span(opening + 1),
        "Position inside math span should return true"
    );
    assert!(
        ctx.is_in_math_span(opening + 3),
        "Position inside math span should return true"
    );

    assert!(!ctx.is_in_math_span(0), "Position before math span should return false");
    assert!(
        !ctx.is_in_math_span(past_closing + 1),
        "Position after math span should return false"
    );
}
5729
/// Math spans and code spans on the same line are tracked independently, each with its
/// delimiters stripped from `content`.
#[test]
fn test_math_spans_mixed_with_code() {
    let ctx = LintContext::new(
        "Math $[f](x)$ and code `[g](y)` mixed",
        MarkdownFlavor::Standard,
        None,
    );

    let math = ctx.math_spans();
    let code = ctx.code_spans();

    assert_eq!(math.len(), 1, "Should have one math span");
    assert_eq!(code.len(), 1, "Should have one code span");

    assert_eq!(math[0].content, "[f](x)");
    assert_eq!(code[0].content, "[g](y)");
}
5746
/// Content without `$` delimiters produces no math spans.
#[test]
fn test_math_spans_no_math() {
    let ctx = LintContext::new(
        "Regular text without any math at all.",
        MarkdownFlavor::Standard,
        None,
    );

    assert!(ctx.math_spans().is_empty(), "Should have no math spans");
}
5755
/// Two inline spans and one display span in the same line are all detected and
/// classified correctly.
#[test]
fn test_math_spans_multiple() {
    let ctx = LintContext::new(
        "First $a$ and second $b$ and display $$c$$",
        MarkdownFlavor::Standard,
        None,
    );

    let spans = ctx.math_spans();
    assert_eq!(spans.len(), 3, "Should detect three math spans");

    // Split the spans by kind in one pass.
    let (display, inline): (Vec<_>, Vec<_>) = spans.iter().partition(|span| span.is_display);

    assert_eq!(inline.len(), 2, "Should have two inline math spans");
    assert_eq!(display.len(), 1, "Should have one display math span");
}
5771
/// Boundary behaviour of `is_in_math_span`: the range is half-open, so `byte_offset` is
/// inside the span and `byte_end` is outside it.
#[test]
fn test_is_in_math_span_boundary_positions() {
    let ctx = LintContext::new("$[f](x)$", MarkdownFlavor::Standard, None);

    let spans = ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should have one math span");

    let math = &spans[0];
    let (first, last_exclusive) = (math.byte_offset, math.byte_end);

    assert!(ctx.is_in_math_span(first), "Start position should be in span");

    assert!(
        ctx.is_in_math_span(first + 1),
        "Position after start should be in span"
    );

    assert!(
        ctx.is_in_math_span(last_exclusive - 1),
        "Position at end-1 should be in span"
    );

    assert!(
        !ctx.is_in_math_span(last_exclusive),
        "Position at byte_end should NOT be in span (exclusive)"
    );
}
5808
/// A math span that opens the document starts at byte offset 0.
#[test]
fn test_math_spans_at_document_start() {
    let ctx = LintContext::new("$x$ text", MarkdownFlavor::Standard, None);

    let spans = ctx.math_spans();
    assert_eq!(spans.len(), 1);

    let leading = &spans[0];
    assert_eq!(leading.byte_offset, 0, "Math should start at byte 0");
}
5818
/// A math span that closes the document ends exactly at the content length.
#[test]
fn test_math_spans_at_document_end() {
    let content = "text $x$";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    let spans = ctx.math_spans();
    assert_eq!(spans.len(), 1);

    let trailing = &spans[0];
    assert_eq!(trailing.byte_end, content.len(), "Math should end at document end");
}
5828
/// Back-to-back math (`$a$$b$`): however the parser splits it, every byte of the
/// content must fall inside some math span.
#[test]
fn test_math_spans_consecutive() {
    let content = "$a$$b$";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    assert!(
        !ctx.math_spans().is_empty(),
        "Should detect at least one math span"
    );

    // Probe every byte offset of the content.
    (0..content.len()).for_each(|i| {
        assert!(ctx.is_in_math_span(i), "Position {i} should be in a math span");
    });
}
5843
/// A lone `$` used as a currency sign must not produce a math span containing the amount.
#[test]
fn test_math_spans_currency_not_math() {
    let ctx = LintContext::new("Price is $100", MarkdownFlavor::Standard, None);

    let spans = ctx.math_spans();
    // `all` is vacuously true for an empty list, which covers the "no spans" case too.
    assert!(
        spans.iter().all(|span| !span.content.contains("100")),
        "Unbalanced $ should not create math span containing 100"
    );
}
5858
/// `get_reference_url` resolves reference IDs regardless of the case used in either the
/// definition or the lookup, and returns `None` for unknown IDs.
#[test]
fn test_reference_lookup_o1_basic() {
    let content = r#"[ref1]: /url1
[REF2]: /url2 "Title"
[Ref3]: /url3

Use [link][ref1] and [link][REF2]."#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    assert_eq!(ctx.reference_defs.len(), 3);

    // Definition written in lowercase, looked up in several casings.
    assert_eq!(ctx.get_reference_url("ref1"), Some("/url1"));
    assert_eq!(ctx.get_reference_url("REF1"), Some("/url1"));
    assert_eq!(ctx.get_reference_url("Ref1"), Some("/url1"));

    // Definition written in uppercase.
    assert_eq!(ctx.get_reference_url("ref2"), Some("/url2"));
    assert_eq!(ctx.get_reference_url("REF2"), Some("/url2"));

    // Definition written in mixed case.
    assert_eq!(ctx.get_reference_url("ref3"), Some("/url3"));

    // Unknown ID.
    assert_eq!(ctx.get_reference_url("nonexistent"), None);
}
5884
/// `get_reference_def` returns the full definition (URL and title) and matches IDs
/// case-insensitively; unknown IDs yield `None`.
#[test]
fn test_reference_lookup_o1_get_reference_def() {
    let content = r#"[myref]: https://example.com "My Title"
"#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    let exact = ctx.get_reference_def("myref").expect("Should find myref");
    assert_eq!(exact.url, "https://example.com");
    assert_eq!(exact.title.as_deref(), Some("My Title"));

    // The same definition is found through an uppercase lookup.
    let upper = ctx.get_reference_def("MYREF").expect("Should find MYREF");
    assert_eq!(upper.url, "https://example.com");

    assert!(ctx.get_reference_def("nonexistent").is_none());
}
5903
/// `has_reference_def` matches IDs case-insensitively and rejects undefined IDs.
#[test]
fn test_reference_lookup_o1_has_reference_def() {
    let ctx = LintContext::new("[foo]: /foo\n[BAR]: /bar\n", MarkdownFlavor::Standard, None);

    // Lowercase definition, exact and uppercase lookups.
    assert!(ctx.has_reference_def("foo"));
    assert!(ctx.has_reference_def("FOO"));

    // Uppercase definition, lowercase and mixed-case lookups.
    assert!(ctx.has_reference_def("bar"));
    assert!(ctx.has_reference_def("Bar"));

    // Never defined.
    assert!(!ctx.has_reference_def("baz"));
}
5918
/// With no reference definitions in the document, every lookup API reports "not found".
#[test]
fn test_reference_lookup_o1_empty_content() {
    let ctx = LintContext::new("No references here.", MarkdownFlavor::Standard, None);

    assert!(ctx.reference_defs.is_empty());

    let missing_id = "anything";
    assert_eq!(ctx.get_reference_url(missing_id), None);
    assert!(ctx.get_reference_def(missing_id).is_none());
    assert!(!ctx.has_reference_def(missing_id));
}
5929
/// Reference IDs containing dashes, underscores and dots can be looked up verbatim.
#[test]
fn test_reference_lookup_o1_special_characters_in_id() {
    let content = "[ref-with-dash]: /url1\n[ref_with_underscore]: /url2\n[ref.with.dots]: /url3\n";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    assert_eq!(ctx.get_reference_url("ref-with-dash"), Some("/url1"));
    assert_eq!(ctx.get_reference_url("ref_with_underscore"), Some("/url2"));
    assert_eq!(ctx.get_reference_url("ref.with.dots"), Some("/url3"));
}
5942
/// Non-ASCII reference IDs can be looked up, and case-insensitive matching also applies
/// to non-ASCII letters (`ÉMOJI` finds `[émoji]`).
#[test]
fn test_reference_lookup_o1_unicode_id() {
    let content = r#"[日本語]: /japanese
[émoji]: /emoji
"#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // Exact lookups.
    assert_eq!(ctx.get_reference_url("日本語"), Some("/japanese"));
    assert_eq!(ctx.get_reference_url("émoji"), Some("/emoji"));

    // Uppercase lookup with a non-ASCII capital letter.
    assert_eq!(ctx.get_reference_url("ÉMOJI"), Some("/emoji"));
}
5954}