1use crate::config::MarkdownFlavor;
2use crate::inline_config::InlineConfig;
3use crate::rules::front_matter_utils::FrontMatterUtils;
4use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
5use crate::utils::element_cache::ElementCache;
6use crate::utils::mkdocs_html_markdown::MarkdownHtmlTracker;
7use crate::utils::regex_cache::URL_SIMPLE_REGEX;
8use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
9use regex::Regex;
10use std::borrow::Cow;
11use std::collections::HashMap;
12use std::path::PathBuf;
13use std::sync::LazyLock;
14
/// Evaluates `$code` and yields its value, optionally reporting how long it took.
///
/// On non-wasm targets the elapsed time is always measured; when `$profile` is
/// true a `[PROFILE] <name>: <duration>` line is written to stderr.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let timer = std::time::Instant::now();
        let value = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, timer.elapsed());
        }
        value
    }};
}

/// wasm32 variant of `profile_section!`: evaluates `$code` with no timing.
/// `$name` and `$profile` are accepted but never expanded.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
32
33static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
36 Regex::new(
37 r#"(?sx)
38 \[((?:[^\[\]\\]|\\.)*)\] # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
39 (?:
40 \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
41 |
42 \[([^\]]*)\] # Reference ID in group 6
43 )"#
44 ).unwrap()
45});
46
47static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
50 Regex::new(
51 r#"(?sx)
52 !\[((?:[^\[\]\\]|\\.)*)\] # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
53 (?:
54 \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
55 |
56 \[([^\]]*)\] # Reference ID in group 6
57 )"#
58 ).unwrap()
59});
60
61static REF_DEF_PATTERN: LazyLock<Regex> =
63 LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
64
65static BARE_EMAIL_PATTERN: LazyLock<Regex> =
69 LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
70
71static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
73
74#[derive(Debug, Clone)]
76pub struct LineInfo {
77 pub byte_offset: usize,
79 pub byte_len: usize,
81 pub indent: usize,
83 pub visual_indent: usize,
87 pub is_blank: bool,
89 pub in_code_block: bool,
91 pub in_front_matter: bool,
93 pub in_html_block: bool,
95 pub in_html_comment: bool,
97 pub list_item: Option<ListItemInfo>,
99 pub heading: Option<HeadingInfo>,
101 pub blockquote: Option<BlockquoteInfo>,
103 pub in_mkdocstrings: bool,
105 pub in_esm_block: bool,
107 pub in_code_span_continuation: bool,
109 pub is_horizontal_rule: bool,
112 pub in_math_block: bool,
114 pub in_quarto_div: bool,
116 pub in_jsx_expression: bool,
118 pub in_mdx_comment: bool,
120 pub in_jsx_component: bool,
122 pub in_jsx_fragment: bool,
124 pub in_admonition: bool,
126 pub in_content_tab: bool,
128 pub in_mkdocs_html_markdown: bool,
130 pub in_definition_list: bool,
132 pub in_obsidian_comment: bool,
134}
135
136impl LineInfo {
137 pub fn content<'a>(&self, source: &'a str) -> &'a str {
139 &source[self.byte_offset..self.byte_offset + self.byte_len]
140 }
141
142 #[inline]
146 pub fn in_mkdocs_container(&self) -> bool {
147 self.in_admonition || self.in_content_tab || self.in_mkdocs_html_markdown
148 }
149}
150
/// Marker details for a line that starts a list item.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The list marker text.
    pub marker: String,
    /// `true` for an ordered item, `false` for an unordered one.
    pub is_ordered: bool,
    /// Item number, when one applies (presumably only for ordered items).
    pub number: Option<usize>,
    /// Column at which the marker starts.
    pub marker_column: usize,
    /// Column at which the item's content starts.
    pub content_column: usize,
}
165
/// Syntax used to write a heading.
///
/// `Eq` is derived alongside `PartialEq`: the enum has no fields, so equality
/// is total and the type can be used wherever an `Eq` bound is required.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HeadingStyle {
    /// ATX heading.
    ATX,
    /// Setext heading, first underline form (presumably `=`, level 1).
    Setext1,
    /// Setext heading, second underline form (presumably `-`, level 2).
    Setext2,
}
176
177#[derive(Debug, Clone)]
179pub struct ParsedLink<'a> {
180 pub line: usize,
182 pub start_col: usize,
184 pub end_col: usize,
186 pub byte_offset: usize,
188 pub byte_end: usize,
190 pub text: Cow<'a, str>,
192 pub url: Cow<'a, str>,
194 pub is_reference: bool,
196 pub reference_id: Option<Cow<'a, str>>,
198 pub link_type: LinkType,
200}
201
/// A reference the Markdown parser reported as broken (no matching definition).
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text that could not be resolved.
    pub reference: String,
    /// Byte span of the broken link as reported by the parser.
    pub span: std::ops::Range<usize>,
}
210
/// A footnote reference found in the document.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// Footnote identifier.
    pub id: String,
    /// Line on which the reference appears.
    pub line: usize,
    /// Byte position where the reference starts.
    pub byte_offset: usize,
    /// Byte position where the reference ends.
    pub byte_end: usize,
}
223
224#[derive(Debug, Clone)]
226pub struct ParsedImage<'a> {
227 pub line: usize,
229 pub start_col: usize,
231 pub end_col: usize,
233 pub byte_offset: usize,
235 pub byte_end: usize,
237 pub alt_text: Cow<'a, str>,
239 pub url: Cow<'a, str>,
241 pub is_reference: bool,
243 pub reference_id: Option<Cow<'a, str>>,
245 pub link_type: LinkType,
247}
248
/// A reference definition (`[id]: url "title"`) found in the document.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line on which the definition appears.
    pub line: usize,
    /// Reference ID as written; lookups elsewhere in this file lowercase it.
    pub id: String,
    /// Destination URL.
    pub url: String,
    /// Title, when the definition has one.
    pub title: Option<String>,
    /// Byte position where the definition starts (inclusive).
    pub byte_offset: usize,
    /// Byte position where the definition ends (exclusive in the containment checks of this file).
    pub byte_end: usize,
    /// Byte position where the title starts, when a title is present.
    pub title_byte_start: Option<usize>,
    /// Byte position where the title ends, when a title is present.
    pub title_byte_end: Option<usize>,
}
269
/// An inline code span, which may extend over several lines.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// Line on which the span starts.
    pub line: usize,
    /// Line on which the span ends; greater than `line` for multi-line spans.
    pub end_line: usize,
    /// Start column of the span on its first line.
    pub start_col: usize,
    /// End column of the span on its last line.
    pub end_col: usize,
    /// Byte position where the span starts (inclusive).
    pub byte_offset: usize,
    /// Byte position where the span ends (exclusive in the containment checks of this file).
    pub byte_end: usize,
    /// Number of backticks in the delimiter.
    pub backtick_count: usize,
    /// Text of the span.
    pub content: String,
}
290
/// A math span, which may extend over several lines.
#[derive(Debug, Clone)]
pub struct MathSpan {
    /// Line on which the span starts.
    pub line: usize,
    /// Line on which the span ends.
    pub end_line: usize,
    /// Start column of the span.
    pub start_col: usize,
    /// End column of the span.
    pub end_col: usize,
    /// Byte position where the span starts (inclusive).
    pub byte_offset: usize,
    /// Byte position where the span ends (exclusive in the containment checks of this file).
    pub byte_end: usize,
    /// Whether this is display math rather than inline math.
    pub is_display: bool,
    /// Text of the span.
    pub content: String,
}
311
312#[derive(Debug, Clone)]
314pub struct HeadingInfo {
315 pub level: u8,
317 pub style: HeadingStyle,
319 pub marker: String,
321 pub marker_column: usize,
323 pub content_column: usize,
325 pub text: String,
327 pub custom_id: Option<String>,
329 pub raw_text: String,
331 pub has_closing_sequence: bool,
333 pub closing_sequence: String,
335 pub is_valid: bool,
338}
339
340#[derive(Debug, Clone)]
345pub struct ValidHeading<'a> {
346 pub line_num: usize,
348 pub heading: &'a HeadingInfo,
350 pub line_info: &'a LineInfo,
352}
353
354pub struct ValidHeadingsIter<'a> {
359 lines: &'a [LineInfo],
360 current_index: usize,
361}
362
363impl<'a> ValidHeadingsIter<'a> {
364 fn new(lines: &'a [LineInfo]) -> Self {
365 Self {
366 lines,
367 current_index: 0,
368 }
369 }
370}
371
372impl<'a> Iterator for ValidHeadingsIter<'a> {
373 type Item = ValidHeading<'a>;
374
375 fn next(&mut self) -> Option<Self::Item> {
376 while self.current_index < self.lines.len() {
377 let idx = self.current_index;
378 self.current_index += 1;
379
380 let line_info = &self.lines[idx];
381 if let Some(heading) = &line_info.heading
382 && heading.is_valid
383 {
384 return Some(ValidHeading {
385 line_num: idx + 1, heading,
387 line_info,
388 });
389 }
390 }
391 None
392 }
393}
394
/// Details of a blockquote marker found on a line.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Blockquote nesting depth.
    pub nesting_level: usize,
    /// Indentation text before the marker.
    pub indent: String,
    /// Column at which the marker starts.
    pub marker_column: usize,
    /// Blockquote prefix text of the line.
    pub prefix: String,
    /// Line content after the prefix.
    pub content: String,
    /// Whether no space follows the marker.
    pub has_no_space_after_marker: bool,
    /// Whether more than one space follows the marker.
    pub has_multiple_spaces_after_marker: bool,
    /// Whether the line needs an MD028 fix.
    pub needs_md028_fix: bool,
}
415
/// A contiguous list in the document.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// First line of the block (inclusive).
    pub start_line: usize,
    /// Last line of the block (inclusive).
    pub end_line: usize,
    /// Whether the list is ordered.
    pub is_ordered: bool,
    /// Marker used by the list, when one is recorded.
    pub marker: Option<String>,
    /// Blockquote prefix the list is nested under.
    pub blockquote_prefix: String,
    /// Lines on which the block's items start.
    pub item_lines: Vec<usize>,
    /// Nesting depth of the block.
    pub nesting_level: usize,
    /// Width of the widest marker in the block.
    pub max_marker_width: usize,
}
436
437use std::sync::{Arc, OnceLock};
438
/// Per-line list item data keyed by a `usize` (presumably the line index).
///
/// NOTE(review): the tuple has the same field types as `ListItemInfo`, so it is
/// presumably `(is_ordered, marker, marker_column, content_column, number)` —
/// confirm against the code that fills the map.
type ListItemMap = HashMap<usize, (bool, String, usize, usize, Option<usize>)>;
441
/// A list of `(start, end)` byte positions.
type ByteRanges = Vec<(usize, usize)>;
444
/// Occurrence counts for characters that commonly introduce Markdown syntax.
///
/// All counters default to zero.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count of `#`.
    pub hash_count: usize,
    /// Count of `*`.
    pub asterisk_count: usize,
    /// Count of `_`.
    pub underscore_count: usize,
    /// Count of `-`.
    pub hyphen_count: usize,
    /// Count of `+`.
    pub plus_count: usize,
    /// Count of `>`.
    pub gt_count: usize,
    /// Count of `|`.
    pub pipe_count: usize,
    /// Count of brackets; `LintContext::has_char` consults this for `[`.
    pub bracket_count: usize,
    /// Count of backticks.
    pub backtick_count: usize,
    /// Count of `<`.
    pub lt_count: usize,
    /// Count of `!`.
    pub exclamation_count: usize,
    /// Count of newline characters.
    pub newline_count: usize,
}
473
/// An HTML tag found in the document.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line on which the tag appears.
    pub line: usize,
    /// Column at which the tag starts.
    pub start_col: usize,
    /// Column at which the tag ends.
    pub end_col: usize,
    /// Byte position where the tag starts (inclusive).
    pub byte_offset: usize,
    /// Byte position where the tag ends (exclusive in the containment checks of this file).
    pub byte_end: usize,
    /// Name of the tag.
    pub tag_name: String,
    /// Whether this is a closing tag.
    pub is_closing: bool,
    /// Whether this is a self-closing tag.
    pub is_self_closing: bool,
    /// The tag text as written.
    pub raw_content: String,
}
496
/// An emphasis span found in the document.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line on which the span appears.
    pub line: usize,
    /// Column at which the span starts.
    pub start_col: usize,
    /// Column at which the span ends.
    pub end_col: usize,
    /// Byte position where the span starts.
    pub byte_offset: usize,
    /// Byte position where the span ends.
    pub byte_end: usize,
    /// The emphasis marker character.
    pub marker: char,
    /// Number of marker characters in the delimiter.
    pub marker_count: usize,
    /// Text of the span.
    pub content: String,
}
517
/// A table row found in the document.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line on which the row appears.
    pub line: usize,
    /// Whether the row is a separator row.
    pub is_separator: bool,
    /// Number of columns in the row.
    pub column_count: usize,
    /// Alignment of each column, as strings (the set of values is not visible here).
    pub column_alignments: Vec<String>,
}
530
/// A bare URL found in the document.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line on which the URL appears.
    pub line: usize,
    /// Column at which the URL starts.
    pub start_col: usize,
    /// Column at which the URL ends.
    pub end_col: usize,
    /// Byte position where the URL starts.
    pub byte_offset: usize,
    /// Byte position where the URL ends.
    pub byte_end: usize,
    /// The URL text.
    pub url: String,
    /// Kind of URL, as a string (the set of values is not visible here).
    pub url_type: String,
}
549
550pub struct LintContext<'a> {
551 pub content: &'a str,
552 pub line_offsets: Vec<usize>,
553 pub code_blocks: Vec<(usize, usize)>, pub lines: Vec<LineInfo>, pub links: Vec<ParsedLink<'a>>, pub images: Vec<ParsedImage<'a>>, pub broken_links: Vec<BrokenLinkInfo>, pub footnote_refs: Vec<FootnoteRef>, pub reference_defs: Vec<ReferenceDef>, reference_defs_map: HashMap<String, usize>, code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>, math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>, pub list_blocks: Vec<ListBlock>, pub char_frequency: CharFrequency, html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>, emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>, table_rows_cache: OnceLock<Arc<Vec<TableRow>>>, bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>, has_mixed_list_nesting_cache: OnceLock<bool>, html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>, pub table_blocks: Vec<crate::utils::table_utils::TableBlock>, pub line_index: crate::utils::range_utils::LineIndex<'a>, jinja_ranges: Vec<(usize, usize)>, pub flavor: MarkdownFlavor, pub source_file: Option<PathBuf>, jsx_expression_ranges: Vec<(usize, usize)>, mdx_comment_ranges: Vec<(usize, usize)>, citation_ranges: Vec<crate::utils::skip_context::ByteRange>, shortcode_ranges: Vec<(usize, usize)>, inline_config: InlineConfig, obsidian_comment_ranges: Vec<(usize, usize)>, }
583
/// The four consecutive pieces of a blockquote line, each borrowed from the input.
struct BlockquoteComponents<'a> {
    /// Leading spaces and tabs before the first `>`.
    indent: &'a str,
    /// The run of consecutive `>` characters.
    markers: &'a str,
    /// Spaces and tabs directly after the markers.
    spaces_after: &'a str,
    /// Everything that follows.
    content: &'a str,
}

/// Splits `line` into indent, `>` markers, trailing whitespace and content.
///
/// Returns `None` when the first character after the leading spaces/tabs is
/// not `>`, including for an empty or whitespace-only line. Only a run of
/// adjacent `>` characters counts as markers, so in `> > x` the second `>`
/// belongs to the content.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    let is_blank_byte = |b: &u8| matches!(*b, b' ' | b'\t');
    let bytes = line.as_bytes();

    let indent_end = bytes.iter().take_while(|b| is_blank_byte(b)).count();
    if bytes.get(indent_end) != Some(&b'>') {
        return None;
    }

    // Only ASCII bytes are consumed, so every cut point is a valid char boundary.
    let marker_len = bytes[indent_end..].iter().take_while(|b| **b == b'>').count();
    let markers_end = indent_end + marker_len;
    let gap_len = bytes[markers_end..].iter().take_while(|b| is_blank_byte(b)).count();
    let spaces_end = markers_end + gap_len;

    let (head, content) = line.split_at(spaces_end);
    let (head, spaces_after) = head.split_at(markers_end);
    let (indent, markers) = head.split_at(indent_end);

    Some(BlockquoteComponents {
        indent,
        markers,
        spaces_after,
        content,
    })
}
628
629impl<'a> LintContext<'a> {
630 pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
631 #[cfg(not(target_arch = "wasm32"))]
632 let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
633 #[cfg(target_arch = "wasm32")]
634 let profile = false;
635
636 let line_offsets = profile_section!("Line offsets", profile, {
637 let mut offsets = vec![0];
638 for (i, c) in content.char_indices() {
639 if c == '\n' {
640 offsets.push(i + 1);
641 }
642 }
643 offsets
644 });
645
646 let (code_blocks, code_span_ranges) = profile_section!(
648 "Code blocks",
649 profile,
650 CodeBlockUtils::detect_code_blocks_and_spans(content)
651 );
652
653 let html_comment_ranges = profile_section!(
655 "HTML comment ranges",
656 profile,
657 crate::utils::skip_context::compute_html_comment_ranges(content)
658 );
659
660 let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
662 if flavor == MarkdownFlavor::MkDocs {
663 crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
664 } else {
665 Vec::new()
666 }
667 });
668
669 let quarto_div_ranges = profile_section!("Quarto div ranges", profile, {
671 if flavor == MarkdownFlavor::Quarto {
672 crate::utils::quarto_divs::detect_div_block_ranges(content)
673 } else {
674 Vec::new()
675 }
676 });
677
678 let (mut lines, emphasis_spans) = profile_section!(
681 "Basic line info",
682 profile,
683 Self::compute_basic_line_info(
684 content,
685 &line_offsets,
686 &code_blocks,
687 flavor,
688 &html_comment_ranges,
689 &autodoc_ranges,
690 &quarto_div_ranges,
691 )
692 );
693
694 profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));
696
697 profile_section!(
699 "ESM blocks",
700 profile,
701 Self::detect_esm_blocks(content, &mut lines, flavor)
702 );
703
704 let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
706 "JSX/MDX detection",
707 profile,
708 Self::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
709 );
710
711 profile_section!(
713 "MkDocs constructs",
714 profile,
715 Self::detect_mkdocs_line_info(content, &mut lines, flavor)
716 );
717
718 let obsidian_comment_ranges = profile_section!(
720 "Obsidian comments",
721 profile,
722 Self::detect_obsidian_comments(content, &mut lines, flavor, &code_span_ranges)
723 );
724
725 let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));
727
728 profile_section!(
730 "Headings & blockquotes",
731 profile,
732 Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
733 );
734
735 let code_spans = profile_section!(
737 "Code spans",
738 profile,
739 Self::build_code_spans_from_ranges(content, &lines, &code_span_ranges)
740 );
741
742 for span in &code_spans {
745 if span.end_line > span.line {
746 for line_num in (span.line + 1)..=span.end_line {
748 if let Some(line_info) = lines.get_mut(line_num - 1) {
749 line_info.in_code_span_continuation = true;
750 }
751 }
752 }
753 }
754
755 let (links, broken_links, footnote_refs) = profile_section!(
757 "Links",
758 profile,
759 Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
760 );
761
762 let images = profile_section!(
763 "Images",
764 profile,
765 Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
766 );
767
768 let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));
769
770 let reference_defs_map: HashMap<String, usize> = reference_defs
772 .iter()
773 .enumerate()
774 .map(|(idx, def)| (def.id.to_lowercase(), idx))
775 .collect();
776
777 let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));
778
779 let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));
781
782 let table_blocks = profile_section!(
784 "Table blocks",
785 profile,
786 crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
787 content,
788 &code_blocks,
789 &code_spans,
790 &html_comment_ranges,
791 )
792 );
793
794 let line_index = profile_section!(
796 "Line index",
797 profile,
798 crate::utils::range_utils::LineIndex::new(content)
799 );
800
801 let jinja_ranges = profile_section!(
803 "Jinja ranges",
804 profile,
805 crate::utils::jinja_utils::find_jinja_ranges(content)
806 );
807
808 let citation_ranges = profile_section!("Citation ranges", profile, {
810 if flavor == MarkdownFlavor::Quarto {
811 crate::utils::quarto_divs::find_citation_ranges(content)
812 } else {
813 Vec::new()
814 }
815 });
816
817 let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
819 use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
820 let mut ranges = Vec::new();
821 for mat in HUGO_SHORTCODE_REGEX.find_iter(content).flatten() {
822 ranges.push((mat.start(), mat.end()));
823 }
824 ranges
825 });
826
827 let inline_config = InlineConfig::from_content_with_code_blocks(content, &code_blocks);
828
829 Self {
830 content,
831 line_offsets,
832 code_blocks,
833 lines,
834 links,
835 images,
836 broken_links,
837 footnote_refs,
838 reference_defs,
839 reference_defs_map,
840 code_spans_cache: OnceLock::from(Arc::new(code_spans)),
841 math_spans_cache: OnceLock::new(), list_blocks,
843 char_frequency,
844 html_tags_cache: OnceLock::new(),
845 emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
846 table_rows_cache: OnceLock::new(),
847 bare_urls_cache: OnceLock::new(),
848 has_mixed_list_nesting_cache: OnceLock::new(),
849 html_comment_ranges,
850 table_blocks,
851 line_index,
852 jinja_ranges,
853 flavor,
854 source_file,
855 jsx_expression_ranges,
856 mdx_comment_ranges,
857 citation_ranges,
858 shortcode_ranges,
859 inline_config,
860 obsidian_comment_ranges,
861 }
862 }
863
864 pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
869 self.inline_config.is_rule_disabled(rule_name, line_number)
870 }
871
872 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
874 Arc::clone(
875 self.code_spans_cache
876 .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
877 )
878 }
879
880 pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
882 Arc::clone(
883 self.math_spans_cache
884 .get_or_init(|| Arc::new(Self::parse_math_spans(self.content, &self.lines))),
885 )
886 }
887
888 pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
890 let math_spans = self.math_spans();
891 math_spans
892 .iter()
893 .any(|span| byte_pos >= span.byte_offset && byte_pos < span.byte_end)
894 }
895
896 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
898 &self.html_comment_ranges
899 }
900
901 pub fn obsidian_comment_ranges(&self) -> &[(usize, usize)] {
904 &self.obsidian_comment_ranges
905 }
906
907 pub fn is_in_obsidian_comment(&self, byte_pos: usize) -> bool {
911 self.obsidian_comment_ranges
912 .iter()
913 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
914 }
915
916 pub fn is_position_in_obsidian_comment(&self, line_num: usize, col: usize) -> bool {
921 if self.obsidian_comment_ranges.is_empty() {
922 return false;
923 }
924
925 let byte_pos = self.line_index.line_col_to_byte_range(line_num, col).start;
927 self.is_in_obsidian_comment(byte_pos)
928 }
929
930 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
932 Arc::clone(self.html_tags_cache.get_or_init(|| {
933 Arc::new(Self::parse_html_tags(
934 self.content,
935 &self.lines,
936 &self.code_blocks,
937 self.flavor,
938 ))
939 }))
940 }
941
942 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
944 Arc::clone(
945 self.emphasis_spans_cache
946 .get()
947 .expect("emphasis_spans_cache initialized during construction"),
948 )
949 }
950
951 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
953 Arc::clone(
954 self.table_rows_cache
955 .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
956 )
957 }
958
959 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
961 Arc::clone(
962 self.bare_urls_cache
963 .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
964 )
965 }
966
967 pub fn has_mixed_list_nesting(&self) -> bool {
971 *self
972 .has_mixed_list_nesting_cache
973 .get_or_init(|| self.compute_mixed_list_nesting())
974 }
975
976 fn compute_mixed_list_nesting(&self) -> bool {
978 let mut stack: Vec<(usize, bool)> = Vec::new();
983 let mut last_was_blank = false;
984
985 for line_info in &self.lines {
986 if line_info.in_code_block
988 || line_info.in_front_matter
989 || line_info.in_mkdocstrings
990 || line_info.in_html_comment
991 || line_info.in_esm_block
992 {
993 continue;
994 }
995
996 if line_info.is_blank {
998 last_was_blank = true;
999 continue;
1000 }
1001
1002 if let Some(list_item) = &line_info.list_item {
1003 let current_pos = if list_item.marker_column == 1 {
1005 0
1006 } else {
1007 list_item.marker_column
1008 };
1009
1010 if last_was_blank && current_pos == 0 {
1012 stack.clear();
1013 }
1014 last_was_blank = false;
1015
1016 while let Some(&(pos, _)) = stack.last() {
1018 if pos >= current_pos {
1019 stack.pop();
1020 } else {
1021 break;
1022 }
1023 }
1024
1025 if let Some(&(_, parent_is_ordered)) = stack.last()
1027 && parent_is_ordered != list_item.is_ordered
1028 {
1029 return true; }
1031
1032 stack.push((current_pos, list_item.is_ordered));
1033 } else {
1034 last_was_blank = false;
1036 }
1037 }
1038
1039 false
1040 }
1041
1042 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
1044 match self.line_offsets.binary_search(&offset) {
1045 Ok(line) => (line + 1, 1),
1046 Err(line) => {
1047 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
1048 (line, offset - line_start + 1)
1049 }
1050 }
1051 }
1052
1053 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
1055 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
1057 return true;
1058 }
1059
1060 self.code_spans()
1062 .iter()
1063 .any(|span| pos >= span.byte_offset && pos < span.byte_end)
1064 }
1065
1066 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
1068 if line_num > 0 {
1069 self.lines.get(line_num - 1)
1070 } else {
1071 None
1072 }
1073 }
1074
1075 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
1077 self.line_info(line_num).map(|info| info.byte_offset)
1078 }
1079
1080 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
1082 let normalized_id = ref_id.to_lowercase();
1083 self.reference_defs_map
1084 .get(&normalized_id)
1085 .map(|&idx| self.reference_defs[idx].url.as_str())
1086 }
1087
1088 pub fn get_reference_def(&self, ref_id: &str) -> Option<&ReferenceDef> {
1090 let normalized_id = ref_id.to_lowercase();
1091 self.reference_defs_map
1092 .get(&normalized_id)
1093 .map(|&idx| &self.reference_defs[idx])
1094 }
1095
1096 pub fn has_reference_def(&self, ref_id: &str) -> bool {
1098 let normalized_id = ref_id.to_lowercase();
1099 self.reference_defs_map.contains_key(&normalized_id)
1100 }
1101
1102 pub fn is_in_list_block(&self, line_num: usize) -> bool {
1104 self.list_blocks
1105 .iter()
1106 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
1107 }
1108
1109 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
1111 self.list_blocks
1112 .iter()
1113 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
1114 }
1115
1116 pub fn is_in_code_block(&self, line_num: usize) -> bool {
1120 if line_num == 0 || line_num > self.lines.len() {
1121 return false;
1122 }
1123 self.lines[line_num - 1].in_code_block
1124 }
1125
1126 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
1128 if line_num == 0 || line_num > self.lines.len() {
1129 return false;
1130 }
1131 self.lines[line_num - 1].in_front_matter
1132 }
1133
1134 pub fn is_in_html_block(&self, line_num: usize) -> bool {
1136 if line_num == 0 || line_num > self.lines.len() {
1137 return false;
1138 }
1139 self.lines[line_num - 1].in_html_block
1140 }
1141
1142 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
1144 if line_num == 0 || line_num > self.lines.len() {
1145 return false;
1146 }
1147
1148 let col_0indexed = if col > 0 { col - 1 } else { 0 };
1152 let code_spans = self.code_spans();
1153 code_spans.iter().any(|span| {
1154 if line_num < span.line || line_num > span.end_line {
1156 return false;
1157 }
1158
1159 if span.line == span.end_line {
1160 col_0indexed >= span.start_col && col_0indexed < span.end_col
1162 } else if line_num == span.line {
1163 col_0indexed >= span.start_col
1165 } else if line_num == span.end_line {
1166 col_0indexed < span.end_col
1168 } else {
1169 true
1171 }
1172 })
1173 }
1174
1175 #[inline]
1177 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
1178 let code_spans = self.code_spans();
1179 code_spans
1180 .iter()
1181 .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
1182 }
1183
1184 #[inline]
1187 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
1188 self.reference_defs
1189 .iter()
1190 .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
1191 }
1192
1193 #[inline]
1197 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
1198 self.html_comment_ranges
1199 .iter()
1200 .any(|range| byte_pos >= range.start && byte_pos < range.end)
1201 }
1202
1203 #[inline]
1206 pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1207 self.html_tags()
1208 .iter()
1209 .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
1210 }
1211
1212 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1214 self.jinja_ranges
1215 .iter()
1216 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1217 }
1218
1219 #[inline]
1221 pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
1222 self.jsx_expression_ranges
1223 .iter()
1224 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1225 }
1226
1227 #[inline]
1229 pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
1230 self.mdx_comment_ranges
1231 .iter()
1232 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1233 }
1234
1235 pub fn jsx_expression_ranges(&self) -> &[(usize, usize)] {
1237 &self.jsx_expression_ranges
1238 }
1239
1240 pub fn mdx_comment_ranges(&self) -> &[(usize, usize)] {
1242 &self.mdx_comment_ranges
1243 }
1244
1245 #[inline]
1248 pub fn is_in_citation(&self, byte_pos: usize) -> bool {
1249 self.citation_ranges
1250 .iter()
1251 .any(|range| byte_pos >= range.start && byte_pos < range.end)
1252 }
1253
1254 pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
1256 &self.citation_ranges
1257 }
1258
1259 #[inline]
1261 pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
1262 self.shortcode_ranges
1263 .iter()
1264 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1265 }
1266
1267 pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
1269 &self.shortcode_ranges
1270 }
1271
1272 pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1274 self.reference_defs.iter().any(|def| {
1275 if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1276 byte_pos >= start && byte_pos < end
1277 } else {
1278 false
1279 }
1280 })
1281 }
1282
1283 pub fn has_char(&self, ch: char) -> bool {
1285 match ch {
1286 '#' => self.char_frequency.hash_count > 0,
1287 '*' => self.char_frequency.asterisk_count > 0,
1288 '_' => self.char_frequency.underscore_count > 0,
1289 '-' => self.char_frequency.hyphen_count > 0,
1290 '+' => self.char_frequency.plus_count > 0,
1291 '>' => self.char_frequency.gt_count > 0,
1292 '|' => self.char_frequency.pipe_count > 0,
1293 '[' => self.char_frequency.bracket_count > 0,
1294 '`' => self.char_frequency.backtick_count > 0,
1295 '<' => self.char_frequency.lt_count > 0,
1296 '!' => self.char_frequency.exclamation_count > 0,
1297 '\n' => self.char_frequency.newline_count > 0,
1298 _ => self.content.contains(ch), }
1300 }
1301
1302 pub fn char_count(&self, ch: char) -> usize {
1304 match ch {
1305 '#' => self.char_frequency.hash_count,
1306 '*' => self.char_frequency.asterisk_count,
1307 '_' => self.char_frequency.underscore_count,
1308 '-' => self.char_frequency.hyphen_count,
1309 '+' => self.char_frequency.plus_count,
1310 '>' => self.char_frequency.gt_count,
1311 '|' => self.char_frequency.pipe_count,
1312 '[' => self.char_frequency.bracket_count,
1313 '`' => self.char_frequency.backtick_count,
1314 '<' => self.char_frequency.lt_count,
1315 '!' => self.char_frequency.exclamation_count,
1316 '\n' => self.char_frequency.newline_count,
1317 _ => self.content.matches(ch).count(), }
1319 }
1320
1321 pub fn likely_has_headings(&self) -> bool {
1323 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
1325
1326 pub fn likely_has_lists(&self) -> bool {
1328 self.char_frequency.asterisk_count > 0
1329 || self.char_frequency.hyphen_count > 0
1330 || self.char_frequency.plus_count > 0
1331 }
1332
1333 pub fn likely_has_emphasis(&self) -> bool {
1335 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1336 }
1337
1338 pub fn likely_has_tables(&self) -> bool {
1340 self.char_frequency.pipe_count > 2
1341 }
1342
1343 pub fn likely_has_blockquotes(&self) -> bool {
1345 self.char_frequency.gt_count > 0
1346 }
1347
1348 pub fn likely_has_code(&self) -> bool {
1350 self.char_frequency.backtick_count > 0
1351 }
1352
1353 pub fn likely_has_links_or_images(&self) -> bool {
1355 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1356 }
1357
1358 pub fn likely_has_html(&self) -> bool {
1360 self.char_frequency.lt_count > 0
1361 }
1362
1363 pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1368 if let Some(line_info) = self.lines.get(line_idx)
1369 && let Some(ref bq) = line_info.blockquote
1370 {
1371 bq.prefix.trim_end().to_string()
1372 } else {
1373 String::new()
1374 }
1375 }
1376
1377 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1379 self.html_tags()
1380 .iter()
1381 .filter(|tag| tag.line == line_num)
1382 .cloned()
1383 .collect()
1384 }
1385
1386 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1388 self.emphasis_spans()
1389 .iter()
1390 .filter(|span| span.line == line_num)
1391 .cloned()
1392 .collect()
1393 }
1394
1395 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1397 self.table_rows()
1398 .iter()
1399 .filter(|row| row.line == line_num)
1400 .cloned()
1401 .collect()
1402 }
1403
1404 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1406 self.bare_urls()
1407 .iter()
1408 .filter(|url| url.line == line_num)
1409 .cloned()
1410 .collect()
1411 }
1412
1413 #[inline]
1419 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1420 let idx = match lines.binary_search_by(|line| {
1422 if byte_offset < line.byte_offset {
1423 std::cmp::Ordering::Greater
1424 } else if byte_offset > line.byte_offset + line.byte_len {
1425 std::cmp::Ordering::Less
1426 } else {
1427 std::cmp::Ordering::Equal
1428 }
1429 }) {
1430 Ok(idx) => idx,
1431 Err(idx) => idx.saturating_sub(1),
1432 };
1433
1434 let line = &lines[idx];
1435 let line_num = idx + 1;
1436 let col = byte_offset.saturating_sub(line.byte_offset);
1437
1438 (idx, line_num, col)
1439 }
1440
1441 #[inline]
1443 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1444 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1446
1447 if idx > 0 {
1449 let span = &code_spans[idx - 1];
1450 if offset >= span.byte_offset && offset < span.byte_end {
1451 return true;
1452 }
1453 }
1454
1455 false
1456 }
1457
1458 fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1462 use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1463
1464 let mut link_ranges = Vec::new();
1465 let mut options = Options::empty();
1466 options.insert(Options::ENABLE_WIKILINKS);
1467 options.insert(Options::ENABLE_FOOTNOTES);
1468
1469 let parser = Parser::new_ext(content, options).into_offset_iter();
1470 let mut link_stack: Vec<usize> = Vec::new();
1471
1472 for (event, range) in parser {
1473 match event {
1474 Event::Start(Tag::Link { .. }) => {
1475 link_stack.push(range.start);
1476 }
1477 Event::End(TagEnd::Link) => {
1478 if let Some(start_pos) = link_stack.pop() {
1479 link_ranges.push((start_pos, range.end));
1480 }
1481 }
1482 _ => {}
1483 }
1484 }
1485
1486 link_ranges
1487 }
1488
/// Collects links, broken-link reports, and footnote references from `content`.
///
/// Two passes are made:
/// 1. A pulldown-cmark pass (wikilinks and footnotes enabled) records every
///    link the parser emits, every reference passed to the broken-link
///    callback, and every footnote reference.
/// 2. A `LINK_PATTERN` regex pass adds reference-style matches (capture
///    group 6) whose start offset was not recorded by the first pass and that
///    are not escaped, not images, and not inside code blocks, code spans,
///    HTML comments, or MkDocs snippet lines.
///
/// Returns `(links, broken_links, footnote_refs)`. Links found by the regex
/// pass are appended after the parser-found ones.
fn parse_links(
    content: &'a str,
    lines: &[LineInfo],
    code_blocks: &[(usize, usize)],
    code_spans: &[CodeSpan],
    flavor: MarkdownFlavor,
    html_comment_ranges: &[crate::utils::skip_context::ByteRange],
) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
    use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
    use std::collections::HashSet;

    // Capacity hint: one slot per 500 bytes of input.
    let mut links = Vec::with_capacity(content.len() / 500);
    let mut broken_links = Vec::new();
    let mut footnote_refs = Vec::new();

    // Start offsets of links recorded by the parser pass; the regex pass skips them.
    let mut found_positions = HashSet::new();

    let mut options = Options::empty();
    options.insert(Options::ENABLE_WIKILINKS);
    options.insert(Options::ENABLE_FOOTNOTES);

    // The callback only records the reference and its span; it returns `None`
    // and so never supplies a replacement destination.
    let parser = Parser::new_with_broken_link_callback(
        content,
        options,
        Some(|link: BrokenLink<'_>| {
            broken_links.push(BrokenLinkInfo {
                reference: link.reference.to_string(),
                span: link.span.clone(),
            });
            None
        }),
    )
    .into_offset_iter();

    // Currently open links:
    // (start offset, end offset of the start event's range, destination, link type, reference id).
    let mut link_stack: Vec<(
        usize,
        usize,
        pulldown_cmark::CowStr<'a>,
        LinkType,
        pulldown_cmark::CowStr<'a>,
    )> = Vec::new();
    // Text and inline-code fragments seen since the most recent link start;
    // only consumed when building wikilink text.
    let mut text_chunks: Vec<(String, usize, usize)> = Vec::new();
    for (event, range) in parser {
        match event {
            Event::Start(Tag::Link {
                link_type,
                dest_url,
                id,
                ..
            }) => {
                link_stack.push((range.start, range.end, dest_url, link_type, id));
                text_chunks.clear();
            }
            Event::Text(text) if !link_stack.is_empty() => {
                text_chunks.push((text.to_string(), range.start, range.end));
            }
            Event::Code(code) if !link_stack.is_empty() => {
                // Re-wrap inline code in backticks so the chunk mirrors the source form.
                let code_text = format!("`{code}`");
                text_chunks.push((code_text, range.start, range.end));
            }
            Event::End(TagEnd::Link) => {
                if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
                    // Links that start inside an HTML comment are dropped.
                    if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
                        text_chunks.clear();
                        continue;
                    }

                    let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);

                    // Links on MkDocs snippet lines are dropped as well.
                    if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
                        text_chunks.clear();
                        continue;
                    }

                    // The end column is relative to the line containing `range.end`.
                    let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);

                    let is_reference = matches!(
                        link_type,
                        LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
                    );

                    let link_text = if matches!(link_type, LinkType::WikiLink { .. }) {
                        // Wikilinks: use the concatenated event text, or the destination
                        // when no text events were seen.
                        if !text_chunks.is_empty() {
                            let text: String = text_chunks.iter().map(|(t, _, _)| t.as_str()).collect();
                            Cow::Owned(text)
                        } else {
                            Cow::Owned(url.to_string())
                        }
                    } else if start_pos < content.len() {
                        // Other links: slice the text straight out of the source so it can
                        // be borrowed. Scanning starts after the first byte of the link.
                        let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];

                        let mut close_pos = None;
                        // Nesting depth of `[` seen while scanning.
                        let mut depth = 0;
                        let mut in_code_span = false;

                        for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
                            // A byte is escaped when preceded by an odd number of backslashes.
                            let mut backslash_count = 0;
                            let mut j = i;
                            while j > 0 && link_bytes[j - 1] == b'\\' {
                                backslash_count += 1;
                                j -= 1;
                            }
                            let is_escaped = backslash_count % 2 != 0;

                            // Every unescaped backtick toggles code-span state.
                            if byte == b'`' && !is_escaped {
                                in_code_span = !in_code_span;
                            }

                            // Brackets only count when unescaped and outside backticks.
                            if !is_escaped && !in_code_span {
                                if byte == b'[' {
                                    depth += 1;
                                } else if byte == b']' {
                                    if depth == 0 {
                                        // First `]` at depth zero closes the link text.
                                        close_pos = Some(i);
                                        break;
                                    } else {
                                        depth -= 1;
                                    }
                                }
                            }
                        }

                        // Text is everything between the first byte and the closing `]`;
                        // empty when no close was found or the slice is not valid UTF-8.
                        if let Some(pos) = close_pos {
                            Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
                        } else {
                            Cow::Borrowed("")
                        }
                    } else {
                        Cow::Borrowed("")
                    };

                    // Reference links carry a lowercased id: the explicit id when present,
                    // otherwise the link text.
                    let reference_id = if is_reference && !ref_id.is_empty() {
                        Some(Cow::Owned(ref_id.to_lowercase()))
                    } else if is_reference {
                        Some(Cow::Owned(link_text.to_lowercase()))
                    } else {
                        None
                    };

                    found_positions.insert(start_pos);

                    links.push(ParsedLink {
                        line: line_num,
                        start_col: col_start,
                        end_col: col_end,
                        byte_offset: start_pos,
                        byte_end: range.end,
                        text: link_text,
                        url: Cow::Owned(url.to_string()),
                        is_reference,
                        reference_id,
                        link_type,
                    });

                    text_chunks.clear();
                }
            }
            Event::FootnoteReference(footnote_id) => {
                // Footnote references inside HTML comments are ignored.
                if is_in_html_comment_ranges(html_comment_ranges, range.start) {
                    continue;
                }

                let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
                footnote_refs.push(FootnoteRef {
                    id: footnote_id.to_string(),
                    line: line_num,
                    byte_offset: range.start,
                    byte_end: range.end,
                });
            }
            _ => {}
        }
    }

    // Second pass: regex scan for link syntax the parser did not report as a link.
    for cap in LINK_PATTERN.captures_iter(content) {
        let full_match = cap.get(0).unwrap();
        let match_start = full_match.start();
        let match_end = full_match.end();

        // Already recorded by the parser pass.
        if found_positions.contains(&match_start) {
            continue;
        }

        // Preceded by a backslash: treated as escaped.
        if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
            continue;
        }

        // Preceded by `!`: this is image syntax, not a link.
        if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
            continue;
        }

        if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
            continue;
        }

        if Self::is_offset_in_code_span(code_spans, match_start) {
            continue;
        }

        if is_in_html_comment_ranges(html_comment_ranges, match_start) {
            continue;
        }

        let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);

        if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
            continue;
        }

        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);

        let text = cap.get(1).map_or("", |m| m.as_str());

        // Only the reference form (`[text][id]`, capture group 6) is recorded here;
        // inline-form matches are left alone.
        if let Some(ref_id) = cap.get(6) {
            let ref_id_str = ref_id.as_str();
            // An empty id (`[text][]`) falls back to the lowercased link text.
            let normalized_ref = if ref_id_str.is_empty() {
                Cow::Owned(text.to_lowercase())
            } else {
                Cow::Owned(ref_id_str.to_lowercase())
            };

            links.push(ParsedLink {
                line: line_num,
                start_col: col_start,
                end_col: col_end,
                byte_offset: match_start,
                byte_end: match_end,
                text: Cow::Borrowed(text),
                url: Cow::Borrowed(""),
                is_reference: true,
                reference_id: Some(normalized_ref),
                link_type: LinkType::Reference,
            });
        }
    }

    (links, broken_links, footnote_refs)
}
1776
1777 fn parse_images(
1779 content: &'a str,
1780 lines: &[LineInfo],
1781 code_blocks: &[(usize, usize)],
1782 code_spans: &[CodeSpan],
1783 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1784 ) -> Vec<ParsedImage<'a>> {
1785 use crate::utils::skip_context::is_in_html_comment_ranges;
1786 use std::collections::HashSet;
1787
1788 let mut images = Vec::with_capacity(content.len() / 1000);
1790 let mut found_positions = HashSet::new();
1791
1792 let parser = Parser::new(content).into_offset_iter();
1794 let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1795 Vec::new();
1796 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1799 match event {
1800 Event::Start(Tag::Image {
1801 link_type,
1802 dest_url,
1803 id,
1804 ..
1805 }) => {
1806 image_stack.push((range.start, dest_url, link_type, id));
1807 text_chunks.clear();
1808 }
1809 Event::Text(text) if !image_stack.is_empty() => {
1810 text_chunks.push((text.to_string(), range.start, range.end));
1811 }
1812 Event::Code(code) if !image_stack.is_empty() => {
1813 let code_text = format!("`{code}`");
1814 text_chunks.push((code_text, range.start, range.end));
1815 }
1816 Event::End(TagEnd::Image) => {
1817 if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1818 if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1820 continue;
1821 }
1822
1823 if Self::is_offset_in_code_span(code_spans, start_pos) {
1825 continue;
1826 }
1827
1828 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1830 continue;
1831 }
1832
1833 let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1835 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1836
1837 let is_reference = matches!(
1838 link_type,
1839 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1840 );
1841
1842 let alt_text = if start_pos < content.len() {
1845 let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1846
1847 let mut close_pos = None;
1850 let mut depth = 0;
1851
1852 if image_bytes.len() > 2 {
1853 for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1854 let mut backslash_count = 0;
1856 let mut j = i;
1857 while j > 0 && image_bytes[j - 1] == b'\\' {
1858 backslash_count += 1;
1859 j -= 1;
1860 }
1861 let is_escaped = backslash_count % 2 != 0;
1862
1863 if !is_escaped {
1864 if byte == b'[' {
1865 depth += 1;
1866 } else if byte == b']' {
1867 if depth == 0 {
1868 close_pos = Some(i);
1870 break;
1871 } else {
1872 depth -= 1;
1873 }
1874 }
1875 }
1876 }
1877 }
1878
1879 if let Some(pos) = close_pos {
1880 Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1881 } else {
1882 Cow::Borrowed("")
1883 }
1884 } else {
1885 Cow::Borrowed("")
1886 };
1887
1888 let reference_id = if is_reference && !ref_id.is_empty() {
1889 Some(Cow::Owned(ref_id.to_lowercase()))
1890 } else if is_reference {
1891 Some(Cow::Owned(alt_text.to_lowercase())) } else {
1893 None
1894 };
1895
1896 found_positions.insert(start_pos);
1897 images.push(ParsedImage {
1898 line: line_num,
1899 start_col: col_start,
1900 end_col: col_end,
1901 byte_offset: start_pos,
1902 byte_end: range.end,
1903 alt_text,
1904 url: Cow::Owned(url.to_string()),
1905 is_reference,
1906 reference_id,
1907 link_type,
1908 });
1909 }
1910 }
1911 _ => {}
1912 }
1913 }
1914
1915 for cap in IMAGE_PATTERN.captures_iter(content) {
1917 let full_match = cap.get(0).unwrap();
1918 let match_start = full_match.start();
1919 let match_end = full_match.end();
1920
1921 if found_positions.contains(&match_start) {
1923 continue;
1924 }
1925
1926 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1928 continue;
1929 }
1930
1931 if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1933 || Self::is_offset_in_code_span(code_spans, match_start)
1934 || is_in_html_comment_ranges(html_comment_ranges, match_start)
1935 {
1936 continue;
1937 }
1938
1939 if let Some(ref_id) = cap.get(6) {
1941 let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1942 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1943 let alt_text = cap.get(1).map_or("", |m| m.as_str());
1944 let ref_id_str = ref_id.as_str();
1945 let normalized_ref = if ref_id_str.is_empty() {
1946 Cow::Owned(alt_text.to_lowercase())
1947 } else {
1948 Cow::Owned(ref_id_str.to_lowercase())
1949 };
1950
1951 images.push(ParsedImage {
1952 line: line_num,
1953 start_col: col_start,
1954 end_col: col_end,
1955 byte_offset: match_start,
1956 byte_end: match_end,
1957 alt_text: Cow::Borrowed(alt_text),
1958 url: Cow::Borrowed(""),
1959 is_reference: true,
1960 reference_id: Some(normalized_ref),
1961 link_type: LinkType::Reference, });
1963 }
1964 }
1965
1966 images
1967 }
1968
1969 fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1971 let mut refs = Vec::with_capacity(lines.len() / 20); for (line_idx, line_info) in lines.iter().enumerate() {
1975 if line_info.in_code_block {
1977 continue;
1978 }
1979
1980 let line = line_info.content(content);
1981 let line_num = line_idx + 1;
1982
1983 if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1984 let id_raw = cap.get(1).unwrap().as_str();
1985
1986 if id_raw.starts_with('^') {
1989 continue;
1990 }
1991
1992 let id = id_raw.to_lowercase();
1993 let url = cap.get(2).unwrap().as_str().to_string();
1994 let title_match = cap.get(3).or_else(|| cap.get(4));
1995 let title = title_match.map(|m| m.as_str().to_string());
1996
1997 let match_obj = cap.get(0).unwrap();
2000 let byte_offset = line_info.byte_offset + match_obj.start();
2001 let byte_end = line_info.byte_offset + match_obj.end();
2002
2003 let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
2005 let start = line_info.byte_offset + m.start().saturating_sub(1);
2007 let end = line_info.byte_offset + m.end() + 1; (Some(start), Some(end))
2009 } else {
2010 (None, None)
2011 };
2012
2013 refs.push(ReferenceDef {
2014 line: line_num,
2015 id,
2016 url,
2017 title,
2018 byte_offset,
2019 byte_end,
2020 title_byte_start,
2021 title_byte_end,
2022 });
2023 }
2024 }
2025
2026 refs
2027 }
2028
/// Splits a line into its blockquote prefix and the remaining content.
///
/// The prefix covers every `>` marker (nested markers included), the
/// whitespace before each marker, and at most one space or tab directly after
/// each marker. Returns `None` when the line, after leading whitespace, does
/// not start with `>`.
#[inline]
fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
    let mut rest = line;
    let mut prefix_len = 0;

    while let Some(after_marker) = rest.trim_start().strip_prefix('>') {
        // Leading whitespace plus the one-byte `>` marker.
        prefix_len += rest.len() - after_marker.len();

        rest = match after_marker.strip_prefix(|c: char| c == ' ' || c == '\t') {
            Some(after_gap) => {
                // Exactly one separator byte belongs to the prefix.
                prefix_len += 1;
                after_gap
            }
            None => after_marker,
        };
    }

    if prefix_len == 0 {
        // No marker was consumed: not a blockquote line.
        return None;
    }

    Some((&line[..prefix_len], rest))
}
2069
2070 fn detect_list_items_and_emphasis_with_pulldown(
2094 content: &str,
2095 line_offsets: &[usize],
2096 flavor: MarkdownFlavor,
2097 front_matter_end: usize,
2098 code_blocks: &[(usize, usize)],
2099 ) -> (ListItemMap, Vec<EmphasisSpan>) {
2100 use std::collections::HashMap;
2101
2102 let mut list_items = HashMap::new();
2103 let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
2104
2105 let mut options = Options::empty();
2106 options.insert(Options::ENABLE_TABLES);
2107 options.insert(Options::ENABLE_FOOTNOTES);
2108 options.insert(Options::ENABLE_STRIKETHROUGH);
2109 options.insert(Options::ENABLE_TASKLISTS);
2110 options.insert(Options::ENABLE_GFM);
2112
2113 let _ = flavor;
2115
2116 let parser = Parser::new_ext(content, options).into_offset_iter();
2117 let mut list_depth: usize = 0;
2118 let mut list_stack: Vec<bool> = Vec::new();
2119
2120 for (event, range) in parser {
2121 match event {
2122 Event::Start(Tag::Emphasis) | Event::Start(Tag::Strong) => {
2124 let marker_count = if matches!(event, Event::Start(Tag::Strong)) {
2125 2
2126 } else {
2127 1
2128 };
2129 let match_start = range.start;
2130 let match_end = range.end;
2131
2132 if !CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2134 let marker = content[match_start..].chars().next().unwrap_or('*');
2136 if marker == '*' || marker == '_' {
2137 let content_start = match_start + marker_count;
2139 let content_end = if match_end >= marker_count {
2140 match_end - marker_count
2141 } else {
2142 match_end
2143 };
2144 let content_part = if content_start < content_end && content_end <= content.len() {
2145 &content[content_start..content_end]
2146 } else {
2147 ""
2148 };
2149
2150 let line_idx = match line_offsets.binary_search(&match_start) {
2152 Ok(idx) => idx,
2153 Err(idx) => idx.saturating_sub(1),
2154 };
2155 let line_num = line_idx + 1;
2156 let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
2157 let col_start = match_start - line_start;
2158 let col_end = match_end - line_start;
2159
2160 emphasis_spans.push(EmphasisSpan {
2161 line: line_num,
2162 start_col: col_start,
2163 end_col: col_end,
2164 byte_offset: match_start,
2165 byte_end: match_end,
2166 marker,
2167 marker_count,
2168 content: content_part.to_string(),
2169 });
2170 }
2171 }
2172 }
2173 Event::Start(Tag::List(start_number)) => {
2174 list_depth += 1;
2175 list_stack.push(start_number.is_some());
2176 }
2177 Event::End(TagEnd::List(_)) => {
2178 list_depth = list_depth.saturating_sub(1);
2179 list_stack.pop();
2180 }
2181 Event::Start(Tag::Item) if list_depth > 0 => {
2182 let current_list_is_ordered = list_stack.last().copied().unwrap_or(false);
2184 let item_start = range.start;
2186
2187 let mut line_idx = match line_offsets.binary_search(&item_start) {
2189 Ok(idx) => idx,
2190 Err(idx) => idx.saturating_sub(1),
2191 };
2192
2193 if item_start < content.len() && content.as_bytes()[item_start] == b'\n' {
2197 line_idx += 1;
2198 }
2199
2200 if front_matter_end > 0 && line_idx < front_matter_end {
2202 continue;
2203 }
2204
2205 if line_idx < line_offsets.len() {
2206 let line_start_byte = line_offsets[line_idx];
2207 let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
2208 let line = &content[line_start_byte..line_end.min(content.len())];
2209
2210 let line = line
2212 .strip_suffix('\n')
2213 .or_else(|| line.strip_suffix("\r\n"))
2214 .unwrap_or(line);
2215
2216 let blockquote_parse = Self::parse_blockquote_prefix(line);
2218 let (blockquote_prefix_len, line_to_parse) = if let Some((prefix, content)) = blockquote_parse {
2219 (prefix.len(), content)
2220 } else {
2221 (0, line)
2222 };
2223
2224 if current_list_is_ordered {
2226 if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
2227 Self::parse_ordered_list(line_to_parse)
2228 {
2229 let marker = format!("{number_str}{delimiter}");
2230 let marker_column = blockquote_prefix_len + leading_spaces.len();
2231 let content_column = marker_column + marker.len() + spacing.len();
2232 let number = number_str.parse().ok();
2233
2234 list_items.entry(line_start_byte).or_insert((
2235 true,
2236 marker,
2237 marker_column,
2238 content_column,
2239 number,
2240 ));
2241 }
2242 } else if let Some((leading_spaces, marker, spacing, _content)) =
2243 Self::parse_unordered_list(line_to_parse)
2244 {
2245 let marker_column = blockquote_prefix_len + leading_spaces.len();
2246 let content_column = marker_column + 1 + spacing.len();
2247
2248 list_items.entry(line_start_byte).or_insert((
2249 false,
2250 marker.to_string(),
2251 marker_column,
2252 content_column,
2253 None,
2254 ));
2255 }
2256 }
2257 }
2258 _ => {}
2259 }
2260 }
2261
2262 (list_items, emphasis_spans)
2263 }
2264
/// Splits a line into the parts of an unordered list item.
///
/// Returns `(indent, marker, spacing, content)` where `indent` and `spacing`
/// are runs of spaces/tabs, `marker` is one of `-`, `*`, `+`, and `content` is
/// the rest of the line. Returns `None` when the first character after the
/// indent is not a list marker (or the line ends there). No whitespace is
/// required after the marker.
#[inline]
fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
    fn is_blank(c: char) -> bool {
        c == ' ' || c == '\t'
    }

    let after_indent = line.trim_start_matches(is_blank);
    let indent = &line[..line.len() - after_indent.len()];

    let mut rest = after_indent.chars();
    let marker = rest.next().filter(|&c| c == '-' || c == '*' || c == '+')?;
    let after_marker = rest.as_str();

    let content = after_marker.trim_start_matches(is_blank);
    let spacing = &after_marker[..after_marker.len() - content.len()];

    Some((indent, marker, spacing, content))
}
2297
/// Splits a line into the parts of an ordered list item.
///
/// Returns `(indent, number, delimiter, spacing, content)` where `indent` and
/// `spacing` are runs of spaces/tabs, `number` is a non-empty run of ASCII
/// digits, `delimiter` is `.` or `)`, and `content` is the rest of the line.
/// Returns `None` when there are no digits after the indent or the digits are
/// not followed by a delimiter. No whitespace is required after the delimiter.
#[inline]
fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
    fn is_blank(c: char) -> bool {
        c == ' ' || c == '\t'
    }

    let after_indent = line.trim_start_matches(is_blank);
    let indent = &line[..line.len() - after_indent.len()];

    let after_number = after_indent.trim_start_matches(|c: char| c.is_ascii_digit());
    let number = &after_indent[..after_indent.len() - after_number.len()];
    if number.is_empty() {
        return None;
    }

    let mut rest = after_number.chars();
    let delimiter = rest.next().filter(|&c| c == '.' || c == ')')?;
    let after_delimiter = rest.as_str();

    let content = after_delimiter.trim_start_matches(is_blank);
    let spacing = &after_delimiter[..after_delimiter.len() - content.len()];

    Some((indent, number, delimiter, spacing, content))
}
2345
/// Builds a per-line flag vector marking lines that overlap a code block.
///
/// `line_offsets` holds the starting byte offset of each line in ascending
/// order; the result has one entry per offset. Block bounds that fall inside a
/// multi-byte character are widened outward to the nearest character boundary
/// (start moves back, end moves forward), and the end is clamped to the
/// content length. A line is flagged from the one containing the block start
/// through the last one starting before the block end.
fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
    let mut in_code_block = vec![false; line_offsets.len()];

    for &(start, end) in code_blocks {
        // Snap the start backwards onto a character boundary.
        let mut block_start = start;
        while block_start > 0 && !content.is_char_boundary(block_start) {
            block_start -= 1;
        }

        // Clamp the end, then snap it forwards onto a character boundary.
        let mut block_end = end.min(content.len());
        while block_end < content.len() && !content.is_char_boundary(block_end) {
            block_end += 1;
        }

        // Line holding the block start = last line starting at or before it.
        let first_line = line_offsets
            .partition_point(|&offset| offset <= block_start)
            .saturating_sub(1);
        // One past the last line that starts before the block end.
        let last_line = line_offsets.partition_point(|&offset| offset < block_end);

        // `get_mut` yields `None` for an inverted range, leaving the map untouched.
        if let Some(covered) = in_code_block.get_mut(first_line..last_line) {
            covered.fill(true);
        }
    }

    in_code_block
}
2405
/// Builds a per-line flag vector marking lines that belong to a `$$` math block.
///
/// A line whose trimmed text is exactly `$$` toggles the math state and is
/// itself flagged; every line seen while the state is open is flagged too.
/// Lines flagged in `code_block_map` are never marked and never toggle the
/// state. An unclosed `$$` keeps the state open to the end of the content.
fn compute_math_block_line_map(content: &str, code_block_map: &[bool]) -> Vec<bool> {
    let mut inside_math = false;

    content
        .lines()
        .enumerate()
        .map(|(idx, text)| {
            let in_code = code_block_map.get(idx).copied().unwrap_or(false);
            if in_code {
                return false;
            }

            if text.trim() == "$$" {
                // Delimiter lines are part of the block whether opening or closing.
                inside_math = !inside_math;
                true
            } else {
                inside_math
            }
        })
        .collect()
}
2443
2444 fn compute_basic_line_info(
2447 content: &str,
2448 line_offsets: &[usize],
2449 code_blocks: &[(usize, usize)],
2450 flavor: MarkdownFlavor,
2451 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2452 autodoc_ranges: &[crate::utils::skip_context::ByteRange],
2453 quarto_div_ranges: &[crate::utils::skip_context::ByteRange],
2454 ) -> (Vec<LineInfo>, Vec<EmphasisSpan>) {
2455 let content_lines: Vec<&str> = content.lines().collect();
2456 let mut lines = Vec::with_capacity(content_lines.len());
2457
2458 let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
2460
2461 let math_block_map = Self::compute_math_block_line_map(content, &code_block_map);
2463
2464 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2467
2468 let (list_item_map, emphasis_spans) = Self::detect_list_items_and_emphasis_with_pulldown(
2471 content,
2472 line_offsets,
2473 flavor,
2474 front_matter_end,
2475 code_blocks,
2476 );
2477
2478 for (i, line) in content_lines.iter().enumerate() {
2479 let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
2480 let indent = line.len() - line.trim_start().len();
2481 let visual_indent = ElementCache::calculate_indentation_width_default(line);
2483
2484 let blockquote_parse = Self::parse_blockquote_prefix(line);
2486
2487 let is_blank = if let Some((_, content)) = blockquote_parse {
2489 content.trim().is_empty()
2491 } else {
2492 line.trim().is_empty()
2493 };
2494
2495 let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
2497
2498 let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
2500 && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
2501 let line_end_offset = byte_offset + line.len();
2504 let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
2505 html_comment_ranges,
2506 byte_offset,
2507 line_end_offset,
2508 );
2509 let list_item =
2512 list_item_map
2513 .get(&byte_offset)
2514 .map(
2515 |(is_ordered, marker, marker_column, content_column, number)| ListItemInfo {
2516 marker: marker.clone(),
2517 is_ordered: *is_ordered,
2518 number: *number,
2519 marker_column: *marker_column,
2520 content_column: *content_column,
2521 },
2522 );
2523
2524 let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2527 let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2528
2529 let in_math_block = math_block_map.get(i).copied().unwrap_or(false);
2531
2532 let in_quarto_div = flavor == MarkdownFlavor::Quarto
2534 && crate::utils::quarto_divs::is_within_div_block_ranges(quarto_div_ranges, byte_offset);
2535
2536 lines.push(LineInfo {
2537 byte_offset,
2538 byte_len: line.len(),
2539 indent,
2540 visual_indent,
2541 is_blank,
2542 in_code_block,
2543 in_front_matter,
2544 in_html_block: false, in_html_comment,
2546 list_item,
2547 heading: None, blockquote: None, in_mkdocstrings,
2550 in_esm_block: false, in_code_span_continuation: false, is_horizontal_rule: is_hr,
2553 in_math_block,
2554 in_quarto_div,
2555 in_jsx_expression: false, in_mdx_comment: false, in_jsx_component: false, in_jsx_fragment: false, in_admonition: false, in_content_tab: false, in_mkdocs_html_markdown: false, in_definition_list: false, in_obsidian_comment: false, });
2565 }
2566
2567 (lines, emphasis_spans)
2568 }
2569
/// Fills in `blockquote` and `heading` on each entry of `lines`, and may set
/// `is_horizontal_rule` for blockquoted rules.
///
/// For every line:
/// - Outside front matter, a line that `parse_blockquote_detailed` accepts gets
///   a `BlockquoteInfo`.
/// - Lines in code blocks, front matter, HTML blocks, and blank lines get no
///   heading.
/// - A line matching the ATX pattern becomes an ATX heading unless it is a
///   MkDocs snippet marker, starts inside an HTML comment, or starts strictly
///   inside one of `link_byte_ranges`.
/// - Otherwise, a line followed by a `===`/`---` underline becomes a Setext
///   heading unless it looks like another construct (list item, thematic
///   break, blockquote, code fence, HTML, ...).
fn detect_headings_and_blockquotes(
    content: &str,
    lines: &mut [LineInfo],
    flavor: MarkdownFlavor,
    html_comment_ranges: &[crate::utils::skip_context::ByteRange],
    link_byte_ranges: &[(usize, usize)],
) {
    // Groups: 1 = leading whitespace, 2 = one to six `#`, 3 = whitespace after, 4 = rest of line.
    static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
        LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
    // A line made only of `=` or only of `-`, with optional surrounding whitespace.
    static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
        LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());

    let content_lines: Vec<&str> = content.lines().collect();

    let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);

    for i in 0..lines.len() {
        let line = content_lines[i];

        // Blockquote detection runs before the skip checks below, so it also
        // applies to lines inside code blocks and HTML blocks (but not front matter).
        if !(front_matter_end > 0 && i < front_matter_end)
            && let Some(bq) = parse_blockquote_detailed(line)
        {
            let nesting_level = bq.markers.len();
            let marker_column = bq.indent.len();
            let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
            let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
            // Only literal space characters are counted, not tabs.
            let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
            let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();

            lines[i].blockquote = Some(BlockquoteInfo {
                nesting_level,
                indent: bq.indent.to_string(),
                marker_column,
                prefix,
                content: bq.content.to_string(),
                has_no_space_after_marker: has_no_space,
                has_multiple_spaces_after_marker: has_multiple_spaces,
                needs_md028_fix,
            });

            // A horizontal rule inside a blockquote is flagged on the line as well.
            if !lines[i].in_code_block && is_horizontal_rule_content(bq.content.trim()) {
                lines[i].is_horizontal_rule = true;
            }
        }

        // No heading detection in code blocks, front matter, HTML blocks, or on blank lines.
        if lines[i].in_code_block {
            continue;
        }

        if front_matter_end > 0 && i < front_matter_end {
            continue;
        }

        if lines[i].in_html_block {
            continue;
        }

        if lines[i].is_blank {
            continue;
        }

        // MkDocs snippet section markers are never treated as ATX headings.
        let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
            crate::utils::mkdocs_snippets::is_snippet_section_start(line)
                || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
        } else {
            false
        };

        if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
            if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
                continue;
            }
            // Skip lines that begin strictly inside a link's byte range
            // (i.e. a link that started on an earlier line).
            let line_offset = lines[i].byte_offset;
            if link_byte_ranges
                .iter()
                .any(|&(start, end)| line_offset > start && line_offset < end)
            {
                continue;
            }
            let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
            let hashes = caps.get(2).map_or("", |m| m.as_str());
            let spaces_after = caps.get(3).map_or("", |m| m.as_str());
            let rest = caps.get(4).map_or("", |m| m.as_str());

            let level = hashes.len() as u8;
            let marker_column = leading_spaces.len();

            // Split `rest` into heading text and an optional closing `#` sequence.
            let (text, has_closing, closing_seq) = {
                // Set a trailing ` {#id}` aside so it is not mistaken for closing hashes.
                let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
                    if rest[id_start..].trim_end().ends_with('}') {
                        (&rest[..id_start], &rest[id_start..])
                    } else {
                        (rest, "")
                    }
                } else {
                    (rest, "")
                };

                let trimmed_rest = rest_without_id.trim_end();
                if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
                    // Work in char positions so multi-byte text before the hashes is handled.
                    let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();

                    let last_hash_char_idx = char_positions
                        .iter()
                        .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);

                    if let Some(mut char_idx) = last_hash_char_idx {
                        // Walk back to the first `#` of the final run.
                        while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
                            char_idx -= 1;
                        }

                        let start_of_hashes = char_positions[char_idx].0;

                        // A closing sequence must start the text or follow whitespace.
                        let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();

                        // It must also run to the end of the trimmed text.
                        let potential_closing = &trimmed_rest[start_of_hashes..];
                        let is_all_hashes = potential_closing.chars().all(|c| c == '#');

                        if is_all_hashes && has_space_before {
                            let closing_hashes = potential_closing.to_string();
                            // Re-attach the custom id after removing the closing hashes.
                            let text_part = if !custom_id_part.is_empty() {
                                format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
                            } else {
                                trimmed_rest[..start_of_hashes].trim_end().to_string()
                            };
                            (text_part, true, closing_hashes)
                        } else {
                            (rest.to_string(), false, String::new())
                        }
                    } else {
                        (rest.to_string(), false, String::new())
                    }
                } else {
                    (rest.to_string(), false, String::new())
                }
            };

            let content_column = marker_column + hashes.len() + spaces_after.len();

            let raw_text = text.trim().to_string();
            let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);

            // No inline id: look for a standalone attribute list on the next line.
            if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
                let next_line = content_lines[i + 1];
                if !lines[i + 1].in_code_block
                    && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
                    && let Some(next_line_id) =
                        crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
                {
                    custom_id = Some(next_line_id);
                }
            }

            // Valid when there is whitespace after the hashes, no text at all,
            // the level is 2 or more, or the text starts with an uppercase character.
            let is_valid = !spaces_after.is_empty()
                || rest.is_empty()
                || level > 1
                || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());

            lines[i].heading = Some(HeadingInfo {
                level,
                style: HeadingStyle::ATX,
                marker: hashes.to_string(),
                marker_column,
                content_column,
                text: clean_text,
                custom_id,
                raw_text,
                has_closing_sequence: has_closing,
                closing_sequence: closing_seq,
                is_valid,
            });
        }
        // Not an ATX heading: check whether the next line is a Setext underline.
        else if i + 1 < content_lines.len() && i + 1 < lines.len() {
            let next_line = content_lines[i + 1];
            if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
                if front_matter_end > 0 && i < front_matter_end {
                    continue;
                }

                if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
                {
                    continue;
                }

                let content_line = line.trim();

                // Lines starting with a bullet character are not Setext heading text.
                if content_line.starts_with('-') || content_line.starts_with('*') || content_line.starts_with('+') {
                    continue;
                }

                // Three or more underscores (ignoring whitespace) form a thematic break.
                if content_line.starts_with('_') {
                    let non_ws: String = content_line.chars().filter(|c| !c.is_whitespace()).collect();
                    if non_ws.len() >= 3 && non_ws.chars().all(|c| c == '_') {
                        continue;
                    }
                }

                // Digits followed by `.` or `)` look like an ordered list item.
                if let Some(first_char) = content_line.chars().next()
                    && first_char.is_ascii_digit()
                {
                    let num_end = content_line.chars().take_while(|c| c.is_ascii_digit()).count();
                    if num_end < content_line.len() {
                        let next = content_line.chars().nth(num_end);
                        if next == Some('.') || next == Some(')') {
                            continue;
                        }
                    }
                }

                // Reachable when the ATX branch above was skipped for a snippet line.
                if ATX_HEADING_REGEX.is_match(line) {
                    continue;
                }

                if content_line.starts_with('>') {
                    continue;
                }

                // Code fence openers are not heading text.
                let trimmed_start = line.trim_start();
                if trimmed_start.len() >= 3 {
                    let first_three: String = trimmed_start.chars().take(3).collect();
                    if first_three == "```" || first_three == "~~~" {
                        continue;
                    }
                }

                // Lines starting with `<` are treated as HTML.
                if content_line.starts_with('<') {
                    continue;
                }

                let underline = next_line.trim();

                // `=` underlines give level 1, `-` underlines level 2.
                let level = if underline.starts_with('=') { 1 } else { 2 };
                let style = if level == 1 {
                    HeadingStyle::Setext1
                } else {
                    HeadingStyle::Setext2
                };

                let raw_text = line.trim().to_string();
                let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);

                // No inline id: look for a standalone attribute list after the underline.
                if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
                    let attr_line = content_lines[i + 2];
                    if !lines[i + 2].in_code_block
                        && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
                        && let Some(attr_line_id) =
                            crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
                    {
                        custom_id = Some(attr_line_id);
                    }
                }

                // For Setext headings the marker is the underline text, and
                // `marker_column` is the underline's indentation.
                lines[i].heading = Some(HeadingInfo {
                    level,
                    style,
                    marker: underline.to_string(),
                    marker_column: next_line.len() - next_line.trim_start().len(),
                    content_column: lines[i].indent,
                    text: clean_text,
                    custom_id,
                    raw_text,
                    has_closing_sequence: false,
                    closing_sequence: String::new(),
                    is_valid: true,
                });
            }
        }
    }
}
2903
2904 fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2906 const BLOCK_ELEMENTS: &[&str] = &[
2909 "address",
2910 "article",
2911 "aside",
2912 "audio",
2913 "blockquote",
2914 "canvas",
2915 "details",
2916 "dialog",
2917 "dd",
2918 "div",
2919 "dl",
2920 "dt",
2921 "embed",
2922 "fieldset",
2923 "figcaption",
2924 "figure",
2925 "footer",
2926 "form",
2927 "h1",
2928 "h2",
2929 "h3",
2930 "h4",
2931 "h5",
2932 "h6",
2933 "header",
2934 "hr",
2935 "iframe",
2936 "li",
2937 "main",
2938 "menu",
2939 "nav",
2940 "noscript",
2941 "object",
2942 "ol",
2943 "p",
2944 "picture",
2945 "pre",
2946 "script",
2947 "search",
2948 "section",
2949 "source",
2950 "style",
2951 "summary",
2952 "svg",
2953 "table",
2954 "tbody",
2955 "td",
2956 "template",
2957 "textarea",
2958 "tfoot",
2959 "th",
2960 "thead",
2961 "tr",
2962 "track",
2963 "ul",
2964 "video",
2965 ];
2966
2967 let mut i = 0;
2968 while i < lines.len() {
2969 if lines[i].in_code_block || lines[i].in_front_matter {
2971 i += 1;
2972 continue;
2973 }
2974
2975 let trimmed = lines[i].content(content).trim_start();
2976
2977 if trimmed.starts_with('<') && trimmed.len() > 1 {
2979 let after_bracket = &trimmed[1..];
2981 let is_closing = after_bracket.starts_with('/');
2982 let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2983
2984 let tag_name = tag_start
2986 .chars()
2987 .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2988 .collect::<String>()
2989 .to_lowercase();
2990
2991 if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2993 lines[i].in_html_block = true;
2995
2996 if !is_closing {
3001 let closing_tag = format!("</{tag_name}>");
3002
3003 let same_line_close = lines[i].content(content).contains(&closing_tag);
3006
3007 if !same_line_close {
3009 let allow_blank_lines = tag_name == "style" || tag_name == "script";
3011 let mut j = i + 1;
3012 let mut found_closing_tag = false;
3013 while j < lines.len() && j < i + 100 {
3014 if !allow_blank_lines && lines[j].is_blank {
3017 break;
3018 }
3019
3020 lines[j].in_html_block = true;
3021
3022 if lines[j].content(content).contains(&closing_tag) {
3024 found_closing_tag = true;
3025 }
3026
3027 if found_closing_tag {
3030 j += 1;
3031 while j < lines.len() && j < i + 100 {
3033 if lines[j].is_blank {
3034 break;
3035 }
3036 lines[j].in_html_block = true;
3037 j += 1;
3038 }
3039 break;
3040 }
3041 j += 1;
3042 }
3043 }
3044 }
3045 }
3046 }
3047
3048 i += 1;
3049 }
3050 }
3051
3052 fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
3055 if !flavor.supports_esm_blocks() {
3057 return;
3058 }
3059
3060 let mut in_multiline_import = false;
3061
3062 for line in lines.iter_mut() {
3063 if line.in_code_block || line.in_front_matter || line.in_html_comment {
3065 in_multiline_import = false;
3066 continue;
3067 }
3068
3069 let line_content = line.content(content);
3070 let trimmed = line_content.trim();
3071
3072 if in_multiline_import {
3074 line.in_esm_block = true;
3075 if trimmed.ends_with('\'')
3078 || trimmed.ends_with('"')
3079 || trimmed.ends_with("';")
3080 || trimmed.ends_with("\";")
3081 || line_content.contains(';')
3082 {
3083 in_multiline_import = false;
3084 }
3085 continue;
3086 }
3087
3088 if line.is_blank {
3090 continue;
3091 }
3092
3093 if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
3095 line.in_esm_block = true;
3096
3097 let is_import = trimmed.starts_with("import ");
3105
3106 let is_complete =
3108 trimmed.ends_with(';')
3110 || (trimmed.contains(" from ") && (trimmed.ends_with('\'') || trimmed.ends_with('"')))
3112 || (!is_import && !trimmed.contains(" from ") && (
3114 trimmed.starts_with("export const ")
3115 || trimmed.starts_with("export let ")
3116 || trimmed.starts_with("export var ")
3117 || trimmed.starts_with("export function ")
3118 || trimmed.starts_with("export class ")
3119 || trimmed.starts_with("export default ")
3120 ));
3121
3122 if !is_complete && is_import {
3123 if trimmed.contains('{') && !trimmed.contains('}') {
3127 in_multiline_import = true;
3128 }
3129 }
3130 }
3131 }
3132 }
3133
3134 fn detect_jsx_and_mdx_comments(
3137 content: &str,
3138 lines: &mut [LineInfo],
3139 flavor: MarkdownFlavor,
3140 code_blocks: &[(usize, usize)],
3141 ) -> (ByteRanges, ByteRanges) {
3142 if !flavor.supports_jsx() {
3144 return (Vec::new(), Vec::new());
3145 }
3146
3147 let mut jsx_expression_ranges: Vec<(usize, usize)> = Vec::new();
3148 let mut mdx_comment_ranges: Vec<(usize, usize)> = Vec::new();
3149
3150 if !content.contains('{') {
3152 return (jsx_expression_ranges, mdx_comment_ranges);
3153 }
3154
3155 let bytes = content.as_bytes();
3156 let mut i = 0;
3157
3158 while i < bytes.len() {
3159 if bytes[i] == b'{' {
3160 if code_blocks.iter().any(|(start, end)| i >= *start && i < *end) {
3162 i += 1;
3163 continue;
3164 }
3165
3166 let start = i;
3167
3168 if i + 2 < bytes.len() && &bytes[i + 1..i + 3] == b"/*" {
3170 let mut j = i + 3;
3172 while j + 2 < bytes.len() {
3173 if &bytes[j..j + 2] == b"*/" && j + 2 < bytes.len() && bytes[j + 2] == b'}' {
3174 let end = j + 3;
3175 mdx_comment_ranges.push((start, end));
3176
3177 Self::mark_lines_in_range(lines, content, start, end, |line| {
3179 line.in_mdx_comment = true;
3180 });
3181
3182 i = end;
3183 break;
3184 }
3185 j += 1;
3186 }
3187 if j + 2 >= bytes.len() {
3188 mdx_comment_ranges.push((start, bytes.len()));
3190 Self::mark_lines_in_range(lines, content, start, bytes.len(), |line| {
3191 line.in_mdx_comment = true;
3192 });
3193 break;
3194 }
3195 } else {
3196 let mut brace_depth = 1;
3199 let mut j = i + 1;
3200 let mut in_string = false;
3201 let mut string_char = b'"';
3202
3203 while j < bytes.len() && brace_depth > 0 {
3204 let c = bytes[j];
3205
3206 if !in_string && (c == b'"' || c == b'\'' || c == b'`') {
3208 in_string = true;
3209 string_char = c;
3210 } else if in_string && c == string_char && (j == 0 || bytes[j - 1] != b'\\') {
3211 in_string = false;
3212 } else if !in_string {
3213 if c == b'{' {
3214 brace_depth += 1;
3215 } else if c == b'}' {
3216 brace_depth -= 1;
3217 }
3218 }
3219 j += 1;
3220 }
3221
3222 if brace_depth == 0 {
3223 let end = j;
3224 jsx_expression_ranges.push((start, end));
3225
3226 Self::mark_lines_in_range(lines, content, start, end, |line| {
3228 line.in_jsx_expression = true;
3229 });
3230
3231 i = end;
3232 } else {
3233 i += 1;
3234 }
3235 }
3236 } else {
3237 i += 1;
3238 }
3239 }
3240
3241 (jsx_expression_ranges, mdx_comment_ranges)
3242 }
3243
3244 fn detect_mkdocs_line_info(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
3247 if flavor != MarkdownFlavor::MkDocs {
3248 return;
3249 }
3250
3251 use crate::utils::mkdocs_admonitions;
3252 use crate::utils::mkdocs_definition_lists;
3253 use crate::utils::mkdocs_tabs;
3254
3255 let content_lines: Vec<&str> = content.lines().collect();
3256
3257 let mut in_admonition = false;
3259 let mut admonition_indent = 0;
3260
3261 let mut in_tab = false;
3263 let mut tab_indent = 0;
3264
3265 let mut in_mkdocs_fenced_code = false;
3267 let mut mkdocs_fence_marker: Option<String> = None;
3268
3269 let mut in_definition = false;
3271
3272 let mut markdown_html_tracker = MarkdownHtmlTracker::new();
3274
3275 for (i, line) in content_lines.iter().enumerate() {
3276 if i >= lines.len() {
3277 break;
3278 }
3279
3280 if mkdocs_admonitions::is_admonition_start(line) {
3284 in_admonition = true;
3285 admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3286 lines[i].in_admonition = true;
3287 } else if in_admonition {
3288 if line.trim().is_empty() {
3290 lines[i].in_admonition = true;
3292 lines[i].in_code_block = false;
3294 } else if mkdocs_admonitions::is_admonition_content(line, admonition_indent) {
3295 lines[i].in_admonition = true;
3296 lines[i].in_code_block = false;
3298 } else {
3299 in_admonition = false;
3301 if mkdocs_admonitions::is_admonition_start(line) {
3303 in_admonition = true;
3304 admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3305 lines[i].in_admonition = true;
3306 }
3307 }
3308 }
3309
3310 if mkdocs_tabs::is_tab_marker(line) {
3313 in_tab = true;
3314 tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3315 lines[i].in_content_tab = true;
3316 in_mkdocs_fenced_code = false;
3318 mkdocs_fence_marker = None;
3319 } else if in_tab {
3320 let trimmed = line.trim();
3321
3322 if !in_mkdocs_fenced_code {
3324 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
3326 let fence_char = trimmed.chars().next().unwrap();
3327 let fence_len = trimmed.chars().take_while(|&c| c == fence_char).count();
3328 if fence_len >= 3 {
3329 in_mkdocs_fenced_code = true;
3330 mkdocs_fence_marker = Some(fence_char.to_string().repeat(fence_len));
3331 }
3332 }
3333 } else if let Some(ref marker) = mkdocs_fence_marker {
3334 let fence_char = marker.chars().next().unwrap();
3336 if trimmed.starts_with(marker.as_str())
3337 && trimmed
3338 .chars()
3339 .skip(marker.len())
3340 .all(|c| c == fence_char || c.is_whitespace())
3341 {
3342 in_mkdocs_fenced_code = false;
3343 mkdocs_fence_marker = None;
3344 }
3345 }
3346
3347 if line.trim().is_empty() {
3349 lines[i].in_content_tab = true;
3351 if !in_mkdocs_fenced_code {
3353 lines[i].in_code_block = false;
3354 }
3355 } else if mkdocs_tabs::is_tab_content(line, tab_indent) {
3356 lines[i].in_content_tab = true;
3357 if !in_mkdocs_fenced_code {
3360 lines[i].in_code_block = false;
3361 }
3362 } else {
3363 in_tab = false;
3365 in_mkdocs_fenced_code = false;
3366 mkdocs_fence_marker = None;
3367 if mkdocs_tabs::is_tab_marker(line) {
3369 in_tab = true;
3370 tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3371 lines[i].in_content_tab = true;
3372 }
3373 }
3374 }
3375
3376 lines[i].in_mkdocs_html_markdown = markdown_html_tracker.process_line(line);
3380
3381 if lines[i].in_code_block {
3383 continue;
3384 }
3385
3386 if mkdocs_definition_lists::is_definition_line(line) {
3388 in_definition = true;
3389 lines[i].in_definition_list = true;
3390 } else if in_definition {
3391 if mkdocs_definition_lists::is_definition_continuation(line) {
3393 lines[i].in_definition_list = true;
3394 } else if line.trim().is_empty() {
3395 lines[i].in_definition_list = true;
3397 } else if mkdocs_definition_lists::could_be_term_line(line) {
3398 if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1])
3400 {
3401 lines[i].in_definition_list = true;
3402 } else {
3403 in_definition = false;
3404 }
3405 } else {
3406 in_definition = false;
3407 }
3408 } else if mkdocs_definition_lists::could_be_term_line(line) {
3409 if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1]) {
3411 lines[i].in_definition_list = true;
3412 in_definition = true;
3413 }
3414 }
3415 }
3416 }
3417
3418 fn detect_obsidian_comments(
3429 content: &str,
3430 lines: &mut [LineInfo],
3431 flavor: MarkdownFlavor,
3432 code_span_ranges: &[(usize, usize)],
3433 ) -> Vec<(usize, usize)> {
3434 if flavor != MarkdownFlavor::Obsidian {
3436 return Vec::new();
3437 }
3438
3439 let comment_ranges = Self::compute_obsidian_comment_ranges(content, lines, code_span_ranges);
3441
3442 for range in &comment_ranges {
3444 for line in lines.iter_mut() {
3445 if line.in_code_block || line.in_html_comment {
3447 continue;
3448 }
3449
3450 let line_start = line.byte_offset;
3451 let line_end = line.byte_offset + line.byte_len;
3452
3453 if line_start >= range.0 && line_end <= range.1 {
3457 line.in_obsidian_comment = true;
3458 } else if line_start < range.1 && line_end > range.0 {
3459 let line_content_start = line_start;
3467 let line_content_end = line_end;
3468
3469 if line_content_start >= range.0 && line_content_end <= range.1 {
3470 line.in_obsidian_comment = true;
3471 }
3472 }
3473 }
3474 }
3475
3476 comment_ranges
3477 }
3478
3479 fn compute_obsidian_comment_ranges(
3484 content: &str,
3485 lines: &[LineInfo],
3486 code_span_ranges: &[(usize, usize)],
3487 ) -> Vec<(usize, usize)> {
3488 let mut ranges = Vec::new();
3489
3490 if !content.contains("%%") {
3492 return ranges;
3493 }
3494
3495 let mut skip_ranges: Vec<(usize, usize)> = Vec::new();
3498 for line in lines {
3499 if line.in_code_block || line.in_html_comment {
3500 skip_ranges.push((line.byte_offset, line.byte_offset + line.byte_len));
3501 }
3502 }
3503 skip_ranges.extend(code_span_ranges.iter().copied());
3504
3505 if !skip_ranges.is_empty() {
3506 skip_ranges.sort_by_key(|(start, _)| *start);
3508 let mut merged: Vec<(usize, usize)> = Vec::with_capacity(skip_ranges.len());
3509 for (start, end) in skip_ranges {
3510 if let Some((_, last_end)) = merged.last_mut()
3511 && start <= *last_end
3512 {
3513 *last_end = (*last_end).max(end);
3514 continue;
3515 }
3516 merged.push((start, end));
3517 }
3518 skip_ranges = merged;
3519 }
3520
3521 let content_bytes = content.as_bytes();
3522 let len = content.len();
3523 let mut i = 0;
3524 let mut in_comment = false;
3525 let mut comment_start = 0;
3526 let mut skip_idx = 0;
3527
3528 while i < len.saturating_sub(1) {
3529 if skip_idx < skip_ranges.len() {
3531 let (skip_start, skip_end) = skip_ranges[skip_idx];
3532 if i >= skip_end {
3533 skip_idx += 1;
3534 continue;
3535 }
3536 if i >= skip_start {
3537 i = skip_end;
3538 continue;
3539 }
3540 }
3541
3542 if content_bytes[i] == b'%' && content_bytes[i + 1] == b'%' {
3544 if !in_comment {
3545 in_comment = true;
3547 comment_start = i;
3548 i += 2;
3549 } else {
3550 let comment_end = i + 2;
3552 ranges.push((comment_start, comment_end));
3553 in_comment = false;
3554 i += 2;
3555 }
3556 } else {
3557 i += 1;
3558 }
3559 }
3560
3561 if in_comment {
3563 ranges.push((comment_start, len));
3564 }
3565
3566 ranges
3567 }
3568
3569 fn mark_lines_in_range<F>(lines: &mut [LineInfo], content: &str, start: usize, end: usize, mut f: F)
3571 where
3572 F: FnMut(&mut LineInfo),
3573 {
3574 for line in lines.iter_mut() {
3576 let line_start = line.byte_offset;
3577 let line_end = line.byte_offset + line.byte_len;
3578
3579 if line_start < end && line_end > start {
3581 f(line);
3582 }
3583 }
3584
3585 let _ = content;
3587 }
3588
3589 fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
3591 if !content.contains('`') {
3593 return Vec::new();
3594 }
3595
3596 let parser = Parser::new(content).into_offset_iter();
3598 let mut ranges = Vec::new();
3599
3600 for (event, range) in parser {
3601 if let Event::Code(_) = event {
3602 ranges.push((range.start, range.end));
3603 }
3604 }
3605
3606 Self::build_code_spans_from_ranges(content, lines, &ranges)
3607 }
3608
3609 fn build_code_spans_from_ranges(content: &str, lines: &[LineInfo], ranges: &[(usize, usize)]) -> Vec<CodeSpan> {
3610 let mut code_spans = Vec::new();
3611 if ranges.is_empty() {
3612 return code_spans;
3613 }
3614
3615 for &(start_pos, end_pos) in ranges {
3616 let full_span = &content[start_pos..end_pos];
3618 let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
3619
3620 let content_start = start_pos + backtick_count;
3622 let content_end = end_pos - backtick_count;
3623 let span_content = if content_start < content_end {
3624 content[content_start..content_end].to_string()
3625 } else {
3626 String::new()
3627 };
3628
3629 let line_idx = lines
3632 .partition_point(|line| line.byte_offset <= start_pos)
3633 .saturating_sub(1);
3634 let line_num = line_idx + 1;
3635 let byte_col_start = start_pos - lines[line_idx].byte_offset;
3636
3637 let end_line_idx = lines
3639 .partition_point(|line| line.byte_offset <= end_pos)
3640 .saturating_sub(1);
3641 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3642
3643 let line_content = lines[line_idx].content(content);
3646 let col_start = if byte_col_start <= line_content.len() {
3647 line_content[..byte_col_start].chars().count()
3648 } else {
3649 line_content.chars().count()
3650 };
3651
3652 let end_line_content = lines[end_line_idx].content(content);
3653 let col_end = if byte_col_end <= end_line_content.len() {
3654 end_line_content[..byte_col_end].chars().count()
3655 } else {
3656 end_line_content.chars().count()
3657 };
3658
3659 code_spans.push(CodeSpan {
3660 line: line_num,
3661 end_line: end_line_idx + 1,
3662 start_col: col_start,
3663 end_col: col_end,
3664 byte_offset: start_pos,
3665 byte_end: end_pos,
3666 backtick_count,
3667 content: span_content,
3668 });
3669 }
3670
3671 code_spans.sort_by_key(|span| span.byte_offset);
3673
3674 code_spans
3675 }
3676
3677 fn parse_math_spans(content: &str, lines: &[LineInfo]) -> Vec<MathSpan> {
3679 let mut math_spans = Vec::new();
3680
3681 if !content.contains('$') {
3683 return math_spans;
3684 }
3685
3686 let mut options = Options::empty();
3688 options.insert(Options::ENABLE_MATH);
3689 let parser = Parser::new_ext(content, options).into_offset_iter();
3690
3691 for (event, range) in parser {
3692 let (is_display, math_content) = match &event {
3693 Event::InlineMath(text) => (false, text.as_ref()),
3694 Event::DisplayMath(text) => (true, text.as_ref()),
3695 _ => continue,
3696 };
3697
3698 let start_pos = range.start;
3699 let end_pos = range.end;
3700
3701 let line_idx = lines
3703 .partition_point(|line| line.byte_offset <= start_pos)
3704 .saturating_sub(1);
3705 let line_num = line_idx + 1;
3706 let byte_col_start = start_pos - lines[line_idx].byte_offset;
3707
3708 let end_line_idx = lines
3710 .partition_point(|line| line.byte_offset <= end_pos)
3711 .saturating_sub(1);
3712 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3713
3714 let line_content = lines[line_idx].content(content);
3716 let col_start = if byte_col_start <= line_content.len() {
3717 line_content[..byte_col_start].chars().count()
3718 } else {
3719 line_content.chars().count()
3720 };
3721
3722 let end_line_content = lines[end_line_idx].content(content);
3723 let col_end = if byte_col_end <= end_line_content.len() {
3724 end_line_content[..byte_col_end].chars().count()
3725 } else {
3726 end_line_content.chars().count()
3727 };
3728
3729 math_spans.push(MathSpan {
3730 line: line_num,
3731 end_line: end_line_idx + 1,
3732 start_col: col_start,
3733 end_col: col_end,
3734 byte_offset: start_pos,
3735 byte_end: end_pos,
3736 is_display,
3737 content: math_content.to_string(),
3738 });
3739 }
3740
3741 math_spans.sort_by_key(|span| span.byte_offset);
3743
3744 math_spans
3745 }
3746
3747 fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
3758 const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
3760
3761 #[inline]
3764 fn reset_tracking_state(
3765 list_item: &ListItemInfo,
3766 has_list_breaking_content: &mut bool,
3767 min_continuation: &mut usize,
3768 ) {
3769 *has_list_breaking_content = false;
3770 let marker_width = if list_item.is_ordered {
3771 list_item.marker.len() + 1 } else {
3773 list_item.marker.len()
3774 };
3775 *min_continuation = if list_item.is_ordered {
3776 marker_width
3777 } else {
3778 UNORDERED_LIST_MIN_CONTINUATION_INDENT
3779 };
3780 }
3781
3782 let mut list_blocks = Vec::with_capacity(lines.len() / 10); let mut current_block: Option<ListBlock> = None;
3785 let mut last_list_item_line = 0;
3786 let mut current_indent_level = 0;
3787 let mut last_marker_width = 0;
3788
3789 let mut has_list_breaking_content_since_last_item = false;
3791 let mut min_continuation_for_tracking = 0;
3792
3793 for (line_idx, line_info) in lines.iter().enumerate() {
3794 let line_num = line_idx + 1;
3795
3796 if line_info.in_code_block {
3798 if let Some(ref mut block) = current_block {
3799 let min_continuation_indent =
3801 CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
3802
3803 let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
3805
3806 match context {
3807 CodeBlockContext::Indented => {
3808 block.end_line = line_num;
3810 continue;
3811 }
3812 CodeBlockContext::Standalone => {
3813 let completed_block = current_block.take().unwrap();
3815 list_blocks.push(completed_block);
3816 continue;
3817 }
3818 CodeBlockContext::Adjacent => {
3819 block.end_line = line_num;
3821 continue;
3822 }
3823 }
3824 } else {
3825 continue;
3827 }
3828 }
3829
3830 let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
3832 caps.get(0).unwrap().as_str().to_string()
3833 } else {
3834 String::new()
3835 };
3836
3837 if let Some(ref block) = current_block
3840 && line_info.list_item.is_none()
3841 && !line_info.is_blank
3842 && !line_info.in_code_span_continuation
3843 {
3844 let line_content = line_info.content(content).trim();
3845
3846 let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
3851
3852 let blockquote_prefix_changes = blockquote_prefix.trim() != block.blockquote_prefix.trim();
3855
3856 let breaks_list = line_info.heading.is_some()
3857 || line_content.starts_with("---")
3858 || line_content.starts_with("***")
3859 || line_content.starts_with("___")
3860 || crate::utils::skip_context::is_table_line(line_content)
3861 || blockquote_prefix_changes
3862 || (line_info.indent > 0
3863 && line_info.indent < min_continuation_for_tracking
3864 && !is_lazy_continuation);
3865
3866 if breaks_list {
3867 has_list_breaking_content_since_last_item = true;
3868 }
3869 }
3870
3871 if line_info.in_code_span_continuation
3874 && line_info.list_item.is_none()
3875 && let Some(ref mut block) = current_block
3876 {
3877 block.end_line = line_num;
3878 }
3879
3880 let effective_continuation_indent = if let Some(ref block) = current_block {
3886 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3887 let line_content = line_info.content(content);
3888 let line_bq_level = line_content
3889 .chars()
3890 .take_while(|c| *c == '>' || c.is_whitespace())
3891 .filter(|&c| c == '>')
3892 .count();
3893 if line_bq_level > 0 && line_bq_level == block_bq_level {
3894 let mut pos = 0;
3896 let mut found_markers = 0;
3897 for c in line_content.chars() {
3898 pos += c.len_utf8();
3899 if c == '>' {
3900 found_markers += 1;
3901 if found_markers == line_bq_level {
3902 if line_content.get(pos..pos + 1) == Some(" ") {
3903 pos += 1;
3904 }
3905 break;
3906 }
3907 }
3908 }
3909 let after_bq = &line_content[pos..];
3910 after_bq.len() - after_bq.trim_start().len()
3911 } else {
3912 line_info.indent
3913 }
3914 } else {
3915 line_info.indent
3916 };
3917 let adjusted_min_continuation_for_tracking = if let Some(ref block) = current_block {
3918 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3919 if block_bq_level > 0 {
3920 if block.is_ordered { last_marker_width } else { 2 }
3921 } else {
3922 min_continuation_for_tracking
3923 }
3924 } else {
3925 min_continuation_for_tracking
3926 };
3927 let is_structural_element = line_info.heading.is_some()
3930 || line_info.content(content).trim().starts_with("```")
3931 || line_info.content(content).trim().starts_with("~~~");
3932 let is_valid_continuation = effective_continuation_indent >= adjusted_min_continuation_for_tracking
3933 || (line_info.indent == 0 && !line_info.is_blank && !is_structural_element);
3934
3935 if std::env::var("RUMDL_DEBUG_LIST").is_ok() && line_info.list_item.is_none() && !line_info.is_blank {
3936 eprintln!(
3937 "[DEBUG] Line {}: checking continuation - indent={}, min_cont={}, is_valid={}, in_code_span={}, in_code_block={}, has_block={}",
3938 line_num,
3939 effective_continuation_indent,
3940 adjusted_min_continuation_for_tracking,
3941 is_valid_continuation,
3942 line_info.in_code_span_continuation,
3943 line_info.in_code_block,
3944 current_block.is_some()
3945 );
3946 }
3947
3948 if !line_info.in_code_span_continuation
3949 && line_info.list_item.is_none()
3950 && !line_info.is_blank
3951 && !line_info.in_code_block
3952 && is_valid_continuation
3953 && let Some(ref mut block) = current_block
3954 {
3955 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3956 eprintln!(
3957 "[DEBUG] Line {}: extending block.end_line from {} to {}",
3958 line_num, block.end_line, line_num
3959 );
3960 }
3961 block.end_line = line_num;
3962 }
3963
3964 if let Some(list_item) = &line_info.list_item {
3966 let item_indent = list_item.marker_column;
3968 let nesting = item_indent / 2; if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3971 eprintln!(
3972 "[DEBUG] Line {}: list item found, marker={:?}, indent={}",
3973 line_num, list_item.marker, item_indent
3974 );
3975 }
3976
3977 if let Some(ref mut block) = current_block {
3978 let is_nested = nesting > block.nesting_level;
3982 let same_type =
3983 (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
3984 let same_context = block.blockquote_prefix == blockquote_prefix;
3985 let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
3987
3988 let marker_compatible =
3990 block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
3991
3992 let has_non_list_content = has_list_breaking_content_since_last_item;
3995
3996 let mut continues_list = if is_nested {
4000 same_context && reasonable_distance && !has_non_list_content
4002 } else {
4003 same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
4005 };
4006
4007 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4008 eprintln!(
4009 "[DEBUG] Line {}: continues_list={}, is_nested={}, same_type={}, same_context={}, reasonable_distance={}, marker_compatible={}, has_non_list_content={}, last_item={}, block.end_line={}",
4010 line_num,
4011 continues_list,
4012 is_nested,
4013 same_type,
4014 same_context,
4015 reasonable_distance,
4016 marker_compatible,
4017 has_non_list_content,
4018 last_list_item_line,
4019 block.end_line
4020 );
4021 }
4022
4023 if !continues_list
4027 && (is_nested || same_type)
4028 && reasonable_distance
4029 && line_num > 0
4030 && block.end_line == line_num - 1
4031 {
4032 if block.item_lines.contains(&(line_num - 1)) {
4035 continues_list = true;
4037 } else {
4038 continues_list = true;
4042 }
4043 }
4044
4045 if continues_list {
4046 block.end_line = line_num;
4048 block.item_lines.push(line_num);
4049
4050 block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
4052 list_item.marker.len() + 1
4053 } else {
4054 list_item.marker.len()
4055 });
4056
4057 if !block.is_ordered
4059 && block.marker.is_some()
4060 && block.marker.as_ref() != Some(&list_item.marker)
4061 {
4062 block.marker = None;
4064 }
4065
4066 reset_tracking_state(
4068 list_item,
4069 &mut has_list_breaking_content_since_last_item,
4070 &mut min_continuation_for_tracking,
4071 );
4072 } else {
4073 if !same_type
4078 && !is_nested
4079 && let Some(&last_item) = block.item_lines.last()
4080 {
4081 block.end_line = last_item;
4082 }
4083
4084 list_blocks.push(block.clone());
4085
4086 *block = ListBlock {
4087 start_line: line_num,
4088 end_line: line_num,
4089 is_ordered: list_item.is_ordered,
4090 marker: if list_item.is_ordered {
4091 None
4092 } else {
4093 Some(list_item.marker.clone())
4094 },
4095 blockquote_prefix: blockquote_prefix.clone(),
4096 item_lines: vec![line_num],
4097 nesting_level: nesting,
4098 max_marker_width: if list_item.is_ordered {
4099 list_item.marker.len() + 1
4100 } else {
4101 list_item.marker.len()
4102 },
4103 };
4104
4105 reset_tracking_state(
4107 list_item,
4108 &mut has_list_breaking_content_since_last_item,
4109 &mut min_continuation_for_tracking,
4110 );
4111 }
4112 } else {
4113 current_block = Some(ListBlock {
4115 start_line: line_num,
4116 end_line: line_num,
4117 is_ordered: list_item.is_ordered,
4118 marker: if list_item.is_ordered {
4119 None
4120 } else {
4121 Some(list_item.marker.clone())
4122 },
4123 blockquote_prefix,
4124 item_lines: vec![line_num],
4125 nesting_level: nesting,
4126 max_marker_width: list_item.marker.len(),
4127 });
4128
4129 reset_tracking_state(
4131 list_item,
4132 &mut has_list_breaking_content_since_last_item,
4133 &mut min_continuation_for_tracking,
4134 );
4135 }
4136
4137 last_list_item_line = line_num;
4138 current_indent_level = item_indent;
4139 last_marker_width = if list_item.is_ordered {
4140 list_item.marker.len() + 1 } else {
4142 list_item.marker.len()
4143 };
4144 } else if let Some(ref mut block) = current_block {
4145 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4147 eprintln!(
4148 "[DEBUG] Line {}: non-list-item, is_blank={}, block exists",
4149 line_num, line_info.is_blank
4150 );
4151 }
4152
4153 let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
4161 lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
4162 } else {
4163 false
4164 };
4165
4166 let min_continuation_indent = if block.is_ordered {
4170 current_indent_level + last_marker_width
4171 } else {
4172 current_indent_level + 2 };
4174
4175 if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
4176 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4178 eprintln!(
4179 "[DEBUG] Line {}: indented continuation (indent={}, min={})",
4180 line_num, line_info.indent, min_continuation_indent
4181 );
4182 }
4183 block.end_line = line_num;
4184 } else if line_info.is_blank {
4185 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4188 eprintln!("[DEBUG] Line {line_num}: entering blank line handling");
4189 }
4190 let mut check_idx = line_idx + 1;
4191 let mut found_continuation = false;
4192
4193 while check_idx < lines.len() && lines[check_idx].is_blank {
4195 check_idx += 1;
4196 }
4197
4198 if check_idx < lines.len() {
4199 let next_line = &lines[check_idx];
4200 let next_content = next_line.content(content);
4202 let block_bq_level_for_indent = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4205 let next_bq_level_for_indent = next_content
4206 .chars()
4207 .take_while(|c| *c == '>' || c.is_whitespace())
4208 .filter(|&c| c == '>')
4209 .count();
4210 let effective_indent =
4211 if next_bq_level_for_indent > 0 && next_bq_level_for_indent == block_bq_level_for_indent {
4212 let mut pos = 0;
4215 let mut found_markers = 0;
4216 for c in next_content.chars() {
4217 pos += c.len_utf8();
4218 if c == '>' {
4219 found_markers += 1;
4220 if found_markers == next_bq_level_for_indent {
4221 if next_content.get(pos..pos + 1) == Some(" ") {
4223 pos += 1;
4224 }
4225 break;
4226 }
4227 }
4228 }
4229 let after_blockquote_marker = &next_content[pos..];
4230 after_blockquote_marker.len() - after_blockquote_marker.trim_start().len()
4231 } else {
4232 next_line.indent
4233 };
4234 let adjusted_min_continuation = if block_bq_level_for_indent > 0 {
4237 if block.is_ordered { last_marker_width } else { 2 }
4240 } else {
4241 min_continuation_indent
4242 };
4243 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4245 eprintln!(
4246 "[DEBUG] Blank line {} checking next line {}: effective_indent={}, adjusted_min={}, next_is_list={}, in_code_block={}",
4247 line_num,
4248 check_idx + 1,
4249 effective_indent,
4250 adjusted_min_continuation,
4251 next_line.list_item.is_some(),
4252 next_line.in_code_block
4253 );
4254 }
4255 if !next_line.in_code_block && effective_indent >= adjusted_min_continuation {
4256 found_continuation = true;
4257 }
4258 else if !next_line.in_code_block
4260 && next_line.list_item.is_some()
4261 && let Some(item) = &next_line.list_item
4262 {
4263 let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
4264 .find(next_line.content(content))
4265 .map_or(String::new(), |m| m.as_str().to_string());
4266 if item.marker_column == current_indent_level
4267 && item.is_ordered == block.is_ordered
4268 && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
4269 {
4270 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4274 let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
4275 if let Some(between_line) = lines.get(idx) {
4276 let between_content = between_line.content(content);
4277 let trimmed = between_content.trim();
4278 if trimmed.is_empty() {
4280 return false;
4281 }
4282 let line_indent = between_content.len() - between_content.trim_start().len();
4284
4285 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4287 .find(between_content)
4288 .map_or(String::new(), |m| m.as_str().to_string());
4289 let between_bq_level = between_bq_prefix.chars().filter(|&c| c == '>').count();
4290 let blockquote_level_changed =
4291 trimmed.starts_with(">") && between_bq_level != block_bq_level;
4292
4293 if trimmed.starts_with("```")
4295 || trimmed.starts_with("~~~")
4296 || trimmed.starts_with("---")
4297 || trimmed.starts_with("***")
4298 || trimmed.starts_with("___")
4299 || blockquote_level_changed
4300 || crate::utils::skip_context::is_table_line(trimmed)
4301 || between_line.heading.is_some()
4302 {
4303 return true; }
4305
4306 line_indent >= min_continuation_indent
4308 } else {
4309 false
4310 }
4311 });
4312
4313 if block.is_ordered {
4314 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4317 if let Some(between_line) = lines.get(idx) {
4318 let between_content = between_line.content(content);
4319 let trimmed = between_content.trim();
4320 if trimmed.is_empty() {
4321 return false;
4322 }
4323 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4325 .find(between_content)
4326 .map_or(String::new(), |m| m.as_str().to_string());
4327 let between_bq_level =
4328 between_bq_prefix.chars().filter(|&c| c == '>').count();
4329 let blockquote_level_changed =
4330 trimmed.starts_with(">") && between_bq_level != block_bq_level;
4331 trimmed.starts_with("```")
4333 || trimmed.starts_with("~~~")
4334 || trimmed.starts_with("---")
4335 || trimmed.starts_with("***")
4336 || trimmed.starts_with("___")
4337 || blockquote_level_changed
4338 || crate::utils::skip_context::is_table_line(trimmed)
4339 || between_line.heading.is_some()
4340 } else {
4341 false
4342 }
4343 });
4344 found_continuation = !has_structural_separators;
4345 } else {
4346 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4348 if let Some(between_line) = lines.get(idx) {
4349 let between_content = between_line.content(content);
4350 let trimmed = between_content.trim();
4351 if trimmed.is_empty() {
4352 return false;
4353 }
4354 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4356 .find(between_content)
4357 .map_or(String::new(), |m| m.as_str().to_string());
4358 let between_bq_level =
4359 between_bq_prefix.chars().filter(|&c| c == '>').count();
4360 let blockquote_level_changed =
4361 trimmed.starts_with(">") && between_bq_level != block_bq_level;
4362 trimmed.starts_with("```")
4364 || trimmed.starts_with("~~~")
4365 || trimmed.starts_with("---")
4366 || trimmed.starts_with("***")
4367 || trimmed.starts_with("___")
4368 || blockquote_level_changed
4369 || crate::utils::skip_context::is_table_line(trimmed)
4370 || between_line.heading.is_some()
4371 } else {
4372 false
4373 }
4374 });
4375 found_continuation = !has_structural_separators;
4376 }
4377 }
4378 }
4379 }
4380
4381 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4382 eprintln!("[DEBUG] Blank line {line_num} final: found_continuation={found_continuation}");
4383 }
4384 if found_continuation {
4385 block.end_line = line_num;
4387 } else {
4388 list_blocks.push(block.clone());
4390 current_block = None;
4391 }
4392 } else {
4393 let min_required_indent = if block.is_ordered {
4396 current_indent_level + last_marker_width
4397 } else {
4398 current_indent_level + 2
4399 };
4400
4401 let line_content = line_info.content(content).trim();
4406
4407 let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
4409
4410 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4413 let current_bq_level = blockquote_prefix.chars().filter(|&c| c == '>').count();
4414 let blockquote_level_changed = line_content.starts_with(">") && current_bq_level != block_bq_level;
4415
4416 let is_structural_separator = line_info.heading.is_some()
4417 || line_content.starts_with("```")
4418 || line_content.starts_with("~~~")
4419 || line_content.starts_with("---")
4420 || line_content.starts_with("***")
4421 || line_content.starts_with("___")
4422 || blockquote_level_changed
4423 || looks_like_table;
4424
4425 let is_lazy_continuation = !is_structural_separator
4429 && !line_info.is_blank
4430 && (line_info.indent == 0
4431 || line_info.indent >= min_required_indent
4432 || line_info.in_code_span_continuation);
4433
4434 if is_lazy_continuation {
4435 block.end_line = line_num;
4438 } else {
4439 list_blocks.push(block.clone());
4441 current_block = None;
4442 }
4443 }
4444 }
4445 }
4446
4447 if let Some(block) = current_block {
4449 list_blocks.push(block);
4450 }
4451
4452 merge_adjacent_list_blocks(content, &mut list_blocks, lines);
4454
4455 list_blocks
4456 }
4457
4458 fn compute_char_frequency(content: &str) -> CharFrequency {
4460 let mut frequency = CharFrequency::default();
4461
4462 for ch in content.chars() {
4463 match ch {
4464 '#' => frequency.hash_count += 1,
4465 '*' => frequency.asterisk_count += 1,
4466 '_' => frequency.underscore_count += 1,
4467 '-' => frequency.hyphen_count += 1,
4468 '+' => frequency.plus_count += 1,
4469 '>' => frequency.gt_count += 1,
4470 '|' => frequency.pipe_count += 1,
4471 '[' => frequency.bracket_count += 1,
4472 '`' => frequency.backtick_count += 1,
4473 '<' => frequency.lt_count += 1,
4474 '!' => frequency.exclamation_count += 1,
4475 '\n' => frequency.newline_count += 1,
4476 _ => {}
4477 }
4478 }
4479
4480 frequency
4481 }
4482
4483 fn parse_html_tags(
4485 content: &str,
4486 lines: &[LineInfo],
4487 code_blocks: &[(usize, usize)],
4488 flavor: MarkdownFlavor,
4489 ) -> Vec<HtmlTag> {
4490 static HTML_TAG_REGEX: LazyLock<regex::Regex> =
4491 LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
4492
4493 let mut html_tags = Vec::with_capacity(content.matches('<').count());
4494
4495 for cap in HTML_TAG_REGEX.captures_iter(content) {
4496 let full_match = cap.get(0).unwrap();
4497 let match_start = full_match.start();
4498 let match_end = full_match.end();
4499
4500 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4502 continue;
4503 }
4504
4505 let is_closing = !cap.get(1).unwrap().as_str().is_empty();
4506 let tag_name_original = cap.get(2).unwrap().as_str();
4507 let tag_name = tag_name_original.to_lowercase();
4508 let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
4509
4510 if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
4513 continue;
4514 }
4515
4516 let mut line_num = 1;
4518 let mut col_start = match_start;
4519 let mut col_end = match_end;
4520 for (idx, line_info) in lines.iter().enumerate() {
4521 if match_start >= line_info.byte_offset {
4522 line_num = idx + 1;
4523 col_start = match_start - line_info.byte_offset;
4524 col_end = match_end - line_info.byte_offset;
4525 } else {
4526 break;
4527 }
4528 }
4529
4530 html_tags.push(HtmlTag {
4531 line: line_num,
4532 start_col: col_start,
4533 end_col: col_end,
4534 byte_offset: match_start,
4535 byte_end: match_end,
4536 tag_name,
4537 is_closing,
4538 is_self_closing,
4539 raw_content: full_match.as_str().to_string(),
4540 });
4541 }
4542
4543 html_tags
4544 }
4545
4546 fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
4548 let mut table_rows = Vec::with_capacity(lines.len() / 20);
4549
4550 for (line_idx, line_info) in lines.iter().enumerate() {
4551 if line_info.in_code_block || line_info.is_blank {
4553 continue;
4554 }
4555
4556 let line = line_info.content(content);
4557 let line_num = line_idx + 1;
4558
4559 if !line.contains('|') {
4561 continue;
4562 }
4563
4564 let parts: Vec<&str> = line.split('|').collect();
4566 let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
4567
4568 let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
4570 let mut column_alignments = Vec::new();
4571
4572 if is_separator {
4573 for part in &parts[1..parts.len() - 1] {
4574 let trimmed = part.trim();
4576 let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
4577 "center".to_string()
4578 } else if trimmed.ends_with(':') {
4579 "right".to_string()
4580 } else if trimmed.starts_with(':') {
4581 "left".to_string()
4582 } else {
4583 "none".to_string()
4584 };
4585 column_alignments.push(alignment);
4586 }
4587 }
4588
4589 table_rows.push(TableRow {
4590 line: line_num,
4591 is_separator,
4592 column_count,
4593 column_alignments,
4594 });
4595 }
4596
4597 table_rows
4598 }
4599
4600 fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
4602 let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
4603
4604 for cap in URL_SIMPLE_REGEX.captures_iter(content) {
4606 let full_match = cap.get(0).unwrap();
4607 let match_start = full_match.start();
4608 let match_end = full_match.end();
4609
4610 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4612 continue;
4613 }
4614
4615 let preceding_char = if match_start > 0 {
4617 content.chars().nth(match_start - 1)
4618 } else {
4619 None
4620 };
4621 let following_char = content.chars().nth(match_end);
4622
4623 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4624 continue;
4625 }
4626 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4627 continue;
4628 }
4629
4630 let url = full_match.as_str();
4631 let url_type = if url.starts_with("https://") {
4632 "https"
4633 } else if url.starts_with("http://") {
4634 "http"
4635 } else if url.starts_with("ftp://") {
4636 "ftp"
4637 } else {
4638 "other"
4639 };
4640
4641 let mut line_num = 1;
4643 let mut col_start = match_start;
4644 let mut col_end = match_end;
4645 for (idx, line_info) in lines.iter().enumerate() {
4646 if match_start >= line_info.byte_offset {
4647 line_num = idx + 1;
4648 col_start = match_start - line_info.byte_offset;
4649 col_end = match_end - line_info.byte_offset;
4650 } else {
4651 break;
4652 }
4653 }
4654
4655 bare_urls.push(BareUrl {
4656 line: line_num,
4657 start_col: col_start,
4658 end_col: col_end,
4659 byte_offset: match_start,
4660 byte_end: match_end,
4661 url: url.to_string(),
4662 url_type: url_type.to_string(),
4663 });
4664 }
4665
4666 for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
4668 let full_match = cap.get(0).unwrap();
4669 let match_start = full_match.start();
4670 let match_end = full_match.end();
4671
4672 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4674 continue;
4675 }
4676
4677 let preceding_char = if match_start > 0 {
4679 content.chars().nth(match_start - 1)
4680 } else {
4681 None
4682 };
4683 let following_char = content.chars().nth(match_end);
4684
4685 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4686 continue;
4687 }
4688 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4689 continue;
4690 }
4691
4692 let email = full_match.as_str();
4693
4694 let mut line_num = 1;
4696 let mut col_start = match_start;
4697 let mut col_end = match_end;
4698 for (idx, line_info) in lines.iter().enumerate() {
4699 if match_start >= line_info.byte_offset {
4700 line_num = idx + 1;
4701 col_start = match_start - line_info.byte_offset;
4702 col_end = match_end - line_info.byte_offset;
4703 } else {
4704 break;
4705 }
4706 }
4707
4708 bare_urls.push(BareUrl {
4709 line: line_num,
4710 start_col: col_start,
4711 end_col: col_end,
4712 byte_offset: match_start,
4713 byte_end: match_end,
4714 url: email.to_string(),
4715 url_type: "email".to_string(),
4716 });
4717 }
4718
4719 bare_urls
4720 }
4721
    /// Returns a [`ValidHeadingsIter`] constructed over this context's `lines`.
    ///
    /// NOTE(review): presumably the iterator yields only headings whose
    /// `is_valid` flag is set; confirm against `ValidHeadingsIter`.
    #[must_use]
    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
        ValidHeadingsIter::new(&self.lines)
    }
4745
4746 #[must_use]
4750 pub fn has_valid_headings(&self) -> bool {
4751 self.lines
4752 .iter()
4753 .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
4754 }
4755}
4756
4757fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
4759 if list_blocks.len() < 2 {
4760 return;
4761 }
4762
4763 let mut merger = ListBlockMerger::new(content, lines);
4764 *list_blocks = merger.merge(list_blocks);
4765}
4766
4767struct ListBlockMerger<'a> {
4769 content: &'a str,
4770 lines: &'a [LineInfo],
4771}
4772
4773impl<'a> ListBlockMerger<'a> {
4774 fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
4775 Self { content, lines }
4776 }
4777
4778 fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
4779 let mut merged = Vec::with_capacity(list_blocks.len());
4780 let mut current = list_blocks[0].clone();
4781
4782 for next in list_blocks.iter().skip(1) {
4783 if self.should_merge_blocks(¤t, next) {
4784 current = self.merge_two_blocks(current, next);
4785 } else {
4786 merged.push(current);
4787 current = next.clone();
4788 }
4789 }
4790
4791 merged.push(current);
4792 merged
4793 }
4794
4795 fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
4797 if !self.blocks_are_compatible(current, next) {
4799 return false;
4800 }
4801
4802 let spacing = self.analyze_spacing_between(current, next);
4804 match spacing {
4805 BlockSpacing::Consecutive => true,
4806 BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
4807 BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
4808 self.can_merge_with_content_between(current, next)
4809 }
4810 }
4811 }
4812
4813 fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
4815 current.is_ordered == next.is_ordered
4816 && current.blockquote_prefix == next.blockquote_prefix
4817 && current.nesting_level == next.nesting_level
4818 }
4819
4820 fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
4822 let gap = next.start_line - current.end_line;
4823
4824 match gap {
4825 1 => BlockSpacing::Consecutive,
4826 2 => BlockSpacing::SingleBlank,
4827 _ if gap > 2 => {
4828 if self.has_only_blank_lines_between(current, next) {
4829 BlockSpacing::MultipleBlanks
4830 } else {
4831 BlockSpacing::ContentBetween
4832 }
4833 }
4834 _ => BlockSpacing::Consecutive, }
4836 }
4837
4838 fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4840 if has_meaningful_content_between(self.content, current, next, self.lines) {
4843 return false; }
4845
4846 !current.is_ordered && current.marker == next.marker
4848 }
4849
4850 fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4852 if has_meaningful_content_between(self.content, current, next, self.lines) {
4854 return false; }
4856
4857 current.is_ordered && next.is_ordered
4859 }
4860
4861 fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4863 for line_num in (current.end_line + 1)..next.start_line {
4864 if let Some(line_info) = self.lines.get(line_num - 1)
4865 && !line_info.content(self.content).trim().is_empty()
4866 {
4867 return false;
4868 }
4869 }
4870 true
4871 }
4872
4873 fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
4875 current.end_line = next.end_line;
4876 current.item_lines.extend_from_slice(&next.item_lines);
4877
4878 current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
4880
4881 if !current.is_ordered && self.markers_differ(¤t, next) {
4883 current.marker = None; }
4885
4886 current
4887 }
4888
4889 fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
4891 current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
4892 }
4893}
4894
/// How far apart two list blocks are, judged by the difference between the
/// second block's start line and the first block's end line.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// The second block starts on the line right after the first one ends.
    Consecutive,
    /// Exactly one line lies between the blocks (classified by distance alone).
    SingleBlank,
    /// More than one line lies between the blocks and all of them are blank.
    MultipleBlanks,
    /// More than one line lies between the blocks and at least one is not blank.
    ContentBetween,
}
4903
4904fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
4906 for line_num in (current.end_line + 1)..next.start_line {
4908 if let Some(line_info) = lines.get(line_num - 1) {
4909 let trimmed = line_info.content(content).trim();
4911
4912 if trimmed.is_empty() {
4914 continue;
4915 }
4916
4917 if line_info.heading.is_some() {
4921 return true; }
4923
4924 if is_horizontal_rule(trimmed) {
4926 return true; }
4928
4929 if crate::utils::skip_context::is_table_line(trimmed) {
4931 return true; }
4933
4934 if trimmed.starts_with('>') {
4936 return true; }
4938
4939 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
4941 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4942
4943 let min_continuation_indent = if current.is_ordered {
4945 current.nesting_level + current.max_marker_width + 1 } else {
4947 current.nesting_level + 2
4948 };
4949
4950 if line_indent < min_continuation_indent {
4951 return true; }
4954 }
4955
4956 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4958
4959 let min_indent = if current.is_ordered {
4961 current.nesting_level + current.max_marker_width
4962 } else {
4963 current.nesting_level + 2
4964 };
4965
4966 if line_indent < min_indent {
4968 return true; }
4970
4971 }
4974 }
4975
4976 false
4978}
4979
4980pub fn is_horizontal_rule_line(line: &str) -> bool {
4987 let leading_spaces = line.len() - line.trim_start_matches(' ').len();
4989 if leading_spaces > 3 || line.starts_with('\t') {
4990 return false;
4991 }
4992
4993 is_horizontal_rule_content(line.trim())
4994}
4995
/// Checks whether already-trimmed text forms a horizontal rule.
///
/// The text must start with `-`, `*` or `_`, contain at least three copies of
/// that same character, and contain nothing else except spaces and tabs.
/// The check streams over the characters without allocating.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    // Fewer than three bytes cannot hold three marker characters.
    if trimmed.len() < 3 {
        return false;
    }

    let marker = match trimmed.chars().next() {
        Some(c @ ('-' | '*' | '_')) => c,
        _ => return false,
    };

    let mut marker_count = 0usize;
    for ch in trimmed.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' && ch != '\t' {
            // Any other character (including a different rule marker) disqualifies the line.
            return false;
        }
    }

    marker_count >= 3
}
5020
/// Alias for [`is_horizontal_rule_content`]: forwards `trimmed` unchanged and
/// returns that function's result.
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}
5025
5026#[cfg(test)]
5028mod tests {
5029 use super::*;
5030
    /// An empty document keeps a single line offset at 0, maps offset 0 to
    /// line 1 / column 1, and produces no line entries.
    #[test]
    fn test_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        // Positions are reported 1-based even for an empty document.
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }
5039
    /// A document without a newline has one line offset, and byte offsets map
    /// to 1-based columns on line 1.
    #[test]
    fn test_single_line() {
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        // Offset 3 is the fourth byte, hence column 4.
        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
    }
5048
5049 #[test]
5050 fn test_multi_line() {
5051 let content = "# Title\n\nSecond line\nThird line";
5052 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5053 assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
5054 assert_eq!(ctx.offset_to_line_col(0), (1, 1)); assert_eq!(ctx.offset_to_line_col(8), (2, 1)); assert_eq!(ctx.offset_to_line_col(9), (3, 1)); assert_eq!(ctx.offset_to_line_col(15), (3, 7)); assert_eq!(ctx.offset_to_line_col(21), (4, 1)); }
5061
    /// Per-line metadata (content, byte offset, indent, blank/code-block flags)
    /// and the line lookup helpers on a seven-line document.
    #[test]
    fn test_line_info() {
        let content = "# Title\n indented\n\ncode:\n```rust\nfn main() {}\n```";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.lines.len(), 7);

        // Line 1: the heading.
        let line1 = &ctx.lines[0];
        assert_eq!(line1.content(ctx.content), "# Title");
        assert_eq!(line1.byte_offset, 0);
        assert_eq!(line1.indent, 0);
        assert!(!line1.is_blank);
        assert!(!line1.in_code_block);
        assert!(line1.list_item.is_none());

        // Line 2: indented text.
        let line2 = &ctx.lines[1];
        assert_eq!(line2.content(ctx.content), " indented");
        assert_eq!(line2.byte_offset, 8);
        assert_eq!(line2.indent, 4);
        assert!(!line2.is_blank);

        // Line 3: blank.
        let line3 = &ctx.lines[2];
        assert_eq!(line3.content(ctx.content), "");
        assert!(line3.is_blank);

        // Lookup helpers take 1-based line numbers.
        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
    }
5097
    /// List items are detected for unordered and ordered markers, with marker
    /// text, columns and numbering recorded; plain text is not a list item.
    #[test]
    fn test_list_item_detection() {
        let content = "- Unordered item\n * Nested item\n1. Ordered item\n 2) Nested ordered\n\nNot a list";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Line 1: top-level unordered item.
        let line1 = &ctx.lines[0];
        assert!(line1.list_item.is_some());
        let list1 = line1.list_item.as_ref().unwrap();
        assert_eq!(list1.marker, "-");
        assert!(!list1.is_ordered);
        assert_eq!(list1.marker_column, 0);
        assert_eq!(list1.content_column, 2);

        // Line 2: nested unordered item.
        let line2 = &ctx.lines[1];
        assert!(line2.list_item.is_some());
        let list2 = line2.list_item.as_ref().unwrap();
        assert_eq!(list2.marker, "*");
        assert_eq!(list2.marker_column, 2);

        // Line 3: ordered item with its parsed number.
        let line3 = &ctx.lines[2];
        assert!(line3.list_item.is_some());
        let list3 = line3.list_item.as_ref().unwrap();
        assert_eq!(list3.marker, "1.");
        assert!(list3.is_ordered);
        assert_eq!(list3.number, Some(1));

        // Line 6: ordinary paragraph text.
        let line6 = &ctx.lines[5];
        assert!(line6.list_item.is_none());
    }
5131
5132 #[test]
5133 fn test_offset_to_line_col_edge_cases() {
5134 let content = "a\nb\nc";
5135 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5136 assert_eq!(ctx.offset_to_line_col(0), (1, 1)); assert_eq!(ctx.offset_to_line_col(1), (1, 2)); assert_eq!(ctx.offset_to_line_col(2), (2, 1)); assert_eq!(ctx.offset_to_line_col(3), (2, 2)); assert_eq!(ctx.offset_to_line_col(4), (3, 1)); assert_eq!(ctx.offset_to_line_col(5), (3, 2)); }
5144
    /// In the MDX flavor the leading `import`/`export` lines are flagged as an
    /// ESM block, while the blank line, heading and text after them are not.
    #[test]
    fn test_mdx_esm_blocks() {
        let content = r##"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall

In {year}, the snowfall was above average.
It was followed by a warm spring which caused
flood conditions in many of the nearby rivers.

<Chart color="#fcb32c" year={year} />
"##;

        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);

        assert_eq!(ctx.lines.len(), 10);
        // The two statements at the top of the file form the ESM block.
        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
        // Everything from the first blank line onwards is regular markdown.
        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
        assert!(
            !ctx.lines[3].in_esm_block,
            "Line 4 (heading) should NOT be in_esm_block"
        );
        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
    }
5173
    /// The same `import`/`export` lines must not be flagged as an ESM block
    /// when the document is parsed with the Standard flavor.
    #[test]
    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
        let content = r#"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(
            !ctx.lines[0].in_esm_block,
            "Line 1 should NOT be in_esm_block in Standard flavor"
        );
        assert!(
            !ctx.lines[1].in_esm_block,
            "Line 2 should NOT be in_esm_block in Standard flavor"
        );
    }
5194
    /// Both `>` lines are recognised as blockquotes; the parsed content,
    /// nesting level and spacing flag of each are checked.
    #[test]
    fn test_blockquote_with_indented_content() {
        let content = r#"# Heading

> -S socket-path
> More text
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(
            ctx.lines.get(2).is_some_and(|l| l.blockquote.is_some()),
            "Line 3 should be a blockquote"
        );
        assert!(
            ctx.lines.get(3).is_some_and(|l| l.blockquote.is_some()),
            "Line 4 should be a blockquote"
        );

        // Line 3: content after the marker, with the extra-spacing flag set.
        let bq3 = ctx.lines.get(2).unwrap().blockquote.as_ref().unwrap();
        assert_eq!(bq3.content, "-S socket-path");
        assert_eq!(bq3.nesting_level, 1);
        assert!(bq3.has_multiple_spaces_after_marker);

        // Line 4: ordinary single-level blockquote.
        let bq4 = ctx.lines.get(3).unwrap().blockquote.as_ref().unwrap();
        assert_eq!(bq4.content, "More text");
        assert_eq!(bq4.nesting_level, 1);
    }
5230
    /// Footnote definitions (`[^id]: ...`) must not show up in
    /// `reference_defs`; only the regular definition is parsed.
    #[test]
    fn test_footnote_definitions_not_parsed_as_reference_defs() {
        let content = r#"# Title

A footnote[^1].

[^1]: This is the footnote content.

[^note]: Another footnote with [link](https://example.com).

[regular]: ./path.md "A real reference definition"
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(
            ctx.reference_defs.len(),
            1,
            "Footnotes should not be parsed as reference definitions"
        );

        // The surviving definition keeps its id, URL and title.
        assert_eq!(ctx.reference_defs[0].id, "regular");
        assert_eq!(ctx.reference_defs[0].url, "./path.md");
        assert_eq!(
            ctx.reference_defs[0].title,
            Some("A real reference definition".to_string())
        );
    }
5261
    /// A footnote whose body is an inline link must not produce a reference
    /// definition.
    #[test]
    fn test_footnote_with_inline_link_not_misidentified() {
        let content = r#"# Title

A footnote[^1].

[^1]: [link](https://www.google.com).
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(
            ctx.reference_defs.is_empty(),
            "Footnote with inline link should not create a reference definition"
        );
    }
5280
    /// Footnote labels of several shapes (numeric, named, single character,
    /// hyphenated, mixed) are all excluded, leaving only `ref1` and `ref2`.
    #[test]
    fn test_various_footnote_formats_excluded() {
        let content = r#"[^1]: Numeric footnote
[^note]: Named footnote
[^a]: Single char footnote
[^long-footnote-name]: Long named footnote
[^123abc]: Mixed alphanumeric

[ref1]: ./file1.md
[ref2]: ./file2.md
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(
            ctx.reference_defs.len(),
            2,
            "Only regular reference definitions should be parsed"
        );

        let ids: Vec<&str> = ctx.reference_defs.iter().map(|r| r.id.as_str()).collect();
        assert!(ids.contains(&"ref1"));
        assert!(ids.contains(&"ref2"));
        // No id may carry the footnote caret.
        assert!(!ids.iter().any(|id| id.starts_with('^')));
    }
5307
5308 #[test]
5313 fn test_has_char_tracked_characters() {
5314 let content = "# Heading\n* list item\n_emphasis_ and -hyphen-\n+ plus\n> quote\n| table |\n[link]\n`code`\n<html>\n!image";
5316 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5317
5318 assert!(ctx.has_char('#'), "Should detect hash");
5320 assert!(ctx.has_char('*'), "Should detect asterisk");
5321 assert!(ctx.has_char('_'), "Should detect underscore");
5322 assert!(ctx.has_char('-'), "Should detect hyphen");
5323 assert!(ctx.has_char('+'), "Should detect plus");
5324 assert!(ctx.has_char('>'), "Should detect gt");
5325 assert!(ctx.has_char('|'), "Should detect pipe");
5326 assert!(ctx.has_char('['), "Should detect bracket");
5327 assert!(ctx.has_char('`'), "Should detect backtick");
5328 assert!(ctx.has_char('<'), "Should detect lt");
5329 assert!(ctx.has_char('!'), "Should detect exclamation");
5330 assert!(ctx.has_char('\n'), "Should detect newline");
5331 }
5332
5333 #[test]
5334 fn test_has_char_absent_characters() {
5335 let content = "Simple text without special chars";
5336 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5337
5338 assert!(!ctx.has_char('#'), "Should not detect hash");
5340 assert!(!ctx.has_char('*'), "Should not detect asterisk");
5341 assert!(!ctx.has_char('_'), "Should not detect underscore");
5342 assert!(!ctx.has_char('-'), "Should not detect hyphen");
5343 assert!(!ctx.has_char('+'), "Should not detect plus");
5344 assert!(!ctx.has_char('>'), "Should not detect gt");
5345 assert!(!ctx.has_char('|'), "Should not detect pipe");
5346 assert!(!ctx.has_char('['), "Should not detect bracket");
5347 assert!(!ctx.has_char('`'), "Should not detect backtick");
5348 assert!(!ctx.has_char('<'), "Should not detect lt");
5349 assert!(!ctx.has_char('!'), "Should not detect exclamation");
5350 assert!(!ctx.has_char('\n'), "Should not detect newline in single line");
5352 }
5353
5354 #[test]
5355 fn test_has_char_fallback_for_untracked() {
5356 let content = "Text with @mention and $dollar and %percent";
5357 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5358
5359 assert!(ctx.has_char('@'), "Should detect @ via fallback");
5361 assert!(ctx.has_char('$'), "Should detect $ via fallback");
5362 assert!(ctx.has_char('%'), "Should detect % via fallback");
5363 assert!(!ctx.has_char('^'), "Should not detect absent ^ via fallback");
5364 }
5365
5366 #[test]
5367 fn test_char_count_tracked_characters() {
5368 let content = "## Heading ##\n***bold***\n__emphasis__\n---\n+++\n>> nested\n|| table ||\n[[link]]\n``code``\n<<html>>\n!!";
5369 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5370
5371 assert_eq!(ctx.char_count('#'), 4, "Should count 4 hashes");
5373 assert_eq!(ctx.char_count('*'), 6, "Should count 6 asterisks");
5374 assert_eq!(ctx.char_count('_'), 4, "Should count 4 underscores");
5375 assert_eq!(ctx.char_count('-'), 3, "Should count 3 hyphens");
5376 assert_eq!(ctx.char_count('+'), 3, "Should count 3 pluses");
5377 assert_eq!(ctx.char_count('>'), 4, "Should count 4 gt (2 nested + 2 in <<html>>)");
5378 assert_eq!(ctx.char_count('|'), 4, "Should count 4 pipes");
5379 assert_eq!(ctx.char_count('['), 2, "Should count 2 brackets");
5380 assert_eq!(ctx.char_count('`'), 4, "Should count 4 backticks");
5381 assert_eq!(ctx.char_count('<'), 2, "Should count 2 lt");
5382 assert_eq!(ctx.char_count('!'), 2, "Should count 2 exclamations");
5383 assert_eq!(ctx.char_count('\n'), 10, "Should count 10 newlines");
5384 }
5385
5386 #[test]
5387 fn test_char_count_zero_for_absent() {
5388 let content = "Plain text";
5389 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5390
5391 assert_eq!(ctx.char_count('#'), 0);
5392 assert_eq!(ctx.char_count('*'), 0);
5393 assert_eq!(ctx.char_count('_'), 0);
5394 assert_eq!(ctx.char_count('\n'), 0);
5395 }
5396
5397 #[test]
5398 fn test_char_count_fallback_for_untracked() {
5399 let content = "@@@ $$ %%%";
5400 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5401
5402 assert_eq!(ctx.char_count('@'), 3, "Should count 3 @ via fallback");
5403 assert_eq!(ctx.char_count('$'), 2, "Should count 2 $ via fallback");
5404 assert_eq!(ctx.char_count('%'), 3, "Should count 3 % via fallback");
5405 assert_eq!(ctx.char_count('^'), 0, "Should count 0 for absent char");
5406 }
5407
5408 #[test]
5409 fn test_char_count_empty_content() {
5410 let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
5411
5412 assert_eq!(ctx.char_count('#'), 0);
5413 assert_eq!(ctx.char_count('*'), 0);
5414 assert_eq!(ctx.char_count('@'), 0);
5415 assert!(!ctx.has_char('#'));
5416 assert!(!ctx.has_char('@'));
5417 }
5418
5419 #[test]
5424 fn test_is_in_html_tag_simple() {
5425 let content = "<div>content</div>";
5426 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5427
5428 assert!(ctx.is_in_html_tag(0), "Position 0 (<) should be in tag");
5430 assert!(ctx.is_in_html_tag(1), "Position 1 (d) should be in tag");
5431 assert!(ctx.is_in_html_tag(4), "Position 4 (>) should be in tag");
5432
5433 assert!(!ctx.is_in_html_tag(5), "Position 5 (c) should not be in tag");
5435 assert!(!ctx.is_in_html_tag(10), "Position 10 (t) should not be in tag");
5436
5437 assert!(ctx.is_in_html_tag(12), "Position 12 (<) should be in tag");
5439 assert!(ctx.is_in_html_tag(17), "Position 17 (>) should be in tag");
5440 }
5441
5442 #[test]
5443 fn test_is_in_html_tag_self_closing() {
5444 let content = "Text <br/> more text";
5445 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5446
5447 assert!(!ctx.is_in_html_tag(0), "Position 0 should not be in tag");
5449 assert!(!ctx.is_in_html_tag(4), "Position 4 (space) should not be in tag");
5450
5451 assert!(ctx.is_in_html_tag(5), "Position 5 (<) should be in tag");
5453 assert!(ctx.is_in_html_tag(8), "Position 8 (/) should be in tag");
5454 assert!(ctx.is_in_html_tag(9), "Position 9 (>) should be in tag");
5455
5456 assert!(!ctx.is_in_html_tag(10), "Position 10 (space) should not be in tag");
5458 }
5459
5460 #[test]
5461 fn test_is_in_html_tag_with_attributes() {
5462 let content = r#"<a href="url" class="link">text</a>"#;
5463 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5464
5465 assert!(ctx.is_in_html_tag(0), "Start of tag");
5467 assert!(ctx.is_in_html_tag(10), "Inside href attribute");
5468 assert!(ctx.is_in_html_tag(20), "Inside class attribute");
5469 assert!(ctx.is_in_html_tag(26), "End of opening tag");
5470
5471 assert!(!ctx.is_in_html_tag(27), "Start of content");
5473 assert!(!ctx.is_in_html_tag(30), "End of content");
5474
5475 assert!(ctx.is_in_html_tag(31), "Start of closing tag");
5477 }
5478
5479 #[test]
5480 fn test_is_in_html_tag_multiline() {
5481 let content = "<div\n class=\"test\"\n>\ncontent\n</div>";
5482 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5483
5484 assert!(ctx.is_in_html_tag(0), "Start of multiline tag");
5486 assert!(ctx.is_in_html_tag(5), "After first newline in tag");
5487 assert!(ctx.is_in_html_tag(15), "Inside attribute");
5488
5489 let closing_bracket_pos = content.find(">\n").unwrap();
5491 assert!(!ctx.is_in_html_tag(closing_bracket_pos + 2), "Content after tag");
5492 }
5493
5494 #[test]
5495 fn test_is_in_html_tag_no_tags() {
5496 let content = "Plain text without any HTML";
5497 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5498
5499 for i in 0..content.len() {
5501 assert!(!ctx.is_in_html_tag(i), "Position {i} should not be in tag");
5502 }
5503 }
5504
5505 #[test]
5510 fn test_is_in_jinja_range_expression() {
5511 let content = "Hello {{ name }}!";
5512 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5513
5514 assert!(!ctx.is_in_jinja_range(0), "H should not be in Jinja");
5516 assert!(!ctx.is_in_jinja_range(5), "Space before Jinja should not be in Jinja");
5517
5518 assert!(ctx.is_in_jinja_range(6), "First brace should be in Jinja");
5520 assert!(ctx.is_in_jinja_range(7), "Second brace should be in Jinja");
5521 assert!(ctx.is_in_jinja_range(10), "name should be in Jinja");
5522 assert!(ctx.is_in_jinja_range(14), "Closing brace should be in Jinja");
5523 assert!(ctx.is_in_jinja_range(15), "Second closing brace should be in Jinja");
5524
5525 assert!(!ctx.is_in_jinja_range(16), "! should not be in Jinja");
5527 }
5528
5529 #[test]
5530 fn test_is_in_jinja_range_statement() {
5531 let content = "{% if condition %}content{% endif %}";
5532 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5533
5534 assert!(ctx.is_in_jinja_range(0), "Start of Jinja statement");
5536 assert!(ctx.is_in_jinja_range(5), "condition should be in Jinja");
5537 assert!(ctx.is_in_jinja_range(17), "End of opening statement");
5538
5539 assert!(!ctx.is_in_jinja_range(18), "content should not be in Jinja");
5541
5542 assert!(ctx.is_in_jinja_range(25), "Start of endif");
5544 assert!(ctx.is_in_jinja_range(32), "endif should be in Jinja");
5545 }
5546
5547 #[test]
5548 fn test_is_in_jinja_range_multiple() {
5549 let content = "{{ a }} and {{ b }}";
5550 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5551
5552 assert!(ctx.is_in_jinja_range(0));
5554 assert!(ctx.is_in_jinja_range(3));
5555 assert!(ctx.is_in_jinja_range(6));
5556
5557 assert!(!ctx.is_in_jinja_range(8));
5559 assert!(!ctx.is_in_jinja_range(11));
5560
5561 assert!(ctx.is_in_jinja_range(12));
5563 assert!(ctx.is_in_jinja_range(15));
5564 assert!(ctx.is_in_jinja_range(18));
5565 }
5566
5567 #[test]
5568 fn test_is_in_jinja_range_no_jinja() {
5569 let content = "Plain text with single braces but not Jinja";
5570 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5571
5572 for i in 0..content.len() {
5574 assert!(!ctx.is_in_jinja_range(i), "Position {i} should not be in Jinja");
5575 }
5576 }
5577
5578 #[test]
5583 fn test_is_in_link_title_with_title() {
5584 let content = r#"[ref]: https://example.com "Title text"
5585
5586Some content."#;
5587 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5588
5589 assert_eq!(ctx.reference_defs.len(), 1);
5591 let def = &ctx.reference_defs[0];
5592 assert!(def.title_byte_start.is_some());
5593 assert!(def.title_byte_end.is_some());
5594
5595 let title_start = def.title_byte_start.unwrap();
5596 let title_end = def.title_byte_end.unwrap();
5597
5598 assert!(!ctx.is_in_link_title(10), "URL should not be in title");
5600
5601 assert!(ctx.is_in_link_title(title_start), "Title start should be in title");
5603 assert!(
5604 ctx.is_in_link_title(title_start + 5),
5605 "Middle of title should be in title"
5606 );
5607 assert!(ctx.is_in_link_title(title_end - 1), "End of title should be in title");
5608
5609 assert!(
5611 !ctx.is_in_link_title(title_end),
5612 "After title end should not be in title"
5613 );
5614 }
5615
5616 #[test]
5617 fn test_is_in_link_title_without_title() {
5618 let content = "[ref]: https://example.com\n\nSome content.";
5619 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5620
5621 assert_eq!(ctx.reference_defs.len(), 1);
5623 let def = &ctx.reference_defs[0];
5624 assert!(def.title_byte_start.is_none());
5625 assert!(def.title_byte_end.is_none());
5626
5627 for i in 0..content.len() {
5629 assert!(!ctx.is_in_link_title(i), "Position {i} should not be in title");
5630 }
5631 }
5632
5633 #[test]
5634 fn test_is_in_link_title_multiple_refs() {
5635 let content = r#"[ref1]: /url1 "Title One"
5636[ref2]: /url2
5637[ref3]: /url3 "Title Three"
5638"#;
5639 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5640
5641 assert_eq!(ctx.reference_defs.len(), 3);
5643
5644 let ref1 = ctx.reference_defs.iter().find(|r| r.id == "ref1").unwrap();
5646 assert!(ref1.title_byte_start.is_some());
5647
5648 let ref2 = ctx.reference_defs.iter().find(|r| r.id == "ref2").unwrap();
5650 assert!(ref2.title_byte_start.is_none());
5651
5652 let ref3 = ctx.reference_defs.iter().find(|r| r.id == "ref3").unwrap();
5654 assert!(ref3.title_byte_start.is_some());
5655
5656 if let (Some(start), Some(end)) = (ref1.title_byte_start, ref1.title_byte_end) {
5658 assert!(ctx.is_in_link_title(start + 1));
5659 assert!(!ctx.is_in_link_title(end + 5));
5660 }
5661
5662 if let (Some(start), Some(_end)) = (ref3.title_byte_start, ref3.title_byte_end) {
5664 assert!(ctx.is_in_link_title(start + 1));
5665 }
5666 }
5667
5668 #[test]
5669 fn test_is_in_link_title_single_quotes() {
5670 let content = "[ref]: /url 'Single quoted title'\n";
5671 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5672
5673 assert_eq!(ctx.reference_defs.len(), 1);
5674 let def = &ctx.reference_defs[0];
5675
5676 if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
5677 assert!(ctx.is_in_link_title(start));
5678 assert!(ctx.is_in_link_title(start + 5));
5679 assert!(!ctx.is_in_link_title(end));
5680 }
5681 }
5682
5683 #[test]
5684 fn test_is_in_link_title_parentheses() {
5685 let content = "[ref]: /url (Parenthesized title)\n";
5688 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5689
5690 if ctx.reference_defs.is_empty() {
5693 for i in 0..content.len() {
5695 assert!(!ctx.is_in_link_title(i));
5696 }
5697 } else {
5698 let def = &ctx.reference_defs[0];
5699 if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
5700 assert!(ctx.is_in_link_title(start));
5701 assert!(ctx.is_in_link_title(start + 5));
5702 assert!(!ctx.is_in_link_title(end));
5703 } else {
5704 for i in 0..content.len() {
5706 assert!(!ctx.is_in_link_title(i));
5707 }
5708 }
5709 }
5710 }
5711
5712 #[test]
5713 fn test_is_in_link_title_no_refs() {
5714 let content = "Just plain text without any reference definitions.";
5715 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5716
5717 assert!(ctx.reference_defs.is_empty());
5718
5719 for i in 0..content.len() {
5720 assert!(!ctx.is_in_link_title(i));
5721 }
5722 }
5723
5724 #[test]
5729 fn test_math_spans_inline() {
5730 let content = "Text with inline math $[f](x)$ in it.";
5731 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5732
5733 let math_spans = ctx.math_spans();
5734 assert_eq!(math_spans.len(), 1, "Should detect one inline math span");
5735
5736 let span = &math_spans[0];
5737 assert!(!span.is_display, "Should be inline math, not display");
5738 assert_eq!(span.content, "[f](x)", "Content should be extracted correctly");
5739 }
5740
5741 #[test]
5742 fn test_math_spans_display_single_line() {
5743 let content = "$$X(\\zeta) = \\mathcal Z [x](\\zeta)$$";
5744 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5745
5746 let math_spans = ctx.math_spans();
5747 assert_eq!(math_spans.len(), 1, "Should detect one display math span");
5748
5749 let span = &math_spans[0];
5750 assert!(span.is_display, "Should be display math");
5751 assert!(
5752 span.content.contains("[x](\\zeta)"),
5753 "Content should contain the link-like pattern"
5754 );
5755 }
5756
5757 #[test]
5758 fn test_math_spans_display_multiline() {
5759 let content = "Before\n\n$$\n[x](\\zeta) = \\sum_k x(k)\n$$\n\nAfter";
5760 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5761
5762 let math_spans = ctx.math_spans();
5763 assert_eq!(math_spans.len(), 1, "Should detect one display math span");
5764
5765 let span = &math_spans[0];
5766 assert!(span.is_display, "Should be display math");
5767 }
5768
5769 #[test]
5770 fn test_is_in_math_span() {
5771 let content = "Text $[f](x)$ more text";
5772 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5773
5774 let math_start = content.find('$').unwrap();
5776 let math_end = content.rfind('$').unwrap() + 1;
5777
5778 assert!(
5779 ctx.is_in_math_span(math_start + 1),
5780 "Position inside math span should return true"
5781 );
5782 assert!(
5783 ctx.is_in_math_span(math_start + 3),
5784 "Position inside math span should return true"
5785 );
5786
5787 assert!(!ctx.is_in_math_span(0), "Position before math span should return false");
5789 assert!(
5790 !ctx.is_in_math_span(math_end + 1),
5791 "Position after math span should return false"
5792 );
5793 }
5794
5795 #[test]
5796 fn test_math_spans_mixed_with_code() {
5797 let content = "Math $[f](x)$ and code `[g](y)` mixed";
5798 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5799
5800 let math_spans = ctx.math_spans();
5801 let code_spans = ctx.code_spans();
5802
5803 assert_eq!(math_spans.len(), 1, "Should have one math span");
5804 assert_eq!(code_spans.len(), 1, "Should have one code span");
5805
5806 assert_eq!(math_spans[0].content, "[f](x)");
5808 assert_eq!(code_spans[0].content, "[g](y)");
5810 }
5811
5812 #[test]
5813 fn test_math_spans_no_math() {
5814 let content = "Regular text without any math at all.";
5815 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5816
5817 let math_spans = ctx.math_spans();
5818 assert!(math_spans.is_empty(), "Should have no math spans");
5819 }
5820
5821 #[test]
5822 fn test_math_spans_multiple() {
5823 let content = "First $a$ and second $b$ and display $$c$$";
5824 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5825
5826 let math_spans = ctx.math_spans();
5827 assert_eq!(math_spans.len(), 3, "Should detect three math spans");
5828
5829 let inline_count = math_spans.iter().filter(|s| !s.is_display).count();
5831 let display_count = math_spans.iter().filter(|s| s.is_display).count();
5832
5833 assert_eq!(inline_count, 2, "Should have two inline math spans");
5834 assert_eq!(display_count, 1, "Should have one display math span");
5835 }
5836
5837 #[test]
5838 fn test_is_in_math_span_boundary_positions() {
5839 let content = "$[f](x)$";
5842 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5843
5844 let math_spans = ctx.math_spans();
5845 assert_eq!(math_spans.len(), 1, "Should have one math span");
5846
5847 let span = &math_spans[0];
5848
5849 assert!(
5851 ctx.is_in_math_span(span.byte_offset),
5852 "Start position should be in span"
5853 );
5854
5855 assert!(
5857 ctx.is_in_math_span(span.byte_offset + 1),
5858 "Position after start should be in span"
5859 );
5860
5861 assert!(
5863 ctx.is_in_math_span(span.byte_end - 1),
5864 "Position at end-1 should be in span"
5865 );
5866
5867 assert!(
5869 !ctx.is_in_math_span(span.byte_end),
5870 "Position at byte_end should NOT be in span (exclusive)"
5871 );
5872 }
5873
5874 #[test]
5875 fn test_math_spans_at_document_start() {
5876 let content = "$x$ text";
5877 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5878
5879 let math_spans = ctx.math_spans();
5880 assert_eq!(math_spans.len(), 1);
5881 assert_eq!(math_spans[0].byte_offset, 0, "Math should start at byte 0");
5882 }
5883
5884 #[test]
5885 fn test_math_spans_at_document_end() {
5886 let content = "text $x$";
5887 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5888
5889 let math_spans = ctx.math_spans();
5890 assert_eq!(math_spans.len(), 1);
5891 assert_eq!(math_spans[0].byte_end, content.len(), "Math should end at document end");
5892 }
5893
5894 #[test]
5895 fn test_math_spans_consecutive() {
5896 let content = "$a$$b$";
5897 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5898
5899 let math_spans = ctx.math_spans();
5900 assert!(!math_spans.is_empty(), "Should detect at least one math span");
5902
5903 for i in 0..content.len() {
5905 assert!(ctx.is_in_math_span(i), "Position {i} should be in a math span");
5906 }
5907 }
5908
5909 #[test]
5910 fn test_math_spans_currency_not_math() {
5911 let content = "Price is $100";
5913 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5914
5915 let math_spans = ctx.math_spans();
5916 assert!(
5919 math_spans.is_empty() || !math_spans.iter().any(|s| s.content.contains("100")),
5920 "Unbalanced $ should not create math span containing 100"
5921 );
5922 }
5923
5924 #[test]
5929 fn test_reference_lookup_o1_basic() {
5930 let content = r#"[ref1]: /url1
5931[REF2]: /url2 "Title"
5932[Ref3]: /url3
5933
5934Use [link][ref1] and [link][REF2]."#;
5935 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5936
5937 assert_eq!(ctx.reference_defs.len(), 3);
5939
5940 assert_eq!(ctx.get_reference_url("ref1"), Some("/url1"));
5942 assert_eq!(ctx.get_reference_url("REF1"), Some("/url1")); assert_eq!(ctx.get_reference_url("Ref1"), Some("/url1")); assert_eq!(ctx.get_reference_url("ref2"), Some("/url2"));
5945 assert_eq!(ctx.get_reference_url("REF2"), Some("/url2"));
5946 assert_eq!(ctx.get_reference_url("ref3"), Some("/url3"));
5947 assert_eq!(ctx.get_reference_url("nonexistent"), None);
5948 }
5949
5950 #[test]
5951 fn test_reference_lookup_o1_get_reference_def() {
5952 let content = r#"[myref]: https://example.com "My Title"
5953"#;
5954 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5955
5956 let def = ctx.get_reference_def("myref").expect("Should find myref");
5958 assert_eq!(def.url, "https://example.com");
5959 assert_eq!(def.title.as_deref(), Some("My Title"));
5960
5961 let def2 = ctx.get_reference_def("MYREF").expect("Should find MYREF");
5963 assert_eq!(def2.url, "https://example.com");
5964
5965 assert!(ctx.get_reference_def("nonexistent").is_none());
5967 }
5968
5969 #[test]
5970 fn test_reference_lookup_o1_has_reference_def() {
5971 let content = r#"[foo]: /foo
5972[BAR]: /bar
5973"#;
5974 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5975
5976 assert!(ctx.has_reference_def("foo"));
5978 assert!(ctx.has_reference_def("FOO")); assert!(ctx.has_reference_def("bar"));
5980 assert!(ctx.has_reference_def("Bar")); assert!(!ctx.has_reference_def("baz")); }
5983
5984 #[test]
5985 fn test_reference_lookup_o1_empty_content() {
5986 let content = "No references here.";
5987 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5988
5989 assert!(ctx.reference_defs.is_empty());
5990 assert_eq!(ctx.get_reference_url("anything"), None);
5991 assert!(ctx.get_reference_def("anything").is_none());
5992 assert!(!ctx.has_reference_def("anything"));
5993 }
5994
5995 #[test]
5996 fn test_reference_lookup_o1_special_characters_in_id() {
5997 let content = r#"[ref-with-dash]: /url1
5998[ref_with_underscore]: /url2
5999[ref.with.dots]: /url3
6000"#;
6001 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
6002
6003 assert_eq!(ctx.get_reference_url("ref-with-dash"), Some("/url1"));
6004 assert_eq!(ctx.get_reference_url("ref_with_underscore"), Some("/url2"));
6005 assert_eq!(ctx.get_reference_url("ref.with.dots"), Some("/url3"));
6006 }
6007
6008 #[test]
6009 fn test_reference_lookup_o1_unicode_id() {
6010 let content = r#"[日本語]: /japanese
6011[émoji]: /emoji
6012"#;
6013 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
6014
6015 assert_eq!(ctx.get_reference_url("日本語"), Some("/japanese"));
6016 assert_eq!(ctx.get_reference_url("émoji"), Some("/emoji"));
6017 assert_eq!(ctx.get_reference_url("ÉMOJI"), Some("/emoji")); }
6019}