1use crate::config::MarkdownFlavor;
2use crate::inline_config::InlineConfig;
3use crate::rules::front_matter_utils::FrontMatterUtils;
4use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
5use crate::utils::element_cache::ElementCache;
6use crate::utils::regex_cache::URL_SIMPLE_REGEX;
7use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
8use regex::Regex;
9use std::borrow::Cow;
10use std::collections::HashMap;
11use std::path::PathBuf;
12use std::sync::LazyLock;
13
/// Evaluates `$code` and yields its value; when `$profile` is true, also prints
/// the elapsed wall-clock time to stderr as `[PROFILE] <name>: <duration>`.
///
/// Native (non-wasm32) variant. The timer is started unconditionally; only the
/// printing is gated on `$profile`.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let start = std::time::Instant::now();
        let result = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, start.elapsed());
        }
        result
    }};
}
26
/// wasm32 variant of `profile_section!`: evaluates `$code` and yields its value
/// without any timing. `$name` and `$profile` are accepted but not used.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
31
/// Lazily compiled regex for `[text](url "title")` and `[text][ref]` links.
///
/// Capture groups: 1 = link text, 2 = angle-bracketed URL, 3 = bare URL,
/// 4/5 = double-/single-quoted title, 6 = reference id.
/// Compiled with `(?sx)`, so whitespace and `#` comments inside the pattern are ignored.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
 \[((?:[^\[\]\\]|\\.)*)\] # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
 (?:
 \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
 |
 \[([^\]]*)\] # Reference ID in group 6
 )"#
    ).unwrap()
});
45
/// Lazily compiled regex for `![alt](url "title")` and `![alt][ref]` images.
///
/// Same shape as `LINK_PATTERN` with a leading `!`. Capture groups: 1 = alt text,
/// 2 = angle-bracketed URL, 3 = bare URL, 4/5 = quoted title, 6 = reference id.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
 !\[((?:[^\[\]\\]|\\.)*)\] # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
 (?:
 \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
 |
 \[([^\]]*)\] # Reference ID in group 6
 )"#
    ).unwrap()
});
59
/// Lazily compiled multi-line regex for reference definitions such as
/// `[id]: url "title"`, allowing up to three leading spaces.
///
/// Capture groups: 1 = id, 2 = URL (non-whitespace run), 3/4 = double-/single-quoted title.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
63
/// Lazily compiled regex matching an email-address-like token:
/// `local@domain.tld` with a TLD of at least two ASCII letters.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
69
/// Lazily compiled regex capturing a leading blockquote prefix: optional
/// whitespace, one or more `>`, then optional whitespace (group 1).
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
72
/// Per-line metadata stored in `LintContext::lines`.
///
/// The line's text is not stored; `LineInfo::content` slices it out of the
/// source using `byte_offset` and `byte_len`.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset in the source where this line starts.
    pub byte_offset: usize,
    /// Length of the line in bytes (the slice is `byte_offset..byte_offset + byte_len`).
    pub byte_len: usize,
    /// Leading indentation of the line (presumably in characters/bytes — TODO confirm).
    pub indent: usize,
    /// Indentation as displayed (presumably with tabs expanded — TODO confirm).
    pub visual_indent: usize,
    /// Whether the line is blank.
    pub is_blank: bool,
    // Region flags: each is presumably true when the line falls inside the named
    // construct; several are filled in by the detection passes run in `LintContext::new`.
    pub in_code_block: bool,
    pub in_front_matter: bool,
    pub in_html_block: bool,
    pub in_html_comment: bool,
    /// List item details when this line carries a list marker.
    pub list_item: Option<ListItemInfo>,
    /// Heading details when this line is (part of) a heading.
    pub heading: Option<HeadingInfo>,
    /// Blockquote details when this line is inside a blockquote.
    pub blockquote: Option<BlockquoteInfo>,
    pub in_mkdocstrings: bool,
    pub in_esm_block: bool,
    /// Set by `LintContext::new` for every line after the first of a multi-line code span.
    pub in_code_span_continuation: bool,
    pub is_horizontal_rule: bool,
    pub in_math_block: bool,
    pub in_quarto_div: bool,
    pub in_jsx_expression: bool,
    pub in_mdx_comment: bool,
    pub in_jsx_component: bool,
    pub in_jsx_fragment: bool,
    pub in_admonition: bool,
    pub in_content_tab: bool,
    pub in_definition_list: bool,
}
130
131impl LineInfo {
132 pub fn content<'a>(&self, source: &'a str) -> &'a str {
134 &source[self.byte_offset..self.byte_offset + self.byte_len]
135 }
136}
137
/// Details of a list item marker found on a line (stored in `LineInfo::list_item`).
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker text (presumably e.g. `-`, `*`, `+`, `1.` — TODO confirm exact form).
    pub marker: String,
    /// Whether the item belongs to an ordered list.
    pub is_ordered: bool,
    /// The item's number, when there is one (presumably only for ordered items).
    pub number: Option<usize>,
    /// Column of the marker.
    // NOTE(review): `compute_mixed_list_nesting` treats a value of 1 like column 0 — confirm indexing base.
    pub marker_column: usize,
    /// Column where the item's content starts.
    pub content_column: usize,
}
152
/// Syntactic form of a heading.
///
/// Derives `Eq` in addition to `PartialEq`: the enum has only unit variants,
/// so equality is total and the type can be used where `Eq` is required.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HeadingStyle {
    /// ATX heading (`#`-prefixed).
    ATX,
    /// Setext heading, first variant (presumably the `=` underline / level 1 — TODO confirm).
    Setext1,
    /// Setext heading, second variant (presumably the `-` underline / level 2 — TODO confirm).
    Setext2,
}
163
/// A link found in the document (stored in `LintContext::links`).
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// Line number where the link starts.
    pub line: usize,
    /// Column where the link starts.
    pub start_col: usize,
    /// Column where the link ends.
    pub end_col: usize,
    /// Byte offset of the link start in the document.
    pub byte_offset: usize,
    /// Byte offset of the link end in the document.
    pub byte_end: usize,
    /// Link text.
    pub text: Cow<'a, str>,
    /// Link destination.
    pub url: Cow<'a, str>,
    /// Whether this is a reference-style link.
    pub is_reference: bool,
    /// Reference id for reference-style links.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type as classified by pulldown-cmark.
    pub link_type: LinkType,
}
188
/// A reference link that the parser reported as broken (stored in `LintContext::broken_links`).
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text reported by the parser's broken-link callback.
    pub reference: String,
    /// Byte span of the broken link in the document.
    pub span: std::ops::Range<usize>,
}
197
/// A footnote reference found in the document (stored in `LintContext::footnote_refs`).
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// Footnote identifier.
    pub id: String,
    /// Line number of the reference.
    pub line: usize,
    /// Byte offset where the reference starts.
    pub byte_offset: usize,
    /// Byte offset where the reference ends.
    pub byte_end: usize,
}
210
/// An image found in the document (stored in `LintContext::images`).
/// Field layout parallels `ParsedLink`, with `alt_text` in place of `text`.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// Line number where the image starts.
    pub line: usize,
    /// Column where the image starts.
    pub start_col: usize,
    /// Column where the image ends.
    pub end_col: usize,
    /// Byte offset of the image start in the document.
    pub byte_offset: usize,
    /// Byte offset of the image end in the document.
    pub byte_end: usize,
    /// Alternative text.
    pub alt_text: Cow<'a, str>,
    /// Image destination.
    pub url: Cow<'a, str>,
    /// Whether this is a reference-style image.
    pub is_reference: bool,
    /// Reference id for reference-style images.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type as classified by pulldown-cmark.
    pub link_type: LinkType,
}
235
/// A reference definition (stored in `LintContext::reference_defs`).
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line number of the definition.
    pub line: usize,
    /// Reference id as written; lookups through `LintContext` lowercase it first.
    pub id: String,
    /// Destination URL.
    pub url: String,
    /// Optional title.
    pub title: Option<String>,
    /// Byte offset where the definition starts (inclusive bound used by `is_in_reference_def`).
    pub byte_offset: usize,
    /// Byte offset where the definition ends (exclusive bound used by `is_in_reference_def`).
    pub byte_end: usize,
    /// Start of the title's byte range, if any (inclusive bound used by `is_in_link_title`).
    pub title_byte_start: Option<usize>,
    /// End of the title's byte range, if any (exclusive bound used by `is_in_link_title`).
    pub title_byte_end: Option<usize>,
}
256
/// An inline code span, possibly spanning several lines.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// 1-based line number where the span starts.
    pub line: usize,
    /// 1-based line number where the span ends (greater than `line` for multi-line spans).
    pub end_line: usize,
    /// Start column; compared against a 0-indexed column in `is_in_code_span`.
    pub start_col: usize,
    /// End column (exclusive in `is_in_code_span`).
    pub end_col: usize,
    /// Byte offset where the span starts (inclusive).
    pub byte_offset: usize,
    /// Byte offset where the span ends (exclusive).
    pub byte_end: usize,
    /// Number of backticks in the delimiter.
    pub backtick_count: usize,
    /// The span's content.
    pub content: String,
}
277
/// A math span found in the document (returned by `LintContext::math_spans`).
#[derive(Debug, Clone)]
pub struct MathSpan {
    /// Line number where the span starts.
    pub line: usize,
    /// Line number where the span ends.
    pub end_line: usize,
    /// Start column.
    pub start_col: usize,
    /// End column.
    pub end_col: usize,
    /// Byte offset where the span starts (inclusive bound used by `is_in_math_span`).
    pub byte_offset: usize,
    /// Byte offset where the span ends (exclusive bound used by `is_in_math_span`).
    pub byte_end: usize,
    /// Whether this is display math (presumably `$$...$$` as opposed to inline — TODO confirm).
    pub is_display: bool,
    /// The span's content.
    pub content: String,
}
298
/// Details of a heading found on a line (stored in `LineInfo::heading`).
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level.
    pub level: u8,
    /// Syntactic style of the heading.
    pub style: HeadingStyle,
    /// The heading marker text.
    pub marker: String,
    /// Column of the marker.
    pub marker_column: usize,
    /// Column where the heading content starts.
    pub content_column: usize,
    /// Heading text (presumably cleaned of markers/custom id — TODO confirm vs `raw_text`).
    pub text: String,
    /// Custom id attached to the heading, if any.
    pub custom_id: Option<String>,
    /// Heading text as written (see note on `text`).
    pub raw_text: String,
    /// Whether the heading has a closing sequence.
    pub has_closing_sequence: bool,
    /// The closing sequence text.
    pub closing_sequence: String,
    /// Whether the heading counts as valid; `ValidHeadingsIter` yields only headings with this set.
    pub is_valid: bool,
}
326
/// Item yielded by `ValidHeadingsIter`: a heading whose `is_valid` flag is set,
/// together with the line it was found on.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// 1-based line number (index into the line slice plus one).
    pub line_num: usize,
    /// The heading details.
    pub heading: &'a HeadingInfo,
    /// The full line metadata the heading belongs to.
    pub line_info: &'a LineInfo,
}
340
/// Iterator over the lines of a document that carry a valid heading.
pub struct ValidHeadingsIter<'a> {
    /// All lines being scanned.
    lines: &'a [LineInfo],
    /// Index of the next line to examine.
    current_index: usize,
}
349
350impl<'a> ValidHeadingsIter<'a> {
351 fn new(lines: &'a [LineInfo]) -> Self {
352 Self {
353 lines,
354 current_index: 0,
355 }
356 }
357}
358
359impl<'a> Iterator for ValidHeadingsIter<'a> {
360 type Item = ValidHeading<'a>;
361
362 fn next(&mut self) -> Option<Self::Item> {
363 while self.current_index < self.lines.len() {
364 let idx = self.current_index;
365 self.current_index += 1;
366
367 let line_info = &self.lines[idx];
368 if let Some(heading) = &line_info.heading
369 && heading.is_valid
370 {
371 return Some(ValidHeading {
372 line_num: idx + 1, heading,
374 line_info,
375 });
376 }
377 }
378 None
379 }
380}
381
/// Details of a blockquote line (stored in `LineInfo::blockquote`).
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting depth (presumably the number of `>` markers — TODO confirm).
    pub nesting_level: usize,
    /// Indentation before the first marker.
    pub indent: String,
    /// Column of the marker.
    pub marker_column: usize,
    /// The blockquote prefix; `blockquote_prefix_for_blank_line` returns it with trailing whitespace trimmed.
    pub prefix: String,
    /// Line content after the prefix.
    pub content: String,
    /// Whether no space follows the marker.
    pub has_no_space_after_marker: bool,
    /// Whether more than one space follows the marker.
    pub has_multiple_spaces_after_marker: bool,
    /// Flag consumed by the MD028 rule (semantics not visible here — TODO confirm).
    pub needs_md028_fix: bool,
}
402
/// A contiguous list in the document (stored in `LintContext::list_blocks`).
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// First line of the block (inclusive, as used by `is_in_list_block`).
    pub start_line: usize,
    /// Last line of the block (inclusive, as used by `is_in_list_block`).
    pub end_line: usize,
    /// Whether the list is ordered.
    pub is_ordered: bool,
    /// The list marker, when one applies to the whole block.
    pub marker: Option<String>,
    /// Blockquote prefix the list is nested under (empty when not in a blockquote — TODO confirm).
    pub blockquote_prefix: String,
    /// Line numbers of the list items in this block.
    pub item_lines: Vec<usize>,
    /// Nesting level of the list.
    pub nesting_level: usize,
    /// Widest marker among the block's items.
    pub max_marker_width: usize,
}
423
424use std::sync::{Arc, OnceLock};
425
/// Map keyed by `usize` to a list-item tuple.
// NOTE(review): the tuple presumably mirrors `ListItemInfo`
// (is_ordered, marker, marker_column, content_column, number) and the key is
// presumably a line index — confirm against the code that fills it.
type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;

/// List of `(start, end)` byte ranges.
type ByteRanges = Vec<(usize, usize)>;
431
/// Occurrence counts of characters that matter for Markdown syntax.
///
/// `LintContext::has_char` and `LintContext::char_count` answer from these
/// counters for the characters noted on each field.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count reported for `#`.
    pub hash_count: usize,
    /// Count reported for `*`.
    pub asterisk_count: usize,
    /// Count reported for `_`.
    pub underscore_count: usize,
    /// Count reported for `-`.
    pub hyphen_count: usize,
    /// Count reported for `+`.
    pub plus_count: usize,
    /// Count reported for `>`.
    pub gt_count: usize,
    /// Count reported for `|`.
    pub pipe_count: usize,
    /// Count reported for `[` (whether `]` is also counted is not visible here — TODO confirm).
    pub bracket_count: usize,
    /// Count reported for the backtick character.
    pub backtick_count: usize,
    /// Count reported for `<`.
    pub lt_count: usize,
    /// Count reported for `!`.
    pub exclamation_count: usize,
    /// Count reported for `\n`.
    pub newline_count: usize,
}
460
/// An HTML tag found in the document (returned by `LintContext::html_tags`).
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number of the tag (matched by `html_tags_on_line`).
    pub line: usize,
    /// Start column.
    pub start_col: usize,
    /// End column.
    pub end_col: usize,
    /// Byte offset where the tag starts (inclusive bound used by `is_in_html_tag`).
    pub byte_offset: usize,
    /// Byte offset where the tag ends (exclusive bound used by `is_in_html_tag`).
    pub byte_end: usize,
    /// Tag name.
    pub tag_name: String,
    /// Whether this is a closing tag.
    pub is_closing: bool,
    /// Whether the tag is self-closing.
    pub is_self_closing: bool,
    /// Tag text as written.
    pub raw_content: String,
}
483
/// An emphasis span found in the document (returned by `LintContext::emphasis_spans`).
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number of the span (matched by `emphasis_spans_on_line`).
    pub line: usize,
    /// Start column.
    pub start_col: usize,
    /// End column.
    pub end_col: usize,
    /// Byte offset where the span starts.
    pub byte_offset: usize,
    /// Byte offset where the span ends.
    pub byte_end: usize,
    /// Delimiter character (presumably `*` or `_` — TODO confirm).
    pub marker: char,
    /// Number of delimiter characters.
    pub marker_count: usize,
    /// The emphasized content.
    pub content: String,
}
504
/// A table row found in the document (returned by `LintContext::table_rows`).
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number of the row (matched by `table_rows_on_line`).
    pub line: usize,
    /// Whether this row is the header separator row.
    pub is_separator: bool,
    /// Number of columns in the row.
    pub column_count: usize,
    /// Column alignments (string encoding not visible here — TODO confirm values).
    pub column_alignments: Vec<String>,
}
517
/// A bare URL found in the document (returned by `LintContext::bare_urls`).
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number of the URL (matched by `bare_urls_on_line`).
    pub line: usize,
    /// Start column.
    pub start_col: usize,
    /// End column.
    pub end_col: usize,
    /// Byte offset where the URL starts.
    pub byte_offset: usize,
    /// Byte offset where the URL ends.
    pub byte_end: usize,
    /// The URL text.
    pub url: String,
    /// Kind of URL (string encoding not visible here — TODO confirm values).
    pub url_type: String,
}
536
/// Pre-computed view of one Markdown document, built by `LintContext::new`.
///
/// Most fields are computed eagerly during construction; the `*_cache` fields
/// are `OnceLock`s that are either pre-filled by `new` or filled on first access
/// through the corresponding accessor method.
pub struct LintContext<'a> {
    /// The document text.
    pub content: &'a str,
    /// Byte offset of the start of each line (first entry is 0).
    pub line_offsets: Vec<usize>,
    /// Byte ranges of code blocks.
    pub code_blocks: Vec<(usize, usize)>,
    /// Per-line metadata.
    pub lines: Vec<LineInfo>,
    /// Links found in the document.
    pub links: Vec<ParsedLink<'a>>,
    /// Images found in the document.
    pub images: Vec<ParsedImage<'a>>,
    /// Broken reference links reported by the parser.
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Footnote references.
    pub footnote_refs: Vec<FootnoteRef>,
    /// Reference definitions.
    pub reference_defs: Vec<ReferenceDef>,
    /// Lowercased reference id -> index into `reference_defs`.
    reference_defs_map: HashMap<String, usize>,
    /// Code spans; pre-filled by `new`.
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>,
    /// Math spans; filled on first call to `math_spans`.
    math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>,
    /// List blocks.
    pub list_blocks: Vec<ListBlock>,
    /// Character counts used by the `has_char` / `likely_has_*` heuristics.
    pub char_frequency: CharFrequency,
    /// HTML tags; filled on first call to `html_tags`.
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>,
    /// Emphasis spans; pre-filled by `new`.
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>,
    /// Table rows; filled on first call to `table_rows`.
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>,
    /// Bare URLs; filled on first call to `bare_urls`.
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>,
    /// Cached result of `has_mixed_list_nesting`.
    has_mixed_list_nesting_cache: OnceLock<bool>,
    /// Byte ranges of HTML comments.
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Table blocks.
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Line index built from the content.
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    /// Byte ranges of Jinja constructs.
    jinja_ranges: Vec<(usize, usize)>,
    /// Markdown flavor the document is parsed as.
    pub flavor: MarkdownFlavor,
    /// Path of the source file, when known.
    pub source_file: Option<PathBuf>,
    /// Byte ranges of JSX expressions.
    jsx_expression_ranges: Vec<(usize, usize)>,
    /// Byte ranges of MDX comments.
    mdx_comment_ranges: Vec<(usize, usize)>,
    /// Byte ranges of citations (only computed for the Quarto flavor).
    citation_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Byte ranges matched by the Hugo shortcode regex.
    shortcode_ranges: Vec<(usize, usize)>,
    /// Inline configuration parsed from the content.
    inline_config: InlineConfig,
}
569
/// The four consecutive pieces of a blockquote line, each borrowed from the input.
struct BlockquoteComponents<'a> {
    /// Spaces/tabs before the first `>`.
    indent: &'a str,
    /// The run of one or more `>` characters.
    markers: &'a str,
    /// Spaces/tabs directly after the markers.
    spaces_after: &'a str,
    /// Everything after `spaces_after`.
    content: &'a str,
}

/// Splits `line` into indent, `>` markers, whitespace after the markers, and
/// the remaining content.
///
/// Returns `None` when the first character after any leading spaces/tabs is
/// not `>` (including when the line is empty or whitespace-only).
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    /// Length in bytes of the leading run of `s` whose bytes satisfy `pred`.
    fn run_len(s: &str, pred: impl Fn(u8) -> bool) -> usize {
        s.bytes().take_while(|&b| pred(b)).count()
    }

    let is_blank = |b: u8| b == b' ' || b == b'\t';

    // All matched bytes are ASCII, so every split point is a char boundary.
    let (indent, after_indent) = line.split_at(run_len(line, is_blank));

    let marker_len = run_len(after_indent, |b| b == b'>');
    if marker_len == 0 {
        return None;
    }
    let (markers, after_markers) = after_indent.split_at(marker_len);

    let (spaces_after, content) = after_markers.split_at(run_len(after_markers, is_blank));

    Some(BlockquoteComponents {
        indent,
        markers,
        spaces_after,
        content,
    })
}
614
615impl<'a> LintContext<'a> {
/// Builds the lint context for `content`.
///
/// Runs every analysis pass once, in dependency order, and stores the results.
/// Each pass is wrapped in `profile_section!`; on non-wasm targets, setting the
/// `RUMDL_PROFILE_QUADRATIC` environment variable prints per-pass timings to stderr.
///
/// * `content` - the Markdown document text.
/// * `flavor` - Markdown flavor; gates the MkDocs- and Quarto-specific passes below.
/// * `source_file` - stored as-is in the context.
pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
    #[cfg(not(target_arch = "wasm32"))]
    let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
    #[cfg(target_arch = "wasm32")]
    let profile = false;

    // Byte offset of the start of every line: 0, plus the position after each '\n'.
    let line_offsets = profile_section!("Line offsets", profile, {
        let mut offsets = vec![0];
        for (i, c) in content.char_indices() {
            if c == '\n' {
                offsets.push(i + 1);
            }
        }
        offsets
    });

    // Code blocks and raw code-span ranges are detected together.
    let (code_blocks, code_span_ranges) = profile_section!(
        "Code blocks",
        profile,
        CodeBlockUtils::detect_code_blocks_and_spans(content)
    );

    let html_comment_ranges = profile_section!(
        "HTML comment ranges",
        profile,
        crate::utils::skip_context::compute_html_comment_ranges(content)
    );

    // Autodoc blocks are only looked for in the MkDocs flavor.
    let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
        if flavor == MarkdownFlavor::MkDocs {
            crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
        } else {
            Vec::new()
        }
    });

    // Div blocks are only looked for in the Quarto flavor.
    let quarto_div_ranges = profile_section!("Quarto div ranges", profile, {
        if flavor == MarkdownFlavor::Quarto {
            crate::utils::quarto_divs::detect_div_block_ranges(content)
        } else {
            Vec::new()
        }
    });

    // Basic per-line info; this pass also produces the emphasis spans.
    let (mut lines, emphasis_spans) = profile_section!(
        "Basic line info",
        profile,
        Self::compute_basic_line_info(
            content,
            &line_offsets,
            &code_blocks,
            flavor,
            &html_comment_ranges,
            &autodoc_ranges,
            &quarto_div_ranges,
        )
    );

    // The following passes refine `lines` in place.
    profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));

    profile_section!(
        "ESM blocks",
        profile,
        Self::detect_esm_blocks(content, &mut lines, flavor)
    );

    let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
        "JSX/MDX detection",
        profile,
        Self::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
    );

    profile_section!(
        "MkDocs constructs",
        profile,
        Self::detect_mkdocs_line_info(content, &mut lines, flavor)
    );

    // Link ranges are collected before heading/blockquote detection, which takes them as input.
    let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));

    profile_section!(
        "Headings & blockquotes",
        profile,
        Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
    );

    let code_spans = profile_section!(
        "Code spans",
        profile,
        Self::build_code_spans_from_ranges(content, &lines, &code_span_ranges)
    );

    // Flag every line after the first of a multi-line code span as a continuation.
    for span in &code_spans {
        if span.end_line > span.line {
            for line_num in (span.line + 1)..=span.end_line {
                // Span line numbers are 1-based; `lines` is 0-based.
                if let Some(line_info) = lines.get_mut(line_num - 1) {
                    line_info.in_code_span_continuation = true;
                }
            }
        }
    }

    let (links, broken_links, footnote_refs) = profile_section!(
        "Links",
        profile,
        Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
    );

    let images = profile_section!(
        "Images",
        profile,
        Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
    );

    let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));

    // Lowercased id -> index into `reference_defs`. When ids collide after
    // lowercasing, the later definition wins (later inserts overwrite earlier ones).
    let reference_defs_map: HashMap<String, usize> = reference_defs
        .iter()
        .enumerate()
        .map(|(idx, def)| (def.id.to_lowercase(), idx))
        .collect();

    let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));

    let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));

    let table_blocks = profile_section!(
        "Table blocks",
        profile,
        crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
            content,
            &code_blocks,
            &code_spans,
            &html_comment_ranges,
        )
    );

    let line_index = profile_section!(
        "Line index",
        profile,
        crate::utils::range_utils::LineIndex::new(content)
    );

    let jinja_ranges = profile_section!(
        "Jinja ranges",
        profile,
        crate::utils::jinja_utils::find_jinja_ranges(content)
    );

    // Citations are only looked for in the Quarto flavor.
    let citation_ranges = profile_section!("Citation ranges", profile, {
        if flavor == MarkdownFlavor::Quarto {
            crate::utils::quarto_divs::find_citation_ranges(content)
        } else {
            Vec::new()
        }
    });

    // Shortcode ranges are collected for every flavor; matches that error are skipped by `flatten`.
    let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
        use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
        let mut ranges = Vec::new();
        for mat in HUGO_SHORTCODE_REGEX.find_iter(content).flatten() {
            ranges.push((mat.start(), mat.end()));
        }
        ranges
    });

    let inline_config = InlineConfig::from_content_with_code_blocks(content, &code_blocks);

    Self {
        content,
        line_offsets,
        code_blocks,
        lines,
        links,
        images,
        broken_links,
        footnote_refs,
        reference_defs,
        reference_defs_map,
        // Code spans and emphasis spans were already computed above, so their caches start filled.
        code_spans_cache: OnceLock::from(Arc::new(code_spans)),
        math_spans_cache: OnceLock::new(),
        list_blocks,
        char_frequency,
        html_tags_cache: OnceLock::new(),
        emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
        table_rows_cache: OnceLock::new(),
        bare_urls_cache: OnceLock::new(),
        has_mixed_list_nesting_cache: OnceLock::new(),
        html_comment_ranges,
        table_blocks,
        line_index,
        jinja_ranges,
        flavor,
        source_file,
        jsx_expression_ranges,
        mdx_comment_ranges,
        citation_ranges,
        shortcode_ranges,
        inline_config,
    }
}
841
842 pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
847 self.inline_config.is_rule_disabled(rule_name, line_number)
848 }
849
850 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
852 Arc::clone(
853 self.code_spans_cache
854 .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
855 )
856 }
857
858 pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
860 Arc::clone(
861 self.math_spans_cache
862 .get_or_init(|| Arc::new(Self::parse_math_spans(self.content, &self.lines))),
863 )
864 }
865
866 pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
868 let math_spans = self.math_spans();
869 math_spans
870 .iter()
871 .any(|span| byte_pos >= span.byte_offset && byte_pos < span.byte_end)
872 }
873
874 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
876 &self.html_comment_ranges
877 }
878
879 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
881 Arc::clone(self.html_tags_cache.get_or_init(|| {
882 Arc::new(Self::parse_html_tags(
883 self.content,
884 &self.lines,
885 &self.code_blocks,
886 self.flavor,
887 ))
888 }))
889 }
890
891 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
893 Arc::clone(
894 self.emphasis_spans_cache
895 .get()
896 .expect("emphasis_spans_cache initialized during construction"),
897 )
898 }
899
900 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
902 Arc::clone(
903 self.table_rows_cache
904 .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
905 )
906 }
907
908 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
910 Arc::clone(
911 self.bare_urls_cache
912 .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
913 )
914 }
915
916 pub fn has_mixed_list_nesting(&self) -> bool {
920 *self
921 .has_mixed_list_nesting_cache
922 .get_or_init(|| self.compute_mixed_list_nesting())
923 }
924
925 fn compute_mixed_list_nesting(&self) -> bool {
927 let mut stack: Vec<(usize, bool)> = Vec::new();
932 let mut last_was_blank = false;
933
934 for line_info in &self.lines {
935 if line_info.in_code_block
937 || line_info.in_front_matter
938 || line_info.in_mkdocstrings
939 || line_info.in_html_comment
940 || line_info.in_esm_block
941 {
942 continue;
943 }
944
945 if line_info.is_blank {
947 last_was_blank = true;
948 continue;
949 }
950
951 if let Some(list_item) = &line_info.list_item {
952 let current_pos = if list_item.marker_column == 1 {
954 0
955 } else {
956 list_item.marker_column
957 };
958
959 if last_was_blank && current_pos == 0 {
961 stack.clear();
962 }
963 last_was_blank = false;
964
965 while let Some(&(pos, _)) = stack.last() {
967 if pos >= current_pos {
968 stack.pop();
969 } else {
970 break;
971 }
972 }
973
974 if let Some(&(_, parent_is_ordered)) = stack.last()
976 && parent_is_ordered != list_item.is_ordered
977 {
978 return true; }
980
981 stack.push((current_pos, list_item.is_ordered));
982 } else {
983 last_was_blank = false;
985 }
986 }
987
988 false
989 }
990
991 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
993 match self.line_offsets.binary_search(&offset) {
994 Ok(line) => (line + 1, 1),
995 Err(line) => {
996 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
997 (line, offset - line_start + 1)
998 }
999 }
1000 }
1001
1002 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
1004 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
1006 return true;
1007 }
1008
1009 self.code_spans()
1011 .iter()
1012 .any(|span| pos >= span.byte_offset && pos < span.byte_end)
1013 }
1014
1015 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
1017 if line_num > 0 {
1018 self.lines.get(line_num - 1)
1019 } else {
1020 None
1021 }
1022 }
1023
1024 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
1026 self.line_info(line_num).map(|info| info.byte_offset)
1027 }
1028
1029 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
1031 let normalized_id = ref_id.to_lowercase();
1032 self.reference_defs_map
1033 .get(&normalized_id)
1034 .map(|&idx| self.reference_defs[idx].url.as_str())
1035 }
1036
1037 pub fn get_reference_def(&self, ref_id: &str) -> Option<&ReferenceDef> {
1039 let normalized_id = ref_id.to_lowercase();
1040 self.reference_defs_map
1041 .get(&normalized_id)
1042 .map(|&idx| &self.reference_defs[idx])
1043 }
1044
1045 pub fn has_reference_def(&self, ref_id: &str) -> bool {
1047 let normalized_id = ref_id.to_lowercase();
1048 self.reference_defs_map.contains_key(&normalized_id)
1049 }
1050
1051 pub fn is_in_list_block(&self, line_num: usize) -> bool {
1053 self.list_blocks
1054 .iter()
1055 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
1056 }
1057
1058 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
1060 self.list_blocks
1061 .iter()
1062 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
1063 }
1064
1065 pub fn is_in_code_block(&self, line_num: usize) -> bool {
1069 if line_num == 0 || line_num > self.lines.len() {
1070 return false;
1071 }
1072 self.lines[line_num - 1].in_code_block
1073 }
1074
1075 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
1077 if line_num == 0 || line_num > self.lines.len() {
1078 return false;
1079 }
1080 self.lines[line_num - 1].in_front_matter
1081 }
1082
1083 pub fn is_in_html_block(&self, line_num: usize) -> bool {
1085 if line_num == 0 || line_num > self.lines.len() {
1086 return false;
1087 }
1088 self.lines[line_num - 1].in_html_block
1089 }
1090
1091 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
1093 if line_num == 0 || line_num > self.lines.len() {
1094 return false;
1095 }
1096
1097 let col_0indexed = if col > 0 { col - 1 } else { 0 };
1101 let code_spans = self.code_spans();
1102 code_spans.iter().any(|span| {
1103 if line_num < span.line || line_num > span.end_line {
1105 return false;
1106 }
1107
1108 if span.line == span.end_line {
1109 col_0indexed >= span.start_col && col_0indexed < span.end_col
1111 } else if line_num == span.line {
1112 col_0indexed >= span.start_col
1114 } else if line_num == span.end_line {
1115 col_0indexed < span.end_col
1117 } else {
1118 true
1120 }
1121 })
1122 }
1123
1124 #[inline]
1126 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
1127 let code_spans = self.code_spans();
1128 code_spans
1129 .iter()
1130 .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
1131 }
1132
1133 #[inline]
1136 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
1137 self.reference_defs
1138 .iter()
1139 .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
1140 }
1141
1142 #[inline]
1146 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
1147 self.html_comment_ranges
1148 .iter()
1149 .any(|range| byte_pos >= range.start && byte_pos < range.end)
1150 }
1151
1152 #[inline]
1155 pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1156 self.html_tags()
1157 .iter()
1158 .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
1159 }
1160
1161 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1163 self.jinja_ranges
1164 .iter()
1165 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1166 }
1167
1168 #[inline]
1170 pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
1171 self.jsx_expression_ranges
1172 .iter()
1173 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1174 }
1175
1176 #[inline]
1178 pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
1179 self.mdx_comment_ranges
1180 .iter()
1181 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1182 }
1183
1184 pub fn jsx_expression_ranges(&self) -> &[(usize, usize)] {
1186 &self.jsx_expression_ranges
1187 }
1188
1189 pub fn mdx_comment_ranges(&self) -> &[(usize, usize)] {
1191 &self.mdx_comment_ranges
1192 }
1193
1194 #[inline]
1197 pub fn is_in_citation(&self, byte_pos: usize) -> bool {
1198 self.citation_ranges
1199 .iter()
1200 .any(|range| byte_pos >= range.start && byte_pos < range.end)
1201 }
1202
1203 pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
1205 &self.citation_ranges
1206 }
1207
1208 #[inline]
1210 pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
1211 self.shortcode_ranges
1212 .iter()
1213 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1214 }
1215
1216 pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
1218 &self.shortcode_ranges
1219 }
1220
1221 pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1223 self.reference_defs.iter().any(|def| {
1224 if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1225 byte_pos >= start && byte_pos < end
1226 } else {
1227 false
1228 }
1229 })
1230 }
1231
1232 pub fn has_char(&self, ch: char) -> bool {
1234 match ch {
1235 '#' => self.char_frequency.hash_count > 0,
1236 '*' => self.char_frequency.asterisk_count > 0,
1237 '_' => self.char_frequency.underscore_count > 0,
1238 '-' => self.char_frequency.hyphen_count > 0,
1239 '+' => self.char_frequency.plus_count > 0,
1240 '>' => self.char_frequency.gt_count > 0,
1241 '|' => self.char_frequency.pipe_count > 0,
1242 '[' => self.char_frequency.bracket_count > 0,
1243 '`' => self.char_frequency.backtick_count > 0,
1244 '<' => self.char_frequency.lt_count > 0,
1245 '!' => self.char_frequency.exclamation_count > 0,
1246 '\n' => self.char_frequency.newline_count > 0,
1247 _ => self.content.contains(ch), }
1249 }
1250
1251 pub fn char_count(&self, ch: char) -> usize {
1253 match ch {
1254 '#' => self.char_frequency.hash_count,
1255 '*' => self.char_frequency.asterisk_count,
1256 '_' => self.char_frequency.underscore_count,
1257 '-' => self.char_frequency.hyphen_count,
1258 '+' => self.char_frequency.plus_count,
1259 '>' => self.char_frequency.gt_count,
1260 '|' => self.char_frequency.pipe_count,
1261 '[' => self.char_frequency.bracket_count,
1262 '`' => self.char_frequency.backtick_count,
1263 '<' => self.char_frequency.lt_count,
1264 '!' => self.char_frequency.exclamation_count,
1265 '\n' => self.char_frequency.newline_count,
1266 _ => self.content.matches(ch).count(), }
1268 }
1269
1270 pub fn likely_has_headings(&self) -> bool {
1272 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
1274
1275 pub fn likely_has_lists(&self) -> bool {
1277 self.char_frequency.asterisk_count > 0
1278 || self.char_frequency.hyphen_count > 0
1279 || self.char_frequency.plus_count > 0
1280 }
1281
1282 pub fn likely_has_emphasis(&self) -> bool {
1284 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1285 }
1286
1287 pub fn likely_has_tables(&self) -> bool {
1289 self.char_frequency.pipe_count > 2
1290 }
1291
1292 pub fn likely_has_blockquotes(&self) -> bool {
1294 self.char_frequency.gt_count > 0
1295 }
1296
1297 pub fn likely_has_code(&self) -> bool {
1299 self.char_frequency.backtick_count > 0
1300 }
1301
1302 pub fn likely_has_links_or_images(&self) -> bool {
1304 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1305 }
1306
1307 pub fn likely_has_html(&self) -> bool {
1309 self.char_frequency.lt_count > 0
1310 }
1311
1312 pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1317 if let Some(line_info) = self.lines.get(line_idx)
1318 && let Some(ref bq) = line_info.blockquote
1319 {
1320 bq.prefix.trim_end().to_string()
1321 } else {
1322 String::new()
1323 }
1324 }
1325
1326 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1328 self.html_tags()
1329 .iter()
1330 .filter(|tag| tag.line == line_num)
1331 .cloned()
1332 .collect()
1333 }
1334
1335 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1337 self.emphasis_spans()
1338 .iter()
1339 .filter(|span| span.line == line_num)
1340 .cloned()
1341 .collect()
1342 }
1343
1344 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1346 self.table_rows()
1347 .iter()
1348 .filter(|row| row.line == line_num)
1349 .cloned()
1350 .collect()
1351 }
1352
1353 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1355 self.bare_urls()
1356 .iter()
1357 .filter(|url| url.line == line_num)
1358 .cloned()
1359 .collect()
1360 }
1361
1362 #[inline]
1368 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1369 let idx = match lines.binary_search_by(|line| {
1371 if byte_offset < line.byte_offset {
1372 std::cmp::Ordering::Greater
1373 } else if byte_offset > line.byte_offset + line.byte_len {
1374 std::cmp::Ordering::Less
1375 } else {
1376 std::cmp::Ordering::Equal
1377 }
1378 }) {
1379 Ok(idx) => idx,
1380 Err(idx) => idx.saturating_sub(1),
1381 };
1382
1383 let line = &lines[idx];
1384 let line_num = idx + 1;
1385 let col = byte_offset.saturating_sub(line.byte_offset);
1386
1387 (idx, line_num, col)
1388 }
1389
1390 #[inline]
1392 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1393 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1395
1396 if idx > 0 {
1398 let span = &code_spans[idx - 1];
1399 if offset >= span.byte_offset && offset < span.byte_end {
1400 return true;
1401 }
1402 }
1403
1404 false
1405 }
1406
1407 fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1411 use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1412
1413 let mut link_ranges = Vec::new();
1414 let mut options = Options::empty();
1415 options.insert(Options::ENABLE_WIKILINKS);
1416 options.insert(Options::ENABLE_FOOTNOTES);
1417
1418 let parser = Parser::new_ext(content, options).into_offset_iter();
1419 let mut link_stack: Vec<usize> = Vec::new();
1420
1421 for (event, range) in parser {
1422 match event {
1423 Event::Start(Tag::Link { .. }) => {
1424 link_stack.push(range.start);
1425 }
1426 Event::End(TagEnd::Link) => {
1427 if let Some(start_pos) = link_stack.pop() {
1428 link_ranges.push((start_pos, range.end));
1429 }
1430 }
1431 _ => {}
1432 }
1433 }
1434
1435 link_ranges
1436 }
1437
1438 fn parse_links(
1440 content: &'a str,
1441 lines: &[LineInfo],
1442 code_blocks: &[(usize, usize)],
1443 code_spans: &[CodeSpan],
1444 flavor: MarkdownFlavor,
1445 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1446 ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1447 use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1448 use std::collections::HashSet;
1449
1450 let mut links = Vec::with_capacity(content.len() / 500);
1451 let mut broken_links = Vec::new();
1452 let mut footnote_refs = Vec::new();
1453
1454 let mut found_positions = HashSet::new();
1456
1457 let mut options = Options::empty();
1467 options.insert(Options::ENABLE_WIKILINKS);
1468 options.insert(Options::ENABLE_FOOTNOTES);
1469
1470 let parser = Parser::new_with_broken_link_callback(
1471 content,
1472 options,
1473 Some(|link: BrokenLink<'_>| {
1474 broken_links.push(BrokenLinkInfo {
1475 reference: link.reference.to_string(),
1476 span: link.span.clone(),
1477 });
1478 None
1479 }),
1480 )
1481 .into_offset_iter();
1482
1483 let mut link_stack: Vec<(
1484 usize,
1485 usize,
1486 pulldown_cmark::CowStr<'a>,
1487 LinkType,
1488 pulldown_cmark::CowStr<'a>,
1489 )> = Vec::new();
1490 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1493 match event {
1494 Event::Start(Tag::Link {
1495 link_type,
1496 dest_url,
1497 id,
1498 ..
1499 }) => {
1500 link_stack.push((range.start, range.end, dest_url, link_type, id));
1502 text_chunks.clear();
1503 }
1504 Event::Text(text) if !link_stack.is_empty() => {
1505 text_chunks.push((text.to_string(), range.start, range.end));
1507 }
1508 Event::Code(code) if !link_stack.is_empty() => {
1509 let code_text = format!("`{code}`");
1511 text_chunks.push((code_text, range.start, range.end));
1512 }
1513 Event::End(TagEnd::Link) => {
1514 if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1515 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1517 text_chunks.clear();
1518 continue;
1519 }
1520
1521 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1523
1524 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1526 text_chunks.clear();
1527 continue;
1528 }
1529
1530 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1531
1532 let is_reference = matches!(
1533 link_type,
1534 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1535 );
1536
1537 let link_text = if start_pos < content.len() {
1540 let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1541
1542 let mut close_pos = None;
1546 let mut depth = 0;
1547 let mut in_code_span = false;
1548
1549 for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1550 let mut backslash_count = 0;
1552 let mut j = i;
1553 while j > 0 && link_bytes[j - 1] == b'\\' {
1554 backslash_count += 1;
1555 j -= 1;
1556 }
1557 let is_escaped = backslash_count % 2 != 0;
1558
1559 if byte == b'`' && !is_escaped {
1561 in_code_span = !in_code_span;
1562 }
1563
1564 if !is_escaped && !in_code_span {
1566 if byte == b'[' {
1567 depth += 1;
1568 } else if byte == b']' {
1569 if depth == 0 {
1570 close_pos = Some(i);
1572 break;
1573 } else {
1574 depth -= 1;
1575 }
1576 }
1577 }
1578 }
1579
1580 if let Some(pos) = close_pos {
1581 Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1582 } else {
1583 Cow::Borrowed("")
1584 }
1585 } else {
1586 Cow::Borrowed("")
1587 };
1588
1589 let reference_id = if is_reference && !ref_id.is_empty() {
1591 Some(Cow::Owned(ref_id.to_lowercase()))
1592 } else if is_reference {
1593 Some(Cow::Owned(link_text.to_lowercase()))
1595 } else {
1596 None
1597 };
1598
1599 found_positions.insert(start_pos);
1601
1602 links.push(ParsedLink {
1603 line: line_num,
1604 start_col: col_start,
1605 end_col: col_end,
1606 byte_offset: start_pos,
1607 byte_end: range.end,
1608 text: link_text,
1609 url: Cow::Owned(url.to_string()),
1610 is_reference,
1611 reference_id,
1612 link_type,
1613 });
1614
1615 text_chunks.clear();
1616 }
1617 }
1618 Event::FootnoteReference(footnote_id) => {
1619 if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1622 continue;
1623 }
1624
1625 let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1626 footnote_refs.push(FootnoteRef {
1627 id: footnote_id.to_string(),
1628 line: line_num,
1629 byte_offset: range.start,
1630 byte_end: range.end,
1631 });
1632 }
1633 _ => {}
1634 }
1635 }
1636
1637 for cap in LINK_PATTERN.captures_iter(content) {
1641 let full_match = cap.get(0).unwrap();
1642 let match_start = full_match.start();
1643 let match_end = full_match.end();
1644
1645 if found_positions.contains(&match_start) {
1647 continue;
1648 }
1649
1650 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1652 continue;
1653 }
1654
1655 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1657 continue;
1658 }
1659
1660 if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1662 continue;
1663 }
1664
1665 if Self::is_offset_in_code_span(code_spans, match_start) {
1667 continue;
1668 }
1669
1670 if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1672 continue;
1673 }
1674
1675 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1677
1678 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1680 continue;
1681 }
1682
1683 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1684
1685 let text = cap.get(1).map_or("", |m| m.as_str());
1686
1687 if let Some(ref_id) = cap.get(6) {
1689 let ref_id_str = ref_id.as_str();
1690 let normalized_ref = if ref_id_str.is_empty() {
1691 Cow::Owned(text.to_lowercase()) } else {
1693 Cow::Owned(ref_id_str.to_lowercase())
1694 };
1695
1696 links.push(ParsedLink {
1698 line: line_num,
1699 start_col: col_start,
1700 end_col: col_end,
1701 byte_offset: match_start,
1702 byte_end: match_end,
1703 text: Cow::Borrowed(text),
1704 url: Cow::Borrowed(""), is_reference: true,
1706 reference_id: Some(normalized_ref),
1707 link_type: LinkType::Reference, });
1709 }
1710 }
1711
1712 (links, broken_links, footnote_refs)
1713 }
1714
1715 fn parse_images(
1717 content: &'a str,
1718 lines: &[LineInfo],
1719 code_blocks: &[(usize, usize)],
1720 code_spans: &[CodeSpan],
1721 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1722 ) -> Vec<ParsedImage<'a>> {
1723 use crate::utils::skip_context::is_in_html_comment_ranges;
1724 use std::collections::HashSet;
1725
1726 let mut images = Vec::with_capacity(content.len() / 1000);
1728 let mut found_positions = HashSet::new();
1729
1730 let parser = Parser::new(content).into_offset_iter();
1732 let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1733 Vec::new();
1734 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1737 match event {
1738 Event::Start(Tag::Image {
1739 link_type,
1740 dest_url,
1741 id,
1742 ..
1743 }) => {
1744 image_stack.push((range.start, dest_url, link_type, id));
1745 text_chunks.clear();
1746 }
1747 Event::Text(text) if !image_stack.is_empty() => {
1748 text_chunks.push((text.to_string(), range.start, range.end));
1749 }
1750 Event::Code(code) if !image_stack.is_empty() => {
1751 let code_text = format!("`{code}`");
1752 text_chunks.push((code_text, range.start, range.end));
1753 }
1754 Event::End(TagEnd::Image) => {
1755 if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1756 if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1758 continue;
1759 }
1760
1761 if Self::is_offset_in_code_span(code_spans, start_pos) {
1763 continue;
1764 }
1765
1766 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1768 continue;
1769 }
1770
1771 let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1773 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1774
1775 let is_reference = matches!(
1776 link_type,
1777 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1778 );
1779
1780 let alt_text = if start_pos < content.len() {
1783 let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1784
1785 let mut close_pos = None;
1788 let mut depth = 0;
1789
1790 if image_bytes.len() > 2 {
1791 for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1792 let mut backslash_count = 0;
1794 let mut j = i;
1795 while j > 0 && image_bytes[j - 1] == b'\\' {
1796 backslash_count += 1;
1797 j -= 1;
1798 }
1799 let is_escaped = backslash_count % 2 != 0;
1800
1801 if !is_escaped {
1802 if byte == b'[' {
1803 depth += 1;
1804 } else if byte == b']' {
1805 if depth == 0 {
1806 close_pos = Some(i);
1808 break;
1809 } else {
1810 depth -= 1;
1811 }
1812 }
1813 }
1814 }
1815 }
1816
1817 if let Some(pos) = close_pos {
1818 Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1819 } else {
1820 Cow::Borrowed("")
1821 }
1822 } else {
1823 Cow::Borrowed("")
1824 };
1825
1826 let reference_id = if is_reference && !ref_id.is_empty() {
1827 Some(Cow::Owned(ref_id.to_lowercase()))
1828 } else if is_reference {
1829 Some(Cow::Owned(alt_text.to_lowercase())) } else {
1831 None
1832 };
1833
1834 found_positions.insert(start_pos);
1835 images.push(ParsedImage {
1836 line: line_num,
1837 start_col: col_start,
1838 end_col: col_end,
1839 byte_offset: start_pos,
1840 byte_end: range.end,
1841 alt_text,
1842 url: Cow::Owned(url.to_string()),
1843 is_reference,
1844 reference_id,
1845 link_type,
1846 });
1847 }
1848 }
1849 _ => {}
1850 }
1851 }
1852
1853 for cap in IMAGE_PATTERN.captures_iter(content) {
1855 let full_match = cap.get(0).unwrap();
1856 let match_start = full_match.start();
1857 let match_end = full_match.end();
1858
1859 if found_positions.contains(&match_start) {
1861 continue;
1862 }
1863
1864 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1866 continue;
1867 }
1868
1869 if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1871 || Self::is_offset_in_code_span(code_spans, match_start)
1872 || is_in_html_comment_ranges(html_comment_ranges, match_start)
1873 {
1874 continue;
1875 }
1876
1877 if let Some(ref_id) = cap.get(6) {
1879 let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1880 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1881 let alt_text = cap.get(1).map_or("", |m| m.as_str());
1882 let ref_id_str = ref_id.as_str();
1883 let normalized_ref = if ref_id_str.is_empty() {
1884 Cow::Owned(alt_text.to_lowercase())
1885 } else {
1886 Cow::Owned(ref_id_str.to_lowercase())
1887 };
1888
1889 images.push(ParsedImage {
1890 line: line_num,
1891 start_col: col_start,
1892 end_col: col_end,
1893 byte_offset: match_start,
1894 byte_end: match_end,
1895 alt_text: Cow::Borrowed(alt_text),
1896 url: Cow::Borrowed(""),
1897 is_reference: true,
1898 reference_id: Some(normalized_ref),
1899 link_type: LinkType::Reference, });
1901 }
1902 }
1903
1904 images
1905 }
1906
1907 fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1909 let mut refs = Vec::with_capacity(lines.len() / 20); for (line_idx, line_info) in lines.iter().enumerate() {
1913 if line_info.in_code_block {
1915 continue;
1916 }
1917
1918 let line = line_info.content(content);
1919 let line_num = line_idx + 1;
1920
1921 if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1922 let id_raw = cap.get(1).unwrap().as_str();
1923
1924 if id_raw.starts_with('^') {
1927 continue;
1928 }
1929
1930 let id = id_raw.to_lowercase();
1931 let url = cap.get(2).unwrap().as_str().to_string();
1932 let title_match = cap.get(3).or_else(|| cap.get(4));
1933 let title = title_match.map(|m| m.as_str().to_string());
1934
1935 let match_obj = cap.get(0).unwrap();
1938 let byte_offset = line_info.byte_offset + match_obj.start();
1939 let byte_end = line_info.byte_offset + match_obj.end();
1940
1941 let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1943 let start = line_info.byte_offset + m.start().saturating_sub(1);
1945 let end = line_info.byte_offset + m.end() + 1; (Some(start), Some(end))
1947 } else {
1948 (None, None)
1949 };
1950
1951 refs.push(ReferenceDef {
1952 line: line_num,
1953 id,
1954 url,
1955 title,
1956 byte_offset,
1957 byte_end,
1958 title_byte_start,
1959 title_byte_end,
1960 });
1961 }
1962 }
1963
1964 refs
1965 }
1966
/// Splits a line into its blockquote prefix and the text that follows it.
///
/// The prefix covers any leading whitespace, every `>` marker (nested
/// markers may be separated by whitespace) and at most one space or tab
/// directly after each marker. Returns `None` when the line does not begin
/// with a `>` once leading whitespace is ignored.
#[inline]
fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
    let mut rest = line;

    // Peel one `>` per iteration, together with the whitespace in front of
    // it and a single optional space/tab behind it.
    while let Some(after_marker) = rest.trim_start().strip_prefix('>') {
        rest = after_marker
            .strip_prefix(' ')
            .or_else(|| after_marker.strip_prefix('\t'))
            .unwrap_or(after_marker);
    }

    // `rest` is always a suffix of `line`, so the prefix length is the number
    // of bytes consumed; nothing consumed means no marker was found.
    let prefix_len = line.len() - rest.len();
    if prefix_len == 0 {
        return None;
    }

    Some((&line[..prefix_len], rest))
}
2007
2008 fn detect_list_items_and_emphasis_with_pulldown(
2032 content: &str,
2033 line_offsets: &[usize],
2034 flavor: MarkdownFlavor,
2035 front_matter_end: usize,
2036 code_blocks: &[(usize, usize)],
2037 ) -> (ListItemMap, Vec<EmphasisSpan>) {
2038 use std::collections::HashMap;
2039
2040 let mut list_items = HashMap::new();
2041 let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
2042
2043 let mut options = Options::empty();
2044 options.insert(Options::ENABLE_TABLES);
2045 options.insert(Options::ENABLE_FOOTNOTES);
2046 options.insert(Options::ENABLE_STRIKETHROUGH);
2047 options.insert(Options::ENABLE_TASKLISTS);
2048 options.insert(Options::ENABLE_GFM);
2050
2051 let _ = flavor;
2053
2054 let parser = Parser::new_ext(content, options).into_offset_iter();
2055 let mut list_depth: usize = 0;
2056 let mut list_stack: Vec<bool> = Vec::new();
2057
2058 for (event, range) in parser {
2059 match event {
2060 Event::Start(Tag::Emphasis) | Event::Start(Tag::Strong) => {
2062 let marker_count = if matches!(event, Event::Start(Tag::Strong)) {
2063 2
2064 } else {
2065 1
2066 };
2067 let match_start = range.start;
2068 let match_end = range.end;
2069
2070 if !CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2072 let marker = content[match_start..].chars().next().unwrap_or('*');
2074 if marker == '*' || marker == '_' {
2075 let content_start = match_start + marker_count;
2077 let content_end = if match_end >= marker_count {
2078 match_end - marker_count
2079 } else {
2080 match_end
2081 };
2082 let content_part = if content_start < content_end && content_end <= content.len() {
2083 &content[content_start..content_end]
2084 } else {
2085 ""
2086 };
2087
2088 let line_idx = match line_offsets.binary_search(&match_start) {
2090 Ok(idx) => idx,
2091 Err(idx) => idx.saturating_sub(1),
2092 };
2093 let line_num = line_idx + 1;
2094 let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
2095 let col_start = match_start - line_start;
2096 let col_end = match_end - line_start;
2097
2098 emphasis_spans.push(EmphasisSpan {
2099 line: line_num,
2100 start_col: col_start,
2101 end_col: col_end,
2102 byte_offset: match_start,
2103 byte_end: match_end,
2104 marker,
2105 marker_count,
2106 content: content_part.to_string(),
2107 });
2108 }
2109 }
2110 }
2111 Event::Start(Tag::List(start_number)) => {
2112 list_depth += 1;
2113 list_stack.push(start_number.is_some());
2114 }
2115 Event::End(TagEnd::List(_)) => {
2116 list_depth = list_depth.saturating_sub(1);
2117 list_stack.pop();
2118 }
2119 Event::Start(Tag::Item) if list_depth > 0 => {
2120 let current_list_is_ordered = list_stack.last().copied().unwrap_or(false);
2122 let item_start = range.start;
2124
2125 let mut line_idx = match line_offsets.binary_search(&item_start) {
2127 Ok(idx) => idx,
2128 Err(idx) => idx.saturating_sub(1),
2129 };
2130
2131 if item_start < content.len() && content.as_bytes()[item_start] == b'\n' {
2135 line_idx += 1;
2136 }
2137
2138 if front_matter_end > 0 && line_idx < front_matter_end {
2140 continue;
2141 }
2142
2143 if line_idx < line_offsets.len() {
2144 let line_start_byte = line_offsets[line_idx];
2145 let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
2146 let line = &content[line_start_byte..line_end.min(content.len())];
2147
2148 let line = line
2150 .strip_suffix('\n')
2151 .or_else(|| line.strip_suffix("\r\n"))
2152 .unwrap_or(line);
2153
2154 let blockquote_parse = Self::parse_blockquote_prefix(line);
2156 let (blockquote_prefix_len, line_to_parse) = if let Some((prefix, content)) = blockquote_parse {
2157 (prefix.len(), content)
2158 } else {
2159 (0, line)
2160 };
2161
2162 if current_list_is_ordered {
2164 if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
2165 Self::parse_ordered_list(line_to_parse)
2166 {
2167 let marker = format!("{number_str}{delimiter}");
2168 let marker_column = blockquote_prefix_len + leading_spaces.len();
2169 let content_column = marker_column + marker.len() + spacing.len();
2170 let number = number_str.parse().ok();
2171
2172 list_items.entry(line_start_byte).or_insert((
2173 true,
2174 marker,
2175 marker_column,
2176 content_column,
2177 number,
2178 ));
2179 }
2180 } else if let Some((leading_spaces, marker, spacing, _content)) =
2181 Self::parse_unordered_list(line_to_parse)
2182 {
2183 let marker_column = blockquote_prefix_len + leading_spaces.len();
2184 let content_column = marker_column + 1 + spacing.len();
2185
2186 list_items.entry(line_start_byte).or_insert((
2187 false,
2188 marker.to_string(),
2189 marker_column,
2190 content_column,
2191 None,
2192 ));
2193 }
2194 }
2195 }
2196 _ => {}
2197 }
2198 }
2199
2200 (list_items, emphasis_spans)
2201 }
2202
/// Parses the start of an unordered list item.
///
/// Returns `(indent, marker, spacing, text)` where `indent` is the leading
/// run of spaces/tabs, `marker` is one of `-`, `*` or `+`, `spacing` is the
/// run of spaces/tabs after the marker and `text` is the rest of the line.
/// Returns `None` when the first character after the indent is not a marker.
#[inline]
fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
    fn is_blank(c: char) -> bool {
        c == ' ' || c == '\t'
    }

    let from_marker = line.trim_start_matches(is_blank);
    let (indent, _) = line.split_at(line.len() - from_marker.len());

    let mut chars = from_marker.chars();
    let marker = chars.next().filter(|c| matches!(c, '-' | '*' | '+'))?;

    // Everything after the (single-byte) marker.
    let after_marker = chars.as_str();
    let text = after_marker.trim_start_matches(is_blank);
    let spacing = &after_marker[..after_marker.len() - text.len()];

    Some((indent, marker, spacing, text))
}
2235
/// Parses the start of an ordered list item.
///
/// Returns `(indent, number, delimiter, spacing, text)` where `indent` is the
/// leading run of spaces/tabs, `number` is a non-empty run of ASCII digits,
/// `delimiter` is `.` or `)`, `spacing` is the run of spaces/tabs after the
/// delimiter and `text` is the rest of the line. Returns `None` when the
/// digits or the delimiter are missing.
#[inline]
fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
    fn is_blank(c: char) -> bool {
        c == ' ' || c == '\t'
    }

    let from_number = line.trim_start_matches(is_blank);
    let indent = &line[..line.len() - from_number.len()];

    let from_delimiter = from_number.trim_start_matches(|c: char| c.is_ascii_digit());
    let number = &from_number[..from_number.len() - from_delimiter.len()];
    if number.is_empty() {
        return None;
    }

    let mut chars = from_delimiter.chars();
    let delimiter = chars.next().filter(|c| matches!(c, '.' | ')'))?;

    // Everything after the (single-byte) delimiter.
    let after_delimiter = chars.as_str();
    let text = after_delimiter.trim_start_matches(is_blank);
    let spacing = &after_delimiter[..after_delimiter.len() - text.len()];

    Some((indent, number, delimiter, spacing, text))
}
2283
/// Builds a per-line flag vector telling whether each line overlaps a code block.
///
/// `line_offsets` holds the starting byte offset of every line (ascending)
/// and `code_blocks` holds `(start, end)` byte ranges. A range start that
/// falls inside a multi-byte character is moved back to the previous
/// character boundary; a range end is moved forward to the next boundary and
/// never exceeds `content.len()`. The result has one entry per line offset.
fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
    let mut flags = vec![false; line_offsets.len()];

    for &(block_start, block_end) in code_blocks {
        // Snap the start backwards onto a character boundary.
        let mut snapped_start = block_start;
        while snapped_start > 0 && !content.is_char_boundary(snapped_start) {
            snapped_start -= 1;
        }

        // Clamp the end to the content, then snap it forwards onto a boundary.
        let mut snapped_end = block_end.min(content.len());
        while snapped_end < content.len() && !content.is_char_boundary(snapped_end) {
            snapped_end += 1;
        }

        // First covered line: the last line starting at or before the block start.
        let first_line = line_offsets
            .partition_point(|&offset| offset <= snapped_start)
            .saturating_sub(1);
        // One past the last covered line: the first line starting at or after the block end.
        let end_line = line_offsets.partition_point(|&offset| offset < snapped_end);

        // `get_mut` yields `None` for an inverted range, which marks nothing.
        if let Some(covered) = flags.get_mut(first_line..end_line) {
            covered.fill(true);
        }
    }

    flags
}
2343
/// Builds a per-line flag vector telling whether each line belongs to a
/// `$$ ... $$` math block.
///
/// A line whose trimmed text is exactly `$$` toggles the block and is itself
/// flagged, as is every line between an opening and a closing delimiter.
/// Lines marked in `code_block_map` are never flagged and never toggle the
/// block; indices missing from `code_block_map` count as "not in code".
fn compute_math_block_line_map(content: &str, code_block_map: &[bool]) -> Vec<bool> {
    let mut inside_math = false;

    content
        .lines()
        .enumerate()
        .map(|(idx, text)| {
            if code_block_map.get(idx).copied().unwrap_or(false) {
                return false;
            }

            if text.trim() == "$$" {
                // Delimiter lines belong to the block whether opening or closing.
                inside_math = !inside_math;
                true
            } else {
                inside_math
            }
        })
        .collect()
}
2381
2382 fn compute_basic_line_info(
2385 content: &str,
2386 line_offsets: &[usize],
2387 code_blocks: &[(usize, usize)],
2388 flavor: MarkdownFlavor,
2389 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2390 autodoc_ranges: &[crate::utils::skip_context::ByteRange],
2391 quarto_div_ranges: &[crate::utils::skip_context::ByteRange],
2392 ) -> (Vec<LineInfo>, Vec<EmphasisSpan>) {
2393 let content_lines: Vec<&str> = content.lines().collect();
2394 let mut lines = Vec::with_capacity(content_lines.len());
2395
2396 let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
2398
2399 let math_block_map = Self::compute_math_block_line_map(content, &code_block_map);
2401
2402 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2405
2406 let (list_item_map, emphasis_spans) = Self::detect_list_items_and_emphasis_with_pulldown(
2409 content,
2410 line_offsets,
2411 flavor,
2412 front_matter_end,
2413 code_blocks,
2414 );
2415
2416 for (i, line) in content_lines.iter().enumerate() {
2417 let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
2418 let indent = line.len() - line.trim_start().len();
2419 let visual_indent = ElementCache::calculate_indentation_width_default(line);
2421
2422 let blockquote_parse = Self::parse_blockquote_prefix(line);
2424
2425 let is_blank = if let Some((_, content)) = blockquote_parse {
2427 content.trim().is_empty()
2429 } else {
2430 line.trim().is_empty()
2431 };
2432
2433 let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
2435
2436 let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
2438 && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
2439 let line_end_offset = byte_offset + line.len();
2442 let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
2443 html_comment_ranges,
2444 byte_offset,
2445 line_end_offset,
2446 );
2447 let list_item =
2450 list_item_map
2451 .get(&byte_offset)
2452 .map(
2453 |(is_ordered, marker, marker_column, content_column, number)| ListItemInfo {
2454 marker: marker.clone(),
2455 is_ordered: *is_ordered,
2456 number: *number,
2457 marker_column: *marker_column,
2458 content_column: *content_column,
2459 },
2460 );
2461
2462 let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2465 let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2466
2467 let in_math_block = math_block_map.get(i).copied().unwrap_or(false);
2469
2470 let in_quarto_div = flavor == MarkdownFlavor::Quarto
2472 && crate::utils::quarto_divs::is_within_div_block_ranges(quarto_div_ranges, byte_offset);
2473
2474 lines.push(LineInfo {
2475 byte_offset,
2476 byte_len: line.len(),
2477 indent,
2478 visual_indent,
2479 is_blank,
2480 in_code_block,
2481 in_front_matter,
2482 in_html_block: false, in_html_comment,
2484 list_item,
2485 heading: None, blockquote: None, in_mkdocstrings,
2488 in_esm_block: false, in_code_span_continuation: false, is_horizontal_rule: is_hr,
2491 in_math_block,
2492 in_quarto_div,
2493 in_jsx_expression: false, in_mdx_comment: false, in_jsx_component: false, in_jsx_fragment: false, in_admonition: false, in_content_tab: false, in_definition_list: false, });
2501 }
2502
2503 (lines, emphasis_spans)
2504 }
2505
/// Fills in the `blockquote` and `heading` fields of every entry in `lines`
/// (and may set `is_horizontal_rule` for blockquote content).
///
/// * `content` - the document text; its `lines()` are indexed in parallel
///   with `lines`.
/// * `lines` - per-line records updated in place. NOTE(review): assumed to
///   have no more entries than `content.lines()`, since `content_lines[i]`
///   is indexed directly.
/// * `flavor` - only consulted to enable the MkDocs snippet-marker check.
/// * `html_comment_ranges` - a heading candidate whose line offset lies in
///   one of these ranges is skipped.
/// * `link_byte_ranges` - an ATX candidate whose line offset lies strictly
///   inside one of these ranges is skipped.
fn detect_headings_and_blockquotes(
    content: &str,
    lines: &mut [LineInfo],
    flavor: MarkdownFlavor,
    html_comment_ranges: &[crate::utils::skip_context::ByteRange],
    link_byte_ranges: &[(usize, usize)],
) {
    // Groups: 1 = leading whitespace, 2 = one to six `#`, 3 = whitespace
    // after the hashes, 4 = the rest of the line.
    static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
        LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
    // A line made only of `=` or only of `-`, with optional surrounding whitespace.
    static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
        LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());

    let content_lines: Vec<&str> = content.lines().collect();

    let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);

    for i in 0..lines.len() {
        let line = content_lines[i];

        // Blockquote detection runs for every line outside front matter,
        // including lines inside code blocks (those are skipped only below).
        if !(front_matter_end > 0 && i < front_matter_end)
            && let Some(bq) = parse_blockquote_detailed(line)
        {
            let nesting_level = bq.markers.len();
            let marker_column = bq.indent.len();
            let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
            let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
            let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
            let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();

            lines[i].blockquote = Some(BlockquoteInfo {
                nesting_level,
                indent: bq.indent.to_string(),
                marker_column,
                prefix,
                content: bq.content.to_string(),
                has_no_space_after_marker: has_no_space,
                has_multiple_spaces_after_marker: has_multiple_spaces,
                needs_md028_fix,
            });

            // A horizontal rule inside a blockquote is flagged on the line as well.
            if !lines[i].in_code_block && is_horizontal_rule_content(bq.content.trim()) {
                lines[i].is_horizontal_rule = true;
            }
        }

        // Heading detection is skipped for code blocks, front matter, HTML
        // blocks and blank lines.
        if lines[i].in_code_block {
            continue;
        }

        if front_matter_end > 0 && i < front_matter_end {
            continue;
        }

        if lines[i].in_html_block {
            continue;
        }

        if lines[i].is_blank {
            continue;
        }

        // MkDocs snippet section markers are not treated as ATX headings.
        let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
            crate::utils::mkdocs_snippets::is_snippet_section_start(line)
                || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
        } else {
            false
        };

        // ATX heading: `# Title`, optionally with a closing `#` sequence.
        if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
            if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
                continue;
            }
            let line_offset = lines[i].byte_offset;
            // A line that starts strictly inside a link range (e.g. a link
            // spanning several lines) is not a heading.
            if link_byte_ranges
                .iter()
                .any(|&(start, end)| line_offset > start && line_offset < end)
            {
                continue;
            }
            let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
            let hashes = caps.get(2).map_or("", |m| m.as_str());
            let spaces_after = caps.get(3).map_or("", |m| m.as_str());
            let rest = caps.get(4).map_or("", |m| m.as_str());

            let level = hashes.len() as u8;
            let marker_column = leading_spaces.len();

            // Separate the heading text from an optional closing `#` sequence.
            let (text, has_closing, closing_seq) = {
                // Set aside a trailing ` {#...}` custom-id part so that it is
                // not mistaken for (or lost with) the closing sequence.
                let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
                    if rest[id_start..].trim_end().ends_with('}') {
                        (&rest[..id_start], &rest[id_start..])
                    } else {
                        (rest, "")
                    }
                } else {
                    (rest, "")
                };

                let trimmed_rest = rest_without_id.trim_end();
                if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
                    // Work on (byte offset, char) pairs so that stepping
                    // backwards stays on character boundaries.
                    let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();

                    let last_hash_char_idx = char_positions
                        .iter()
                        .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);

                    if let Some(mut char_idx) = last_hash_char_idx {
                        // Walk back to the first `#` of the run.
                        while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
                            char_idx -= 1;
                        }

                        let start_of_hashes = char_positions[char_idx].0;

                        let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();

                        // The run only counts as a closing sequence when it
                        // reaches the end of the text and is preceded by
                        // whitespace (or starts the text).
                        let potential_closing = &trimmed_rest[start_of_hashes..];
                        let is_all_hashes = potential_closing.chars().all(|c| c == '#');

                        if is_all_hashes && has_space_before {
                            let closing_hashes = potential_closing.to_string();
                            // Re-attach the custom-id part after removing the closing hashes.
                            let text_part = if !custom_id_part.is_empty() {
                                format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
                            } else {
                                trimmed_rest[..start_of_hashes].trim_end().to_string()
                            };
                            (text_part, true, closing_hashes)
                        } else {
                            (rest.to_string(), false, String::new())
                        }
                    } else {
                        (rest.to_string(), false, String::new())
                    }
                } else {
                    (rest.to_string(), false, String::new())
                }
            };

            let content_column = marker_column + hashes.len() + spaces_after.len();

            let raw_text = text.trim().to_string();
            let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);

            // No inline id: look for a standalone attribute list on the next line.
            if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
                let next_line = content_lines[i + 1];
                if !lines[i + 1].in_code_block
                    && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
                    && let Some(next_line_id) =
                        crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
                {
                    custom_id = Some(next_line_id);
                }
            }

            // Valid when whitespace follows the hashes, nothing follows them,
            // the level is above 1, or the text starts with an uppercase
            // character. NOTE(review): presumably this keeps `#tag`-style
            // text from counting as a valid heading — confirm.
            let is_valid = !spaces_after.is_empty()
                || rest.is_empty()
                || level > 1
                || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());

            lines[i].heading = Some(HeadingInfo {
                level,
                style: HeadingStyle::ATX,
                marker: hashes.to_string(),
                marker_column,
                content_column,
                text: clean_text,
                custom_id,
                raw_text,
                has_closing_sequence: has_closing,
                closing_sequence: closing_seq,
                is_valid,
            });
        }
        // Setext heading: this line is the text, the next line is the underline.
        else if i + 1 < content_lines.len() && i + 1 < lines.len() {
            let next_line = content_lines[i + 1];
            if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
                if front_matter_end > 0 && i < front_matter_end {
                    continue;
                }

                if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
                {
                    continue;
                }

                let content_line = line.trim();

                // Lines starting with a list-marker character are not heading text.
                if content_line.starts_with('-') || content_line.starts_with('*') || content_line.starts_with('+') {
                    continue;
                }

                // Three or more underscores (ignoring whitespace) and nothing else.
                if content_line.starts_with('_') {
                    let non_ws: String = content_line.chars().filter(|c| !c.is_whitespace()).collect();
                    if non_ws.len() >= 3 && non_ws.chars().all(|c| c == '_') {
                        continue;
                    }
                }

                // Digits followed by `.` or `)`: an ordered list item.
                if let Some(first_char) = content_line.chars().next()
                    && first_char.is_ascii_digit()
                {
                    let num_end = content_line.chars().take_while(|c| c.is_ascii_digit()).count();
                    if num_end < content_line.len() {
                        let next = content_line.chars().nth(num_end);
                        if next == Some('.') || next == Some(')') {
                            continue;
                        }
                    }
                }

                // Reached for snippet lines that also match the ATX pattern.
                if ATX_HEADING_REGEX.is_match(line) {
                    continue;
                }

                // Blockquote line.
                if content_line.starts_with('>') {
                    continue;
                }

                // Code fence opener.
                let trimmed_start = line.trim_start();
                if trimmed_start.len() >= 3 {
                    let first_three: String = trimmed_start.chars().take(3).collect();
                    if first_three == "```" || first_three == "~~~" {
                        continue;
                    }
                }

                // Line starting with `<` (HTML).
                if content_line.starts_with('<') {
                    continue;
                }

                let underline = next_line.trim();

                // `=` underline gives level 1, `-` underline gives level 2.
                let level = if underline.starts_with('=') { 1 } else { 2 };
                let style = if level == 1 {
                    HeadingStyle::Setext1
                } else {
                    HeadingStyle::Setext2
                };

                let raw_text = line.trim().to_string();
                let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);

                // No inline id: look for a standalone attribute list on the
                // line after the underline.
                if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
                    let attr_line = content_lines[i + 2];
                    if !lines[i + 2].in_code_block
                        && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
                        && let Some(attr_line_id) =
                            crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
                    {
                        custom_id = Some(attr_line_id);
                    }
                }

                // For Setext headings the marker is the underline text and
                // `marker_column` is the underline's indentation.
                lines[i].heading = Some(HeadingInfo {
                    level,
                    style,
                    marker: underline.to_string(),
                    marker_column: next_line.len() - next_line.trim_start().len(),
                    content_column: lines[i].indent,
                    text: clean_text,
                    custom_id,
                    raw_text,
                    has_closing_sequence: false,
                    closing_sequence: String::new(),
                    is_valid: true, });
            }
        }
    }
}
2839
2840 fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2842 const BLOCK_ELEMENTS: &[&str] = &[
2845 "address",
2846 "article",
2847 "aside",
2848 "audio",
2849 "blockquote",
2850 "canvas",
2851 "details",
2852 "dialog",
2853 "dd",
2854 "div",
2855 "dl",
2856 "dt",
2857 "embed",
2858 "fieldset",
2859 "figcaption",
2860 "figure",
2861 "footer",
2862 "form",
2863 "h1",
2864 "h2",
2865 "h3",
2866 "h4",
2867 "h5",
2868 "h6",
2869 "header",
2870 "hr",
2871 "iframe",
2872 "li",
2873 "main",
2874 "menu",
2875 "nav",
2876 "noscript",
2877 "object",
2878 "ol",
2879 "p",
2880 "picture",
2881 "pre",
2882 "script",
2883 "search",
2884 "section",
2885 "source",
2886 "style",
2887 "summary",
2888 "svg",
2889 "table",
2890 "tbody",
2891 "td",
2892 "template",
2893 "textarea",
2894 "tfoot",
2895 "th",
2896 "thead",
2897 "tr",
2898 "track",
2899 "ul",
2900 "video",
2901 ];
2902
2903 let mut i = 0;
2904 while i < lines.len() {
2905 if lines[i].in_code_block || lines[i].in_front_matter {
2907 i += 1;
2908 continue;
2909 }
2910
2911 let trimmed = lines[i].content(content).trim_start();
2912
2913 if trimmed.starts_with('<') && trimmed.len() > 1 {
2915 let after_bracket = &trimmed[1..];
2917 let is_closing = after_bracket.starts_with('/');
2918 let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2919
2920 let tag_name = tag_start
2922 .chars()
2923 .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2924 .collect::<String>()
2925 .to_lowercase();
2926
2927 if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2929 lines[i].in_html_block = true;
2931
2932 if !is_closing {
2935 let closing_tag = format!("</{tag_name}>");
2936 let allow_blank_lines = tag_name == "style" || tag_name == "script";
2938 let mut j = i + 1;
2939 let mut found_closing_tag = false;
2940 while j < lines.len() && j < i + 100 {
2941 if !allow_blank_lines && lines[j].is_blank {
2944 break;
2945 }
2946
2947 lines[j].in_html_block = true;
2948
2949 if lines[j].content(content).contains(&closing_tag) {
2951 found_closing_tag = true;
2952 }
2953
2954 if found_closing_tag {
2957 j += 1;
2958 while j < lines.len() && j < i + 100 {
2960 if lines[j].is_blank {
2961 break;
2962 }
2963 lines[j].in_html_block = true;
2964 j += 1;
2965 }
2966 break;
2967 }
2968 j += 1;
2969 }
2970 }
2971 }
2972 }
2973
2974 i += 1;
2975 }
2976 }
2977
2978 fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2981 if !flavor.supports_esm_blocks() {
2983 return;
2984 }
2985
2986 let mut in_multiline_import = false;
2987
2988 for line in lines.iter_mut() {
2989 if line.in_code_block || line.in_front_matter || line.in_html_comment {
2991 in_multiline_import = false;
2992 continue;
2993 }
2994
2995 let line_content = line.content(content);
2996 let trimmed = line_content.trim();
2997
2998 if in_multiline_import {
3000 line.in_esm_block = true;
3001 if trimmed.ends_with('\'')
3004 || trimmed.ends_with('"')
3005 || trimmed.ends_with("';")
3006 || trimmed.ends_with("\";")
3007 || line_content.contains(';')
3008 {
3009 in_multiline_import = false;
3010 }
3011 continue;
3012 }
3013
3014 if line.is_blank {
3016 continue;
3017 }
3018
3019 if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
3021 line.in_esm_block = true;
3022
3023 let is_import = trimmed.starts_with("import ");
3031
3032 let is_complete =
3034 trimmed.ends_with(';')
3036 || (trimmed.contains(" from ") && (trimmed.ends_with('\'') || trimmed.ends_with('"')))
3038 || (!is_import && !trimmed.contains(" from ") && (
3040 trimmed.starts_with("export const ")
3041 || trimmed.starts_with("export let ")
3042 || trimmed.starts_with("export var ")
3043 || trimmed.starts_with("export function ")
3044 || trimmed.starts_with("export class ")
3045 || trimmed.starts_with("export default ")
3046 ));
3047
3048 if !is_complete && is_import {
3049 if trimmed.contains('{') && !trimmed.contains('}') {
3053 in_multiline_import = true;
3054 }
3055 }
3056 }
3057 }
3058 }
3059
3060 fn detect_jsx_and_mdx_comments(
3063 content: &str,
3064 lines: &mut [LineInfo],
3065 flavor: MarkdownFlavor,
3066 code_blocks: &[(usize, usize)],
3067 ) -> (ByteRanges, ByteRanges) {
3068 if !flavor.supports_jsx() {
3070 return (Vec::new(), Vec::new());
3071 }
3072
3073 let mut jsx_expression_ranges: Vec<(usize, usize)> = Vec::new();
3074 let mut mdx_comment_ranges: Vec<(usize, usize)> = Vec::new();
3075
3076 if !content.contains('{') {
3078 return (jsx_expression_ranges, mdx_comment_ranges);
3079 }
3080
3081 let bytes = content.as_bytes();
3082 let mut i = 0;
3083
3084 while i < bytes.len() {
3085 if bytes[i] == b'{' {
3086 if code_blocks.iter().any(|(start, end)| i >= *start && i < *end) {
3088 i += 1;
3089 continue;
3090 }
3091
3092 let start = i;
3093
3094 if i + 2 < bytes.len() && &bytes[i + 1..i + 3] == b"/*" {
3096 let mut j = i + 3;
3098 while j + 2 < bytes.len() {
3099 if &bytes[j..j + 2] == b"*/" && j + 2 < bytes.len() && bytes[j + 2] == b'}' {
3100 let end = j + 3;
3101 mdx_comment_ranges.push((start, end));
3102
3103 Self::mark_lines_in_range(lines, content, start, end, |line| {
3105 line.in_mdx_comment = true;
3106 });
3107
3108 i = end;
3109 break;
3110 }
3111 j += 1;
3112 }
3113 if j + 2 >= bytes.len() {
3114 mdx_comment_ranges.push((start, bytes.len()));
3116 Self::mark_lines_in_range(lines, content, start, bytes.len(), |line| {
3117 line.in_mdx_comment = true;
3118 });
3119 break;
3120 }
3121 } else {
3122 let mut brace_depth = 1;
3125 let mut j = i + 1;
3126 let mut in_string = false;
3127 let mut string_char = b'"';
3128
3129 while j < bytes.len() && brace_depth > 0 {
3130 let c = bytes[j];
3131
3132 if !in_string && (c == b'"' || c == b'\'' || c == b'`') {
3134 in_string = true;
3135 string_char = c;
3136 } else if in_string && c == string_char && (j == 0 || bytes[j - 1] != b'\\') {
3137 in_string = false;
3138 } else if !in_string {
3139 if c == b'{' {
3140 brace_depth += 1;
3141 } else if c == b'}' {
3142 brace_depth -= 1;
3143 }
3144 }
3145 j += 1;
3146 }
3147
3148 if brace_depth == 0 {
3149 let end = j;
3150 jsx_expression_ranges.push((start, end));
3151
3152 Self::mark_lines_in_range(lines, content, start, end, |line| {
3154 line.in_jsx_expression = true;
3155 });
3156
3157 i = end;
3158 } else {
3159 i += 1;
3160 }
3161 }
3162 } else {
3163 i += 1;
3164 }
3165 }
3166
3167 (jsx_expression_ranges, mdx_comment_ranges)
3168 }
3169
3170 fn detect_mkdocs_line_info(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
3173 if flavor != MarkdownFlavor::MkDocs {
3174 return;
3175 }
3176
3177 use crate::utils::mkdocs_admonitions;
3178 use crate::utils::mkdocs_definition_lists;
3179 use crate::utils::mkdocs_tabs;
3180
3181 let content_lines: Vec<&str> = content.lines().collect();
3182
3183 let mut in_admonition = false;
3185 let mut admonition_indent = 0;
3186
3187 let mut in_tab = false;
3189 let mut tab_indent = 0;
3190
3191 let mut in_definition = false;
3193
3194 for (i, line) in content_lines.iter().enumerate() {
3195 if i >= lines.len() {
3196 break;
3197 }
3198
3199 if lines[i].in_code_block {
3201 continue;
3202 }
3203
3204 if mkdocs_admonitions::is_admonition_start(line) {
3206 in_admonition = true;
3207 admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3208 lines[i].in_admonition = true;
3209 } else if in_admonition {
3210 if line.trim().is_empty() {
3212 lines[i].in_admonition = true;
3214 } else if mkdocs_admonitions::is_admonition_content(line, admonition_indent) {
3215 lines[i].in_admonition = true;
3216 } else {
3217 in_admonition = false;
3219 if mkdocs_admonitions::is_admonition_start(line) {
3221 in_admonition = true;
3222 admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3223 lines[i].in_admonition = true;
3224 }
3225 }
3226 }
3227
3228 if mkdocs_tabs::is_tab_marker(line) {
3230 in_tab = true;
3231 tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3232 lines[i].in_content_tab = true;
3233 } else if in_tab {
3234 if line.trim().is_empty() {
3236 lines[i].in_content_tab = true;
3238 } else if mkdocs_tabs::is_tab_content(line, tab_indent) {
3239 lines[i].in_content_tab = true;
3240 } else {
3241 in_tab = false;
3243 if mkdocs_tabs::is_tab_marker(line) {
3245 in_tab = true;
3246 tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3247 lines[i].in_content_tab = true;
3248 }
3249 }
3250 }
3251
3252 if mkdocs_definition_lists::is_definition_line(line) {
3254 in_definition = true;
3255 lines[i].in_definition_list = true;
3256 } else if in_definition {
3257 if mkdocs_definition_lists::is_definition_continuation(line) {
3259 lines[i].in_definition_list = true;
3260 } else if line.trim().is_empty() {
3261 lines[i].in_definition_list = true;
3263 } else if mkdocs_definition_lists::could_be_term_line(line) {
3264 if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1])
3266 {
3267 lines[i].in_definition_list = true;
3268 } else {
3269 in_definition = false;
3270 }
3271 } else {
3272 in_definition = false;
3273 }
3274 } else if mkdocs_definition_lists::could_be_term_line(line) {
3275 if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1]) {
3277 lines[i].in_definition_list = true;
3278 in_definition = true;
3279 }
3280 }
3281 }
3282 }
3283
3284 fn mark_lines_in_range<F>(lines: &mut [LineInfo], content: &str, start: usize, end: usize, mut f: F)
3286 where
3287 F: FnMut(&mut LineInfo),
3288 {
3289 for line in lines.iter_mut() {
3291 let line_start = line.byte_offset;
3292 let line_end = line.byte_offset + line.byte_len;
3293
3294 if line_start < end && line_end > start {
3296 f(line);
3297 }
3298 }
3299
3300 let _ = content;
3302 }
3303
3304 fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
3306 if !content.contains('`') {
3308 return Vec::new();
3309 }
3310
3311 let parser = Parser::new(content).into_offset_iter();
3313 let mut ranges = Vec::new();
3314
3315 for (event, range) in parser {
3316 if let Event::Code(_) = event {
3317 ranges.push((range.start, range.end));
3318 }
3319 }
3320
3321 Self::build_code_spans_from_ranges(content, lines, &ranges)
3322 }
3323
3324 fn build_code_spans_from_ranges(content: &str, lines: &[LineInfo], ranges: &[(usize, usize)]) -> Vec<CodeSpan> {
3325 let mut code_spans = Vec::new();
3326 if ranges.is_empty() {
3327 return code_spans;
3328 }
3329
3330 for &(start_pos, end_pos) in ranges {
3331 let full_span = &content[start_pos..end_pos];
3333 let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
3334
3335 let content_start = start_pos + backtick_count;
3337 let content_end = end_pos - backtick_count;
3338 let span_content = if content_start < content_end {
3339 content[content_start..content_end].to_string()
3340 } else {
3341 String::new()
3342 };
3343
3344 let line_idx = lines
3347 .partition_point(|line| line.byte_offset <= start_pos)
3348 .saturating_sub(1);
3349 let line_num = line_idx + 1;
3350 let byte_col_start = start_pos - lines[line_idx].byte_offset;
3351
3352 let end_line_idx = lines
3354 .partition_point(|line| line.byte_offset <= end_pos)
3355 .saturating_sub(1);
3356 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3357
3358 let line_content = lines[line_idx].content(content);
3361 let col_start = if byte_col_start <= line_content.len() {
3362 line_content[..byte_col_start].chars().count()
3363 } else {
3364 line_content.chars().count()
3365 };
3366
3367 let end_line_content = lines[end_line_idx].content(content);
3368 let col_end = if byte_col_end <= end_line_content.len() {
3369 end_line_content[..byte_col_end].chars().count()
3370 } else {
3371 end_line_content.chars().count()
3372 };
3373
3374 code_spans.push(CodeSpan {
3375 line: line_num,
3376 end_line: end_line_idx + 1,
3377 start_col: col_start,
3378 end_col: col_end,
3379 byte_offset: start_pos,
3380 byte_end: end_pos,
3381 backtick_count,
3382 content: span_content,
3383 });
3384 }
3385
3386 code_spans.sort_by_key(|span| span.byte_offset);
3388
3389 code_spans
3390 }
3391
3392 fn parse_math_spans(content: &str, lines: &[LineInfo]) -> Vec<MathSpan> {
3394 let mut math_spans = Vec::new();
3395
3396 if !content.contains('$') {
3398 return math_spans;
3399 }
3400
3401 let mut options = Options::empty();
3403 options.insert(Options::ENABLE_MATH);
3404 let parser = Parser::new_ext(content, options).into_offset_iter();
3405
3406 for (event, range) in parser {
3407 let (is_display, math_content) = match &event {
3408 Event::InlineMath(text) => (false, text.as_ref()),
3409 Event::DisplayMath(text) => (true, text.as_ref()),
3410 _ => continue,
3411 };
3412
3413 let start_pos = range.start;
3414 let end_pos = range.end;
3415
3416 let line_idx = lines
3418 .partition_point(|line| line.byte_offset <= start_pos)
3419 .saturating_sub(1);
3420 let line_num = line_idx + 1;
3421 let byte_col_start = start_pos - lines[line_idx].byte_offset;
3422
3423 let end_line_idx = lines
3425 .partition_point(|line| line.byte_offset <= end_pos)
3426 .saturating_sub(1);
3427 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3428
3429 let line_content = lines[line_idx].content(content);
3431 let col_start = if byte_col_start <= line_content.len() {
3432 line_content[..byte_col_start].chars().count()
3433 } else {
3434 line_content.chars().count()
3435 };
3436
3437 let end_line_content = lines[end_line_idx].content(content);
3438 let col_end = if byte_col_end <= end_line_content.len() {
3439 end_line_content[..byte_col_end].chars().count()
3440 } else {
3441 end_line_content.chars().count()
3442 };
3443
3444 math_spans.push(MathSpan {
3445 line: line_num,
3446 end_line: end_line_idx + 1,
3447 start_col: col_start,
3448 end_col: col_end,
3449 byte_offset: start_pos,
3450 byte_end: end_pos,
3451 is_display,
3452 content: math_content.to_string(),
3453 });
3454 }
3455
3456 math_spans.sort_by_key(|span| span.byte_offset);
3458
3459 math_spans
3460 }
3461
3462 fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
3473 const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
3475
3476 #[inline]
3479 fn reset_tracking_state(
3480 list_item: &ListItemInfo,
3481 has_list_breaking_content: &mut bool,
3482 min_continuation: &mut usize,
3483 ) {
3484 *has_list_breaking_content = false;
3485 let marker_width = if list_item.is_ordered {
3486 list_item.marker.len() + 1 } else {
3488 list_item.marker.len()
3489 };
3490 *min_continuation = if list_item.is_ordered {
3491 marker_width
3492 } else {
3493 UNORDERED_LIST_MIN_CONTINUATION_INDENT
3494 };
3495 }
3496
3497 let mut list_blocks = Vec::with_capacity(lines.len() / 10); let mut current_block: Option<ListBlock> = None;
3500 let mut last_list_item_line = 0;
3501 let mut current_indent_level = 0;
3502 let mut last_marker_width = 0;
3503
3504 let mut has_list_breaking_content_since_last_item = false;
3506 let mut min_continuation_for_tracking = 0;
3507
3508 for (line_idx, line_info) in lines.iter().enumerate() {
3509 let line_num = line_idx + 1;
3510
3511 if line_info.in_code_block {
3513 if let Some(ref mut block) = current_block {
3514 let min_continuation_indent =
3516 CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
3517
3518 let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
3520
3521 match context {
3522 CodeBlockContext::Indented => {
3523 block.end_line = line_num;
3525 continue;
3526 }
3527 CodeBlockContext::Standalone => {
3528 let completed_block = current_block.take().unwrap();
3530 list_blocks.push(completed_block);
3531 continue;
3532 }
3533 CodeBlockContext::Adjacent => {
3534 block.end_line = line_num;
3536 continue;
3537 }
3538 }
3539 } else {
3540 continue;
3542 }
3543 }
3544
3545 let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
3547 caps.get(0).unwrap().as_str().to_string()
3548 } else {
3549 String::new()
3550 };
3551
3552 if let Some(ref block) = current_block
3555 && line_info.list_item.is_none()
3556 && !line_info.is_blank
3557 && !line_info.in_code_span_continuation
3558 {
3559 let line_content = line_info.content(content).trim();
3560
3561 let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
3566
3567 let blockquote_prefix_changes = blockquote_prefix.trim() != block.blockquote_prefix.trim();
3570
3571 let breaks_list = line_info.heading.is_some()
3572 || line_content.starts_with("---")
3573 || line_content.starts_with("***")
3574 || line_content.starts_with("___")
3575 || crate::utils::skip_context::is_table_line(line_content)
3576 || blockquote_prefix_changes
3577 || (line_info.indent > 0
3578 && line_info.indent < min_continuation_for_tracking
3579 && !is_lazy_continuation);
3580
3581 if breaks_list {
3582 has_list_breaking_content_since_last_item = true;
3583 }
3584 }
3585
3586 if line_info.in_code_span_continuation
3589 && line_info.list_item.is_none()
3590 && let Some(ref mut block) = current_block
3591 {
3592 block.end_line = line_num;
3593 }
3594
3595 let effective_continuation_indent = if let Some(ref block) = current_block {
3601 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3602 let line_content = line_info.content(content);
3603 let line_bq_level = line_content
3604 .chars()
3605 .take_while(|c| *c == '>' || c.is_whitespace())
3606 .filter(|&c| c == '>')
3607 .count();
3608 if line_bq_level > 0 && line_bq_level == block_bq_level {
3609 let mut pos = 0;
3611 let mut found_markers = 0;
3612 for c in line_content.chars() {
3613 pos += c.len_utf8();
3614 if c == '>' {
3615 found_markers += 1;
3616 if found_markers == line_bq_level {
3617 if line_content.get(pos..pos + 1) == Some(" ") {
3618 pos += 1;
3619 }
3620 break;
3621 }
3622 }
3623 }
3624 let after_bq = &line_content[pos..];
3625 after_bq.len() - after_bq.trim_start().len()
3626 } else {
3627 line_info.indent
3628 }
3629 } else {
3630 line_info.indent
3631 };
3632 let adjusted_min_continuation_for_tracking = if let Some(ref block) = current_block {
3633 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3634 if block_bq_level > 0 {
3635 if block.is_ordered { last_marker_width } else { 2 }
3636 } else {
3637 min_continuation_for_tracking
3638 }
3639 } else {
3640 min_continuation_for_tracking
3641 };
3642 let is_valid_continuation = effective_continuation_indent >= adjusted_min_continuation_for_tracking
3643 || (line_info.indent == 0 && !line_info.is_blank); if std::env::var("RUMDL_DEBUG_LIST").is_ok() && line_info.list_item.is_none() && !line_info.is_blank {
3646 eprintln!(
3647 "[DEBUG] Line {}: checking continuation - indent={}, min_cont={}, is_valid={}, in_code_span={}, in_code_block={}, has_block={}",
3648 line_num,
3649 effective_continuation_indent,
3650 adjusted_min_continuation_for_tracking,
3651 is_valid_continuation,
3652 line_info.in_code_span_continuation,
3653 line_info.in_code_block,
3654 current_block.is_some()
3655 );
3656 }
3657
3658 if !line_info.in_code_span_continuation
3659 && line_info.list_item.is_none()
3660 && !line_info.is_blank
3661 && !line_info.in_code_block
3662 && is_valid_continuation
3663 && let Some(ref mut block) = current_block
3664 {
3665 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3666 eprintln!(
3667 "[DEBUG] Line {}: extending block.end_line from {} to {}",
3668 line_num, block.end_line, line_num
3669 );
3670 }
3671 block.end_line = line_num;
3672 }
3673
3674 if let Some(list_item) = &line_info.list_item {
3676 let item_indent = list_item.marker_column;
3678 let nesting = item_indent / 2; if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3681 eprintln!(
3682 "[DEBUG] Line {}: list item found, marker={:?}, indent={}",
3683 line_num, list_item.marker, item_indent
3684 );
3685 }
3686
3687 if let Some(ref mut block) = current_block {
3688 let is_nested = nesting > block.nesting_level;
3692 let same_type =
3693 (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
3694 let same_context = block.blockquote_prefix == blockquote_prefix;
3695 let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
3697
3698 let marker_compatible =
3700 block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
3701
3702 let has_non_list_content = has_list_breaking_content_since_last_item;
3705
3706 let mut continues_list = if is_nested {
3710 same_context && reasonable_distance && !has_non_list_content
3712 } else {
3713 same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
3715 };
3716
3717 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3718 eprintln!(
3719 "[DEBUG] Line {}: continues_list={}, is_nested={}, same_type={}, same_context={}, reasonable_distance={}, marker_compatible={}, has_non_list_content={}, last_item={}, block.end_line={}",
3720 line_num,
3721 continues_list,
3722 is_nested,
3723 same_type,
3724 same_context,
3725 reasonable_distance,
3726 marker_compatible,
3727 has_non_list_content,
3728 last_list_item_line,
3729 block.end_line
3730 );
3731 }
3732
3733 if !continues_list
3737 && (is_nested || same_type)
3738 && reasonable_distance
3739 && line_num > 0
3740 && block.end_line == line_num - 1
3741 {
3742 if block.item_lines.contains(&(line_num - 1)) {
3745 continues_list = true;
3747 } else {
3748 continues_list = true;
3752 }
3753 }
3754
3755 if continues_list {
3756 block.end_line = line_num;
3758 block.item_lines.push(line_num);
3759
3760 block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
3762 list_item.marker.len() + 1
3763 } else {
3764 list_item.marker.len()
3765 });
3766
3767 if !block.is_ordered
3769 && block.marker.is_some()
3770 && block.marker.as_ref() != Some(&list_item.marker)
3771 {
3772 block.marker = None;
3774 }
3775
3776 reset_tracking_state(
3778 list_item,
3779 &mut has_list_breaking_content_since_last_item,
3780 &mut min_continuation_for_tracking,
3781 );
3782 } else {
3783 if !same_type
3788 && !is_nested
3789 && let Some(&last_item) = block.item_lines.last()
3790 {
3791 block.end_line = last_item;
3792 }
3793
3794 list_blocks.push(block.clone());
3795
3796 *block = ListBlock {
3797 start_line: line_num,
3798 end_line: line_num,
3799 is_ordered: list_item.is_ordered,
3800 marker: if list_item.is_ordered {
3801 None
3802 } else {
3803 Some(list_item.marker.clone())
3804 },
3805 blockquote_prefix: blockquote_prefix.clone(),
3806 item_lines: vec![line_num],
3807 nesting_level: nesting,
3808 max_marker_width: if list_item.is_ordered {
3809 list_item.marker.len() + 1
3810 } else {
3811 list_item.marker.len()
3812 },
3813 };
3814
3815 reset_tracking_state(
3817 list_item,
3818 &mut has_list_breaking_content_since_last_item,
3819 &mut min_continuation_for_tracking,
3820 );
3821 }
3822 } else {
3823 current_block = Some(ListBlock {
3825 start_line: line_num,
3826 end_line: line_num,
3827 is_ordered: list_item.is_ordered,
3828 marker: if list_item.is_ordered {
3829 None
3830 } else {
3831 Some(list_item.marker.clone())
3832 },
3833 blockquote_prefix,
3834 item_lines: vec![line_num],
3835 nesting_level: nesting,
3836 max_marker_width: list_item.marker.len(),
3837 });
3838
3839 reset_tracking_state(
3841 list_item,
3842 &mut has_list_breaking_content_since_last_item,
3843 &mut min_continuation_for_tracking,
3844 );
3845 }
3846
3847 last_list_item_line = line_num;
3848 current_indent_level = item_indent;
3849 last_marker_width = if list_item.is_ordered {
3850 list_item.marker.len() + 1 } else {
3852 list_item.marker.len()
3853 };
3854 } else if let Some(ref mut block) = current_block {
3855 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3857 eprintln!(
3858 "[DEBUG] Line {}: non-list-item, is_blank={}, block exists",
3859 line_num, line_info.is_blank
3860 );
3861 }
3862
3863 let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
3871 lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
3872 } else {
3873 false
3874 };
3875
3876 let min_continuation_indent = if block.is_ordered {
3880 current_indent_level + last_marker_width
3881 } else {
3882 current_indent_level + 2 };
3884
3885 if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
3886 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3888 eprintln!(
3889 "[DEBUG] Line {}: indented continuation (indent={}, min={})",
3890 line_num, line_info.indent, min_continuation_indent
3891 );
3892 }
3893 block.end_line = line_num;
3894 } else if line_info.is_blank {
3895 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3898 eprintln!("[DEBUG] Line {line_num}: entering blank line handling");
3899 }
3900 let mut check_idx = line_idx + 1;
3901 let mut found_continuation = false;
3902
3903 while check_idx < lines.len() && lines[check_idx].is_blank {
3905 check_idx += 1;
3906 }
3907
3908 if check_idx < lines.len() {
3909 let next_line = &lines[check_idx];
3910 let next_content = next_line.content(content);
3912 let block_bq_level_for_indent = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3915 let next_bq_level_for_indent = next_content
3916 .chars()
3917 .take_while(|c| *c == '>' || c.is_whitespace())
3918 .filter(|&c| c == '>')
3919 .count();
3920 let effective_indent =
3921 if next_bq_level_for_indent > 0 && next_bq_level_for_indent == block_bq_level_for_indent {
3922 let mut pos = 0;
3925 let mut found_markers = 0;
3926 for c in next_content.chars() {
3927 pos += c.len_utf8();
3928 if c == '>' {
3929 found_markers += 1;
3930 if found_markers == next_bq_level_for_indent {
3931 if next_content.get(pos..pos + 1) == Some(" ") {
3933 pos += 1;
3934 }
3935 break;
3936 }
3937 }
3938 }
3939 let after_blockquote_marker = &next_content[pos..];
3940 after_blockquote_marker.len() - after_blockquote_marker.trim_start().len()
3941 } else {
3942 next_line.indent
3943 };
3944 let adjusted_min_continuation = if block_bq_level_for_indent > 0 {
3947 if block.is_ordered { last_marker_width } else { 2 }
3950 } else {
3951 min_continuation_indent
3952 };
3953 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3955 eprintln!(
3956 "[DEBUG] Blank line {} checking next line {}: effective_indent={}, adjusted_min={}, next_is_list={}, in_code_block={}",
3957 line_num,
3958 check_idx + 1,
3959 effective_indent,
3960 adjusted_min_continuation,
3961 next_line.list_item.is_some(),
3962 next_line.in_code_block
3963 );
3964 }
3965 if !next_line.in_code_block && effective_indent >= adjusted_min_continuation {
3966 found_continuation = true;
3967 }
3968 else if !next_line.in_code_block
3970 && next_line.list_item.is_some()
3971 && let Some(item) = &next_line.list_item
3972 {
3973 let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
3974 .find(next_line.content(content))
3975 .map_or(String::new(), |m| m.as_str().to_string());
3976 if item.marker_column == current_indent_level
3977 && item.is_ordered == block.is_ordered
3978 && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
3979 {
3980 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3984 let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
3985 if let Some(between_line) = lines.get(idx) {
3986 let between_content = between_line.content(content);
3987 let trimmed = between_content.trim();
3988 if trimmed.is_empty() {
3990 return false;
3991 }
3992 let line_indent = between_content.len() - between_content.trim_start().len();
3994
3995 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
3997 .find(between_content)
3998 .map_or(String::new(), |m| m.as_str().to_string());
3999 let between_bq_level = between_bq_prefix.chars().filter(|&c| c == '>').count();
4000 let blockquote_level_changed =
4001 trimmed.starts_with(">") && between_bq_level != block_bq_level;
4002
4003 if trimmed.starts_with("```")
4005 || trimmed.starts_with("~~~")
4006 || trimmed.starts_with("---")
4007 || trimmed.starts_with("***")
4008 || trimmed.starts_with("___")
4009 || blockquote_level_changed
4010 || crate::utils::skip_context::is_table_line(trimmed)
4011 || between_line.heading.is_some()
4012 {
4013 return true; }
4015
4016 line_indent >= min_continuation_indent
4018 } else {
4019 false
4020 }
4021 });
4022
4023 if block.is_ordered {
4024 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4027 if let Some(between_line) = lines.get(idx) {
4028 let between_content = between_line.content(content);
4029 let trimmed = between_content.trim();
4030 if trimmed.is_empty() {
4031 return false;
4032 }
4033 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4035 .find(between_content)
4036 .map_or(String::new(), |m| m.as_str().to_string());
4037 let between_bq_level =
4038 between_bq_prefix.chars().filter(|&c| c == '>').count();
4039 let blockquote_level_changed =
4040 trimmed.starts_with(">") && between_bq_level != block_bq_level;
4041 trimmed.starts_with("```")
4043 || trimmed.starts_with("~~~")
4044 || trimmed.starts_with("---")
4045 || trimmed.starts_with("***")
4046 || trimmed.starts_with("___")
4047 || blockquote_level_changed
4048 || crate::utils::skip_context::is_table_line(trimmed)
4049 || between_line.heading.is_some()
4050 } else {
4051 false
4052 }
4053 });
4054 found_continuation = !has_structural_separators;
4055 } else {
4056 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4058 if let Some(between_line) = lines.get(idx) {
4059 let between_content = between_line.content(content);
4060 let trimmed = between_content.trim();
4061 if trimmed.is_empty() {
4062 return false;
4063 }
4064 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4066 .find(between_content)
4067 .map_or(String::new(), |m| m.as_str().to_string());
4068 let between_bq_level =
4069 between_bq_prefix.chars().filter(|&c| c == '>').count();
4070 let blockquote_level_changed =
4071 trimmed.starts_with(">") && between_bq_level != block_bq_level;
4072 trimmed.starts_with("```")
4074 || trimmed.starts_with("~~~")
4075 || trimmed.starts_with("---")
4076 || trimmed.starts_with("***")
4077 || trimmed.starts_with("___")
4078 || blockquote_level_changed
4079 || crate::utils::skip_context::is_table_line(trimmed)
4080 || between_line.heading.is_some()
4081 } else {
4082 false
4083 }
4084 });
4085 found_continuation = !has_structural_separators;
4086 }
4087 }
4088 }
4089 }
4090
4091 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4092 eprintln!("[DEBUG] Blank line {line_num} final: found_continuation={found_continuation}");
4093 }
4094 if found_continuation {
4095 block.end_line = line_num;
4097 } else {
4098 list_blocks.push(block.clone());
4100 current_block = None;
4101 }
4102 } else {
4103 let min_required_indent = if block.is_ordered {
4106 current_indent_level + last_marker_width
4107 } else {
4108 current_indent_level + 2
4109 };
4110
4111 let line_content = line_info.content(content).trim();
4116
4117 let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
4119
4120 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4123 let current_bq_level = blockquote_prefix.chars().filter(|&c| c == '>').count();
4124 let blockquote_level_changed = line_content.starts_with(">") && current_bq_level != block_bq_level;
4125
4126 let is_structural_separator = line_info.heading.is_some()
4127 || line_content.starts_with("```")
4128 || line_content.starts_with("~~~")
4129 || line_content.starts_with("---")
4130 || line_content.starts_with("***")
4131 || line_content.starts_with("___")
4132 || blockquote_level_changed
4133 || looks_like_table;
4134
4135 let is_lazy_continuation = !is_structural_separator
4139 && !line_info.is_blank
4140 && (line_info.indent == 0
4141 || line_info.indent >= min_required_indent
4142 || line_info.in_code_span_continuation);
4143
4144 if is_lazy_continuation {
4145 block.end_line = line_num;
4148 } else {
4149 list_blocks.push(block.clone());
4151 current_block = None;
4152 }
4153 }
4154 }
4155 }
4156
4157 if let Some(block) = current_block {
4159 list_blocks.push(block);
4160 }
4161
4162 merge_adjacent_list_blocks(content, &mut list_blocks, lines);
4164
4165 list_blocks
4166 }
4167
4168 fn compute_char_frequency(content: &str) -> CharFrequency {
4170 let mut frequency = CharFrequency::default();
4171
4172 for ch in content.chars() {
4173 match ch {
4174 '#' => frequency.hash_count += 1,
4175 '*' => frequency.asterisk_count += 1,
4176 '_' => frequency.underscore_count += 1,
4177 '-' => frequency.hyphen_count += 1,
4178 '+' => frequency.plus_count += 1,
4179 '>' => frequency.gt_count += 1,
4180 '|' => frequency.pipe_count += 1,
4181 '[' => frequency.bracket_count += 1,
4182 '`' => frequency.backtick_count += 1,
4183 '<' => frequency.lt_count += 1,
4184 '!' => frequency.exclamation_count += 1,
4185 '\n' => frequency.newline_count += 1,
4186 _ => {}
4187 }
4188 }
4189
4190 frequency
4191 }
4192
4193 fn parse_html_tags(
4195 content: &str,
4196 lines: &[LineInfo],
4197 code_blocks: &[(usize, usize)],
4198 flavor: MarkdownFlavor,
4199 ) -> Vec<HtmlTag> {
4200 static HTML_TAG_REGEX: LazyLock<regex::Regex> =
4201 LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
4202
4203 let mut html_tags = Vec::with_capacity(content.matches('<').count());
4204
4205 for cap in HTML_TAG_REGEX.captures_iter(content) {
4206 let full_match = cap.get(0).unwrap();
4207 let match_start = full_match.start();
4208 let match_end = full_match.end();
4209
4210 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4212 continue;
4213 }
4214
4215 let is_closing = !cap.get(1).unwrap().as_str().is_empty();
4216 let tag_name_original = cap.get(2).unwrap().as_str();
4217 let tag_name = tag_name_original.to_lowercase();
4218 let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
4219
4220 if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
4223 continue;
4224 }
4225
4226 let mut line_num = 1;
4228 let mut col_start = match_start;
4229 let mut col_end = match_end;
4230 for (idx, line_info) in lines.iter().enumerate() {
4231 if match_start >= line_info.byte_offset {
4232 line_num = idx + 1;
4233 col_start = match_start - line_info.byte_offset;
4234 col_end = match_end - line_info.byte_offset;
4235 } else {
4236 break;
4237 }
4238 }
4239
4240 html_tags.push(HtmlTag {
4241 line: line_num,
4242 start_col: col_start,
4243 end_col: col_end,
4244 byte_offset: match_start,
4245 byte_end: match_end,
4246 tag_name,
4247 is_closing,
4248 is_self_closing,
4249 raw_content: full_match.as_str().to_string(),
4250 });
4251 }
4252
4253 html_tags
4254 }
4255
4256 fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
4258 let mut table_rows = Vec::with_capacity(lines.len() / 20);
4259
4260 for (line_idx, line_info) in lines.iter().enumerate() {
4261 if line_info.in_code_block || line_info.is_blank {
4263 continue;
4264 }
4265
4266 let line = line_info.content(content);
4267 let line_num = line_idx + 1;
4268
4269 if !line.contains('|') {
4271 continue;
4272 }
4273
4274 let parts: Vec<&str> = line.split('|').collect();
4276 let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
4277
4278 let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
4280 let mut column_alignments = Vec::new();
4281
4282 if is_separator {
4283 for part in &parts[1..parts.len() - 1] {
4284 let trimmed = part.trim();
4286 let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
4287 "center".to_string()
4288 } else if trimmed.ends_with(':') {
4289 "right".to_string()
4290 } else if trimmed.starts_with(':') {
4291 "left".to_string()
4292 } else {
4293 "none".to_string()
4294 };
4295 column_alignments.push(alignment);
4296 }
4297 }
4298
4299 table_rows.push(TableRow {
4300 line: line_num,
4301 is_separator,
4302 column_count,
4303 column_alignments,
4304 });
4305 }
4306
4307 table_rows
4308 }
4309
4310 fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
4312 let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
4313
4314 for cap in URL_SIMPLE_REGEX.captures_iter(content) {
4316 let full_match = cap.get(0).unwrap();
4317 let match_start = full_match.start();
4318 let match_end = full_match.end();
4319
4320 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4322 continue;
4323 }
4324
4325 let preceding_char = if match_start > 0 {
4327 content.chars().nth(match_start - 1)
4328 } else {
4329 None
4330 };
4331 let following_char = content.chars().nth(match_end);
4332
4333 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4334 continue;
4335 }
4336 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4337 continue;
4338 }
4339
4340 let url = full_match.as_str();
4341 let url_type = if url.starts_with("https://") {
4342 "https"
4343 } else if url.starts_with("http://") {
4344 "http"
4345 } else if url.starts_with("ftp://") {
4346 "ftp"
4347 } else {
4348 "other"
4349 };
4350
4351 let mut line_num = 1;
4353 let mut col_start = match_start;
4354 let mut col_end = match_end;
4355 for (idx, line_info) in lines.iter().enumerate() {
4356 if match_start >= line_info.byte_offset {
4357 line_num = idx + 1;
4358 col_start = match_start - line_info.byte_offset;
4359 col_end = match_end - line_info.byte_offset;
4360 } else {
4361 break;
4362 }
4363 }
4364
4365 bare_urls.push(BareUrl {
4366 line: line_num,
4367 start_col: col_start,
4368 end_col: col_end,
4369 byte_offset: match_start,
4370 byte_end: match_end,
4371 url: url.to_string(),
4372 url_type: url_type.to_string(),
4373 });
4374 }
4375
4376 for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
4378 let full_match = cap.get(0).unwrap();
4379 let match_start = full_match.start();
4380 let match_end = full_match.end();
4381
4382 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4384 continue;
4385 }
4386
4387 let preceding_char = if match_start > 0 {
4389 content.chars().nth(match_start - 1)
4390 } else {
4391 None
4392 };
4393 let following_char = content.chars().nth(match_end);
4394
4395 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4396 continue;
4397 }
4398 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4399 continue;
4400 }
4401
4402 let email = full_match.as_str();
4403
4404 let mut line_num = 1;
4406 let mut col_start = match_start;
4407 let mut col_end = match_end;
4408 for (idx, line_info) in lines.iter().enumerate() {
4409 if match_start >= line_info.byte_offset {
4410 line_num = idx + 1;
4411 col_start = match_start - line_info.byte_offset;
4412 col_end = match_end - line_info.byte_offset;
4413 } else {
4414 break;
4415 }
4416 }
4417
4418 bare_urls.push(BareUrl {
4419 line: line_num,
4420 start_col: col_start,
4421 end_col: col_end,
4422 byte_offset: match_start,
4423 byte_end: match_end,
4424 url: email.to_string(),
4425 url_type: "email".to_string(),
4426 });
4427 }
4428
4429 bare_urls
4430 }
4431
4432 #[must_use]
4452 pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
4453 ValidHeadingsIter::new(&self.lines)
4454 }
4455
4456 #[must_use]
4460 pub fn has_valid_headings(&self) -> bool {
4461 self.lines
4462 .iter()
4463 .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
4464 }
4465}
4466
4467fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
4469 if list_blocks.len() < 2 {
4470 return;
4471 }
4472
4473 let mut merger = ListBlockMerger::new(content, lines);
4474 *list_blocks = merger.merge(list_blocks);
4475}
4476
4477struct ListBlockMerger<'a> {
4479 content: &'a str,
4480 lines: &'a [LineInfo],
4481}
4482
4483impl<'a> ListBlockMerger<'a> {
4484 fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
4485 Self { content, lines }
4486 }
4487
4488 fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
4489 let mut merged = Vec::with_capacity(list_blocks.len());
4490 let mut current = list_blocks[0].clone();
4491
4492 for next in list_blocks.iter().skip(1) {
4493 if self.should_merge_blocks(¤t, next) {
4494 current = self.merge_two_blocks(current, next);
4495 } else {
4496 merged.push(current);
4497 current = next.clone();
4498 }
4499 }
4500
4501 merged.push(current);
4502 merged
4503 }
4504
4505 fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
4507 if !self.blocks_are_compatible(current, next) {
4509 return false;
4510 }
4511
4512 let spacing = self.analyze_spacing_between(current, next);
4514 match spacing {
4515 BlockSpacing::Consecutive => true,
4516 BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
4517 BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
4518 self.can_merge_with_content_between(current, next)
4519 }
4520 }
4521 }
4522
4523 fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
4525 current.is_ordered == next.is_ordered
4526 && current.blockquote_prefix == next.blockquote_prefix
4527 && current.nesting_level == next.nesting_level
4528 }
4529
4530 fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
4532 let gap = next.start_line - current.end_line;
4533
4534 match gap {
4535 1 => BlockSpacing::Consecutive,
4536 2 => BlockSpacing::SingleBlank,
4537 _ if gap > 2 => {
4538 if self.has_only_blank_lines_between(current, next) {
4539 BlockSpacing::MultipleBlanks
4540 } else {
4541 BlockSpacing::ContentBetween
4542 }
4543 }
4544 _ => BlockSpacing::Consecutive, }
4546 }
4547
4548 fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4550 if has_meaningful_content_between(self.content, current, next, self.lines) {
4553 return false; }
4555
4556 !current.is_ordered && current.marker == next.marker
4558 }
4559
4560 fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4562 if has_meaningful_content_between(self.content, current, next, self.lines) {
4564 return false; }
4566
4567 current.is_ordered && next.is_ordered
4569 }
4570
4571 fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4573 for line_num in (current.end_line + 1)..next.start_line {
4574 if let Some(line_info) = self.lines.get(line_num - 1)
4575 && !line_info.content(self.content).trim().is_empty()
4576 {
4577 return false;
4578 }
4579 }
4580 true
4581 }
4582
4583 fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
4585 current.end_line = next.end_line;
4586 current.item_lines.extend_from_slice(&next.item_lines);
4587
4588 current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
4590
4591 if !current.is_ordered && self.markers_differ(¤t, next) {
4593 current.marker = None; }
4595
4596 current
4597 }
4598
4599 fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
4601 current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
4602 }
4603}
4604
/// How two list blocks are separated, measured as `next.start_line - current.end_line`.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// The second block starts on the line right after the first one ends.
    Consecutive,
    /// Exactly one line lies between the two blocks.
    SingleBlank,
    /// More than one line lies between the blocks and all of them are blank.
    MultipleBlanks,
    /// More than one line lies between the blocks and at least one is not blank.
    ContentBetween,
}
4613
4614fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
4616 for line_num in (current.end_line + 1)..next.start_line {
4618 if let Some(line_info) = lines.get(line_num - 1) {
4619 let trimmed = line_info.content(content).trim();
4621
4622 if trimmed.is_empty() {
4624 continue;
4625 }
4626
4627 if line_info.heading.is_some() {
4631 return true; }
4633
4634 if is_horizontal_rule(trimmed) {
4636 return true; }
4638
4639 if crate::utils::skip_context::is_table_line(trimmed) {
4641 return true; }
4643
4644 if trimmed.starts_with('>') {
4646 return true; }
4648
4649 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
4651 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4652
4653 let min_continuation_indent = if current.is_ordered {
4655 current.nesting_level + current.max_marker_width + 1 } else {
4657 current.nesting_level + 2
4658 };
4659
4660 if line_indent < min_continuation_indent {
4661 return true; }
4664 }
4665
4666 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4668
4669 let min_indent = if current.is_ordered {
4671 current.nesting_level + current.max_marker_width
4672 } else {
4673 current.nesting_level + 2
4674 };
4675
4676 if line_indent < min_indent {
4678 return true; }
4680
4681 }
4684 }
4685
4686 false
4688}
4689
4690pub fn is_horizontal_rule_line(line: &str) -> bool {
4697 let leading_spaces = line.len() - line.trim_start_matches(' ').len();
4699 if leading_spaces > 3 || line.starts_with('\t') {
4700 return false;
4701 }
4702
4703 is_horizontal_rule_content(line.trim())
4704}
4705
/// Checks whether already-trimmed text forms a horizontal rule.
///
/// The text must start with `-`, `*` or `_`, contain that same character at least
/// three times, and contain nothing else except spaces and tabs.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    // Fewer than three bytes cannot hold three marker characters.
    if trimmed.len() < 3 {
        return false;
    }

    let mut rest = trimmed.chars();
    let Some(marker @ ('-' | '*' | '_')) = rest.next() else {
        return false;
    };

    // The first character already counts as one marker.
    let mut marker_count = 1;
    for ch in rest {
        match ch {
            c if c == marker => marker_count += 1,
            ' ' | '\t' => {}
            _ => return false,
        }
    }

    marker_count >= 3
}
4730
4731pub fn is_horizontal_rule(trimmed: &str) -> bool {
4733 is_horizontal_rule_content(trimmed)
4734}
4735
4736#[cfg(test)]
4738mod tests {
4739 use super::*;
4740
4741 #[test]
4742 fn test_empty_content() {
4743 let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
4744 assert_eq!(ctx.content, "");
4745 assert_eq!(ctx.line_offsets, vec![0]);
4746 assert_eq!(ctx.offset_to_line_col(0), (1, 1));
4747 assert_eq!(ctx.lines.len(), 0);
4748 }
4749
4750 #[test]
4751 fn test_single_line() {
4752 let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
4753 assert_eq!(ctx.content, "# Hello");
4754 assert_eq!(ctx.line_offsets, vec![0]);
4755 assert_eq!(ctx.offset_to_line_col(0), (1, 1));
4756 assert_eq!(ctx.offset_to_line_col(3), (1, 4));
4757 }
4758
4759 #[test]
4760 fn test_multi_line() {
4761 let content = "# Title\n\nSecond line\nThird line";
4762 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4763 assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
4764 assert_eq!(ctx.offset_to_line_col(0), (1, 1)); assert_eq!(ctx.offset_to_line_col(8), (2, 1)); assert_eq!(ctx.offset_to_line_col(9), (3, 1)); assert_eq!(ctx.offset_to_line_col(15), (3, 7)); assert_eq!(ctx.offset_to_line_col(21), (4, 1)); }
4771
4772 #[test]
4773 fn test_line_info() {
4774 let content = "# Title\n indented\n\ncode:\n```rust\nfn main() {}\n```";
4775 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4776
4777 assert_eq!(ctx.lines.len(), 7);
4779
4780 let line1 = &ctx.lines[0];
4782 assert_eq!(line1.content(ctx.content), "# Title");
4783 assert_eq!(line1.byte_offset, 0);
4784 assert_eq!(line1.indent, 0);
4785 assert!(!line1.is_blank);
4786 assert!(!line1.in_code_block);
4787 assert!(line1.list_item.is_none());
4788
4789 let line2 = &ctx.lines[1];
4791 assert_eq!(line2.content(ctx.content), " indented");
4792 assert_eq!(line2.byte_offset, 8);
4793 assert_eq!(line2.indent, 4);
4794 assert!(!line2.is_blank);
4795
4796 let line3 = &ctx.lines[2];
4798 assert_eq!(line3.content(ctx.content), "");
4799 assert!(line3.is_blank);
4800
4801 assert_eq!(ctx.line_to_byte_offset(1), Some(0));
4803 assert_eq!(ctx.line_to_byte_offset(2), Some(8));
4804 assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
4805 assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
4806 }
4807
4808 #[test]
4809 fn test_list_item_detection() {
4810 let content = "- Unordered item\n * Nested item\n1. Ordered item\n 2) Nested ordered\n\nNot a list";
4811 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4812
4813 let line1 = &ctx.lines[0];
4815 assert!(line1.list_item.is_some());
4816 let list1 = line1.list_item.as_ref().unwrap();
4817 assert_eq!(list1.marker, "-");
4818 assert!(!list1.is_ordered);
4819 assert_eq!(list1.marker_column, 0);
4820 assert_eq!(list1.content_column, 2);
4821
4822 let line2 = &ctx.lines[1];
4824 assert!(line2.list_item.is_some());
4825 let list2 = line2.list_item.as_ref().unwrap();
4826 assert_eq!(list2.marker, "*");
4827 assert_eq!(list2.marker_column, 2);
4828
4829 let line3 = &ctx.lines[2];
4831 assert!(line3.list_item.is_some());
4832 let list3 = line3.list_item.as_ref().unwrap();
4833 assert_eq!(list3.marker, "1.");
4834 assert!(list3.is_ordered);
4835 assert_eq!(list3.number, Some(1));
4836
4837 let line6 = &ctx.lines[5];
4839 assert!(line6.list_item.is_none());
4840 }
4841
4842 #[test]
4843 fn test_offset_to_line_col_edge_cases() {
4844 let content = "a\nb\nc";
4845 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4846 assert_eq!(ctx.offset_to_line_col(0), (1, 1)); assert_eq!(ctx.offset_to_line_col(1), (1, 2)); assert_eq!(ctx.offset_to_line_col(2), (2, 1)); assert_eq!(ctx.offset_to_line_col(3), (2, 2)); assert_eq!(ctx.offset_to_line_col(4), (3, 1)); assert_eq!(ctx.offset_to_line_col(5), (3, 2)); }
4854
4855 #[test]
4856 fn test_mdx_esm_blocks() {
4857 let content = r##"import {Chart} from './snowfall.js'
4858export const year = 2023
4859
4860# Last year's snowfall
4861
4862In {year}, the snowfall was above average.
4863It was followed by a warm spring which caused
4864flood conditions in many of the nearby rivers.
4865
4866<Chart color="#fcb32c" year={year} />
4867"##;
4868
4869 let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);
4870
4871 assert_eq!(ctx.lines.len(), 10);
4873 assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
4874 assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
4875 assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
4876 assert!(
4877 !ctx.lines[3].in_esm_block,
4878 "Line 4 (heading) should NOT be in_esm_block"
4879 );
4880 assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
4881 assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
4882 }
4883
4884 #[test]
4885 fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
4886 let content = r#"import {Chart} from './snowfall.js'
4887export const year = 2023
4888
4889# Last year's snowfall
4890"#;
4891
4892 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4893
4894 assert!(
4896 !ctx.lines[0].in_esm_block,
4897 "Line 1 should NOT be in_esm_block in Standard flavor"
4898 );
4899 assert!(
4900 !ctx.lines[1].in_esm_block,
4901 "Line 2 should NOT be in_esm_block in Standard flavor"
4902 );
4903 }
4904
4905 #[test]
4906 fn test_blockquote_with_indented_content() {
4907 let content = r#"# Heading
4911
4912> -S socket-path
4913> More text
4914"#;
4915 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4916
4917 assert!(
4919 ctx.lines.get(2).is_some_and(|l| l.blockquote.is_some()),
4920 "Line 3 should be a blockquote"
4921 );
4922 assert!(
4924 ctx.lines.get(3).is_some_and(|l| l.blockquote.is_some()),
4925 "Line 4 should be a blockquote"
4926 );
4927
4928 let bq3 = ctx.lines.get(2).unwrap().blockquote.as_ref().unwrap();
4931 assert_eq!(bq3.content, "-S socket-path");
4932 assert_eq!(bq3.nesting_level, 1);
4933 assert!(bq3.has_multiple_spaces_after_marker);
4935
4936 let bq4 = ctx.lines.get(3).unwrap().blockquote.as_ref().unwrap();
4937 assert_eq!(bq4.content, "More text");
4938 assert_eq!(bq4.nesting_level, 1);
4939 }
4940
4941 #[test]
4942 fn test_footnote_definitions_not_parsed_as_reference_defs() {
4943 let content = r#"# Title
4945
4946A footnote[^1].
4947
4948[^1]: This is the footnote content.
4949
4950[^note]: Another footnote with [link](https://example.com).
4951
4952[regular]: ./path.md "A real reference definition"
4953"#;
4954 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4955
4956 assert_eq!(
4958 ctx.reference_defs.len(),
4959 1,
4960 "Footnotes should not be parsed as reference definitions"
4961 );
4962
4963 assert_eq!(ctx.reference_defs[0].id, "regular");
4965 assert_eq!(ctx.reference_defs[0].url, "./path.md");
4966 assert_eq!(
4967 ctx.reference_defs[0].title,
4968 Some("A real reference definition".to_string())
4969 );
4970 }
4971
4972 #[test]
4973 fn test_footnote_with_inline_link_not_misidentified() {
4974 let content = r#"# Title
4977
4978A footnote[^1].
4979
4980[^1]: [link](https://www.google.com).
4981"#;
4982 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4983
4984 assert!(
4986 ctx.reference_defs.is_empty(),
4987 "Footnote with inline link should not create a reference definition"
4988 );
4989 }
4990
4991 #[test]
4992 fn test_various_footnote_formats_excluded() {
4993 let content = r#"[^1]: Numeric footnote
4995[^note]: Named footnote
4996[^a]: Single char footnote
4997[^long-footnote-name]: Long named footnote
4998[^123abc]: Mixed alphanumeric
4999
5000[ref1]: ./file1.md
5001[ref2]: ./file2.md
5002"#;
5003 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5004
5005 assert_eq!(
5007 ctx.reference_defs.len(),
5008 2,
5009 "Only regular reference definitions should be parsed"
5010 );
5011
5012 let ids: Vec<&str> = ctx.reference_defs.iter().map(|r| r.id.as_str()).collect();
5013 assert!(ids.contains(&"ref1"));
5014 assert!(ids.contains(&"ref2"));
5015 assert!(!ids.iter().any(|id| id.starts_with('^')));
5016 }
5017
5018 #[test]
5023 fn test_has_char_tracked_characters() {
5024 let content = "# Heading\n* list item\n_emphasis_ and -hyphen-\n+ plus\n> quote\n| table |\n[link]\n`code`\n<html>\n!image";
5026 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5027
5028 assert!(ctx.has_char('#'), "Should detect hash");
5030 assert!(ctx.has_char('*'), "Should detect asterisk");
5031 assert!(ctx.has_char('_'), "Should detect underscore");
5032 assert!(ctx.has_char('-'), "Should detect hyphen");
5033 assert!(ctx.has_char('+'), "Should detect plus");
5034 assert!(ctx.has_char('>'), "Should detect gt");
5035 assert!(ctx.has_char('|'), "Should detect pipe");
5036 assert!(ctx.has_char('['), "Should detect bracket");
5037 assert!(ctx.has_char('`'), "Should detect backtick");
5038 assert!(ctx.has_char('<'), "Should detect lt");
5039 assert!(ctx.has_char('!'), "Should detect exclamation");
5040 assert!(ctx.has_char('\n'), "Should detect newline");
5041 }
5042
5043 #[test]
5044 fn test_has_char_absent_characters() {
5045 let content = "Simple text without special chars";
5046 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5047
5048 assert!(!ctx.has_char('#'), "Should not detect hash");
5050 assert!(!ctx.has_char('*'), "Should not detect asterisk");
5051 assert!(!ctx.has_char('_'), "Should not detect underscore");
5052 assert!(!ctx.has_char('-'), "Should not detect hyphen");
5053 assert!(!ctx.has_char('+'), "Should not detect plus");
5054 assert!(!ctx.has_char('>'), "Should not detect gt");
5055 assert!(!ctx.has_char('|'), "Should not detect pipe");
5056 assert!(!ctx.has_char('['), "Should not detect bracket");
5057 assert!(!ctx.has_char('`'), "Should not detect backtick");
5058 assert!(!ctx.has_char('<'), "Should not detect lt");
5059 assert!(!ctx.has_char('!'), "Should not detect exclamation");
5060 assert!(!ctx.has_char('\n'), "Should not detect newline in single line");
5062 }
5063
5064 #[test]
5065 fn test_has_char_fallback_for_untracked() {
5066 let content = "Text with @mention and $dollar and %percent";
5067 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5068
5069 assert!(ctx.has_char('@'), "Should detect @ via fallback");
5071 assert!(ctx.has_char('$'), "Should detect $ via fallback");
5072 assert!(ctx.has_char('%'), "Should detect % via fallback");
5073 assert!(!ctx.has_char('^'), "Should not detect absent ^ via fallback");
5074 }
5075
5076 #[test]
5077 fn test_char_count_tracked_characters() {
5078 let content = "## Heading ##\n***bold***\n__emphasis__\n---\n+++\n>> nested\n|| table ||\n[[link]]\n``code``\n<<html>>\n!!";
5079 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5080
5081 assert_eq!(ctx.char_count('#'), 4, "Should count 4 hashes");
5083 assert_eq!(ctx.char_count('*'), 6, "Should count 6 asterisks");
5084 assert_eq!(ctx.char_count('_'), 4, "Should count 4 underscores");
5085 assert_eq!(ctx.char_count('-'), 3, "Should count 3 hyphens");
5086 assert_eq!(ctx.char_count('+'), 3, "Should count 3 pluses");
5087 assert_eq!(ctx.char_count('>'), 4, "Should count 4 gt (2 nested + 2 in <<html>>)");
5088 assert_eq!(ctx.char_count('|'), 4, "Should count 4 pipes");
5089 assert_eq!(ctx.char_count('['), 2, "Should count 2 brackets");
5090 assert_eq!(ctx.char_count('`'), 4, "Should count 4 backticks");
5091 assert_eq!(ctx.char_count('<'), 2, "Should count 2 lt");
5092 assert_eq!(ctx.char_count('!'), 2, "Should count 2 exclamations");
5093 assert_eq!(ctx.char_count('\n'), 10, "Should count 10 newlines");
5094 }
5095
5096 #[test]
5097 fn test_char_count_zero_for_absent() {
5098 let content = "Plain text";
5099 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5100
5101 assert_eq!(ctx.char_count('#'), 0);
5102 assert_eq!(ctx.char_count('*'), 0);
5103 assert_eq!(ctx.char_count('_'), 0);
5104 assert_eq!(ctx.char_count('\n'), 0);
5105 }
5106
5107 #[test]
5108 fn test_char_count_fallback_for_untracked() {
5109 let content = "@@@ $$ %%%";
5110 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5111
5112 assert_eq!(ctx.char_count('@'), 3, "Should count 3 @ via fallback");
5113 assert_eq!(ctx.char_count('$'), 2, "Should count 2 $ via fallback");
5114 assert_eq!(ctx.char_count('%'), 3, "Should count 3 % via fallback");
5115 assert_eq!(ctx.char_count('^'), 0, "Should count 0 for absent char");
5116 }
5117
5118 #[test]
5119 fn test_char_count_empty_content() {
5120 let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
5121
5122 assert_eq!(ctx.char_count('#'), 0);
5123 assert_eq!(ctx.char_count('*'), 0);
5124 assert_eq!(ctx.char_count('@'), 0);
5125 assert!(!ctx.has_char('#'));
5126 assert!(!ctx.has_char('@'));
5127 }
5128
5129 #[test]
5134 fn test_is_in_html_tag_simple() {
5135 let content = "<div>content</div>";
5136 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5137
5138 assert!(ctx.is_in_html_tag(0), "Position 0 (<) should be in tag");
5140 assert!(ctx.is_in_html_tag(1), "Position 1 (d) should be in tag");
5141 assert!(ctx.is_in_html_tag(4), "Position 4 (>) should be in tag");
5142
5143 assert!(!ctx.is_in_html_tag(5), "Position 5 (c) should not be in tag");
5145 assert!(!ctx.is_in_html_tag(10), "Position 10 (t) should not be in tag");
5146
5147 assert!(ctx.is_in_html_tag(12), "Position 12 (<) should be in tag");
5149 assert!(ctx.is_in_html_tag(17), "Position 17 (>) should be in tag");
5150 }
5151
5152 #[test]
5153 fn test_is_in_html_tag_self_closing() {
5154 let content = "Text <br/> more text";
5155 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5156
5157 assert!(!ctx.is_in_html_tag(0), "Position 0 should not be in tag");
5159 assert!(!ctx.is_in_html_tag(4), "Position 4 (space) should not be in tag");
5160
5161 assert!(ctx.is_in_html_tag(5), "Position 5 (<) should be in tag");
5163 assert!(ctx.is_in_html_tag(8), "Position 8 (/) should be in tag");
5164 assert!(ctx.is_in_html_tag(9), "Position 9 (>) should be in tag");
5165
5166 assert!(!ctx.is_in_html_tag(10), "Position 10 (space) should not be in tag");
5168 }
5169
5170 #[test]
5171 fn test_is_in_html_tag_with_attributes() {
5172 let content = r#"<a href="url" class="link">text</a>"#;
5173 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5174
5175 assert!(ctx.is_in_html_tag(0), "Start of tag");
5177 assert!(ctx.is_in_html_tag(10), "Inside href attribute");
5178 assert!(ctx.is_in_html_tag(20), "Inside class attribute");
5179 assert!(ctx.is_in_html_tag(26), "End of opening tag");
5180
5181 assert!(!ctx.is_in_html_tag(27), "Start of content");
5183 assert!(!ctx.is_in_html_tag(30), "End of content");
5184
5185 assert!(ctx.is_in_html_tag(31), "Start of closing tag");
5187 }
5188
5189 #[test]
5190 fn test_is_in_html_tag_multiline() {
5191 let content = "<div\n class=\"test\"\n>\ncontent\n</div>";
5192 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5193
5194 assert!(ctx.is_in_html_tag(0), "Start of multiline tag");
5196 assert!(ctx.is_in_html_tag(5), "After first newline in tag");
5197 assert!(ctx.is_in_html_tag(15), "Inside attribute");
5198
5199 let closing_bracket_pos = content.find(">\n").unwrap();
5201 assert!(!ctx.is_in_html_tag(closing_bracket_pos + 2), "Content after tag");
5202 }
5203
5204 #[test]
5205 fn test_is_in_html_tag_no_tags() {
5206 let content = "Plain text without any HTML";
5207 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5208
5209 for i in 0..content.len() {
5211 assert!(!ctx.is_in_html_tag(i), "Position {i} should not be in tag");
5212 }
5213 }
5214
5215 #[test]
5220 fn test_is_in_jinja_range_expression() {
5221 let content = "Hello {{ name }}!";
5222 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5223
5224 assert!(!ctx.is_in_jinja_range(0), "H should not be in Jinja");
5226 assert!(!ctx.is_in_jinja_range(5), "Space before Jinja should not be in Jinja");
5227
5228 assert!(ctx.is_in_jinja_range(6), "First brace should be in Jinja");
5230 assert!(ctx.is_in_jinja_range(7), "Second brace should be in Jinja");
5231 assert!(ctx.is_in_jinja_range(10), "name should be in Jinja");
5232 assert!(ctx.is_in_jinja_range(14), "Closing brace should be in Jinja");
5233 assert!(ctx.is_in_jinja_range(15), "Second closing brace should be in Jinja");
5234
5235 assert!(!ctx.is_in_jinja_range(16), "! should not be in Jinja");
5237 }
5238
5239 #[test]
5240 fn test_is_in_jinja_range_statement() {
5241 let content = "{% if condition %}content{% endif %}";
5242 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5243
5244 assert!(ctx.is_in_jinja_range(0), "Start of Jinja statement");
5246 assert!(ctx.is_in_jinja_range(5), "condition should be in Jinja");
5247 assert!(ctx.is_in_jinja_range(17), "End of opening statement");
5248
5249 assert!(!ctx.is_in_jinja_range(18), "content should not be in Jinja");
5251
5252 assert!(ctx.is_in_jinja_range(25), "Start of endif");
5254 assert!(ctx.is_in_jinja_range(32), "endif should be in Jinja");
5255 }
5256
5257 #[test]
5258 fn test_is_in_jinja_range_multiple() {
5259 let content = "{{ a }} and {{ b }}";
5260 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5261
5262 assert!(ctx.is_in_jinja_range(0));
5264 assert!(ctx.is_in_jinja_range(3));
5265 assert!(ctx.is_in_jinja_range(6));
5266
5267 assert!(!ctx.is_in_jinja_range(8));
5269 assert!(!ctx.is_in_jinja_range(11));
5270
5271 assert!(ctx.is_in_jinja_range(12));
5273 assert!(ctx.is_in_jinja_range(15));
5274 assert!(ctx.is_in_jinja_range(18));
5275 }
5276
5277 #[test]
5278 fn test_is_in_jinja_range_no_jinja() {
5279 let content = "Plain text with single braces but not Jinja";
5280 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5281
5282 for i in 0..content.len() {
5284 assert!(!ctx.is_in_jinja_range(i), "Position {i} should not be in Jinja");
5285 }
5286 }
5287
/// A reference definition with a double-quoted title: `is_in_link_title` is
/// true from `title_byte_start` up to (but not including) `title_byte_end`,
/// and false for an offset inside the URL.
#[test]
fn test_is_in_link_title_with_title() {
    let source = r#"[ref]: https://example.com "Title text"

Some content."#;
    let lint_ctx = LintContext::new(source, MarkdownFlavor::Standard, None);

    let defs = &lint_ctx.reference_defs;
    assert_eq!(defs.len(), 1);

    let definition = &defs[0];
    assert!(definition.title_byte_start.is_some());
    assert!(definition.title_byte_end.is_some());
    let (title_start, title_end) = (
        definition.title_byte_start.unwrap(),
        definition.title_byte_end.unwrap(),
    );

    // (byte offset, expected membership, failure message); offset 10 lies
    // inside `https://example.com`.
    let cases = [
        (10, false, "URL should not be in title"),
        (title_start, true, "Title start should be in title"),
        (title_start + 5, true, "Middle of title should be in title"),
        (title_end - 1, true, "End of title should be in title"),
        (title_end, false, "After title end should not be in title"),
    ];

    for (offset, expected, message) in cases {
        assert!(lint_ctx.is_in_link_title(offset) == expected, "{message}");
    }
}
5325
/// A reference definition with no title records no title byte range, and no
/// offset in the document is reported as being inside a link title.
#[test]
fn test_is_in_link_title_without_title() {
    let source = "[ref]: https://example.com\n\nSome content.";
    let lint_ctx = LintContext::new(source, MarkdownFlavor::Standard, None);

    assert_eq!(lint_ctx.reference_defs.len(), 1);

    let definition = &lint_ctx.reference_defs[0];
    assert!(definition.title_byte_start.is_none());
    assert!(definition.title_byte_end.is_none());

    (0..source.len()).for_each(|i| {
        assert!(!lint_ctx.is_in_link_title(i), "Position {i} should not be in title");
    });
}
5342
/// Three reference definitions, of which the first and third carry a title:
/// title ranges are tracked per definition and queried independently.
#[test]
fn test_is_in_link_title_multiple_refs() {
    let source = r#"[ref1]: /url1 "Title One"
[ref2]: /url2
[ref3]: /url3 "Title Three"
"#;
    let lint_ctx = LintContext::new(source, MarkdownFlavor::Standard, None);
    let in_title = |offset: usize| lint_ctx.is_in_link_title(offset);

    assert_eq!(lint_ctx.reference_defs.len(), 3);

    let first = lint_ctx.reference_defs.iter().find(|def| def.id == "ref1").unwrap();
    assert!(first.title_byte_start.is_some());

    let second = lint_ctx.reference_defs.iter().find(|def| def.id == "ref2").unwrap();
    assert!(second.title_byte_start.is_none());

    let third = lint_ctx.reference_defs.iter().find(|def| def.id == "ref3").unwrap();
    assert!(third.title_byte_start.is_some());

    // Range checks run only when both ends of the title range are recorded.
    if let Some((start, end)) = first.title_byte_start.zip(first.title_byte_end) {
        assert!(in_title(start + 1));
        assert!(!in_title(end + 5));
    }

    if let Some((start, _)) = third.title_byte_start.zip(third.title_byte_end) {
        assert!(in_title(start + 1));
    }
}
5377
/// A single-quoted title on a reference definition: when a title range is
/// recorded, its start and interior are in the title and its end offset is not.
#[test]
fn test_is_in_link_title_single_quotes() {
    let source = "[ref]: /url 'Single quoted title'\n";
    let lint_ctx = LintContext::new(source, MarkdownFlavor::Standard, None);
    let in_title = |offset: usize| lint_ctx.is_in_link_title(offset);

    assert_eq!(lint_ctx.reference_defs.len(), 1);
    let definition = &lint_ctx.reference_defs[0];

    // Range checks run only when both ends of the title range are recorded.
    if let Some((start, end)) = definition.title_byte_start.zip(definition.title_byte_end) {
        assert!(in_title(start));
        assert!(in_title(start + 5));
        assert!(!in_title(end));
    }
}
5392
/// A parenthesized title on a reference definition. The test accepts either
/// outcome: if a title range is recorded it must behave as a half-open
/// range, otherwise no offset may be reported as inside a title.
#[test]
fn test_is_in_link_title_parentheses() {
    let source = "[ref]: /url (Parenthesized title)\n";
    let lint_ctx = LintContext::new(source, MarkdownFlavor::Standard, None);
    let in_title = |offset: usize| lint_ctx.is_in_link_title(offset);

    // `None` covers both "no definition parsed" and "definition without a
    // complete title range".
    let title_range = lint_ctx
        .reference_defs
        .first()
        .and_then(|def| def.title_byte_start.zip(def.title_byte_end));

    match title_range {
        Some((start, end)) => {
            assert!(in_title(start));
            assert!(in_title(start + 5));
            assert!(!in_title(end));
        }
        None => {
            for offset in 0..source.len() {
                assert!(!in_title(offset));
            }
        }
    }
}
5421
/// With no reference definitions in the document, no offset is inside a
/// link title.
#[test]
fn test_is_in_link_title_no_refs() {
    let source = "Just plain text without any reference definitions.";
    let lint_ctx = LintContext::new(source, MarkdownFlavor::Standard, None);

    assert!(lint_ctx.reference_defs.is_empty());

    let any_in_title = (0..source.len()).any(|offset| lint_ctx.is_in_link_title(offset));
    assert!(!any_in_title);
}
5433
/// A single `$...$` span whose body looks like a Markdown link is reported
/// as one inline (non-display) math span with the delimiters stripped.
#[test]
fn test_math_spans_inline() {
    let source = "Text with inline math $[f](x)$ in it.";
    let lint_ctx = LintContext::new(source, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should detect one inline math span");

    let inline = &spans[0];
    assert!(!inline.is_display, "Should be inline math, not display");
    assert_eq!(inline.content, "[f](x)", "Content should be extracted correctly");
}
5450
/// A `$$...$$` block written on one line is reported as a single display
/// math span whose content retains the link-like `[x](\zeta)` text.
#[test]
fn test_math_spans_display_single_line() {
    let source = "$$X(\\zeta) = \\mathcal Z [x](\\zeta)$$";
    let lint_ctx = LintContext::new(source, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should detect one display math span");

    let display = &spans[0];
    assert!(display.is_display, "Should be display math");

    let has_link_like_text = display.content.contains("[x](\\zeta)");
    assert!(has_link_like_text, "Content should contain the link-like pattern");
}
5466
/// A `$$` block spread over several lines, surrounded by paragraphs, is
/// reported as exactly one display math span.
#[test]
fn test_math_spans_display_multiline() {
    let source = "Before\n\n$$\n[x](\\zeta) = \\sum_k x(k)\n$$\n\nAfter";
    let lint_ctx = LintContext::new(source, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should detect one display math span");

    let display = &spans[0];
    assert!(display.is_display, "Should be display math");
}
5478
/// Checks `is_in_math_span` for offsets inside, before, and after an inline
/// `$...$` span, including the offset immediately following the closing `$`.
#[test]
fn test_is_in_math_span() {
    let content = "Text $[f](x)$ more text";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    let math_start = content.find('$').unwrap();
    // One past the closing `$`: the first byte that is no longer part of the
    // delimited span.
    let math_end = content.rfind('$').unwrap() + 1;

    assert!(
        ctx.is_in_math_span(math_start + 1),
        "Position inside math span should return true"
    );
    assert!(
        ctx.is_in_math_span(math_start + 3),
        "Position inside math span should return true"
    );

    assert!(!ctx.is_in_math_span(0), "Position before math span should return false");

    // Probe the boundary itself, not just one byte further on. Checking only
    // `math_end + 1` would let an off-by-one in the end bound go unnoticed.
    assert!(
        !ctx.is_in_math_span(math_end),
        "Position immediately after math span should return false"
    );
    assert!(
        !ctx.is_in_math_span(math_end + 1),
        "Position after math span should return false"
    );
}
5504
/// A line containing both an inline math span and an inline code span, each
/// with link-like content: each is reported by its own accessor.
#[test]
fn test_math_spans_mixed_with_code() {
    let source = "Math $[f](x)$ and code `[g](y)` mixed";
    let lint_ctx = LintContext::new(source, MarkdownFlavor::Standard, None);

    let math = lint_ctx.math_spans();
    let code = lint_ctx.code_spans();

    assert_eq!(math.len(), 1, "Should have one math span");
    assert_eq!(code.len(), 1, "Should have one code span");

    // Delimiters (`$` and backticks) are not part of the recorded content.
    assert_eq!(math[0].content, "[f](x)");
    assert_eq!(code[0].content, "[g](y)");
}
5521
/// Text without any `$` delimiters yields no math spans.
#[test]
fn test_math_spans_no_math() {
    let source = "Regular text without any math at all.";
    let lint_ctx = LintContext::new(source, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    let found_none = spans.is_empty();
    assert!(found_none, "Should have no math spans");
}
5530
/// Two inline spans and one display span on the same line are all detected
/// and classified by their `is_display` flag.
#[test]
fn test_math_spans_multiple() {
    let source = "First $a$ and second $b$ and display $$c$$";
    let lint_ctx = LintContext::new(source, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 3, "Should detect three math spans");

    // Every span is either display or inline, so one count determines the other.
    let display_count = spans.iter().filter(|span| span.is_display).count();
    let inline_count = spans.len() - display_count;

    assert_eq!(inline_count, 2, "Should have two inline math spans");
    assert_eq!(display_count, 1, "Should have one display math span");
}
5546
/// Pins the half-open nature of a math span's byte range: `byte_offset` is
/// inside the span, `byte_end` is not.
#[test]
fn test_is_in_math_span_boundary_positions() {
    let source = "$[f](x)$";
    let lint_ctx = LintContext::new(source, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should have one math span");

    let (first_byte, end_byte) = (spans[0].byte_offset, spans[0].byte_end);

    // (byte offset, expected membership, failure message)
    let cases = [
        (first_byte, true, "Start position should be in span"),
        (first_byte + 1, true, "Position after start should be in span"),
        (end_byte - 1, true, "Position at end-1 should be in span"),
        (
            end_byte,
            false,
            "Position at byte_end should NOT be in span (exclusive)",
        ),
    ];

    for (offset, expected, message) in cases {
        assert!(lint_ctx.is_in_math_span(offset) == expected, "{message}");
    }
}
5583
/// A math span opening on the very first byte of the document has
/// `byte_offset == 0`.
#[test]
fn test_math_spans_at_document_start() {
    let source = "$x$ text";
    let lint_ctx = LintContext::new(source, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1);

    let leading = &spans[0];
    assert_eq!(leading.byte_offset, 0, "Math should start at byte 0");
}
5593
/// A math span closing on the last byte of the document has `byte_end`
/// equal to the document length.
#[test]
fn test_math_spans_at_document_end() {
    let source = "text $x$";
    let lint_ctx = LintContext::new(source, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1);

    let trailing = &spans[0];
    let document_len = source.len();
    assert_eq!(trailing.byte_end, document_len, "Math should end at document end");
}
5603
/// Back-to-back `$a$$b$` with no separating text: however the input is split
/// into spans, every byte must be covered by some math span.
#[test]
fn test_math_spans_consecutive() {
    let source = "$a$$b$";
    let lint_ctx = LintContext::new(source, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    let found_any = !spans.is_empty();
    assert!(found_any, "Should detect at least one math span");

    (0..source.len()).for_each(|i| {
        assert!(lint_ctx.is_in_math_span(i), "Position {i} should be in a math span");
    });
}
5618
/// A lone, unbalanced `$` (currency usage) must not produce a math span
/// that swallows the following number.
#[test]
fn test_math_spans_currency_not_math() {
    let source = "Price is $100";
    let lint_ctx = LintContext::new(source, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();

    // Holds trivially when there are no spans at all.
    let none_contain_amount = spans.iter().all(|span| !span.content.contains("100"));
    assert!(
        none_contain_amount,
        "Unbalanced $ should not create math span containing 100"
    );
}
5633
/// `get_reference_url` resolves reference IDs regardless of the letter case
/// used in either the definition or the lookup key, and returns `None` for
/// an ID that was never defined.
#[test]
fn test_reference_lookup_o1_basic() {
    let source = r#"[ref1]: /url1
[REF2]: /url2 "Title"
[Ref3]: /url3

Use [link][ref1] and [link][REF2]."#;
    let lint_ctx = LintContext::new(source, MarkdownFlavor::Standard, None);

    assert_eq!(lint_ctx.reference_defs.len(), 3);

    // (lookup key, expected URL)
    let lookups: [(&str, Option<&str>); 7] = [
        ("ref1", Some("/url1")),
        ("REF1", Some("/url1")),
        ("Ref1", Some("/url1")),
        ("ref2", Some("/url2")),
        ("REF2", Some("/url2")),
        ("ref3", Some("/url3")),
        ("nonexistent", None),
    ];

    for (key, expected_url) in lookups {
        assert_eq!(lint_ctx.get_reference_url(key), expected_url);
    }
}
5659
/// `get_reference_def` returns the full definition (URL and title) for a
/// known ID in any letter case, and `None` for an unknown ID.
#[test]
fn test_reference_lookup_o1_get_reference_def() {
    let source = r#"[myref]: https://example.com "My Title"
"#;
    let lint_ctx = LintContext::new(source, MarkdownFlavor::Standard, None);
    let expected_url = "https://example.com";

    // Lookup using the ID exactly as written in the definition.
    let exact = lint_ctx.get_reference_def("myref").expect("Should find myref");
    assert_eq!(exact.url, expected_url);
    assert_eq!(exact.title.as_deref(), Some("My Title"));

    // Lookup using an upper-cased ID.
    let upper = lint_ctx.get_reference_def("MYREF").expect("Should find MYREF");
    assert_eq!(upper.url, expected_url);

    let missing = lint_ctx.get_reference_def("nonexistent");
    assert!(missing.is_none());
}
5678
/// `has_reference_def` reports defined IDs in any letter case and rejects an
/// ID that was never defined.
#[test]
fn test_reference_lookup_o1_has_reference_def() {
    let source = r#"[foo]: /foo
[BAR]: /bar
"#;
    let lint_ctx = LintContext::new(source, MarkdownFlavor::Standard, None);
    let is_defined = |id: &str| lint_ctx.has_reference_def(id);

    // Defined in lower case.
    assert!(is_defined("foo"));
    assert!(is_defined("FOO"));

    // Defined in upper case.
    assert!(is_defined("bar"));
    assert!(is_defined("Bar"));

    // Never defined.
    assert!(!is_defined("baz"));
}
5693
/// With no reference definitions in the document, all three lookup helpers
/// report "not found".
#[test]
fn test_reference_lookup_o1_empty_content() {
    let source = "No references here.";
    let lint_ctx = LintContext::new(source, MarkdownFlavor::Standard, None);
    let probe = "anything";

    assert!(lint_ctx.reference_defs.is_empty());
    assert!(lint_ctx.get_reference_url(probe).is_none());
    assert!(lint_ctx.get_reference_def(probe).is_none());
    assert!(!lint_ctx.has_reference_def(probe));
}
5704
/// Reference IDs containing `-`, `_` and `.` are looked up verbatim.
#[test]
fn test_reference_lookup_o1_special_characters_in_id() {
    let source = r#"[ref-with-dash]: /url1
[ref_with_underscore]: /url2
[ref.with.dots]: /url3
"#;
    let lint_ctx = LintContext::new(source, MarkdownFlavor::Standard, None);

    // (lookup key, expected URL)
    let lookups = [
        ("ref-with-dash", "/url1"),
        ("ref_with_underscore", "/url2"),
        ("ref.with.dots", "/url3"),
    ];

    for (key, url) in lookups {
        assert_eq!(lint_ctx.get_reference_url(key), Some(url));
    }
}
5717
/// Non-ASCII reference IDs resolve, and case-insensitive matching also
/// applies to non-ASCII letters (`ÉMOJI` finds `émoji`).
#[test]
fn test_reference_lookup_o1_unicode_id() {
    let source = r#"[日本語]: /japanese
[émoji]: /emoji
"#;
    let lint_ctx = LintContext::new(source, MarkdownFlavor::Standard, None);

    // (lookup key, expected URL)
    let lookups = [
        ("日本語", "/japanese"),
        ("émoji", "/emoji"),
        ("ÉMOJI", "/emoji"),
    ];

    for (key, url) in lookups {
        assert_eq!(lint_ctx.get_reference_url(key), Some(url));
    }
}
5729}