1use crate::config::MarkdownFlavor;
2use crate::inline_config::InlineConfig;
3use crate::rules::front_matter_utils::FrontMatterUtils;
4use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
5use crate::utils::element_cache::ElementCache;
6use crate::utils::regex_cache::URL_SIMPLE_REGEX;
7use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
8use regex::Regex;
9use std::borrow::Cow;
10use std::collections::HashMap;
11use std::path::PathBuf;
12use std::sync::LazyLock;
13
/// Evaluates `$body` and yields its value, optionally reporting its duration.
///
/// A timer is started before `$body` runs. When `$enabled` is true, the
/// elapsed time is written to stderr as `[PROFILE] <label>: <duration>`.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($label:expr, $enabled:expr, $body:expr) => {{
        let timer = std::time::Instant::now();
        let value = $body;
        // The flag is consulted only after the body has finished.
        if $enabled {
            eprintln!("[PROFILE] {}: {:?}", $label, timer.elapsed());
        }
        value
    }};
}
26
/// wasm32 variant of `profile_section!`: yields `$code` with no timing and no
/// output. `$name` is accepted only to keep both variants call-compatible.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        // Touch the flag so the caller's `profile` binding is not reported as
        // an unused variable on wasm32 builds.
        let _ = $profile;
        $code
    }};
}
31
/// Matches inline links `[text](url "title")` and reference links `[text][id]`.
///
/// Capture groups: 1 = link text, 2 = URL written in angle brackets,
/// 3 = bare URL, 4 = double-quoted title, 5 = single-quoted title,
/// 6 = reference ID. The `(?sx)` flags enable dot-matches-newline and verbose
/// mode, so whitespace and `#` comments inside the pattern are ignored.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\] # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
        \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
        |
        \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
45
/// Matches inline images `![alt](url "title")` and reference images `![alt][id]`.
///
/// Same shape as `LINK_PATTERN` with a leading `!`. Capture groups:
/// 1 = alt text, 2 = URL written in angle brackets, 3 = bare URL,
/// 4 = double-quoted title, 5 = single-quoted title, 6 = reference ID.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\] # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
        \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
        |
        \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
59
/// Matches a reference definition line such as `[id]: url "title"`.
///
/// Multi-line mode; allows up to three leading spaces. Capture groups:
/// 1 = reference ID, 2 = URL (a run of non-whitespace), 3 = double-quoted
/// title, 4 = single-quoted title.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
63
/// Matches an e-mail-like token: a local part, `@`, a domain, and a final
/// dot-separated label of at least two ASCII letters.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
69
/// Captures (group 1) a leading blockquote prefix: optional whitespace, one or
/// more `>` characters, then optional whitespace.
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
72
/// Per-line metadata recorded for one line of the source document.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset in the source at which this line starts.
    pub byte_offset: usize,
    /// Length of the line in bytes; `content()` slices exactly this many bytes.
    pub byte_len: usize,
    // NOTE(review): the units of the two indent fields (bytes vs. columns,
    // tab expansion) are not visible in this part of the file — confirm.
    pub indent: usize,
    pub visual_indent: usize,
    /// Whether the line is blank.
    pub is_blank: bool,
    // The `in_*` flags below record which surrounding construct the line
    // belongs to. They are filled in by analysis passes that live outside
    // this part of the file.
    pub in_code_block: bool,
    pub in_front_matter: bool,
    pub in_html_block: bool,
    pub in_html_comment: bool,
    /// List-item details when the line starts a list item.
    pub list_item: Option<ListItemInfo>,
    /// Heading details when the line is a heading.
    pub heading: Option<HeadingInfo>,
    /// Blockquote details when the line is part of a blockquote.
    pub blockquote: Option<BlockquoteInfo>,
    pub in_mkdocstrings: bool,
    pub in_esm_block: bool,
    /// Set by `LintContext::new` on every line after the first line of a
    /// code span that extends over several lines (through its last line).
    pub in_code_span_continuation: bool,
    pub is_horizontal_rule: bool,
    pub in_math_block: bool,
    pub in_quarto_div: bool,
    pub in_jsx_expression: bool,
    pub in_mdx_comment: bool,
    pub in_jsx_component: bool,
    pub in_jsx_fragment: bool,
    pub in_admonition: bool,
    pub in_content_tab: bool,
    pub in_definition_list: bool,
}
130
131impl LineInfo {
132 pub fn content<'a>(&self, source: &'a str) -> &'a str {
134 &source[self.byte_offset..self.byte_offset + self.byte_len]
135 }
136}
137
/// Details of a list item found at the start of a line.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The list marker text.
    pub marker: String,
    /// Whether the item belongs to an ordered list.
    pub is_ordered: bool,
    // presumably the item's number for ordered lists — confirm against the parser
    pub number: Option<usize>,
    /// Column of the marker. `compute_mixed_list_nesting` treats a value of 1
    /// the same as position 0.
    pub marker_column: usize,
    // presumably the column where the item's content begins — confirm
    pub content_column: usize,
}
152
/// Syntax variant used to write a heading.
///
/// `Eq` is derived alongside `PartialEq`: the enum has no fields, so equality
/// is total and the type can be used where `Eq` is required.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HeadingStyle {
    /// ATX-style heading.
    ATX,
    // presumably a level-1 setext heading — confirm against the heading parser
    Setext1,
    // presumably a level-2 setext heading — confirm against the heading parser
    Setext2,
}
163
/// A link found in the document.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// Line on which the link starts.
    pub line: usize,
    /// Column at which the link starts.
    pub start_col: usize,
    /// Column at which the link ends.
    pub end_col: usize,
    /// Byte offset at which the link starts.
    pub byte_offset: usize,
    /// Byte offset at which the link ends.
    pub byte_end: usize,
    /// Link text.
    pub text: Cow<'a, str>,
    /// Link destination.
    pub url: Cow<'a, str>,
    /// Whether the link is a reference-style link.
    pub is_reference: bool,
    /// Reference ID, when one is present.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type as reported by `pulldown_cmark`.
    pub link_type: LinkType,
}
188
/// A broken link reported through the parser's broken-link callback.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text of the broken link.
    pub reference: String,
    /// Span reported by the parser for the broken link.
    pub span: std::ops::Range<usize>,
}
197
/// A footnote reference found in the document.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// Footnote identifier.
    pub id: String,
    /// Line on which the reference appears.
    pub line: usize,
    /// Byte offset at which the reference starts.
    pub byte_offset: usize,
    /// Byte offset at which the reference ends.
    pub byte_end: usize,
}
210
/// An image found in the document.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// Line on which the image starts.
    pub line: usize,
    /// Column at which the image starts.
    pub start_col: usize,
    /// Column at which the image ends.
    pub end_col: usize,
    /// Byte offset at which the image starts.
    pub byte_offset: usize,
    /// Byte offset at which the image ends.
    pub byte_end: usize,
    /// Alternative text.
    pub alt_text: Cow<'a, str>,
    /// Image source URL.
    pub url: Cow<'a, str>,
    /// Whether the image uses reference-style syntax.
    pub is_reference: bool,
    /// Reference ID, when one is present.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type as reported by `pulldown_cmark`.
    pub link_type: LinkType,
}
235
/// A reference definition found in the document.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line on which the definition appears.
    pub line: usize,
    /// Reference ID as written; lookups in `LintContext` use its lowercased form.
    pub id: String,
    /// Destination URL.
    pub url: String,
    /// Optional title.
    pub title: Option<String>,
    /// Start of the definition's byte range (inclusive in `is_in_reference_def`).
    pub byte_offset: usize,
    /// End of the definition's byte range (exclusive in `is_in_reference_def`).
    pub byte_end: usize,
    /// Start of the title's byte range (inclusive in `is_in_link_title`), if any.
    pub title_byte_start: Option<usize>,
    /// End of the title's byte range (exclusive in `is_in_link_title`), if any.
    pub title_byte_end: Option<usize>,
}
256
/// An inline code span, possibly extending over several lines.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// 1-based line on which the span starts.
    pub line: usize,
    /// 1-based line on which the span ends.
    pub end_line: usize,
    /// Start column; `is_in_code_span` compares it with a 0-based column (inclusive).
    pub start_col: usize,
    /// End column; `is_in_code_span` compares it with a 0-based column (exclusive).
    pub end_col: usize,
    /// Start of the span's byte range (inclusive).
    pub byte_offset: usize,
    /// End of the span's byte range (exclusive).
    pub byte_end: usize,
    /// Number of backticks in the delimiter.
    pub backtick_count: usize,
    /// Text of the span.
    pub content: String,
}
277
/// A math span found in the document.
#[derive(Debug, Clone)]
pub struct MathSpan {
    /// Line on which the span starts.
    pub line: usize,
    /// Line on which the span ends.
    pub end_line: usize,
    /// Column at which the span starts.
    pub start_col: usize,
    /// Column at which the span ends.
    pub end_col: usize,
    /// Start of the span's byte range (inclusive in `is_in_math_span`).
    pub byte_offset: usize,
    /// End of the span's byte range (exclusive in `is_in_math_span`).
    pub byte_end: usize,
    /// Whether this is display math rather than inline math.
    pub is_display: bool,
    /// Text of the span.
    pub content: String,
}
298
/// Details of a heading found on a line.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level.
    pub level: u8,
    /// Syntax used to write the heading.
    pub style: HeadingStyle,
    /// The heading marker text.
    pub marker: String,
    /// Column of the marker.
    pub marker_column: usize,
    /// Column where the heading content begins.
    pub content_column: usize,
    // NOTE(review): how `text` differs from `raw_text` is not visible here — confirm.
    pub text: String,
    /// Custom ID attached to the heading, if any.
    pub custom_id: Option<String>,
    pub raw_text: String,
    /// Whether the heading has a closing sequence.
    pub has_closing_sequence: bool,
    /// The closing sequence text.
    pub closing_sequence: String,
    /// Validity flag; `ValidHeadingsIter` yields only headings where this is true.
    pub is_valid: bool,
}
326
/// A heading whose `is_valid` flag is set, paired with its line.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// 1-based line number (slice index + 1).
    pub line_num: usize,
    /// The heading found on that line.
    pub heading: &'a HeadingInfo,
    /// The line that carries the heading.
    pub line_info: &'a LineInfo,
}
340
/// Iterator over the lines of a document that carry a valid heading.
pub struct ValidHeadingsIter<'a> {
    /// Lines being scanned.
    lines: &'a [LineInfo],
    /// Index of the next line to examine.
    current_index: usize,
}
349
350impl<'a> ValidHeadingsIter<'a> {
351 fn new(lines: &'a [LineInfo]) -> Self {
352 Self {
353 lines,
354 current_index: 0,
355 }
356 }
357}
358
359impl<'a> Iterator for ValidHeadingsIter<'a> {
360 type Item = ValidHeading<'a>;
361
362 fn next(&mut self) -> Option<Self::Item> {
363 while self.current_index < self.lines.len() {
364 let idx = self.current_index;
365 self.current_index += 1;
366
367 let line_info = &self.lines[idx];
368 if let Some(heading) = &line_info.heading
369 && heading.is_valid
370 {
371 return Some(ValidHeading {
372 line_num: idx + 1, heading,
374 line_info,
375 });
376 }
377 }
378 None
379 }
380}
381
/// Details of a blockquote line.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Blockquote nesting level.
    pub nesting_level: usize,
    /// Indentation text before the marker.
    pub indent: String,
    /// Column of the marker.
    pub marker_column: usize,
    /// Blockquote prefix; `blockquote_prefix_for_blank_line` returns it with
    /// trailing whitespace removed.
    pub prefix: String,
    /// Content of the line.
    pub content: String,
    /// Whether no space follows the marker.
    pub has_no_space_after_marker: bool,
    /// Whether more than one space follows the marker.
    pub has_multiple_spaces_after_marker: bool,
    // NOTE(review): presumably consumed by the MD028 rule; the criteria are
    // not visible in this part of the file — confirm.
    pub needs_md028_fix: bool,
}
402
/// A contiguous list in the document.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// First line of the block (inclusive in `is_in_list_block`).
    pub start_line: usize,
    /// Last line of the block (inclusive in `is_in_list_block`).
    pub end_line: usize,
    /// Whether the list is ordered.
    pub is_ordered: bool,
    /// List marker, when one is recorded.
    pub marker: Option<String>,
    /// Blockquote prefix associated with the list.
    pub blockquote_prefix: String,
    /// Lines that start list items.
    pub item_lines: Vec<usize>,
    /// Nesting level of the list.
    pub nesting_level: usize,
    /// Largest marker width in the block.
    pub max_marker_width: usize,
}
423
424use std::sync::{Arc, OnceLock};
425
// Map keyed by `usize` with a per-item tuple value.
// NOTE(review): the tuple presumably mirrors `ListItemInfo`
// (is_ordered, marker, marker_column, content_column, number) — confirm at the use site.
type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;
428
// A list of `(usize, usize)` pairs; presumably (start, end) byte ranges — confirm at the use site.
type ByteRanges = Vec<(usize, usize)>;
431
/// Counts of selected characters in the document, used by `has_char`,
/// `char_count` and the `likely_has_*` checks on `LintContext`.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count of `#`.
    pub hash_count: usize,
    /// Count of `*`.
    pub asterisk_count: usize,
    /// Count of `_`.
    pub underscore_count: usize,
    /// Count of `-`.
    pub hyphen_count: usize,
    /// Count of `+`.
    pub plus_count: usize,
    /// Count of `>`.
    pub gt_count: usize,
    /// Count of `|`.
    pub pipe_count: usize,
    /// Count reported for `[`.
    pub bracket_count: usize,
    /// Count of `` ` ``.
    pub backtick_count: usize,
    /// Count of `<`.
    pub lt_count: usize,
    /// Count of `!`.
    pub exclamation_count: usize,
    /// Count of newline characters.
    pub newline_count: usize,
}
460
/// An HTML tag found in the document.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line of the tag; `html_tags_on_line` filters on this value.
    pub line: usize,
    /// Column at which the tag starts.
    pub start_col: usize,
    /// Column at which the tag ends.
    pub end_col: usize,
    /// Start of the tag's byte range (inclusive in `is_in_html_tag`).
    pub byte_offset: usize,
    /// End of the tag's byte range (exclusive in `is_in_html_tag`).
    pub byte_end: usize,
    /// Tag name.
    pub tag_name: String,
    /// Whether this is a closing tag.
    pub is_closing: bool,
    /// Whether the tag is self-closing.
    pub is_self_closing: bool,
    /// Raw text of the tag.
    pub raw_content: String,
}
483
/// An emphasis span found in the document.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line of the span; `emphasis_spans_on_line` filters on this value.
    pub line: usize,
    /// Column at which the span starts.
    pub start_col: usize,
    /// Column at which the span ends.
    pub end_col: usize,
    /// Byte offset at which the span starts.
    pub byte_offset: usize,
    /// Byte offset at which the span ends.
    pub byte_end: usize,
    /// Emphasis marker character.
    pub marker: char,
    /// Number of marker characters.
    pub marker_count: usize,
    /// Text of the span.
    pub content: String,
}
504
/// A table row found in the document.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line of the row; `table_rows_on_line` filters on this value.
    pub line: usize,
    /// Whether the row is a separator row.
    pub is_separator: bool,
    /// Number of columns in the row.
    pub column_count: usize,
    // NOTE(review): the string encoding of each alignment is not visible here — confirm.
    pub column_alignments: Vec<String>,
}
517
/// A bare URL found in the document.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line of the URL; `bare_urls_on_line` filters on this value.
    pub line: usize,
    /// Column at which the URL starts.
    pub start_col: usize,
    /// Column at which the URL ends.
    pub end_col: usize,
    /// Byte offset at which the URL starts.
    pub byte_offset: usize,
    /// Byte offset at which the URL ends.
    pub byte_end: usize,
    /// The URL text.
    pub url: String,
    // NOTE(review): the set of values stored here is not visible in this part of the file — confirm.
    pub url_type: String,
}
536
/// Analysis results for one document, computed once and shared by the rules.
///
/// Most fields are filled eagerly by `LintContext::new`; the `*_cache` fields
/// are `OnceLock`s that are either pre-filled there or computed on first use.
pub struct LintContext<'a> {
    /// The document text.
    pub content: &'a str,
    /// Byte offset at which each line starts.
    pub line_offsets: Vec<usize>,
    /// Code block ranges as reported by `CodeBlockUtils`.
    pub code_blocks: Vec<(usize, usize)>,
    /// Per-line metadata; index `n - 1` holds line `n`.
    pub lines: Vec<LineInfo>,
    /// Links found in the document.
    pub links: Vec<ParsedLink<'a>>,
    /// Images found in the document.
    pub images: Vec<ParsedImage<'a>>,
    /// Broken links reported by the parser.
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Footnote references found in the document.
    pub footnote_refs: Vec<FootnoteRef>,
    /// Reference definitions found in the document.
    pub reference_defs: Vec<ReferenceDef>,
    // Lowercased reference ID -> index into `reference_defs`.
    reference_defs_map: HashMap<String, usize>,
    // Pre-filled in `new`.
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>,
    // Computed on first call to `math_spans()`.
    math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>,
    /// List blocks found in the document.
    pub list_blocks: Vec<ListBlock>,
    /// Counts of selected characters in the document.
    pub char_frequency: CharFrequency,
    // Computed on first call to `html_tags()`.
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>,
    // Pre-filled in `new`; `emphasis_spans()` relies on that.
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>,
    // Computed on first call to `table_rows()`.
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>,
    // Computed on first call to `bare_urls()`.
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>,
    // Computed on first call to `has_mixed_list_nesting()`.
    has_mixed_list_nesting_cache: OnceLock<bool>,
    // Byte ranges of HTML comments.
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Table blocks found in the document.
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Line index built over `content`.
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    // Byte ranges reported by `find_jinja_ranges`.
    jinja_ranges: Vec<(usize, usize)>,
    /// Markdown flavor the document is analysed as.
    pub flavor: MarkdownFlavor,
    /// Path of the source file, when known.
    pub source_file: Option<PathBuf>,
    // Byte ranges of JSX expressions.
    jsx_expression_ranges: Vec<(usize, usize)>,
    // Byte ranges of MDX comments.
    mdx_comment_ranges: Vec<(usize, usize)>,
    // Citation byte ranges; left empty unless the flavor is Quarto.
    citation_ranges: Vec<crate::utils::skip_context::ByteRange>,
    // Byte ranges matched by `HUGO_SHORTCODE_REGEX`.
    shortcode_ranges: Vec<(usize, usize)>,
    // Inline configuration built from the content and its code blocks.
    inline_config: InlineConfig,
}
569
/// The four consecutive pieces of a blockquote line, borrowed from the input.
struct BlockquoteComponents<'a> {
    /// Leading spaces/tabs before the first `>`.
    indent: &'a str,
    /// The unbroken run of `>` characters.
    markers: &'a str,
    /// Spaces/tabs directly after the markers.
    spaces_after: &'a str,
    /// Everything after that whitespace.
    content: &'a str,
}

/// Splits `line` into indent, `>` markers, following whitespace and content.
///
/// Returns `None` when the first character after the leading spaces/tabs is
/// not `>` (this includes empty and whitespace-only lines). Only one unbroken
/// run of `>` is taken as markers; a later `>` is part of the content.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    const BLANKS: [char; 2] = [' ', '\t'];

    // Peel each component off the front; its length is what the trim removed.
    let after_indent = line.trim_start_matches(BLANKS);
    let indent = &line[..line.len() - after_indent.len()];

    if !after_indent.starts_with('>') {
        return None;
    }

    let after_markers = after_indent.trim_start_matches('>');
    let markers = &after_indent[..after_indent.len() - after_markers.len()];

    let content = after_markers.trim_start_matches(BLANKS);
    let spaces_after = &after_markers[..after_markers.len() - content.len()];

    Some(BlockquoteComponents {
        indent,
        markers,
        spaces_after,
        content,
    })
}
614
615impl<'a> LintContext<'a> {
    /// Builds a `LintContext` for `content`, running each analysis pass once.
    ///
    /// `flavor` gates the flavor-specific passes visible here (MkDocs autodoc
    /// ranges, Quarto divs and citations) and is forwarded to several others.
    /// `source_file` is stored as given. Each pass is wrapped in
    /// `profile_section!`, which prints timings when the
    /// `RUMDL_PROFILE_QUADRATIC` environment variable is set (never on wasm32).
    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
        #[cfg(not(target_arch = "wasm32"))]
        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
        #[cfg(target_arch = "wasm32")]
        let profile = false;

        // Line start offsets: 0, then the byte after every '\n'.
        let line_offsets = profile_section!("Line offsets", profile, {
            let mut offsets = vec![0];
            for (i, c) in content.char_indices() {
                if c == '\n' {
                    offsets.push(i + 1);
                }
            }
            offsets
        });

        // Code blocks and raw code-span ranges come from a single detection call.
        let (code_blocks, code_span_ranges) = profile_section!(
            "Code blocks",
            profile,
            CodeBlockUtils::detect_code_blocks_and_spans(content)
        );

        let html_comment_ranges = profile_section!(
            "HTML comment ranges",
            profile,
            crate::utils::skip_context::compute_html_comment_ranges(content)
        );

        // Autodoc ranges are only detected for the MkDocs flavor.
        let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
            if flavor == MarkdownFlavor::MkDocs {
                crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
            } else {
                Vec::new()
            }
        });

        // Div ranges are only detected for the Quarto flavor.
        let quarto_div_ranges = profile_section!("Quarto div ranges", profile, {
            if flavor == MarkdownFlavor::Quarto {
                crate::utils::quarto_divs::detect_div_block_ranges(content)
            } else {
                Vec::new()
            }
        });

        // Base per-line records (and emphasis spans); the passes below
        // mutate `lines` in place, so their order matters.
        let (mut lines, emphasis_spans) = profile_section!(
            "Basic line info",
            profile,
            Self::compute_basic_line_info(
                content,
                &line_offsets,
                &code_blocks,
                flavor,
                &html_comment_ranges,
                &autodoc_ranges,
                &quarto_div_ranges,
            )
        );

        profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));

        profile_section!(
            "ESM blocks",
            profile,
            Self::detect_esm_blocks(content, &mut lines, flavor)
        );

        let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
            "JSX/MDX detection",
            profile,
            Self::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
        );

        profile_section!(
            "MkDocs constructs",
            profile,
            Self::detect_mkdocs_line_info(content, &mut lines, flavor)
        );

        // Link byte ranges are passed to heading/blockquote detection.
        let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));

        profile_section!(
            "Headings & blockquotes",
            profile,
            Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
        );

        let code_spans = profile_section!(
            "Code spans",
            profile,
            Self::build_code_spans_from_ranges(content, &lines, &code_span_ranges)
        );

        // Flag every line after the first line of a multi-line code span.
        for span in &code_spans {
            if span.end_line > span.line {
                for line_num in (span.line + 1)..=span.end_line {
                    // Span lines are 1-based; `lines` is 0-based.
                    if let Some(line_info) = lines.get_mut(line_num - 1) {
                        line_info.in_code_span_continuation = true;
                    }
                }
            }
        }

        let (links, broken_links, footnote_refs) = profile_section!(
            "Links",
            profile,
            Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
        );

        let images = profile_section!(
            "Images",
            profile,
            Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
        );

        let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));

        // Lowercased ID -> index; when IDs collide the later definition wins.
        let reference_defs_map: HashMap<String, usize> = reference_defs
            .iter()
            .enumerate()
            .map(|(idx, def)| (def.id.to_lowercase(), idx))
            .collect();

        let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));

        let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));

        let table_blocks = profile_section!(
            "Table blocks",
            profile,
            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
                content,
                &code_blocks,
                &code_spans,
                &html_comment_ranges,
            )
        );

        let line_index = profile_section!(
            "Line index",
            profile,
            crate::utils::range_utils::LineIndex::new(content)
        );

        let jinja_ranges = profile_section!(
            "Jinja ranges",
            profile,
            crate::utils::jinja_utils::find_jinja_ranges(content)
        );

        // Citation ranges are only computed for the Quarto flavor.
        let citation_ranges = profile_section!("Citation ranges", profile, {
            if flavor == MarkdownFlavor::Quarto {
                crate::utils::quarto_divs::find_citation_ranges(content)
            } else {
                Vec::new()
            }
        });

        // Shortcode ranges are collected for every flavor; `.flatten()`
        // skips any failed match results.
        let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
            use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
            let mut ranges = Vec::new();
            for mat in HUGO_SHORTCODE_REGEX.find_iter(content).flatten() {
                ranges.push((mat.start(), mat.end()));
            }
            ranges
        });

        let inline_config = InlineConfig::from_content_with_code_blocks(content, &code_blocks);

        Self {
            content,
            line_offsets,
            code_blocks,
            lines,
            links,
            images,
            broken_links,
            footnote_refs,
            reference_defs,
            reference_defs_map,
            // Code spans and emphasis spans are already computed, so their
            // caches start filled; the remaining caches start empty.
            code_spans_cache: OnceLock::from(Arc::new(code_spans)),
            math_spans_cache: OnceLock::new(),
            list_blocks,
            char_frequency,
            html_tags_cache: OnceLock::new(),
            emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
            table_rows_cache: OnceLock::new(),
            bare_urls_cache: OnceLock::new(),
            has_mixed_list_nesting_cache: OnceLock::new(),
            html_comment_ranges,
            table_blocks,
            line_index,
            jinja_ranges,
            flavor,
            source_file,
            jsx_expression_ranges,
            mdx_comment_ranges,
            citation_ranges,
            shortcode_ranges,
            inline_config,
        }
    }
841
842 pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
847 self.inline_config.is_rule_disabled(rule_name, line_number)
848 }
849
850 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
852 Arc::clone(
853 self.code_spans_cache
854 .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
855 )
856 }
857
858 pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
860 Arc::clone(
861 self.math_spans_cache
862 .get_or_init(|| Arc::new(Self::parse_math_spans(self.content, &self.lines))),
863 )
864 }
865
866 pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
868 let math_spans = self.math_spans();
869 math_spans
870 .iter()
871 .any(|span| byte_pos >= span.byte_offset && byte_pos < span.byte_end)
872 }
873
874 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
876 &self.html_comment_ranges
877 }
878
879 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
881 Arc::clone(self.html_tags_cache.get_or_init(|| {
882 Arc::new(Self::parse_html_tags(
883 self.content,
884 &self.lines,
885 &self.code_blocks,
886 self.flavor,
887 ))
888 }))
889 }
890
891 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
893 Arc::clone(
894 self.emphasis_spans_cache
895 .get()
896 .expect("emphasis_spans_cache initialized during construction"),
897 )
898 }
899
900 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
902 Arc::clone(
903 self.table_rows_cache
904 .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
905 )
906 }
907
908 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
910 Arc::clone(
911 self.bare_urls_cache
912 .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
913 )
914 }
915
916 pub fn has_mixed_list_nesting(&self) -> bool {
920 *self
921 .has_mixed_list_nesting_cache
922 .get_or_init(|| self.compute_mixed_list_nesting())
923 }
924
925 fn compute_mixed_list_nesting(&self) -> bool {
927 let mut stack: Vec<(usize, bool)> = Vec::new();
932 let mut last_was_blank = false;
933
934 for line_info in &self.lines {
935 if line_info.in_code_block
937 || line_info.in_front_matter
938 || line_info.in_mkdocstrings
939 || line_info.in_html_comment
940 || line_info.in_esm_block
941 {
942 continue;
943 }
944
945 if line_info.is_blank {
947 last_was_blank = true;
948 continue;
949 }
950
951 if let Some(list_item) = &line_info.list_item {
952 let current_pos = if list_item.marker_column == 1 {
954 0
955 } else {
956 list_item.marker_column
957 };
958
959 if last_was_blank && current_pos == 0 {
961 stack.clear();
962 }
963 last_was_blank = false;
964
965 while let Some(&(pos, _)) = stack.last() {
967 if pos >= current_pos {
968 stack.pop();
969 } else {
970 break;
971 }
972 }
973
974 if let Some(&(_, parent_is_ordered)) = stack.last()
976 && parent_is_ordered != list_item.is_ordered
977 {
978 return true; }
980
981 stack.push((current_pos, list_item.is_ordered));
982 } else {
983 last_was_blank = false;
985 }
986 }
987
988 false
989 }
990
991 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
993 match self.line_offsets.binary_search(&offset) {
994 Ok(line) => (line + 1, 1),
995 Err(line) => {
996 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
997 (line, offset - line_start + 1)
998 }
999 }
1000 }
1001
1002 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
1004 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
1006 return true;
1007 }
1008
1009 self.code_spans()
1011 .iter()
1012 .any(|span| pos >= span.byte_offset && pos < span.byte_end)
1013 }
1014
1015 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
1017 if line_num > 0 {
1018 self.lines.get(line_num - 1)
1019 } else {
1020 None
1021 }
1022 }
1023
1024 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
1026 self.line_info(line_num).map(|info| info.byte_offset)
1027 }
1028
1029 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
1031 let normalized_id = ref_id.to_lowercase();
1032 self.reference_defs_map
1033 .get(&normalized_id)
1034 .map(|&idx| self.reference_defs[idx].url.as_str())
1035 }
1036
1037 pub fn get_reference_def(&self, ref_id: &str) -> Option<&ReferenceDef> {
1039 let normalized_id = ref_id.to_lowercase();
1040 self.reference_defs_map
1041 .get(&normalized_id)
1042 .map(|&idx| &self.reference_defs[idx])
1043 }
1044
1045 pub fn has_reference_def(&self, ref_id: &str) -> bool {
1047 let normalized_id = ref_id.to_lowercase();
1048 self.reference_defs_map.contains_key(&normalized_id)
1049 }
1050
1051 pub fn is_in_list_block(&self, line_num: usize) -> bool {
1053 self.list_blocks
1054 .iter()
1055 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
1056 }
1057
1058 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
1060 self.list_blocks
1061 .iter()
1062 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
1063 }
1064
1065 pub fn is_in_code_block(&self, line_num: usize) -> bool {
1069 if line_num == 0 || line_num > self.lines.len() {
1070 return false;
1071 }
1072 self.lines[line_num - 1].in_code_block
1073 }
1074
1075 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
1077 if line_num == 0 || line_num > self.lines.len() {
1078 return false;
1079 }
1080 self.lines[line_num - 1].in_front_matter
1081 }
1082
1083 pub fn is_in_html_block(&self, line_num: usize) -> bool {
1085 if line_num == 0 || line_num > self.lines.len() {
1086 return false;
1087 }
1088 self.lines[line_num - 1].in_html_block
1089 }
1090
1091 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
1093 if line_num == 0 || line_num > self.lines.len() {
1094 return false;
1095 }
1096
1097 let col_0indexed = if col > 0 { col - 1 } else { 0 };
1101 let code_spans = self.code_spans();
1102 code_spans.iter().any(|span| {
1103 if line_num < span.line || line_num > span.end_line {
1105 return false;
1106 }
1107
1108 if span.line == span.end_line {
1109 col_0indexed >= span.start_col && col_0indexed < span.end_col
1111 } else if line_num == span.line {
1112 col_0indexed >= span.start_col
1114 } else if line_num == span.end_line {
1115 col_0indexed < span.end_col
1117 } else {
1118 true
1120 }
1121 })
1122 }
1123
1124 #[inline]
1126 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
1127 let code_spans = self.code_spans();
1128 code_spans
1129 .iter()
1130 .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
1131 }
1132
1133 #[inline]
1136 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
1137 self.reference_defs
1138 .iter()
1139 .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
1140 }
1141
1142 #[inline]
1146 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
1147 self.html_comment_ranges
1148 .iter()
1149 .any(|range| byte_pos >= range.start && byte_pos < range.end)
1150 }
1151
1152 #[inline]
1155 pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1156 self.html_tags()
1157 .iter()
1158 .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
1159 }
1160
1161 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1163 self.jinja_ranges
1164 .iter()
1165 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1166 }
1167
1168 #[inline]
1170 pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
1171 self.jsx_expression_ranges
1172 .iter()
1173 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1174 }
1175
1176 #[inline]
1178 pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
1179 self.mdx_comment_ranges
1180 .iter()
1181 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1182 }
1183
1184 pub fn jsx_expression_ranges(&self) -> &[(usize, usize)] {
1186 &self.jsx_expression_ranges
1187 }
1188
1189 pub fn mdx_comment_ranges(&self) -> &[(usize, usize)] {
1191 &self.mdx_comment_ranges
1192 }
1193
1194 #[inline]
1197 pub fn is_in_citation(&self, byte_pos: usize) -> bool {
1198 self.citation_ranges
1199 .iter()
1200 .any(|range| byte_pos >= range.start && byte_pos < range.end)
1201 }
1202
1203 pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
1205 &self.citation_ranges
1206 }
1207
1208 #[inline]
1210 pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
1211 self.shortcode_ranges
1212 .iter()
1213 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1214 }
1215
1216 pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
1218 &self.shortcode_ranges
1219 }
1220
1221 pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1223 self.reference_defs.iter().any(|def| {
1224 if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1225 byte_pos >= start && byte_pos < end
1226 } else {
1227 false
1228 }
1229 })
1230 }
1231
1232 pub fn has_char(&self, ch: char) -> bool {
1234 match ch {
1235 '#' => self.char_frequency.hash_count > 0,
1236 '*' => self.char_frequency.asterisk_count > 0,
1237 '_' => self.char_frequency.underscore_count > 0,
1238 '-' => self.char_frequency.hyphen_count > 0,
1239 '+' => self.char_frequency.plus_count > 0,
1240 '>' => self.char_frequency.gt_count > 0,
1241 '|' => self.char_frequency.pipe_count > 0,
1242 '[' => self.char_frequency.bracket_count > 0,
1243 '`' => self.char_frequency.backtick_count > 0,
1244 '<' => self.char_frequency.lt_count > 0,
1245 '!' => self.char_frequency.exclamation_count > 0,
1246 '\n' => self.char_frequency.newline_count > 0,
1247 _ => self.content.contains(ch), }
1249 }
1250
1251 pub fn char_count(&self, ch: char) -> usize {
1253 match ch {
1254 '#' => self.char_frequency.hash_count,
1255 '*' => self.char_frequency.asterisk_count,
1256 '_' => self.char_frequency.underscore_count,
1257 '-' => self.char_frequency.hyphen_count,
1258 '+' => self.char_frequency.plus_count,
1259 '>' => self.char_frequency.gt_count,
1260 '|' => self.char_frequency.pipe_count,
1261 '[' => self.char_frequency.bracket_count,
1262 '`' => self.char_frequency.backtick_count,
1263 '<' => self.char_frequency.lt_count,
1264 '!' => self.char_frequency.exclamation_count,
1265 '\n' => self.char_frequency.newline_count,
1266 _ => self.content.matches(ch).count(), }
1268 }
1269
1270 pub fn likely_has_headings(&self) -> bool {
1272 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
1274
1275 pub fn likely_has_lists(&self) -> bool {
1277 self.char_frequency.asterisk_count > 0
1278 || self.char_frequency.hyphen_count > 0
1279 || self.char_frequency.plus_count > 0
1280 }
1281
1282 pub fn likely_has_emphasis(&self) -> bool {
1284 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1285 }
1286
1287 pub fn likely_has_tables(&self) -> bool {
1289 self.char_frequency.pipe_count > 2
1290 }
1291
1292 pub fn likely_has_blockquotes(&self) -> bool {
1294 self.char_frequency.gt_count > 0
1295 }
1296
1297 pub fn likely_has_code(&self) -> bool {
1299 self.char_frequency.backtick_count > 0
1300 }
1301
1302 pub fn likely_has_links_or_images(&self) -> bool {
1304 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1305 }
1306
1307 pub fn likely_has_html(&self) -> bool {
1309 self.char_frequency.lt_count > 0
1310 }
1311
1312 pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1317 if let Some(line_info) = self.lines.get(line_idx)
1318 && let Some(ref bq) = line_info.blockquote
1319 {
1320 bq.prefix.trim_end().to_string()
1321 } else {
1322 String::new()
1323 }
1324 }
1325
1326 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1328 self.html_tags()
1329 .iter()
1330 .filter(|tag| tag.line == line_num)
1331 .cloned()
1332 .collect()
1333 }
1334
1335 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1337 self.emphasis_spans()
1338 .iter()
1339 .filter(|span| span.line == line_num)
1340 .cloned()
1341 .collect()
1342 }
1343
1344 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1346 self.table_rows()
1347 .iter()
1348 .filter(|row| row.line == line_num)
1349 .cloned()
1350 .collect()
1351 }
1352
1353 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1355 self.bare_urls()
1356 .iter()
1357 .filter(|url| url.line == line_num)
1358 .cloned()
1359 .collect()
1360 }
1361
1362 #[inline]
1368 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1369 let idx = match lines.binary_search_by(|line| {
1371 if byte_offset < line.byte_offset {
1372 std::cmp::Ordering::Greater
1373 } else if byte_offset > line.byte_offset + line.byte_len {
1374 std::cmp::Ordering::Less
1375 } else {
1376 std::cmp::Ordering::Equal
1377 }
1378 }) {
1379 Ok(idx) => idx,
1380 Err(idx) => idx.saturating_sub(1),
1381 };
1382
1383 let line = &lines[idx];
1384 let line_num = idx + 1;
1385 let col = byte_offset.saturating_sub(line.byte_offset);
1386
1387 (idx, line_num, col)
1388 }
1389
1390 #[inline]
1392 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1393 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1395
1396 if idx > 0 {
1398 let span = &code_spans[idx - 1];
1399 if offset >= span.byte_offset && offset < span.byte_end {
1400 return true;
1401 }
1402 }
1403
1404 false
1405 }
1406
1407 fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1411 use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1412
1413 let mut link_ranges = Vec::new();
1414 let mut options = Options::empty();
1415 options.insert(Options::ENABLE_WIKILINKS);
1416 options.insert(Options::ENABLE_FOOTNOTES);
1417
1418 let parser = Parser::new_ext(content, options).into_offset_iter();
1419 let mut link_stack: Vec<usize> = Vec::new();
1420
1421 for (event, range) in parser {
1422 match event {
1423 Event::Start(Tag::Link { .. }) => {
1424 link_stack.push(range.start);
1425 }
1426 Event::End(TagEnd::Link) => {
1427 if let Some(start_pos) = link_stack.pop() {
1428 link_ranges.push((start_pos, range.end));
1429 }
1430 }
1431 _ => {}
1432 }
1433 }
1434
1435 link_ranges
1436 }
1437
1438 fn parse_links(
1440 content: &'a str,
1441 lines: &[LineInfo],
1442 code_blocks: &[(usize, usize)],
1443 code_spans: &[CodeSpan],
1444 flavor: MarkdownFlavor,
1445 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1446 ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1447 use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1448 use std::collections::HashSet;
1449
1450 let mut links = Vec::with_capacity(content.len() / 500);
1451 let mut broken_links = Vec::new();
1452 let mut footnote_refs = Vec::new();
1453
1454 let mut found_positions = HashSet::new();
1456
1457 let mut options = Options::empty();
1467 options.insert(Options::ENABLE_WIKILINKS);
1468 options.insert(Options::ENABLE_FOOTNOTES);
1469
1470 let parser = Parser::new_with_broken_link_callback(
1471 content,
1472 options,
1473 Some(|link: BrokenLink<'_>| {
1474 broken_links.push(BrokenLinkInfo {
1475 reference: link.reference.to_string(),
1476 span: link.span.clone(),
1477 });
1478 None
1479 }),
1480 )
1481 .into_offset_iter();
1482
1483 let mut link_stack: Vec<(
1484 usize,
1485 usize,
1486 pulldown_cmark::CowStr<'a>,
1487 LinkType,
1488 pulldown_cmark::CowStr<'a>,
1489 )> = Vec::new();
1490 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1493 match event {
1494 Event::Start(Tag::Link {
1495 link_type,
1496 dest_url,
1497 id,
1498 ..
1499 }) => {
1500 link_stack.push((range.start, range.end, dest_url, link_type, id));
1502 text_chunks.clear();
1503 }
1504 Event::Text(text) if !link_stack.is_empty() => {
1505 text_chunks.push((text.to_string(), range.start, range.end));
1507 }
1508 Event::Code(code) if !link_stack.is_empty() => {
1509 let code_text = format!("`{code}`");
1511 text_chunks.push((code_text, range.start, range.end));
1512 }
1513 Event::End(TagEnd::Link) => {
1514 if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1515 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1517 text_chunks.clear();
1518 continue;
1519 }
1520
1521 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1523
1524 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1526 text_chunks.clear();
1527 continue;
1528 }
1529
1530 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1531
1532 let is_reference = matches!(
1533 link_type,
1534 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1535 );
1536
1537 let link_text = if start_pos < content.len() {
1540 let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1541
1542 let mut close_pos = None;
1546 let mut depth = 0;
1547 let mut in_code_span = false;
1548
1549 for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1550 let mut backslash_count = 0;
1552 let mut j = i;
1553 while j > 0 && link_bytes[j - 1] == b'\\' {
1554 backslash_count += 1;
1555 j -= 1;
1556 }
1557 let is_escaped = backslash_count % 2 != 0;
1558
1559 if byte == b'`' && !is_escaped {
1561 in_code_span = !in_code_span;
1562 }
1563
1564 if !is_escaped && !in_code_span {
1566 if byte == b'[' {
1567 depth += 1;
1568 } else if byte == b']' {
1569 if depth == 0 {
1570 close_pos = Some(i);
1572 break;
1573 } else {
1574 depth -= 1;
1575 }
1576 }
1577 }
1578 }
1579
1580 if let Some(pos) = close_pos {
1581 Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1582 } else {
1583 Cow::Borrowed("")
1584 }
1585 } else {
1586 Cow::Borrowed("")
1587 };
1588
1589 let reference_id = if is_reference && !ref_id.is_empty() {
1591 Some(Cow::Owned(ref_id.to_lowercase()))
1592 } else if is_reference {
1593 Some(Cow::Owned(link_text.to_lowercase()))
1595 } else {
1596 None
1597 };
1598
1599 found_positions.insert(start_pos);
1601
1602 links.push(ParsedLink {
1603 line: line_num,
1604 start_col: col_start,
1605 end_col: col_end,
1606 byte_offset: start_pos,
1607 byte_end: range.end,
1608 text: link_text,
1609 url: Cow::Owned(url.to_string()),
1610 is_reference,
1611 reference_id,
1612 link_type,
1613 });
1614
1615 text_chunks.clear();
1616 }
1617 }
1618 Event::FootnoteReference(footnote_id) => {
1619 if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1622 continue;
1623 }
1624
1625 let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1626 footnote_refs.push(FootnoteRef {
1627 id: footnote_id.to_string(),
1628 line: line_num,
1629 byte_offset: range.start,
1630 byte_end: range.end,
1631 });
1632 }
1633 _ => {}
1634 }
1635 }
1636
1637 for cap in LINK_PATTERN.captures_iter(content) {
1641 let full_match = cap.get(0).unwrap();
1642 let match_start = full_match.start();
1643 let match_end = full_match.end();
1644
1645 if found_positions.contains(&match_start) {
1647 continue;
1648 }
1649
1650 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1652 continue;
1653 }
1654
1655 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1657 continue;
1658 }
1659
1660 if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1662 continue;
1663 }
1664
1665 if Self::is_offset_in_code_span(code_spans, match_start) {
1667 continue;
1668 }
1669
1670 if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1672 continue;
1673 }
1674
1675 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1677
1678 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1680 continue;
1681 }
1682
1683 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1684
1685 let text = cap.get(1).map_or("", |m| m.as_str());
1686
1687 if let Some(ref_id) = cap.get(6) {
1689 let ref_id_str = ref_id.as_str();
1690 let normalized_ref = if ref_id_str.is_empty() {
1691 Cow::Owned(text.to_lowercase()) } else {
1693 Cow::Owned(ref_id_str.to_lowercase())
1694 };
1695
1696 links.push(ParsedLink {
1698 line: line_num,
1699 start_col: col_start,
1700 end_col: col_end,
1701 byte_offset: match_start,
1702 byte_end: match_end,
1703 text: Cow::Borrowed(text),
1704 url: Cow::Borrowed(""), is_reference: true,
1706 reference_id: Some(normalized_ref),
1707 link_type: LinkType::Reference, });
1709 }
1710 }
1711
1712 (links, broken_links, footnote_refs)
1713 }
1714
1715 fn parse_images(
1717 content: &'a str,
1718 lines: &[LineInfo],
1719 code_blocks: &[(usize, usize)],
1720 code_spans: &[CodeSpan],
1721 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1722 ) -> Vec<ParsedImage<'a>> {
1723 use crate::utils::skip_context::is_in_html_comment_ranges;
1724 use std::collections::HashSet;
1725
1726 let mut images = Vec::with_capacity(content.len() / 1000);
1728 let mut found_positions = HashSet::new();
1729
1730 let parser = Parser::new(content).into_offset_iter();
1732 let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1733 Vec::new();
1734 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1737 match event {
1738 Event::Start(Tag::Image {
1739 link_type,
1740 dest_url,
1741 id,
1742 ..
1743 }) => {
1744 image_stack.push((range.start, dest_url, link_type, id));
1745 text_chunks.clear();
1746 }
1747 Event::Text(text) if !image_stack.is_empty() => {
1748 text_chunks.push((text.to_string(), range.start, range.end));
1749 }
1750 Event::Code(code) if !image_stack.is_empty() => {
1751 let code_text = format!("`{code}`");
1752 text_chunks.push((code_text, range.start, range.end));
1753 }
1754 Event::End(TagEnd::Image) => {
1755 if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1756 if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1758 continue;
1759 }
1760
1761 if Self::is_offset_in_code_span(code_spans, start_pos) {
1763 continue;
1764 }
1765
1766 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1768 continue;
1769 }
1770
1771 let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1773 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1774
1775 let is_reference = matches!(
1776 link_type,
1777 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1778 );
1779
1780 let alt_text = if start_pos < content.len() {
1783 let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1784
1785 let mut close_pos = None;
1788 let mut depth = 0;
1789
1790 if image_bytes.len() > 2 {
1791 for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1792 let mut backslash_count = 0;
1794 let mut j = i;
1795 while j > 0 && image_bytes[j - 1] == b'\\' {
1796 backslash_count += 1;
1797 j -= 1;
1798 }
1799 let is_escaped = backslash_count % 2 != 0;
1800
1801 if !is_escaped {
1802 if byte == b'[' {
1803 depth += 1;
1804 } else if byte == b']' {
1805 if depth == 0 {
1806 close_pos = Some(i);
1808 break;
1809 } else {
1810 depth -= 1;
1811 }
1812 }
1813 }
1814 }
1815 }
1816
1817 if let Some(pos) = close_pos {
1818 Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1819 } else {
1820 Cow::Borrowed("")
1821 }
1822 } else {
1823 Cow::Borrowed("")
1824 };
1825
1826 let reference_id = if is_reference && !ref_id.is_empty() {
1827 Some(Cow::Owned(ref_id.to_lowercase()))
1828 } else if is_reference {
1829 Some(Cow::Owned(alt_text.to_lowercase())) } else {
1831 None
1832 };
1833
1834 found_positions.insert(start_pos);
1835 images.push(ParsedImage {
1836 line: line_num,
1837 start_col: col_start,
1838 end_col: col_end,
1839 byte_offset: start_pos,
1840 byte_end: range.end,
1841 alt_text,
1842 url: Cow::Owned(url.to_string()),
1843 is_reference,
1844 reference_id,
1845 link_type,
1846 });
1847 }
1848 }
1849 _ => {}
1850 }
1851 }
1852
1853 for cap in IMAGE_PATTERN.captures_iter(content) {
1855 let full_match = cap.get(0).unwrap();
1856 let match_start = full_match.start();
1857 let match_end = full_match.end();
1858
1859 if found_positions.contains(&match_start) {
1861 continue;
1862 }
1863
1864 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1866 continue;
1867 }
1868
1869 if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1871 || Self::is_offset_in_code_span(code_spans, match_start)
1872 || is_in_html_comment_ranges(html_comment_ranges, match_start)
1873 {
1874 continue;
1875 }
1876
1877 if let Some(ref_id) = cap.get(6) {
1879 let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1880 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1881 let alt_text = cap.get(1).map_or("", |m| m.as_str());
1882 let ref_id_str = ref_id.as_str();
1883 let normalized_ref = if ref_id_str.is_empty() {
1884 Cow::Owned(alt_text.to_lowercase())
1885 } else {
1886 Cow::Owned(ref_id_str.to_lowercase())
1887 };
1888
1889 images.push(ParsedImage {
1890 line: line_num,
1891 start_col: col_start,
1892 end_col: col_end,
1893 byte_offset: match_start,
1894 byte_end: match_end,
1895 alt_text: Cow::Borrowed(alt_text),
1896 url: Cow::Borrowed(""),
1897 is_reference: true,
1898 reference_id: Some(normalized_ref),
1899 link_type: LinkType::Reference, });
1901 }
1902 }
1903
1904 images
1905 }
1906
1907 fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1909 let mut refs = Vec::with_capacity(lines.len() / 20); for (line_idx, line_info) in lines.iter().enumerate() {
1913 if line_info.in_code_block {
1915 continue;
1916 }
1917
1918 let line = line_info.content(content);
1919 let line_num = line_idx + 1;
1920
1921 if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1922 let id_raw = cap.get(1).unwrap().as_str();
1923
1924 if id_raw.starts_with('^') {
1927 continue;
1928 }
1929
1930 let id = id_raw.to_lowercase();
1931 let url = cap.get(2).unwrap().as_str().to_string();
1932 let title_match = cap.get(3).or_else(|| cap.get(4));
1933 let title = title_match.map(|m| m.as_str().to_string());
1934
1935 let match_obj = cap.get(0).unwrap();
1938 let byte_offset = line_info.byte_offset + match_obj.start();
1939 let byte_end = line_info.byte_offset + match_obj.end();
1940
1941 let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1943 let start = line_info.byte_offset + m.start().saturating_sub(1);
1945 let end = line_info.byte_offset + m.end() + 1; (Some(start), Some(end))
1947 } else {
1948 (None, None)
1949 };
1950
1951 refs.push(ReferenceDef {
1952 line: line_num,
1953 id,
1954 url,
1955 title,
1956 byte_offset,
1957 byte_end,
1958 title_byte_start,
1959 title_byte_end,
1960 });
1961 }
1962 }
1963
1964 refs
1965 }
1966
/// Splits a line into its blockquote prefix and the remaining content.
///
/// The prefix is made of one or more groups of optional leading whitespace,
/// a `>` marker and at most one following space or tab. Returns `None` when
/// the line, after leading whitespace, does not start with `>`.
#[inline]
fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
    let mut rest = line;
    let mut saw_marker = false;

    // Peel off one `>` marker (plus surrounding whitespace) per iteration.
    while let Some(after_marker) = rest.trim_start().strip_prefix('>') {
        saw_marker = true;
        // Only a single space or tab after the marker belongs to the prefix.
        rest = after_marker.strip_prefix([' ', '\t']).unwrap_or(after_marker);
    }

    if !saw_marker {
        return None;
    }

    // `rest` is always a suffix of `line`, so the prefix is whatever precedes it.
    let prefix_len = line.len() - rest.len();
    Some((&line[..prefix_len], rest))
}
2007
2008 fn detect_list_items_and_emphasis_with_pulldown(
2032 content: &str,
2033 line_offsets: &[usize],
2034 flavor: MarkdownFlavor,
2035 front_matter_end: usize,
2036 code_blocks: &[(usize, usize)],
2037 ) -> (ListItemMap, Vec<EmphasisSpan>) {
2038 use std::collections::HashMap;
2039
2040 let mut list_items = HashMap::new();
2041 let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
2042
2043 let mut options = Options::empty();
2044 options.insert(Options::ENABLE_TABLES);
2045 options.insert(Options::ENABLE_FOOTNOTES);
2046 options.insert(Options::ENABLE_STRIKETHROUGH);
2047 options.insert(Options::ENABLE_TASKLISTS);
2048 options.insert(Options::ENABLE_GFM);
2050
2051 let _ = flavor;
2053
2054 let parser = Parser::new_ext(content, options).into_offset_iter();
2055 let mut list_depth: usize = 0;
2056 let mut list_stack: Vec<bool> = Vec::new();
2057
2058 for (event, range) in parser {
2059 match event {
2060 Event::Start(Tag::Emphasis) | Event::Start(Tag::Strong) => {
2062 let marker_count = if matches!(event, Event::Start(Tag::Strong)) {
2063 2
2064 } else {
2065 1
2066 };
2067 let match_start = range.start;
2068 let match_end = range.end;
2069
2070 if !CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2072 let marker = content[match_start..].chars().next().unwrap_or('*');
2074 if marker == '*' || marker == '_' {
2075 let content_start = match_start + marker_count;
2077 let content_end = if match_end >= marker_count {
2078 match_end - marker_count
2079 } else {
2080 match_end
2081 };
2082 let content_part = if content_start < content_end && content_end <= content.len() {
2083 &content[content_start..content_end]
2084 } else {
2085 ""
2086 };
2087
2088 let line_idx = match line_offsets.binary_search(&match_start) {
2090 Ok(idx) => idx,
2091 Err(idx) => idx.saturating_sub(1),
2092 };
2093 let line_num = line_idx + 1;
2094 let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
2095 let col_start = match_start - line_start;
2096 let col_end = match_end - line_start;
2097
2098 emphasis_spans.push(EmphasisSpan {
2099 line: line_num,
2100 start_col: col_start,
2101 end_col: col_end,
2102 byte_offset: match_start,
2103 byte_end: match_end,
2104 marker,
2105 marker_count,
2106 content: content_part.to_string(),
2107 });
2108 }
2109 }
2110 }
2111 Event::Start(Tag::List(start_number)) => {
2112 list_depth += 1;
2113 list_stack.push(start_number.is_some());
2114 }
2115 Event::End(TagEnd::List(_)) => {
2116 list_depth = list_depth.saturating_sub(1);
2117 list_stack.pop();
2118 }
2119 Event::Start(Tag::Item) if list_depth > 0 => {
2120 let current_list_is_ordered = list_stack.last().copied().unwrap_or(false);
2122 let item_start = range.start;
2124
2125 let mut line_idx = match line_offsets.binary_search(&item_start) {
2127 Ok(idx) => idx,
2128 Err(idx) => idx.saturating_sub(1),
2129 };
2130
2131 if item_start < content.len() && content.as_bytes()[item_start] == b'\n' {
2135 line_idx += 1;
2136 }
2137
2138 if front_matter_end > 0 && line_idx < front_matter_end {
2140 continue;
2141 }
2142
2143 if line_idx < line_offsets.len() {
2144 let line_start_byte = line_offsets[line_idx];
2145 let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
2146 let line = &content[line_start_byte..line_end.min(content.len())];
2147
2148 let line = line
2150 .strip_suffix('\n')
2151 .or_else(|| line.strip_suffix("\r\n"))
2152 .unwrap_or(line);
2153
2154 let blockquote_parse = Self::parse_blockquote_prefix(line);
2156 let (blockquote_prefix_len, line_to_parse) = if let Some((prefix, content)) = blockquote_parse {
2157 (prefix.len(), content)
2158 } else {
2159 (0, line)
2160 };
2161
2162 if current_list_is_ordered {
2164 if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
2165 Self::parse_ordered_list(line_to_parse)
2166 {
2167 let marker = format!("{number_str}{delimiter}");
2168 let marker_column = blockquote_prefix_len + leading_spaces.len();
2169 let content_column = marker_column + marker.len() + spacing.len();
2170 let number = number_str.parse().ok();
2171
2172 list_items.entry(line_start_byte).or_insert((
2173 true,
2174 marker,
2175 marker_column,
2176 content_column,
2177 number,
2178 ));
2179 }
2180 } else if let Some((leading_spaces, marker, spacing, _content)) =
2181 Self::parse_unordered_list(line_to_parse)
2182 {
2183 let marker_column = blockquote_prefix_len + leading_spaces.len();
2184 let content_column = marker_column + 1 + spacing.len();
2185
2186 list_items.entry(line_start_byte).or_insert((
2187 false,
2188 marker.to_string(),
2189 marker_column,
2190 content_column,
2191 None,
2192 ));
2193 }
2194 }
2195 }
2196 _ => {}
2197 }
2198 }
2199
2200 (list_items, emphasis_spans)
2201 }
2202
/// Splits an unordered list line into
/// `(leading whitespace, marker, spacing after marker, content)`.
///
/// The marker must be `-`, `*` or `+` and be the first character after any
/// leading spaces or tabs; otherwise `None` is returned. The spacing may be
/// empty.
#[inline]
fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
    fn is_blank(byte: u8) -> bool {
        byte == b' ' || byte == b'\t'
    }

    let bytes = line.as_bytes();

    // Index of the first byte that is not indentation (or the end of the line).
    let marker_pos = bytes.iter().position(|&b| !is_blank(b)).unwrap_or(bytes.len());

    let marker = match bytes.get(marker_pos)? {
        b'-' => '-',
        b'*' => '*',
        b'+' => '+',
        _ => return None,
    };

    let spacing_start = marker_pos + 1;
    let spacing_len = bytes[spacing_start..].iter().take_while(|&&b| is_blank(b)).count();
    let content_start = spacing_start + spacing_len;

    Some((
        &line[..marker_pos],
        marker,
        &line[spacing_start..content_start],
        &line[content_start..],
    ))
}
2235
/// Splits an ordered list line into
/// `(leading whitespace, number, delimiter, spacing after delimiter, content)`.
///
/// After any leading spaces or tabs the line must contain at least one ASCII
/// digit followed by `.` or `)`; otherwise `None` is returned. The spacing
/// may be empty.
#[inline]
fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
    let bytes = line.as_bytes();

    // Returns the index of the first byte at or after `from` that is not a space or tab.
    let skip_blanks = |from: usize| {
        from + bytes[from..]
            .iter()
            .take_while(|&&b| b == b' ' || b == b'\t')
            .count()
    };

    let number_start = skip_blanks(0);
    let digit_count = bytes[number_start..]
        .iter()
        .take_while(|b| b.is_ascii_digit())
        .count();
    if digit_count == 0 {
        return None;
    }

    let delimiter_pos = number_start + digit_count;
    let delimiter = match bytes.get(delimiter_pos)? {
        b'.' => '.',
        b')' => ')',
        _ => return None,
    };

    let spacing_start = delimiter_pos + 1;
    let content_start = skip_blanks(spacing_start);

    Some((
        &line[..number_start],
        &line[number_start..delimiter_pos],
        delimiter,
        &line[spacing_start..content_start],
        &line[content_start..],
    ))
}
2283
/// Builds a per-line flag vector (one entry per element of `line_offsets`)
/// that is `true` for every line overlapped by one of the `code_blocks`
/// byte ranges.
///
/// Range ends that fall inside a multi-byte character are first snapped
/// outwards to a character boundary: the start moves backwards, the end
/// moves forwards and is capped at `content.len()`.
fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
    let total_len = content.len();
    let mut flags = vec![false; line_offsets.len()];

    for &(block_start, block_end) in code_blocks {
        // Snap the start back to the nearest character boundary.
        let mut lo = block_start;
        while lo > 0 && !content.is_char_boundary(lo) {
            lo -= 1;
        }

        // Snap the end forward to the nearest character boundary.
        let mut hi = block_end.min(total_len);
        while hi < total_len && !content.is_char_boundary(hi) {
            hi += 1;
        }

        // First line: the last one starting at or before `lo`.
        // Last line (exclusive): the first one starting at or after `hi`.
        let first = line_offsets.partition_point(|&off| off <= lo).saturating_sub(1);
        let last = line_offsets.partition_point(|&off| off < hi);

        // `get_mut` yields `None` for an inverted range, i.e. nothing to mark.
        if let Some(covered) = flags.get_mut(first..last) {
            covered.fill(true);
        }
    }

    flags
}
2343
/// Builds a per-line flag vector (one entry per line of `content.lines()`)
/// that is `true` for every line belonging to a `$$ ... $$` block,
/// delimiter lines included.
///
/// A delimiter is a line whose trimmed text is exactly `$$`. Lines flagged in
/// `code_block_map` are never marked and never toggle the block state.
fn compute_math_block_line_map(content: &str, code_block_map: &[bool]) -> Vec<bool> {
    let mut inside_math = false;

    content
        .lines()
        .enumerate()
        .map(|(idx, text)| {
            if code_block_map.get(idx).copied().unwrap_or(false) {
                return false;
            }

            let is_delimiter = text.trim() == "$$";
            // Both the opening and the closing delimiter count as part of the block.
            let flagged = is_delimiter || inside_math;
            if is_delimiter {
                inside_math = !inside_math;
            }
            flagged
        })
        .collect()
}
2381
2382 fn compute_basic_line_info(
2385 content: &str,
2386 line_offsets: &[usize],
2387 code_blocks: &[(usize, usize)],
2388 flavor: MarkdownFlavor,
2389 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2390 autodoc_ranges: &[crate::utils::skip_context::ByteRange],
2391 quarto_div_ranges: &[crate::utils::skip_context::ByteRange],
2392 ) -> (Vec<LineInfo>, Vec<EmphasisSpan>) {
2393 let content_lines: Vec<&str> = content.lines().collect();
2394 let mut lines = Vec::with_capacity(content_lines.len());
2395
2396 let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
2398
2399 let math_block_map = Self::compute_math_block_line_map(content, &code_block_map);
2401
2402 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2405
2406 let (list_item_map, emphasis_spans) = Self::detect_list_items_and_emphasis_with_pulldown(
2409 content,
2410 line_offsets,
2411 flavor,
2412 front_matter_end,
2413 code_blocks,
2414 );
2415
2416 for (i, line) in content_lines.iter().enumerate() {
2417 let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
2418 let indent = line.len() - line.trim_start().len();
2419 let visual_indent = ElementCache::calculate_indentation_width_default(line);
2421
2422 let blockquote_parse = Self::parse_blockquote_prefix(line);
2424
2425 let is_blank = if let Some((_, content)) = blockquote_parse {
2427 content.trim().is_empty()
2429 } else {
2430 line.trim().is_empty()
2431 };
2432
2433 let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
2435
2436 let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
2438 && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
2439 let line_end_offset = byte_offset + line.len();
2442 let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
2443 html_comment_ranges,
2444 byte_offset,
2445 line_end_offset,
2446 );
2447 let list_item =
2450 list_item_map
2451 .get(&byte_offset)
2452 .map(
2453 |(is_ordered, marker, marker_column, content_column, number)| ListItemInfo {
2454 marker: marker.clone(),
2455 is_ordered: *is_ordered,
2456 number: *number,
2457 marker_column: *marker_column,
2458 content_column: *content_column,
2459 },
2460 );
2461
2462 let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2465 let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2466
2467 let in_math_block = math_block_map.get(i).copied().unwrap_or(false);
2469
2470 let in_quarto_div = flavor == MarkdownFlavor::Quarto
2472 && crate::utils::quarto_divs::is_within_div_block_ranges(quarto_div_ranges, byte_offset);
2473
2474 lines.push(LineInfo {
2475 byte_offset,
2476 byte_len: line.len(),
2477 indent,
2478 visual_indent,
2479 is_blank,
2480 in_code_block,
2481 in_front_matter,
2482 in_html_block: false, in_html_comment,
2484 list_item,
2485 heading: None, blockquote: None, in_mkdocstrings,
2488 in_esm_block: false, in_code_span_continuation: false, is_horizontal_rule: is_hr,
2491 in_math_block,
2492 in_quarto_div,
2493 in_jsx_expression: false, in_mdx_comment: false, in_jsx_component: false, in_jsx_fragment: false, in_admonition: false, in_content_tab: false, in_definition_list: false, });
2501 }
2502
2503 (lines, emphasis_spans)
2504 }
2505
/// Fills in the `blockquote` and `heading` fields of every entry in `lines`.
///
/// For each line outside front matter, blockquote details are recorded first
/// (and `is_horizontal_rule` is set when the quoted content is a rule and the
/// line is not in a code block). Heading detection then runs on lines that
/// are not in a code block, front matter or HTML block and are not blank:
/// an ATX heading (`# Title`) is tried first, then a Setext heading (text
/// line followed by an `===`/`---` underline).
///
/// * `html_comment_ranges` - headings whose line starts inside one of these
///   ranges are ignored.
/// * `link_byte_ranges` - ATX candidates whose line starts strictly inside one
///   of these ranges are ignored.
/// * `flavor` - for MkDocs, snippet section start/end lines are never treated
///   as ATX headings.
fn detect_headings_and_blockquotes(
    content: &str,
    lines: &mut [LineInfo],
    flavor: MarkdownFlavor,
    html_comment_ranges: &[crate::utils::skip_context::ByteRange],
    link_byte_ranges: &[(usize, usize)],
) {
    // Groups: 1 = leading whitespace, 2 = hashes, 3 = whitespace after hashes, 4 = rest of line.
    static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
        LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
    // A line made only of `=` or only of `-`, with optional surrounding whitespace.
    static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
        LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());

    let content_lines: Vec<&str> = content.lines().collect();

    let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);

    for i in 0..lines.len() {
        // NOTE(review): indexes `content_lines` by `lines` index; assumes both
        // were derived from the same `content.lines()` split - confirm with caller.
        let line = content_lines[i];

        // Blockquote details are recorded for every line outside front matter,
        // including lines inside code blocks.
        if !(front_matter_end > 0 && i < front_matter_end)
            && let Some(bq) = parse_blockquote_detailed(line)
        {
            let nesting_level = bq.markers.len();
            let marker_column = bq.indent.len();
            let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
            let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
            let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
            let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();

            lines[i].blockquote = Some(BlockquoteInfo {
                nesting_level,
                indent: bq.indent.to_string(),
                marker_column,
                prefix,
                content: bq.content.to_string(),
                has_no_space_after_marker: has_no_space,
                has_multiple_spaces_after_marker: has_multiple_spaces,
                needs_md028_fix,
            });

            // A rule inside a blockquote (e.g. `> ---`) also counts as a horizontal rule.
            if !lines[i].in_code_block && is_horizontal_rule_content(bq.content.trim()) {
                lines[i].is_horizontal_rule = true;
            }
        }

        // Heading detection is skipped for the following kinds of lines.
        if lines[i].in_code_block {
            continue;
        }

        if front_matter_end > 0 && i < front_matter_end {
            continue;
        }

        if lines[i].in_html_block {
            continue;
        }

        if lines[i].is_blank {
            continue;
        }

        // MkDocs snippet section markers are not considered for ATX headings.
        let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
            crate::utils::mkdocs_snippets::is_snippet_section_start(line)
                || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
        } else {
            false
        };

        if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
            if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
                continue;
            }
            // Skip lines that begin strictly inside a link's byte range.
            let line_offset = lines[i].byte_offset;
            if link_byte_ranges
                .iter()
                .any(|&(start, end)| line_offset > start && line_offset < end)
            {
                continue;
            }
            let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
            let hashes = caps.get(2).map_or("", |m| m.as_str());
            let spaces_after = caps.get(3).map_or("", |m| m.as_str());
            let rest = caps.get(4).map_or("", |m| m.as_str());

            let level = hashes.len() as u8;
            let marker_column = leading_spaces.len();

            // Split `rest` into heading text and an optional closing `#` sequence.
            let (text, has_closing, closing_seq) = {
                // A trailing ` {#...}` part is set aside so that closing hashes
                // placed before it can still be detected.
                let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
                    if rest[id_start..].trim_end().ends_with('}') {
                        (&rest[..id_start], &rest[id_start..])
                    } else {
                        (rest, "")
                    }
                } else {
                    (rest, "")
                };

                let trimmed_rest = rest_without_id.trim_end();
                if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
                    // Work with (byte position, char) pairs so the backwards walk
                    // over the hash run stays on character boundaries.
                    let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();

                    let last_hash_char_idx = char_positions
                        .iter()
                        .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);

                    if let Some(mut char_idx) = last_hash_char_idx {
                        // Walk back to the first `#` of the contiguous run.
                        while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
                            char_idx -= 1;
                        }

                        let start_of_hashes = char_positions[char_idx].0;

                        let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();

                        // Everything from the run to the end must be `#` for it
                        // to count as a closing sequence.
                        let potential_closing = &trimmed_rest[start_of_hashes..];
                        let is_all_hashes = potential_closing.chars().all(|c| c == '#');

                        if is_all_hashes && has_space_before {
                            let closing_hashes = potential_closing.to_string();
                            // Re-attach the custom id part after removing the closing hashes.
                            let text_part = if !custom_id_part.is_empty() {
                                format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
                            } else {
                                trimmed_rest[..start_of_hashes].trim_end().to_string()
                            };
                            (text_part, true, closing_hashes)
                        } else {
                            (rest.to_string(), false, String::new())
                        }
                    } else {
                        (rest.to_string(), false, String::new())
                    }
                } else {
                    (rest.to_string(), false, String::new())
                }
            };

            let content_column = marker_column + hashes.len() + spaces_after.len();

            let raw_text = text.trim().to_string();
            let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);

            // With no inline id, fall back to a standalone attribute list on the next line.
            if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
                let next_line = content_lines[i + 1];
                if !lines[i + 1].in_code_block
                    && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
                    && let Some(next_line_id) =
                        crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
                {
                    custom_id = Some(next_line_id);
                }
            }

            // Marked valid when there is whitespace after the hashes, the heading
            // is empty, the level is 2 or more, or the text starts with an
            // uppercase character.
            let is_valid = !spaces_after.is_empty()
                || rest.is_empty()
                || level > 1
                || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());

            lines[i].heading = Some(HeadingInfo {
                level,
                style: HeadingStyle::ATX,
                marker: hashes.to_string(),
                marker_column,
                content_column,
                text: clean_text,
                custom_id,
                raw_text,
                has_closing_sequence: has_closing,
                closing_sequence: closing_seq,
                is_valid,
            });
        }
        // Setext heading: this line is the text, the next line is the underline.
        else if i + 1 < content_lines.len() && i + 1 < lines.len() {
            let next_line = content_lines[i + 1];
            if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
                if front_matter_end > 0 && i < front_matter_end {
                    continue;
                }

                if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
                {
                    continue;
                }

                let content_line = line.trim();

                // Text lines starting with a list marker character are not headings.
                if content_line.starts_with('-') || content_line.starts_with('*') || content_line.starts_with('+') {
                    continue;
                }

                // Three or more underscores (ignoring whitespace) and nothing else
                // is skipped as well.
                if content_line.starts_with('_') {
                    let non_ws: String = content_line.chars().filter(|c| !c.is_whitespace()).collect();
                    if non_ws.len() >= 3 && non_ws.chars().all(|c| c == '_') {
                        continue;
                    }
                }

                // Digits followed by `.` or `)` look like an ordered list item.
                if let Some(first_char) = content_line.chars().next()
                    && first_char.is_ascii_digit()
                {
                    let num_end = content_line.chars().take_while(|c| c.is_ascii_digit()).count();
                    if num_end < content_line.len() {
                        let next = content_line.chars().nth(num_end);
                        if next == Some('.') || next == Some(')') {
                            continue;
                        }
                    }
                }

                // Lines matching the ATX pattern (e.g. MkDocs snippet markers that
                // were excluded above) are not Setext text.
                if ATX_HEADING_REGEX.is_match(line) {
                    continue;
                }

                // Blockquote lines are skipped.
                if content_line.starts_with('>') {
                    continue;
                }

                // Lines opening with a ``` or ~~~ fence are skipped.
                let trimmed_start = line.trim_start();
                if trimmed_start.len() >= 3 {
                    let first_three: String = trimmed_start.chars().take(3).collect();
                    if first_three == "```" || first_three == "~~~" {
                        continue;
                    }
                }

                // Lines starting with `<` are skipped.
                if content_line.starts_with('<') {
                    continue;
                }

                let underline = next_line.trim();

                // `=` underline gives level 1, `-` underline gives level 2.
                let level = if underline.starts_with('=') { 1 } else { 2 };
                let style = if level == 1 {
                    HeadingStyle::Setext1
                } else {
                    HeadingStyle::Setext2
                };

                let raw_text = line.trim().to_string();
                let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);

                // With no inline id, fall back to a standalone attribute list on
                // the line after the underline.
                if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
                    let attr_line = content_lines[i + 2];
                    if !lines[i + 2].in_code_block
                        && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
                        && let Some(attr_line_id) =
                            crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
                    {
                        custom_id = Some(attr_line_id);
                    }
                }

                // For Setext headings the marker is the underline text, and
                // `marker_column` is the underline's indentation.
                lines[i].heading = Some(HeadingInfo {
                    level,
                    style,
                    marker: underline.to_string(),
                    marker_column: next_line.len() - next_line.trim_start().len(),
                    content_column: lines[i].indent,
                    text: clean_text,
                    custom_id,
                    raw_text,
                    has_closing_sequence: false,
                    closing_sequence: String::new(),
                    is_valid: true,
                });
            }
        }
    }
}
2839
2840 fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2842 const BLOCK_ELEMENTS: &[&str] = &[
2845 "address",
2846 "article",
2847 "aside",
2848 "audio",
2849 "blockquote",
2850 "canvas",
2851 "details",
2852 "dialog",
2853 "dd",
2854 "div",
2855 "dl",
2856 "dt",
2857 "embed",
2858 "fieldset",
2859 "figcaption",
2860 "figure",
2861 "footer",
2862 "form",
2863 "h1",
2864 "h2",
2865 "h3",
2866 "h4",
2867 "h5",
2868 "h6",
2869 "header",
2870 "hr",
2871 "iframe",
2872 "li",
2873 "main",
2874 "menu",
2875 "nav",
2876 "noscript",
2877 "object",
2878 "ol",
2879 "p",
2880 "picture",
2881 "pre",
2882 "script",
2883 "search",
2884 "section",
2885 "source",
2886 "style",
2887 "summary",
2888 "svg",
2889 "table",
2890 "tbody",
2891 "td",
2892 "template",
2893 "textarea",
2894 "tfoot",
2895 "th",
2896 "thead",
2897 "tr",
2898 "track",
2899 "ul",
2900 "video",
2901 ];
2902
2903 let mut i = 0;
2904 while i < lines.len() {
2905 if lines[i].in_code_block || lines[i].in_front_matter {
2907 i += 1;
2908 continue;
2909 }
2910
2911 let trimmed = lines[i].content(content).trim_start();
2912
2913 if trimmed.starts_with('<') && trimmed.len() > 1 {
2915 let after_bracket = &trimmed[1..];
2917 let is_closing = after_bracket.starts_with('/');
2918 let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2919
2920 let tag_name = tag_start
2922 .chars()
2923 .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2924 .collect::<String>()
2925 .to_lowercase();
2926
2927 if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2929 lines[i].in_html_block = true;
2931
2932 if !is_closing {
2935 let closing_tag = format!("</{tag_name}>");
2936 let allow_blank_lines = tag_name == "style" || tag_name == "script";
2938 let mut j = i + 1;
2939 let mut found_closing_tag = false;
2940 while j < lines.len() && j < i + 100 {
2941 if !allow_blank_lines && lines[j].is_blank {
2944 break;
2945 }
2946
2947 lines[j].in_html_block = true;
2948
2949 if lines[j].content(content).contains(&closing_tag) {
2951 found_closing_tag = true;
2952 }
2953
2954 if found_closing_tag {
2957 j += 1;
2958 while j < lines.len() && j < i + 100 {
2960 if lines[j].is_blank {
2961 break;
2962 }
2963 lines[j].in_html_block = true;
2964 j += 1;
2965 }
2966 break;
2967 }
2968 j += 1;
2969 }
2970 }
2971 }
2972 }
2973
2974 i += 1;
2975 }
2976 }
2977
2978 fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2981 if !flavor.supports_esm_blocks() {
2983 return;
2984 }
2985
2986 let mut in_multiline_import = false;
2987
2988 for line in lines.iter_mut() {
2989 if line.in_code_block || line.in_front_matter || line.in_html_comment {
2991 in_multiline_import = false;
2992 continue;
2993 }
2994
2995 let line_content = line.content(content);
2996 let trimmed = line_content.trim();
2997
2998 if in_multiline_import {
3000 line.in_esm_block = true;
3001 if trimmed.ends_with('\'')
3004 || trimmed.ends_with('"')
3005 || trimmed.ends_with("';")
3006 || trimmed.ends_with("\";")
3007 || line_content.contains(';')
3008 {
3009 in_multiline_import = false;
3010 }
3011 continue;
3012 }
3013
3014 if line.is_blank {
3016 continue;
3017 }
3018
3019 if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
3021 line.in_esm_block = true;
3022
3023 let is_import = trimmed.starts_with("import ");
3031
3032 let is_complete =
3034 trimmed.ends_with(';')
3036 || (trimmed.contains(" from ") && (trimmed.ends_with('\'') || trimmed.ends_with('"')))
3038 || (!is_import && !trimmed.contains(" from ") && (
3040 trimmed.starts_with("export const ")
3041 || trimmed.starts_with("export let ")
3042 || trimmed.starts_with("export var ")
3043 || trimmed.starts_with("export function ")
3044 || trimmed.starts_with("export class ")
3045 || trimmed.starts_with("export default ")
3046 ));
3047
3048 if !is_complete && is_import {
3049 if trimmed.contains('{') && !trimmed.contains('}') {
3053 in_multiline_import = true;
3054 }
3055 }
3056 }
3057 }
3058 }
3059
3060 fn detect_jsx_and_mdx_comments(
3063 content: &str,
3064 lines: &mut [LineInfo],
3065 flavor: MarkdownFlavor,
3066 code_blocks: &[(usize, usize)],
3067 ) -> (ByteRanges, ByteRanges) {
3068 if !flavor.supports_jsx() {
3070 return (Vec::new(), Vec::new());
3071 }
3072
3073 let mut jsx_expression_ranges: Vec<(usize, usize)> = Vec::new();
3074 let mut mdx_comment_ranges: Vec<(usize, usize)> = Vec::new();
3075
3076 if !content.contains('{') {
3078 return (jsx_expression_ranges, mdx_comment_ranges);
3079 }
3080
3081 let bytes = content.as_bytes();
3082 let mut i = 0;
3083
3084 while i < bytes.len() {
3085 if bytes[i] == b'{' {
3086 if code_blocks.iter().any(|(start, end)| i >= *start && i < *end) {
3088 i += 1;
3089 continue;
3090 }
3091
3092 let start = i;
3093
3094 if i + 2 < bytes.len() && &bytes[i + 1..i + 3] == b"/*" {
3096 let mut j = i + 3;
3098 while j + 2 < bytes.len() {
3099 if &bytes[j..j + 2] == b"*/" && j + 2 < bytes.len() && bytes[j + 2] == b'}' {
3100 let end = j + 3;
3101 mdx_comment_ranges.push((start, end));
3102
3103 Self::mark_lines_in_range(lines, content, start, end, |line| {
3105 line.in_mdx_comment = true;
3106 });
3107
3108 i = end;
3109 break;
3110 }
3111 j += 1;
3112 }
3113 if j + 2 >= bytes.len() {
3114 mdx_comment_ranges.push((start, bytes.len()));
3116 Self::mark_lines_in_range(lines, content, start, bytes.len(), |line| {
3117 line.in_mdx_comment = true;
3118 });
3119 break;
3120 }
3121 } else {
3122 let mut brace_depth = 1;
3125 let mut j = i + 1;
3126 let mut in_string = false;
3127 let mut string_char = b'"';
3128
3129 while j < bytes.len() && brace_depth > 0 {
3130 let c = bytes[j];
3131
3132 if !in_string && (c == b'"' || c == b'\'' || c == b'`') {
3134 in_string = true;
3135 string_char = c;
3136 } else if in_string && c == string_char && (j == 0 || bytes[j - 1] != b'\\') {
3137 in_string = false;
3138 } else if !in_string {
3139 if c == b'{' {
3140 brace_depth += 1;
3141 } else if c == b'}' {
3142 brace_depth -= 1;
3143 }
3144 }
3145 j += 1;
3146 }
3147
3148 if brace_depth == 0 {
3149 let end = j;
3150 jsx_expression_ranges.push((start, end));
3151
3152 Self::mark_lines_in_range(lines, content, start, end, |line| {
3154 line.in_jsx_expression = true;
3155 });
3156
3157 i = end;
3158 } else {
3159 i += 1;
3160 }
3161 }
3162 } else {
3163 i += 1;
3164 }
3165 }
3166
3167 (jsx_expression_ranges, mdx_comment_ranges)
3168 }
3169
3170 fn detect_mkdocs_line_info(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
3173 if flavor != MarkdownFlavor::MkDocs {
3174 return;
3175 }
3176
3177 use crate::utils::mkdocs_admonitions;
3178 use crate::utils::mkdocs_definition_lists;
3179 use crate::utils::mkdocs_tabs;
3180
3181 let content_lines: Vec<&str> = content.lines().collect();
3182
3183 let mut in_admonition = false;
3185 let mut admonition_indent = 0;
3186
3187 let mut in_tab = false;
3189 let mut tab_indent = 0;
3190
3191 let mut in_definition = false;
3193
3194 for (i, line) in content_lines.iter().enumerate() {
3195 if i >= lines.len() {
3196 break;
3197 }
3198
3199 if lines[i].in_code_block {
3201 continue;
3202 }
3203
3204 if mkdocs_admonitions::is_admonition_start(line) {
3206 in_admonition = true;
3207 admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3208 lines[i].in_admonition = true;
3209 } else if in_admonition {
3210 if line.trim().is_empty() {
3212 lines[i].in_admonition = true;
3214 } else if mkdocs_admonitions::is_admonition_content(line, admonition_indent) {
3215 lines[i].in_admonition = true;
3216 } else {
3217 in_admonition = false;
3219 if mkdocs_admonitions::is_admonition_start(line) {
3221 in_admonition = true;
3222 admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3223 lines[i].in_admonition = true;
3224 }
3225 }
3226 }
3227
3228 if mkdocs_tabs::is_tab_marker(line) {
3230 in_tab = true;
3231 tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3232 lines[i].in_content_tab = true;
3233 } else if in_tab {
3234 if line.trim().is_empty() {
3236 lines[i].in_content_tab = true;
3238 } else if mkdocs_tabs::is_tab_content(line, tab_indent) {
3239 lines[i].in_content_tab = true;
3240 } else {
3241 in_tab = false;
3243 if mkdocs_tabs::is_tab_marker(line) {
3245 in_tab = true;
3246 tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3247 lines[i].in_content_tab = true;
3248 }
3249 }
3250 }
3251
3252 if mkdocs_definition_lists::is_definition_line(line) {
3254 in_definition = true;
3255 lines[i].in_definition_list = true;
3256 } else if in_definition {
3257 if mkdocs_definition_lists::is_definition_continuation(line) {
3259 lines[i].in_definition_list = true;
3260 } else if line.trim().is_empty() {
3261 lines[i].in_definition_list = true;
3263 } else if mkdocs_definition_lists::could_be_term_line(line) {
3264 if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1])
3266 {
3267 lines[i].in_definition_list = true;
3268 } else {
3269 in_definition = false;
3270 }
3271 } else {
3272 in_definition = false;
3273 }
3274 } else if mkdocs_definition_lists::could_be_term_line(line) {
3275 if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1]) {
3277 lines[i].in_definition_list = true;
3278 in_definition = true;
3279 }
3280 }
3281 }
3282 }
3283
3284 fn mark_lines_in_range<F>(lines: &mut [LineInfo], content: &str, start: usize, end: usize, mut f: F)
3286 where
3287 F: FnMut(&mut LineInfo),
3288 {
3289 for line in lines.iter_mut() {
3291 let line_start = line.byte_offset;
3292 let line_end = line.byte_offset + line.byte_len;
3293
3294 if line_start < end && line_end > start {
3296 f(line);
3297 }
3298 }
3299
3300 let _ = content;
3302 }
3303
3304 fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
3306 if !content.contains('`') {
3308 return Vec::new();
3309 }
3310
3311 let parser = Parser::new(content).into_offset_iter();
3313 let mut ranges = Vec::new();
3314
3315 for (event, range) in parser {
3316 if let Event::Code(_) = event {
3317 ranges.push((range.start, range.end));
3318 }
3319 }
3320
3321 Self::build_code_spans_from_ranges(content, lines, &ranges)
3322 }
3323
3324 fn build_code_spans_from_ranges(content: &str, lines: &[LineInfo], ranges: &[(usize, usize)]) -> Vec<CodeSpan> {
3325 let mut code_spans = Vec::new();
3326 if ranges.is_empty() {
3327 return code_spans;
3328 }
3329
3330 for &(start_pos, end_pos) in ranges {
3331 let full_span = &content[start_pos..end_pos];
3333 let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
3334
3335 let content_start = start_pos + backtick_count;
3337 let content_end = end_pos - backtick_count;
3338 let span_content = if content_start < content_end {
3339 content[content_start..content_end].to_string()
3340 } else {
3341 String::new()
3342 };
3343
3344 let line_idx = lines
3347 .partition_point(|line| line.byte_offset <= start_pos)
3348 .saturating_sub(1);
3349 let line_num = line_idx + 1;
3350 let byte_col_start = start_pos - lines[line_idx].byte_offset;
3351
3352 let end_line_idx = lines
3354 .partition_point(|line| line.byte_offset <= end_pos)
3355 .saturating_sub(1);
3356 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3357
3358 let line_content = lines[line_idx].content(content);
3361 let col_start = if byte_col_start <= line_content.len() {
3362 line_content[..byte_col_start].chars().count()
3363 } else {
3364 line_content.chars().count()
3365 };
3366
3367 let end_line_content = lines[end_line_idx].content(content);
3368 let col_end = if byte_col_end <= end_line_content.len() {
3369 end_line_content[..byte_col_end].chars().count()
3370 } else {
3371 end_line_content.chars().count()
3372 };
3373
3374 code_spans.push(CodeSpan {
3375 line: line_num,
3376 end_line: end_line_idx + 1,
3377 start_col: col_start,
3378 end_col: col_end,
3379 byte_offset: start_pos,
3380 byte_end: end_pos,
3381 backtick_count,
3382 content: span_content,
3383 });
3384 }
3385
3386 code_spans.sort_by_key(|span| span.byte_offset);
3388
3389 code_spans
3390 }
3391
3392 fn parse_math_spans(content: &str, lines: &[LineInfo]) -> Vec<MathSpan> {
3394 let mut math_spans = Vec::new();
3395
3396 if !content.contains('$') {
3398 return math_spans;
3399 }
3400
3401 let mut options = Options::empty();
3403 options.insert(Options::ENABLE_MATH);
3404 let parser = Parser::new_ext(content, options).into_offset_iter();
3405
3406 for (event, range) in parser {
3407 let (is_display, math_content) = match &event {
3408 Event::InlineMath(text) => (false, text.as_ref()),
3409 Event::DisplayMath(text) => (true, text.as_ref()),
3410 _ => continue,
3411 };
3412
3413 let start_pos = range.start;
3414 let end_pos = range.end;
3415
3416 let line_idx = lines
3418 .partition_point(|line| line.byte_offset <= start_pos)
3419 .saturating_sub(1);
3420 let line_num = line_idx + 1;
3421 let byte_col_start = start_pos - lines[line_idx].byte_offset;
3422
3423 let end_line_idx = lines
3425 .partition_point(|line| line.byte_offset <= end_pos)
3426 .saturating_sub(1);
3427 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3428
3429 let line_content = lines[line_idx].content(content);
3431 let col_start = if byte_col_start <= line_content.len() {
3432 line_content[..byte_col_start].chars().count()
3433 } else {
3434 line_content.chars().count()
3435 };
3436
3437 let end_line_content = lines[end_line_idx].content(content);
3438 let col_end = if byte_col_end <= end_line_content.len() {
3439 end_line_content[..byte_col_end].chars().count()
3440 } else {
3441 end_line_content.chars().count()
3442 };
3443
3444 math_spans.push(MathSpan {
3445 line: line_num,
3446 end_line: end_line_idx + 1,
3447 start_col: col_start,
3448 end_col: col_end,
3449 byte_offset: start_pos,
3450 byte_end: end_pos,
3451 is_display,
3452 content: math_content.to_string(),
3453 });
3454 }
3455
3456 math_spans.sort_by_key(|span| span.byte_offset);
3458
3459 math_spans
3460 }
3461
3462 fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
3473 const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
3475
3476 #[inline]
3479 fn reset_tracking_state(
3480 list_item: &ListItemInfo,
3481 has_list_breaking_content: &mut bool,
3482 min_continuation: &mut usize,
3483 ) {
3484 *has_list_breaking_content = false;
3485 let marker_width = if list_item.is_ordered {
3486 list_item.marker.len() + 1 } else {
3488 list_item.marker.len()
3489 };
3490 *min_continuation = if list_item.is_ordered {
3491 marker_width
3492 } else {
3493 UNORDERED_LIST_MIN_CONTINUATION_INDENT
3494 };
3495 }
3496
3497 let mut list_blocks = Vec::with_capacity(lines.len() / 10); let mut current_block: Option<ListBlock> = None;
3500 let mut last_list_item_line = 0;
3501 let mut current_indent_level = 0;
3502 let mut last_marker_width = 0;
3503
3504 let mut has_list_breaking_content_since_last_item = false;
3506 let mut min_continuation_for_tracking = 0;
3507
3508 for (line_idx, line_info) in lines.iter().enumerate() {
3509 let line_num = line_idx + 1;
3510
3511 if line_info.in_code_block {
3513 if let Some(ref mut block) = current_block {
3514 let min_continuation_indent =
3516 CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
3517
3518 let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
3520
3521 match context {
3522 CodeBlockContext::Indented => {
3523 block.end_line = line_num;
3525 continue;
3526 }
3527 CodeBlockContext::Standalone => {
3528 let completed_block = current_block.take().unwrap();
3530 list_blocks.push(completed_block);
3531 continue;
3532 }
3533 CodeBlockContext::Adjacent => {
3534 block.end_line = line_num;
3536 continue;
3537 }
3538 }
3539 } else {
3540 continue;
3542 }
3543 }
3544
3545 let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
3547 caps.get(0).unwrap().as_str().to_string()
3548 } else {
3549 String::new()
3550 };
3551
3552 if let Some(ref block) = current_block
3555 && line_info.list_item.is_none()
3556 && !line_info.is_blank
3557 && !line_info.in_code_span_continuation
3558 {
3559 let line_content = line_info.content(content).trim();
3560
3561 let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
3566
3567 let blockquote_prefix_changes = blockquote_prefix.trim() != block.blockquote_prefix.trim();
3570
3571 let breaks_list = line_info.heading.is_some()
3572 || line_content.starts_with("---")
3573 || line_content.starts_with("***")
3574 || line_content.starts_with("___")
3575 || crate::utils::skip_context::is_table_line(line_content)
3576 || blockquote_prefix_changes
3577 || (line_info.indent > 0
3578 && line_info.indent < min_continuation_for_tracking
3579 && !is_lazy_continuation);
3580
3581 if breaks_list {
3582 has_list_breaking_content_since_last_item = true;
3583 }
3584 }
3585
3586 if line_info.in_code_span_continuation
3589 && line_info.list_item.is_none()
3590 && let Some(ref mut block) = current_block
3591 {
3592 block.end_line = line_num;
3593 }
3594
3595 let effective_continuation_indent = if let Some(ref block) = current_block {
3601 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3602 let line_content = line_info.content(content);
3603 let line_bq_level = line_content
3604 .chars()
3605 .take_while(|c| *c == '>' || c.is_whitespace())
3606 .filter(|&c| c == '>')
3607 .count();
3608 if line_bq_level > 0 && line_bq_level == block_bq_level {
3609 let mut pos = 0;
3611 let mut found_markers = 0;
3612 for c in line_content.chars() {
3613 pos += c.len_utf8();
3614 if c == '>' {
3615 found_markers += 1;
3616 if found_markers == line_bq_level {
3617 if line_content.get(pos..pos + 1) == Some(" ") {
3618 pos += 1;
3619 }
3620 break;
3621 }
3622 }
3623 }
3624 let after_bq = &line_content[pos..];
3625 after_bq.len() - after_bq.trim_start().len()
3626 } else {
3627 line_info.indent
3628 }
3629 } else {
3630 line_info.indent
3631 };
3632 let adjusted_min_continuation_for_tracking = if let Some(ref block) = current_block {
3633 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3634 if block_bq_level > 0 {
3635 if block.is_ordered { last_marker_width } else { 2 }
3636 } else {
3637 min_continuation_for_tracking
3638 }
3639 } else {
3640 min_continuation_for_tracking
3641 };
3642 let is_structural_element = line_info.heading.is_some()
3645 || line_info.content(content).trim().starts_with("```")
3646 || line_info.content(content).trim().starts_with("~~~");
3647 let is_valid_continuation = effective_continuation_indent >= adjusted_min_continuation_for_tracking
3648 || (line_info.indent == 0 && !line_info.is_blank && !is_structural_element);
3649
3650 if std::env::var("RUMDL_DEBUG_LIST").is_ok() && line_info.list_item.is_none() && !line_info.is_blank {
3651 eprintln!(
3652 "[DEBUG] Line {}: checking continuation - indent={}, min_cont={}, is_valid={}, in_code_span={}, in_code_block={}, has_block={}",
3653 line_num,
3654 effective_continuation_indent,
3655 adjusted_min_continuation_for_tracking,
3656 is_valid_continuation,
3657 line_info.in_code_span_continuation,
3658 line_info.in_code_block,
3659 current_block.is_some()
3660 );
3661 }
3662
3663 if !line_info.in_code_span_continuation
3664 && line_info.list_item.is_none()
3665 && !line_info.is_blank
3666 && !line_info.in_code_block
3667 && is_valid_continuation
3668 && let Some(ref mut block) = current_block
3669 {
3670 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3671 eprintln!(
3672 "[DEBUG] Line {}: extending block.end_line from {} to {}",
3673 line_num, block.end_line, line_num
3674 );
3675 }
3676 block.end_line = line_num;
3677 }
3678
3679 if let Some(list_item) = &line_info.list_item {
3681 let item_indent = list_item.marker_column;
3683 let nesting = item_indent / 2; if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3686 eprintln!(
3687 "[DEBUG] Line {}: list item found, marker={:?}, indent={}",
3688 line_num, list_item.marker, item_indent
3689 );
3690 }
3691
3692 if let Some(ref mut block) = current_block {
3693 let is_nested = nesting > block.nesting_level;
3697 let same_type =
3698 (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
3699 let same_context = block.blockquote_prefix == blockquote_prefix;
3700 let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
3702
3703 let marker_compatible =
3705 block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
3706
3707 let has_non_list_content = has_list_breaking_content_since_last_item;
3710
3711 let mut continues_list = if is_nested {
3715 same_context && reasonable_distance && !has_non_list_content
3717 } else {
3718 same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
3720 };
3721
3722 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3723 eprintln!(
3724 "[DEBUG] Line {}: continues_list={}, is_nested={}, same_type={}, same_context={}, reasonable_distance={}, marker_compatible={}, has_non_list_content={}, last_item={}, block.end_line={}",
3725 line_num,
3726 continues_list,
3727 is_nested,
3728 same_type,
3729 same_context,
3730 reasonable_distance,
3731 marker_compatible,
3732 has_non_list_content,
3733 last_list_item_line,
3734 block.end_line
3735 );
3736 }
3737
3738 if !continues_list
3742 && (is_nested || same_type)
3743 && reasonable_distance
3744 && line_num > 0
3745 && block.end_line == line_num - 1
3746 {
3747 if block.item_lines.contains(&(line_num - 1)) {
3750 continues_list = true;
3752 } else {
3753 continues_list = true;
3757 }
3758 }
3759
3760 if continues_list {
3761 block.end_line = line_num;
3763 block.item_lines.push(line_num);
3764
3765 block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
3767 list_item.marker.len() + 1
3768 } else {
3769 list_item.marker.len()
3770 });
3771
3772 if !block.is_ordered
3774 && block.marker.is_some()
3775 && block.marker.as_ref() != Some(&list_item.marker)
3776 {
3777 block.marker = None;
3779 }
3780
3781 reset_tracking_state(
3783 list_item,
3784 &mut has_list_breaking_content_since_last_item,
3785 &mut min_continuation_for_tracking,
3786 );
3787 } else {
3788 if !same_type
3793 && !is_nested
3794 && let Some(&last_item) = block.item_lines.last()
3795 {
3796 block.end_line = last_item;
3797 }
3798
3799 list_blocks.push(block.clone());
3800
3801 *block = ListBlock {
3802 start_line: line_num,
3803 end_line: line_num,
3804 is_ordered: list_item.is_ordered,
3805 marker: if list_item.is_ordered {
3806 None
3807 } else {
3808 Some(list_item.marker.clone())
3809 },
3810 blockquote_prefix: blockquote_prefix.clone(),
3811 item_lines: vec![line_num],
3812 nesting_level: nesting,
3813 max_marker_width: if list_item.is_ordered {
3814 list_item.marker.len() + 1
3815 } else {
3816 list_item.marker.len()
3817 },
3818 };
3819
3820 reset_tracking_state(
3822 list_item,
3823 &mut has_list_breaking_content_since_last_item,
3824 &mut min_continuation_for_tracking,
3825 );
3826 }
3827 } else {
3828 current_block = Some(ListBlock {
3830 start_line: line_num,
3831 end_line: line_num,
3832 is_ordered: list_item.is_ordered,
3833 marker: if list_item.is_ordered {
3834 None
3835 } else {
3836 Some(list_item.marker.clone())
3837 },
3838 blockquote_prefix,
3839 item_lines: vec![line_num],
3840 nesting_level: nesting,
3841 max_marker_width: list_item.marker.len(),
3842 });
3843
3844 reset_tracking_state(
3846 list_item,
3847 &mut has_list_breaking_content_since_last_item,
3848 &mut min_continuation_for_tracking,
3849 );
3850 }
3851
3852 last_list_item_line = line_num;
3853 current_indent_level = item_indent;
3854 last_marker_width = if list_item.is_ordered {
3855 list_item.marker.len() + 1 } else {
3857 list_item.marker.len()
3858 };
3859 } else if let Some(ref mut block) = current_block {
3860 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3862 eprintln!(
3863 "[DEBUG] Line {}: non-list-item, is_blank={}, block exists",
3864 line_num, line_info.is_blank
3865 );
3866 }
3867
3868 let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
3876 lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
3877 } else {
3878 false
3879 };
3880
3881 let min_continuation_indent = if block.is_ordered {
3885 current_indent_level + last_marker_width
3886 } else {
3887 current_indent_level + 2 };
3889
3890 if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
3891 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3893 eprintln!(
3894 "[DEBUG] Line {}: indented continuation (indent={}, min={})",
3895 line_num, line_info.indent, min_continuation_indent
3896 );
3897 }
3898 block.end_line = line_num;
3899 } else if line_info.is_blank {
3900 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3903 eprintln!("[DEBUG] Line {line_num}: entering blank line handling");
3904 }
3905 let mut check_idx = line_idx + 1;
3906 let mut found_continuation = false;
3907
3908 while check_idx < lines.len() && lines[check_idx].is_blank {
3910 check_idx += 1;
3911 }
3912
3913 if check_idx < lines.len() {
3914 let next_line = &lines[check_idx];
3915 let next_content = next_line.content(content);
3917 let block_bq_level_for_indent = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3920 let next_bq_level_for_indent = next_content
3921 .chars()
3922 .take_while(|c| *c == '>' || c.is_whitespace())
3923 .filter(|&c| c == '>')
3924 .count();
3925 let effective_indent =
3926 if next_bq_level_for_indent > 0 && next_bq_level_for_indent == block_bq_level_for_indent {
3927 let mut pos = 0;
3930 let mut found_markers = 0;
3931 for c in next_content.chars() {
3932 pos += c.len_utf8();
3933 if c == '>' {
3934 found_markers += 1;
3935 if found_markers == next_bq_level_for_indent {
3936 if next_content.get(pos..pos + 1) == Some(" ") {
3938 pos += 1;
3939 }
3940 break;
3941 }
3942 }
3943 }
3944 let after_blockquote_marker = &next_content[pos..];
3945 after_blockquote_marker.len() - after_blockquote_marker.trim_start().len()
3946 } else {
3947 next_line.indent
3948 };
3949 let adjusted_min_continuation = if block_bq_level_for_indent > 0 {
3952 if block.is_ordered { last_marker_width } else { 2 }
3955 } else {
3956 min_continuation_indent
3957 };
3958 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3960 eprintln!(
3961 "[DEBUG] Blank line {} checking next line {}: effective_indent={}, adjusted_min={}, next_is_list={}, in_code_block={}",
3962 line_num,
3963 check_idx + 1,
3964 effective_indent,
3965 adjusted_min_continuation,
3966 next_line.list_item.is_some(),
3967 next_line.in_code_block
3968 );
3969 }
3970 if !next_line.in_code_block && effective_indent >= adjusted_min_continuation {
3971 found_continuation = true;
3972 }
3973 else if !next_line.in_code_block
3975 && next_line.list_item.is_some()
3976 && let Some(item) = &next_line.list_item
3977 {
3978 let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
3979 .find(next_line.content(content))
3980 .map_or(String::new(), |m| m.as_str().to_string());
3981 if item.marker_column == current_indent_level
3982 && item.is_ordered == block.is_ordered
3983 && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
3984 {
3985 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3989 let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
3990 if let Some(between_line) = lines.get(idx) {
3991 let between_content = between_line.content(content);
3992 let trimmed = between_content.trim();
3993 if trimmed.is_empty() {
3995 return false;
3996 }
3997 let line_indent = between_content.len() - between_content.trim_start().len();
3999
4000 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4002 .find(between_content)
4003 .map_or(String::new(), |m| m.as_str().to_string());
4004 let between_bq_level = between_bq_prefix.chars().filter(|&c| c == '>').count();
4005 let blockquote_level_changed =
4006 trimmed.starts_with(">") && between_bq_level != block_bq_level;
4007
4008 if trimmed.starts_with("```")
4010 || trimmed.starts_with("~~~")
4011 || trimmed.starts_with("---")
4012 || trimmed.starts_with("***")
4013 || trimmed.starts_with("___")
4014 || blockquote_level_changed
4015 || crate::utils::skip_context::is_table_line(trimmed)
4016 || between_line.heading.is_some()
4017 {
4018 return true; }
4020
4021 line_indent >= min_continuation_indent
4023 } else {
4024 false
4025 }
4026 });
4027
4028 if block.is_ordered {
4029 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4032 if let Some(between_line) = lines.get(idx) {
4033 let between_content = between_line.content(content);
4034 let trimmed = between_content.trim();
4035 if trimmed.is_empty() {
4036 return false;
4037 }
4038 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4040 .find(between_content)
4041 .map_or(String::new(), |m| m.as_str().to_string());
4042 let between_bq_level =
4043 between_bq_prefix.chars().filter(|&c| c == '>').count();
4044 let blockquote_level_changed =
4045 trimmed.starts_with(">") && between_bq_level != block_bq_level;
4046 trimmed.starts_with("```")
4048 || trimmed.starts_with("~~~")
4049 || trimmed.starts_with("---")
4050 || trimmed.starts_with("***")
4051 || trimmed.starts_with("___")
4052 || blockquote_level_changed
4053 || crate::utils::skip_context::is_table_line(trimmed)
4054 || between_line.heading.is_some()
4055 } else {
4056 false
4057 }
4058 });
4059 found_continuation = !has_structural_separators;
4060 } else {
4061 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4063 if let Some(between_line) = lines.get(idx) {
4064 let between_content = between_line.content(content);
4065 let trimmed = between_content.trim();
4066 if trimmed.is_empty() {
4067 return false;
4068 }
4069 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4071 .find(between_content)
4072 .map_or(String::new(), |m| m.as_str().to_string());
4073 let between_bq_level =
4074 between_bq_prefix.chars().filter(|&c| c == '>').count();
4075 let blockquote_level_changed =
4076 trimmed.starts_with(">") && between_bq_level != block_bq_level;
4077 trimmed.starts_with("```")
4079 || trimmed.starts_with("~~~")
4080 || trimmed.starts_with("---")
4081 || trimmed.starts_with("***")
4082 || trimmed.starts_with("___")
4083 || blockquote_level_changed
4084 || crate::utils::skip_context::is_table_line(trimmed)
4085 || between_line.heading.is_some()
4086 } else {
4087 false
4088 }
4089 });
4090 found_continuation = !has_structural_separators;
4091 }
4092 }
4093 }
4094 }
4095
4096 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4097 eprintln!("[DEBUG] Blank line {line_num} final: found_continuation={found_continuation}");
4098 }
4099 if found_continuation {
4100 block.end_line = line_num;
4102 } else {
4103 list_blocks.push(block.clone());
4105 current_block = None;
4106 }
4107 } else {
4108 let min_required_indent = if block.is_ordered {
4111 current_indent_level + last_marker_width
4112 } else {
4113 current_indent_level + 2
4114 };
4115
4116 let line_content = line_info.content(content).trim();
4121
4122 let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
4124
4125 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4128 let current_bq_level = blockquote_prefix.chars().filter(|&c| c == '>').count();
4129 let blockquote_level_changed = line_content.starts_with(">") && current_bq_level != block_bq_level;
4130
4131 let is_structural_separator = line_info.heading.is_some()
4132 || line_content.starts_with("```")
4133 || line_content.starts_with("~~~")
4134 || line_content.starts_with("---")
4135 || line_content.starts_with("***")
4136 || line_content.starts_with("___")
4137 || blockquote_level_changed
4138 || looks_like_table;
4139
4140 let is_lazy_continuation = !is_structural_separator
4144 && !line_info.is_blank
4145 && (line_info.indent == 0
4146 || line_info.indent >= min_required_indent
4147 || line_info.in_code_span_continuation);
4148
4149 if is_lazy_continuation {
4150 block.end_line = line_num;
4153 } else {
4154 list_blocks.push(block.clone());
4156 current_block = None;
4157 }
4158 }
4159 }
4160 }
4161
4162 if let Some(block) = current_block {
4164 list_blocks.push(block);
4165 }
4166
4167 merge_adjacent_list_blocks(content, &mut list_blocks, lines);
4169
4170 list_blocks
4171 }
4172
4173 fn compute_char_frequency(content: &str) -> CharFrequency {
4175 let mut frequency = CharFrequency::default();
4176
4177 for ch in content.chars() {
4178 match ch {
4179 '#' => frequency.hash_count += 1,
4180 '*' => frequency.asterisk_count += 1,
4181 '_' => frequency.underscore_count += 1,
4182 '-' => frequency.hyphen_count += 1,
4183 '+' => frequency.plus_count += 1,
4184 '>' => frequency.gt_count += 1,
4185 '|' => frequency.pipe_count += 1,
4186 '[' => frequency.bracket_count += 1,
4187 '`' => frequency.backtick_count += 1,
4188 '<' => frequency.lt_count += 1,
4189 '!' => frequency.exclamation_count += 1,
4190 '\n' => frequency.newline_count += 1,
4191 _ => {}
4192 }
4193 }
4194
4195 frequency
4196 }
4197
4198 fn parse_html_tags(
4200 content: &str,
4201 lines: &[LineInfo],
4202 code_blocks: &[(usize, usize)],
4203 flavor: MarkdownFlavor,
4204 ) -> Vec<HtmlTag> {
4205 static HTML_TAG_REGEX: LazyLock<regex::Regex> =
4206 LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
4207
4208 let mut html_tags = Vec::with_capacity(content.matches('<').count());
4209
4210 for cap in HTML_TAG_REGEX.captures_iter(content) {
4211 let full_match = cap.get(0).unwrap();
4212 let match_start = full_match.start();
4213 let match_end = full_match.end();
4214
4215 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4217 continue;
4218 }
4219
4220 let is_closing = !cap.get(1).unwrap().as_str().is_empty();
4221 let tag_name_original = cap.get(2).unwrap().as_str();
4222 let tag_name = tag_name_original.to_lowercase();
4223 let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
4224
4225 if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
4228 continue;
4229 }
4230
4231 let mut line_num = 1;
4233 let mut col_start = match_start;
4234 let mut col_end = match_end;
4235 for (idx, line_info) in lines.iter().enumerate() {
4236 if match_start >= line_info.byte_offset {
4237 line_num = idx + 1;
4238 col_start = match_start - line_info.byte_offset;
4239 col_end = match_end - line_info.byte_offset;
4240 } else {
4241 break;
4242 }
4243 }
4244
4245 html_tags.push(HtmlTag {
4246 line: line_num,
4247 start_col: col_start,
4248 end_col: col_end,
4249 byte_offset: match_start,
4250 byte_end: match_end,
4251 tag_name,
4252 is_closing,
4253 is_self_closing,
4254 raw_content: full_match.as_str().to_string(),
4255 });
4256 }
4257
4258 html_tags
4259 }
4260
4261 fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
4263 let mut table_rows = Vec::with_capacity(lines.len() / 20);
4264
4265 for (line_idx, line_info) in lines.iter().enumerate() {
4266 if line_info.in_code_block || line_info.is_blank {
4268 continue;
4269 }
4270
4271 let line = line_info.content(content);
4272 let line_num = line_idx + 1;
4273
4274 if !line.contains('|') {
4276 continue;
4277 }
4278
4279 let parts: Vec<&str> = line.split('|').collect();
4281 let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
4282
4283 let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
4285 let mut column_alignments = Vec::new();
4286
4287 if is_separator {
4288 for part in &parts[1..parts.len() - 1] {
4289 let trimmed = part.trim();
4291 let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
4292 "center".to_string()
4293 } else if trimmed.ends_with(':') {
4294 "right".to_string()
4295 } else if trimmed.starts_with(':') {
4296 "left".to_string()
4297 } else {
4298 "none".to_string()
4299 };
4300 column_alignments.push(alignment);
4301 }
4302 }
4303
4304 table_rows.push(TableRow {
4305 line: line_num,
4306 is_separator,
4307 column_count,
4308 column_alignments,
4309 });
4310 }
4311
4312 table_rows
4313 }
4314
4315 fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
4317 let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
4318
4319 for cap in URL_SIMPLE_REGEX.captures_iter(content) {
4321 let full_match = cap.get(0).unwrap();
4322 let match_start = full_match.start();
4323 let match_end = full_match.end();
4324
4325 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4327 continue;
4328 }
4329
4330 let preceding_char = if match_start > 0 {
4332 content.chars().nth(match_start - 1)
4333 } else {
4334 None
4335 };
4336 let following_char = content.chars().nth(match_end);
4337
4338 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4339 continue;
4340 }
4341 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4342 continue;
4343 }
4344
4345 let url = full_match.as_str();
4346 let url_type = if url.starts_with("https://") {
4347 "https"
4348 } else if url.starts_with("http://") {
4349 "http"
4350 } else if url.starts_with("ftp://") {
4351 "ftp"
4352 } else {
4353 "other"
4354 };
4355
4356 let mut line_num = 1;
4358 let mut col_start = match_start;
4359 let mut col_end = match_end;
4360 for (idx, line_info) in lines.iter().enumerate() {
4361 if match_start >= line_info.byte_offset {
4362 line_num = idx + 1;
4363 col_start = match_start - line_info.byte_offset;
4364 col_end = match_end - line_info.byte_offset;
4365 } else {
4366 break;
4367 }
4368 }
4369
4370 bare_urls.push(BareUrl {
4371 line: line_num,
4372 start_col: col_start,
4373 end_col: col_end,
4374 byte_offset: match_start,
4375 byte_end: match_end,
4376 url: url.to_string(),
4377 url_type: url_type.to_string(),
4378 });
4379 }
4380
4381 for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
4383 let full_match = cap.get(0).unwrap();
4384 let match_start = full_match.start();
4385 let match_end = full_match.end();
4386
4387 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4389 continue;
4390 }
4391
4392 let preceding_char = if match_start > 0 {
4394 content.chars().nth(match_start - 1)
4395 } else {
4396 None
4397 };
4398 let following_char = content.chars().nth(match_end);
4399
4400 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4401 continue;
4402 }
4403 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4404 continue;
4405 }
4406
4407 let email = full_match.as_str();
4408
4409 let mut line_num = 1;
4411 let mut col_start = match_start;
4412 let mut col_end = match_end;
4413 for (idx, line_info) in lines.iter().enumerate() {
4414 if match_start >= line_info.byte_offset {
4415 line_num = idx + 1;
4416 col_start = match_start - line_info.byte_offset;
4417 col_end = match_end - line_info.byte_offset;
4418 } else {
4419 break;
4420 }
4421 }
4422
4423 bare_urls.push(BareUrl {
4424 line: line_num,
4425 start_col: col_start,
4426 end_col: col_end,
4427 byte_offset: match_start,
4428 byte_end: match_end,
4429 url: email.to_string(),
4430 url_type: "email".to_string(),
4431 });
4432 }
4433
4434 bare_urls
4435 }
4436
4437 #[must_use]
4457 pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
4458 ValidHeadingsIter::new(&self.lines)
4459 }
4460
4461 #[must_use]
4465 pub fn has_valid_headings(&self) -> bool {
4466 self.lines
4467 .iter()
4468 .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
4469 }
4470}
4471
4472fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
4474 if list_blocks.len() < 2 {
4475 return;
4476 }
4477
4478 let mut merger = ListBlockMerger::new(content, lines);
4479 *list_blocks = merger.merge(list_blocks);
4480}
4481
4482struct ListBlockMerger<'a> {
4484 content: &'a str,
4485 lines: &'a [LineInfo],
4486}
4487
4488impl<'a> ListBlockMerger<'a> {
4489 fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
4490 Self { content, lines }
4491 }
4492
4493 fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
4494 let mut merged = Vec::with_capacity(list_blocks.len());
4495 let mut current = list_blocks[0].clone();
4496
4497 for next in list_blocks.iter().skip(1) {
4498 if self.should_merge_blocks(¤t, next) {
4499 current = self.merge_two_blocks(current, next);
4500 } else {
4501 merged.push(current);
4502 current = next.clone();
4503 }
4504 }
4505
4506 merged.push(current);
4507 merged
4508 }
4509
4510 fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
4512 if !self.blocks_are_compatible(current, next) {
4514 return false;
4515 }
4516
4517 let spacing = self.analyze_spacing_between(current, next);
4519 match spacing {
4520 BlockSpacing::Consecutive => true,
4521 BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
4522 BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
4523 self.can_merge_with_content_between(current, next)
4524 }
4525 }
4526 }
4527
4528 fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
4530 current.is_ordered == next.is_ordered
4531 && current.blockquote_prefix == next.blockquote_prefix
4532 && current.nesting_level == next.nesting_level
4533 }
4534
4535 fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
4537 let gap = next.start_line - current.end_line;
4538
4539 match gap {
4540 1 => BlockSpacing::Consecutive,
4541 2 => BlockSpacing::SingleBlank,
4542 _ if gap > 2 => {
4543 if self.has_only_blank_lines_between(current, next) {
4544 BlockSpacing::MultipleBlanks
4545 } else {
4546 BlockSpacing::ContentBetween
4547 }
4548 }
4549 _ => BlockSpacing::Consecutive, }
4551 }
4552
4553 fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4555 if has_meaningful_content_between(self.content, current, next, self.lines) {
4558 return false; }
4560
4561 !current.is_ordered && current.marker == next.marker
4563 }
4564
4565 fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4567 if has_meaningful_content_between(self.content, current, next, self.lines) {
4569 return false; }
4571
4572 current.is_ordered && next.is_ordered
4574 }
4575
4576 fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4578 for line_num in (current.end_line + 1)..next.start_line {
4579 if let Some(line_info) = self.lines.get(line_num - 1)
4580 && !line_info.content(self.content).trim().is_empty()
4581 {
4582 return false;
4583 }
4584 }
4585 true
4586 }
4587
4588 fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
4590 current.end_line = next.end_line;
4591 current.item_lines.extend_from_slice(&next.item_lines);
4592
4593 current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
4595
4596 if !current.is_ordered && self.markers_differ(¤t, next) {
4598 current.marker = None; }
4600
4601 current
4602 }
4603
4604 fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
4606 current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
4607 }
4608}
4609
/// How two neighbouring list blocks are separated, as classified by
/// `ListBlockMerger::analyze_spacing_between`.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// The next block starts on the line right after the current block ends.
    Consecutive,
    /// Exactly one line lies between the two blocks.
    SingleBlank,
    /// Several lines lie between the blocks and all of them are blank.
    MultipleBlanks,
    /// Several lines lie between the blocks and at least one is not blank.
    ContentBetween,
}
4618
4619fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
4621 for line_num in (current.end_line + 1)..next.start_line {
4623 if let Some(line_info) = lines.get(line_num - 1) {
4624 let trimmed = line_info.content(content).trim();
4626
4627 if trimmed.is_empty() {
4629 continue;
4630 }
4631
4632 if line_info.heading.is_some() {
4636 return true; }
4638
4639 if is_horizontal_rule(trimmed) {
4641 return true; }
4643
4644 if crate::utils::skip_context::is_table_line(trimmed) {
4646 return true; }
4648
4649 if trimmed.starts_with('>') {
4651 return true; }
4653
4654 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
4656 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4657
4658 let min_continuation_indent = if current.is_ordered {
4660 current.nesting_level + current.max_marker_width + 1 } else {
4662 current.nesting_level + 2
4663 };
4664
4665 if line_indent < min_continuation_indent {
4666 return true; }
4669 }
4670
4671 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4673
4674 let min_indent = if current.is_ordered {
4676 current.nesting_level + current.max_marker_width
4677 } else {
4678 current.nesting_level + 2
4679 };
4680
4681 if line_indent < min_indent {
4683 return true; }
4685
4686 }
4689 }
4690
4691 false
4693}
4694
4695pub fn is_horizontal_rule_line(line: &str) -> bool {
4702 let leading_spaces = line.len() - line.trim_start_matches(' ').len();
4704 if leading_spaces > 3 || line.starts_with('\t') {
4705 return false;
4706 }
4707
4708 is_horizontal_rule_content(line.trim())
4709}
4710
/// Checks whether already-trimmed text has the shape of a horizontal rule.
///
/// The text must start with `-`, `*` or `_`, contain that same character at
/// least three times, and contain nothing else except spaces and tabs.
/// Text shorter than three bytes is never a rule.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    if trimmed.len() < 3 {
        return false;
    }

    // The first character selects the rule marker; anything else is not a rule.
    let Some(marker) = trimmed.chars().next().filter(|c| matches!(c, '-' | '*' | '_')) else {
        return false;
    };

    // Walk the characters directly; no intermediate Vec<char> is needed.
    let mut marker_count = 0usize;
    for ch in trimmed.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' && ch != '\t' {
            return false;
        }
    }

    marker_count >= 3
}
4735
4736pub fn is_horizontal_rule(trimmed: &str) -> bool {
4738 is_horizontal_rule_content(trimmed)
4739}
4740
4741#[cfg(test)]
4743mod tests {
4744 use super::*;
4745
/// An empty document still has one line offset (0) but no line entries.
#[test]
fn test_empty_content() {
    let empty_ctx = LintContext::new("", MarkdownFlavor::Standard, None);

    assert_eq!(empty_ctx.content, "");
    assert_eq!(empty_ctx.line_offsets, vec![0]);
    assert_eq!(empty_ctx.offset_to_line_col(0), (1, 1));
    assert_eq!(empty_ctx.lines.len(), 0);
}
4754
/// A one-line document maps byte offsets to 1-based columns on line 1.
#[test]
fn test_single_line() {
    let text = "# Hello";
    let ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    assert_eq!(ctx.content, "# Hello");
    assert_eq!(ctx.line_offsets, vec![0]);
    for (offset, expected) in [(0, (1, 1)), (3, (1, 4))] {
        assert_eq!(ctx.offset_to_line_col(offset), expected);
    }
}
4763
/// Line offsets and offset → (line, column) mapping across several lines,
/// including a blank one.
#[test]
fn test_multi_line() {
    let content = "# Title\n\nSecond line\nThird line";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);

    // (byte offset, expected (line, column)), both 1-based.
    let expectations = [
        (0, (1, 1)),
        (8, (2, 1)),
        (9, (3, 1)),
        (15, (3, 7)),
        (21, (4, 1)),
    ];
    for (offset, expected) in expectations {
        assert_eq!(ctx.offset_to_line_col(offset), expected);
    }
}
4776
/// Per-line metadata: content, byte offset, indent, blank flag, code-block
/// flag and list-item detection.
#[test]
fn test_line_info() {
    // The second line is indented by four spaces so that the `indent == 4`
    // assertions below match the fixture.
    let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    assert_eq!(ctx.lines.len(), 7);

    // Line 1: the heading.
    let line1 = &ctx.lines[0];
    assert_eq!(line1.content(ctx.content), "# Title");
    assert_eq!(line1.byte_offset, 0);
    assert_eq!(line1.indent, 0);
    assert!(!line1.is_blank);
    assert!(!line1.in_code_block);
    assert!(line1.list_item.is_none());

    // Line 2: indented text.
    let line2 = &ctx.lines[1];
    assert_eq!(line2.content(ctx.content), "    indented");
    assert_eq!(line2.byte_offset, 8);
    assert_eq!(line2.indent, 4);
    assert!(!line2.is_blank);

    // Line 3: blank.
    let line3 = &ctx.lines[2];
    assert_eq!(line3.content(ctx.content), "");
    assert!(line3.is_blank);

    // Accessor helpers use 1-based line numbers.
    assert_eq!(ctx.line_to_byte_offset(1), Some(0));
    assert_eq!(ctx.line_to_byte_offset(2), Some(8));
    assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
    assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
}
4812
/// Unordered, nested and ordered list markers are detected; plain text is not.
#[test]
fn test_list_item_detection() {
    // The nested `*` item is indented two spaces (the test asserts
    // `marker_column == 2`). The nested `2)` item is indented three spaces so
    // that it lines up with the content column of `1. `.
    let content = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // Line 1: top-level unordered item.
    let line1 = &ctx.lines[0];
    assert!(line1.list_item.is_some());
    let list1 = line1.list_item.as_ref().unwrap();
    assert_eq!(list1.marker, "-");
    assert!(!list1.is_ordered);
    assert_eq!(list1.marker_column, 0);
    assert_eq!(list1.content_column, 2);

    // Line 2: nested unordered item.
    let line2 = &ctx.lines[1];
    assert!(line2.list_item.is_some());
    let list2 = line2.list_item.as_ref().unwrap();
    assert_eq!(list2.marker, "*");
    assert_eq!(list2.marker_column, 2);

    // Line 3: ordered item.
    let line3 = &ctx.lines[2];
    assert!(line3.list_item.is_some());
    let list3 = line3.list_item.as_ref().unwrap();
    assert_eq!(list3.marker, "1.");
    assert!(list3.is_ordered);
    assert_eq!(list3.number, Some(1));

    // Line 6: plain paragraph text.
    let line6 = &ctx.lines[5];
    assert!(line6.list_item.is_none());
}
4846
/// Offsets that fall on newline characters and at the end of the document.
#[test]
fn test_offset_to_line_col_edge_cases() {
    let content = "a\nb\nc";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // (byte offset, expected (line, column)), both 1-based.
    let expectations = [
        (0, (1, 1)),
        (1, (1, 2)),
        (2, (2, 1)),
        (3, (2, 2)),
        (4, (3, 1)),
        (5, (3, 2)),
    ];
    for (offset, expected) in expectations {
        assert_eq!(ctx.offset_to_line_col(offset), expected);
    }
}
4859
/// In the MDX flavor, leading `import` / `export` lines are flagged as ESM
/// block lines; the lines after them are not.
#[test]
fn test_mdx_esm_blocks() {
    let content = r##"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall

In {year}, the snowfall was above average.
It was followed by a warm spring which caused
flood conditions in many of the nearby rivers.

<Chart color="#fcb32c" year={year} />
"##;

    let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);

    assert_eq!(ctx.lines.len(), 10);

    // (line index, expected `in_esm_block`, failure message)
    let expectations = [
        (0, true, "Line 1 (import) should be in_esm_block"),
        (1, true, "Line 2 (export) should be in_esm_block"),
        (2, false, "Line 3 (blank) should NOT be in_esm_block"),
        (3, false, "Line 4 (heading) should NOT be in_esm_block"),
        (4, false, "Line 5 (blank) should NOT be in_esm_block"),
        (5, false, "Line 6 (text) should NOT be in_esm_block"),
    ];
    for (index, expected, message) in expectations {
        assert!(ctx.lines[index].in_esm_block == expected, "{message}");
    }
}
4888
/// The same `import` / `export` lines are not flagged in the Standard flavor.
#[test]
fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
    let content = r#"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall
"#;

    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    let expectations = [
        (0, "Line 1 should NOT be in_esm_block in Standard flavor"),
        (1, "Line 2 should NOT be in_esm_block in Standard flavor"),
    ];
    for (index, message) in expectations {
        assert!(!ctx.lines[index].in_esm_block, "{message}");
    }
}
4909
/// Lines that start with `>` followed by heavily indented content are still
/// recognised as blockquote lines, with the indentation stripped from the
/// recorded content.
#[test]
fn test_blockquote_with_indented_content() {
    // Both quoted lines carry several spaces after the `>` marker; the test
    // asserts `has_multiple_spaces_after_marker` for the first of them.
    let content = r#"# Heading

>      -S socket-path
>                    More text
"#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    assert!(
        ctx.lines.get(2).is_some_and(|l| l.blockquote.is_some()),
        "Line 3 should be a blockquote"
    );
    assert!(
        ctx.lines.get(3).is_some_and(|l| l.blockquote.is_some()),
        "Line 4 should be a blockquote"
    );

    let bq3 = ctx.lines.get(2).unwrap().blockquote.as_ref().unwrap();
    assert_eq!(bq3.content, "-S socket-path");
    assert_eq!(bq3.nesting_level, 1);
    assert!(bq3.has_multiple_spaces_after_marker);

    let bq4 = ctx.lines.get(3).unwrap().blockquote.as_ref().unwrap();
    assert_eq!(bq4.content, "More text");
    assert_eq!(bq4.nesting_level, 1);
}
4945
/// Footnote definitions (`[^id]: ...`) are not reported as reference
/// definitions; an ordinary `[id]: url "title"` line is.
#[test]
fn test_footnote_definitions_not_parsed_as_reference_defs() {
    let content = r#"# Title

A footnote[^1].

[^1]: This is the footnote content.

[^note]: Another footnote with [link](https://example.com).

[regular]: ./path.md "A real reference definition"
"#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    assert_eq!(
        ctx.reference_defs.len(),
        1,
        "Footnotes should not be parsed as reference definitions"
    );

    let only_def = &ctx.reference_defs[0];
    assert_eq!(only_def.id, "regular");
    assert_eq!(only_def.url, "./path.md");
    assert_eq!(only_def.title, Some("A real reference definition".to_string()));
}
4976
/// A footnote whose body is an inline link must not produce a reference
/// definition.
#[test]
fn test_footnote_with_inline_link_not_misidentified() {
    let content = r#"# Title

A footnote[^1].

[^1]: [link](https://www.google.com).
"#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    let found_defs = &ctx.reference_defs;
    assert!(
        found_defs.is_empty(),
        "Footnote with inline link should not create a reference definition"
    );
}
4995
/// Numeric, named, single-character, hyphenated and mixed footnote labels are
/// all excluded; only the two ordinary reference definitions remain.
#[test]
fn test_various_footnote_formats_excluded() {
    let content = r#"[^1]: Numeric footnote
[^note]: Named footnote
[^a]: Single char footnote
[^long-footnote-name]: Long named footnote
[^123abc]: Mixed alphanumeric

[ref1]: ./file1.md
[ref2]: ./file2.md
"#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    assert_eq!(
        ctx.reference_defs.len(),
        2,
        "Only regular reference definitions should be parsed"
    );

    let ids: Vec<&str> = ctx.reference_defs.iter().map(|def| def.id.as_str()).collect();
    for wanted in ["ref1", "ref2"] {
        assert!(ids.contains(&wanted));
    }
    assert!(ids.iter().all(|id| !id.starts_with('^')));
}
5022
/// `has_char` reports every tracked character that is present in the document.
#[test]
fn test_has_char_tracked_characters() {
    let content = "# Heading\n* list item\n_emphasis_ and -hyphen-\n+ plus\n> quote\n| table |\n[link]\n`code`\n<html>\n!image";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // (character, failure message)
    let present = [
        ('#', "Should detect hash"),
        ('*', "Should detect asterisk"),
        ('_', "Should detect underscore"),
        ('-', "Should detect hyphen"),
        ('+', "Should detect plus"),
        ('>', "Should detect gt"),
        ('|', "Should detect pipe"),
        ('[', "Should detect bracket"),
        ('`', "Should detect backtick"),
        ('<', "Should detect lt"),
        ('!', "Should detect exclamation"),
        ('\n', "Should detect newline"),
    ];
    for (ch, message) in present {
        assert!(ctx.has_char(ch), "{message}");
    }
}
5047
/// `has_char` is false for every tracked character in plain one-line text.
#[test]
fn test_has_char_absent_characters() {
    let content = "Simple text without special chars";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // (character, failure message)
    let absent = [
        ('#', "Should not detect hash"),
        ('*', "Should not detect asterisk"),
        ('_', "Should not detect underscore"),
        ('-', "Should not detect hyphen"),
        ('+', "Should not detect plus"),
        ('>', "Should not detect gt"),
        ('|', "Should not detect pipe"),
        ('[', "Should not detect bracket"),
        ('`', "Should not detect backtick"),
        ('<', "Should not detect lt"),
        ('!', "Should not detect exclamation"),
        ('\n', "Should not detect newline in single line"),
    ];
    for (ch, message) in absent {
        assert!(!ctx.has_char(ch), "{message}");
    }
}
5068
/// `has_char` also answers for characters outside the tracked set.
#[test]
fn test_has_char_fallback_for_untracked() {
    let content = "Text with @mention and $dollar and %percent";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // (character, expected presence, failure message)
    let cases = [
        ('@', true, "Should detect @ via fallback"),
        ('$', true, "Should detect $ via fallback"),
        ('%', true, "Should detect % via fallback"),
        ('^', false, "Should not detect absent ^ via fallback"),
    ];
    for (ch, expected, message) in cases {
        assert!(ctx.has_char(ch) == expected, "{message}");
    }
}
5080
/// `char_count` returns exact totals for each tracked character.
#[test]
fn test_char_count_tracked_characters() {
    let content = "## Heading ##\n***bold***\n__emphasis__\n---\n+++\n>> nested\n|| table ||\n[[link]]\n``code``\n<<html>>\n!!";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // (character, expected count, failure message)
    let totals = [
        ('#', 4, "Should count 4 hashes"),
        ('*', 6, "Should count 6 asterisks"),
        ('_', 4, "Should count 4 underscores"),
        ('-', 3, "Should count 3 hyphens"),
        ('+', 3, "Should count 3 pluses"),
        ('>', 4, "Should count 4 gt (2 nested + 2 in <<html>>)"),
        ('|', 4, "Should count 4 pipes"),
        ('[', 2, "Should count 2 brackets"),
        ('`', 4, "Should count 4 backticks"),
        ('<', 2, "Should count 2 lt"),
        ('!', 2, "Should count 2 exclamations"),
        ('\n', 10, "Should count 10 newlines"),
    ];
    for (ch, expected, message) in totals {
        assert_eq!(ctx.char_count(ch), expected, "{message}");
    }
}
5100
/// Tracked characters that never occur are counted as zero.
#[test]
fn test_char_count_zero_for_absent() {
    let content = "Plain text";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    for ch in ['#', '*', '_', '\n'] {
        assert_eq!(ctx.char_count(ch), 0);
    }
}
5111
/// `char_count` also counts characters outside the tracked set.
#[test]
fn test_char_count_fallback_for_untracked() {
    let content = "@@@ $$ %%%";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // (character, expected count, failure message)
    let totals = [
        ('@', 3, "Should count 3 @ via fallback"),
        ('$', 2, "Should count 2 $ via fallback"),
        ('%', 3, "Should count 3 % via fallback"),
        ('^', 0, "Should count 0 for absent char"),
    ];
    for (ch, expected, message) in totals {
        assert_eq!(ctx.char_count(ch), expected, "{message}");
    }
}
5122
/// On an empty document every count is zero and `has_char` is always false.
#[test]
fn test_char_count_empty_content() {
    let ctx = LintContext::new("", MarkdownFlavor::Standard, None);

    for ch in ['#', '*', '@'] {
        assert_eq!(ctx.char_count(ch), 0);
    }
    for ch in ['#', '@'] {
        assert!(!ctx.has_char(ch));
    }
}
5133
/// Byte positions inside `<div>` and `</div>` are in a tag; positions inside
/// the text between them are not.
#[test]
fn test_is_in_html_tag_simple() {
    let content = "<div>content</div>";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // Opening tag occupies bytes 0..=4.
    assert!(ctx.is_in_html_tag(0), "Position 0 (<) should be in tag");
    assert!(ctx.is_in_html_tag(1), "Position 1 (d) should be in tag");
    assert!(ctx.is_in_html_tag(4), "Position 4 (>) should be in tag");

    // Text "content" occupies bytes 5..=11; byte 10 is its second `n`.
    assert!(!ctx.is_in_html_tag(5), "Position 5 (c) should not be in tag");
    assert!(!ctx.is_in_html_tag(10), "Position 10 (n) should not be in tag");

    // Closing tag occupies bytes 12..=17.
    assert!(ctx.is_in_html_tag(12), "Position 12 (<) should be in tag");
    assert!(ctx.is_in_html_tag(17), "Position 17 (>) should be in tag");
}
5156
/// A self-closing `<br/>` covers exactly its own bytes.
#[test]
fn test_is_in_html_tag_self_closing() {
    let content = "Text <br/> more text";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // (byte position, expected result, failure message)
    let cases = [
        (0, false, "Position 0 should not be in tag"),
        (4, false, "Position 4 (space) should not be in tag"),
        (5, true, "Position 5 (<) should be in tag"),
        (8, true, "Position 8 (/) should be in tag"),
        (9, true, "Position 9 (>) should be in tag"),
        (10, false, "Position 10 (space) should not be in tag"),
    ];
    for (position, expected, message) in cases {
        assert!(ctx.is_in_html_tag(position) == expected, "{message}");
    }
}
5174
/// Attribute text inside an opening tag counts as part of the tag.
#[test]
fn test_is_in_html_tag_with_attributes() {
    let content = r#"<a href="url" class="link">text</a>"#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // (byte position, expected result, failure message)
    let cases = [
        (0, true, "Start of tag"),
        (10, true, "Inside href attribute"),
        (20, true, "Inside class attribute"),
        (26, true, "End of opening tag"),
        (27, false, "Start of content"),
        (30, false, "End of content"),
        (31, true, "Start of closing tag"),
    ];
    for (position, expected, message) in cases {
        assert!(ctx.is_in_html_tag(position) == expected, "{message}");
    }
}
5193
/// An opening tag that spans several lines is tracked as a single tag.
#[test]
fn test_is_in_html_tag_multiline() {
    let content = "<div\n class=\"test\"\n>\ncontent\n</div>";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    let inside = [
        (0, "Start of multiline tag"),
        (5, "After first newline in tag"),
        (15, "Inside attribute"),
    ];
    for (position, message) in inside {
        assert!(ctx.is_in_html_tag(position), "{message}");
    }

    // First byte after the `>` that ends the opening tag and its newline.
    let after_opening_tag = content.find(">\n").unwrap() + 2;
    assert!(!ctx.is_in_html_tag(after_opening_tag), "Content after tag");
}
5208
/// Without any tag in the document, no byte position is inside a tag.
#[test]
fn test_is_in_html_tag_no_tags() {
    let content = "Plain text without any HTML";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    (0..content.len()).for_each(|i| {
        assert!(!ctx.is_in_html_tag(i), "Position {i} should not be in tag");
    });
}
5219
/// A `{{ ... }}` expression covers its braces and everything between them.
#[test]
fn test_is_in_jinja_range_expression() {
    let content = "Hello {{ name }}!";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // (byte position, expected result, failure message)
    let cases = [
        (0, false, "H should not be in Jinja"),
        (5, false, "Space before Jinja should not be in Jinja"),
        (6, true, "First brace should be in Jinja"),
        (7, true, "Second brace should be in Jinja"),
        (10, true, "name should be in Jinja"),
        (14, true, "Closing brace should be in Jinja"),
        (15, true, "Second closing brace should be in Jinja"),
        (16, false, "! should not be in Jinja"),
    ];
    for (position, expected, message) in cases {
        assert!(ctx.is_in_jinja_range(position) == expected, "{message}");
    }
}
5243
/// `{% ... %}` statements are Jinja ranges; the text between two statements
/// is not.
#[test]
fn test_is_in_jinja_range_statement() {
    let content = "{% if condition %}content{% endif %}";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // (byte position, expected result, failure message)
    let cases = [
        (0, true, "Start of Jinja statement"),
        (5, true, "condition should be in Jinja"),
        (17, true, "End of opening statement"),
        (18, false, "content should not be in Jinja"),
        (25, true, "Start of endif"),
        (32, true, "endif should be in Jinja"),
    ];
    for (position, expected, message) in cases {
        assert!(ctx.is_in_jinja_range(position) == expected, "{message}");
    }
}
5261
/// Two expressions on one line form two separate ranges with a gap between.
#[test]
fn test_is_in_jinja_range_multiple() {
    let content = "{{ a }} and {{ b }}";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // (byte position, expected result): first expression, gap, second expression.
    let cases = [
        (0, true),
        (3, true),
        (6, true),
        (8, false),
        (11, false),
        (12, true),
        (15, true),
        (18, true),
    ];
    for (position, expected) in cases {
        assert!(ctx.is_in_jinja_range(position) == expected);
    }
}
5281
/// Text without Jinja delimiters has no Jinja range at any byte position.
#[test]
fn test_is_in_jinja_range_no_jinja() {
    let content = "Plain text with single braces but not Jinja";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    (0..content.len()).for_each(|i| {
        assert!(!ctx.is_in_jinja_range(i), "Position {i} should not be in Jinja");
    });
}
5292
5293 #[test]
5298 fn test_is_in_link_title_with_title() {
5299 let content = r#"[ref]: https://example.com "Title text"
5300
5301Some content."#;
5302 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5303
5304 assert_eq!(ctx.reference_defs.len(), 1);
5306 let def = &ctx.reference_defs[0];
5307 assert!(def.title_byte_start.is_some());
5308 assert!(def.title_byte_end.is_some());
5309
5310 let title_start = def.title_byte_start.unwrap();
5311 let title_end = def.title_byte_end.unwrap();
5312
5313 assert!(!ctx.is_in_link_title(10), "URL should not be in title");
5315
5316 assert!(ctx.is_in_link_title(title_start), "Title start should be in title");
5318 assert!(
5319 ctx.is_in_link_title(title_start + 5),
5320 "Middle of title should be in title"
5321 );
5322 assert!(ctx.is_in_link_title(title_end - 1), "End of title should be in title");
5323
5324 assert!(
5326 !ctx.is_in_link_title(title_end),
5327 "After title end should not be in title"
5328 );
5329 }
5330
5331 #[test]
5332 fn test_is_in_link_title_without_title() {
5333 let content = "[ref]: https://example.com\n\nSome content.";
5334 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5335
5336 assert_eq!(ctx.reference_defs.len(), 1);
5338 let def = &ctx.reference_defs[0];
5339 assert!(def.title_byte_start.is_none());
5340 assert!(def.title_byte_end.is_none());
5341
5342 for i in 0..content.len() {
5344 assert!(!ctx.is_in_link_title(i), "Position {i} should not be in title");
5345 }
5346 }
5347
5348 #[test]
5349 fn test_is_in_link_title_multiple_refs() {
5350 let content = r#"[ref1]: /url1 "Title One"
5351[ref2]: /url2
5352[ref3]: /url3 "Title Three"
5353"#;
5354 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5355
5356 assert_eq!(ctx.reference_defs.len(), 3);
5358
5359 let ref1 = ctx.reference_defs.iter().find(|r| r.id == "ref1").unwrap();
5361 assert!(ref1.title_byte_start.is_some());
5362
5363 let ref2 = ctx.reference_defs.iter().find(|r| r.id == "ref2").unwrap();
5365 assert!(ref2.title_byte_start.is_none());
5366
5367 let ref3 = ctx.reference_defs.iter().find(|r| r.id == "ref3").unwrap();
5369 assert!(ref3.title_byte_start.is_some());
5370
5371 if let (Some(start), Some(end)) = (ref1.title_byte_start, ref1.title_byte_end) {
5373 assert!(ctx.is_in_link_title(start + 1));
5374 assert!(!ctx.is_in_link_title(end + 5));
5375 }
5376
5377 if let (Some(start), Some(_end)) = (ref3.title_byte_start, ref3.title_byte_end) {
5379 assert!(ctx.is_in_link_title(start + 1));
5380 }
5381 }
5382
5383 #[test]
5384 fn test_is_in_link_title_single_quotes() {
5385 let content = "[ref]: /url 'Single quoted title'\n";
5386 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5387
5388 assert_eq!(ctx.reference_defs.len(), 1);
5389 let def = &ctx.reference_defs[0];
5390
5391 if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
5392 assert!(ctx.is_in_link_title(start));
5393 assert!(ctx.is_in_link_title(start + 5));
5394 assert!(!ctx.is_in_link_title(end));
5395 }
5396 }
5397
5398 #[test]
5399 fn test_is_in_link_title_parentheses() {
5400 let content = "[ref]: /url (Parenthesized title)\n";
5403 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5404
5405 if ctx.reference_defs.is_empty() {
5408 for i in 0..content.len() {
5410 assert!(!ctx.is_in_link_title(i));
5411 }
5412 } else {
5413 let def = &ctx.reference_defs[0];
5414 if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
5415 assert!(ctx.is_in_link_title(start));
5416 assert!(ctx.is_in_link_title(start + 5));
5417 assert!(!ctx.is_in_link_title(end));
5418 } else {
5419 for i in 0..content.len() {
5421 assert!(!ctx.is_in_link_title(i));
5422 }
5423 }
5424 }
5425 }
5426
5427 #[test]
5428 fn test_is_in_link_title_no_refs() {
5429 let content = "Just plain text without any reference definitions.";
5430 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5431
5432 assert!(ctx.reference_defs.is_empty());
5433
5434 for i in 0..content.len() {
5435 assert!(!ctx.is_in_link_title(i));
5436 }
5437 }
5438
5439 #[test]
5444 fn test_math_spans_inline() {
5445 let content = "Text with inline math $[f](x)$ in it.";
5446 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5447
5448 let math_spans = ctx.math_spans();
5449 assert_eq!(math_spans.len(), 1, "Should detect one inline math span");
5450
5451 let span = &math_spans[0];
5452 assert!(!span.is_display, "Should be inline math, not display");
5453 assert_eq!(span.content, "[f](x)", "Content should be extracted correctly");
5454 }
5455
5456 #[test]
5457 fn test_math_spans_display_single_line() {
5458 let content = "$$X(\\zeta) = \\mathcal Z [x](\\zeta)$$";
5459 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5460
5461 let math_spans = ctx.math_spans();
5462 assert_eq!(math_spans.len(), 1, "Should detect one display math span");
5463
5464 let span = &math_spans[0];
5465 assert!(span.is_display, "Should be display math");
5466 assert!(
5467 span.content.contains("[x](\\zeta)"),
5468 "Content should contain the link-like pattern"
5469 );
5470 }
5471
5472 #[test]
5473 fn test_math_spans_display_multiline() {
5474 let content = "Before\n\n$$\n[x](\\zeta) = \\sum_k x(k)\n$$\n\nAfter";
5475 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5476
5477 let math_spans = ctx.math_spans();
5478 assert_eq!(math_spans.len(), 1, "Should detect one display math span");
5479
5480 let span = &math_spans[0];
5481 assert!(span.is_display, "Should be display math");
5482 }
5483
5484 #[test]
5485 fn test_is_in_math_span() {
5486 let content = "Text $[f](x)$ more text";
5487 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5488
5489 let math_start = content.find('$').unwrap();
5491 let math_end = content.rfind('$').unwrap() + 1;
5492
5493 assert!(
5494 ctx.is_in_math_span(math_start + 1),
5495 "Position inside math span should return true"
5496 );
5497 assert!(
5498 ctx.is_in_math_span(math_start + 3),
5499 "Position inside math span should return true"
5500 );
5501
5502 assert!(!ctx.is_in_math_span(0), "Position before math span should return false");
5504 assert!(
5505 !ctx.is_in_math_span(math_end + 1),
5506 "Position after math span should return false"
5507 );
5508 }
5509
5510 #[test]
5511 fn test_math_spans_mixed_with_code() {
5512 let content = "Math $[f](x)$ and code `[g](y)` mixed";
5513 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5514
5515 let math_spans = ctx.math_spans();
5516 let code_spans = ctx.code_spans();
5517
5518 assert_eq!(math_spans.len(), 1, "Should have one math span");
5519 assert_eq!(code_spans.len(), 1, "Should have one code span");
5520
5521 assert_eq!(math_spans[0].content, "[f](x)");
5523 assert_eq!(code_spans[0].content, "[g](y)");
5525 }
5526
5527 #[test]
5528 fn test_math_spans_no_math() {
5529 let content = "Regular text without any math at all.";
5530 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5531
5532 let math_spans = ctx.math_spans();
5533 assert!(math_spans.is_empty(), "Should have no math spans");
5534 }
5535
5536 #[test]
5537 fn test_math_spans_multiple() {
5538 let content = "First $a$ and second $b$ and display $$c$$";
5539 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5540
5541 let math_spans = ctx.math_spans();
5542 assert_eq!(math_spans.len(), 3, "Should detect three math spans");
5543
5544 let inline_count = math_spans.iter().filter(|s| !s.is_display).count();
5546 let display_count = math_spans.iter().filter(|s| s.is_display).count();
5547
5548 assert_eq!(inline_count, 2, "Should have two inline math spans");
5549 assert_eq!(display_count, 1, "Should have one display math span");
5550 }
5551
5552 #[test]
5553 fn test_is_in_math_span_boundary_positions() {
5554 let content = "$[f](x)$";
5557 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5558
5559 let math_spans = ctx.math_spans();
5560 assert_eq!(math_spans.len(), 1, "Should have one math span");
5561
5562 let span = &math_spans[0];
5563
5564 assert!(
5566 ctx.is_in_math_span(span.byte_offset),
5567 "Start position should be in span"
5568 );
5569
5570 assert!(
5572 ctx.is_in_math_span(span.byte_offset + 1),
5573 "Position after start should be in span"
5574 );
5575
5576 assert!(
5578 ctx.is_in_math_span(span.byte_end - 1),
5579 "Position at end-1 should be in span"
5580 );
5581
5582 assert!(
5584 !ctx.is_in_math_span(span.byte_end),
5585 "Position at byte_end should NOT be in span (exclusive)"
5586 );
5587 }
5588
5589 #[test]
5590 fn test_math_spans_at_document_start() {
5591 let content = "$x$ text";
5592 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5593
5594 let math_spans = ctx.math_spans();
5595 assert_eq!(math_spans.len(), 1);
5596 assert_eq!(math_spans[0].byte_offset, 0, "Math should start at byte 0");
5597 }
5598
5599 #[test]
5600 fn test_math_spans_at_document_end() {
5601 let content = "text $x$";
5602 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5603
5604 let math_spans = ctx.math_spans();
5605 assert_eq!(math_spans.len(), 1);
5606 assert_eq!(math_spans[0].byte_end, content.len(), "Math should end at document end");
5607 }
5608
5609 #[test]
5610 fn test_math_spans_consecutive() {
5611 let content = "$a$$b$";
5612 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5613
5614 let math_spans = ctx.math_spans();
5615 assert!(!math_spans.is_empty(), "Should detect at least one math span");
5617
5618 for i in 0..content.len() {
5620 assert!(ctx.is_in_math_span(i), "Position {i} should be in a math span");
5621 }
5622 }
5623
5624 #[test]
5625 fn test_math_spans_currency_not_math() {
5626 let content = "Price is $100";
5628 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5629
5630 let math_spans = ctx.math_spans();
5631 assert!(
5634 math_spans.is_empty() || !math_spans.iter().any(|s| s.content.contains("100")),
5635 "Unbalanced $ should not create math span containing 100"
5636 );
5637 }
5638
5639 #[test]
5644 fn test_reference_lookup_o1_basic() {
5645 let content = r#"[ref1]: /url1
5646[REF2]: /url2 "Title"
5647[Ref3]: /url3
5648
5649Use [link][ref1] and [link][REF2]."#;
5650 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5651
5652 assert_eq!(ctx.reference_defs.len(), 3);
5654
5655 assert_eq!(ctx.get_reference_url("ref1"), Some("/url1"));
5657 assert_eq!(ctx.get_reference_url("REF1"), Some("/url1")); assert_eq!(ctx.get_reference_url("Ref1"), Some("/url1")); assert_eq!(ctx.get_reference_url("ref2"), Some("/url2"));
5660 assert_eq!(ctx.get_reference_url("REF2"), Some("/url2"));
5661 assert_eq!(ctx.get_reference_url("ref3"), Some("/url3"));
5662 assert_eq!(ctx.get_reference_url("nonexistent"), None);
5663 }
5664
5665 #[test]
5666 fn test_reference_lookup_o1_get_reference_def() {
5667 let content = r#"[myref]: https://example.com "My Title"
5668"#;
5669 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5670
5671 let def = ctx.get_reference_def("myref").expect("Should find myref");
5673 assert_eq!(def.url, "https://example.com");
5674 assert_eq!(def.title.as_deref(), Some("My Title"));
5675
5676 let def2 = ctx.get_reference_def("MYREF").expect("Should find MYREF");
5678 assert_eq!(def2.url, "https://example.com");
5679
5680 assert!(ctx.get_reference_def("nonexistent").is_none());
5682 }
5683
5684 #[test]
5685 fn test_reference_lookup_o1_has_reference_def() {
5686 let content = r#"[foo]: /foo
5687[BAR]: /bar
5688"#;
5689 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5690
5691 assert!(ctx.has_reference_def("foo"));
5693 assert!(ctx.has_reference_def("FOO")); assert!(ctx.has_reference_def("bar"));
5695 assert!(ctx.has_reference_def("Bar")); assert!(!ctx.has_reference_def("baz")); }
5698
5699 #[test]
5700 fn test_reference_lookup_o1_empty_content() {
5701 let content = "No references here.";
5702 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5703
5704 assert!(ctx.reference_defs.is_empty());
5705 assert_eq!(ctx.get_reference_url("anything"), None);
5706 assert!(ctx.get_reference_def("anything").is_none());
5707 assert!(!ctx.has_reference_def("anything"));
5708 }
5709
5710 #[test]
5711 fn test_reference_lookup_o1_special_characters_in_id() {
5712 let content = r#"[ref-with-dash]: /url1
5713[ref_with_underscore]: /url2
5714[ref.with.dots]: /url3
5715"#;
5716 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5717
5718 assert_eq!(ctx.get_reference_url("ref-with-dash"), Some("/url1"));
5719 assert_eq!(ctx.get_reference_url("ref_with_underscore"), Some("/url2"));
5720 assert_eq!(ctx.get_reference_url("ref.with.dots"), Some("/url3"));
5721 }
5722
5723 #[test]
5724 fn test_reference_lookup_o1_unicode_id() {
5725 let content = r#"[日本語]: /japanese
5726[émoji]: /emoji
5727"#;
5728 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5729
5730 assert_eq!(ctx.get_reference_url("日本語"), Some("/japanese"));
5731 assert_eq!(ctx.get_reference_url("émoji"), Some("/emoji"));
5732 assert_eq!(ctx.get_reference_url("ÉMOJI"), Some("/emoji")); }
5734}