1use crate::config::MarkdownFlavor;
2use crate::inline_config::InlineConfig;
3use crate::rules::front_matter_utils::FrontMatterUtils;
4use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
5use crate::utils::element_cache::ElementCache;
6use crate::utils::regex_cache::URL_SIMPLE_REGEX;
7use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
8use regex::Regex;
9use std::borrow::Cow;
10use std::collections::HashMap;
11use std::path::PathBuf;
12use std::sync::LazyLock;
13
/// Evaluates `$code` and returns its value, timing the evaluation.
///
/// When `$profile` is true, the elapsed wall-clock time is printed to stderr
/// under the label `$name`. The timestamp is taken before `$code` runs and
/// `$profile` is evaluated afterwards.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let timer = std::time::Instant::now();
        let value = $code;
        if $profile {
            let elapsed = timer.elapsed();
            eprintln!("[PROFILE] {}: {:?}", $name, elapsed);
        }
        value
    }};
}
26
/// wasm32 variant of `profile_section!`: expands to `$code` only.
///
/// `$name` and `$profile` are accepted for call-site compatibility but are
/// not expanded, so no timing or printing happens on this target.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
31
/// Lazily compiled regex for inline links (`[text](url "title")`) and full
/// reference links (`[text][id]`).
///
/// Capture groups: 1 = link text, 2 = angle-bracketed URL, 3 = bare URL,
/// 4 / 5 = double- / single-quoted title, 6 = reference ID.
/// The pattern is compiled in `(?sx)` mode, so whitespace and `#` comments
/// inside the literal are ignored by the regex engine.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\] # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
        \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
        |
        \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
45
/// Lazily compiled regex for inline images (`![alt](url "title")`) and full
/// reference images (`![alt][id]`).
///
/// Capture groups: 1 = alt text, 2 = angle-bracketed URL, 3 = bare URL,
/// 4 / 5 = double- / single-quoted title, 6 = reference ID.
/// Same shape as the link pattern, with a leading `!`.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\] # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
        \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
        |
        \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
59
/// Lazily compiled multi-line regex for a single-line reference definition:
/// up to three leading spaces, `[id]:`, a whitespace-free destination, and an
/// optional quoted title.
///
/// Capture groups: 1 = id, 2 = destination, 3 / 4 = double- / single-quoted title.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
63
/// Lazily compiled regex for an email-like token: a local part, `@`, a domain,
/// and a final dot-separated label of at least two ASCII letters.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
69
/// Lazily compiled regex capturing (group 1) a blockquote prefix at the start
/// of the input: optional whitespace, one or more `>`, then optional whitespace.
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
72
/// Per-line metadata stored in `LintContext::lines` (one entry per source line).
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset of the line's start within the source text.
    pub byte_offset: usize,
    /// Length of the line in bytes; `content` slices `byte_offset..byte_offset + byte_len`.
    /// NOTE(review): presumably excludes the line terminator — confirm.
    pub byte_len: usize,
    /// Indentation of the line. NOTE(review): unit (bytes vs. chars) not visible here — confirm.
    pub indent: usize,
    /// Visual indentation. NOTE(review): presumably tab-expanded columns — confirm.
    pub visual_indent: usize,
    /// Whether the line is blank.
    pub is_blank: bool,
    /// Whether the line lies inside a code block.
    pub in_code_block: bool,
    /// Whether the line lies inside front matter.
    pub in_front_matter: bool,
    /// Whether the line lies inside an HTML block.
    pub in_html_block: bool,
    /// Whether the line lies inside an HTML comment.
    pub in_html_comment: bool,
    /// List item details when the line starts a list item.
    pub list_item: Option<ListItemInfo>,
    /// Heading details when the line is a heading.
    pub heading: Option<HeadingInfo>,
    /// Blockquote details when the line is part of a blockquote.
    pub blockquote: Option<BlockquoteInfo>,
    /// Whether the line lies inside a mkdocstrings block.
    pub in_mkdocstrings: bool,
    /// Whether the line lies inside an ESM block.
    pub in_esm_block: bool,
    /// Set for every line after the first of a code span that spans several lines.
    pub in_code_span_continuation: bool,
    /// Whether the line is a horizontal rule.
    pub is_horizontal_rule: bool,
    /// Whether the line lies inside a math block.
    pub in_math_block: bool,
    /// Whether the line lies inside a Quarto div.
    pub in_quarto_div: bool,
    /// Whether the line lies inside a JSX expression.
    pub in_jsx_expression: bool,
    /// Whether the line lies inside an MDX comment.
    pub in_mdx_comment: bool,
    /// Whether the line lies inside a JSX component.
    pub in_jsx_component: bool,
    /// Whether the line lies inside a JSX fragment.
    pub in_jsx_fragment: bool,
    /// Whether the line lies inside an admonition.
    pub in_admonition: bool,
    /// Whether the line lies inside a content tab.
    pub in_content_tab: bool,
    /// Whether the line lies inside a definition list.
    pub in_definition_list: bool,
}
130
131impl LineInfo {
132 pub fn content<'a>(&self, source: &'a str) -> &'a str {
134 &source[self.byte_offset..self.byte_offset + self.byte_len]
135 }
136}
137
/// Details of a list item found on a line.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The list marker text.
    pub marker: String,
    /// Whether the item belongs to an ordered list.
    pub is_ordered: bool,
    /// Item number, when present. NOTE(review): presumably only set for ordered items — confirm.
    pub number: Option<usize>,
    /// Column of the marker. The mixed-nesting check treats a value of `1` as the
    /// top-level position. NOTE(review): exact indexing base not visible here — confirm.
    pub marker_column: usize,
    /// Column where the item's content starts.
    pub content_column: usize,
}
152
/// Syntactic style of a heading.
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    /// ATX heading. NOTE(review): presumably `#`-prefixed — confirm.
    ATX,
    /// Setext heading, first variant. NOTE(review): presumably level 1 (`=` underline) — confirm.
    Setext1,
    /// Setext heading, second variant. NOTE(review): presumably level 2 (`-` underline) — confirm.
    Setext2,
}
163
/// A link found in the document, borrowing from the source where possible.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// Line on which the link starts.
    pub line: usize,
    /// Start column of the link.
    pub start_col: usize,
    /// End column of the link.
    pub end_col: usize,
    /// Byte offset where the link starts.
    pub byte_offset: usize,
    /// Byte offset where the link ends.
    pub byte_end: usize,
    /// Link text.
    pub text: Cow<'a, str>,
    /// Link destination.
    pub url: Cow<'a, str>,
    /// Whether the link is a reference-style link.
    pub is_reference: bool,
    /// Reference ID, when one applies.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type as reported by `pulldown_cmark`.
    pub link_type: LinkType,
}
188
/// A broken link reported through the parser's broken-link callback.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text the parser could not resolve.
    pub reference: String,
    /// Span of the broken link as reported by the parser.
    pub span: std::ops::Range<usize>,
}
197
/// A footnote reference found in the document.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// Footnote identifier.
    pub id: String,
    /// Line containing the reference.
    pub line: usize,
    /// Byte offset where the reference starts.
    pub byte_offset: usize,
    /// Byte offset where the reference ends.
    pub byte_end: usize,
}
210
/// An image found in the document, borrowing from the source where possible.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// Line on which the image starts.
    pub line: usize,
    /// Start column of the image.
    pub start_col: usize,
    /// End column of the image.
    pub end_col: usize,
    /// Byte offset where the image starts.
    pub byte_offset: usize,
    /// Byte offset where the image ends.
    pub byte_end: usize,
    /// Alternative text of the image.
    pub alt_text: Cow<'a, str>,
    /// Image destination.
    pub url: Cow<'a, str>,
    /// Whether the image uses reference-style syntax.
    pub is_reference: bool,
    /// Reference ID, when one applies.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type as reported by `pulldown_cmark`.
    pub link_type: LinkType,
}
235
/// A reference definition (`[id]: url "title"`) found in the document.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line containing the definition.
    pub line: usize,
    /// Reference ID; lookups lowercase it before comparing.
    pub id: String,
    /// Destination of the definition.
    pub url: String,
    /// Optional title.
    pub title: Option<String>,
    /// Start byte of the definition (inclusive in `is_in_reference_def`).
    pub byte_offset: usize,
    /// End byte of the definition (exclusive in `is_in_reference_def`).
    pub byte_end: usize,
    /// Start byte of the title, when present (inclusive in `is_in_link_title`).
    pub title_byte_start: Option<usize>,
    /// End byte of the title, when present (exclusive in `is_in_link_title`).
    pub title_byte_end: Option<usize>,
}
256
/// An inline code span, possibly covering several lines.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// 1-indexed line on which the span starts.
    pub line: usize,
    /// 1-indexed line on which the span ends (equal to `line` for single-line spans).
    pub end_line: usize,
    /// Start column; `is_in_code_span` compares it against a 0-indexed column (inclusive).
    pub start_col: usize,
    /// End column; `is_in_code_span` compares it against a 0-indexed column (exclusive).
    pub end_col: usize,
    /// Start byte of the span (inclusive in the byte-offset checks).
    pub byte_offset: usize,
    /// End byte of the span (exclusive in the byte-offset checks).
    pub byte_end: usize,
    /// Number of backticks in the delimiter.
    pub backtick_count: usize,
    /// Content of the span.
    pub content: String,
}
277
/// A math span found in the document.
#[derive(Debug, Clone)]
pub struct MathSpan {
    /// Line on which the span starts.
    pub line: usize,
    /// Line on which the span ends.
    pub end_line: usize,
    /// Start column of the span.
    pub start_col: usize,
    /// End column of the span.
    pub end_col: usize,
    /// Start byte of the span (inclusive in `is_in_math_span`).
    pub byte_offset: usize,
    /// End byte of the span (exclusive in `is_in_math_span`).
    pub byte_end: usize,
    /// Whether this is display math. NOTE(review): presumably `$$...$$` vs. inline `$...$` — confirm.
    pub is_display: bool,
    /// Content of the span.
    pub content: String,
}
298
/// Details of a heading found on a line.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level.
    pub level: u8,
    /// Syntactic style of the heading.
    pub style: HeadingStyle,
    /// Marker text of the heading.
    pub marker: String,
    /// Column of the marker.
    pub marker_column: usize,
    /// Column where the heading content starts.
    pub content_column: usize,
    /// Heading text. NOTE(review): relation to `raw_text` not visible here — confirm.
    pub text: String,
    /// Custom ID attached to the heading, if any.
    pub custom_id: Option<String>,
    /// Raw heading text.
    pub raw_text: String,
    /// Whether the heading has a closing sequence.
    pub has_closing_sequence: bool,
    /// The closing sequence text.
    pub closing_sequence: String,
    /// Validity flag; `ValidHeadingsIter` yields only headings where this is true.
    pub is_valid: bool,
}
326
/// Item yielded by `ValidHeadingsIter`: a valid heading plus its line context.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// 1-indexed line number of the heading.
    pub line_num: usize,
    /// The heading found on that line.
    pub heading: &'a HeadingInfo,
    /// The full line info the heading belongs to.
    pub line_info: &'a LineInfo,
}
340
/// Iterator over the lines of a document that carry a valid heading.
pub struct ValidHeadingsIter<'a> {
    /// All lines being scanned.
    lines: &'a [LineInfo],
    /// Index of the next line to examine.
    current_index: usize,
}
349
350impl<'a> ValidHeadingsIter<'a> {
351 fn new(lines: &'a [LineInfo]) -> Self {
352 Self {
353 lines,
354 current_index: 0,
355 }
356 }
357}
358
359impl<'a> Iterator for ValidHeadingsIter<'a> {
360 type Item = ValidHeading<'a>;
361
362 fn next(&mut self) -> Option<Self::Item> {
363 while self.current_index < self.lines.len() {
364 let idx = self.current_index;
365 self.current_index += 1;
366
367 let line_info = &self.lines[idx];
368 if let Some(heading) = &line_info.heading
369 && heading.is_valid
370 {
371 return Some(ValidHeading {
372 line_num: idx + 1, heading,
374 line_info,
375 });
376 }
377 }
378 None
379 }
380}
381
/// Details of a blockquote line.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting depth of the blockquote.
    pub nesting_level: usize,
    /// Indentation before the blockquote marker.
    pub indent: String,
    /// Column of the marker.
    pub marker_column: usize,
    /// Blockquote prefix; `blockquote_prefix_for_blank_line` returns it with trailing whitespace trimmed.
    pub prefix: String,
    /// Content following the prefix.
    pub content: String,
    /// Whether no space follows the marker.
    pub has_no_space_after_marker: bool,
    /// Whether more than one space follows the marker.
    pub has_multiple_spaces_after_marker: bool,
    /// Whether the line needs an MD028 fix. NOTE(review): exact criterion not visible here — confirm.
    pub needs_md028_fix: bool,
}
402
/// A contiguous list block in the document.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// First line of the block (inclusive in `is_in_list_block` / `list_block_for_line`).
    pub start_line: usize,
    /// Last line of the block (inclusive in `is_in_list_block` / `list_block_for_line`).
    pub end_line: usize,
    /// Whether the block is an ordered list.
    pub is_ordered: bool,
    /// Marker associated with the block, if any. NOTE(review): presumably `None` when markers differ — confirm.
    pub marker: Option<String>,
    /// Blockquote prefix the list sits under.
    pub blockquote_prefix: String,
    /// Line numbers of the list items in the block.
    pub item_lines: Vec<usize>,
    /// Nesting level of the block.
    pub nesting_level: usize,
    /// Largest marker width among the block's items.
    pub max_marker_width: usize,
}
423
424use std::sync::{Arc, OnceLock};
425
/// Map from a `usize` key to raw list-item data.
/// NOTE(review): key is presumably a line index and the tuple presumably mirrors
/// `ListItemInfo` (ordered flag, marker, two columns, optional number) — confirm against the producer.
type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;
428
/// List of `(usize, usize)` pairs.
/// NOTE(review): presumably `(start, end)` byte ranges, as the other range lists in this file — confirm.
type ByteRanges = Vec<(usize, usize)>;
431
/// Occurrence counts for characters that drive the `has_char`, `char_count`
/// and `likely_has_*` fast paths.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count reported for `#`.
    pub hash_count: usize,
    /// Count reported for `*`.
    pub asterisk_count: usize,
    /// Count reported for `_`.
    pub underscore_count: usize,
    /// Count reported for `-`.
    pub hyphen_count: usize,
    /// Count reported for `+`.
    pub plus_count: usize,
    /// Count reported for `>`.
    pub gt_count: usize,
    /// Count reported for `|`.
    pub pipe_count: usize,
    /// Count reported for `[`. NOTE(review): may also include `]` — confirm against the counting code.
    pub bracket_count: usize,
    /// Count reported for `` ` ``.
    pub backtick_count: usize,
    /// Count reported for `<`.
    pub lt_count: usize,
    /// Count reported for `!`.
    pub exclamation_count: usize,
    /// Count reported for `\n`.
    pub newline_count: usize,
}
460
/// An HTML tag found in the document.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line containing the tag (matched against `line_num` in `html_tags_on_line`).
    pub line: usize,
    /// Start column of the tag.
    pub start_col: usize,
    /// End column of the tag.
    pub end_col: usize,
    /// Start byte of the tag (inclusive in `is_in_html_tag`).
    pub byte_offset: usize,
    /// End byte of the tag (exclusive in `is_in_html_tag`).
    pub byte_end: usize,
    /// Name of the tag.
    pub tag_name: String,
    /// Whether this is a closing tag.
    pub is_closing: bool,
    /// Whether this is a self-closing tag.
    pub is_self_closing: bool,
    /// Raw text of the tag.
    pub raw_content: String,
}
483
/// An emphasis span found in the document.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line containing the span (matched against `line_num` in `emphasis_spans_on_line`).
    pub line: usize,
    /// Start column of the span.
    pub start_col: usize,
    /// End column of the span.
    pub end_col: usize,
    /// Byte offset where the span starts.
    pub byte_offset: usize,
    /// Byte offset where the span ends.
    pub byte_end: usize,
    /// Emphasis marker character. NOTE(review): presumably `*` or `_` — confirm.
    pub marker: char,
    /// Number of marker characters in the delimiter.
    pub marker_count: usize,
    /// Content of the span.
    pub content: String,
}
504
/// A table row found in the document.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line containing the row (matched against `line_num` in `table_rows_on_line`).
    pub line: usize,
    /// Whether the row is a separator row.
    pub is_separator: bool,
    /// Number of columns in the row.
    pub column_count: usize,
    /// Column alignments. NOTE(review): string vocabulary not visible here — confirm.
    pub column_alignments: Vec<String>,
}
517
/// A bare URL found in the document.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line containing the URL (matched against `line_num` in `bare_urls_on_line`).
    pub line: usize,
    /// Start column of the URL.
    pub start_col: usize,
    /// End column of the URL.
    pub end_col: usize,
    /// Byte offset where the URL starts.
    pub byte_offset: usize,
    /// Byte offset where the URL ends.
    pub byte_end: usize,
    /// The URL text.
    pub url: String,
    /// Kind of URL. NOTE(review): value vocabulary not visible here — confirm.
    pub url_type: String,
}
536
/// Pre-computed view of a Markdown document shared by lint rules.
///
/// Most data is computed eagerly in `new`; the `*_cache` fields are filled on
/// first access unless noted otherwise.
pub struct LintContext<'a> {
    /// The source text.
    pub content: &'a str,
    /// Byte offset of the start of each line (first entry is 0, then one entry after every `\n`).
    pub line_offsets: Vec<usize>,
    /// Code block ranges as returned by `CodeBlockUtils::detect_code_blocks_and_spans`.
    pub code_blocks: Vec<(usize, usize)>,
    /// Per-line metadata; index `n - 1` holds line `n`.
    pub lines: Vec<LineInfo>,
    /// Links found in the document.
    pub links: Vec<ParsedLink<'a>>,
    /// Images found in the document.
    pub images: Vec<ParsedImage<'a>>,
    /// Broken links reported while parsing links.
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Footnote references found while parsing links.
    pub footnote_refs: Vec<FootnoteRef>,
    /// Reference definitions found in the document.
    pub reference_defs: Vec<ReferenceDef>,
    /// Lowercased reference ID -> index into `reference_defs`.
    reference_defs_map: HashMap<String, usize>,
    /// Code spans; pre-filled during construction.
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>,
    /// Math spans; computed on first access.
    math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>,
    /// List blocks found in the document.
    pub list_blocks: Vec<ListBlock>,
    /// Character occurrence counts for the source.
    pub char_frequency: CharFrequency,
    /// HTML tags; computed on first access.
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>,
    /// Emphasis spans; pre-filled during construction.
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>,
    /// Table rows; computed on first access.
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>,
    /// Bare URLs; computed on first access.
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>,
    /// Result of the mixed ordered/unordered nesting check; computed on first access.
    has_mixed_list_nesting_cache: OnceLock<bool>,
    /// Byte ranges of HTML comments.
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Table blocks found in the document.
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Line index built over the source.
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    /// `(start, end)` byte ranges of Jinja constructs (end exclusive in `is_in_jinja_range`).
    jinja_ranges: Vec<(usize, usize)>,
    /// Markdown flavor the document is linted as.
    pub flavor: MarkdownFlavor,
    /// Path of the source file, when known.
    pub source_file: Option<PathBuf>,
    /// `(start, end)` byte ranges of JSX expressions (end exclusive in `is_in_jsx_expression`).
    jsx_expression_ranges: Vec<(usize, usize)>,
    /// `(start, end)` byte ranges of MDX comments (end exclusive in `is_in_mdx_comment`).
    mdx_comment_ranges: Vec<(usize, usize)>,
    /// Byte ranges of citations; only populated for the Quarto flavor.
    citation_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// `(start, end)` byte ranges of Hugo shortcode matches (end exclusive in `is_in_shortcode`).
    shortcode_ranges: Vec<(usize, usize)>,
    /// Inline configuration parsed from the document.
    inline_config: InlineConfig,
}
569
/// Borrowed pieces of a blockquote line, in source order.
struct BlockquoteComponents<'a> {
    /// Leading spaces/tabs before the first `>`.
    indent: &'a str,
    /// The run of consecutive `>` characters.
    markers: &'a str,
    /// Spaces/tabs directly after the markers.
    spaces_after: &'a str,
    /// Everything after `spaces_after`.
    content: &'a str,
}

/// Splits `line` into blockquote components.
///
/// Returns `None` when the first character after leading spaces/tabs is not
/// `>`. Only consecutive `>` characters count as markers, so for `"> > x"`
/// the second `>` ends up in `content`.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    const BLANKS: [char; 2] = [' ', '\t'];

    let after_indent = line.trim_start_matches(BLANKS);
    let (indent, _) = line.split_at(line.len() - after_indent.len());

    if !after_indent.starts_with('>') {
        return None;
    }

    let after_markers = after_indent.trim_start_matches('>');
    let (markers, _) = after_indent.split_at(after_indent.len() - after_markers.len());

    let content = after_markers.trim_start_matches(BLANKS);
    let (spaces_after, _) = after_markers.split_at(after_markers.len() - content.len());

    Some(BlockquoteComponents {
        indent,
        markers,
        spaces_after,
        content,
    })
}
614
615impl<'a> LintContext<'a> {
616 pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
617 #[cfg(not(target_arch = "wasm32"))]
618 let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
619 #[cfg(target_arch = "wasm32")]
620 let profile = false;
621
622 let line_offsets = profile_section!("Line offsets", profile, {
623 let mut offsets = vec![0];
624 for (i, c) in content.char_indices() {
625 if c == '\n' {
626 offsets.push(i + 1);
627 }
628 }
629 offsets
630 });
631
632 let (code_blocks, code_span_ranges) = profile_section!(
634 "Code blocks",
635 profile,
636 CodeBlockUtils::detect_code_blocks_and_spans(content)
637 );
638
639 let html_comment_ranges = profile_section!(
641 "HTML comment ranges",
642 profile,
643 crate::utils::skip_context::compute_html_comment_ranges(content)
644 );
645
646 let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
648 if flavor == MarkdownFlavor::MkDocs {
649 crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
650 } else {
651 Vec::new()
652 }
653 });
654
655 let quarto_div_ranges = profile_section!("Quarto div ranges", profile, {
657 if flavor == MarkdownFlavor::Quarto {
658 crate::utils::quarto_divs::detect_div_block_ranges(content)
659 } else {
660 Vec::new()
661 }
662 });
663
664 let (mut lines, emphasis_spans) = profile_section!(
667 "Basic line info",
668 profile,
669 Self::compute_basic_line_info(
670 content,
671 &line_offsets,
672 &code_blocks,
673 flavor,
674 &html_comment_ranges,
675 &autodoc_ranges,
676 &quarto_div_ranges,
677 )
678 );
679
680 profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));
682
683 profile_section!(
685 "ESM blocks",
686 profile,
687 Self::detect_esm_blocks(content, &mut lines, flavor)
688 );
689
690 let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
692 "JSX/MDX detection",
693 profile,
694 Self::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
695 );
696
697 profile_section!(
699 "MkDocs constructs",
700 profile,
701 Self::detect_mkdocs_line_info(content, &mut lines, flavor)
702 );
703
704 let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));
706
707 profile_section!(
709 "Headings & blockquotes",
710 profile,
711 Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
712 );
713
714 let code_spans = profile_section!(
716 "Code spans",
717 profile,
718 Self::build_code_spans_from_ranges(content, &lines, &code_span_ranges)
719 );
720
721 for span in &code_spans {
724 if span.end_line > span.line {
725 for line_num in (span.line + 1)..=span.end_line {
727 if let Some(line_info) = lines.get_mut(line_num - 1) {
728 line_info.in_code_span_continuation = true;
729 }
730 }
731 }
732 }
733
734 let (links, broken_links, footnote_refs) = profile_section!(
736 "Links",
737 profile,
738 Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
739 );
740
741 let images = profile_section!(
742 "Images",
743 profile,
744 Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
745 );
746
747 let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));
748
749 let reference_defs_map: HashMap<String, usize> = reference_defs
751 .iter()
752 .enumerate()
753 .map(|(idx, def)| (def.id.to_lowercase(), idx))
754 .collect();
755
756 let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));
757
758 let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));
760
761 let table_blocks = profile_section!(
763 "Table blocks",
764 profile,
765 crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
766 content,
767 &code_blocks,
768 &code_spans,
769 &html_comment_ranges,
770 )
771 );
772
773 let line_index = profile_section!(
775 "Line index",
776 profile,
777 crate::utils::range_utils::LineIndex::new(content)
778 );
779
780 let jinja_ranges = profile_section!(
782 "Jinja ranges",
783 profile,
784 crate::utils::jinja_utils::find_jinja_ranges(content)
785 );
786
787 let citation_ranges = profile_section!("Citation ranges", profile, {
789 if flavor == MarkdownFlavor::Quarto {
790 crate::utils::quarto_divs::find_citation_ranges(content)
791 } else {
792 Vec::new()
793 }
794 });
795
796 let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
798 use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
799 let mut ranges = Vec::new();
800 for mat in HUGO_SHORTCODE_REGEX.find_iter(content).flatten() {
801 ranges.push((mat.start(), mat.end()));
802 }
803 ranges
804 });
805
806 let inline_config = InlineConfig::from_content_with_code_blocks(content, &code_blocks);
807
808 Self {
809 content,
810 line_offsets,
811 code_blocks,
812 lines,
813 links,
814 images,
815 broken_links,
816 footnote_refs,
817 reference_defs,
818 reference_defs_map,
819 code_spans_cache: OnceLock::from(Arc::new(code_spans)),
820 math_spans_cache: OnceLock::new(), list_blocks,
822 char_frequency,
823 html_tags_cache: OnceLock::new(),
824 emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
825 table_rows_cache: OnceLock::new(),
826 bare_urls_cache: OnceLock::new(),
827 has_mixed_list_nesting_cache: OnceLock::new(),
828 html_comment_ranges,
829 table_blocks,
830 line_index,
831 jinja_ranges,
832 flavor,
833 source_file,
834 jsx_expression_ranges,
835 mdx_comment_ranges,
836 citation_ranges,
837 shortcode_ranges,
838 inline_config,
839 }
840 }
841
842 pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
847 self.inline_config.is_rule_disabled(rule_name, line_number)
848 }
849
850 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
852 Arc::clone(
853 self.code_spans_cache
854 .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
855 )
856 }
857
858 pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
860 Arc::clone(
861 self.math_spans_cache
862 .get_or_init(|| Arc::new(Self::parse_math_spans(self.content, &self.lines))),
863 )
864 }
865
866 pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
868 let math_spans = self.math_spans();
869 math_spans
870 .iter()
871 .any(|span| byte_pos >= span.byte_offset && byte_pos < span.byte_end)
872 }
873
874 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
876 &self.html_comment_ranges
877 }
878
879 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
881 Arc::clone(self.html_tags_cache.get_or_init(|| {
882 Arc::new(Self::parse_html_tags(
883 self.content,
884 &self.lines,
885 &self.code_blocks,
886 self.flavor,
887 ))
888 }))
889 }
890
891 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
893 Arc::clone(
894 self.emphasis_spans_cache
895 .get()
896 .expect("emphasis_spans_cache initialized during construction"),
897 )
898 }
899
900 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
902 Arc::clone(
903 self.table_rows_cache
904 .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
905 )
906 }
907
908 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
910 Arc::clone(
911 self.bare_urls_cache
912 .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
913 )
914 }
915
916 pub fn has_mixed_list_nesting(&self) -> bool {
920 *self
921 .has_mixed_list_nesting_cache
922 .get_or_init(|| self.compute_mixed_list_nesting())
923 }
924
925 fn compute_mixed_list_nesting(&self) -> bool {
927 let mut stack: Vec<(usize, bool)> = Vec::new();
932 let mut last_was_blank = false;
933
934 for line_info in &self.lines {
935 if line_info.in_code_block
937 || line_info.in_front_matter
938 || line_info.in_mkdocstrings
939 || line_info.in_html_comment
940 || line_info.in_esm_block
941 {
942 continue;
943 }
944
945 if line_info.is_blank {
947 last_was_blank = true;
948 continue;
949 }
950
951 if let Some(list_item) = &line_info.list_item {
952 let current_pos = if list_item.marker_column == 1 {
954 0
955 } else {
956 list_item.marker_column
957 };
958
959 if last_was_blank && current_pos == 0 {
961 stack.clear();
962 }
963 last_was_blank = false;
964
965 while let Some(&(pos, _)) = stack.last() {
967 if pos >= current_pos {
968 stack.pop();
969 } else {
970 break;
971 }
972 }
973
974 if let Some(&(_, parent_is_ordered)) = stack.last()
976 && parent_is_ordered != list_item.is_ordered
977 {
978 return true; }
980
981 stack.push((current_pos, list_item.is_ordered));
982 } else {
983 last_was_blank = false;
985 }
986 }
987
988 false
989 }
990
991 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
993 match self.line_offsets.binary_search(&offset) {
994 Ok(line) => (line + 1, 1),
995 Err(line) => {
996 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
997 (line, offset - line_start + 1)
998 }
999 }
1000 }
1001
1002 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
1004 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
1006 return true;
1007 }
1008
1009 self.code_spans()
1011 .iter()
1012 .any(|span| pos >= span.byte_offset && pos < span.byte_end)
1013 }
1014
1015 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
1017 if line_num > 0 {
1018 self.lines.get(line_num - 1)
1019 } else {
1020 None
1021 }
1022 }
1023
1024 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
1026 self.line_info(line_num).map(|info| info.byte_offset)
1027 }
1028
1029 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
1031 let normalized_id = ref_id.to_lowercase();
1032 self.reference_defs_map
1033 .get(&normalized_id)
1034 .map(|&idx| self.reference_defs[idx].url.as_str())
1035 }
1036
1037 pub fn get_reference_def(&self, ref_id: &str) -> Option<&ReferenceDef> {
1039 let normalized_id = ref_id.to_lowercase();
1040 self.reference_defs_map
1041 .get(&normalized_id)
1042 .map(|&idx| &self.reference_defs[idx])
1043 }
1044
1045 pub fn has_reference_def(&self, ref_id: &str) -> bool {
1047 let normalized_id = ref_id.to_lowercase();
1048 self.reference_defs_map.contains_key(&normalized_id)
1049 }
1050
1051 pub fn is_in_list_block(&self, line_num: usize) -> bool {
1053 self.list_blocks
1054 .iter()
1055 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
1056 }
1057
1058 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
1060 self.list_blocks
1061 .iter()
1062 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
1063 }
1064
1065 pub fn is_in_code_block(&self, line_num: usize) -> bool {
1069 if line_num == 0 || line_num > self.lines.len() {
1070 return false;
1071 }
1072 self.lines[line_num - 1].in_code_block
1073 }
1074
1075 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
1077 if line_num == 0 || line_num > self.lines.len() {
1078 return false;
1079 }
1080 self.lines[line_num - 1].in_front_matter
1081 }
1082
1083 pub fn is_in_html_block(&self, line_num: usize) -> bool {
1085 if line_num == 0 || line_num > self.lines.len() {
1086 return false;
1087 }
1088 self.lines[line_num - 1].in_html_block
1089 }
1090
1091 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
1093 if line_num == 0 || line_num > self.lines.len() {
1094 return false;
1095 }
1096
1097 let col_0indexed = if col > 0 { col - 1 } else { 0 };
1101 let code_spans = self.code_spans();
1102 code_spans.iter().any(|span| {
1103 if line_num < span.line || line_num > span.end_line {
1105 return false;
1106 }
1107
1108 if span.line == span.end_line {
1109 col_0indexed >= span.start_col && col_0indexed < span.end_col
1111 } else if line_num == span.line {
1112 col_0indexed >= span.start_col
1114 } else if line_num == span.end_line {
1115 col_0indexed < span.end_col
1117 } else {
1118 true
1120 }
1121 })
1122 }
1123
1124 #[inline]
1126 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
1127 let code_spans = self.code_spans();
1128 code_spans
1129 .iter()
1130 .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
1131 }
1132
1133 #[inline]
1136 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
1137 self.reference_defs
1138 .iter()
1139 .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
1140 }
1141
1142 #[inline]
1146 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
1147 self.html_comment_ranges
1148 .iter()
1149 .any(|range| byte_pos >= range.start && byte_pos < range.end)
1150 }
1151
1152 #[inline]
1155 pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1156 self.html_tags()
1157 .iter()
1158 .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
1159 }
1160
1161 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1163 self.jinja_ranges
1164 .iter()
1165 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1166 }
1167
1168 #[inline]
1170 pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
1171 self.jsx_expression_ranges
1172 .iter()
1173 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1174 }
1175
1176 #[inline]
1178 pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
1179 self.mdx_comment_ranges
1180 .iter()
1181 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1182 }
1183
1184 pub fn jsx_expression_ranges(&self) -> &[(usize, usize)] {
1186 &self.jsx_expression_ranges
1187 }
1188
1189 pub fn mdx_comment_ranges(&self) -> &[(usize, usize)] {
1191 &self.mdx_comment_ranges
1192 }
1193
1194 #[inline]
1197 pub fn is_in_citation(&self, byte_pos: usize) -> bool {
1198 self.citation_ranges
1199 .iter()
1200 .any(|range| byte_pos >= range.start && byte_pos < range.end)
1201 }
1202
1203 pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
1205 &self.citation_ranges
1206 }
1207
1208 #[inline]
1210 pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
1211 self.shortcode_ranges
1212 .iter()
1213 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1214 }
1215
1216 pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
1218 &self.shortcode_ranges
1219 }
1220
1221 pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1223 self.reference_defs.iter().any(|def| {
1224 if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1225 byte_pos >= start && byte_pos < end
1226 } else {
1227 false
1228 }
1229 })
1230 }
1231
1232 pub fn has_char(&self, ch: char) -> bool {
1234 match ch {
1235 '#' => self.char_frequency.hash_count > 0,
1236 '*' => self.char_frequency.asterisk_count > 0,
1237 '_' => self.char_frequency.underscore_count > 0,
1238 '-' => self.char_frequency.hyphen_count > 0,
1239 '+' => self.char_frequency.plus_count > 0,
1240 '>' => self.char_frequency.gt_count > 0,
1241 '|' => self.char_frequency.pipe_count > 0,
1242 '[' => self.char_frequency.bracket_count > 0,
1243 '`' => self.char_frequency.backtick_count > 0,
1244 '<' => self.char_frequency.lt_count > 0,
1245 '!' => self.char_frequency.exclamation_count > 0,
1246 '\n' => self.char_frequency.newline_count > 0,
1247 _ => self.content.contains(ch), }
1249 }
1250
1251 pub fn char_count(&self, ch: char) -> usize {
1253 match ch {
1254 '#' => self.char_frequency.hash_count,
1255 '*' => self.char_frequency.asterisk_count,
1256 '_' => self.char_frequency.underscore_count,
1257 '-' => self.char_frequency.hyphen_count,
1258 '+' => self.char_frequency.plus_count,
1259 '>' => self.char_frequency.gt_count,
1260 '|' => self.char_frequency.pipe_count,
1261 '[' => self.char_frequency.bracket_count,
1262 '`' => self.char_frequency.backtick_count,
1263 '<' => self.char_frequency.lt_count,
1264 '!' => self.char_frequency.exclamation_count,
1265 '\n' => self.char_frequency.newline_count,
1266 _ => self.content.matches(ch).count(), }
1268 }
1269
1270 pub fn likely_has_headings(&self) -> bool {
1272 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
1274
1275 pub fn likely_has_lists(&self) -> bool {
1277 self.char_frequency.asterisk_count > 0
1278 || self.char_frequency.hyphen_count > 0
1279 || self.char_frequency.plus_count > 0
1280 }
1281
1282 pub fn likely_has_emphasis(&self) -> bool {
1284 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1285 }
1286
1287 pub fn likely_has_tables(&self) -> bool {
1289 self.char_frequency.pipe_count > 2
1290 }
1291
1292 pub fn likely_has_blockquotes(&self) -> bool {
1294 self.char_frequency.gt_count > 0
1295 }
1296
1297 pub fn likely_has_code(&self) -> bool {
1299 self.char_frequency.backtick_count > 0
1300 }
1301
1302 pub fn likely_has_links_or_images(&self) -> bool {
1304 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1305 }
1306
1307 pub fn likely_has_html(&self) -> bool {
1309 self.char_frequency.lt_count > 0
1310 }
1311
1312 pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1317 if let Some(line_info) = self.lines.get(line_idx)
1318 && let Some(ref bq) = line_info.blockquote
1319 {
1320 bq.prefix.trim_end().to_string()
1321 } else {
1322 String::new()
1323 }
1324 }
1325
1326 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1328 self.html_tags()
1329 .iter()
1330 .filter(|tag| tag.line == line_num)
1331 .cloned()
1332 .collect()
1333 }
1334
1335 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1337 self.emphasis_spans()
1338 .iter()
1339 .filter(|span| span.line == line_num)
1340 .cloned()
1341 .collect()
1342 }
1343
1344 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1346 self.table_rows()
1347 .iter()
1348 .filter(|row| row.line == line_num)
1349 .cloned()
1350 .collect()
1351 }
1352
1353 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1355 self.bare_urls()
1356 .iter()
1357 .filter(|url| url.line == line_num)
1358 .cloned()
1359 .collect()
1360 }
1361
1362 #[inline]
1368 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1369 let idx = match lines.binary_search_by(|line| {
1371 if byte_offset < line.byte_offset {
1372 std::cmp::Ordering::Greater
1373 } else if byte_offset > line.byte_offset + line.byte_len {
1374 std::cmp::Ordering::Less
1375 } else {
1376 std::cmp::Ordering::Equal
1377 }
1378 }) {
1379 Ok(idx) => idx,
1380 Err(idx) => idx.saturating_sub(1),
1381 };
1382
1383 let line = &lines[idx];
1384 let line_num = idx + 1;
1385 let col = byte_offset.saturating_sub(line.byte_offset);
1386
1387 (idx, line_num, col)
1388 }
1389
1390 #[inline]
1392 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1393 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1395
1396 if idx > 0 {
1398 let span = &code_spans[idx - 1];
1399 if offset >= span.byte_offset && offset < span.byte_end {
1400 return true;
1401 }
1402 }
1403
1404 false
1405 }
1406
1407 fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1411 use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1412
1413 let mut link_ranges = Vec::new();
1414 let mut options = Options::empty();
1415 options.insert(Options::ENABLE_WIKILINKS);
1416 options.insert(Options::ENABLE_FOOTNOTES);
1417
1418 let parser = Parser::new_ext(content, options).into_offset_iter();
1419 let mut link_stack: Vec<usize> = Vec::new();
1420
1421 for (event, range) in parser {
1422 match event {
1423 Event::Start(Tag::Link { .. }) => {
1424 link_stack.push(range.start);
1425 }
1426 Event::End(TagEnd::Link) => {
1427 if let Some(start_pos) = link_stack.pop() {
1428 link_ranges.push((start_pos, range.end));
1429 }
1430 }
1431 _ => {}
1432 }
1433 }
1434
1435 link_ranges
1436 }
1437
1438 fn parse_links(
1440 content: &'a str,
1441 lines: &[LineInfo],
1442 code_blocks: &[(usize, usize)],
1443 code_spans: &[CodeSpan],
1444 flavor: MarkdownFlavor,
1445 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1446 ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1447 use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1448 use std::collections::HashSet;
1449
1450 let mut links = Vec::with_capacity(content.len() / 500);
1451 let mut broken_links = Vec::new();
1452 let mut footnote_refs = Vec::new();
1453
1454 let mut found_positions = HashSet::new();
1456
1457 let mut options = Options::empty();
1467 options.insert(Options::ENABLE_WIKILINKS);
1468 options.insert(Options::ENABLE_FOOTNOTES);
1469
1470 let parser = Parser::new_with_broken_link_callback(
1471 content,
1472 options,
1473 Some(|link: BrokenLink<'_>| {
1474 broken_links.push(BrokenLinkInfo {
1475 reference: link.reference.to_string(),
1476 span: link.span.clone(),
1477 });
1478 None
1479 }),
1480 )
1481 .into_offset_iter();
1482
1483 let mut link_stack: Vec<(
1484 usize,
1485 usize,
1486 pulldown_cmark::CowStr<'a>,
1487 LinkType,
1488 pulldown_cmark::CowStr<'a>,
1489 )> = Vec::new();
1490 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1493 match event {
1494 Event::Start(Tag::Link {
1495 link_type,
1496 dest_url,
1497 id,
1498 ..
1499 }) => {
1500 link_stack.push((range.start, range.end, dest_url, link_type, id));
1502 text_chunks.clear();
1503 }
1504 Event::Text(text) if !link_stack.is_empty() => {
1505 text_chunks.push((text.to_string(), range.start, range.end));
1507 }
1508 Event::Code(code) if !link_stack.is_empty() => {
1509 let code_text = format!("`{code}`");
1511 text_chunks.push((code_text, range.start, range.end));
1512 }
1513 Event::End(TagEnd::Link) => {
1514 if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1515 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1517 text_chunks.clear();
1518 continue;
1519 }
1520
1521 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1523
1524 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1526 text_chunks.clear();
1527 continue;
1528 }
1529
1530 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1531
1532 let is_reference = matches!(
1533 link_type,
1534 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1535 );
1536
1537 let link_text = if start_pos < content.len() {
1540 let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1541
1542 let mut close_pos = None;
1546 let mut depth = 0;
1547 let mut in_code_span = false;
1548
1549 for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1550 let mut backslash_count = 0;
1552 let mut j = i;
1553 while j > 0 && link_bytes[j - 1] == b'\\' {
1554 backslash_count += 1;
1555 j -= 1;
1556 }
1557 let is_escaped = backslash_count % 2 != 0;
1558
1559 if byte == b'`' && !is_escaped {
1561 in_code_span = !in_code_span;
1562 }
1563
1564 if !is_escaped && !in_code_span {
1566 if byte == b'[' {
1567 depth += 1;
1568 } else if byte == b']' {
1569 if depth == 0 {
1570 close_pos = Some(i);
1572 break;
1573 } else {
1574 depth -= 1;
1575 }
1576 }
1577 }
1578 }
1579
1580 if let Some(pos) = close_pos {
1581 Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1582 } else {
1583 Cow::Borrowed("")
1584 }
1585 } else {
1586 Cow::Borrowed("")
1587 };
1588
1589 let reference_id = if is_reference && !ref_id.is_empty() {
1591 Some(Cow::Owned(ref_id.to_lowercase()))
1592 } else if is_reference {
1593 Some(Cow::Owned(link_text.to_lowercase()))
1595 } else {
1596 None
1597 };
1598
1599 found_positions.insert(start_pos);
1601
1602 links.push(ParsedLink {
1603 line: line_num,
1604 start_col: col_start,
1605 end_col: col_end,
1606 byte_offset: start_pos,
1607 byte_end: range.end,
1608 text: link_text,
1609 url: Cow::Owned(url.to_string()),
1610 is_reference,
1611 reference_id,
1612 link_type,
1613 });
1614
1615 text_chunks.clear();
1616 }
1617 }
1618 Event::FootnoteReference(footnote_id) => {
1619 if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1622 continue;
1623 }
1624
1625 let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1626 footnote_refs.push(FootnoteRef {
1627 id: footnote_id.to_string(),
1628 line: line_num,
1629 byte_offset: range.start,
1630 byte_end: range.end,
1631 });
1632 }
1633 _ => {}
1634 }
1635 }
1636
1637 for cap in LINK_PATTERN.captures_iter(content) {
1641 let full_match = cap.get(0).unwrap();
1642 let match_start = full_match.start();
1643 let match_end = full_match.end();
1644
1645 if found_positions.contains(&match_start) {
1647 continue;
1648 }
1649
1650 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1652 continue;
1653 }
1654
1655 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1657 continue;
1658 }
1659
1660 if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1662 continue;
1663 }
1664
1665 if Self::is_offset_in_code_span(code_spans, match_start) {
1667 continue;
1668 }
1669
1670 if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1672 continue;
1673 }
1674
1675 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1677
1678 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1680 continue;
1681 }
1682
1683 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1684
1685 let text = cap.get(1).map_or("", |m| m.as_str());
1686
1687 if let Some(ref_id) = cap.get(6) {
1689 let ref_id_str = ref_id.as_str();
1690 let normalized_ref = if ref_id_str.is_empty() {
1691 Cow::Owned(text.to_lowercase()) } else {
1693 Cow::Owned(ref_id_str.to_lowercase())
1694 };
1695
1696 links.push(ParsedLink {
1698 line: line_num,
1699 start_col: col_start,
1700 end_col: col_end,
1701 byte_offset: match_start,
1702 byte_end: match_end,
1703 text: Cow::Borrowed(text),
1704 url: Cow::Borrowed(""), is_reference: true,
1706 reference_id: Some(normalized_ref),
1707 link_type: LinkType::Reference, });
1709 }
1710 }
1711
1712 (links, broken_links, footnote_refs)
1713 }
1714
1715 fn parse_images(
1717 content: &'a str,
1718 lines: &[LineInfo],
1719 code_blocks: &[(usize, usize)],
1720 code_spans: &[CodeSpan],
1721 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1722 ) -> Vec<ParsedImage<'a>> {
1723 use crate::utils::skip_context::is_in_html_comment_ranges;
1724 use std::collections::HashSet;
1725
1726 let mut images = Vec::with_capacity(content.len() / 1000);
1728 let mut found_positions = HashSet::new();
1729
1730 let parser = Parser::new(content).into_offset_iter();
1732 let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1733 Vec::new();
1734 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1737 match event {
1738 Event::Start(Tag::Image {
1739 link_type,
1740 dest_url,
1741 id,
1742 ..
1743 }) => {
1744 image_stack.push((range.start, dest_url, link_type, id));
1745 text_chunks.clear();
1746 }
1747 Event::Text(text) if !image_stack.is_empty() => {
1748 text_chunks.push((text.to_string(), range.start, range.end));
1749 }
1750 Event::Code(code) if !image_stack.is_empty() => {
1751 let code_text = format!("`{code}`");
1752 text_chunks.push((code_text, range.start, range.end));
1753 }
1754 Event::End(TagEnd::Image) => {
1755 if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1756 if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1758 continue;
1759 }
1760
1761 if Self::is_offset_in_code_span(code_spans, start_pos) {
1763 continue;
1764 }
1765
1766 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1768 continue;
1769 }
1770
1771 let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1773 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1774
1775 let is_reference = matches!(
1776 link_type,
1777 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1778 );
1779
1780 let alt_text = if start_pos < content.len() {
1783 let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1784
1785 let mut close_pos = None;
1788 let mut depth = 0;
1789
1790 if image_bytes.len() > 2 {
1791 for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1792 let mut backslash_count = 0;
1794 let mut j = i;
1795 while j > 0 && image_bytes[j - 1] == b'\\' {
1796 backslash_count += 1;
1797 j -= 1;
1798 }
1799 let is_escaped = backslash_count % 2 != 0;
1800
1801 if !is_escaped {
1802 if byte == b'[' {
1803 depth += 1;
1804 } else if byte == b']' {
1805 if depth == 0 {
1806 close_pos = Some(i);
1808 break;
1809 } else {
1810 depth -= 1;
1811 }
1812 }
1813 }
1814 }
1815 }
1816
1817 if let Some(pos) = close_pos {
1818 Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1819 } else {
1820 Cow::Borrowed("")
1821 }
1822 } else {
1823 Cow::Borrowed("")
1824 };
1825
1826 let reference_id = if is_reference && !ref_id.is_empty() {
1827 Some(Cow::Owned(ref_id.to_lowercase()))
1828 } else if is_reference {
1829 Some(Cow::Owned(alt_text.to_lowercase())) } else {
1831 None
1832 };
1833
1834 found_positions.insert(start_pos);
1835 images.push(ParsedImage {
1836 line: line_num,
1837 start_col: col_start,
1838 end_col: col_end,
1839 byte_offset: start_pos,
1840 byte_end: range.end,
1841 alt_text,
1842 url: Cow::Owned(url.to_string()),
1843 is_reference,
1844 reference_id,
1845 link_type,
1846 });
1847 }
1848 }
1849 _ => {}
1850 }
1851 }
1852
1853 for cap in IMAGE_PATTERN.captures_iter(content) {
1855 let full_match = cap.get(0).unwrap();
1856 let match_start = full_match.start();
1857 let match_end = full_match.end();
1858
1859 if found_positions.contains(&match_start) {
1861 continue;
1862 }
1863
1864 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1866 continue;
1867 }
1868
1869 if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1871 || Self::is_offset_in_code_span(code_spans, match_start)
1872 || is_in_html_comment_ranges(html_comment_ranges, match_start)
1873 {
1874 continue;
1875 }
1876
1877 if let Some(ref_id) = cap.get(6) {
1879 let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1880 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1881 let alt_text = cap.get(1).map_or("", |m| m.as_str());
1882 let ref_id_str = ref_id.as_str();
1883 let normalized_ref = if ref_id_str.is_empty() {
1884 Cow::Owned(alt_text.to_lowercase())
1885 } else {
1886 Cow::Owned(ref_id_str.to_lowercase())
1887 };
1888
1889 images.push(ParsedImage {
1890 line: line_num,
1891 start_col: col_start,
1892 end_col: col_end,
1893 byte_offset: match_start,
1894 byte_end: match_end,
1895 alt_text: Cow::Borrowed(alt_text),
1896 url: Cow::Borrowed(""),
1897 is_reference: true,
1898 reference_id: Some(normalized_ref),
1899 link_type: LinkType::Reference, });
1901 }
1902 }
1903
1904 images
1905 }
1906
1907 fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1909 let mut refs = Vec::with_capacity(lines.len() / 20); for (line_idx, line_info) in lines.iter().enumerate() {
1913 if line_info.in_code_block {
1915 continue;
1916 }
1917
1918 let line = line_info.content(content);
1919 let line_num = line_idx + 1;
1920
1921 if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1922 let id_raw = cap.get(1).unwrap().as_str();
1923
1924 if id_raw.starts_with('^') {
1927 continue;
1928 }
1929
1930 let id = id_raw.to_lowercase();
1931 let url = cap.get(2).unwrap().as_str().to_string();
1932 let title_match = cap.get(3).or_else(|| cap.get(4));
1933 let title = title_match.map(|m| m.as_str().to_string());
1934
1935 let match_obj = cap.get(0).unwrap();
1938 let byte_offset = line_info.byte_offset + match_obj.start();
1939 let byte_end = line_info.byte_offset + match_obj.end();
1940
1941 let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1943 let start = line_info.byte_offset + m.start().saturating_sub(1);
1945 let end = line_info.byte_offset + m.end() + 1; (Some(start), Some(end))
1947 } else {
1948 (None, None)
1949 };
1950
1951 refs.push(ReferenceDef {
1952 line: line_num,
1953 id,
1954 url,
1955 title,
1956 byte_offset,
1957 byte_end,
1958 title_byte_start,
1959 title_byte_end,
1960 });
1961 }
1962 }
1963
1964 refs
1965 }
1966
/// Splits a blockquote line into its marker prefix and the quoted content.
///
/// Every leading `>` marker is consumed; each may be preceded by whitespace
/// and followed by at most one space or tab. Returns `(prefix, content)`,
/// where `prefix` is the consumed leading slice of `line`, or `None` when the
/// line does not begin with `>` after leading whitespace.
#[inline]
fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
    let mut rest = line;
    let mut consumed = 0;

    while let Some(after_marker) = rest.trim_start().strip_prefix('>') {
        // Whitespace before the marker plus the marker itself.
        consumed += rest.len() - after_marker.len();

        rest = match after_marker.as_bytes().first() {
            // A single space or tab after `>` belongs to the prefix.
            Some(b' ' | b'\t') => {
                consumed += 1;
                &after_marker[1..]
            }
            _ => after_marker,
        };
    }

    // Each marker adds at least one byte, so zero means no marker was found.
    if consumed == 0 {
        None
    } else {
        Some((&line[..consumed], rest))
    }
}
2007
2008 fn detect_list_items_and_emphasis_with_pulldown(
2032 content: &str,
2033 line_offsets: &[usize],
2034 flavor: MarkdownFlavor,
2035 front_matter_end: usize,
2036 code_blocks: &[(usize, usize)],
2037 ) -> (ListItemMap, Vec<EmphasisSpan>) {
2038 use std::collections::HashMap;
2039
2040 let mut list_items = HashMap::new();
2041 let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
2042
2043 let mut options = Options::empty();
2044 options.insert(Options::ENABLE_TABLES);
2045 options.insert(Options::ENABLE_FOOTNOTES);
2046 options.insert(Options::ENABLE_STRIKETHROUGH);
2047 options.insert(Options::ENABLE_TASKLISTS);
2048 options.insert(Options::ENABLE_GFM);
2050
2051 let _ = flavor;
2053
2054 let parser = Parser::new_ext(content, options).into_offset_iter();
2055 let mut list_depth: usize = 0;
2056 let mut list_stack: Vec<bool> = Vec::new();
2057
2058 for (event, range) in parser {
2059 match event {
2060 Event::Start(Tag::Emphasis) | Event::Start(Tag::Strong) => {
2062 let marker_count = if matches!(event, Event::Start(Tag::Strong)) {
2063 2
2064 } else {
2065 1
2066 };
2067 let match_start = range.start;
2068 let match_end = range.end;
2069
2070 if !CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2072 let marker = content[match_start..].chars().next().unwrap_or('*');
2074 if marker == '*' || marker == '_' {
2075 let content_start = match_start + marker_count;
2077 let content_end = if match_end >= marker_count {
2078 match_end - marker_count
2079 } else {
2080 match_end
2081 };
2082 let content_part = if content_start < content_end && content_end <= content.len() {
2083 &content[content_start..content_end]
2084 } else {
2085 ""
2086 };
2087
2088 let line_idx = match line_offsets.binary_search(&match_start) {
2090 Ok(idx) => idx,
2091 Err(idx) => idx.saturating_sub(1),
2092 };
2093 let line_num = line_idx + 1;
2094 let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
2095 let col_start = match_start - line_start;
2096 let col_end = match_end - line_start;
2097
2098 emphasis_spans.push(EmphasisSpan {
2099 line: line_num,
2100 start_col: col_start,
2101 end_col: col_end,
2102 byte_offset: match_start,
2103 byte_end: match_end,
2104 marker,
2105 marker_count,
2106 content: content_part.to_string(),
2107 });
2108 }
2109 }
2110 }
2111 Event::Start(Tag::List(start_number)) => {
2112 list_depth += 1;
2113 list_stack.push(start_number.is_some());
2114 }
2115 Event::End(TagEnd::List(_)) => {
2116 list_depth = list_depth.saturating_sub(1);
2117 list_stack.pop();
2118 }
2119 Event::Start(Tag::Item) if list_depth > 0 => {
2120 let current_list_is_ordered = list_stack.last().copied().unwrap_or(false);
2122 let item_start = range.start;
2124
2125 let mut line_idx = match line_offsets.binary_search(&item_start) {
2127 Ok(idx) => idx,
2128 Err(idx) => idx.saturating_sub(1),
2129 };
2130
2131 if item_start < content.len() && content.as_bytes()[item_start] == b'\n' {
2135 line_idx += 1;
2136 }
2137
2138 if front_matter_end > 0 && line_idx < front_matter_end {
2140 continue;
2141 }
2142
2143 if line_idx < line_offsets.len() {
2144 let line_start_byte = line_offsets[line_idx];
2145 let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
2146 let line = &content[line_start_byte..line_end.min(content.len())];
2147
2148 let line = line
2150 .strip_suffix('\n')
2151 .or_else(|| line.strip_suffix("\r\n"))
2152 .unwrap_or(line);
2153
2154 let blockquote_parse = Self::parse_blockquote_prefix(line);
2156 let (blockquote_prefix_len, line_to_parse) = if let Some((prefix, content)) = blockquote_parse {
2157 (prefix.len(), content)
2158 } else {
2159 (0, line)
2160 };
2161
2162 if current_list_is_ordered {
2164 if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
2165 Self::parse_ordered_list(line_to_parse)
2166 {
2167 let marker = format!("{number_str}{delimiter}");
2168 let marker_column = blockquote_prefix_len + leading_spaces.len();
2169 let content_column = marker_column + marker.len() + spacing.len();
2170 let number = number_str.parse().ok();
2171
2172 list_items.entry(line_start_byte).or_insert((
2173 true,
2174 marker,
2175 marker_column,
2176 content_column,
2177 number,
2178 ));
2179 }
2180 } else if let Some((leading_spaces, marker, spacing, _content)) =
2181 Self::parse_unordered_list(line_to_parse)
2182 {
2183 let marker_column = blockquote_prefix_len + leading_spaces.len();
2184 let content_column = marker_column + 1 + spacing.len();
2185
2186 list_items.entry(line_start_byte).or_insert((
2187 false,
2188 marker.to_string(),
2189 marker_column,
2190 content_column,
2191 None,
2192 ));
2193 }
2194 }
2195 }
2196 _ => {}
2197 }
2198 }
2199
2200 (list_items, emphasis_spans)
2201 }
2202
/// Splits a line into the parts of an unordered list item.
///
/// Returns `(leading_whitespace, marker, spacing, content)`, where `marker`
/// is `-`, `*` or `+` and `spacing` is the (possibly empty) run of spaces and
/// tabs after it. Returns `None` when the first byte after the leading
/// spaces/tabs is not one of those markers.
#[inline]
fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
    // Length of the leading run of spaces and tabs.
    let blank_run = |s: &str| s.bytes().take_while(|&b| b == b' ' || b == b'\t').count();

    let (indent, rest) = line.split_at(blank_run(line));

    let marker = match *rest.as_bytes().first()? {
        b @ (b'-' | b'*' | b'+') => char::from(b),
        _ => return None,
    };

    // The marker is a single ASCII byte, so slicing past it is safe.
    let after_marker = &rest[1..];
    let (spacing, content) = after_marker.split_at(blank_run(after_marker));

    Some((indent, marker, spacing, content))
}
2235
/// Splits a line into the parts of an ordered list item.
///
/// Returns `(leading_whitespace, number, delimiter, spacing, content)`, where
/// `number` is a non-empty run of ASCII digits, `delimiter` is `.` or `)`,
/// and `spacing` is the (possibly empty) run of spaces and tabs after it.
/// Returns `None` when the line does not have that shape.
#[inline]
fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
    // Length of the leading run of spaces and tabs.
    let blank_run = |s: &str| s.bytes().take_while(|&b| b == b' ' || b == b'\t').count();

    let (indent, rest) = line.split_at(blank_run(line));

    let digit_count = rest.bytes().take_while(u8::is_ascii_digit).count();
    if digit_count == 0 {
        return None;
    }
    let (number, rest) = rest.split_at(digit_count);

    let delimiter = match *rest.as_bytes().first()? {
        b'.' => '.',
        b')' => ')',
        _ => return None,
    };

    // The delimiter is a single ASCII byte, so slicing past it is safe.
    let after_delimiter = &rest[1..];
    let (spacing, content) = after_delimiter.split_at(blank_run(after_delimiter));

    Some((indent, number, delimiter, spacing, content))
}
2283
/// Builds a per-line flag vector marking the lines covered by a code block.
///
/// `line_offsets` holds the starting byte offset of each line and
/// `code_blocks` holds `(start, end)` byte ranges. A range start that is not
/// on a UTF-8 character boundary is moved back to the previous boundary; a
/// range end is clamped to the content length and moved forward to the next
/// boundary. The result has one entry per element of `line_offsets`.
fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
    let total_len = content.len();
    let mut line_flags = vec![false; line_offsets.len()];

    for &(block_start, block_end) in code_blocks {
        // Snap the start backwards onto a character boundary.
        let mut lo = block_start;
        while lo > 0 && !content.is_char_boundary(lo) {
            lo -= 1;
        }

        // Clamp the end, then snap it forwards onto a character boundary.
        let mut hi = block_end.min(total_len);
        while hi < total_len && !content.is_char_boundary(hi) {
            hi += 1;
        }

        // First flagged line: the last line starting at or before `lo`.
        let first = line_offsets.partition_point(|&offset| offset <= lo).saturating_sub(1);
        // One past the last flagged line: lines starting before `hi`.
        let past_last = line_offsets.partition_point(|&offset| offset < hi);

        // An empty or inverted line range flags nothing.
        if first < past_last {
            line_flags[first..past_last].fill(true);
        }
    }

    line_flags
}
2343
/// Builds a per-line flag vector marking lines that belong to a `$$` math block.
///
/// A line whose trimmed text is exactly `$$` toggles the math state and is
/// itself flagged; every line between an opening and closing delimiter is
/// flagged too. Lines marked in `code_block_map` are never flagged and never
/// toggle the state. The result has one entry per line of `content.lines()`.
fn compute_math_block_line_map(content: &str, code_block_map: &[bool]) -> Vec<bool> {
    let mut inside_math = false;

    content
        .lines()
        .enumerate()
        .map(|(index, text)| {
            if code_block_map.get(index).copied().unwrap_or(false) {
                return false;
            }

            let is_delimiter = text.trim() == "$$";
            // Delimiter lines count as part of the block on both ends.
            let flagged = is_delimiter || inside_math;
            if is_delimiter {
                inside_math = !inside_math;
            }
            flagged
        })
        .collect()
}
2381
2382 fn compute_basic_line_info(
2385 content: &str,
2386 line_offsets: &[usize],
2387 code_blocks: &[(usize, usize)],
2388 flavor: MarkdownFlavor,
2389 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2390 autodoc_ranges: &[crate::utils::skip_context::ByteRange],
2391 quarto_div_ranges: &[crate::utils::skip_context::ByteRange],
2392 ) -> (Vec<LineInfo>, Vec<EmphasisSpan>) {
2393 let content_lines: Vec<&str> = content.lines().collect();
2394 let mut lines = Vec::with_capacity(content_lines.len());
2395
2396 let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
2398
2399 let math_block_map = Self::compute_math_block_line_map(content, &code_block_map);
2401
2402 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2405
2406 let (list_item_map, emphasis_spans) = Self::detect_list_items_and_emphasis_with_pulldown(
2409 content,
2410 line_offsets,
2411 flavor,
2412 front_matter_end,
2413 code_blocks,
2414 );
2415
2416 for (i, line) in content_lines.iter().enumerate() {
2417 let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
2418 let indent = line.len() - line.trim_start().len();
2419 let visual_indent = ElementCache::calculate_indentation_width_default(line);
2421
2422 let blockquote_parse = Self::parse_blockquote_prefix(line);
2424
2425 let is_blank = if let Some((_, content)) = blockquote_parse {
2427 content.trim().is_empty()
2429 } else {
2430 line.trim().is_empty()
2431 };
2432
2433 let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
2435
2436 let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
2438 && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
2439 let line_end_offset = byte_offset + line.len();
2442 let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
2443 html_comment_ranges,
2444 byte_offset,
2445 line_end_offset,
2446 );
2447 let list_item =
2450 list_item_map
2451 .get(&byte_offset)
2452 .map(
2453 |(is_ordered, marker, marker_column, content_column, number)| ListItemInfo {
2454 marker: marker.clone(),
2455 is_ordered: *is_ordered,
2456 number: *number,
2457 marker_column: *marker_column,
2458 content_column: *content_column,
2459 },
2460 );
2461
2462 let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2465 let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2466
2467 let in_math_block = math_block_map.get(i).copied().unwrap_or(false);
2469
2470 let in_quarto_div = flavor == MarkdownFlavor::Quarto
2472 && crate::utils::quarto_divs::is_within_div_block_ranges(quarto_div_ranges, byte_offset);
2473
2474 lines.push(LineInfo {
2475 byte_offset,
2476 byte_len: line.len(),
2477 indent,
2478 visual_indent,
2479 is_blank,
2480 in_code_block,
2481 in_front_matter,
2482 in_html_block: false, in_html_comment,
2484 list_item,
2485 heading: None, blockquote: None, in_mkdocstrings,
2488 in_esm_block: false, in_code_span_continuation: false, is_horizontal_rule: is_hr,
2491 in_math_block,
2492 in_quarto_div,
2493 in_jsx_expression: false, in_mdx_comment: false, in_jsx_component: false, in_jsx_fragment: false, in_admonition: false, in_content_tab: false, in_definition_list: false, });
2501 }
2502
2503 (lines, emphasis_spans)
2504 }
2505
/// Fills in the `blockquote` and `heading` fields of `lines` in place.
///
/// For each line:
/// * outside front matter, a successful `parse_blockquote_detailed` result is
///   stored as `BlockquoteInfo`, and the line is marked as a horizontal rule
///   when its quoted content is one and the line is not in a code block;
/// * lines in code blocks, front matter or HTML blocks, and blank lines, get
///   no heading;
/// * otherwise an ATX heading (`# text`) is tried first, then a Setext heading
///   (text followed by a `===` / `---` underline on the next line).
///
/// `link_byte_ranges` lists `(start, end)` spans of links; a line starting
/// strictly inside one is not treated as an ATX heading. `flavor` only
/// matters for MkDocs, where snippet section markers are not ATX candidates.
fn detect_headings_and_blockquotes(
    content: &str,
    lines: &mut [LineInfo],
    flavor: MarkdownFlavor,
    html_comment_ranges: &[crate::utils::skip_context::ByteRange],
    link_byte_ranges: &[(usize, usize)],
) {
    // Groups: 1 = leading whitespace, 2 = one to six `#`, 3 = whitespace after
    // the hashes, 4 = remainder of the line.
    static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
        LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
    // A line made only of `=` or only of `-`, with optional surrounding whitespace.
    static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
        LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());

    let content_lines: Vec<&str> = content.lines().collect();

    let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);

    for i in 0..lines.len() {
        // NOTE(review): indexes `content_lines` by a `lines` index — assumes both
        // have the same number of lines; confirm against the caller.
        let line = content_lines[i];

        // Blockquote detection runs before the code-block check below, so it
        // also applies to lines flagged as code.
        if !(front_matter_end > 0 && i < front_matter_end)
            && let Some(bq) = parse_blockquote_detailed(line)
        {
            let nesting_level = bq.markers.len();
            let marker_column = bq.indent.len();
            let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
            // Marker directly followed by content, e.g. `>text`.
            let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
            // Counts only space characters, so tabs do not contribute.
            let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
            // A bare marker with nothing after it.
            let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();

            lines[i].blockquote = Some(BlockquoteInfo {
                nesting_level,
                indent: bq.indent.to_string(),
                marker_column,
                prefix,
                content: bq.content.to_string(),
                has_no_space_after_marker: has_no_space,
                has_multiple_spaces_after_marker: has_multiple_spaces,
                needs_md028_fix,
            });

            // A horizontal rule inside a blockquote also marks the line.
            if !lines[i].in_code_block && is_horizontal_rule_content(bq.content.trim()) {
                lines[i].is_horizontal_rule = true;
            }
        }

        // The remaining checks decide whether the line can be a heading at all.
        if lines[i].in_code_block {
            continue;
        }

        if front_matter_end > 0 && i < front_matter_end {
            continue;
        }

        if lines[i].in_html_block {
            continue;
        }

        if lines[i].is_blank {
            continue;
        }

        // MkDocs snippet section markers are never ATX heading candidates.
        let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
            crate::utils::mkdocs_snippets::is_snippet_section_start(line)
                || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
        } else {
            false
        };

        if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
            if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
                continue;
            }
            let line_offset = lines[i].byte_offset;
            // Skip lines that begin strictly inside a link span.
            if link_byte_ranges
                .iter()
                .any(|&(start, end)| line_offset > start && line_offset < end)
            {
                continue;
            }
            let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
            let hashes = caps.get(2).map_or("", |m| m.as_str());
            let spaces_after = caps.get(3).map_or("", |m| m.as_str());
            let rest = caps.get(4).map_or("", |m| m.as_str());

            let level = hashes.len() as u8;
            let marker_column = leading_spaces.len();

            // Separate the heading text from an optional closing `#` sequence.
            let (text, has_closing, closing_seq) = {
                // Split off a trailing ` {#...}` attribute so it is not taken
                // for part of the closing sequence.
                let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
                    if rest[id_start..].trim_end().ends_with('}') {
                        (&rest[..id_start], &rest[id_start..])
                    } else {
                        (rest, "")
                    }
                } else {
                    (rest, "")
                };

                let trimmed_rest = rest_without_id.trim_end();
                if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
                    let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();

                    let last_hash_char_idx = char_positions
                        .iter()
                        .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);

                    if let Some(mut char_idx) = last_hash_char_idx {
                        // Walk back to the first `#` of the run containing the last `#`.
                        while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
                            char_idx -= 1;
                        }

                        let start_of_hashes = char_positions[char_idx].0;

                        // The run must sit at the start of the text or follow whitespace.
                        let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();

                        // Everything from the run to the end of the trimmed text
                        // must be `#` for it to count as a closing sequence.
                        let potential_closing = &trimmed_rest[start_of_hashes..];
                        let is_all_hashes = potential_closing.chars().all(|c| c == '#');

                        if is_all_hashes && has_space_before {
                            let closing_hashes = potential_closing.to_string();
                            // Re-attach the custom id part after removing the closing hashes.
                            let text_part = if !custom_id_part.is_empty() {
                                format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
                            } else {
                                trimmed_rest[..start_of_hashes].trim_end().to_string()
                            };
                            (text_part, true, closing_hashes)
                        } else {
                            (rest.to_string(), false, String::new())
                        }
                    } else {
                        (rest.to_string(), false, String::new())
                    }
                } else {
                    (rest.to_string(), false, String::new())
                }
            };

            let content_column = marker_column + hashes.len() + spaces_after.len();

            let raw_text = text.trim().to_string();
            let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);

            // Without an inline id, look for a standalone attribute list on the next line.
            if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
                let next_line = content_lines[i + 1];
                if !lines[i + 1].in_code_block
                    && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
                    && let Some(next_line_id) =
                        crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
                {
                    custom_id = Some(next_line_id);
                }
            }

            // With no space after the hashes, the heading is still valid when it
            // has no text, uses more than one `#`, or its text starts with an
            // uppercase character.
            // NOTE(review): presumably this filters `#tag`-style text — confirm.
            let is_valid = !spaces_after.is_empty()
                || rest.is_empty()
                || level > 1
                || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());

            lines[i].heading = Some(HeadingInfo {
                level,
                style: HeadingStyle::ATX,
                marker: hashes.to_string(),
                marker_column,
                content_column,
                text: clean_text,
                custom_id,
                raw_text,
                has_closing_sequence: has_closing,
                closing_sequence: closing_seq,
                is_valid,
            });
        }
        // Not an ATX heading: check whether the next line is a Setext underline.
        else if i + 1 < content_lines.len() && i + 1 < lines.len() {
            let next_line = content_lines[i + 1];
            if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
                // Front-matter lines were already skipped earlier in this iteration.
                if front_matter_end > 0 && i < front_matter_end {
                    continue;
                }

                if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
                {
                    continue;
                }

                let content_line = line.trim();

                // Lines starting with a list marker character are not heading text.
                if content_line.starts_with('-') || content_line.starts_with('*') || content_line.starts_with('+') {
                    continue;
                }

                // Three or more underscores (ignoring whitespace) and nothing else.
                if content_line.starts_with('_') {
                    let non_ws: String = content_line.chars().filter(|c| !c.is_whitespace()).collect();
                    if non_ws.len() >= 3 && non_ws.chars().all(|c| c == '_') {
                        continue;
                    }
                }

                // Digits followed by `.` or `)` look like an ordered list item.
                if let Some(first_char) = content_line.chars().next()
                    && first_char.is_ascii_digit()
                {
                    let num_end = content_line.chars().take_while(|c| c.is_ascii_digit()).count();
                    if num_end < content_line.len() {
                        let next = content_line.chars().nth(num_end);
                        if next == Some('.') || next == Some(')') {
                            continue;
                        }
                    }
                }

                // Lines matching the ATX pattern are not Setext text. This is
                // reachable for MkDocs snippet lines, which skip the ATX branch.
                if ATX_HEADING_REGEX.is_match(line) {
                    continue;
                }

                // Blockquote lines are not heading text.
                if content_line.starts_with('>') {
                    continue;
                }

                // Fence openers (three backticks or tildes) are not heading text.
                let trimmed_start = line.trim_start();
                if trimmed_start.len() >= 3 {
                    let first_three: String = trimmed_start.chars().take(3).collect();
                    if first_three == "```" || first_three == "~~~" {
                        continue;
                    }
                }

                // Lines starting with `<` are not heading text.
                if content_line.starts_with('<') {
                    continue;
                }

                let underline = next_line.trim();

                // `=` underline is level 1, `-` underline is level 2.
                let level = if underline.starts_with('=') { 1 } else { 2 };
                let style = if level == 1 {
                    HeadingStyle::Setext1
                } else {
                    HeadingStyle::Setext2
                };

                let raw_text = line.trim().to_string();
                let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);

                // Without an inline id, look for a standalone attribute list on
                // the line after the underline.
                if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
                    let attr_line = content_lines[i + 2];
                    if !lines[i + 2].in_code_block
                        && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
                        && let Some(attr_line_id) =
                            crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
                    {
                        custom_id = Some(attr_line_id);
                    }
                }

                // For Setext headings the marker is the underline text, and
                // `marker_column` is the underline's indentation.
                lines[i].heading = Some(HeadingInfo {
                    level,
                    style,
                    marker: underline.to_string(),
                    marker_column: next_line.len() - next_line.trim_start().len(),
                    content_column: lines[i].indent,
                    text: clean_text,
                    custom_id,
                    raw_text,
                    has_closing_sequence: false,
                    closing_sequence: String::new(),
                    is_valid: true,
                });
            }
        }
    }
}
2839
2840 fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2842 const BLOCK_ELEMENTS: &[&str] = &[
2845 "address",
2846 "article",
2847 "aside",
2848 "audio",
2849 "blockquote",
2850 "canvas",
2851 "details",
2852 "dialog",
2853 "dd",
2854 "div",
2855 "dl",
2856 "dt",
2857 "embed",
2858 "fieldset",
2859 "figcaption",
2860 "figure",
2861 "footer",
2862 "form",
2863 "h1",
2864 "h2",
2865 "h3",
2866 "h4",
2867 "h5",
2868 "h6",
2869 "header",
2870 "hr",
2871 "iframe",
2872 "li",
2873 "main",
2874 "menu",
2875 "nav",
2876 "noscript",
2877 "object",
2878 "ol",
2879 "p",
2880 "picture",
2881 "pre",
2882 "script",
2883 "search",
2884 "section",
2885 "source",
2886 "style",
2887 "summary",
2888 "svg",
2889 "table",
2890 "tbody",
2891 "td",
2892 "template",
2893 "textarea",
2894 "tfoot",
2895 "th",
2896 "thead",
2897 "tr",
2898 "track",
2899 "ul",
2900 "video",
2901 ];
2902
2903 let mut i = 0;
2904 while i < lines.len() {
2905 if lines[i].in_code_block || lines[i].in_front_matter {
2907 i += 1;
2908 continue;
2909 }
2910
2911 let trimmed = lines[i].content(content).trim_start();
2912
2913 if trimmed.starts_with('<') && trimmed.len() > 1 {
2915 let after_bracket = &trimmed[1..];
2917 let is_closing = after_bracket.starts_with('/');
2918 let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2919
2920 let tag_name = tag_start
2922 .chars()
2923 .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2924 .collect::<String>()
2925 .to_lowercase();
2926
2927 if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2929 lines[i].in_html_block = true;
2931
2932 if !is_closing {
2937 let closing_tag = format!("</{tag_name}>");
2938
2939 let same_line_close = lines[i].content(content).contains(&closing_tag);
2942
2943 if !same_line_close {
2945 let allow_blank_lines = tag_name == "style" || tag_name == "script";
2947 let mut j = i + 1;
2948 let mut found_closing_tag = false;
2949 while j < lines.len() && j < i + 100 {
2950 if !allow_blank_lines && lines[j].is_blank {
2953 break;
2954 }
2955
2956 lines[j].in_html_block = true;
2957
2958 if lines[j].content(content).contains(&closing_tag) {
2960 found_closing_tag = true;
2961 }
2962
2963 if found_closing_tag {
2966 j += 1;
2967 while j < lines.len() && j < i + 100 {
2969 if lines[j].is_blank {
2970 break;
2971 }
2972 lines[j].in_html_block = true;
2973 j += 1;
2974 }
2975 break;
2976 }
2977 j += 1;
2978 }
2979 }
2980 }
2981 }
2982 }
2983
2984 i += 1;
2985 }
2986 }
2987
2988 fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2991 if !flavor.supports_esm_blocks() {
2993 return;
2994 }
2995
2996 let mut in_multiline_import = false;
2997
2998 for line in lines.iter_mut() {
2999 if line.in_code_block || line.in_front_matter || line.in_html_comment {
3001 in_multiline_import = false;
3002 continue;
3003 }
3004
3005 let line_content = line.content(content);
3006 let trimmed = line_content.trim();
3007
3008 if in_multiline_import {
3010 line.in_esm_block = true;
3011 if trimmed.ends_with('\'')
3014 || trimmed.ends_with('"')
3015 || trimmed.ends_with("';")
3016 || trimmed.ends_with("\";")
3017 || line_content.contains(';')
3018 {
3019 in_multiline_import = false;
3020 }
3021 continue;
3022 }
3023
3024 if line.is_blank {
3026 continue;
3027 }
3028
3029 if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
3031 line.in_esm_block = true;
3032
3033 let is_import = trimmed.starts_with("import ");
3041
3042 let is_complete =
3044 trimmed.ends_with(';')
3046 || (trimmed.contains(" from ") && (trimmed.ends_with('\'') || trimmed.ends_with('"')))
3048 || (!is_import && !trimmed.contains(" from ") && (
3050 trimmed.starts_with("export const ")
3051 || trimmed.starts_with("export let ")
3052 || trimmed.starts_with("export var ")
3053 || trimmed.starts_with("export function ")
3054 || trimmed.starts_with("export class ")
3055 || trimmed.starts_with("export default ")
3056 ));
3057
3058 if !is_complete && is_import {
3059 if trimmed.contains('{') && !trimmed.contains('}') {
3063 in_multiline_import = true;
3064 }
3065 }
3066 }
3067 }
3068 }
3069
3070 fn detect_jsx_and_mdx_comments(
3073 content: &str,
3074 lines: &mut [LineInfo],
3075 flavor: MarkdownFlavor,
3076 code_blocks: &[(usize, usize)],
3077 ) -> (ByteRanges, ByteRanges) {
3078 if !flavor.supports_jsx() {
3080 return (Vec::new(), Vec::new());
3081 }
3082
3083 let mut jsx_expression_ranges: Vec<(usize, usize)> = Vec::new();
3084 let mut mdx_comment_ranges: Vec<(usize, usize)> = Vec::new();
3085
3086 if !content.contains('{') {
3088 return (jsx_expression_ranges, mdx_comment_ranges);
3089 }
3090
3091 let bytes = content.as_bytes();
3092 let mut i = 0;
3093
3094 while i < bytes.len() {
3095 if bytes[i] == b'{' {
3096 if code_blocks.iter().any(|(start, end)| i >= *start && i < *end) {
3098 i += 1;
3099 continue;
3100 }
3101
3102 let start = i;
3103
3104 if i + 2 < bytes.len() && &bytes[i + 1..i + 3] == b"/*" {
3106 let mut j = i + 3;
3108 while j + 2 < bytes.len() {
3109 if &bytes[j..j + 2] == b"*/" && j + 2 < bytes.len() && bytes[j + 2] == b'}' {
3110 let end = j + 3;
3111 mdx_comment_ranges.push((start, end));
3112
3113 Self::mark_lines_in_range(lines, content, start, end, |line| {
3115 line.in_mdx_comment = true;
3116 });
3117
3118 i = end;
3119 break;
3120 }
3121 j += 1;
3122 }
3123 if j + 2 >= bytes.len() {
3124 mdx_comment_ranges.push((start, bytes.len()));
3126 Self::mark_lines_in_range(lines, content, start, bytes.len(), |line| {
3127 line.in_mdx_comment = true;
3128 });
3129 break;
3130 }
3131 } else {
3132 let mut brace_depth = 1;
3135 let mut j = i + 1;
3136 let mut in_string = false;
3137 let mut string_char = b'"';
3138
3139 while j < bytes.len() && brace_depth > 0 {
3140 let c = bytes[j];
3141
3142 if !in_string && (c == b'"' || c == b'\'' || c == b'`') {
3144 in_string = true;
3145 string_char = c;
3146 } else if in_string && c == string_char && (j == 0 || bytes[j - 1] != b'\\') {
3147 in_string = false;
3148 } else if !in_string {
3149 if c == b'{' {
3150 brace_depth += 1;
3151 } else if c == b'}' {
3152 brace_depth -= 1;
3153 }
3154 }
3155 j += 1;
3156 }
3157
3158 if brace_depth == 0 {
3159 let end = j;
3160 jsx_expression_ranges.push((start, end));
3161
3162 Self::mark_lines_in_range(lines, content, start, end, |line| {
3164 line.in_jsx_expression = true;
3165 });
3166
3167 i = end;
3168 } else {
3169 i += 1;
3170 }
3171 }
3172 } else {
3173 i += 1;
3174 }
3175 }
3176
3177 (jsx_expression_ranges, mdx_comment_ranges)
3178 }
3179
3180 fn detect_mkdocs_line_info(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
3183 if flavor != MarkdownFlavor::MkDocs {
3184 return;
3185 }
3186
3187 use crate::utils::mkdocs_admonitions;
3188 use crate::utils::mkdocs_definition_lists;
3189 use crate::utils::mkdocs_tabs;
3190
3191 let content_lines: Vec<&str> = content.lines().collect();
3192
3193 let mut in_admonition = false;
3195 let mut admonition_indent = 0;
3196
3197 let mut in_tab = false;
3199 let mut tab_indent = 0;
3200
3201 let mut in_definition = false;
3203
3204 for (i, line) in content_lines.iter().enumerate() {
3205 if i >= lines.len() {
3206 break;
3207 }
3208
3209 if lines[i].in_code_block {
3211 continue;
3212 }
3213
3214 if mkdocs_admonitions::is_admonition_start(line) {
3216 in_admonition = true;
3217 admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3218 lines[i].in_admonition = true;
3219 } else if in_admonition {
3220 if line.trim().is_empty() {
3222 lines[i].in_admonition = true;
3224 } else if mkdocs_admonitions::is_admonition_content(line, admonition_indent) {
3225 lines[i].in_admonition = true;
3226 } else {
3227 in_admonition = false;
3229 if mkdocs_admonitions::is_admonition_start(line) {
3231 in_admonition = true;
3232 admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3233 lines[i].in_admonition = true;
3234 }
3235 }
3236 }
3237
3238 if mkdocs_tabs::is_tab_marker(line) {
3240 in_tab = true;
3241 tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3242 lines[i].in_content_tab = true;
3243 } else if in_tab {
3244 if line.trim().is_empty() {
3246 lines[i].in_content_tab = true;
3248 } else if mkdocs_tabs::is_tab_content(line, tab_indent) {
3249 lines[i].in_content_tab = true;
3250 } else {
3251 in_tab = false;
3253 if mkdocs_tabs::is_tab_marker(line) {
3255 in_tab = true;
3256 tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3257 lines[i].in_content_tab = true;
3258 }
3259 }
3260 }
3261
3262 if mkdocs_definition_lists::is_definition_line(line) {
3264 in_definition = true;
3265 lines[i].in_definition_list = true;
3266 } else if in_definition {
3267 if mkdocs_definition_lists::is_definition_continuation(line) {
3269 lines[i].in_definition_list = true;
3270 } else if line.trim().is_empty() {
3271 lines[i].in_definition_list = true;
3273 } else if mkdocs_definition_lists::could_be_term_line(line) {
3274 if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1])
3276 {
3277 lines[i].in_definition_list = true;
3278 } else {
3279 in_definition = false;
3280 }
3281 } else {
3282 in_definition = false;
3283 }
3284 } else if mkdocs_definition_lists::could_be_term_line(line) {
3285 if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1]) {
3287 lines[i].in_definition_list = true;
3288 in_definition = true;
3289 }
3290 }
3291 }
3292 }
3293
3294 fn mark_lines_in_range<F>(lines: &mut [LineInfo], content: &str, start: usize, end: usize, mut f: F)
3296 where
3297 F: FnMut(&mut LineInfo),
3298 {
3299 for line in lines.iter_mut() {
3301 let line_start = line.byte_offset;
3302 let line_end = line.byte_offset + line.byte_len;
3303
3304 if line_start < end && line_end > start {
3306 f(line);
3307 }
3308 }
3309
3310 let _ = content;
3312 }
3313
3314 fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
3316 if !content.contains('`') {
3318 return Vec::new();
3319 }
3320
3321 let parser = Parser::new(content).into_offset_iter();
3323 let mut ranges = Vec::new();
3324
3325 for (event, range) in parser {
3326 if let Event::Code(_) = event {
3327 ranges.push((range.start, range.end));
3328 }
3329 }
3330
3331 Self::build_code_spans_from_ranges(content, lines, &ranges)
3332 }
3333
3334 fn build_code_spans_from_ranges(content: &str, lines: &[LineInfo], ranges: &[(usize, usize)]) -> Vec<CodeSpan> {
3335 let mut code_spans = Vec::new();
3336 if ranges.is_empty() {
3337 return code_spans;
3338 }
3339
3340 for &(start_pos, end_pos) in ranges {
3341 let full_span = &content[start_pos..end_pos];
3343 let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
3344
3345 let content_start = start_pos + backtick_count;
3347 let content_end = end_pos - backtick_count;
3348 let span_content = if content_start < content_end {
3349 content[content_start..content_end].to_string()
3350 } else {
3351 String::new()
3352 };
3353
3354 let line_idx = lines
3357 .partition_point(|line| line.byte_offset <= start_pos)
3358 .saturating_sub(1);
3359 let line_num = line_idx + 1;
3360 let byte_col_start = start_pos - lines[line_idx].byte_offset;
3361
3362 let end_line_idx = lines
3364 .partition_point(|line| line.byte_offset <= end_pos)
3365 .saturating_sub(1);
3366 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3367
3368 let line_content = lines[line_idx].content(content);
3371 let col_start = if byte_col_start <= line_content.len() {
3372 line_content[..byte_col_start].chars().count()
3373 } else {
3374 line_content.chars().count()
3375 };
3376
3377 let end_line_content = lines[end_line_idx].content(content);
3378 let col_end = if byte_col_end <= end_line_content.len() {
3379 end_line_content[..byte_col_end].chars().count()
3380 } else {
3381 end_line_content.chars().count()
3382 };
3383
3384 code_spans.push(CodeSpan {
3385 line: line_num,
3386 end_line: end_line_idx + 1,
3387 start_col: col_start,
3388 end_col: col_end,
3389 byte_offset: start_pos,
3390 byte_end: end_pos,
3391 backtick_count,
3392 content: span_content,
3393 });
3394 }
3395
3396 code_spans.sort_by_key(|span| span.byte_offset);
3398
3399 code_spans
3400 }
3401
3402 fn parse_math_spans(content: &str, lines: &[LineInfo]) -> Vec<MathSpan> {
3404 let mut math_spans = Vec::new();
3405
3406 if !content.contains('$') {
3408 return math_spans;
3409 }
3410
3411 let mut options = Options::empty();
3413 options.insert(Options::ENABLE_MATH);
3414 let parser = Parser::new_ext(content, options).into_offset_iter();
3415
3416 for (event, range) in parser {
3417 let (is_display, math_content) = match &event {
3418 Event::InlineMath(text) => (false, text.as_ref()),
3419 Event::DisplayMath(text) => (true, text.as_ref()),
3420 _ => continue,
3421 };
3422
3423 let start_pos = range.start;
3424 let end_pos = range.end;
3425
3426 let line_idx = lines
3428 .partition_point(|line| line.byte_offset <= start_pos)
3429 .saturating_sub(1);
3430 let line_num = line_idx + 1;
3431 let byte_col_start = start_pos - lines[line_idx].byte_offset;
3432
3433 let end_line_idx = lines
3435 .partition_point(|line| line.byte_offset <= end_pos)
3436 .saturating_sub(1);
3437 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3438
3439 let line_content = lines[line_idx].content(content);
3441 let col_start = if byte_col_start <= line_content.len() {
3442 line_content[..byte_col_start].chars().count()
3443 } else {
3444 line_content.chars().count()
3445 };
3446
3447 let end_line_content = lines[end_line_idx].content(content);
3448 let col_end = if byte_col_end <= end_line_content.len() {
3449 end_line_content[..byte_col_end].chars().count()
3450 } else {
3451 end_line_content.chars().count()
3452 };
3453
3454 math_spans.push(MathSpan {
3455 line: line_num,
3456 end_line: end_line_idx + 1,
3457 start_col: col_start,
3458 end_col: col_end,
3459 byte_offset: start_pos,
3460 byte_end: end_pos,
3461 is_display,
3462 content: math_content.to_string(),
3463 });
3464 }
3465
3466 math_spans.sort_by_key(|span| span.byte_offset);
3468
3469 math_spans
3470 }
3471
    /// Groups list-item lines into `ListBlock`s.
    ///
    /// Walks `lines` once, keeping at most one "current" block open. Each line either
    /// starts a block, joins or extends the current block, or closes it. Line numbers
    /// stored in the blocks are 1-based (`line_idx + 1`). After the walk, any block
    /// still open is pushed and `merge_adjacent_list_blocks` post-processes the result.
    ///
    /// Setting the `RUMDL_DEBUG_LIST` environment variable makes the function print
    /// its decisions to stderr.
    fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
        // Continuation indent used for tracking after an unordered list item.
        const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;

        /// Resets the per-item tracking state after a list item has been recorded:
        /// clears the "breaking content seen" flag and recomputes the minimum
        /// continuation indent (marker length + 1 for ordered items, the fixed
        /// constant for unordered ones).
        #[inline]
        fn reset_tracking_state(
            list_item: &ListItemInfo,
            has_list_breaking_content: &mut bool,
            min_continuation: &mut usize,
        ) {
            *has_list_breaking_content = false;
            let marker_width = if list_item.is_ordered {
                list_item.marker.len() + 1
            } else {
                list_item.marker.len()
            };
            *min_continuation = if list_item.is_ordered {
                marker_width
            } else {
                UNORDERED_LIST_MIN_CONTINUATION_INDENT
            };
        }

        // Capacity is a heuristic: one block per ten lines.
        let mut list_blocks = Vec::with_capacity(lines.len() / 10);
        let mut current_block: Option<ListBlock> = None;
        // State describing the most recent list item (1-based line, marker column, marker width).
        let mut last_list_item_line = 0;
        let mut current_indent_level = 0;
        let mut last_marker_width = 0;

        // Tracking state maintained through `reset_tracking_state`.
        let mut has_list_breaking_content_since_last_item = false;
        let mut min_continuation_for_tracking = 0;

        for (line_idx, line_info) in lines.iter().enumerate() {
            let line_num = line_idx + 1;

            // Code-block lines: either absorbed by the current block or they close it.
            if line_info.in_code_block {
                if let Some(ref mut block) = current_block {
                    let min_continuation_indent =
                        CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);

                    let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);

                    match context {
                        CodeBlockContext::Indented => {
                            // Extends the current block.
                            block.end_line = line_num;
                            continue;
                        }
                        CodeBlockContext::Standalone => {
                            // Closes the current block.
                            let completed_block = current_block.take().unwrap();
                            list_blocks.push(completed_block);
                            continue;
                        }
                        CodeBlockContext::Adjacent => {
                            // Extends the current block.
                            block.end_line = line_num;
                            continue;
                        }
                    }
                } else {
                    // No open block: code-block lines are irrelevant.
                    continue;
                }
            }

            // Text matched by BLOCKQUOTE_PREFIX_REGEX on this line, or empty when there is no match.
            let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
                caps.get(0).unwrap().as_str().to_string()
            } else {
                String::new()
            };

            // For non-item, non-blank lines inside an open block, record whether the
            // line is content that should prevent the next item from joining the block.
            if let Some(ref block) = current_block
                && line_info.list_item.is_none()
                && !line_info.is_blank
                && !line_info.in_code_span_continuation
            {
                let line_content = line_info.content(content).trim();

                let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;

                let blockquote_prefix_changes = blockquote_prefix.trim() != block.blockquote_prefix.trim();

                // NOTE(review): the last clause requires `indent > 0` and `!is_lazy_continuation`
                // (which implies `indent != 0` here), so the second test is redundant.
                let breaks_list = line_info.heading.is_some()
                    || line_content.starts_with("---")
                    || line_content.starts_with("***")
                    || line_content.starts_with("___")
                    || crate::utils::skip_context::is_table_line(line_content)
                    || blockquote_prefix_changes
                    || (line_info.indent > 0
                        && line_info.indent < min_continuation_for_tracking
                        && !is_lazy_continuation);

                if breaks_list {
                    has_list_breaking_content_since_last_item = true;
                }
            }

            // A line continuing a multi-line code span always extends the open block.
            if line_info.in_code_span_continuation
                && line_info.list_item.is_none()
                && let Some(ref mut block) = current_block
            {
                block.end_line = line_num;
            }

            // Indent of this line for continuation purposes. When the line sits at the
            // same blockquote depth as the block (and that depth is > 0), the indent is
            // measured after the last `>` marker and one optional following space.
            let effective_continuation_indent = if let Some(ref block) = current_block {
                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
                let line_content = line_info.content(content);
                let line_bq_level = line_content
                    .chars()
                    .take_while(|c| *c == '>' || c.is_whitespace())
                    .filter(|&c| c == '>')
                    .count();
                if line_bq_level > 0 && line_bq_level == block_bq_level {
                    // Advance `pos` (a byte offset) just past the last blockquote marker.
                    let mut pos = 0;
                    let mut found_markers = 0;
                    for c in line_content.chars() {
                        pos += c.len_utf8();
                        if c == '>' {
                            found_markers += 1;
                            if found_markers == line_bq_level {
                                if line_content.get(pos..pos + 1) == Some(" ") {
                                    pos += 1;
                                }
                                break;
                            }
                        }
                    }
                    let after_bq = &line_content[pos..];
                    after_bq.len() - after_bq.trim_start().len()
                } else {
                    line_info.indent
                }
            } else {
                line_info.indent
            };
            // Minimum indent a continuation line needs; inside a blockquote it is the
            // last marker width (ordered) or 2 (unordered).
            let adjusted_min_continuation_for_tracking = if let Some(ref block) = current_block {
                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
                if block_bq_level > 0 {
                    if block.is_ordered { last_marker_width } else { 2 }
                } else {
                    min_continuation_for_tracking
                }
            } else {
                min_continuation_for_tracking
            };
            // Headings and fence lines never count as unindented continuations.
            let is_structural_element = line_info.heading.is_some()
                || line_info.content(content).trim().starts_with("```")
                || line_info.content(content).trim().starts_with("~~~");
            let is_valid_continuation = effective_continuation_indent >= adjusted_min_continuation_for_tracking
                || (line_info.indent == 0 && !line_info.is_blank && !is_structural_element);

            if std::env::var("RUMDL_DEBUG_LIST").is_ok() && line_info.list_item.is_none() && !line_info.is_blank {
                eprintln!(
                    "[DEBUG] Line {}: checking continuation - indent={}, min_cont={}, is_valid={}, in_code_span={}, in_code_block={}, has_block={}",
                    line_num,
                    effective_continuation_indent,
                    adjusted_min_continuation_for_tracking,
                    is_valid_continuation,
                    line_info.in_code_span_continuation,
                    line_info.in_code_block,
                    current_block.is_some()
                );
            }

            // Plain continuation text extends the open block.
            if !line_info.in_code_span_continuation
                && line_info.list_item.is_none()
                && !line_info.is_blank
                && !line_info.in_code_block
                && is_valid_continuation
                && let Some(ref mut block) = current_block
            {
                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
                    eprintln!(
                        "[DEBUG] Line {}: extending block.end_line from {} to {}",
                        line_num, block.end_line, line_num
                    );
                }
                block.end_line = line_num;
            }

            if let Some(list_item) = &line_info.list_item {
                // ---- This line is a list item. ----
                let item_indent = list_item.marker_column;
                // Nesting level is derived from the marker column, two columns per level.
                let nesting = item_indent / 2;
                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
                    eprintln!(
                        "[DEBUG] Line {}: list item found, marker={:?}, indent={}",
                        line_num, list_item.marker, item_indent
                    );
                }

                if let Some(ref mut block) = current_block {
                    // Decide whether the item joins the open block or starts a new one.
                    let is_nested = nesting > block.nesting_level;
                    let same_type =
                        (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
                    let same_context = block.blockquote_prefix == blockquote_prefix;
                    // Within two lines of the previous item, or directly after the block's last line.
                    let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;

                    // Ordered blocks and blocks without a recorded marker accept any marker.
                    let marker_compatible =
                        block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);

                    let has_non_list_content = has_list_breaking_content_since_last_item;

                    // Nested items may differ in type and marker from the block.
                    let mut continues_list = if is_nested {
                        same_context && reasonable_distance && !has_non_list_content
                    } else {
                        same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
                    };

                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
                        eprintln!(
                            "[DEBUG] Line {}: continues_list={}, is_nested={}, same_type={}, same_context={}, reasonable_distance={}, marker_compatible={}, has_non_list_content={}, last_item={}, block.end_line={}",
                            line_num,
                            continues_list,
                            is_nested,
                            same_type,
                            same_context,
                            reasonable_distance,
                            marker_compatible,
                            has_non_list_content,
                            last_list_item_line,
                            block.end_line
                        );
                    }

                    // Override: an item directly following the block's last line still joins it.
                    if !continues_list
                        && (is_nested || same_type)
                        && reasonable_distance
                        && line_num > 0
                        && block.end_line == line_num - 1
                    {
                        // NOTE(review): both branches assign the same value, so the
                        // `contains` check currently has no effect.
                        if block.item_lines.contains(&(line_num - 1)) {
                            continues_list = true;
                        } else {
                            continues_list = true;
                        }
                    }

                    if continues_list {
                        block.end_line = line_num;
                        block.item_lines.push(line_num);

                        block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
                            list_item.marker.len() + 1
                        } else {
                            list_item.marker.len()
                        });

                        // Mixed markers in an unordered block: forget the block marker.
                        if !block.is_ordered
                            && block.marker.is_some()
                            && block.marker.as_ref() != Some(&list_item.marker)
                        {
                            block.marker = None;
                        }

                        reset_tracking_state(
                            list_item,
                            &mut has_list_breaking_content_since_last_item,
                            &mut min_continuation_for_tracking,
                        );
                    } else {
                        // On a list-type switch at the same level, end the old block at its last item.
                        if !same_type
                            && !is_nested
                            && let Some(&last_item) = block.item_lines.last()
                        {
                            block.end_line = last_item;
                        }

                        list_blocks.push(block.clone());

                        // Replace the open block in place with a fresh one for this item.
                        *block = ListBlock {
                            start_line: line_num,
                            end_line: line_num,
                            is_ordered: list_item.is_ordered,
                            marker: if list_item.is_ordered {
                                None
                            } else {
                                Some(list_item.marker.clone())
                            },
                            blockquote_prefix: blockquote_prefix.clone(),
                            item_lines: vec![line_num],
                            nesting_level: nesting,
                            max_marker_width: if list_item.is_ordered {
                                list_item.marker.len() + 1
                            } else {
                                list_item.marker.len()
                            },
                        };

                        reset_tracking_state(
                            list_item,
                            &mut has_list_breaking_content_since_last_item,
                            &mut min_continuation_for_tracking,
                        );
                    }
                } else {
                    // No open block: this item starts one.
                    // NOTE(review): `max_marker_width` here omits the `+ 1` that the other
                    // construction sites add for ordered items — confirm this is intended.
                    current_block = Some(ListBlock {
                        start_line: line_num,
                        end_line: line_num,
                        is_ordered: list_item.is_ordered,
                        marker: if list_item.is_ordered {
                            None
                        } else {
                            Some(list_item.marker.clone())
                        },
                        blockquote_prefix,
                        item_lines: vec![line_num],
                        nesting_level: nesting,
                        max_marker_width: list_item.marker.len(),
                    });

                    reset_tracking_state(
                        list_item,
                        &mut has_list_breaking_content_since_last_item,
                        &mut min_continuation_for_tracking,
                    );
                }

                // Remember this item for the continuation checks on following lines.
                last_list_item_line = line_num;
                current_indent_level = item_indent;
                last_marker_width = if list_item.is_ordered {
                    list_item.marker.len() + 1
                } else {
                    list_item.marker.len()
                };
            } else if let Some(ref mut block) = current_block {
                // ---- Not a list item, but a block is open. ----
                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
                    eprintln!(
                        "[DEBUG] Line {}: non-list-item, is_blank={}, block exists",
                        line_num, line_info.is_blank
                    );
                }

                // True when the block's current last line ends with a backslash.
                let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
                    lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
                } else {
                    false
                };

                // Indent needed to count as content of the last item.
                let min_continuation_indent = if block.is_ordered {
                    current_indent_level + last_marker_width
                } else {
                    current_indent_level + 2
                };

                if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
                        eprintln!(
                            "[DEBUG] Line {}: indented continuation (indent={}, min={})",
                            line_num, line_info.indent, min_continuation_indent
                        );
                    }
                    block.end_line = line_num;
                } else if line_info.is_blank {
                    // Blank line: look ahead to the next non-blank line to decide whether
                    // the block continues past it.
                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
                        eprintln!("[DEBUG] Line {line_num}: entering blank line handling");
                    }
                    let mut check_idx = line_idx + 1;
                    let mut found_continuation = false;

                    // Skip the run of blank lines.
                    while check_idx < lines.len() && lines[check_idx].is_blank {
                        check_idx += 1;
                    }

                    if check_idx < lines.len() {
                        let next_line = &lines[check_idx];
                        let next_content = next_line.content(content);
                        let block_bq_level_for_indent = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
                        let next_bq_level_for_indent = next_content
                            .chars()
                            .take_while(|c| *c == '>' || c.is_whitespace())
                            .filter(|&c| c == '>')
                            .count();
                        // Same blockquote-aware indent measurement as for the current line above.
                        let effective_indent =
                            if next_bq_level_for_indent > 0 && next_bq_level_for_indent == block_bq_level_for_indent {
                                let mut pos = 0;
                                let mut found_markers = 0;
                                for c in next_content.chars() {
                                    pos += c.len_utf8();
                                    if c == '>' {
                                        found_markers += 1;
                                        if found_markers == next_bq_level_for_indent {
                                            if next_content.get(pos..pos + 1) == Some(" ") {
                                                pos += 1;
                                            }
                                            break;
                                        }
                                    }
                                }
                                let after_blockquote_marker = &next_content[pos..];
                                after_blockquote_marker.len() - after_blockquote_marker.trim_start().len()
                            } else {
                                next_line.indent
                            };
                        let adjusted_min_continuation = if block_bq_level_for_indent > 0 {
                            if block.is_ordered { last_marker_width } else { 2 }
                        } else {
                            min_continuation_indent
                        };
                        if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
                            eprintln!(
                                "[DEBUG] Blank line {} checking next line {}: effective_indent={}, adjusted_min={}, next_is_list={}, in_code_block={}",
                                line_num,
                                check_idx + 1,
                                effective_indent,
                                adjusted_min_continuation,
                                next_line.list_item.is_some(),
                                next_line.in_code_block
                            );
                        }
                        if !next_line.in_code_block && effective_indent >= adjusted_min_continuation {
                            // Sufficiently indented text after the blank run continues the block.
                            found_continuation = true;
                        } else if !next_line.in_code_block
                            && next_line.list_item.is_some()
                            && let Some(item) = &next_line.list_item
                        {
                            // The next non-blank line is a list item: it continues the block when
                            // it has the same marker column, type and blockquote prefix.
                            let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
                                .find(next_line.content(content))
                                .map_or(String::new(), |m| m.as_str().to_string());
                            if item.marker_column == current_indent_level
                                && item.is_ordered == block.is_ordered
                                && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
                            {
                                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
                                // NOTE(review): this value is computed but never read (underscore-prefixed).
                                // The scanned range `line_idx + 1..check_idx` only covers lines the
                                // loop above found to be blank.
                                let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
                                    if let Some(between_line) = lines.get(idx) {
                                        let between_content = between_line.content(content);
                                        let trimmed = between_content.trim();
                                        if trimmed.is_empty() {
                                            return false;
                                        }
                                        let line_indent = between_content.len() - between_content.trim_start().len();

                                        let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
                                            .find(between_content)
                                            .map_or(String::new(), |m| m.as_str().to_string());
                                        let between_bq_level = between_bq_prefix.chars().filter(|&c| c == '>').count();
                                        let blockquote_level_changed =
                                            trimmed.starts_with(">") && between_bq_level != block_bq_level;

                                        if trimmed.starts_with("```")
                                            || trimmed.starts_with("~~~")
                                            || trimmed.starts_with("---")
                                            || trimmed.starts_with("***")
                                            || trimmed.starts_with("___")
                                            || blockquote_level_changed
                                            || crate::utils::skip_context::is_table_line(trimmed)
                                            || between_line.heading.is_some()
                                        {
                                            return true;
                                        }

                                        line_indent >= min_continuation_indent
                                    } else {
                                        false
                                    }
                                });

                                // NOTE(review): the ordered and unordered branches below are identical.
                                if block.is_ordered {
                                    // True if any line between here and the next item is a fence,
                                    // rule-like line, table line, heading, or changes blockquote depth.
                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
                                        if let Some(between_line) = lines.get(idx) {
                                            let between_content = between_line.content(content);
                                            let trimmed = between_content.trim();
                                            if trimmed.is_empty() {
                                                return false;
                                            }
                                            let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
                                                .find(between_content)
                                                .map_or(String::new(), |m| m.as_str().to_string());
                                            let between_bq_level =
                                                between_bq_prefix.chars().filter(|&c| c == '>').count();
                                            let blockquote_level_changed =
                                                trimmed.starts_with(">") && between_bq_level != block_bq_level;
                                            trimmed.starts_with("```")
                                                || trimmed.starts_with("~~~")
                                                || trimmed.starts_with("---")
                                                || trimmed.starts_with("***")
                                                || trimmed.starts_with("___")
                                                || blockquote_level_changed
                                                || crate::utils::skip_context::is_table_line(trimmed)
                                                || between_line.heading.is_some()
                                        } else {
                                            false
                                        }
                                    });
                                    found_continuation = !has_structural_separators;
                                } else {
                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
                                        if let Some(between_line) = lines.get(idx) {
                                            let between_content = between_line.content(content);
                                            let trimmed = between_content.trim();
                                            if trimmed.is_empty() {
                                                return false;
                                            }
                                            let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
                                                .find(between_content)
                                                .map_or(String::new(), |m| m.as_str().to_string());
                                            let between_bq_level =
                                                between_bq_prefix.chars().filter(|&c| c == '>').count();
                                            let blockquote_level_changed =
                                                trimmed.starts_with(">") && between_bq_level != block_bq_level;
                                            trimmed.starts_with("```")
                                                || trimmed.starts_with("~~~")
                                                || trimmed.starts_with("---")
                                                || trimmed.starts_with("***")
                                                || trimmed.starts_with("___")
                                                || blockquote_level_changed
                                                || crate::utils::skip_context::is_table_line(trimmed)
                                                || between_line.heading.is_some()
                                        } else {
                                            false
                                        }
                                    });
                                    found_continuation = !has_structural_separators;
                                }
                            }
                        }
                    }

                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
                        eprintln!("[DEBUG] Blank line {line_num} final: found_continuation={found_continuation}");
                    }
                    if found_continuation {
                        // The blank line stays inside the block.
                        block.end_line = line_num;
                    } else {
                        // Nothing after the blank run continues the list: close the block.
                        list_blocks.push(block.clone());
                        current_block = None;
                    }
                } else {
                    // Non-blank, under-indented line: either a lazy continuation or the end of the block.
                    let min_required_indent = if block.is_ordered {
                        current_indent_level + last_marker_width
                    } else {
                        current_indent_level + 2
                    };

                    let line_content = line_info.content(content).trim();

                    let looks_like_table = crate::utils::skip_context::is_table_line(line_content);

                    let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
                    let current_bq_level = blockquote_prefix.chars().filter(|&c| c == '>').count();
                    let blockquote_level_changed = line_content.starts_with(">") && current_bq_level != block_bq_level;

                    let is_structural_separator = line_info.heading.is_some()
                        || line_content.starts_with("```")
                        || line_content.starts_with("~~~")
                        || line_content.starts_with("---")
                        || line_content.starts_with("***")
                        || line_content.starts_with("___")
                        || blockquote_level_changed
                        || looks_like_table;

                    // Unindented text, sufficiently indented text, or a code-span
                    // continuation keeps the block open unless it is a structural separator.
                    let is_lazy_continuation = !is_structural_separator
                        && !line_info.is_blank
                        && (line_info.indent == 0
                            || line_info.indent >= min_required_indent
                            || line_info.in_code_span_continuation);

                    if is_lazy_continuation {
                        block.end_line = line_num;
                    } else {
                        list_blocks.push(block.clone());
                        current_block = None;
                    }
                }
            }
        }

        // Flush the block still open at end of input.
        if let Some(block) = current_block {
            list_blocks.push(block);
        }

        merge_adjacent_list_blocks(content, &mut list_blocks, lines);

        list_blocks
    }
4182
4183 fn compute_char_frequency(content: &str) -> CharFrequency {
4185 let mut frequency = CharFrequency::default();
4186
4187 for ch in content.chars() {
4188 match ch {
4189 '#' => frequency.hash_count += 1,
4190 '*' => frequency.asterisk_count += 1,
4191 '_' => frequency.underscore_count += 1,
4192 '-' => frequency.hyphen_count += 1,
4193 '+' => frequency.plus_count += 1,
4194 '>' => frequency.gt_count += 1,
4195 '|' => frequency.pipe_count += 1,
4196 '[' => frequency.bracket_count += 1,
4197 '`' => frequency.backtick_count += 1,
4198 '<' => frequency.lt_count += 1,
4199 '!' => frequency.exclamation_count += 1,
4200 '\n' => frequency.newline_count += 1,
4201 _ => {}
4202 }
4203 }
4204
4205 frequency
4206 }
4207
4208 fn parse_html_tags(
4210 content: &str,
4211 lines: &[LineInfo],
4212 code_blocks: &[(usize, usize)],
4213 flavor: MarkdownFlavor,
4214 ) -> Vec<HtmlTag> {
4215 static HTML_TAG_REGEX: LazyLock<regex::Regex> =
4216 LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
4217
4218 let mut html_tags = Vec::with_capacity(content.matches('<').count());
4219
4220 for cap in HTML_TAG_REGEX.captures_iter(content) {
4221 let full_match = cap.get(0).unwrap();
4222 let match_start = full_match.start();
4223 let match_end = full_match.end();
4224
4225 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4227 continue;
4228 }
4229
4230 let is_closing = !cap.get(1).unwrap().as_str().is_empty();
4231 let tag_name_original = cap.get(2).unwrap().as_str();
4232 let tag_name = tag_name_original.to_lowercase();
4233 let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
4234
4235 if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
4238 continue;
4239 }
4240
4241 let mut line_num = 1;
4243 let mut col_start = match_start;
4244 let mut col_end = match_end;
4245 for (idx, line_info) in lines.iter().enumerate() {
4246 if match_start >= line_info.byte_offset {
4247 line_num = idx + 1;
4248 col_start = match_start - line_info.byte_offset;
4249 col_end = match_end - line_info.byte_offset;
4250 } else {
4251 break;
4252 }
4253 }
4254
4255 html_tags.push(HtmlTag {
4256 line: line_num,
4257 start_col: col_start,
4258 end_col: col_end,
4259 byte_offset: match_start,
4260 byte_end: match_end,
4261 tag_name,
4262 is_closing,
4263 is_self_closing,
4264 raw_content: full_match.as_str().to_string(),
4265 });
4266 }
4267
4268 html_tags
4269 }
4270
4271 fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
4273 let mut table_rows = Vec::with_capacity(lines.len() / 20);
4274
4275 for (line_idx, line_info) in lines.iter().enumerate() {
4276 if line_info.in_code_block || line_info.is_blank {
4278 continue;
4279 }
4280
4281 let line = line_info.content(content);
4282 let line_num = line_idx + 1;
4283
4284 if !line.contains('|') {
4286 continue;
4287 }
4288
4289 let parts: Vec<&str> = line.split('|').collect();
4291 let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
4292
4293 let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
4295 let mut column_alignments = Vec::new();
4296
4297 if is_separator {
4298 for part in &parts[1..parts.len() - 1] {
4299 let trimmed = part.trim();
4301 let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
4302 "center".to_string()
4303 } else if trimmed.ends_with(':') {
4304 "right".to_string()
4305 } else if trimmed.starts_with(':') {
4306 "left".to_string()
4307 } else {
4308 "none".to_string()
4309 };
4310 column_alignments.push(alignment);
4311 }
4312 }
4313
4314 table_rows.push(TableRow {
4315 line: line_num,
4316 is_separator,
4317 column_count,
4318 column_alignments,
4319 });
4320 }
4321
4322 table_rows
4323 }
4324
4325 fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
4327 let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
4328
4329 for cap in URL_SIMPLE_REGEX.captures_iter(content) {
4331 let full_match = cap.get(0).unwrap();
4332 let match_start = full_match.start();
4333 let match_end = full_match.end();
4334
4335 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4337 continue;
4338 }
4339
4340 let preceding_char = if match_start > 0 {
4342 content.chars().nth(match_start - 1)
4343 } else {
4344 None
4345 };
4346 let following_char = content.chars().nth(match_end);
4347
4348 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4349 continue;
4350 }
4351 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4352 continue;
4353 }
4354
4355 let url = full_match.as_str();
4356 let url_type = if url.starts_with("https://") {
4357 "https"
4358 } else if url.starts_with("http://") {
4359 "http"
4360 } else if url.starts_with("ftp://") {
4361 "ftp"
4362 } else {
4363 "other"
4364 };
4365
4366 let mut line_num = 1;
4368 let mut col_start = match_start;
4369 let mut col_end = match_end;
4370 for (idx, line_info) in lines.iter().enumerate() {
4371 if match_start >= line_info.byte_offset {
4372 line_num = idx + 1;
4373 col_start = match_start - line_info.byte_offset;
4374 col_end = match_end - line_info.byte_offset;
4375 } else {
4376 break;
4377 }
4378 }
4379
4380 bare_urls.push(BareUrl {
4381 line: line_num,
4382 start_col: col_start,
4383 end_col: col_end,
4384 byte_offset: match_start,
4385 byte_end: match_end,
4386 url: url.to_string(),
4387 url_type: url_type.to_string(),
4388 });
4389 }
4390
4391 for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
4393 let full_match = cap.get(0).unwrap();
4394 let match_start = full_match.start();
4395 let match_end = full_match.end();
4396
4397 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4399 continue;
4400 }
4401
4402 let preceding_char = if match_start > 0 {
4404 content.chars().nth(match_start - 1)
4405 } else {
4406 None
4407 };
4408 let following_char = content.chars().nth(match_end);
4409
4410 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4411 continue;
4412 }
4413 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4414 continue;
4415 }
4416
4417 let email = full_match.as_str();
4418
4419 let mut line_num = 1;
4421 let mut col_start = match_start;
4422 let mut col_end = match_end;
4423 for (idx, line_info) in lines.iter().enumerate() {
4424 if match_start >= line_info.byte_offset {
4425 line_num = idx + 1;
4426 col_start = match_start - line_info.byte_offset;
4427 col_end = match_end - line_info.byte_offset;
4428 } else {
4429 break;
4430 }
4431 }
4432
4433 bare_urls.push(BareUrl {
4434 line: line_num,
4435 start_col: col_start,
4436 end_col: col_end,
4437 byte_offset: match_start,
4438 byte_end: match_end,
4439 url: email.to_string(),
4440 url_type: "email".to_string(),
4441 });
4442 }
4443
4444 bare_urls
4445 }
4446
    /// Returns a [`ValidHeadingsIter`] built over this context's lines.
    // NOTE(review): presumably yields only headings flagged `is_valid`, mirroring
    // `has_valid_headings` — confirm against `ValidHeadingsIter`.
    #[must_use]
    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
        ValidHeadingsIter::new(&self.lines)
    }
4470
4471 #[must_use]
4475 pub fn has_valid_headings(&self) -> bool {
4476 self.lines
4477 .iter()
4478 .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
4479 }
4480}
4481
4482fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
4484 if list_blocks.len() < 2 {
4485 return;
4486 }
4487
4488 let mut merger = ListBlockMerger::new(content, lines);
4489 *list_blocks = merger.merge(list_blocks);
4490}
4491
4492struct ListBlockMerger<'a> {
4494 content: &'a str,
4495 lines: &'a [LineInfo],
4496}
4497
4498impl<'a> ListBlockMerger<'a> {
4499 fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
4500 Self { content, lines }
4501 }
4502
4503 fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
4504 let mut merged = Vec::with_capacity(list_blocks.len());
4505 let mut current = list_blocks[0].clone();
4506
4507 for next in list_blocks.iter().skip(1) {
4508 if self.should_merge_blocks(¤t, next) {
4509 current = self.merge_two_blocks(current, next);
4510 } else {
4511 merged.push(current);
4512 current = next.clone();
4513 }
4514 }
4515
4516 merged.push(current);
4517 merged
4518 }
4519
4520 fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
4522 if !self.blocks_are_compatible(current, next) {
4524 return false;
4525 }
4526
4527 let spacing = self.analyze_spacing_between(current, next);
4529 match spacing {
4530 BlockSpacing::Consecutive => true,
4531 BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
4532 BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
4533 self.can_merge_with_content_between(current, next)
4534 }
4535 }
4536 }
4537
4538 fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
4540 current.is_ordered == next.is_ordered
4541 && current.blockquote_prefix == next.blockquote_prefix
4542 && current.nesting_level == next.nesting_level
4543 }
4544
4545 fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
4547 let gap = next.start_line - current.end_line;
4548
4549 match gap {
4550 1 => BlockSpacing::Consecutive,
4551 2 => BlockSpacing::SingleBlank,
4552 _ if gap > 2 => {
4553 if self.has_only_blank_lines_between(current, next) {
4554 BlockSpacing::MultipleBlanks
4555 } else {
4556 BlockSpacing::ContentBetween
4557 }
4558 }
4559 _ => BlockSpacing::Consecutive, }
4561 }
4562
4563 fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4565 if has_meaningful_content_between(self.content, current, next, self.lines) {
4568 return false; }
4570
4571 !current.is_ordered && current.marker == next.marker
4573 }
4574
4575 fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4577 if has_meaningful_content_between(self.content, current, next, self.lines) {
4579 return false; }
4581
4582 current.is_ordered && next.is_ordered
4584 }
4585
4586 fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4588 for line_num in (current.end_line + 1)..next.start_line {
4589 if let Some(line_info) = self.lines.get(line_num - 1)
4590 && !line_info.content(self.content).trim().is_empty()
4591 {
4592 return false;
4593 }
4594 }
4595 true
4596 }
4597
4598 fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
4600 current.end_line = next.end_line;
4601 current.item_lines.extend_from_slice(&next.item_lines);
4602
4603 current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
4605
4606 if !current.is_ordered && self.markers_differ(¤t, next) {
4608 current.marker = None; }
4610
4611 current
4612 }
4613
4614 fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
4616 current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
4617 }
4618}
4619
/// How two successive list blocks are separated, as classified by
/// `ListBlockMerger::analyze_spacing_between` from `next.start_line - current.end_line`.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// The difference is 1 (also used as the fallback for any smaller value).
    Consecutive,
    /// The difference is 2, i.e. exactly one line lies between the blocks.
    SingleBlank,
    /// The difference is greater than 2 and every line in between is blank.
    MultipleBlanks,
    /// The difference is greater than 2 and at least one line in between is not blank.
    ContentBetween,
}
4628
4629fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
4631 for line_num in (current.end_line + 1)..next.start_line {
4633 if let Some(line_info) = lines.get(line_num - 1) {
4634 let trimmed = line_info.content(content).trim();
4636
4637 if trimmed.is_empty() {
4639 continue;
4640 }
4641
4642 if line_info.heading.is_some() {
4646 return true; }
4648
4649 if is_horizontal_rule(trimmed) {
4651 return true; }
4653
4654 if crate::utils::skip_context::is_table_line(trimmed) {
4656 return true; }
4658
4659 if trimmed.starts_with('>') {
4661 return true; }
4663
4664 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
4666 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4667
4668 let min_continuation_indent = if current.is_ordered {
4670 current.nesting_level + current.max_marker_width + 1 } else {
4672 current.nesting_level + 2
4673 };
4674
4675 if line_indent < min_continuation_indent {
4676 return true; }
4679 }
4680
4681 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4683
4684 let min_indent = if current.is_ordered {
4686 current.nesting_level + current.max_marker_width
4687 } else {
4688 current.nesting_level + 2
4689 };
4690
4691 if line_indent < min_indent {
4693 return true; }
4695
4696 }
4699 }
4700
4701 false
4703}
4704
4705pub fn is_horizontal_rule_line(line: &str) -> bool {
4712 let leading_spaces = line.len() - line.trim_start_matches(' ').len();
4714 if leading_spaces > 3 || line.starts_with('\t') {
4715 return false;
4716 }
4717
4718 is_horizontal_rule_content(line.trim())
4719}
4720
/// Returns `true` if the already-trimmed text is a horizontal rule.
///
/// The text must start with `-`, `*` or `_`, contain that same character at
/// least three times, and contain nothing else apart from spaces and tabs.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    // Fewer than three bytes cannot hold three marker characters.
    if trimmed.len() < 3 {
        return false;
    }

    let marker = match trimmed.chars().next() {
        Some(c @ ('-' | '*' | '_')) => c,
        _ => return false,
    };

    let mut marker_count = 0usize;
    for ch in trimmed.chars() {
        match ch {
            c if c == marker => marker_count += 1,
            ' ' | '\t' => {}
            _ => return false,
        }
    }

    marker_count >= 3
}
4745
/// Alias for [`is_horizontal_rule_content`]: forwards `trimmed` unchanged and
/// returns its result.
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}
4750
4751#[cfg(test)]
4753mod tests {
4754 use super::*;
4755
    /// An empty document has no lines, a single line offset of 0, and maps
    /// offset 0 to line 1, column 1.
    #[test]
    fn test_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }
4764
    /// A document without a newline has one line offset, and byte offsets map
    /// to 1-based columns on line 1.
    #[test]
    fn test_single_line() {
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
    }
4773
    /// Line offsets and offset-to-(line, column) mapping for a four-line document
    /// that includes a blank line.
    #[test]
    fn test_multi_line() {
        let content = "# Title\n\nSecond line\nThird line";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
        // Start of line 1.
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        // The blank line 2.
        assert_eq!(ctx.offset_to_line_col(8), (2, 1));
        // Start of line 3.
        assert_eq!(ctx.offset_to_line_col(9), (3, 1));
        // Seventh column of line 3.
        assert_eq!(ctx.offset_to_line_col(15), (3, 7));
        // Start of line 4.
        assert_eq!(ctx.offset_to_line_col(21), (4, 1));
    }
4786
4787 #[test]
4788 fn test_line_info() {
4789 let content = "# Title\n indented\n\ncode:\n```rust\nfn main() {}\n```";
4790 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4791
4792 assert_eq!(ctx.lines.len(), 7);
4794
4795 let line1 = &ctx.lines[0];
4797 assert_eq!(line1.content(ctx.content), "# Title");
4798 assert_eq!(line1.byte_offset, 0);
4799 assert_eq!(line1.indent, 0);
4800 assert!(!line1.is_blank);
4801 assert!(!line1.in_code_block);
4802 assert!(line1.list_item.is_none());
4803
4804 let line2 = &ctx.lines[1];
4806 assert_eq!(line2.content(ctx.content), " indented");
4807 assert_eq!(line2.byte_offset, 8);
4808 assert_eq!(line2.indent, 4);
4809 assert!(!line2.is_blank);
4810
4811 let line3 = &ctx.lines[2];
4813 assert_eq!(line3.content(ctx.content), "");
4814 assert!(line3.is_blank);
4815
4816 assert_eq!(ctx.line_to_byte_offset(1), Some(0));
4818 assert_eq!(ctx.line_to_byte_offset(2), Some(8));
4819 assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
4820 assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
4821 }
4822
4823 #[test]
4824 fn test_list_item_detection() {
4825 let content = "- Unordered item\n * Nested item\n1. Ordered item\n 2) Nested ordered\n\nNot a list";
4826 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4827
4828 let line1 = &ctx.lines[0];
4830 assert!(line1.list_item.is_some());
4831 let list1 = line1.list_item.as_ref().unwrap();
4832 assert_eq!(list1.marker, "-");
4833 assert!(!list1.is_ordered);
4834 assert_eq!(list1.marker_column, 0);
4835 assert_eq!(list1.content_column, 2);
4836
4837 let line2 = &ctx.lines[1];
4839 assert!(line2.list_item.is_some());
4840 let list2 = line2.list_item.as_ref().unwrap();
4841 assert_eq!(list2.marker, "*");
4842 assert_eq!(list2.marker_column, 2);
4843
4844 let line3 = &ctx.lines[2];
4846 assert!(line3.list_item.is_some());
4847 let list3 = line3.list_item.as_ref().unwrap();
4848 assert_eq!(list3.marker, "1.");
4849 assert!(list3.is_ordered);
4850 assert_eq!(list3.number, Some(1));
4851
4852 let line6 = &ctx.lines[5];
4854 assert!(line6.list_item.is_none());
4855 }
4856
    /// Offset mapping on one-character lines, including the newline positions and
    /// the offset one past the end of the content.
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        let content = "a\nb\nc";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        // "a"
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        // The newline ending line 1.
        assert_eq!(ctx.offset_to_line_col(1), (1, 2));
        // "b"
        assert_eq!(ctx.offset_to_line_col(2), (2, 1));
        // The newline ending line 2.
        assert_eq!(ctx.offset_to_line_col(3), (2, 2));
        // "c"
        assert_eq!(ctx.offset_to_line_col(4), (3, 1));
        // One past the last byte.
        assert_eq!(ctx.offset_to_line_col(5), (3, 2));
    }
4869
    /// In the MDX flavor, leading `import` / `export` lines are flagged as
    /// `in_esm_block`; the blank line, heading and prose after them are not.
    #[test]
    fn test_mdx_esm_blocks() {
        let content = r##"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall

In {year}, the snowfall was above average.
It was followed by a warm spring which caused
flood conditions in many of the nearby rivers.

<Chart color="#fcb32c" year={year} />
"##;

        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);

        // The fixture has ten lines.
        assert_eq!(ctx.lines.len(), 10);
        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
        assert!(
            !ctx.lines[3].in_esm_block,
            "Line 4 (heading) should NOT be in_esm_block"
        );
        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
    }
4898
    /// The same `import` / `export` lines are not flagged as `in_esm_block` when
    /// the document is parsed with the Standard flavor.
    #[test]
    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
        let content = r#"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(
            !ctx.lines[0].in_esm_block,
            "Line 1 should NOT be in_esm_block in Standard flavor"
        );
        assert!(
            !ctx.lines[1].in_esm_block,
            "Line 2 should NOT be in_esm_block in Standard flavor"
        );
    }
4919
4920 #[test]
4921 fn test_blockquote_with_indented_content() {
4922 let content = r#"# Heading
4926
4927> -S socket-path
4928> More text
4929"#;
4930 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4931
4932 assert!(
4934 ctx.lines.get(2).is_some_and(|l| l.blockquote.is_some()),
4935 "Line 3 should be a blockquote"
4936 );
4937 assert!(
4939 ctx.lines.get(3).is_some_and(|l| l.blockquote.is_some()),
4940 "Line 4 should be a blockquote"
4941 );
4942
4943 let bq3 = ctx.lines.get(2).unwrap().blockquote.as_ref().unwrap();
4946 assert_eq!(bq3.content, "-S socket-path");
4947 assert_eq!(bq3.nesting_level, 1);
4948 assert!(bq3.has_multiple_spaces_after_marker);
4950
4951 let bq4 = ctx.lines.get(3).unwrap().blockquote.as_ref().unwrap();
4952 assert_eq!(bq4.content, "More text");
4953 assert_eq!(bq4.nesting_level, 1);
4954 }
4955
    /// Footnote definitions (`[^id]: ...`) are not collected as reference
    /// definitions; a regular `[id]: url "title"` definition is.
    #[test]
    fn test_footnote_definitions_not_parsed_as_reference_defs() {
        let content = r#"# Title

A footnote[^1].

[^1]: This is the footnote content.

[^note]: Another footnote with [link](https://example.com).

[regular]: ./path.md "A real reference definition"
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Only `[regular]` may be reported.
        assert_eq!(
            ctx.reference_defs.len(),
            1,
            "Footnotes should not be parsed as reference definitions"
        );

        // The surviving definition carries its id, URL and title.
        assert_eq!(ctx.reference_defs[0].id, "regular");
        assert_eq!(ctx.reference_defs[0].url, "./path.md");
        assert_eq!(
            ctx.reference_defs[0].title,
            Some("A real reference definition".to_string())
        );
    }
4986
    /// A footnote whose text begins with an inline link must not produce a
    /// reference definition.
    #[test]
    fn test_footnote_with_inline_link_not_misidentified() {
        let content = r#"# Title

A footnote[^1].

[^1]: [link](https://www.google.com).
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(
            ctx.reference_defs.is_empty(),
            "Footnote with inline link should not create a reference definition"
        );
    }
5005
    /// Numeric, named, single-character, hyphenated and mixed footnote ids are all
    /// excluded from the reference definitions, leaving only `ref1` and `ref2`.
    #[test]
    fn test_various_footnote_formats_excluded() {
        let content = r#"[^1]: Numeric footnote
[^note]: Named footnote
[^a]: Single char footnote
[^long-footnote-name]: Long named footnote
[^123abc]: Mixed alphanumeric

[ref1]: ./file1.md
[ref2]: ./file2.md
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(
            ctx.reference_defs.len(),
            2,
            "Only regular reference definitions should be parsed"
        );

        // No collected id may start with the footnote caret.
        let ids: Vec<&str> = ctx.reference_defs.iter().map(|r| r.id.as_str()).collect();
        assert!(ids.contains(&"ref1"));
        assert!(ids.contains(&"ref2"));
        assert!(!ids.iter().any(|id| id.starts_with('^')));
    }
5032
    /// `has_char` reports every special character that occurs in the fixture,
    /// including the newline.
    #[test]
    fn test_has_char_tracked_characters() {
        // One occurrence of each character checked below.
        let content = "# Heading\n* list item\n_emphasis_ and -hyphen-\n+ plus\n> quote\n| table |\n[link]\n`code`\n<html>\n!image";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(ctx.has_char('#'), "Should detect hash");
        assert!(ctx.has_char('*'), "Should detect asterisk");
        assert!(ctx.has_char('_'), "Should detect underscore");
        assert!(ctx.has_char('-'), "Should detect hyphen");
        assert!(ctx.has_char('+'), "Should detect plus");
        assert!(ctx.has_char('>'), "Should detect gt");
        assert!(ctx.has_char('|'), "Should detect pipe");
        assert!(ctx.has_char('['), "Should detect bracket");
        assert!(ctx.has_char('`'), "Should detect backtick");
        assert!(ctx.has_char('<'), "Should detect lt");
        assert!(ctx.has_char('!'), "Should detect exclamation");
        assert!(ctx.has_char('\n'), "Should detect newline");
    }
5057
    /// `has_char` reports `false` for each special character when the content is a
    /// single line of plain words.
    #[test]
    fn test_has_char_absent_characters() {
        let content = "Simple text without special chars";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(!ctx.has_char('#'), "Should not detect hash");
        assert!(!ctx.has_char('*'), "Should not detect asterisk");
        assert!(!ctx.has_char('_'), "Should not detect underscore");
        assert!(!ctx.has_char('-'), "Should not detect hyphen");
        assert!(!ctx.has_char('+'), "Should not detect plus");
        assert!(!ctx.has_char('>'), "Should not detect gt");
        assert!(!ctx.has_char('|'), "Should not detect pipe");
        assert!(!ctx.has_char('['), "Should not detect bracket");
        assert!(!ctx.has_char('`'), "Should not detect backtick");
        assert!(!ctx.has_char('<'), "Should not detect lt");
        assert!(!ctx.has_char('!'), "Should not detect exclamation");
        assert!(!ctx.has_char('\n'), "Should not detect newline in single line");
    }
5078
    /// `has_char` also answers for `@`, `$`, `%` and `^`, which the assertion
    /// messages describe as going through a fallback path.
    #[test]
    fn test_has_char_fallback_for_untracked() {
        let content = "Text with @mention and $dollar and %percent";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(ctx.has_char('@'), "Should detect @ via fallback");
        assert!(ctx.has_char('$'), "Should detect $ via fallback");
        assert!(ctx.has_char('%'), "Should detect % via fallback");
        assert!(!ctx.has_char('^'), "Should not detect absent ^ via fallback");
    }
5090
    /// `char_count` returns the exact number of occurrences of each special
    /// character in a fixture where most of them appear more than once.
    #[test]
    fn test_char_count_tracked_characters() {
        let content = "## Heading ##\n***bold***\n__emphasis__\n---\n+++\n>> nested\n|| table ||\n[[link]]\n``code``\n<<html>>\n!!";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.char_count('#'), 4, "Should count 4 hashes");
        assert_eq!(ctx.char_count('*'), 6, "Should count 6 asterisks");
        assert_eq!(ctx.char_count('_'), 4, "Should count 4 underscores");
        assert_eq!(ctx.char_count('-'), 3, "Should count 3 hyphens");
        assert_eq!(ctx.char_count('+'), 3, "Should count 3 pluses");
        assert_eq!(ctx.char_count('>'), 4, "Should count 4 gt (2 nested + 2 in <<html>>)");
        assert_eq!(ctx.char_count('|'), 4, "Should count 4 pipes");
        assert_eq!(ctx.char_count('['), 2, "Should count 2 brackets");
        assert_eq!(ctx.char_count('`'), 4, "Should count 4 backticks");
        assert_eq!(ctx.char_count('<'), 2, "Should count 2 lt");
        assert_eq!(ctx.char_count('!'), 2, "Should count 2 exclamations");
        // Eleven lines are joined by ten newlines.
        assert_eq!(ctx.char_count('\n'), 10, "Should count 10 newlines");
    }
5110
    /// `char_count` is 0 for special characters that do not occur in the content.
    #[test]
    fn test_char_count_zero_for_absent() {
        let content = "Plain text";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.char_count('#'), 0);
        assert_eq!(ctx.char_count('*'), 0);
        assert_eq!(ctx.char_count('_'), 0);
        assert_eq!(ctx.char_count('\n'), 0);
    }
5121
    /// `char_count` also counts `@`, `$` and `%`, which the assertion messages
    /// describe as going through a fallback path, and returns 0 for an absent `^`.
    #[test]
    fn test_char_count_fallback_for_untracked() {
        let content = "@@@ $$ %%%";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.char_count('@'), 3, "Should count 3 @ via fallback");
        assert_eq!(ctx.char_count('$'), 2, "Should count 2 $ via fallback");
        assert_eq!(ctx.char_count('%'), 3, "Should count 3 % via fallback");
        assert_eq!(ctx.char_count('^'), 0, "Should count 0 for absent char");
    }
5132
    /// On empty content every count is 0 and `has_char` is always `false`.
    #[test]
    fn test_char_count_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);

        assert_eq!(ctx.char_count('#'), 0);
        assert_eq!(ctx.char_count('*'), 0);
        assert_eq!(ctx.char_count('@'), 0);
        assert!(!ctx.has_char('#'));
        assert!(!ctx.has_char('@'));
    }
5143
    /// `is_in_html_tag` is true from `<` through `>` of both the opening and the
    /// closing tag, and false for the text between them.
    #[test]
    fn test_is_in_html_tag_simple() {
        let content = "<div>content</div>";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Opening tag: bytes 0..=4.
        assert!(ctx.is_in_html_tag(0), "Position 0 (<) should be in tag");
        assert!(ctx.is_in_html_tag(1), "Position 1 (d) should be in tag");
        assert!(ctx.is_in_html_tag(4), "Position 4 (>) should be in tag");

        // Text between the tags.
        assert!(!ctx.is_in_html_tag(5), "Position 5 (c) should not be in tag");
        assert!(!ctx.is_in_html_tag(10), "Position 10 (t) should not be in tag");

        // Closing tag: bytes 12..=17.
        assert!(ctx.is_in_html_tag(12), "Position 12 (<) should be in tag");
        assert!(ctx.is_in_html_tag(17), "Position 17 (>) should be in tag");
    }
5166
    /// A self-closing `<br/>` inside text: only the bytes of the tag itself are
    /// reported as inside an HTML tag.
    #[test]
    fn test_is_in_html_tag_self_closing() {
        let content = "Text <br/> more text";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Before the tag.
        assert!(!ctx.is_in_html_tag(0), "Position 0 should not be in tag");
        assert!(!ctx.is_in_html_tag(4), "Position 4 (space) should not be in tag");

        // The tag: bytes 5..=9.
        assert!(ctx.is_in_html_tag(5), "Position 5 (<) should be in tag");
        assert!(ctx.is_in_html_tag(8), "Position 8 (/) should be in tag");
        assert!(ctx.is_in_html_tag(9), "Position 9 (>) should be in tag");

        // After the tag.
        assert!(!ctx.is_in_html_tag(10), "Position 10 (space) should not be in tag");
    }
5184
    /// Attribute text counts as being inside the opening tag; the link text
    /// between the tags does not.
    #[test]
    fn test_is_in_html_tag_with_attributes() {
        let content = r#"<a href="url" class="link">text</a>"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Opening tag including its attributes: bytes 0..=26.
        assert!(ctx.is_in_html_tag(0), "Start of tag");
        assert!(ctx.is_in_html_tag(10), "Inside href attribute");
        assert!(ctx.is_in_html_tag(20), "Inside class attribute");
        assert!(ctx.is_in_html_tag(26), "End of opening tag");

        // "text": bytes 27..=30.
        assert!(!ctx.is_in_html_tag(27), "Start of content");
        assert!(!ctx.is_in_html_tag(30), "End of content");

        // Closing tag starts at byte 31.
        assert!(ctx.is_in_html_tag(31), "Start of closing tag");
    }
5203
    /// An opening tag that spans several lines is treated as one tag; the text
    /// after its closing `>` is outside.
    #[test]
    fn test_is_in_html_tag_multiline() {
        let content = "<div\n class=\"test\"\n>\ncontent\n</div>";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(ctx.is_in_html_tag(0), "Start of multiline tag");
        assert!(ctx.is_in_html_tag(5), "After first newline in tag");
        assert!(ctx.is_in_html_tag(15), "Inside attribute");

        // Two bytes past the `>` that ends the opening tag is the first byte of "content".
        let closing_bracket_pos = content.find(">\n").unwrap();
        assert!(!ctx.is_in_html_tag(closing_bracket_pos + 2), "Content after tag");
    }
5218
    /// With no HTML in the content, no byte position is reported as inside a tag.
    #[test]
    fn test_is_in_html_tag_no_tags() {
        let content = "Plain text without any HTML";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Check every byte position.
        for i in 0..content.len() {
            assert!(!ctx.is_in_html_tag(i), "Position {i} should not be in tag");
        }
    }
5229
    /// A `{{ ... }}` expression: every byte from the first `{` through the last
    /// `}` is inside the Jinja range, and the surrounding text is not.
    #[test]
    fn test_is_in_jinja_range_expression() {
        let content = "Hello {{ name }}!";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Before the expression.
        assert!(!ctx.is_in_jinja_range(0), "H should not be in Jinja");
        assert!(!ctx.is_in_jinja_range(5), "Space before Jinja should not be in Jinja");

        // The expression: bytes 6..=15.
        assert!(ctx.is_in_jinja_range(6), "First brace should be in Jinja");
        assert!(ctx.is_in_jinja_range(7), "Second brace should be in Jinja");
        assert!(ctx.is_in_jinja_range(10), "name should be in Jinja");
        assert!(ctx.is_in_jinja_range(14), "Closing brace should be in Jinja");
        assert!(ctx.is_in_jinja_range(15), "Second closing brace should be in Jinja");

        // After the expression.
        assert!(!ctx.is_in_jinja_range(16), "! should not be in Jinja");
    }
5253
    /// `{% ... %}` statements are Jinja ranges; the text between the opening and
    /// closing statement is not.
    #[test]
    fn test_is_in_jinja_range_statement() {
        let content = "{% if condition %}content{% endif %}";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Opening statement: bytes 0..=17.
        assert!(ctx.is_in_jinja_range(0), "Start of Jinja statement");
        assert!(ctx.is_in_jinja_range(5), "condition should be in Jinja");
        assert!(ctx.is_in_jinja_range(17), "End of opening statement");

        // "content" starts at byte 18.
        assert!(!ctx.is_in_jinja_range(18), "content should not be in Jinja");

        // Closing statement starts at byte 25.
        assert!(ctx.is_in_jinja_range(25), "Start of endif");
        assert!(ctx.is_in_jinja_range(32), "endif should be in Jinja");
    }
5271
    /// Two expressions on one line form two separate Jinja ranges, with the text
    /// between them outside both.
    #[test]
    fn test_is_in_jinja_range_multiple() {
        let content = "{{ a }} and {{ b }}";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // First expression: bytes 0..=6.
        assert!(ctx.is_in_jinja_range(0));
        assert!(ctx.is_in_jinja_range(3));
        assert!(ctx.is_in_jinja_range(6));

        // " and " between the expressions.
        assert!(!ctx.is_in_jinja_range(8));
        assert!(!ctx.is_in_jinja_range(11));

        // Second expression: bytes 12..=18.
        assert!(ctx.is_in_jinja_range(12));
        assert!(ctx.is_in_jinja_range(15));
        assert!(ctx.is_in_jinja_range(18));
    }
5291
    /// With no Jinja delimiters in the content, no byte position is reported as
    /// inside a Jinja range.
    #[test]
    fn test_is_in_jinja_range_no_jinja() {
        let content = "Plain text with single braces but not Jinja";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Check every byte position.
        for i in 0..content.len() {
            assert!(!ctx.is_in_jinja_range(i), "Position {i} should not be in Jinja");
        }
    }
5302
5303 #[test]
5308 fn test_is_in_link_title_with_title() {
5309 let content = r#"[ref]: https://example.com "Title text"
5310
5311Some content."#;
5312 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5313
5314 assert_eq!(ctx.reference_defs.len(), 1);
5316 let def = &ctx.reference_defs[0];
5317 assert!(def.title_byte_start.is_some());
5318 assert!(def.title_byte_end.is_some());
5319
5320 let title_start = def.title_byte_start.unwrap();
5321 let title_end = def.title_byte_end.unwrap();
5322
5323 assert!(!ctx.is_in_link_title(10), "URL should not be in title");
5325
5326 assert!(ctx.is_in_link_title(title_start), "Title start should be in title");
5328 assert!(
5329 ctx.is_in_link_title(title_start + 5),
5330 "Middle of title should be in title"
5331 );
5332 assert!(ctx.is_in_link_title(title_end - 1), "End of title should be in title");
5333
5334 assert!(
5336 !ctx.is_in_link_title(title_end),
5337 "After title end should not be in title"
5338 );
5339 }
5340
5341 #[test]
5342 fn test_is_in_link_title_without_title() {
5343 let content = "[ref]: https://example.com\n\nSome content.";
5344 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5345
5346 assert_eq!(ctx.reference_defs.len(), 1);
5348 let def = &ctx.reference_defs[0];
5349 assert!(def.title_byte_start.is_none());
5350 assert!(def.title_byte_end.is_none());
5351
5352 for i in 0..content.len() {
5354 assert!(!ctx.is_in_link_title(i), "Position {i} should not be in title");
5355 }
5356 }
5357
5358 #[test]
5359 fn test_is_in_link_title_multiple_refs() {
5360 let content = r#"[ref1]: /url1 "Title One"
5361[ref2]: /url2
5362[ref3]: /url3 "Title Three"
5363"#;
5364 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5365
5366 assert_eq!(ctx.reference_defs.len(), 3);
5368
5369 let ref1 = ctx.reference_defs.iter().find(|r| r.id == "ref1").unwrap();
5371 assert!(ref1.title_byte_start.is_some());
5372
5373 let ref2 = ctx.reference_defs.iter().find(|r| r.id == "ref2").unwrap();
5375 assert!(ref2.title_byte_start.is_none());
5376
5377 let ref3 = ctx.reference_defs.iter().find(|r| r.id == "ref3").unwrap();
5379 assert!(ref3.title_byte_start.is_some());
5380
5381 if let (Some(start), Some(end)) = (ref1.title_byte_start, ref1.title_byte_end) {
5383 assert!(ctx.is_in_link_title(start + 1));
5384 assert!(!ctx.is_in_link_title(end + 5));
5385 }
5386
5387 if let (Some(start), Some(_end)) = (ref3.title_byte_start, ref3.title_byte_end) {
5389 assert!(ctx.is_in_link_title(start + 1));
5390 }
5391 }
5392
5393 #[test]
5394 fn test_is_in_link_title_single_quotes() {
5395 let content = "[ref]: /url 'Single quoted title'\n";
5396 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5397
5398 assert_eq!(ctx.reference_defs.len(), 1);
5399 let def = &ctx.reference_defs[0];
5400
5401 if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
5402 assert!(ctx.is_in_link_title(start));
5403 assert!(ctx.is_in_link_title(start + 5));
5404 assert!(!ctx.is_in_link_title(end));
5405 }
5406 }
5407
5408 #[test]
5409 fn test_is_in_link_title_parentheses() {
5410 let content = "[ref]: /url (Parenthesized title)\n";
5413 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5414
5415 if ctx.reference_defs.is_empty() {
5418 for i in 0..content.len() {
5420 assert!(!ctx.is_in_link_title(i));
5421 }
5422 } else {
5423 let def = &ctx.reference_defs[0];
5424 if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
5425 assert!(ctx.is_in_link_title(start));
5426 assert!(ctx.is_in_link_title(start + 5));
5427 assert!(!ctx.is_in_link_title(end));
5428 } else {
5429 for i in 0..content.len() {
5431 assert!(!ctx.is_in_link_title(i));
5432 }
5433 }
5434 }
5435 }
5436
5437 #[test]
5438 fn test_is_in_link_title_no_refs() {
5439 let content = "Just plain text without any reference definitions.";
5440 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5441
5442 assert!(ctx.reference_defs.is_empty());
5443
5444 for i in 0..content.len() {
5445 assert!(!ctx.is_in_link_title(i));
5446 }
5447 }
5448
5449 #[test]
5454 fn test_math_spans_inline() {
5455 let content = "Text with inline math $[f](x)$ in it.";
5456 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5457
5458 let math_spans = ctx.math_spans();
5459 assert_eq!(math_spans.len(), 1, "Should detect one inline math span");
5460
5461 let span = &math_spans[0];
5462 assert!(!span.is_display, "Should be inline math, not display");
5463 assert_eq!(span.content, "[f](x)", "Content should be extracted correctly");
5464 }
5465
5466 #[test]
5467 fn test_math_spans_display_single_line() {
5468 let content = "$$X(\\zeta) = \\mathcal Z [x](\\zeta)$$";
5469 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5470
5471 let math_spans = ctx.math_spans();
5472 assert_eq!(math_spans.len(), 1, "Should detect one display math span");
5473
5474 let span = &math_spans[0];
5475 assert!(span.is_display, "Should be display math");
5476 assert!(
5477 span.content.contains("[x](\\zeta)"),
5478 "Content should contain the link-like pattern"
5479 );
5480 }
5481
5482 #[test]
5483 fn test_math_spans_display_multiline() {
5484 let content = "Before\n\n$$\n[x](\\zeta) = \\sum_k x(k)\n$$\n\nAfter";
5485 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5486
5487 let math_spans = ctx.math_spans();
5488 assert_eq!(math_spans.len(), 1, "Should detect one display math span");
5489
5490 let span = &math_spans[0];
5491 assert!(span.is_display, "Should be display math");
5492 }
5493
5494 #[test]
5495 fn test_is_in_math_span() {
5496 let content = "Text $[f](x)$ more text";
5497 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5498
5499 let math_start = content.find('$').unwrap();
5501 let math_end = content.rfind('$').unwrap() + 1;
5502
5503 assert!(
5504 ctx.is_in_math_span(math_start + 1),
5505 "Position inside math span should return true"
5506 );
5507 assert!(
5508 ctx.is_in_math_span(math_start + 3),
5509 "Position inside math span should return true"
5510 );
5511
5512 assert!(!ctx.is_in_math_span(0), "Position before math span should return false");
5514 assert!(
5515 !ctx.is_in_math_span(math_end + 1),
5516 "Position after math span should return false"
5517 );
5518 }
5519
5520 #[test]
5521 fn test_math_spans_mixed_with_code() {
5522 let content = "Math $[f](x)$ and code `[g](y)` mixed";
5523 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5524
5525 let math_spans = ctx.math_spans();
5526 let code_spans = ctx.code_spans();
5527
5528 assert_eq!(math_spans.len(), 1, "Should have one math span");
5529 assert_eq!(code_spans.len(), 1, "Should have one code span");
5530
5531 assert_eq!(math_spans[0].content, "[f](x)");
5533 assert_eq!(code_spans[0].content, "[g](y)");
5535 }
5536
5537 #[test]
5538 fn test_math_spans_no_math() {
5539 let content = "Regular text without any math at all.";
5540 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5541
5542 let math_spans = ctx.math_spans();
5543 assert!(math_spans.is_empty(), "Should have no math spans");
5544 }
5545
5546 #[test]
5547 fn test_math_spans_multiple() {
5548 let content = "First $a$ and second $b$ and display $$c$$";
5549 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5550
5551 let math_spans = ctx.math_spans();
5552 assert_eq!(math_spans.len(), 3, "Should detect three math spans");
5553
5554 let inline_count = math_spans.iter().filter(|s| !s.is_display).count();
5556 let display_count = math_spans.iter().filter(|s| s.is_display).count();
5557
5558 assert_eq!(inline_count, 2, "Should have two inline math spans");
5559 assert_eq!(display_count, 1, "Should have one display math span");
5560 }
5561
5562 #[test]
5563 fn test_is_in_math_span_boundary_positions() {
5564 let content = "$[f](x)$";
5567 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5568
5569 let math_spans = ctx.math_spans();
5570 assert_eq!(math_spans.len(), 1, "Should have one math span");
5571
5572 let span = &math_spans[0];
5573
5574 assert!(
5576 ctx.is_in_math_span(span.byte_offset),
5577 "Start position should be in span"
5578 );
5579
5580 assert!(
5582 ctx.is_in_math_span(span.byte_offset + 1),
5583 "Position after start should be in span"
5584 );
5585
5586 assert!(
5588 ctx.is_in_math_span(span.byte_end - 1),
5589 "Position at end-1 should be in span"
5590 );
5591
5592 assert!(
5594 !ctx.is_in_math_span(span.byte_end),
5595 "Position at byte_end should NOT be in span (exclusive)"
5596 );
5597 }
5598
5599 #[test]
5600 fn test_math_spans_at_document_start() {
5601 let content = "$x$ text";
5602 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5603
5604 let math_spans = ctx.math_spans();
5605 assert_eq!(math_spans.len(), 1);
5606 assert_eq!(math_spans[0].byte_offset, 0, "Math should start at byte 0");
5607 }
5608
5609 #[test]
5610 fn test_math_spans_at_document_end() {
5611 let content = "text $x$";
5612 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5613
5614 let math_spans = ctx.math_spans();
5615 assert_eq!(math_spans.len(), 1);
5616 assert_eq!(math_spans[0].byte_end, content.len(), "Math should end at document end");
5617 }
5618
5619 #[test]
5620 fn test_math_spans_consecutive() {
5621 let content = "$a$$b$";
5622 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5623
5624 let math_spans = ctx.math_spans();
5625 assert!(!math_spans.is_empty(), "Should detect at least one math span");
5627
5628 for i in 0..content.len() {
5630 assert!(ctx.is_in_math_span(i), "Position {i} should be in a math span");
5631 }
5632 }
5633
5634 #[test]
5635 fn test_math_spans_currency_not_math() {
5636 let content = "Price is $100";
5638 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5639
5640 let math_spans = ctx.math_spans();
5641 assert!(
5644 math_spans.is_empty() || !math_spans.iter().any(|s| s.content.contains("100")),
5645 "Unbalanced $ should not create math span containing 100"
5646 );
5647 }
5648
5649 #[test]
5654 fn test_reference_lookup_o1_basic() {
5655 let content = r#"[ref1]: /url1
5656[REF2]: /url2 "Title"
5657[Ref3]: /url3
5658
5659Use [link][ref1] and [link][REF2]."#;
5660 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5661
5662 assert_eq!(ctx.reference_defs.len(), 3);
5664
5665 assert_eq!(ctx.get_reference_url("ref1"), Some("/url1"));
5667 assert_eq!(ctx.get_reference_url("REF1"), Some("/url1")); assert_eq!(ctx.get_reference_url("Ref1"), Some("/url1")); assert_eq!(ctx.get_reference_url("ref2"), Some("/url2"));
5670 assert_eq!(ctx.get_reference_url("REF2"), Some("/url2"));
5671 assert_eq!(ctx.get_reference_url("ref3"), Some("/url3"));
5672 assert_eq!(ctx.get_reference_url("nonexistent"), None);
5673 }
5674
5675 #[test]
5676 fn test_reference_lookup_o1_get_reference_def() {
5677 let content = r#"[myref]: https://example.com "My Title"
5678"#;
5679 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5680
5681 let def = ctx.get_reference_def("myref").expect("Should find myref");
5683 assert_eq!(def.url, "https://example.com");
5684 assert_eq!(def.title.as_deref(), Some("My Title"));
5685
5686 let def2 = ctx.get_reference_def("MYREF").expect("Should find MYREF");
5688 assert_eq!(def2.url, "https://example.com");
5689
5690 assert!(ctx.get_reference_def("nonexistent").is_none());
5692 }
5693
5694 #[test]
5695 fn test_reference_lookup_o1_has_reference_def() {
5696 let content = r#"[foo]: /foo
5697[BAR]: /bar
5698"#;
5699 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5700
5701 assert!(ctx.has_reference_def("foo"));
5703 assert!(ctx.has_reference_def("FOO")); assert!(ctx.has_reference_def("bar"));
5705 assert!(ctx.has_reference_def("Bar")); assert!(!ctx.has_reference_def("baz")); }
5708
5709 #[test]
5710 fn test_reference_lookup_o1_empty_content() {
5711 let content = "No references here.";
5712 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5713
5714 assert!(ctx.reference_defs.is_empty());
5715 assert_eq!(ctx.get_reference_url("anything"), None);
5716 assert!(ctx.get_reference_def("anything").is_none());
5717 assert!(!ctx.has_reference_def("anything"));
5718 }
5719
5720 #[test]
5721 fn test_reference_lookup_o1_special_characters_in_id() {
5722 let content = r#"[ref-with-dash]: /url1
5723[ref_with_underscore]: /url2
5724[ref.with.dots]: /url3
5725"#;
5726 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5727
5728 assert_eq!(ctx.get_reference_url("ref-with-dash"), Some("/url1"));
5729 assert_eq!(ctx.get_reference_url("ref_with_underscore"), Some("/url2"));
5730 assert_eq!(ctx.get_reference_url("ref.with.dots"), Some("/url3"));
5731 }
5732
5733 #[test]
5734 fn test_reference_lookup_o1_unicode_id() {
5735 let content = r#"[日本語]: /japanese
5736[émoji]: /emoji
5737"#;
5738 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5739
5740 assert_eq!(ctx.get_reference_url("日本語"), Some("/japanese"));
5741 assert_eq!(ctx.get_reference_url("émoji"), Some("/emoji"));
5742 assert_eq!(ctx.get_reference_url("ÉMOJI"), Some("/emoji")); }
5744}