1use crate::config::MarkdownFlavor;
2use crate::inline_config::InlineConfig;
3use crate::rules::front_matter_utils::FrontMatterUtils;
4use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
5use crate::utils::element_cache::ElementCache;
6use crate::utils::regex_cache::URL_SIMPLE_REGEX;
7use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
8use regex::Regex;
9use std::borrow::Cow;
10use std::collections::HashMap;
11use std::path::PathBuf;
12use std::sync::LazyLock;
13
/// Evaluates the third argument and yields its value, optionally timing it.
///
/// Native variant: a timer is started before the code runs; once it has
/// finished, and only if the flag is true, a `[PROFILE] <name>: <elapsed>`
/// line is written to stderr. The code is always evaluated exactly once.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($label:expr, $enabled:expr, $body:expr) => {{
        let timer = std::time::Instant::now();
        let value = $body;
        if $enabled {
            eprintln!("[PROFILE] {}: {:?}", $label, timer.elapsed());
        }
        value
    }};
}

/// wasm32 variant: expands to the code alone; the name and the flag are
/// accepted for call-site compatibility but never evaluated.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($label:expr, $enabled:expr, $body:expr) => {{ $body }};
}
31
/// Lazily compiled regex for an inline link `[text](url "title")` or a
/// reference link `[text][id]`.
///
/// Capture groups:
/// 1. link text (escaped characters allowed, unescaped brackets are not)
/// 2. destination written in angle brackets `<...>`
/// 3. bare destination
/// 4. double-quoted title
/// 5. single-quoted title
/// 6. reference id of the `[text][id]` form
///
/// The pattern is written in verbose mode (`x`) with `s` enabled; the `#`
/// comments inside the literal are part of the pattern text.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\] # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
        \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
        |
        \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
45
/// Lazily compiled regex for an inline image `![alt](url "title")` or a
/// reference image `![alt][id]`.
///
/// Same shape as the link pattern with a leading `!`. Capture groups:
/// 1. alt text (escaped characters allowed, unescaped brackets are not)
/// 2. destination written in angle brackets `<...>`
/// 3. bare destination
/// 4. double-quoted title
/// 5. single-quoted title
/// 6. reference id of the `![alt][id]` form
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\] # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
        \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
        |
        \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
59
/// Lazily compiled, multi-line regex for a reference definition line:
/// up to three leading spaces, `[label]:`, a destination, and an optional title.
///
/// Capture groups: 1 = label, 2 = destination (a run of non-whitespace),
/// 3 = double-quoted title, 4 = single-quoted title.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
63
/// Lazily compiled regex for an email-like token: a local part, `@`, a domain,
/// and a final dot followed by at least two ASCII letters. The pattern is not
/// anchored, so it matches anywhere inside the searched text.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
69
/// Lazily compiled regex anchored at the start of the text; group 1 captures
/// optional whitespace, one or more `>` markers, and any whitespace after them.
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
72
73#[derive(Debug, Clone)]
75pub struct LineInfo {
76 pub byte_offset: usize,
78 pub byte_len: usize,
80 pub indent: usize,
82 pub visual_indent: usize,
86 pub is_blank: bool,
88 pub in_code_block: bool,
90 pub in_front_matter: bool,
92 pub in_html_block: bool,
94 pub in_html_comment: bool,
96 pub list_item: Option<ListItemInfo>,
98 pub heading: Option<HeadingInfo>,
100 pub blockquote: Option<BlockquoteInfo>,
102 pub in_mkdocstrings: bool,
104 pub in_esm_block: bool,
106 pub in_code_span_continuation: bool,
108 pub is_horizontal_rule: bool,
111 pub in_math_block: bool,
113 pub in_quarto_div: bool,
115 pub in_jsx_expression: bool,
117 pub in_mdx_comment: bool,
119 pub in_jsx_component: bool,
121 pub in_jsx_fragment: bool,
123 pub in_admonition: bool,
125 pub in_content_tab: bool,
127 pub in_definition_list: bool,
129}
130
131impl LineInfo {
132 pub fn content<'a>(&self, source: &'a str) -> &'a str {
134 &source[self.byte_offset..self.byte_offset + self.byte_len]
135 }
136}
137
/// Details of a list marker found on a line.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker text as written in the source.
    pub marker: String,
    /// True for ordered (numbered) items, false for bullet items.
    pub is_ordered: bool,
    /// Number of an ordered item. NOTE(review): presumably `None` for bullets — confirm where populated.
    pub number: Option<usize>,
    /// Column of the marker; compared between items to decide list nesting.
    pub marker_column: usize,
    /// Column at which the item's content starts.
    pub content_column: usize,
}
152
/// Syntactic form a heading was written in.
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    /// `#`-prefixed heading.
    ATX,
    /// Setext heading. NOTE(review): presumably the `=` underline (level 1) — confirm.
    Setext1,
    /// Setext heading. NOTE(review): presumably the `-` underline (level 2) — confirm.
    Setext2,
}
163
/// A link found in the document, borrowing from the source where possible.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// Line the link starts on.
    pub line: usize,
    /// Column where the link starts on its line.
    pub start_col: usize,
    /// Column where the link ends.
    pub end_col: usize,
    /// Byte offset of the link's start in the source.
    pub byte_offset: usize,
    /// Byte offset of the link's end in the source.
    pub byte_end: usize,
    /// Link text.
    pub text: Cow<'a, str>,
    /// Link destination.
    pub url: Cow<'a, str>,
    /// True for reference-style links (full, collapsed, or shortcut).
    pub is_reference: bool,
    /// Reference id for reference-style links.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type as reported by pulldown-cmark.
    pub link_type: LinkType,
}
188
/// A reference that pulldown-cmark reported through its broken-link callback.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text that could not be resolved.
    pub reference: String,
    /// Span of the broken link as reported by the parser.
    pub span: std::ops::Range<usize>,
}
197
/// A footnote reference found while parsing links.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// Footnote identifier.
    pub id: String,
    /// Line the reference appears on.
    pub line: usize,
    /// Byte offset of the reference's start in the source.
    pub byte_offset: usize,
    /// Byte offset of the reference's end in the source.
    pub byte_end: usize,
}
210
/// An image found in the document, borrowing from the source where possible.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// Line the image starts on.
    pub line: usize,
    /// Column where the image starts on its line.
    pub start_col: usize,
    /// Column where the image ends.
    pub end_col: usize,
    /// Byte offset of the image's start in the source.
    pub byte_offset: usize,
    /// Byte offset of the image's end in the source.
    pub byte_end: usize,
    /// Alternative text.
    pub alt_text: Cow<'a, str>,
    /// Image destination.
    pub url: Cow<'a, str>,
    /// True for reference-style images.
    pub is_reference: bool,
    /// Reference id for reference-style images.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type as reported by pulldown-cmark.
    pub link_type: LinkType,
}
235
/// A reference definition (`[id]: url "title"`).
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line the definition appears on.
    pub line: usize,
    /// Label as written; lookups compare it lowercased.
    pub id: String,
    /// Destination the label resolves to.
    pub url: String,
    /// Optional title.
    pub title: Option<String>,
    /// Start of the definition in bytes (inclusive).
    pub byte_offset: usize,
    /// End of the definition in bytes (exclusive).
    pub byte_end: usize,
    /// Start of the title in bytes (inclusive), when a title is present.
    pub title_byte_start: Option<usize>,
    /// End of the title in bytes (exclusive), when a title is present.
    pub title_byte_end: Option<usize>,
}
256
/// An inline code span, possibly spanning several lines.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// 1-based line the span starts on.
    pub line: usize,
    /// 1-based line the span ends on (equal to `line` for single-line spans).
    pub end_line: usize,
    /// Start column on `line`; compared against 0-based columns.
    pub start_col: usize,
    /// End column on `end_line` (exclusive); compared against 0-based columns.
    pub end_col: usize,
    /// Start of the span in bytes (inclusive).
    pub byte_offset: usize,
    /// End of the span in bytes (exclusive).
    pub byte_end: usize,
    /// Number of backticks in the delimiter.
    pub backtick_count: usize,
    /// Text of the span.
    pub content: String,
}
277
/// A math span found in the document.
#[derive(Debug, Clone)]
pub struct MathSpan {
    /// Line the span starts on.
    pub line: usize,
    /// Line the span ends on.
    pub end_line: usize,
    /// Start column on `line`.
    pub start_col: usize,
    /// End column on `end_line`.
    pub end_col: usize,
    /// Start of the span in bytes (inclusive).
    pub byte_offset: usize,
    /// End of the span in bytes (exclusive).
    pub byte_end: usize,
    /// Whether this is display math. NOTE(review): presumably `$$...$$` vs. `$...$` — confirm.
    pub is_display: bool,
    /// Text of the span.
    pub content: String,
}
298
/// Details of a heading recognised on a line.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level.
    pub level: u8,
    /// Syntax the heading was written in.
    pub style: HeadingStyle,
    /// Marker text of the heading.
    pub marker: String,
    /// Column of the marker.
    pub marker_column: usize,
    /// Column where the heading text starts.
    pub content_column: usize,
    /// Heading text.
    pub text: String,
    /// Custom id attached to the heading, if any.
    pub custom_id: Option<String>,
    /// Heading text before processing. NOTE(review): difference from `text` is defined where populated — confirm.
    pub raw_text: String,
    /// Whether a closing sequence is present.
    pub has_closing_sequence: bool,
    /// The closing sequence text.
    pub closing_sequence: String,
    /// Whether the heading counts as valid; invalid ones are skipped by `ValidHeadingsIter`.
    pub is_valid: bool,
}
326
/// Item yielded by `ValidHeadingsIter`: a valid heading together with its line.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// 1-based line number of the heading.
    pub line_num: usize,
    /// The heading found on that line.
    pub heading: &'a HeadingInfo,
    /// Full metadata of that line.
    pub line_info: &'a LineInfo,
}
340
341pub struct ValidHeadingsIter<'a> {
346 lines: &'a [LineInfo],
347 current_index: usize,
348}
349
350impl<'a> ValidHeadingsIter<'a> {
351 fn new(lines: &'a [LineInfo]) -> Self {
352 Self {
353 lines,
354 current_index: 0,
355 }
356 }
357}
358
359impl<'a> Iterator for ValidHeadingsIter<'a> {
360 type Item = ValidHeading<'a>;
361
362 fn next(&mut self) -> Option<Self::Item> {
363 while self.current_index < self.lines.len() {
364 let idx = self.current_index;
365 self.current_index += 1;
366
367 let line_info = &self.lines[idx];
368 if let Some(heading) = &line_info.heading
369 && heading.is_valid
370 {
371 return Some(ValidHeading {
372 line_num: idx + 1, heading,
374 line_info,
375 });
376 }
377 }
378 None
379 }
380}
381
/// Details of a blockquote prefix recognised on a line.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting depth of the blockquote.
    pub nesting_level: usize,
    /// Whitespace before the first marker.
    pub indent: String,
    /// Column of the marker.
    pub marker_column: usize,
    /// The blockquote prefix text; trimmed at the end when reused for blank lines.
    pub prefix: String,
    /// Line content after the prefix.
    pub content: String,
    /// True when no space follows the marker.
    pub has_no_space_after_marker: bool,
    /// True when more than one space follows the marker.
    pub has_multiple_spaces_after_marker: bool,
    /// Flag consumed by rule MD028. NOTE(review): exact condition is set where populated — confirm.
    pub needs_md028_fix: bool,
}
402
/// A contiguous list in the document.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// First line of the block (inclusive).
    pub start_line: usize,
    /// Last line of the block (inclusive).
    pub end_line: usize,
    /// Whether the block is an ordered list.
    pub is_ordered: bool,
    /// Marker of the list. NOTE(review): presumably `None` when markers vary or for ordered lists — confirm.
    pub marker: Option<String>,
    /// Blockquote prefix the list is nested in (empty when none).
    pub blockquote_prefix: String,
    /// Line numbers of the items in this block.
    pub item_lines: Vec<usize>,
    /// Nesting depth of the block.
    pub nesting_level: usize,
    /// Width of the widest marker in the block.
    pub max_marker_width: usize,
}
423
424use std::sync::{Arc, OnceLock};
425
/// List-item data stored in a map with `usize` keys.
/// NOTE(review): key is presumably a line number/index and the tuple presumably
/// (is_ordered, marker, marker_column, content_column, number) — confirm at the use site.
type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;
428
/// A list of `(usize, usize)` pairs. NOTE(review): presumably (start, end) byte offsets — confirm at the use site.
type ByteRanges = Vec<(usize, usize)>;
431
/// Precomputed counts of characters that matter to Markdown syntax, used to
/// answer `has_char` / `char_count` / `likely_has_*` queries without rescanning.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count reported for `#`.
    pub hash_count: usize,
    /// Count reported for `*`.
    pub asterisk_count: usize,
    /// Count reported for `_`.
    pub underscore_count: usize,
    /// Count reported for `-`.
    pub hyphen_count: usize,
    /// Count reported for `+`.
    pub plus_count: usize,
    /// Count reported for `>`.
    pub gt_count: usize,
    /// Count reported for `|`.
    pub pipe_count: usize,
    /// Count reported for `[`. NOTE(review): may also include `]` — confirm where computed.
    pub bracket_count: usize,
    /// Count reported for a backtick.
    pub backtick_count: usize,
    /// Count reported for `<`.
    pub lt_count: usize,
    /// Count reported for `!`.
    pub exclamation_count: usize,
    /// Count reported for the newline character.
    pub newline_count: usize,
}
460
/// An HTML tag found in the document.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line the tag appears on.
    pub line: usize,
    /// Column where the tag starts.
    pub start_col: usize,
    /// Column where the tag ends.
    pub end_col: usize,
    /// Start of the tag in bytes (inclusive).
    pub byte_offset: usize,
    /// End of the tag in bytes (exclusive).
    pub byte_end: usize,
    /// Name of the tag.
    pub tag_name: String,
    /// True for closing tags.
    pub is_closing: bool,
    /// True for self-closing tags.
    pub is_self_closing: bool,
    /// The tag text as found in the source.
    pub raw_content: String,
}
483
/// An emphasis span found in the document.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line the span appears on.
    pub line: usize,
    /// Column where the span starts.
    pub start_col: usize,
    /// Column where the span ends.
    pub end_col: usize,
    /// Byte offset of the span's start in the source.
    pub byte_offset: usize,
    /// Byte offset of the span's end in the source.
    pub byte_end: usize,
    /// Delimiter character of the span.
    pub marker: char,
    /// Number of delimiter characters on each side.
    pub marker_count: usize,
    /// Text of the span.
    pub content: String,
}
504
/// A table row found in the document.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line the row appears on.
    pub line: usize,
    /// True when the row is the header separator row.
    pub is_separator: bool,
    /// Number of columns in the row.
    pub column_count: usize,
    /// Alignment of each column. NOTE(review): string values are defined where populated — confirm.
    pub column_alignments: Vec<String>,
}
517
/// A bare URL (or similar token) found in the document.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line the URL appears on.
    pub line: usize,
    /// Column where the URL starts.
    pub start_col: usize,
    /// Column where the URL ends.
    pub end_col: usize,
    /// Byte offset of the URL's start in the source.
    pub byte_offset: usize,
    /// Byte offset of the URL's end in the source.
    pub byte_end: usize,
    /// The matched text.
    pub url: String,
    /// Kind of match. NOTE(review): allowed values are defined where populated — confirm.
    pub url_type: String,
}
536
/// Everything the linter knows about one document: the source text plus the
/// structures derived from it. Some are computed eagerly by `new`, others
/// lazily on first access and then cached.
pub struct LintContext<'a> {
    /// The Markdown source this context was built from.
    pub content: &'a str,
    /// Byte offset at which each line starts: 0, then one entry after every `\n`.
    pub line_offsets: Vec<usize>,
    /// Ranges returned by `CodeBlockUtils::detect_code_blocks`.
    pub code_blocks: Vec<(usize, usize)>,
    /// Per-line metadata; index 0 describes line 1.
    pub lines: Vec<LineInfo>,
    /// Links found by `parse_links`.
    pub links: Vec<ParsedLink<'a>>,
    /// Images found by `parse_images`.
    pub images: Vec<ParsedImage<'a>>,
    /// Broken references reported while parsing links.
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Footnote references reported while parsing links.
    pub footnote_refs: Vec<FootnoteRef>,
    /// Reference definitions found by `parse_reference_defs`.
    pub reference_defs: Vec<ReferenceDef>,
    /// Lowercased definition id -> index into `reference_defs`.
    reference_defs_map: HashMap<String, usize>,
    /// Code spans; filled during construction.
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>,
    /// Math spans; computed on first access.
    math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>,
    /// List blocks found by `parse_list_blocks`.
    pub list_blocks: Vec<ListBlock>,
    /// Character counts backing `has_char`, `char_count` and the `likely_has_*` checks.
    pub char_frequency: CharFrequency,
    /// HTML tags; computed on first access.
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>,
    /// Emphasis spans; filled during construction.
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>,
    /// Table rows; computed on first access.
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>,
    /// Bare URLs; computed on first access.
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>,
    /// Result of the mixed ordered/unordered nesting check; computed on first access.
    has_mixed_list_nesting_cache: OnceLock<bool>,
    /// Ranges returned by `compute_html_comment_ranges`.
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Table blocks found by `TableUtils::find_table_blocks_with_code_info`.
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Line index built from the same content.
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    /// Ranges returned by `find_jinja_ranges`.
    jinja_ranges: Vec<(usize, usize)>,
    /// Markdown flavor the document is linted as.
    pub flavor: MarkdownFlavor,
    /// Path of the file being linted, when known.
    pub source_file: Option<PathBuf>,
    /// JSX expression ranges returned by `detect_jsx_and_mdx_comments`.
    jsx_expression_ranges: Vec<(usize, usize)>,
    /// MDX comment ranges returned by `detect_jsx_and_mdx_comments`.
    mdx_comment_ranges: Vec<(usize, usize)>,
    /// Citation ranges; only computed for the Quarto flavor, otherwise empty.
    citation_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Byte ranges of the matches of `HUGO_SHORTCODE_REGEX`.
    shortcode_ranges: Vec<(usize, usize)>,
    /// Inline configuration parsed from the content by `InlineConfig::from_content`.
    inline_config: InlineConfig,
}
569
/// The four consecutive pieces of a blockquote line; concatenated in order
/// they reproduce the original line.
struct BlockquoteComponents<'a> {
    /// Leading spaces/tabs before the first `>`.
    indent: &'a str,
    /// The run of consecutive `>` characters.
    markers: &'a str,
    /// Spaces/tabs directly after the markers.
    spaces_after: &'a str,
    /// Everything that follows.
    content: &'a str,
}

/// Splits `line` into indent, `>` markers, following whitespace and content.
///
/// Returns `None` when the first character after the leading spaces/tabs is
/// not `>` (including empty and whitespace-only lines). Only an unbroken run
/// of `>` counts as markers; a later `>` separated by whitespace ends up in
/// `content`.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    // Only ASCII space and tab count as whitespace here.
    const BLANKS: &[char] = &[' ', '\t'];

    let after_indent = line.trim_start_matches(BLANKS);
    let after_markers = after_indent.trim_start_matches('>');
    if after_markers.len() == after_indent.len() {
        // No `>` was consumed, so this is not a blockquote line.
        return None;
    }
    let after_spaces = after_markers.trim_start_matches(BLANKS);

    // Each boundary is the line length minus what is still left to parse.
    let indent_end = line.len() - after_indent.len();
    let markers_end = line.len() - after_markers.len();
    let spaces_end = line.len() - after_spaces.len();

    Some(BlockquoteComponents {
        indent: &line[..indent_end],
        markers: &line[indent_end..markers_end],
        spaces_after: &line[markers_end..spaces_end],
        content: after_spaces,
    })
}
614
615impl<'a> LintContext<'a> {
616 pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
617 #[cfg(not(target_arch = "wasm32"))]
618 let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
619 #[cfg(target_arch = "wasm32")]
620 let profile = false;
621
622 let line_offsets = profile_section!("Line offsets", profile, {
623 let mut offsets = vec![0];
624 for (i, c) in content.char_indices() {
625 if c == '\n' {
626 offsets.push(i + 1);
627 }
628 }
629 offsets
630 });
631
632 let code_blocks = profile_section!("Code blocks", profile, CodeBlockUtils::detect_code_blocks(content));
634
635 let html_comment_ranges = profile_section!(
637 "HTML comment ranges",
638 profile,
639 crate::utils::skip_context::compute_html_comment_ranges(content)
640 );
641
642 let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
644 if flavor == MarkdownFlavor::MkDocs {
645 crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
646 } else {
647 Vec::new()
648 }
649 });
650
651 let quarto_div_ranges = profile_section!("Quarto div ranges", profile, {
653 if flavor == MarkdownFlavor::Quarto {
654 crate::utils::quarto_divs::detect_div_block_ranges(content)
655 } else {
656 Vec::new()
657 }
658 });
659
660 let (mut lines, emphasis_spans) = profile_section!(
663 "Basic line info",
664 profile,
665 Self::compute_basic_line_info(
666 content,
667 &line_offsets,
668 &code_blocks,
669 flavor,
670 &html_comment_ranges,
671 &autodoc_ranges,
672 &quarto_div_ranges,
673 )
674 );
675
676 profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));
678
679 profile_section!(
681 "ESM blocks",
682 profile,
683 Self::detect_esm_blocks(content, &mut lines, flavor)
684 );
685
686 let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
688 "JSX/MDX detection",
689 profile,
690 Self::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
691 );
692
693 profile_section!(
695 "MkDocs constructs",
696 profile,
697 Self::detect_mkdocs_line_info(content, &mut lines, flavor)
698 );
699
700 let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));
702
703 profile_section!(
705 "Headings & blockquotes",
706 profile,
707 Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
708 );
709
710 let code_spans = profile_section!("Code spans", profile, Self::parse_code_spans(content, &lines));
712
713 for span in &code_spans {
716 if span.end_line > span.line {
717 for line_num in (span.line + 1)..=span.end_line {
719 if let Some(line_info) = lines.get_mut(line_num - 1) {
720 line_info.in_code_span_continuation = true;
721 }
722 }
723 }
724 }
725
726 let (links, broken_links, footnote_refs) = profile_section!(
728 "Links",
729 profile,
730 Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
731 );
732
733 let images = profile_section!(
734 "Images",
735 profile,
736 Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
737 );
738
739 let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));
740
741 let reference_defs_map: HashMap<String, usize> = reference_defs
743 .iter()
744 .enumerate()
745 .map(|(idx, def)| (def.id.to_lowercase(), idx))
746 .collect();
747
748 let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));
749
750 let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));
752
753 let table_blocks = profile_section!(
755 "Table blocks",
756 profile,
757 crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
758 content,
759 &code_blocks,
760 &code_spans,
761 &html_comment_ranges,
762 )
763 );
764
765 let line_index = profile_section!(
767 "Line index",
768 profile,
769 crate::utils::range_utils::LineIndex::new(content)
770 );
771
772 let jinja_ranges = profile_section!(
774 "Jinja ranges",
775 profile,
776 crate::utils::jinja_utils::find_jinja_ranges(content)
777 );
778
779 let citation_ranges = profile_section!("Citation ranges", profile, {
781 if flavor == MarkdownFlavor::Quarto {
782 crate::utils::quarto_divs::find_citation_ranges(content)
783 } else {
784 Vec::new()
785 }
786 });
787
788 let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
790 use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
791 let mut ranges = Vec::new();
792 for mat in HUGO_SHORTCODE_REGEX.find_iter(content).flatten() {
793 ranges.push((mat.start(), mat.end()));
794 }
795 ranges
796 });
797
798 Self {
799 content,
800 line_offsets,
801 code_blocks,
802 lines,
803 links,
804 images,
805 broken_links,
806 footnote_refs,
807 reference_defs,
808 reference_defs_map,
809 code_spans_cache: OnceLock::from(Arc::new(code_spans)),
810 math_spans_cache: OnceLock::new(), list_blocks,
812 char_frequency,
813 html_tags_cache: OnceLock::new(),
814 emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
815 table_rows_cache: OnceLock::new(),
816 bare_urls_cache: OnceLock::new(),
817 has_mixed_list_nesting_cache: OnceLock::new(),
818 html_comment_ranges,
819 table_blocks,
820 line_index,
821 jinja_ranges,
822 flavor,
823 source_file,
824 jsx_expression_ranges,
825 mdx_comment_ranges,
826 citation_ranges,
827 shortcode_ranges,
828 inline_config: InlineConfig::from_content(content),
829 }
830 }
831
832 pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
837 self.inline_config.is_rule_disabled(rule_name, line_number)
838 }
839
840 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
842 Arc::clone(
843 self.code_spans_cache
844 .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
845 )
846 }
847
848 pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
850 Arc::clone(
851 self.math_spans_cache
852 .get_or_init(|| Arc::new(Self::parse_math_spans(self.content, &self.lines))),
853 )
854 }
855
856 pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
858 let math_spans = self.math_spans();
859 math_spans
860 .iter()
861 .any(|span| byte_pos >= span.byte_offset && byte_pos < span.byte_end)
862 }
863
864 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
866 &self.html_comment_ranges
867 }
868
869 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
871 Arc::clone(self.html_tags_cache.get_or_init(|| {
872 Arc::new(Self::parse_html_tags(
873 self.content,
874 &self.lines,
875 &self.code_blocks,
876 self.flavor,
877 ))
878 }))
879 }
880
881 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
883 Arc::clone(
884 self.emphasis_spans_cache
885 .get()
886 .expect("emphasis_spans_cache initialized during construction"),
887 )
888 }
889
890 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
892 Arc::clone(
893 self.table_rows_cache
894 .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
895 )
896 }
897
898 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
900 Arc::clone(
901 self.bare_urls_cache
902 .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
903 )
904 }
905
906 pub fn has_mixed_list_nesting(&self) -> bool {
910 *self
911 .has_mixed_list_nesting_cache
912 .get_or_init(|| self.compute_mixed_list_nesting())
913 }
914
915 fn compute_mixed_list_nesting(&self) -> bool {
917 let mut stack: Vec<(usize, bool)> = Vec::new();
922 let mut last_was_blank = false;
923
924 for line_info in &self.lines {
925 if line_info.in_code_block
927 || line_info.in_front_matter
928 || line_info.in_mkdocstrings
929 || line_info.in_html_comment
930 || line_info.in_esm_block
931 {
932 continue;
933 }
934
935 if line_info.is_blank {
937 last_was_blank = true;
938 continue;
939 }
940
941 if let Some(list_item) = &line_info.list_item {
942 let current_pos = if list_item.marker_column == 1 {
944 0
945 } else {
946 list_item.marker_column
947 };
948
949 if last_was_blank && current_pos == 0 {
951 stack.clear();
952 }
953 last_was_blank = false;
954
955 while let Some(&(pos, _)) = stack.last() {
957 if pos >= current_pos {
958 stack.pop();
959 } else {
960 break;
961 }
962 }
963
964 if let Some(&(_, parent_is_ordered)) = stack.last()
966 && parent_is_ordered != list_item.is_ordered
967 {
968 return true; }
970
971 stack.push((current_pos, list_item.is_ordered));
972 } else {
973 last_was_blank = false;
975 }
976 }
977
978 false
979 }
980
981 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
983 match self.line_offsets.binary_search(&offset) {
984 Ok(line) => (line + 1, 1),
985 Err(line) => {
986 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
987 (line, offset - line_start + 1)
988 }
989 }
990 }
991
992 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
994 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
996 return true;
997 }
998
999 self.code_spans()
1001 .iter()
1002 .any(|span| pos >= span.byte_offset && pos < span.byte_end)
1003 }
1004
1005 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
1007 if line_num > 0 {
1008 self.lines.get(line_num - 1)
1009 } else {
1010 None
1011 }
1012 }
1013
1014 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
1016 self.line_info(line_num).map(|info| info.byte_offset)
1017 }
1018
1019 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
1021 let normalized_id = ref_id.to_lowercase();
1022 self.reference_defs_map
1023 .get(&normalized_id)
1024 .map(|&idx| self.reference_defs[idx].url.as_str())
1025 }
1026
1027 pub fn get_reference_def(&self, ref_id: &str) -> Option<&ReferenceDef> {
1029 let normalized_id = ref_id.to_lowercase();
1030 self.reference_defs_map
1031 .get(&normalized_id)
1032 .map(|&idx| &self.reference_defs[idx])
1033 }
1034
1035 pub fn has_reference_def(&self, ref_id: &str) -> bool {
1037 let normalized_id = ref_id.to_lowercase();
1038 self.reference_defs_map.contains_key(&normalized_id)
1039 }
1040
1041 pub fn is_in_list_block(&self, line_num: usize) -> bool {
1043 self.list_blocks
1044 .iter()
1045 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
1046 }
1047
1048 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
1050 self.list_blocks
1051 .iter()
1052 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
1053 }
1054
1055 pub fn is_in_code_block(&self, line_num: usize) -> bool {
1059 if line_num == 0 || line_num > self.lines.len() {
1060 return false;
1061 }
1062 self.lines[line_num - 1].in_code_block
1063 }
1064
1065 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
1067 if line_num == 0 || line_num > self.lines.len() {
1068 return false;
1069 }
1070 self.lines[line_num - 1].in_front_matter
1071 }
1072
1073 pub fn is_in_html_block(&self, line_num: usize) -> bool {
1075 if line_num == 0 || line_num > self.lines.len() {
1076 return false;
1077 }
1078 self.lines[line_num - 1].in_html_block
1079 }
1080
1081 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
1083 if line_num == 0 || line_num > self.lines.len() {
1084 return false;
1085 }
1086
1087 let col_0indexed = if col > 0 { col - 1 } else { 0 };
1091 let code_spans = self.code_spans();
1092 code_spans.iter().any(|span| {
1093 if line_num < span.line || line_num > span.end_line {
1095 return false;
1096 }
1097
1098 if span.line == span.end_line {
1099 col_0indexed >= span.start_col && col_0indexed < span.end_col
1101 } else if line_num == span.line {
1102 col_0indexed >= span.start_col
1104 } else if line_num == span.end_line {
1105 col_0indexed < span.end_col
1107 } else {
1108 true
1110 }
1111 })
1112 }
1113
1114 #[inline]
1116 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
1117 let code_spans = self.code_spans();
1118 code_spans
1119 .iter()
1120 .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
1121 }
1122
1123 #[inline]
1126 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
1127 self.reference_defs
1128 .iter()
1129 .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
1130 }
1131
1132 #[inline]
1136 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
1137 self.html_comment_ranges
1138 .iter()
1139 .any(|range| byte_pos >= range.start && byte_pos < range.end)
1140 }
1141
1142 #[inline]
1145 pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1146 self.html_tags()
1147 .iter()
1148 .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
1149 }
1150
1151 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1153 self.jinja_ranges
1154 .iter()
1155 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1156 }
1157
1158 #[inline]
1160 pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
1161 self.jsx_expression_ranges
1162 .iter()
1163 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1164 }
1165
1166 #[inline]
1168 pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
1169 self.mdx_comment_ranges
1170 .iter()
1171 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1172 }
1173
1174 pub fn jsx_expression_ranges(&self) -> &[(usize, usize)] {
1176 &self.jsx_expression_ranges
1177 }
1178
1179 pub fn mdx_comment_ranges(&self) -> &[(usize, usize)] {
1181 &self.mdx_comment_ranges
1182 }
1183
1184 #[inline]
1187 pub fn is_in_citation(&self, byte_pos: usize) -> bool {
1188 self.citation_ranges
1189 .iter()
1190 .any(|range| byte_pos >= range.start && byte_pos < range.end)
1191 }
1192
1193 pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
1195 &self.citation_ranges
1196 }
1197
1198 #[inline]
1200 pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
1201 self.shortcode_ranges
1202 .iter()
1203 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1204 }
1205
1206 pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
1208 &self.shortcode_ranges
1209 }
1210
1211 pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1213 self.reference_defs.iter().any(|def| {
1214 if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1215 byte_pos >= start && byte_pos < end
1216 } else {
1217 false
1218 }
1219 })
1220 }
1221
1222 pub fn has_char(&self, ch: char) -> bool {
1224 match ch {
1225 '#' => self.char_frequency.hash_count > 0,
1226 '*' => self.char_frequency.asterisk_count > 0,
1227 '_' => self.char_frequency.underscore_count > 0,
1228 '-' => self.char_frequency.hyphen_count > 0,
1229 '+' => self.char_frequency.plus_count > 0,
1230 '>' => self.char_frequency.gt_count > 0,
1231 '|' => self.char_frequency.pipe_count > 0,
1232 '[' => self.char_frequency.bracket_count > 0,
1233 '`' => self.char_frequency.backtick_count > 0,
1234 '<' => self.char_frequency.lt_count > 0,
1235 '!' => self.char_frequency.exclamation_count > 0,
1236 '\n' => self.char_frequency.newline_count > 0,
1237 _ => self.content.contains(ch), }
1239 }
1240
1241 pub fn char_count(&self, ch: char) -> usize {
1243 match ch {
1244 '#' => self.char_frequency.hash_count,
1245 '*' => self.char_frequency.asterisk_count,
1246 '_' => self.char_frequency.underscore_count,
1247 '-' => self.char_frequency.hyphen_count,
1248 '+' => self.char_frequency.plus_count,
1249 '>' => self.char_frequency.gt_count,
1250 '|' => self.char_frequency.pipe_count,
1251 '[' => self.char_frequency.bracket_count,
1252 '`' => self.char_frequency.backtick_count,
1253 '<' => self.char_frequency.lt_count,
1254 '!' => self.char_frequency.exclamation_count,
1255 '\n' => self.char_frequency.newline_count,
1256 _ => self.content.matches(ch).count(), }
1258 }
1259
1260 pub fn likely_has_headings(&self) -> bool {
1262 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
1264
1265 pub fn likely_has_lists(&self) -> bool {
1267 self.char_frequency.asterisk_count > 0
1268 || self.char_frequency.hyphen_count > 0
1269 || self.char_frequency.plus_count > 0
1270 }
1271
1272 pub fn likely_has_emphasis(&self) -> bool {
1274 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1275 }
1276
1277 pub fn likely_has_tables(&self) -> bool {
1279 self.char_frequency.pipe_count > 2
1280 }
1281
1282 pub fn likely_has_blockquotes(&self) -> bool {
1284 self.char_frequency.gt_count > 0
1285 }
1286
1287 pub fn likely_has_code(&self) -> bool {
1289 self.char_frequency.backtick_count > 0
1290 }
1291
1292 pub fn likely_has_links_or_images(&self) -> bool {
1294 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1295 }
1296
1297 pub fn likely_has_html(&self) -> bool {
1299 self.char_frequency.lt_count > 0
1300 }
1301
1302 pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1307 if let Some(line_info) = self.lines.get(line_idx)
1308 && let Some(ref bq) = line_info.blockquote
1309 {
1310 bq.prefix.trim_end().to_string()
1311 } else {
1312 String::new()
1313 }
1314 }
1315
1316 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1318 self.html_tags()
1319 .iter()
1320 .filter(|tag| tag.line == line_num)
1321 .cloned()
1322 .collect()
1323 }
1324
1325 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1327 self.emphasis_spans()
1328 .iter()
1329 .filter(|span| span.line == line_num)
1330 .cloned()
1331 .collect()
1332 }
1333
1334 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1336 self.table_rows()
1337 .iter()
1338 .filter(|row| row.line == line_num)
1339 .cloned()
1340 .collect()
1341 }
1342
1343 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1345 self.bare_urls()
1346 .iter()
1347 .filter(|url| url.line == line_num)
1348 .cloned()
1349 .collect()
1350 }
1351
1352 #[inline]
1358 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1359 let idx = match lines.binary_search_by(|line| {
1361 if byte_offset < line.byte_offset {
1362 std::cmp::Ordering::Greater
1363 } else if byte_offset > line.byte_offset + line.byte_len {
1364 std::cmp::Ordering::Less
1365 } else {
1366 std::cmp::Ordering::Equal
1367 }
1368 }) {
1369 Ok(idx) => idx,
1370 Err(idx) => idx.saturating_sub(1),
1371 };
1372
1373 let line = &lines[idx];
1374 let line_num = idx + 1;
1375 let col = byte_offset.saturating_sub(line.byte_offset);
1376
1377 (idx, line_num, col)
1378 }
1379
1380 #[inline]
1382 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1383 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1385
1386 if idx > 0 {
1388 let span = &code_spans[idx - 1];
1389 if offset >= span.byte_offset && offset < span.byte_end {
1390 return true;
1391 }
1392 }
1393
1394 false
1395 }
1396
1397 fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1401 use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1402
1403 let mut link_ranges = Vec::new();
1404 let mut options = Options::empty();
1405 options.insert(Options::ENABLE_WIKILINKS);
1406 options.insert(Options::ENABLE_FOOTNOTES);
1407
1408 let parser = Parser::new_ext(content, options).into_offset_iter();
1409 let mut link_stack: Vec<usize> = Vec::new();
1410
1411 for (event, range) in parser {
1412 match event {
1413 Event::Start(Tag::Link { .. }) => {
1414 link_stack.push(range.start);
1415 }
1416 Event::End(TagEnd::Link) => {
1417 if let Some(start_pos) = link_stack.pop() {
1418 link_ranges.push((start_pos, range.end));
1419 }
1420 }
1421 _ => {}
1422 }
1423 }
1424
1425 link_ranges
1426 }
1427
1428 fn parse_links(
1430 content: &'a str,
1431 lines: &[LineInfo],
1432 code_blocks: &[(usize, usize)],
1433 code_spans: &[CodeSpan],
1434 flavor: MarkdownFlavor,
1435 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1436 ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1437 use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1438 use std::collections::HashSet;
1439
1440 let mut links = Vec::with_capacity(content.len() / 500);
1441 let mut broken_links = Vec::new();
1442 let mut footnote_refs = Vec::new();
1443
1444 let mut found_positions = HashSet::new();
1446
1447 let mut options = Options::empty();
1457 options.insert(Options::ENABLE_WIKILINKS);
1458 options.insert(Options::ENABLE_FOOTNOTES);
1459
1460 let parser = Parser::new_with_broken_link_callback(
1461 content,
1462 options,
1463 Some(|link: BrokenLink<'_>| {
1464 broken_links.push(BrokenLinkInfo {
1465 reference: link.reference.to_string(),
1466 span: link.span.clone(),
1467 });
1468 None
1469 }),
1470 )
1471 .into_offset_iter();
1472
1473 let mut link_stack: Vec<(
1474 usize,
1475 usize,
1476 pulldown_cmark::CowStr<'a>,
1477 LinkType,
1478 pulldown_cmark::CowStr<'a>,
1479 )> = Vec::new();
1480 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1483 match event {
1484 Event::Start(Tag::Link {
1485 link_type,
1486 dest_url,
1487 id,
1488 ..
1489 }) => {
1490 link_stack.push((range.start, range.end, dest_url, link_type, id));
1492 text_chunks.clear();
1493 }
1494 Event::Text(text) if !link_stack.is_empty() => {
1495 text_chunks.push((text.to_string(), range.start, range.end));
1497 }
1498 Event::Code(code) if !link_stack.is_empty() => {
1499 let code_text = format!("`{code}`");
1501 text_chunks.push((code_text, range.start, range.end));
1502 }
1503 Event::End(TagEnd::Link) => {
1504 if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1505 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1507 text_chunks.clear();
1508 continue;
1509 }
1510
1511 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1513
1514 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1516 text_chunks.clear();
1517 continue;
1518 }
1519
1520 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1521
1522 let is_reference = matches!(
1523 link_type,
1524 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1525 );
1526
1527 let link_text = if start_pos < content.len() {
1530 let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1531
1532 let mut close_pos = None;
1536 let mut depth = 0;
1537 let mut in_code_span = false;
1538
1539 for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1540 let mut backslash_count = 0;
1542 let mut j = i;
1543 while j > 0 && link_bytes[j - 1] == b'\\' {
1544 backslash_count += 1;
1545 j -= 1;
1546 }
1547 let is_escaped = backslash_count % 2 != 0;
1548
1549 if byte == b'`' && !is_escaped {
1551 in_code_span = !in_code_span;
1552 }
1553
1554 if !is_escaped && !in_code_span {
1556 if byte == b'[' {
1557 depth += 1;
1558 } else if byte == b']' {
1559 if depth == 0 {
1560 close_pos = Some(i);
1562 break;
1563 } else {
1564 depth -= 1;
1565 }
1566 }
1567 }
1568 }
1569
1570 if let Some(pos) = close_pos {
1571 Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1572 } else {
1573 Cow::Borrowed("")
1574 }
1575 } else {
1576 Cow::Borrowed("")
1577 };
1578
1579 let reference_id = if is_reference && !ref_id.is_empty() {
1581 Some(Cow::Owned(ref_id.to_lowercase()))
1582 } else if is_reference {
1583 Some(Cow::Owned(link_text.to_lowercase()))
1585 } else {
1586 None
1587 };
1588
1589 found_positions.insert(start_pos);
1591
1592 links.push(ParsedLink {
1593 line: line_num,
1594 start_col: col_start,
1595 end_col: col_end,
1596 byte_offset: start_pos,
1597 byte_end: range.end,
1598 text: link_text,
1599 url: Cow::Owned(url.to_string()),
1600 is_reference,
1601 reference_id,
1602 link_type,
1603 });
1604
1605 text_chunks.clear();
1606 }
1607 }
1608 Event::FootnoteReference(footnote_id) => {
1609 if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1612 continue;
1613 }
1614
1615 let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1616 footnote_refs.push(FootnoteRef {
1617 id: footnote_id.to_string(),
1618 line: line_num,
1619 byte_offset: range.start,
1620 byte_end: range.end,
1621 });
1622 }
1623 _ => {}
1624 }
1625 }
1626
1627 for cap in LINK_PATTERN.captures_iter(content) {
1631 let full_match = cap.get(0).unwrap();
1632 let match_start = full_match.start();
1633 let match_end = full_match.end();
1634
1635 if found_positions.contains(&match_start) {
1637 continue;
1638 }
1639
1640 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1642 continue;
1643 }
1644
1645 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1647 continue;
1648 }
1649
1650 if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1652 continue;
1653 }
1654
1655 if Self::is_offset_in_code_span(code_spans, match_start) {
1657 continue;
1658 }
1659
1660 if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1662 continue;
1663 }
1664
1665 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1667
1668 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1670 continue;
1671 }
1672
1673 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1674
1675 let text = cap.get(1).map_or("", |m| m.as_str());
1676
1677 if let Some(ref_id) = cap.get(6) {
1679 let ref_id_str = ref_id.as_str();
1680 let normalized_ref = if ref_id_str.is_empty() {
1681 Cow::Owned(text.to_lowercase()) } else {
1683 Cow::Owned(ref_id_str.to_lowercase())
1684 };
1685
1686 links.push(ParsedLink {
1688 line: line_num,
1689 start_col: col_start,
1690 end_col: col_end,
1691 byte_offset: match_start,
1692 byte_end: match_end,
1693 text: Cow::Borrowed(text),
1694 url: Cow::Borrowed(""), is_reference: true,
1696 reference_id: Some(normalized_ref),
1697 link_type: LinkType::Reference, });
1699 }
1700 }
1701
1702 (links, broken_links, footnote_refs)
1703 }
1704
1705 fn parse_images(
1707 content: &'a str,
1708 lines: &[LineInfo],
1709 code_blocks: &[(usize, usize)],
1710 code_spans: &[CodeSpan],
1711 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1712 ) -> Vec<ParsedImage<'a>> {
1713 use crate::utils::skip_context::is_in_html_comment_ranges;
1714 use std::collections::HashSet;
1715
1716 let mut images = Vec::with_capacity(content.len() / 1000);
1718 let mut found_positions = HashSet::new();
1719
1720 let parser = Parser::new(content).into_offset_iter();
1722 let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1723 Vec::new();
1724 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1727 match event {
1728 Event::Start(Tag::Image {
1729 link_type,
1730 dest_url,
1731 id,
1732 ..
1733 }) => {
1734 image_stack.push((range.start, dest_url, link_type, id));
1735 text_chunks.clear();
1736 }
1737 Event::Text(text) if !image_stack.is_empty() => {
1738 text_chunks.push((text.to_string(), range.start, range.end));
1739 }
1740 Event::Code(code) if !image_stack.is_empty() => {
1741 let code_text = format!("`{code}`");
1742 text_chunks.push((code_text, range.start, range.end));
1743 }
1744 Event::End(TagEnd::Image) => {
1745 if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1746 if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1748 continue;
1749 }
1750
1751 if Self::is_offset_in_code_span(code_spans, start_pos) {
1753 continue;
1754 }
1755
1756 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1758 continue;
1759 }
1760
1761 let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1763 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1764
1765 let is_reference = matches!(
1766 link_type,
1767 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1768 );
1769
1770 let alt_text = if start_pos < content.len() {
1773 let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1774
1775 let mut close_pos = None;
1778 let mut depth = 0;
1779
1780 if image_bytes.len() > 2 {
1781 for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1782 let mut backslash_count = 0;
1784 let mut j = i;
1785 while j > 0 && image_bytes[j - 1] == b'\\' {
1786 backslash_count += 1;
1787 j -= 1;
1788 }
1789 let is_escaped = backslash_count % 2 != 0;
1790
1791 if !is_escaped {
1792 if byte == b'[' {
1793 depth += 1;
1794 } else if byte == b']' {
1795 if depth == 0 {
1796 close_pos = Some(i);
1798 break;
1799 } else {
1800 depth -= 1;
1801 }
1802 }
1803 }
1804 }
1805 }
1806
1807 if let Some(pos) = close_pos {
1808 Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1809 } else {
1810 Cow::Borrowed("")
1811 }
1812 } else {
1813 Cow::Borrowed("")
1814 };
1815
1816 let reference_id = if is_reference && !ref_id.is_empty() {
1817 Some(Cow::Owned(ref_id.to_lowercase()))
1818 } else if is_reference {
1819 Some(Cow::Owned(alt_text.to_lowercase())) } else {
1821 None
1822 };
1823
1824 found_positions.insert(start_pos);
1825 images.push(ParsedImage {
1826 line: line_num,
1827 start_col: col_start,
1828 end_col: col_end,
1829 byte_offset: start_pos,
1830 byte_end: range.end,
1831 alt_text,
1832 url: Cow::Owned(url.to_string()),
1833 is_reference,
1834 reference_id,
1835 link_type,
1836 });
1837 }
1838 }
1839 _ => {}
1840 }
1841 }
1842
1843 for cap in IMAGE_PATTERN.captures_iter(content) {
1845 let full_match = cap.get(0).unwrap();
1846 let match_start = full_match.start();
1847 let match_end = full_match.end();
1848
1849 if found_positions.contains(&match_start) {
1851 continue;
1852 }
1853
1854 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1856 continue;
1857 }
1858
1859 if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1861 || Self::is_offset_in_code_span(code_spans, match_start)
1862 || is_in_html_comment_ranges(html_comment_ranges, match_start)
1863 {
1864 continue;
1865 }
1866
1867 if let Some(ref_id) = cap.get(6) {
1869 let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1870 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1871 let alt_text = cap.get(1).map_or("", |m| m.as_str());
1872 let ref_id_str = ref_id.as_str();
1873 let normalized_ref = if ref_id_str.is_empty() {
1874 Cow::Owned(alt_text.to_lowercase())
1875 } else {
1876 Cow::Owned(ref_id_str.to_lowercase())
1877 };
1878
1879 images.push(ParsedImage {
1880 line: line_num,
1881 start_col: col_start,
1882 end_col: col_end,
1883 byte_offset: match_start,
1884 byte_end: match_end,
1885 alt_text: Cow::Borrowed(alt_text),
1886 url: Cow::Borrowed(""),
1887 is_reference: true,
1888 reference_id: Some(normalized_ref),
1889 link_type: LinkType::Reference, });
1891 }
1892 }
1893
1894 images
1895 }
1896
1897 fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1899 let mut refs = Vec::with_capacity(lines.len() / 20); for (line_idx, line_info) in lines.iter().enumerate() {
1903 if line_info.in_code_block {
1905 continue;
1906 }
1907
1908 let line = line_info.content(content);
1909 let line_num = line_idx + 1;
1910
1911 if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1912 let id_raw = cap.get(1).unwrap().as_str();
1913
1914 if id_raw.starts_with('^') {
1917 continue;
1918 }
1919
1920 let id = id_raw.to_lowercase();
1921 let url = cap.get(2).unwrap().as_str().to_string();
1922 let title_match = cap.get(3).or_else(|| cap.get(4));
1923 let title = title_match.map(|m| m.as_str().to_string());
1924
1925 let match_obj = cap.get(0).unwrap();
1928 let byte_offset = line_info.byte_offset + match_obj.start();
1929 let byte_end = line_info.byte_offset + match_obj.end();
1930
1931 let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1933 let start = line_info.byte_offset + m.start().saturating_sub(1);
1935 let end = line_info.byte_offset + m.end() + 1; (Some(start), Some(end))
1937 } else {
1938 (None, None)
1939 };
1940
1941 refs.push(ReferenceDef {
1942 line: line_num,
1943 id,
1944 url,
1945 title,
1946 byte_offset,
1947 byte_end,
1948 title_byte_start,
1949 title_byte_end,
1950 });
1951 }
1952 }
1953
1954 refs
1955 }
1956
#[inline]
/// Splits a line into its blockquote prefix and the text that follows it.
///
/// The prefix is made of one or more `>` markers, each optionally preceded by whitespace
/// and optionally followed by a single space or tab. Returns `None` when the line, after
/// leading whitespace, does not begin with `>`; otherwise `Some((prefix, rest))` where
/// `prefix` and `rest` together make up `line`.
fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
    let mut rest = line;

    // Peel off one `>` marker (plus at most one following space or tab) per iteration.
    while let Some(after_marker) = rest.trim_start().strip_prefix('>') {
        rest = after_marker
            .strip_prefix(' ')
            .or_else(|| after_marker.strip_prefix('\t'))
            .unwrap_or(after_marker);
    }

    // `rest` is always a suffix of `line`; nothing consumed means there was no marker.
    let prefix_len = line.len() - rest.len();
    if prefix_len == 0 {
        return None;
    }

    Some((&line[..prefix_len], rest))
}
1997
1998 fn detect_list_items_and_emphasis_with_pulldown(
2022 content: &str,
2023 line_offsets: &[usize],
2024 flavor: MarkdownFlavor,
2025 front_matter_end: usize,
2026 code_blocks: &[(usize, usize)],
2027 ) -> (ListItemMap, Vec<EmphasisSpan>) {
2028 use std::collections::HashMap;
2029
2030 let mut list_items = HashMap::new();
2031 let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
2032
2033 let mut options = Options::empty();
2034 options.insert(Options::ENABLE_TABLES);
2035 options.insert(Options::ENABLE_FOOTNOTES);
2036 options.insert(Options::ENABLE_STRIKETHROUGH);
2037 options.insert(Options::ENABLE_TASKLISTS);
2038 options.insert(Options::ENABLE_GFM);
2040
2041 let _ = flavor;
2043
2044 let parser = Parser::new_ext(content, options).into_offset_iter();
2045 let mut list_depth: usize = 0;
2046 let mut list_stack: Vec<bool> = Vec::new();
2047
2048 for (event, range) in parser {
2049 match event {
2050 Event::Start(Tag::Emphasis) | Event::Start(Tag::Strong) => {
2052 let marker_count = if matches!(event, Event::Start(Tag::Strong)) {
2053 2
2054 } else {
2055 1
2056 };
2057 let match_start = range.start;
2058 let match_end = range.end;
2059
2060 if !CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2062 let marker = content[match_start..].chars().next().unwrap_or('*');
2064 if marker == '*' || marker == '_' {
2065 let content_start = match_start + marker_count;
2067 let content_end = if match_end >= marker_count {
2068 match_end - marker_count
2069 } else {
2070 match_end
2071 };
2072 let content_part = if content_start < content_end && content_end <= content.len() {
2073 &content[content_start..content_end]
2074 } else {
2075 ""
2076 };
2077
2078 let line_idx = match line_offsets.binary_search(&match_start) {
2080 Ok(idx) => idx,
2081 Err(idx) => idx.saturating_sub(1),
2082 };
2083 let line_num = line_idx + 1;
2084 let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
2085 let col_start = match_start - line_start;
2086 let col_end = match_end - line_start;
2087
2088 emphasis_spans.push(EmphasisSpan {
2089 line: line_num,
2090 start_col: col_start,
2091 end_col: col_end,
2092 byte_offset: match_start,
2093 byte_end: match_end,
2094 marker,
2095 marker_count,
2096 content: content_part.to_string(),
2097 });
2098 }
2099 }
2100 }
2101 Event::Start(Tag::List(start_number)) => {
2102 list_depth += 1;
2103 list_stack.push(start_number.is_some());
2104 }
2105 Event::End(TagEnd::List(_)) => {
2106 list_depth = list_depth.saturating_sub(1);
2107 list_stack.pop();
2108 }
2109 Event::Start(Tag::Item) if list_depth > 0 => {
2110 let current_list_is_ordered = list_stack.last().copied().unwrap_or(false);
2112 let item_start = range.start;
2114
2115 let mut line_idx = match line_offsets.binary_search(&item_start) {
2117 Ok(idx) => idx,
2118 Err(idx) => idx.saturating_sub(1),
2119 };
2120
2121 if item_start < content.len() && content.as_bytes()[item_start] == b'\n' {
2125 line_idx += 1;
2126 }
2127
2128 if front_matter_end > 0 && line_idx < front_matter_end {
2130 continue;
2131 }
2132
2133 if line_idx < line_offsets.len() {
2134 let line_start_byte = line_offsets[line_idx];
2135 let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
2136 let line = &content[line_start_byte..line_end.min(content.len())];
2137
2138 let line = line
2140 .strip_suffix('\n')
2141 .or_else(|| line.strip_suffix("\r\n"))
2142 .unwrap_or(line);
2143
2144 let blockquote_parse = Self::parse_blockquote_prefix(line);
2146 let (blockquote_prefix_len, line_to_parse) = if let Some((prefix, content)) = blockquote_parse {
2147 (prefix.len(), content)
2148 } else {
2149 (0, line)
2150 };
2151
2152 if current_list_is_ordered {
2154 if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
2155 Self::parse_ordered_list(line_to_parse)
2156 {
2157 let marker = format!("{number_str}{delimiter}");
2158 let marker_column = blockquote_prefix_len + leading_spaces.len();
2159 let content_column = marker_column + marker.len() + spacing.len();
2160 let number = number_str.parse().ok();
2161
2162 list_items.entry(line_start_byte).or_insert((
2163 true,
2164 marker,
2165 marker_column,
2166 content_column,
2167 number,
2168 ));
2169 }
2170 } else if let Some((leading_spaces, marker, spacing, _content)) =
2171 Self::parse_unordered_list(line_to_parse)
2172 {
2173 let marker_column = blockquote_prefix_len + leading_spaces.len();
2174 let content_column = marker_column + 1 + spacing.len();
2175
2176 list_items.entry(line_start_byte).or_insert((
2177 false,
2178 marker.to_string(),
2179 marker_column,
2180 content_column,
2181 None,
2182 ));
2183 }
2184 }
2185 }
2186 _ => {}
2187 }
2188 }
2189
2190 (list_items, emphasis_spans)
2191 }
2192
#[inline]
/// Splits a line that starts with a bullet marker into its parts.
///
/// Returns `(indent, marker, spacing, text)` where `indent` and `spacing` are runs of
/// spaces/tabs, `marker` is one of `-`, `*`, `+`, and `text` is the remainder of the line.
/// Returns `None` when the first character after the indentation is not a bullet marker.
fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
    const PAD: [char; 2] = [' ', '\t'];

    let body = line.trim_start_matches(PAD);
    let indent = &line[..line.len() - body.len()];

    let marker = body.chars().next().filter(|c| matches!(c, '-' | '*' | '+'))?;

    // The marker is ASCII, so it occupies exactly one byte.
    let after_marker = &body[1..];
    let text = after_marker.trim_start_matches(PAD);
    let spacing = &after_marker[..after_marker.len() - text.len()];

    Some((indent, marker, spacing, text))
}
2225
#[inline]
/// Splits a line that starts with an ordered-list marker into its parts.
///
/// Returns `(indent, number, delimiter, spacing, text)` where `indent` and `spacing` are
/// runs of spaces/tabs, `number` is one or more ASCII digits, `delimiter` is `.` or `)`,
/// and `text` is the remainder of the line. Returns `None` when the digits or the
/// delimiter are missing.
fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
    const PAD: [char; 2] = [' ', '\t'];

    let body = line.trim_start_matches(PAD);
    let indent = &line[..line.len() - body.len()];

    let digit_count = body.bytes().take_while(u8::is_ascii_digit).count();
    if digit_count == 0 {
        return None;
    }
    let (number, tail) = body.split_at(digit_count);

    let delimiter = tail.chars().next().filter(|c| matches!(c, '.' | ')'))?;

    // The delimiter is ASCII, so it occupies exactly one byte.
    let after_delimiter = &tail[1..];
    let text = after_delimiter.trim_start_matches(PAD);
    let spacing = &after_delimiter[..after_delimiter.len() - text.len()];

    Some((indent, number, delimiter, spacing, text))
}
2273
/// Builds a per-line flag vector marking the lines touched by any code block.
///
/// `line_offsets` holds the starting byte offset of each line (ascending); `code_blocks`
/// holds `(start, end)` byte ranges. Range ends that fall inside a multi-byte character
/// are widened outwards to the nearest character boundary before being mapped to lines.
/// The result has one entry per element of `line_offsets`.
fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
    let total_len = content.len();
    let mut flags = vec![false; line_offsets.len()];

    for &(raw_start, raw_end) in code_blocks {
        // Walk backwards from the start until a char boundary is hit (0 always is one).
        let block_start = (0..=raw_start)
            .rev()
            .find(|&pos| content.is_char_boundary(pos))
            .unwrap_or(0);

        // Walk forwards from the (clamped) end until a char boundary is hit.
        let block_end = (raw_end.min(total_len)..=total_len)
            .find(|&pos| content.is_char_boundary(pos))
            .unwrap_or(total_len);

        // First line: the last one starting at or before the block start.
        let first_line = line_offsets
            .partition_point(|&offset| offset <= block_start)
            .saturating_sub(1);
        // One past the last line that starts before the block end.
        let end_line = line_offsets.partition_point(|&offset| offset < block_end);

        // An inverted window (first_line > end_line) selects nothing.
        if let Some(window) = flags.get_mut(first_line..end_line) {
            window.fill(true);
        }
    }

    flags
}
2333
/// Builds a per-line flag vector marking lines that belong to a `$$ … $$` math block.
///
/// A line whose trimmed text is exactly `$$` toggles the "inside math" state and is itself
/// flagged; every line seen while inside is flagged too. Lines flagged in `code_block_map`
/// are never marked and never toggle the state. The result has one entry per line of
/// `content.lines()`.
fn compute_math_block_line_map(content: &str, code_block_map: &[bool]) -> Vec<bool> {
    let mut inside_math = false;

    content
        .lines()
        .enumerate()
        .map(|(idx, text)| {
            let in_code = code_block_map.get(idx).copied().unwrap_or(false);
            if in_code {
                return false;
            }

            if text.trim() == "$$" {
                // Both the opening and the closing delimiter line count as math.
                inside_math = !inside_math;
                true
            } else {
                inside_math
            }
        })
        .collect()
}
2371
2372 fn compute_basic_line_info(
2375 content: &str,
2376 line_offsets: &[usize],
2377 code_blocks: &[(usize, usize)],
2378 flavor: MarkdownFlavor,
2379 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2380 autodoc_ranges: &[crate::utils::skip_context::ByteRange],
2381 quarto_div_ranges: &[crate::utils::skip_context::ByteRange],
2382 ) -> (Vec<LineInfo>, Vec<EmphasisSpan>) {
2383 let content_lines: Vec<&str> = content.lines().collect();
2384 let mut lines = Vec::with_capacity(content_lines.len());
2385
2386 let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
2388
2389 let math_block_map = Self::compute_math_block_line_map(content, &code_block_map);
2391
2392 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2395
2396 let (list_item_map, emphasis_spans) = Self::detect_list_items_and_emphasis_with_pulldown(
2399 content,
2400 line_offsets,
2401 flavor,
2402 front_matter_end,
2403 code_blocks,
2404 );
2405
2406 for (i, line) in content_lines.iter().enumerate() {
2407 let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
2408 let indent = line.len() - line.trim_start().len();
2409 let visual_indent = ElementCache::calculate_indentation_width_default(line);
2411
2412 let blockquote_parse = Self::parse_blockquote_prefix(line);
2414
2415 let is_blank = if let Some((_, content)) = blockquote_parse {
2417 content.trim().is_empty()
2419 } else {
2420 line.trim().is_empty()
2421 };
2422
2423 let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
2425
2426 let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
2428 && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
2429 let line_end_offset = byte_offset + line.len();
2432 let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
2433 html_comment_ranges,
2434 byte_offset,
2435 line_end_offset,
2436 );
2437 let list_item =
2440 list_item_map
2441 .get(&byte_offset)
2442 .map(
2443 |(is_ordered, marker, marker_column, content_column, number)| ListItemInfo {
2444 marker: marker.clone(),
2445 is_ordered: *is_ordered,
2446 number: *number,
2447 marker_column: *marker_column,
2448 content_column: *content_column,
2449 },
2450 );
2451
2452 let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2455 let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2456
2457 let in_math_block = math_block_map.get(i).copied().unwrap_or(false);
2459
2460 let in_quarto_div = flavor == MarkdownFlavor::Quarto
2462 && crate::utils::quarto_divs::is_within_div_block_ranges(quarto_div_ranges, byte_offset);
2463
2464 lines.push(LineInfo {
2465 byte_offset,
2466 byte_len: line.len(),
2467 indent,
2468 visual_indent,
2469 is_blank,
2470 in_code_block,
2471 in_front_matter,
2472 in_html_block: false, in_html_comment,
2474 list_item,
2475 heading: None, blockquote: None, in_mkdocstrings,
2478 in_esm_block: false, in_code_span_continuation: false, is_horizontal_rule: is_hr,
2481 in_math_block,
2482 in_quarto_div,
2483 in_jsx_expression: false, in_mdx_comment: false, in_jsx_component: false, in_jsx_fragment: false, in_admonition: false, in_content_tab: false, in_definition_list: false, });
2491 }
2492
2493 (lines, emphasis_spans)
2494 }
2495
2496 fn detect_headings_and_blockquotes(
2498 content: &str,
2499 lines: &mut [LineInfo],
2500 flavor: MarkdownFlavor,
2501 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2502 link_byte_ranges: &[(usize, usize)],
2503 ) {
2504 static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
2506 LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
2507 static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
2508 LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
2509
2510 let content_lines: Vec<&str> = content.lines().collect();
2511
2512 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2514
2515 for i in 0..lines.len() {
2517 let line = content_lines[i];
2518
2519 if !(front_matter_end > 0 && i < front_matter_end)
2524 && let Some(bq) = parse_blockquote_detailed(line)
2525 {
2526 let nesting_level = bq.markers.len();
2527 let marker_column = bq.indent.len();
2528 let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
2529 let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
2530 let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
2531 let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
2532
2533 lines[i].blockquote = Some(BlockquoteInfo {
2534 nesting_level,
2535 indent: bq.indent.to_string(),
2536 marker_column,
2537 prefix,
2538 content: bq.content.to_string(),
2539 has_no_space_after_marker: has_no_space,
2540 has_multiple_spaces_after_marker: has_multiple_spaces,
2541 needs_md028_fix,
2542 });
2543
2544 if !lines[i].in_code_block && is_horizontal_rule_content(bq.content.trim()) {
2547 lines[i].is_horizontal_rule = true;
2548 }
2549 }
2550
2551 if lines[i].in_code_block {
2553 continue;
2554 }
2555
2556 if front_matter_end > 0 && i < front_matter_end {
2558 continue;
2559 }
2560
2561 if lines[i].in_html_block {
2563 continue;
2564 }
2565
2566 if lines[i].is_blank {
2568 continue;
2569 }
2570
2571 let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
2574 crate::utils::mkdocs_snippets::is_snippet_section_start(line)
2575 || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
2576 } else {
2577 false
2578 };
2579
2580 if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
2581 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
2583 continue;
2584 }
2585 let line_offset = lines[i].byte_offset;
2588 if link_byte_ranges
2589 .iter()
2590 .any(|&(start, end)| line_offset > start && line_offset < end)
2591 {
2592 continue;
2593 }
2594 let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
2595 let hashes = caps.get(2).map_or("", |m| m.as_str());
2596 let spaces_after = caps.get(3).map_or("", |m| m.as_str());
2597 let rest = caps.get(4).map_or("", |m| m.as_str());
2598
2599 let level = hashes.len() as u8;
2600 let marker_column = leading_spaces.len();
2601
2602 let (text, has_closing, closing_seq) = {
2604 let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
2606 if rest[id_start..].trim_end().ends_with('}') {
2608 (&rest[..id_start], &rest[id_start..])
2610 } else {
2611 (rest, "")
2612 }
2613 } else {
2614 (rest, "")
2615 };
2616
2617 let trimmed_rest = rest_without_id.trim_end();
2619 if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
2620 let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();
2623
2624 let last_hash_char_idx = char_positions
2626 .iter()
2627 .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);
2628
2629 if let Some(mut char_idx) = last_hash_char_idx {
2630 while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
2632 char_idx -= 1;
2633 }
2634
2635 let start_of_hashes = char_positions[char_idx].0;
2637
2638 let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();
2640
2641 let potential_closing = &trimmed_rest[start_of_hashes..];
2643 let is_all_hashes = potential_closing.chars().all(|c| c == '#');
2644
2645 if is_all_hashes && has_space_before {
2646 let closing_hashes = potential_closing.to_string();
2648 let text_part = if !custom_id_part.is_empty() {
2651 format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
2654 } else {
2655 trimmed_rest[..start_of_hashes].trim_end().to_string()
2656 };
2657 (text_part, true, closing_hashes)
2658 } else {
2659 (rest.to_string(), false, String::new())
2661 }
2662 } else {
2663 (rest.to_string(), false, String::new())
2665 }
2666 } else {
2667 (rest.to_string(), false, String::new())
2669 }
2670 };
2671
2672 let content_column = marker_column + hashes.len() + spaces_after.len();
2673
2674 let raw_text = text.trim().to_string();
2676 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2677
2678 if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
2680 let next_line = content_lines[i + 1];
2681 if !lines[i + 1].in_code_block
2682 && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
2683 && let Some(next_line_id) =
2684 crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
2685 {
2686 custom_id = Some(next_line_id);
2687 }
2688 }
2689
2690 let is_valid = !spaces_after.is_empty()
2700 || rest.is_empty()
2701 || level > 1
2702 || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());
2703
2704 lines[i].heading = Some(HeadingInfo {
2705 level,
2706 style: HeadingStyle::ATX,
2707 marker: hashes.to_string(),
2708 marker_column,
2709 content_column,
2710 text: clean_text,
2711 custom_id,
2712 raw_text,
2713 has_closing_sequence: has_closing,
2714 closing_sequence: closing_seq,
2715 is_valid,
2716 });
2717 }
2718 else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2720 let next_line = content_lines[i + 1];
2721 if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2722 if front_matter_end > 0 && i < front_matter_end {
2724 continue;
2725 }
2726
2727 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2729 {
2730 continue;
2731 }
2732
2733 let content_line = line.trim();
2736
2737 if content_line.starts_with('-') || content_line.starts_with('*') || content_line.starts_with('+') {
2739 continue;
2740 }
2741
2742 if content_line.starts_with('_') {
2744 let non_ws: String = content_line.chars().filter(|c| !c.is_whitespace()).collect();
2745 if non_ws.len() >= 3 && non_ws.chars().all(|c| c == '_') {
2746 continue;
2747 }
2748 }
2749
2750 if let Some(first_char) = content_line.chars().next()
2752 && first_char.is_ascii_digit()
2753 {
2754 let num_end = content_line.chars().take_while(|c| c.is_ascii_digit()).count();
2755 if num_end < content_line.len() {
2756 let next = content_line.chars().nth(num_end);
2757 if next == Some('.') || next == Some(')') {
2758 continue;
2759 }
2760 }
2761 }
2762
2763 if ATX_HEADING_REGEX.is_match(line) {
2765 continue;
2766 }
2767
2768 if content_line.starts_with('>') {
2770 continue;
2771 }
2772
2773 let trimmed_start = line.trim_start();
2775 if trimmed_start.len() >= 3 {
2776 let first_three: String = trimmed_start.chars().take(3).collect();
2777 if first_three == "```" || first_three == "~~~" {
2778 continue;
2779 }
2780 }
2781
2782 if content_line.starts_with('<') {
2784 continue;
2785 }
2786
2787 let underline = next_line.trim();
2788
2789 let level = if underline.starts_with('=') { 1 } else { 2 };
2790 let style = if level == 1 {
2791 HeadingStyle::Setext1
2792 } else {
2793 HeadingStyle::Setext2
2794 };
2795
2796 let raw_text = line.trim().to_string();
2798 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2799
2800 if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2802 let attr_line = content_lines[i + 2];
2803 if !lines[i + 2].in_code_block
2804 && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2805 && let Some(attr_line_id) =
2806 crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2807 {
2808 custom_id = Some(attr_line_id);
2809 }
2810 }
2811
2812 lines[i].heading = Some(HeadingInfo {
2813 level,
2814 style,
2815 marker: underline.to_string(),
2816 marker_column: next_line.len() - next_line.trim_start().len(),
2817 content_column: lines[i].indent,
2818 text: clean_text,
2819 custom_id,
2820 raw_text,
2821 has_closing_sequence: false,
2822 closing_sequence: String::new(),
2823 is_valid: true, });
2825 }
2826 }
2827 }
2828 }
2829
2830 fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2832 const BLOCK_ELEMENTS: &[&str] = &[
2835 "address",
2836 "article",
2837 "aside",
2838 "audio",
2839 "blockquote",
2840 "canvas",
2841 "details",
2842 "dialog",
2843 "dd",
2844 "div",
2845 "dl",
2846 "dt",
2847 "embed",
2848 "fieldset",
2849 "figcaption",
2850 "figure",
2851 "footer",
2852 "form",
2853 "h1",
2854 "h2",
2855 "h3",
2856 "h4",
2857 "h5",
2858 "h6",
2859 "header",
2860 "hr",
2861 "iframe",
2862 "li",
2863 "main",
2864 "menu",
2865 "nav",
2866 "noscript",
2867 "object",
2868 "ol",
2869 "p",
2870 "picture",
2871 "pre",
2872 "script",
2873 "search",
2874 "section",
2875 "source",
2876 "style",
2877 "summary",
2878 "svg",
2879 "table",
2880 "tbody",
2881 "td",
2882 "template",
2883 "textarea",
2884 "tfoot",
2885 "th",
2886 "thead",
2887 "tr",
2888 "track",
2889 "ul",
2890 "video",
2891 ];
2892
2893 let mut i = 0;
2894 while i < lines.len() {
2895 if lines[i].in_code_block || lines[i].in_front_matter {
2897 i += 1;
2898 continue;
2899 }
2900
2901 let trimmed = lines[i].content(content).trim_start();
2902
2903 if trimmed.starts_with('<') && trimmed.len() > 1 {
2905 let after_bracket = &trimmed[1..];
2907 let is_closing = after_bracket.starts_with('/');
2908 let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2909
2910 let tag_name = tag_start
2912 .chars()
2913 .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2914 .collect::<String>()
2915 .to_lowercase();
2916
2917 if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2919 lines[i].in_html_block = true;
2921
2922 if !is_closing {
2925 let closing_tag = format!("</{tag_name}>");
2926 let allow_blank_lines = tag_name == "style" || tag_name == "script";
2928 let mut j = i + 1;
2929 let mut found_closing_tag = false;
2930 while j < lines.len() && j < i + 100 {
2931 if !allow_blank_lines && lines[j].is_blank {
2934 break;
2935 }
2936
2937 lines[j].in_html_block = true;
2938
2939 if lines[j].content(content).contains(&closing_tag) {
2941 found_closing_tag = true;
2942 }
2943
2944 if found_closing_tag {
2947 j += 1;
2948 while j < lines.len() && j < i + 100 {
2950 if lines[j].is_blank {
2951 break;
2952 }
2953 lines[j].in_html_block = true;
2954 j += 1;
2955 }
2956 break;
2957 }
2958 j += 1;
2959 }
2960 }
2961 }
2962 }
2963
2964 i += 1;
2965 }
2966 }
2967
2968 fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2971 if !flavor.supports_esm_blocks() {
2973 return;
2974 }
2975
2976 let mut in_multiline_import = false;
2977
2978 for line in lines.iter_mut() {
2979 if line.in_code_block || line.in_front_matter || line.in_html_comment {
2981 in_multiline_import = false;
2982 continue;
2983 }
2984
2985 let line_content = line.content(content);
2986 let trimmed = line_content.trim();
2987
2988 if in_multiline_import {
2990 line.in_esm_block = true;
2991 if trimmed.ends_with('\'')
2994 || trimmed.ends_with('"')
2995 || trimmed.ends_with("';")
2996 || trimmed.ends_with("\";")
2997 || line_content.contains(';')
2998 {
2999 in_multiline_import = false;
3000 }
3001 continue;
3002 }
3003
3004 if line.is_blank {
3006 continue;
3007 }
3008
3009 if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
3011 line.in_esm_block = true;
3012
3013 let is_import = trimmed.starts_with("import ");
3021
3022 let is_complete =
3024 trimmed.ends_with(';')
3026 || (trimmed.contains(" from ") && (trimmed.ends_with('\'') || trimmed.ends_with('"')))
3028 || (!is_import && !trimmed.contains(" from ") && (
3030 trimmed.starts_with("export const ")
3031 || trimmed.starts_with("export let ")
3032 || trimmed.starts_with("export var ")
3033 || trimmed.starts_with("export function ")
3034 || trimmed.starts_with("export class ")
3035 || trimmed.starts_with("export default ")
3036 ));
3037
3038 if !is_complete && is_import {
3039 if trimmed.contains('{') && !trimmed.contains('}') {
3043 in_multiline_import = true;
3044 }
3045 }
3046 }
3047 }
3048 }
3049
3050 fn detect_jsx_and_mdx_comments(
3053 content: &str,
3054 lines: &mut [LineInfo],
3055 flavor: MarkdownFlavor,
3056 code_blocks: &[(usize, usize)],
3057 ) -> (ByteRanges, ByteRanges) {
3058 if !flavor.supports_jsx() {
3060 return (Vec::new(), Vec::new());
3061 }
3062
3063 let mut jsx_expression_ranges: Vec<(usize, usize)> = Vec::new();
3064 let mut mdx_comment_ranges: Vec<(usize, usize)> = Vec::new();
3065
3066 if !content.contains('{') {
3068 return (jsx_expression_ranges, mdx_comment_ranges);
3069 }
3070
3071 let bytes = content.as_bytes();
3072 let mut i = 0;
3073
3074 while i < bytes.len() {
3075 if bytes[i] == b'{' {
3076 if code_blocks.iter().any(|(start, end)| i >= *start && i < *end) {
3078 i += 1;
3079 continue;
3080 }
3081
3082 let start = i;
3083
3084 if i + 2 < bytes.len() && &bytes[i + 1..i + 3] == b"/*" {
3086 let mut j = i + 3;
3088 while j + 2 < bytes.len() {
3089 if &bytes[j..j + 2] == b"*/" && j + 2 < bytes.len() && bytes[j + 2] == b'}' {
3090 let end = j + 3;
3091 mdx_comment_ranges.push((start, end));
3092
3093 Self::mark_lines_in_range(lines, content, start, end, |line| {
3095 line.in_mdx_comment = true;
3096 });
3097
3098 i = end;
3099 break;
3100 }
3101 j += 1;
3102 }
3103 if j + 2 >= bytes.len() {
3104 mdx_comment_ranges.push((start, bytes.len()));
3106 Self::mark_lines_in_range(lines, content, start, bytes.len(), |line| {
3107 line.in_mdx_comment = true;
3108 });
3109 break;
3110 }
3111 } else {
3112 let mut brace_depth = 1;
3115 let mut j = i + 1;
3116 let mut in_string = false;
3117 let mut string_char = b'"';
3118
3119 while j < bytes.len() && brace_depth > 0 {
3120 let c = bytes[j];
3121
3122 if !in_string && (c == b'"' || c == b'\'' || c == b'`') {
3124 in_string = true;
3125 string_char = c;
3126 } else if in_string && c == string_char && (j == 0 || bytes[j - 1] != b'\\') {
3127 in_string = false;
3128 } else if !in_string {
3129 if c == b'{' {
3130 brace_depth += 1;
3131 } else if c == b'}' {
3132 brace_depth -= 1;
3133 }
3134 }
3135 j += 1;
3136 }
3137
3138 if brace_depth == 0 {
3139 let end = j;
3140 jsx_expression_ranges.push((start, end));
3141
3142 Self::mark_lines_in_range(lines, content, start, end, |line| {
3144 line.in_jsx_expression = true;
3145 });
3146
3147 i = end;
3148 } else {
3149 i += 1;
3150 }
3151 }
3152 } else {
3153 i += 1;
3154 }
3155 }
3156
3157 (jsx_expression_ranges, mdx_comment_ranges)
3158 }
3159
3160 fn detect_mkdocs_line_info(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
3163 if flavor != MarkdownFlavor::MkDocs {
3164 return;
3165 }
3166
3167 use crate::utils::mkdocs_admonitions;
3168 use crate::utils::mkdocs_definition_lists;
3169 use crate::utils::mkdocs_tabs;
3170
3171 let content_lines: Vec<&str> = content.lines().collect();
3172
3173 let mut in_admonition = false;
3175 let mut admonition_indent = 0;
3176
3177 let mut in_tab = false;
3179 let mut tab_indent = 0;
3180
3181 let mut in_definition = false;
3183
3184 for (i, line) in content_lines.iter().enumerate() {
3185 if i >= lines.len() {
3186 break;
3187 }
3188
3189 if lines[i].in_code_block {
3191 continue;
3192 }
3193
3194 if mkdocs_admonitions::is_admonition_start(line) {
3196 in_admonition = true;
3197 admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3198 lines[i].in_admonition = true;
3199 } else if in_admonition {
3200 if line.trim().is_empty() {
3202 lines[i].in_admonition = true;
3204 } else if mkdocs_admonitions::is_admonition_content(line, admonition_indent) {
3205 lines[i].in_admonition = true;
3206 } else {
3207 in_admonition = false;
3209 if mkdocs_admonitions::is_admonition_start(line) {
3211 in_admonition = true;
3212 admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3213 lines[i].in_admonition = true;
3214 }
3215 }
3216 }
3217
3218 if mkdocs_tabs::is_tab_marker(line) {
3220 in_tab = true;
3221 tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3222 lines[i].in_content_tab = true;
3223 } else if in_tab {
3224 if line.trim().is_empty() {
3226 lines[i].in_content_tab = true;
3228 } else if mkdocs_tabs::is_tab_content(line, tab_indent) {
3229 lines[i].in_content_tab = true;
3230 } else {
3231 in_tab = false;
3233 if mkdocs_tabs::is_tab_marker(line) {
3235 in_tab = true;
3236 tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3237 lines[i].in_content_tab = true;
3238 }
3239 }
3240 }
3241
3242 if mkdocs_definition_lists::is_definition_line(line) {
3244 in_definition = true;
3245 lines[i].in_definition_list = true;
3246 } else if in_definition {
3247 if mkdocs_definition_lists::is_definition_continuation(line) {
3249 lines[i].in_definition_list = true;
3250 } else if line.trim().is_empty() {
3251 lines[i].in_definition_list = true;
3253 } else if mkdocs_definition_lists::could_be_term_line(line) {
3254 if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1])
3256 {
3257 lines[i].in_definition_list = true;
3258 } else {
3259 in_definition = false;
3260 }
3261 } else {
3262 in_definition = false;
3263 }
3264 } else if mkdocs_definition_lists::could_be_term_line(line) {
3265 if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1]) {
3267 lines[i].in_definition_list = true;
3268 in_definition = true;
3269 }
3270 }
3271 }
3272 }
3273
3274 fn mark_lines_in_range<F>(lines: &mut [LineInfo], content: &str, start: usize, end: usize, mut f: F)
3276 where
3277 F: FnMut(&mut LineInfo),
3278 {
3279 for line in lines.iter_mut() {
3281 let line_start = line.byte_offset;
3282 let line_end = line.byte_offset + line.byte_len;
3283
3284 if line_start < end && line_end > start {
3286 f(line);
3287 }
3288 }
3289
3290 let _ = content;
3292 }
3293
3294 fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
3296 let mut code_spans = Vec::new();
3297
3298 if !content.contains('`') {
3300 return code_spans;
3301 }
3302
3303 let parser = Parser::new(content).into_offset_iter();
3305
3306 for (event, range) in parser {
3307 if let Event::Code(_) = event {
3308 let start_pos = range.start;
3309 let end_pos = range.end;
3310
3311 let full_span = &content[start_pos..end_pos];
3313 let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
3314
3315 let content_start = start_pos + backtick_count;
3317 let content_end = end_pos - backtick_count;
3318 let span_content = if content_start < content_end {
3319 content[content_start..content_end].to_string()
3320 } else {
3321 String::new()
3322 };
3323
3324 let line_idx = lines
3327 .partition_point(|line| line.byte_offset <= start_pos)
3328 .saturating_sub(1);
3329 let line_num = line_idx + 1;
3330 let byte_col_start = start_pos - lines[line_idx].byte_offset;
3331
3332 let end_line_idx = lines
3334 .partition_point(|line| line.byte_offset <= end_pos)
3335 .saturating_sub(1);
3336 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3337
3338 let line_content = lines[line_idx].content(content);
3341 let col_start = if byte_col_start <= line_content.len() {
3342 line_content[..byte_col_start].chars().count()
3343 } else {
3344 line_content.chars().count()
3345 };
3346
3347 let end_line_content = lines[end_line_idx].content(content);
3348 let col_end = if byte_col_end <= end_line_content.len() {
3349 end_line_content[..byte_col_end].chars().count()
3350 } else {
3351 end_line_content.chars().count()
3352 };
3353
3354 code_spans.push(CodeSpan {
3355 line: line_num,
3356 end_line: end_line_idx + 1,
3357 start_col: col_start,
3358 end_col: col_end,
3359 byte_offset: start_pos,
3360 byte_end: end_pos,
3361 backtick_count,
3362 content: span_content,
3363 });
3364 }
3365 }
3366
3367 code_spans.sort_by_key(|span| span.byte_offset);
3369
3370 code_spans
3371 }
3372
3373 fn parse_math_spans(content: &str, lines: &[LineInfo]) -> Vec<MathSpan> {
3375 let mut math_spans = Vec::new();
3376
3377 if !content.contains('$') {
3379 return math_spans;
3380 }
3381
3382 let mut options = Options::empty();
3384 options.insert(Options::ENABLE_MATH);
3385 let parser = Parser::new_ext(content, options).into_offset_iter();
3386
3387 for (event, range) in parser {
3388 let (is_display, math_content) = match &event {
3389 Event::InlineMath(text) => (false, text.as_ref()),
3390 Event::DisplayMath(text) => (true, text.as_ref()),
3391 _ => continue,
3392 };
3393
3394 let start_pos = range.start;
3395 let end_pos = range.end;
3396
3397 let line_idx = lines
3399 .partition_point(|line| line.byte_offset <= start_pos)
3400 .saturating_sub(1);
3401 let line_num = line_idx + 1;
3402 let byte_col_start = start_pos - lines[line_idx].byte_offset;
3403
3404 let end_line_idx = lines
3406 .partition_point(|line| line.byte_offset <= end_pos)
3407 .saturating_sub(1);
3408 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3409
3410 let line_content = lines[line_idx].content(content);
3412 let col_start = if byte_col_start <= line_content.len() {
3413 line_content[..byte_col_start].chars().count()
3414 } else {
3415 line_content.chars().count()
3416 };
3417
3418 let end_line_content = lines[end_line_idx].content(content);
3419 let col_end = if byte_col_end <= end_line_content.len() {
3420 end_line_content[..byte_col_end].chars().count()
3421 } else {
3422 end_line_content.chars().count()
3423 };
3424
3425 math_spans.push(MathSpan {
3426 line: line_num,
3427 end_line: end_line_idx + 1,
3428 start_col: col_start,
3429 end_col: col_end,
3430 byte_offset: start_pos,
3431 byte_end: end_pos,
3432 is_display,
3433 content: math_content.to_string(),
3434 });
3435 }
3436
3437 math_spans.sort_by_key(|span| span.byte_offset);
3439
3440 math_spans
3441 }
3442
3443 fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
3454 const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
3456
3457 #[inline]
3460 fn reset_tracking_state(
3461 list_item: &ListItemInfo,
3462 has_list_breaking_content: &mut bool,
3463 min_continuation: &mut usize,
3464 ) {
3465 *has_list_breaking_content = false;
3466 let marker_width = if list_item.is_ordered {
3467 list_item.marker.len() + 1 } else {
3469 list_item.marker.len()
3470 };
3471 *min_continuation = if list_item.is_ordered {
3472 marker_width
3473 } else {
3474 UNORDERED_LIST_MIN_CONTINUATION_INDENT
3475 };
3476 }
3477
3478 let mut list_blocks = Vec::with_capacity(lines.len() / 10); let mut current_block: Option<ListBlock> = None;
3481 let mut last_list_item_line = 0;
3482 let mut current_indent_level = 0;
3483 let mut last_marker_width = 0;
3484
3485 let mut has_list_breaking_content_since_last_item = false;
3487 let mut min_continuation_for_tracking = 0;
3488
3489 for (line_idx, line_info) in lines.iter().enumerate() {
3490 let line_num = line_idx + 1;
3491
3492 if line_info.in_code_block {
3494 if let Some(ref mut block) = current_block {
3495 let min_continuation_indent =
3497 CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
3498
3499 let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
3501
3502 match context {
3503 CodeBlockContext::Indented => {
3504 block.end_line = line_num;
3506 continue;
3507 }
3508 CodeBlockContext::Standalone => {
3509 let completed_block = current_block.take().unwrap();
3511 list_blocks.push(completed_block);
3512 continue;
3513 }
3514 CodeBlockContext::Adjacent => {
3515 block.end_line = line_num;
3517 continue;
3518 }
3519 }
3520 } else {
3521 continue;
3523 }
3524 }
3525
3526 let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
3528 caps.get(0).unwrap().as_str().to_string()
3529 } else {
3530 String::new()
3531 };
3532
3533 if let Some(ref block) = current_block
3536 && line_info.list_item.is_none()
3537 && !line_info.is_blank
3538 && !line_info.in_code_span_continuation
3539 {
3540 let line_content = line_info.content(content).trim();
3541
3542 let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
3547
3548 let blockquote_prefix_changes = blockquote_prefix.trim() != block.blockquote_prefix.trim();
3551
3552 let breaks_list = line_info.heading.is_some()
3553 || line_content.starts_with("---")
3554 || line_content.starts_with("***")
3555 || line_content.starts_with("___")
3556 || crate::utils::skip_context::is_table_line(line_content)
3557 || blockquote_prefix_changes
3558 || (line_info.indent > 0
3559 && line_info.indent < min_continuation_for_tracking
3560 && !is_lazy_continuation);
3561
3562 if breaks_list {
3563 has_list_breaking_content_since_last_item = true;
3564 }
3565 }
3566
3567 if line_info.in_code_span_continuation
3570 && line_info.list_item.is_none()
3571 && let Some(ref mut block) = current_block
3572 {
3573 block.end_line = line_num;
3574 }
3575
3576 let effective_continuation_indent = if let Some(ref block) = current_block {
3582 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3583 let line_content = line_info.content(content);
3584 let line_bq_level = line_content
3585 .chars()
3586 .take_while(|c| *c == '>' || c.is_whitespace())
3587 .filter(|&c| c == '>')
3588 .count();
3589 if line_bq_level > 0 && line_bq_level == block_bq_level {
3590 let mut pos = 0;
3592 let mut found_markers = 0;
3593 for c in line_content.chars() {
3594 pos += c.len_utf8();
3595 if c == '>' {
3596 found_markers += 1;
3597 if found_markers == line_bq_level {
3598 if line_content.get(pos..pos + 1) == Some(" ") {
3599 pos += 1;
3600 }
3601 break;
3602 }
3603 }
3604 }
3605 let after_bq = &line_content[pos..];
3606 after_bq.len() - after_bq.trim_start().len()
3607 } else {
3608 line_info.indent
3609 }
3610 } else {
3611 line_info.indent
3612 };
3613 let adjusted_min_continuation_for_tracking = if let Some(ref block) = current_block {
3614 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3615 if block_bq_level > 0 {
3616 if block.is_ordered { last_marker_width } else { 2 }
3617 } else {
3618 min_continuation_for_tracking
3619 }
3620 } else {
3621 min_continuation_for_tracking
3622 };
3623 let is_valid_continuation = effective_continuation_indent >= adjusted_min_continuation_for_tracking
3624 || (line_info.indent == 0 && !line_info.is_blank); if std::env::var("RUMDL_DEBUG_LIST").is_ok() && line_info.list_item.is_none() && !line_info.is_blank {
3627 eprintln!(
3628 "[DEBUG] Line {}: checking continuation - indent={}, min_cont={}, is_valid={}, in_code_span={}, in_code_block={}, has_block={}",
3629 line_num,
3630 effective_continuation_indent,
3631 adjusted_min_continuation_for_tracking,
3632 is_valid_continuation,
3633 line_info.in_code_span_continuation,
3634 line_info.in_code_block,
3635 current_block.is_some()
3636 );
3637 }
3638
3639 if !line_info.in_code_span_continuation
3640 && line_info.list_item.is_none()
3641 && !line_info.is_blank
3642 && !line_info.in_code_block
3643 && is_valid_continuation
3644 && let Some(ref mut block) = current_block
3645 {
3646 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3647 eprintln!(
3648 "[DEBUG] Line {}: extending block.end_line from {} to {}",
3649 line_num, block.end_line, line_num
3650 );
3651 }
3652 block.end_line = line_num;
3653 }
3654
3655 if let Some(list_item) = &line_info.list_item {
3657 let item_indent = list_item.marker_column;
3659 let nesting = item_indent / 2; if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3662 eprintln!(
3663 "[DEBUG] Line {}: list item found, marker={:?}, indent={}",
3664 line_num, list_item.marker, item_indent
3665 );
3666 }
3667
3668 if let Some(ref mut block) = current_block {
3669 let is_nested = nesting > block.nesting_level;
3673 let same_type =
3674 (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
3675 let same_context = block.blockquote_prefix == blockquote_prefix;
3676 let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
3678
3679 let marker_compatible =
3681 block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
3682
3683 let has_non_list_content = has_list_breaking_content_since_last_item;
3686
3687 let mut continues_list = if is_nested {
3691 same_context && reasonable_distance && !has_non_list_content
3693 } else {
3694 same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
3696 };
3697
3698 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3699 eprintln!(
3700 "[DEBUG] Line {}: continues_list={}, is_nested={}, same_type={}, same_context={}, reasonable_distance={}, marker_compatible={}, has_non_list_content={}, last_item={}, block.end_line={}",
3701 line_num,
3702 continues_list,
3703 is_nested,
3704 same_type,
3705 same_context,
3706 reasonable_distance,
3707 marker_compatible,
3708 has_non_list_content,
3709 last_list_item_line,
3710 block.end_line
3711 );
3712 }
3713
3714 if !continues_list
3718 && (is_nested || same_type)
3719 && reasonable_distance
3720 && line_num > 0
3721 && block.end_line == line_num - 1
3722 {
3723 if block.item_lines.contains(&(line_num - 1)) {
3726 continues_list = true;
3728 } else {
3729 continues_list = true;
3733 }
3734 }
3735
3736 if continues_list {
3737 block.end_line = line_num;
3739 block.item_lines.push(line_num);
3740
3741 block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
3743 list_item.marker.len() + 1
3744 } else {
3745 list_item.marker.len()
3746 });
3747
3748 if !block.is_ordered
3750 && block.marker.is_some()
3751 && block.marker.as_ref() != Some(&list_item.marker)
3752 {
3753 block.marker = None;
3755 }
3756
3757 reset_tracking_state(
3759 list_item,
3760 &mut has_list_breaking_content_since_last_item,
3761 &mut min_continuation_for_tracking,
3762 );
3763 } else {
3764 if !same_type
3769 && !is_nested
3770 && let Some(&last_item) = block.item_lines.last()
3771 {
3772 block.end_line = last_item;
3773 }
3774
3775 list_blocks.push(block.clone());
3776
3777 *block = ListBlock {
3778 start_line: line_num,
3779 end_line: line_num,
3780 is_ordered: list_item.is_ordered,
3781 marker: if list_item.is_ordered {
3782 None
3783 } else {
3784 Some(list_item.marker.clone())
3785 },
3786 blockquote_prefix: blockquote_prefix.clone(),
3787 item_lines: vec![line_num],
3788 nesting_level: nesting,
3789 max_marker_width: if list_item.is_ordered {
3790 list_item.marker.len() + 1
3791 } else {
3792 list_item.marker.len()
3793 },
3794 };
3795
3796 reset_tracking_state(
3798 list_item,
3799 &mut has_list_breaking_content_since_last_item,
3800 &mut min_continuation_for_tracking,
3801 );
3802 }
3803 } else {
3804 current_block = Some(ListBlock {
3806 start_line: line_num,
3807 end_line: line_num,
3808 is_ordered: list_item.is_ordered,
3809 marker: if list_item.is_ordered {
3810 None
3811 } else {
3812 Some(list_item.marker.clone())
3813 },
3814 blockquote_prefix,
3815 item_lines: vec![line_num],
3816 nesting_level: nesting,
3817 max_marker_width: list_item.marker.len(),
3818 });
3819
3820 reset_tracking_state(
3822 list_item,
3823 &mut has_list_breaking_content_since_last_item,
3824 &mut min_continuation_for_tracking,
3825 );
3826 }
3827
3828 last_list_item_line = line_num;
3829 current_indent_level = item_indent;
3830 last_marker_width = if list_item.is_ordered {
3831 list_item.marker.len() + 1 } else {
3833 list_item.marker.len()
3834 };
3835 } else if let Some(ref mut block) = current_block {
3836 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3838 eprintln!(
3839 "[DEBUG] Line {}: non-list-item, is_blank={}, block exists",
3840 line_num, line_info.is_blank
3841 );
3842 }
3843
3844 let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
3852 lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
3853 } else {
3854 false
3855 };
3856
3857 let min_continuation_indent = if block.is_ordered {
3861 current_indent_level + last_marker_width
3862 } else {
3863 current_indent_level + 2 };
3865
3866 if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
3867 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3869 eprintln!(
3870 "[DEBUG] Line {}: indented continuation (indent={}, min={})",
3871 line_num, line_info.indent, min_continuation_indent
3872 );
3873 }
3874 block.end_line = line_num;
3875 } else if line_info.is_blank {
3876 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3879 eprintln!("[DEBUG] Line {line_num}: entering blank line handling");
3880 }
3881 let mut check_idx = line_idx + 1;
3882 let mut found_continuation = false;
3883
3884 while check_idx < lines.len() && lines[check_idx].is_blank {
3886 check_idx += 1;
3887 }
3888
3889 if check_idx < lines.len() {
3890 let next_line = &lines[check_idx];
3891 let next_content = next_line.content(content);
3893 let block_bq_level_for_indent = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3896 let next_bq_level_for_indent = next_content
3897 .chars()
3898 .take_while(|c| *c == '>' || c.is_whitespace())
3899 .filter(|&c| c == '>')
3900 .count();
3901 let effective_indent =
3902 if next_bq_level_for_indent > 0 && next_bq_level_for_indent == block_bq_level_for_indent {
3903 let mut pos = 0;
3906 let mut found_markers = 0;
3907 for c in next_content.chars() {
3908 pos += c.len_utf8();
3909 if c == '>' {
3910 found_markers += 1;
3911 if found_markers == next_bq_level_for_indent {
3912 if next_content.get(pos..pos + 1) == Some(" ") {
3914 pos += 1;
3915 }
3916 break;
3917 }
3918 }
3919 }
3920 let after_blockquote_marker = &next_content[pos..];
3921 after_blockquote_marker.len() - after_blockquote_marker.trim_start().len()
3922 } else {
3923 next_line.indent
3924 };
3925 let adjusted_min_continuation = if block_bq_level_for_indent > 0 {
3928 if block.is_ordered { last_marker_width } else { 2 }
3931 } else {
3932 min_continuation_indent
3933 };
3934 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3936 eprintln!(
3937 "[DEBUG] Blank line {} checking next line {}: effective_indent={}, adjusted_min={}, next_is_list={}, in_code_block={}",
3938 line_num,
3939 check_idx + 1,
3940 effective_indent,
3941 adjusted_min_continuation,
3942 next_line.list_item.is_some(),
3943 next_line.in_code_block
3944 );
3945 }
3946 if !next_line.in_code_block && effective_indent >= adjusted_min_continuation {
3947 found_continuation = true;
3948 }
3949 else if !next_line.in_code_block
3951 && next_line.list_item.is_some()
3952 && let Some(item) = &next_line.list_item
3953 {
3954 let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
3955 .find(next_line.content(content))
3956 .map_or(String::new(), |m| m.as_str().to_string());
3957 if item.marker_column == current_indent_level
3958 && item.is_ordered == block.is_ordered
3959 && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
3960 {
3961 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3965 let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
3966 if let Some(between_line) = lines.get(idx) {
3967 let between_content = between_line.content(content);
3968 let trimmed = between_content.trim();
3969 if trimmed.is_empty() {
3971 return false;
3972 }
3973 let line_indent = between_content.len() - between_content.trim_start().len();
3975
3976 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
3978 .find(between_content)
3979 .map_or(String::new(), |m| m.as_str().to_string());
3980 let between_bq_level = between_bq_prefix.chars().filter(|&c| c == '>').count();
3981 let blockquote_level_changed =
3982 trimmed.starts_with(">") && between_bq_level != block_bq_level;
3983
3984 if trimmed.starts_with("```")
3986 || trimmed.starts_with("~~~")
3987 || trimmed.starts_with("---")
3988 || trimmed.starts_with("***")
3989 || trimmed.starts_with("___")
3990 || blockquote_level_changed
3991 || crate::utils::skip_context::is_table_line(trimmed)
3992 || between_line.heading.is_some()
3993 {
3994 return true; }
3996
3997 line_indent >= min_continuation_indent
3999 } else {
4000 false
4001 }
4002 });
4003
4004 if block.is_ordered {
4005 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4008 if let Some(between_line) = lines.get(idx) {
4009 let between_content = between_line.content(content);
4010 let trimmed = between_content.trim();
4011 if trimmed.is_empty() {
4012 return false;
4013 }
4014 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4016 .find(between_content)
4017 .map_or(String::new(), |m| m.as_str().to_string());
4018 let between_bq_level =
4019 between_bq_prefix.chars().filter(|&c| c == '>').count();
4020 let blockquote_level_changed =
4021 trimmed.starts_with(">") && between_bq_level != block_bq_level;
4022 trimmed.starts_with("```")
4024 || trimmed.starts_with("~~~")
4025 || trimmed.starts_with("---")
4026 || trimmed.starts_with("***")
4027 || trimmed.starts_with("___")
4028 || blockquote_level_changed
4029 || crate::utils::skip_context::is_table_line(trimmed)
4030 || between_line.heading.is_some()
4031 } else {
4032 false
4033 }
4034 });
4035 found_continuation = !has_structural_separators;
4036 } else {
4037 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4039 if let Some(between_line) = lines.get(idx) {
4040 let between_content = between_line.content(content);
4041 let trimmed = between_content.trim();
4042 if trimmed.is_empty() {
4043 return false;
4044 }
4045 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4047 .find(between_content)
4048 .map_or(String::new(), |m| m.as_str().to_string());
4049 let between_bq_level =
4050 between_bq_prefix.chars().filter(|&c| c == '>').count();
4051 let blockquote_level_changed =
4052 trimmed.starts_with(">") && between_bq_level != block_bq_level;
4053 trimmed.starts_with("```")
4055 || trimmed.starts_with("~~~")
4056 || trimmed.starts_with("---")
4057 || trimmed.starts_with("***")
4058 || trimmed.starts_with("___")
4059 || blockquote_level_changed
4060 || crate::utils::skip_context::is_table_line(trimmed)
4061 || between_line.heading.is_some()
4062 } else {
4063 false
4064 }
4065 });
4066 found_continuation = !has_structural_separators;
4067 }
4068 }
4069 }
4070 }
4071
4072 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4073 eprintln!("[DEBUG] Blank line {line_num} final: found_continuation={found_continuation}");
4074 }
4075 if found_continuation {
4076 block.end_line = line_num;
4078 } else {
4079 list_blocks.push(block.clone());
4081 current_block = None;
4082 }
4083 } else {
4084 let min_required_indent = if block.is_ordered {
4087 current_indent_level + last_marker_width
4088 } else {
4089 current_indent_level + 2
4090 };
4091
4092 let line_content = line_info.content(content).trim();
4097
4098 let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
4100
4101 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4104 let current_bq_level = blockquote_prefix.chars().filter(|&c| c == '>').count();
4105 let blockquote_level_changed = line_content.starts_with(">") && current_bq_level != block_bq_level;
4106
4107 let is_structural_separator = line_info.heading.is_some()
4108 || line_content.starts_with("```")
4109 || line_content.starts_with("~~~")
4110 || line_content.starts_with("---")
4111 || line_content.starts_with("***")
4112 || line_content.starts_with("___")
4113 || blockquote_level_changed
4114 || looks_like_table;
4115
4116 let is_lazy_continuation = !is_structural_separator
4120 && !line_info.is_blank
4121 && (line_info.indent == 0
4122 || line_info.indent >= min_required_indent
4123 || line_info.in_code_span_continuation);
4124
4125 if is_lazy_continuation {
4126 block.end_line = line_num;
4129 } else {
4130 list_blocks.push(block.clone());
4132 current_block = None;
4133 }
4134 }
4135 }
4136 }
4137
4138 if let Some(block) = current_block {
4140 list_blocks.push(block);
4141 }
4142
4143 merge_adjacent_list_blocks(content, &mut list_blocks, lines);
4145
4146 list_blocks
4147 }
4148
4149 fn compute_char_frequency(content: &str) -> CharFrequency {
4151 let mut frequency = CharFrequency::default();
4152
4153 for ch in content.chars() {
4154 match ch {
4155 '#' => frequency.hash_count += 1,
4156 '*' => frequency.asterisk_count += 1,
4157 '_' => frequency.underscore_count += 1,
4158 '-' => frequency.hyphen_count += 1,
4159 '+' => frequency.plus_count += 1,
4160 '>' => frequency.gt_count += 1,
4161 '|' => frequency.pipe_count += 1,
4162 '[' => frequency.bracket_count += 1,
4163 '`' => frequency.backtick_count += 1,
4164 '<' => frequency.lt_count += 1,
4165 '!' => frequency.exclamation_count += 1,
4166 '\n' => frequency.newline_count += 1,
4167 _ => {}
4168 }
4169 }
4170
4171 frequency
4172 }
4173
4174 fn parse_html_tags(
4176 content: &str,
4177 lines: &[LineInfo],
4178 code_blocks: &[(usize, usize)],
4179 flavor: MarkdownFlavor,
4180 ) -> Vec<HtmlTag> {
4181 static HTML_TAG_REGEX: LazyLock<regex::Regex> =
4182 LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
4183
4184 let mut html_tags = Vec::with_capacity(content.matches('<').count());
4185
4186 for cap in HTML_TAG_REGEX.captures_iter(content) {
4187 let full_match = cap.get(0).unwrap();
4188 let match_start = full_match.start();
4189 let match_end = full_match.end();
4190
4191 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4193 continue;
4194 }
4195
4196 let is_closing = !cap.get(1).unwrap().as_str().is_empty();
4197 let tag_name_original = cap.get(2).unwrap().as_str();
4198 let tag_name = tag_name_original.to_lowercase();
4199 let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
4200
4201 if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
4204 continue;
4205 }
4206
4207 let mut line_num = 1;
4209 let mut col_start = match_start;
4210 let mut col_end = match_end;
4211 for (idx, line_info) in lines.iter().enumerate() {
4212 if match_start >= line_info.byte_offset {
4213 line_num = idx + 1;
4214 col_start = match_start - line_info.byte_offset;
4215 col_end = match_end - line_info.byte_offset;
4216 } else {
4217 break;
4218 }
4219 }
4220
4221 html_tags.push(HtmlTag {
4222 line: line_num,
4223 start_col: col_start,
4224 end_col: col_end,
4225 byte_offset: match_start,
4226 byte_end: match_end,
4227 tag_name,
4228 is_closing,
4229 is_self_closing,
4230 raw_content: full_match.as_str().to_string(),
4231 });
4232 }
4233
4234 html_tags
4235 }
4236
4237 fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
4239 let mut table_rows = Vec::with_capacity(lines.len() / 20);
4240
4241 for (line_idx, line_info) in lines.iter().enumerate() {
4242 if line_info.in_code_block || line_info.is_blank {
4244 continue;
4245 }
4246
4247 let line = line_info.content(content);
4248 let line_num = line_idx + 1;
4249
4250 if !line.contains('|') {
4252 continue;
4253 }
4254
4255 let parts: Vec<&str> = line.split('|').collect();
4257 let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
4258
4259 let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
4261 let mut column_alignments = Vec::new();
4262
4263 if is_separator {
4264 for part in &parts[1..parts.len() - 1] {
4265 let trimmed = part.trim();
4267 let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
4268 "center".to_string()
4269 } else if trimmed.ends_with(':') {
4270 "right".to_string()
4271 } else if trimmed.starts_with(':') {
4272 "left".to_string()
4273 } else {
4274 "none".to_string()
4275 };
4276 column_alignments.push(alignment);
4277 }
4278 }
4279
4280 table_rows.push(TableRow {
4281 line: line_num,
4282 is_separator,
4283 column_count,
4284 column_alignments,
4285 });
4286 }
4287
4288 table_rows
4289 }
4290
4291 fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
4293 let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
4294
4295 for cap in URL_SIMPLE_REGEX.captures_iter(content) {
4297 let full_match = cap.get(0).unwrap();
4298 let match_start = full_match.start();
4299 let match_end = full_match.end();
4300
4301 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4303 continue;
4304 }
4305
4306 let preceding_char = if match_start > 0 {
4308 content.chars().nth(match_start - 1)
4309 } else {
4310 None
4311 };
4312 let following_char = content.chars().nth(match_end);
4313
4314 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4315 continue;
4316 }
4317 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4318 continue;
4319 }
4320
4321 let url = full_match.as_str();
4322 let url_type = if url.starts_with("https://") {
4323 "https"
4324 } else if url.starts_with("http://") {
4325 "http"
4326 } else if url.starts_with("ftp://") {
4327 "ftp"
4328 } else {
4329 "other"
4330 };
4331
4332 let mut line_num = 1;
4334 let mut col_start = match_start;
4335 let mut col_end = match_end;
4336 for (idx, line_info) in lines.iter().enumerate() {
4337 if match_start >= line_info.byte_offset {
4338 line_num = idx + 1;
4339 col_start = match_start - line_info.byte_offset;
4340 col_end = match_end - line_info.byte_offset;
4341 } else {
4342 break;
4343 }
4344 }
4345
4346 bare_urls.push(BareUrl {
4347 line: line_num,
4348 start_col: col_start,
4349 end_col: col_end,
4350 byte_offset: match_start,
4351 byte_end: match_end,
4352 url: url.to_string(),
4353 url_type: url_type.to_string(),
4354 });
4355 }
4356
4357 for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
4359 let full_match = cap.get(0).unwrap();
4360 let match_start = full_match.start();
4361 let match_end = full_match.end();
4362
4363 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4365 continue;
4366 }
4367
4368 let preceding_char = if match_start > 0 {
4370 content.chars().nth(match_start - 1)
4371 } else {
4372 None
4373 };
4374 let following_char = content.chars().nth(match_end);
4375
4376 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4377 continue;
4378 }
4379 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4380 continue;
4381 }
4382
4383 let email = full_match.as_str();
4384
4385 let mut line_num = 1;
4387 let mut col_start = match_start;
4388 let mut col_end = match_end;
4389 for (idx, line_info) in lines.iter().enumerate() {
4390 if match_start >= line_info.byte_offset {
4391 line_num = idx + 1;
4392 col_start = match_start - line_info.byte_offset;
4393 col_end = match_end - line_info.byte_offset;
4394 } else {
4395 break;
4396 }
4397 }
4398
4399 bare_urls.push(BareUrl {
4400 line: line_num,
4401 start_col: col_start,
4402 end_col: col_end,
4403 byte_offset: match_start,
4404 byte_end: match_end,
4405 url: email.to_string(),
4406 url_type: "email".to_string(),
4407 });
4408 }
4409
4410 bare_urls
4411 }
4412
    /// Returns a `ValidHeadingsIter` constructed over this context's `lines`.
    ///
    /// NOTE(review): presumably the iterator yields only headings whose `is_valid`
    /// flag is set - confirm against `ValidHeadingsIter`.
    #[must_use]
    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
        ValidHeadingsIter::new(&self.lines)
    }
4436
4437 #[must_use]
4441 pub fn has_valid_headings(&self) -> bool {
4442 self.lines
4443 .iter()
4444 .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
4445 }
4446}
4447
4448fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
4450 if list_blocks.len() < 2 {
4451 return;
4452 }
4453
4454 let mut merger = ListBlockMerger::new(content, lines);
4455 *list_blocks = merger.merge(list_blocks);
4456}
4457
/// Borrowed view of a document used while deciding which list blocks to merge.
struct ListBlockMerger<'a> {
    // Full document text; line contents are sliced out of it via `LineInfo::content`.
    content: &'a str,
    // Per-line metadata, indexed by `line_number - 1`.
    lines: &'a [LineInfo],
}
4463
4464impl<'a> ListBlockMerger<'a> {
4465 fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
4466 Self { content, lines }
4467 }
4468
4469 fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
4470 let mut merged = Vec::with_capacity(list_blocks.len());
4471 let mut current = list_blocks[0].clone();
4472
4473 for next in list_blocks.iter().skip(1) {
4474 if self.should_merge_blocks(¤t, next) {
4475 current = self.merge_two_blocks(current, next);
4476 } else {
4477 merged.push(current);
4478 current = next.clone();
4479 }
4480 }
4481
4482 merged.push(current);
4483 merged
4484 }
4485
4486 fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
4488 if !self.blocks_are_compatible(current, next) {
4490 return false;
4491 }
4492
4493 let spacing = self.analyze_spacing_between(current, next);
4495 match spacing {
4496 BlockSpacing::Consecutive => true,
4497 BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
4498 BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
4499 self.can_merge_with_content_between(current, next)
4500 }
4501 }
4502 }
4503
4504 fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
4506 current.is_ordered == next.is_ordered
4507 && current.blockquote_prefix == next.blockquote_prefix
4508 && current.nesting_level == next.nesting_level
4509 }
4510
4511 fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
4513 let gap = next.start_line - current.end_line;
4514
4515 match gap {
4516 1 => BlockSpacing::Consecutive,
4517 2 => BlockSpacing::SingleBlank,
4518 _ if gap > 2 => {
4519 if self.has_only_blank_lines_between(current, next) {
4520 BlockSpacing::MultipleBlanks
4521 } else {
4522 BlockSpacing::ContentBetween
4523 }
4524 }
4525 _ => BlockSpacing::Consecutive, }
4527 }
4528
4529 fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4531 if has_meaningful_content_between(self.content, current, next, self.lines) {
4534 return false; }
4536
4537 !current.is_ordered && current.marker == next.marker
4539 }
4540
4541 fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4543 if has_meaningful_content_between(self.content, current, next, self.lines) {
4545 return false; }
4547
4548 current.is_ordered && next.is_ordered
4550 }
4551
4552 fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4554 for line_num in (current.end_line + 1)..next.start_line {
4555 if let Some(line_info) = self.lines.get(line_num - 1)
4556 && !line_info.content(self.content).trim().is_empty()
4557 {
4558 return false;
4559 }
4560 }
4561 true
4562 }
4563
4564 fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
4566 current.end_line = next.end_line;
4567 current.item_lines.extend_from_slice(&next.item_lines);
4568
4569 current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
4571
4572 if !current.is_ordered && self.markers_differ(¤t, next) {
4574 current.marker = None; }
4576
4577 current
4578 }
4579
4580 fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
4582 current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
4583 }
4584}
4585
/// What separates two neighbouring list blocks.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// The second block starts on the line right after the first one ends.
    Consecutive,
    /// Exactly one line lies between the blocks.
    SingleBlank,
    /// Several lines lie between the blocks and all of them are blank.
    MultipleBlanks,
    /// Several lines lie between the blocks and at least one is not blank.
    ContentBetween,
}
4594
4595fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
4597 for line_num in (current.end_line + 1)..next.start_line {
4599 if let Some(line_info) = lines.get(line_num - 1) {
4600 let trimmed = line_info.content(content).trim();
4602
4603 if trimmed.is_empty() {
4605 continue;
4606 }
4607
4608 if line_info.heading.is_some() {
4612 return true; }
4614
4615 if is_horizontal_rule(trimmed) {
4617 return true; }
4619
4620 if crate::utils::skip_context::is_table_line(trimmed) {
4622 return true; }
4624
4625 if trimmed.starts_with('>') {
4627 return true; }
4629
4630 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
4632 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4633
4634 let min_continuation_indent = if current.is_ordered {
4636 current.nesting_level + current.max_marker_width + 1 } else {
4638 current.nesting_level + 2
4639 };
4640
4641 if line_indent < min_continuation_indent {
4642 return true; }
4645 }
4646
4647 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4649
4650 let min_indent = if current.is_ordered {
4652 current.nesting_level + current.max_marker_width
4653 } else {
4654 current.nesting_level + 2
4655 };
4656
4657 if line_indent < min_indent {
4659 return true; }
4661
4662 }
4665 }
4666
4667 false
4669}
4670
4671pub fn is_horizontal_rule_line(line: &str) -> bool {
4678 let leading_spaces = line.len() - line.trim_start_matches(' ').len();
4680 if leading_spaces > 3 || line.starts_with('\t') {
4681 return false;
4682 }
4683
4684 is_horizontal_rule_content(line.trim())
4685}
4686
/// Checks whether already-trimmed text forms a horizontal rule.
///
/// The text must begin with `-`, `*` or `_`, contain at least three occurrences of
/// that same character, and contain nothing else except spaces and tabs.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    if trimmed.len() < 3 {
        return false;
    }

    let marker = match trimmed.chars().next() {
        Some(c @ ('-' | '*' | '_')) => c,
        _ => return false,
    };

    let mut marker_count = 0;
    for ch in trimmed.chars() {
        match ch {
            c if c == marker => marker_count += 1,
            ' ' | '\t' => {}
            _ => return false,
        }
    }

    marker_count >= 3
}
4711
/// Alias for `is_horizontal_rule_content`: checks already-trimmed text and performs
/// no indentation check of its own.
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}
4716
4717#[cfg(test)]
4719mod tests {
4720 use super::*;
4721
4722 #[test]
4723 fn test_empty_content() {
4724 let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
4725 assert_eq!(ctx.content, "");
4726 assert_eq!(ctx.line_offsets, vec![0]);
4727 assert_eq!(ctx.offset_to_line_col(0), (1, 1));
4728 assert_eq!(ctx.lines.len(), 0);
4729 }
4730
4731 #[test]
4732 fn test_single_line() {
4733 let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
4734 assert_eq!(ctx.content, "# Hello");
4735 assert_eq!(ctx.line_offsets, vec![0]);
4736 assert_eq!(ctx.offset_to_line_col(0), (1, 1));
4737 assert_eq!(ctx.offset_to_line_col(3), (1, 4));
4738 }
4739
4740 #[test]
4741 fn test_multi_line() {
4742 let content = "# Title\n\nSecond line\nThird line";
4743 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4744 assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
4745 assert_eq!(ctx.offset_to_line_col(0), (1, 1)); assert_eq!(ctx.offset_to_line_col(8), (2, 1)); assert_eq!(ctx.offset_to_line_col(9), (3, 1)); assert_eq!(ctx.offset_to_line_col(15), (3, 7)); assert_eq!(ctx.offset_to_line_col(21), (4, 1)); }
4752
4753 #[test]
4754 fn test_line_info() {
4755 let content = "# Title\n indented\n\ncode:\n```rust\nfn main() {}\n```";
4756 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4757
4758 assert_eq!(ctx.lines.len(), 7);
4760
4761 let line1 = &ctx.lines[0];
4763 assert_eq!(line1.content(ctx.content), "# Title");
4764 assert_eq!(line1.byte_offset, 0);
4765 assert_eq!(line1.indent, 0);
4766 assert!(!line1.is_blank);
4767 assert!(!line1.in_code_block);
4768 assert!(line1.list_item.is_none());
4769
4770 let line2 = &ctx.lines[1];
4772 assert_eq!(line2.content(ctx.content), " indented");
4773 assert_eq!(line2.byte_offset, 8);
4774 assert_eq!(line2.indent, 4);
4775 assert!(!line2.is_blank);
4776
4777 let line3 = &ctx.lines[2];
4779 assert_eq!(line3.content(ctx.content), "");
4780 assert!(line3.is_blank);
4781
4782 assert_eq!(ctx.line_to_byte_offset(1), Some(0));
4784 assert_eq!(ctx.line_to_byte_offset(2), Some(8));
4785 assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
4786 assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
4787 }
4788
4789 #[test]
4790 fn test_list_item_detection() {
4791 let content = "- Unordered item\n * Nested item\n1. Ordered item\n 2) Nested ordered\n\nNot a list";
4792 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4793
4794 let line1 = &ctx.lines[0];
4796 assert!(line1.list_item.is_some());
4797 let list1 = line1.list_item.as_ref().unwrap();
4798 assert_eq!(list1.marker, "-");
4799 assert!(!list1.is_ordered);
4800 assert_eq!(list1.marker_column, 0);
4801 assert_eq!(list1.content_column, 2);
4802
4803 let line2 = &ctx.lines[1];
4805 assert!(line2.list_item.is_some());
4806 let list2 = line2.list_item.as_ref().unwrap();
4807 assert_eq!(list2.marker, "*");
4808 assert_eq!(list2.marker_column, 2);
4809
4810 let line3 = &ctx.lines[2];
4812 assert!(line3.list_item.is_some());
4813 let list3 = line3.list_item.as_ref().unwrap();
4814 assert_eq!(list3.marker, "1.");
4815 assert!(list3.is_ordered);
4816 assert_eq!(list3.number, Some(1));
4817
4818 let line6 = &ctx.lines[5];
4820 assert!(line6.list_item.is_none());
4821 }
4822
4823 #[test]
4824 fn test_offset_to_line_col_edge_cases() {
4825 let content = "a\nb\nc";
4826 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4827 assert_eq!(ctx.offset_to_line_col(0), (1, 1)); assert_eq!(ctx.offset_to_line_col(1), (1, 2)); assert_eq!(ctx.offset_to_line_col(2), (2, 1)); assert_eq!(ctx.offset_to_line_col(3), (2, 2)); assert_eq!(ctx.offset_to_line_col(4), (3, 1)); assert_eq!(ctx.offset_to_line_col(5), (3, 2)); }
4835
4836 #[test]
4837 fn test_mdx_esm_blocks() {
4838 let content = r##"import {Chart} from './snowfall.js'
4839export const year = 2023
4840
4841# Last year's snowfall
4842
4843In {year}, the snowfall was above average.
4844It was followed by a warm spring which caused
4845flood conditions in many of the nearby rivers.
4846
4847<Chart color="#fcb32c" year={year} />
4848"##;
4849
4850 let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);
4851
4852 assert_eq!(ctx.lines.len(), 10);
4854 assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
4855 assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
4856 assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
4857 assert!(
4858 !ctx.lines[3].in_esm_block,
4859 "Line 4 (heading) should NOT be in_esm_block"
4860 );
4861 assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
4862 assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
4863 }
4864
4865 #[test]
4866 fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
4867 let content = r#"import {Chart} from './snowfall.js'
4868export const year = 2023
4869
4870# Last year's snowfall
4871"#;
4872
4873 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4874
4875 assert!(
4877 !ctx.lines[0].in_esm_block,
4878 "Line 1 should NOT be in_esm_block in Standard flavor"
4879 );
4880 assert!(
4881 !ctx.lines[1].in_esm_block,
4882 "Line 2 should NOT be in_esm_block in Standard flavor"
4883 );
4884 }
4885
4886 #[test]
4887 fn test_blockquote_with_indented_content() {
4888 let content = r#"# Heading
4892
4893> -S socket-path
4894> More text
4895"#;
4896 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4897
4898 assert!(
4900 ctx.lines.get(2).is_some_and(|l| l.blockquote.is_some()),
4901 "Line 3 should be a blockquote"
4902 );
4903 assert!(
4905 ctx.lines.get(3).is_some_and(|l| l.blockquote.is_some()),
4906 "Line 4 should be a blockquote"
4907 );
4908
4909 let bq3 = ctx.lines.get(2).unwrap().blockquote.as_ref().unwrap();
4912 assert_eq!(bq3.content, "-S socket-path");
4913 assert_eq!(bq3.nesting_level, 1);
4914 assert!(bq3.has_multiple_spaces_after_marker);
4916
4917 let bq4 = ctx.lines.get(3).unwrap().blockquote.as_ref().unwrap();
4918 assert_eq!(bq4.content, "More text");
4919 assert_eq!(bq4.nesting_level, 1);
4920 }
4921
4922 #[test]
4923 fn test_footnote_definitions_not_parsed_as_reference_defs() {
4924 let content = r#"# Title
4926
4927A footnote[^1].
4928
4929[^1]: This is the footnote content.
4930
4931[^note]: Another footnote with [link](https://example.com).
4932
4933[regular]: ./path.md "A real reference definition"
4934"#;
4935 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4936
4937 assert_eq!(
4939 ctx.reference_defs.len(),
4940 1,
4941 "Footnotes should not be parsed as reference definitions"
4942 );
4943
4944 assert_eq!(ctx.reference_defs[0].id, "regular");
4946 assert_eq!(ctx.reference_defs[0].url, "./path.md");
4947 assert_eq!(
4948 ctx.reference_defs[0].title,
4949 Some("A real reference definition".to_string())
4950 );
4951 }
4952
4953 #[test]
4954 fn test_footnote_with_inline_link_not_misidentified() {
4955 let content = r#"# Title
4958
4959A footnote[^1].
4960
4961[^1]: [link](https://www.google.com).
4962"#;
4963 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4964
4965 assert!(
4967 ctx.reference_defs.is_empty(),
4968 "Footnote with inline link should not create a reference definition"
4969 );
4970 }
4971
4972 #[test]
4973 fn test_various_footnote_formats_excluded() {
4974 let content = r#"[^1]: Numeric footnote
4976[^note]: Named footnote
4977[^a]: Single char footnote
4978[^long-footnote-name]: Long named footnote
4979[^123abc]: Mixed alphanumeric
4980
4981[ref1]: ./file1.md
4982[ref2]: ./file2.md
4983"#;
4984 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4985
4986 assert_eq!(
4988 ctx.reference_defs.len(),
4989 2,
4990 "Only regular reference definitions should be parsed"
4991 );
4992
4993 let ids: Vec<&str> = ctx.reference_defs.iter().map(|r| r.id.as_str()).collect();
4994 assert!(ids.contains(&"ref1"));
4995 assert!(ids.contains(&"ref2"));
4996 assert!(!ids.iter().any(|id| id.starts_with('^')));
4997 }
4998
4999 #[test]
5004 fn test_has_char_tracked_characters() {
5005 let content = "# Heading\n* list item\n_emphasis_ and -hyphen-\n+ plus\n> quote\n| table |\n[link]\n`code`\n<html>\n!image";
5007 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5008
5009 assert!(ctx.has_char('#'), "Should detect hash");
5011 assert!(ctx.has_char('*'), "Should detect asterisk");
5012 assert!(ctx.has_char('_'), "Should detect underscore");
5013 assert!(ctx.has_char('-'), "Should detect hyphen");
5014 assert!(ctx.has_char('+'), "Should detect plus");
5015 assert!(ctx.has_char('>'), "Should detect gt");
5016 assert!(ctx.has_char('|'), "Should detect pipe");
5017 assert!(ctx.has_char('['), "Should detect bracket");
5018 assert!(ctx.has_char('`'), "Should detect backtick");
5019 assert!(ctx.has_char('<'), "Should detect lt");
5020 assert!(ctx.has_char('!'), "Should detect exclamation");
5021 assert!(ctx.has_char('\n'), "Should detect newline");
5022 }
5023
5024 #[test]
5025 fn test_has_char_absent_characters() {
5026 let content = "Simple text without special chars";
5027 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5028
5029 assert!(!ctx.has_char('#'), "Should not detect hash");
5031 assert!(!ctx.has_char('*'), "Should not detect asterisk");
5032 assert!(!ctx.has_char('_'), "Should not detect underscore");
5033 assert!(!ctx.has_char('-'), "Should not detect hyphen");
5034 assert!(!ctx.has_char('+'), "Should not detect plus");
5035 assert!(!ctx.has_char('>'), "Should not detect gt");
5036 assert!(!ctx.has_char('|'), "Should not detect pipe");
5037 assert!(!ctx.has_char('['), "Should not detect bracket");
5038 assert!(!ctx.has_char('`'), "Should not detect backtick");
5039 assert!(!ctx.has_char('<'), "Should not detect lt");
5040 assert!(!ctx.has_char('!'), "Should not detect exclamation");
5041 assert!(!ctx.has_char('\n'), "Should not detect newline in single line");
5043 }
5044
5045 #[test]
5046 fn test_has_char_fallback_for_untracked() {
5047 let content = "Text with @mention and $dollar and %percent";
5048 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5049
5050 assert!(ctx.has_char('@'), "Should detect @ via fallback");
5052 assert!(ctx.has_char('$'), "Should detect $ via fallback");
5053 assert!(ctx.has_char('%'), "Should detect % via fallback");
5054 assert!(!ctx.has_char('^'), "Should not detect absent ^ via fallback");
5055 }
5056
5057 #[test]
5058 fn test_char_count_tracked_characters() {
5059 let content = "## Heading ##\n***bold***\n__emphasis__\n---\n+++\n>> nested\n|| table ||\n[[link]]\n``code``\n<<html>>\n!!";
5060 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5061
5062 assert_eq!(ctx.char_count('#'), 4, "Should count 4 hashes");
5064 assert_eq!(ctx.char_count('*'), 6, "Should count 6 asterisks");
5065 assert_eq!(ctx.char_count('_'), 4, "Should count 4 underscores");
5066 assert_eq!(ctx.char_count('-'), 3, "Should count 3 hyphens");
5067 assert_eq!(ctx.char_count('+'), 3, "Should count 3 pluses");
5068 assert_eq!(ctx.char_count('>'), 4, "Should count 4 gt (2 nested + 2 in <<html>>)");
5069 assert_eq!(ctx.char_count('|'), 4, "Should count 4 pipes");
5070 assert_eq!(ctx.char_count('['), 2, "Should count 2 brackets");
5071 assert_eq!(ctx.char_count('`'), 4, "Should count 4 backticks");
5072 assert_eq!(ctx.char_count('<'), 2, "Should count 2 lt");
5073 assert_eq!(ctx.char_count('!'), 2, "Should count 2 exclamations");
5074 assert_eq!(ctx.char_count('\n'), 10, "Should count 10 newlines");
5075 }
5076
5077 #[test]
5078 fn test_char_count_zero_for_absent() {
5079 let content = "Plain text";
5080 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5081
5082 assert_eq!(ctx.char_count('#'), 0);
5083 assert_eq!(ctx.char_count('*'), 0);
5084 assert_eq!(ctx.char_count('_'), 0);
5085 assert_eq!(ctx.char_count('\n'), 0);
5086 }
5087
5088 #[test]
5089 fn test_char_count_fallback_for_untracked() {
5090 let content = "@@@ $$ %%%";
5091 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5092
5093 assert_eq!(ctx.char_count('@'), 3, "Should count 3 @ via fallback");
5094 assert_eq!(ctx.char_count('$'), 2, "Should count 2 $ via fallback");
5095 assert_eq!(ctx.char_count('%'), 3, "Should count 3 % via fallback");
5096 assert_eq!(ctx.char_count('^'), 0, "Should count 0 for absent char");
5097 }
5098
5099 #[test]
5100 fn test_char_count_empty_content() {
5101 let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
5102
5103 assert_eq!(ctx.char_count('#'), 0);
5104 assert_eq!(ctx.char_count('*'), 0);
5105 assert_eq!(ctx.char_count('@'), 0);
5106 assert!(!ctx.has_char('#'));
5107 assert!(!ctx.has_char('@'));
5108 }
5109
5110 #[test]
5115 fn test_is_in_html_tag_simple() {
5116 let content = "<div>content</div>";
5117 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5118
5119 assert!(ctx.is_in_html_tag(0), "Position 0 (<) should be in tag");
5121 assert!(ctx.is_in_html_tag(1), "Position 1 (d) should be in tag");
5122 assert!(ctx.is_in_html_tag(4), "Position 4 (>) should be in tag");
5123
5124 assert!(!ctx.is_in_html_tag(5), "Position 5 (c) should not be in tag");
5126 assert!(!ctx.is_in_html_tag(10), "Position 10 (t) should not be in tag");
5127
5128 assert!(ctx.is_in_html_tag(12), "Position 12 (<) should be in tag");
5130 assert!(ctx.is_in_html_tag(17), "Position 17 (>) should be in tag");
5131 }
5132
5133 #[test]
5134 fn test_is_in_html_tag_self_closing() {
5135 let content = "Text <br/> more text";
5136 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5137
5138 assert!(!ctx.is_in_html_tag(0), "Position 0 should not be in tag");
5140 assert!(!ctx.is_in_html_tag(4), "Position 4 (space) should not be in tag");
5141
5142 assert!(ctx.is_in_html_tag(5), "Position 5 (<) should be in tag");
5144 assert!(ctx.is_in_html_tag(8), "Position 8 (/) should be in tag");
5145 assert!(ctx.is_in_html_tag(9), "Position 9 (>) should be in tag");
5146
5147 assert!(!ctx.is_in_html_tag(10), "Position 10 (space) should not be in tag");
5149 }
5150
5151 #[test]
5152 fn test_is_in_html_tag_with_attributes() {
5153 let content = r#"<a href="url" class="link">text</a>"#;
5154 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5155
5156 assert!(ctx.is_in_html_tag(0), "Start of tag");
5158 assert!(ctx.is_in_html_tag(10), "Inside href attribute");
5159 assert!(ctx.is_in_html_tag(20), "Inside class attribute");
5160 assert!(ctx.is_in_html_tag(26), "End of opening tag");
5161
5162 assert!(!ctx.is_in_html_tag(27), "Start of content");
5164 assert!(!ctx.is_in_html_tag(30), "End of content");
5165
5166 assert!(ctx.is_in_html_tag(31), "Start of closing tag");
5168 }
5169
5170 #[test]
5171 fn test_is_in_html_tag_multiline() {
5172 let content = "<div\n class=\"test\"\n>\ncontent\n</div>";
5173 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5174
5175 assert!(ctx.is_in_html_tag(0), "Start of multiline tag");
5177 assert!(ctx.is_in_html_tag(5), "After first newline in tag");
5178 assert!(ctx.is_in_html_tag(15), "Inside attribute");
5179
5180 let closing_bracket_pos = content.find(">\n").unwrap();
5182 assert!(!ctx.is_in_html_tag(closing_bracket_pos + 2), "Content after tag");
5183 }
5184
5185 #[test]
5186 fn test_is_in_html_tag_no_tags() {
5187 let content = "Plain text without any HTML";
5188 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5189
5190 for i in 0..content.len() {
5192 assert!(!ctx.is_in_html_tag(i), "Position {i} should not be in tag");
5193 }
5194 }
5195
5196 #[test]
5201 fn test_is_in_jinja_range_expression() {
5202 let content = "Hello {{ name }}!";
5203 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5204
5205 assert!(!ctx.is_in_jinja_range(0), "H should not be in Jinja");
5207 assert!(!ctx.is_in_jinja_range(5), "Space before Jinja should not be in Jinja");
5208
5209 assert!(ctx.is_in_jinja_range(6), "First brace should be in Jinja");
5211 assert!(ctx.is_in_jinja_range(7), "Second brace should be in Jinja");
5212 assert!(ctx.is_in_jinja_range(10), "name should be in Jinja");
5213 assert!(ctx.is_in_jinja_range(14), "Closing brace should be in Jinja");
5214 assert!(ctx.is_in_jinja_range(15), "Second closing brace should be in Jinja");
5215
5216 assert!(!ctx.is_in_jinja_range(16), "! should not be in Jinja");
5218 }
5219
5220 #[test]
5221 fn test_is_in_jinja_range_statement() {
5222 let content = "{% if condition %}content{% endif %}";
5223 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5224
5225 assert!(ctx.is_in_jinja_range(0), "Start of Jinja statement");
5227 assert!(ctx.is_in_jinja_range(5), "condition should be in Jinja");
5228 assert!(ctx.is_in_jinja_range(17), "End of opening statement");
5229
5230 assert!(!ctx.is_in_jinja_range(18), "content should not be in Jinja");
5232
5233 assert!(ctx.is_in_jinja_range(25), "Start of endif");
5235 assert!(ctx.is_in_jinja_range(32), "endif should be in Jinja");
5236 }
5237
5238 #[test]
5239 fn test_is_in_jinja_range_multiple() {
5240 let content = "{{ a }} and {{ b }}";
5241 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5242
5243 assert!(ctx.is_in_jinja_range(0));
5245 assert!(ctx.is_in_jinja_range(3));
5246 assert!(ctx.is_in_jinja_range(6));
5247
5248 assert!(!ctx.is_in_jinja_range(8));
5250 assert!(!ctx.is_in_jinja_range(11));
5251
5252 assert!(ctx.is_in_jinja_range(12));
5254 assert!(ctx.is_in_jinja_range(15));
5255 assert!(ctx.is_in_jinja_range(18));
5256 }
5257
5258 #[test]
5259 fn test_is_in_jinja_range_no_jinja() {
5260 let content = "Plain text with single braces but not Jinja";
5261 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5262
5263 for i in 0..content.len() {
5265 assert!(!ctx.is_in_jinja_range(i), "Position {i} should not be in Jinja");
5266 }
5267 }
5268
/// A reference definition with a double-quoted title: the recorded title byte
/// range must be treated as half-open by `is_in_link_title` (start included,
/// end excluded), and an offset inside the URL must not count as title.
#[test]
fn test_is_in_link_title_with_title() {
    let markdown = r#"[ref]: https://example.com "Title text"

Some content."#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    assert_eq!(lint_ctx.reference_defs.len(), 1);
    let definition = &lint_ctx.reference_defs[0];
    assert!(definition.title_byte_start.is_some());
    assert!(definition.title_byte_end.is_some());

    let (title_start, title_end) = (
        definition.title_byte_start.unwrap(),
        definition.title_byte_end.unwrap(),
    );

    // (offset, expected result, failure message), probed in this order.
    let probes = [
        (10, false, "URL should not be in title"),
        (title_start, true, "Title start should be in title"),
        (title_start + 5, true, "Middle of title should be in title"),
        (title_end - 1, true, "End of title should be in title"),
        (title_end, false, "After title end should not be in title"),
    ];
    for (offset, expected, message) in probes {
        assert!(lint_ctx.is_in_link_title(offset) == expected, "{message}");
    }
}
5306
/// A reference definition without a title: no title offsets are recorded and
/// no byte offset of the document is reported as being inside a link title.
#[test]
fn test_is_in_link_title_without_title() {
    let markdown = "[ref]: https://example.com\n\nSome content.";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    assert_eq!(lint_ctx.reference_defs.len(), 1);
    let definition = &lint_ctx.reference_defs[0];
    assert!(definition.title_byte_start.is_none());
    assert!(definition.title_byte_end.is_none());

    // Probe every byte offset of the input.
    (0..markdown.len()).for_each(|i| {
        assert!(!lint_ctx.is_in_link_title(i), "Position {i} should not be in title");
    });
}
5323
/// Three reference definitions, of which the first and third carry a title:
/// title offsets must be present exactly for those two, and offsets inside
/// their titles must be reported by `is_in_link_title`.
#[test]
fn test_is_in_link_title_multiple_refs() {
    let markdown = r#"[ref1]: /url1 "Title One"
[ref2]: /url2
[ref3]: /url3 "Title Three"
"#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    assert_eq!(lint_ctx.reference_defs.len(), 3);

    // Looks a definition up by its id; panics if it is missing.
    let definition_named = |wanted: &str| {
        lint_ctx
            .reference_defs
            .iter()
            .find(|candidate| candidate.id == wanted)
            .unwrap()
    };

    let first = definition_named("ref1");
    assert!(first.title_byte_start.is_some());

    let second = definition_named("ref2");
    assert!(second.title_byte_start.is_none());

    let third = definition_named("ref3");
    assert!(third.title_byte_start.is_some());

    // The range checks only run when both ends of the title were recorded.
    if let Some((start, end)) = first.title_byte_start.zip(first.title_byte_end) {
        assert!(lint_ctx.is_in_link_title(start + 1));
        assert!(!lint_ctx.is_in_link_title(end + 5));
    }

    if let Some((start, _)) = third.title_byte_start.zip(third.title_byte_end) {
        assert!(lint_ctx.is_in_link_title(start + 1));
    }
}
5358
/// A reference definition whose title uses single quotes. When both title
/// offsets are recorded, the range must behave as half-open; if they are not
/// recorded, the range checks are skipped.
#[test]
fn test_is_in_link_title_single_quotes() {
    let markdown = "[ref]: /url 'Single quoted title'\n";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    assert_eq!(lint_ctx.reference_defs.len(), 1);
    let definition = &lint_ctx.reference_defs[0];

    let title_bounds = definition.title_byte_start.zip(definition.title_byte_end);
    if let Some((start, end)) = title_bounds {
        assert!(lint_ctx.is_in_link_title(start));
        assert!(lint_ctx.is_in_link_title(start + 5));
        assert!(!lint_ctx.is_in_link_title(end));
    }
}
5373
/// A reference definition with a parenthesized title. The test accepts either
/// outcome: if the first definition has both title offsets, the range must be
/// half-open; otherwise (no definition, or no title offsets) no byte offset
/// may be reported as inside a title.
#[test]
fn test_is_in_link_title_parentheses() {
    let markdown = "[ref]: /url (Parenthesized title)\n";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    // `None` covers both "no definition parsed" and "definition without title offsets".
    let title_bounds = lint_ctx
        .reference_defs
        .first()
        .and_then(|definition| definition.title_byte_start.zip(definition.title_byte_end));

    match title_bounds {
        Some((start, end)) => {
            assert!(lint_ctx.is_in_link_title(start));
            assert!(lint_ctx.is_in_link_title(start + 5));
            assert!(!lint_ctx.is_in_link_title(end));
        }
        None => {
            for offset in 0..markdown.len() {
                assert!(!lint_ctx.is_in_link_title(offset));
            }
        }
    }
}
5402
/// Plain text without reference definitions: the definition list is empty and
/// no byte offset is reported as inside a link title.
#[test]
fn test_is_in_link_title_no_refs() {
    let plain = "Just plain text without any reference definitions.";
    let lint_ctx = LintContext::new(plain, MarkdownFlavor::Standard, None);

    assert!(lint_ctx.reference_defs.is_empty());

    // Probe every byte offset of the input.
    (0..plain.len()).for_each(|offset| {
        assert!(!lint_ctx.is_in_link_title(offset));
    });
}
5414
/// A single `$...$` span whose body looks like a Markdown link: exactly one
/// non-display math span is expected, with the text between the dollar signs
/// as its content.
#[test]
fn test_math_spans_inline() {
    let markdown = "Text with inline math $[f](x)$ in it.";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should detect one inline math span");

    let only_span = &spans[0];
    assert!(!only_span.is_display, "Should be inline math, not display");
    assert_eq!(only_span.content, "[f](x)", "Content should be extracted correctly");
}
5431
/// A `$$...$$` block written on one line: exactly one display math span is
/// expected and its content must still include the link-like `[x](\zeta)` text.
#[test]
fn test_math_spans_display_single_line() {
    let markdown = "$$X(\\zeta) = \\mathcal Z [x](\\zeta)$$";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should detect one display math span");

    let only_span = &spans[0];
    assert!(only_span.is_display, "Should be display math");

    let keeps_link_like_text = only_span.content.contains("[x](\\zeta)");
    assert!(keeps_link_like_text, "Content should contain the link-like pattern");
}
5447
/// A `$$` block spread over several lines between two paragraphs: exactly one
/// span is expected and it must be flagged as display math.
#[test]
fn test_math_spans_display_multiline() {
    let markdown = "Before\n\n$$\n[x](\\zeta) = \\sum_k x(k)\n$$\n\nAfter";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should detect one display math span");

    let only_span = &spans[0];
    assert!(only_span.is_display, "Should be display math");
}
5459
/// Probes `is_in_math_span` around a single inline `$...$` span: offsets
/// shortly after the opening dollar must be inside, offset 0 and an offset
/// beyond the closing dollar must be outside.
#[test]
fn test_is_in_math_span() {
    let markdown = "Text $[f](x)$ more text";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let opening_dollar = markdown.find('$').unwrap();
    // One past the closing dollar sign.
    let past_closing_dollar = markdown.rfind('$').unwrap() + 1;

    for inside in [opening_dollar + 1, opening_dollar + 3] {
        assert!(
            lint_ctx.is_in_math_span(inside),
            "Position inside math span should return true"
        );
    }

    assert!(
        !lint_ctx.is_in_math_span(0),
        "Position before math span should return false"
    );
    assert!(
        !lint_ctx.is_in_math_span(past_closing_dollar + 1),
        "Position after math span should return false"
    );
}
5485
/// One inline math span and one inline code span on the same line, both with
/// link-like bodies: each must be reported once, by its own accessor, with the
/// delimiters stripped from the content.
#[test]
fn test_math_spans_mixed_with_code() {
    let markdown = "Math $[f](x)$ and code `[g](y)` mixed";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let math = lint_ctx.math_spans();
    let code = lint_ctx.code_spans();

    assert_eq!(math.len(), 1, "Should have one math span");
    assert_eq!(code.len(), 1, "Should have one code span");

    assert_eq!(math[0].content, "[f](x)");
    assert_eq!(code[0].content, "[g](y)");
}
5502
/// Text without any dollar signs must yield an empty list of math spans.
#[test]
fn test_math_spans_no_math() {
    let plain = "Regular text without any math at all.";
    let lint_ctx = LintContext::new(plain, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert!(spans.is_empty(), "Should have no math spans");
}
5511
/// Two inline spans followed by one `$$...$$` span on a single line: three
/// spans in total, split into two inline and one display.
#[test]
fn test_math_spans_multiple() {
    let markdown = "First $a$ and second $b$ and display $$c$$";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 3, "Should detect three math spans");

    // Every span is either display or inline, so the inline count is the remainder.
    let display_total = spans.iter().filter(|span| span.is_display).count();
    let inline_total = spans.len() - display_total;

    assert_eq!(inline_total, 2, "Should have two inline math spans");
    assert_eq!(display_total, 1, "Should have one display math span");
}
5527
/// Boundary behaviour of `is_in_math_span` for a document that is exactly one
/// inline math span: `byte_offset` is inside, `byte_end` is outside.
#[test]
fn test_is_in_math_span_boundary_positions() {
    let markdown = "$[f](x)$";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should have one math span");

    let only_span = &spans[0];

    // (offset, expected result, failure message), probed in this order.
    let probes = [
        (only_span.byte_offset, true, "Start position should be in span"),
        (
            only_span.byte_offset + 1,
            true,
            "Position after start should be in span",
        ),
        (only_span.byte_end - 1, true, "Position at end-1 should be in span"),
        (
            only_span.byte_end,
            false,
            "Position at byte_end should NOT be in span (exclusive)",
        ),
    ];
    for (offset, expected, message) in probes {
        assert!(lint_ctx.is_in_math_span(offset) == expected, "{message}");
    }
}
5564
/// A math span that opens the document must report a `byte_offset` of 0.
#[test]
fn test_math_spans_at_document_start() {
    let markdown = "$x$ text";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1);

    let leading_span = &spans[0];
    assert_eq!(leading_span.byte_offset, 0, "Math should start at byte 0");
}
5574
/// A math span that closes the document must report a `byte_end` equal to the
/// document length.
#[test]
fn test_math_spans_at_document_end() {
    let markdown = "text $x$";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1);

    let trailing_span = &spans[0];
    assert_eq!(
        trailing_span.byte_end,
        markdown.len(),
        "Math should end at document end"
    );
}
5584
/// Back-to-back dollar-delimited text (`$a$$b$`): at least one span must be
/// found, and every byte offset of the input must fall inside some math span.
#[test]
fn test_math_spans_consecutive() {
    let markdown = "$a$$b$";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert!(!spans.is_empty(), "Should detect at least one math span");

    // Probe every byte offset of the input.
    (0..markdown.len()).for_each(|i| {
        assert!(lint_ctx.is_in_math_span(i), "Position {i} should be in a math span");
    });
}
5599
/// A lone dollar sign used as a currency marker: whatever spans are reported,
/// none of them may contain the amount `100`.
#[test]
fn test_math_spans_currency_not_math() {
    let markdown = "Price is $100";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();

    // Vacuously true for an empty list, so no separate emptiness check is needed.
    let amount_never_captured = spans.iter().all(|span| !span.content.contains("100"));
    assert!(
        amount_never_captured,
        "Unbalanced $ should not create math span containing 100"
    );
}
5614
/// `get_reference_url` must resolve ids regardless of the letter case used in
/// the definition or in the query, and return `None` for an unknown id.
#[test]
fn test_reference_lookup_o1_basic() {
    let markdown = r#"[ref1]: /url1
[REF2]: /url2 "Title"
[Ref3]: /url3

Use [link][ref1] and [link][REF2]."#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    assert_eq!(lint_ctx.reference_defs.len(), 3);

    // (queried id, expected URL), checked in this order.
    let expectations = [
        ("ref1", Some("/url1")),
        ("REF1", Some("/url1")),
        ("Ref1", Some("/url1")),
        ("ref2", Some("/url2")),
        ("REF2", Some("/url2")),
        ("ref3", Some("/url3")),
        ("nonexistent", None),
    ];
    for (queried_id, expected_url) in expectations {
        assert_eq!(lint_ctx.get_reference_url(queried_id), expected_url);
    }
}
5640
/// `get_reference_def` must return the full definition (URL and title) for an
/// id, accept the same id in upper case, and return `None` for an unknown id.
#[test]
fn test_reference_lookup_o1_get_reference_def() {
    let markdown = r#"[myref]: https://example.com "My Title"
"#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    // Query with the id exactly as written in the definition.
    let exact_case = lint_ctx.get_reference_def("myref").expect("Should find myref");
    assert_eq!(exact_case.url, "https://example.com");
    assert_eq!(exact_case.title.as_deref(), Some("My Title"));

    // Query with the id in upper case.
    let upper_case = lint_ctx.get_reference_def("MYREF").expect("Should find MYREF");
    assert_eq!(upper_case.url, "https://example.com");

    let unknown = lint_ctx.get_reference_def("nonexistent");
    assert!(unknown.is_none());
}
5659
/// `has_reference_def` must report defined ids regardless of letter case and
/// reject an id that was never defined.
#[test]
fn test_reference_lookup_o1_has_reference_def() {
    let markdown = r#"[foo]: /foo
[BAR]: /bar
"#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    // Each defined id is queried in its original case and in a different case.
    for defined_id in ["foo", "FOO", "bar", "Bar"] {
        assert!(lint_ctx.has_reference_def(defined_id));
    }

    assert!(!lint_ctx.has_reference_def("baz"));
}
5674
/// With no reference definitions in the document, all three lookup helpers
/// must report "not found" for an arbitrary id.
#[test]
fn test_reference_lookup_o1_empty_content() {
    let plain = "No references here.";
    let lint_ctx = LintContext::new(plain, MarkdownFlavor::Standard, None);

    assert!(lint_ctx.reference_defs.is_empty());

    let url_lookup = lint_ctx.get_reference_url("anything");
    assert_eq!(url_lookup, None);

    let def_lookup = lint_ctx.get_reference_def("anything");
    assert!(def_lookup.is_none());

    let is_defined = lint_ctx.has_reference_def("anything");
    assert!(!is_defined);
}
5685
/// Reference ids containing dashes, underscores and dots must be resolvable
/// through `get_reference_url`.
#[test]
fn test_reference_lookup_o1_special_characters_in_id() {
    let markdown = r#"[ref-with-dash]: /url1
[ref_with_underscore]: /url2
[ref.with.dots]: /url3
"#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    // (queried id, expected URL), checked in this order.
    let expectations = [
        ("ref-with-dash", Some("/url1")),
        ("ref_with_underscore", Some("/url2")),
        ("ref.with.dots", Some("/url3")),
    ];
    for (queried_id, expected_url) in expectations {
        assert_eq!(lint_ctx.get_reference_url(queried_id), expected_url);
    }
}
5698
/// Reference ids made of non-ASCII characters must be resolvable, including
/// when the query uses the upper-case form of an accented id.
#[test]
fn test_reference_lookup_o1_unicode_id() {
    let markdown = r#"[日本語]: /japanese
[émoji]: /emoji
"#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    // (queried id, expected URL), checked in this order; the last query
    // differs from the definition only in letter case.
    let expectations = [
        ("日本語", Some("/japanese")),
        ("émoji", Some("/emoji")),
        ("ÉMOJI", Some("/emoji")),
    ];
    for (queried_id, expected_url) in expectations {
        assert_eq!(lint_ctx.get_reference_url(queried_id), expected_url);
    }
}
5710}