1use crate::config::MarkdownFlavor;
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
4use crate::utils::element_cache::ElementCache;
5use crate::utils::regex_cache::URL_SIMPLE_REGEX;
6use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
7use regex::Regex;
8use std::borrow::Cow;
9use std::collections::HashMap;
10use std::path::PathBuf;
11use std::sync::LazyLock;
12
// Evaluates `$code` and yields its value. On native targets a timer is started
// before `$code` runs and, when `$profile` is true, the elapsed time is printed
// to stderr as `[PROFILE] <name>: <duration>`.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let timer = std::time::Instant::now();
        let value = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, timer.elapsed());
        }
        value
    }};
}

// wasm32 variant: no timing is performed, `$code` is simply evaluated.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
30
/// Lazily compiled regex for inline links (`[text](url "title")`) and full
/// reference links (`[text][id]`).
///
/// Capture groups: 1 = link text, 2 = angle-bracketed URL, 3 = bare URL,
/// 4 / 5 = double- / single-quoted title, 6 = reference id.
/// Panics on first use if the pattern fails to compile.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\] # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
        \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
        |
        \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
44
/// Lazily compiled regex for inline images (`![alt](url "title")`) and full
/// reference images (`![alt][id]`).
///
/// Capture groups: 1 = alt text, 2 = angle-bracketed URL, 3 = bare URL,
/// 4 / 5 = double- / single-quoted title, 6 = reference id.
/// Panics on first use if the pattern fails to compile.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\] # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
        \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
        |
        \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
58
/// Lazily compiled multi-line regex for reference definitions of the form
/// `[id]: url "title"` indented by at most three spaces.
///
/// Capture groups: 1 = id, 2 = URL (no whitespace), 3 / 4 = double- /
/// single-quoted title.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
62
/// Lazily compiled regex matching a simple `local@domain.tld` e-mail address
/// (ASCII letters, digits and a few punctuation characters; TLD of 2+ letters).
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
68
/// Lazily compiled regex capturing a leading blockquote prefix: optional
/// whitespace, one or more `>` characters, then optional whitespace (group 1).
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
71
/// Per-line metadata stored in `LintContext::lines`.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset of the start of this line within the source text.
    pub byte_offset: usize,
    /// Length of the line in bytes; `content()` slices
    /// `source[byte_offset..byte_offset + byte_len]`.
    pub byte_len: usize,
    // NOTE(review): `indent` / `visual_indent` are filled in by code outside this
    // chunk; presumably raw vs. tab-expanded leading whitespace — confirm.
    pub indent: usize,
    pub visual_indent: usize,
    // The `is_*` / `in_*` flags below classify the line. Except where noted they
    // are set by detection passes not visible here, so their exact semantics
    // should be confirmed against those passes.
    pub is_blank: bool,
    pub in_code_block: bool,
    pub in_front_matter: bool,
    pub in_html_block: bool,
    pub in_html_comment: bool,
    /// List-marker details when this line has them, otherwise `None`.
    pub list_item: Option<ListItemInfo>,
    /// Heading details when this line has them, otherwise `None`.
    pub heading: Option<HeadingInfo>,
    /// Blockquote details when this line has them, otherwise `None`.
    pub blockquote: Option<BlockquoteInfo>,
    pub in_mkdocstrings: bool,
    pub in_esm_block: bool,
    /// Set by `LintContext::new` on every line after the first line of a
    /// code span that extends over several lines.
    pub in_code_span_continuation: bool,
    pub is_horizontal_rule: bool,
    pub in_math_block: bool,
    pub in_quarto_div: bool,
    pub in_jsx_expression: bool,
    pub in_mdx_comment: bool,
    pub in_jsx_component: bool,
    pub in_jsx_fragment: bool,
    pub in_admonition: bool,
    pub in_content_tab: bool,
    pub in_definition_list: bool,
}
129
130impl LineInfo {
131 pub fn content<'a>(&self, source: &'a str) -> &'a str {
133 &source[self.byte_offset..self.byte_offset + self.byte_len]
134 }
135}
136
/// Details of the list marker found on a line (stored in `LineInfo::list_item`).
// NOTE(review): the column base (0- vs 1-indexed) is not established by this
// definition — confirm against the code that fills it in.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker text as written.
    pub marker: String,
    /// `true` for an ordered list item, `false` for an unordered one.
    pub is_ordered: bool,
    /// Item number; presumably only present for ordered items — confirm.
    pub number: Option<usize>,
    /// Column at which the marker starts.
    pub marker_column: usize,
    /// Column at which the item content starts.
    pub content_column: usize,
}
151
/// Syntactic style a heading was written in.
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    /// ATX heading (`#`-prefixed).
    ATX,
    /// Setext heading; presumably the level-1 (`=` underline) form — confirm.
    Setext1,
    /// Setext heading; presumably the level-2 (`-` underline) form — confirm.
    Setext2,
}
162
/// A link found in the document, borrowing from the source text where possible.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// Line on which the link starts.
    pub line: usize,
    /// Column of the link start within its line.
    pub start_col: usize,
    /// Column of the link end within its line.
    pub end_col: usize,
    /// Byte offset of the link start in the document.
    pub byte_offset: usize,
    /// Byte offset of the link end in the document.
    pub byte_end: usize,
    /// Link text.
    pub text: Cow<'a, str>,
    /// Link destination.
    pub url: Cow<'a, str>,
    /// `true` when the link type is `Reference`, `Collapsed` or `Shortcut`.
    pub is_reference: bool,
    /// Lower-cased reference id for reference-style links, `None` otherwise.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type as reported by pulldown-cmark.
    pub link_type: LinkType,
}
187
/// A reference that pulldown-cmark reported through its broken-link callback.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text reported by the parser.
    pub reference: String,
    /// Byte span of the broken link in the document.
    pub span: std::ops::Range<usize>,
}
196
/// A footnote reference found in the document.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// Footnote identifier.
    pub id: String,
    /// Line on which the reference occurs.
    pub line: usize,
    /// Byte offset of the reference start.
    pub byte_offset: usize,
    /// Byte offset of the reference end.
    pub byte_end: usize,
}
209
/// An image found in the document; the image counterpart of `ParsedLink`.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// Line on which the image starts.
    pub line: usize,
    /// Column of the image start within its line.
    pub start_col: usize,
    /// Column of the image end within its line.
    pub end_col: usize,
    /// Byte offset of the image start in the document.
    pub byte_offset: usize,
    /// Byte offset of the image end in the document.
    pub byte_end: usize,
    /// Alternative text.
    pub alt_text: Cow<'a, str>,
    /// Image destination.
    pub url: Cow<'a, str>,
    /// Whether the image uses reference syntax.
    pub is_reference: bool,
    /// Reference id for reference-style images.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type as reported by pulldown-cmark.
    pub link_type: LinkType,
}
234
/// A reference definition (`[id]: url "title"`).
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line on which the definition occurs.
    pub line: usize,
    /// Definition id; lookups on `LintContext` compare it lower-cased.
    pub id: String,
    /// Destination URL.
    pub url: String,
    /// Optional title.
    pub title: Option<String>,
    /// Start of the definition; `is_in_reference_def` treats
    /// `byte_offset..byte_end` as a half-open byte range.
    pub byte_offset: usize,
    /// Exclusive end of the definition.
    pub byte_end: usize,
    /// Start of the title; `is_in_link_title` treats
    /// `title_byte_start..title_byte_end` as a half-open byte range.
    pub title_byte_start: Option<usize>,
    /// Exclusive end of the title.
    pub title_byte_end: Option<usize>,
}
255
/// An inline code span, possibly extending over several lines.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// 1-indexed line on which the span starts.
    pub line: usize,
    /// 1-indexed line on which the span ends.
    pub end_line: usize,
    /// Start column; `is_in_code_span` compares it against a 0-indexed column.
    pub start_col: usize,
    /// End column, treated as exclusive by `is_in_code_span`.
    pub end_col: usize,
    /// Start of the span as a byte offset (inclusive).
    pub byte_offset: usize,
    /// End of the span as a byte offset (exclusive in the range checks).
    pub byte_end: usize,
    /// Number of backticks in the delimiter.
    pub backtick_count: usize,
    /// Text of the span.
    pub content: String,
}
276
/// A math span found in the document.
#[derive(Debug, Clone)]
pub struct MathSpan {
    /// Line on which the span starts.
    pub line: usize,
    /// Line on which the span ends.
    pub end_line: usize,
    /// Start column.
    pub start_col: usize,
    /// End column.
    pub end_col: usize,
    /// Start of the span as a byte offset (inclusive in `is_in_math_span`).
    pub byte_offset: usize,
    /// End of the span as a byte offset (exclusive in `is_in_math_span`).
    pub byte_end: usize,
    /// Presumably `true` for display math and `false` for inline math — confirm.
    pub is_display: bool,
    /// Text of the span.
    pub content: String,
}
297
/// Details of a heading found on a line (stored in `LineInfo::heading`).
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level.
    pub level: u8,
    /// Syntactic style of the heading.
    pub style: HeadingStyle,
    /// The heading marker as written.
    pub marker: String,
    /// Column at which the marker starts.
    pub marker_column: usize,
    /// Column at which the heading text starts.
    pub content_column: usize,
    /// Heading text.
    pub text: String,
    /// Custom id attached to the heading, if any.
    pub custom_id: Option<String>,
    // NOTE(review): how `raw_text` differs from `text` is not visible here — confirm.
    pub raw_text: String,
    /// Whether a closing sequence is present.
    pub has_closing_sequence: bool,
    /// The closing sequence as written.
    pub closing_sequence: String,
    /// Headings with `is_valid == false` are skipped by `ValidHeadingsIter`.
    pub is_valid: bool,
}
325
/// Item yielded by `ValidHeadingsIter`: a valid heading together with its line.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// 1-indexed line number of the heading.
    pub line_num: usize,
    /// The heading stored on that line.
    pub heading: &'a HeadingInfo,
    /// The line the heading was found on.
    pub line_info: &'a LineInfo,
}
339
/// Iterator over the lines of a document that carry a heading with `is_valid` set.
pub struct ValidHeadingsIter<'a> {
    /// All lines of the document.
    lines: &'a [LineInfo],
    /// Index of the next line to inspect.
    current_index: usize,
}
348
349impl<'a> ValidHeadingsIter<'a> {
350 fn new(lines: &'a [LineInfo]) -> Self {
351 Self {
352 lines,
353 current_index: 0,
354 }
355 }
356}
357
358impl<'a> Iterator for ValidHeadingsIter<'a> {
359 type Item = ValidHeading<'a>;
360
361 fn next(&mut self) -> Option<Self::Item> {
362 while self.current_index < self.lines.len() {
363 let idx = self.current_index;
364 self.current_index += 1;
365
366 let line_info = &self.lines[idx];
367 if let Some(heading) = &line_info.heading
368 && heading.is_valid
369 {
370 return Some(ValidHeading {
371 line_num: idx + 1, heading,
373 line_info,
374 });
375 }
376 }
377 None
378 }
379}
380
/// Details of a blockquote line (stored in `LineInfo::blockquote`).
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Blockquote nesting depth.
    pub nesting_level: usize,
    /// Whitespace preceding the first marker.
    pub indent: String,
    /// Column at which the marker starts.
    pub marker_column: usize,
    /// The blockquote prefix; `blockquote_prefix_for_blank_line` returns it with
    /// trailing whitespace trimmed.
    pub prefix: String,
    /// Text following the prefix.
    pub content: String,
    /// Whether the marker is not followed by a space.
    pub has_no_space_after_marker: bool,
    /// Whether the marker is followed by more than one space.
    pub has_multiple_spaces_after_marker: bool,
    // NOTE(review): presumably flags the line for the MD028 rule's fix — confirm.
    pub needs_md028_fix: bool,
}
401
/// A list block: a run of lines forming one list.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// First line of the block; `is_in_list_block` treats
    /// `start_line..=end_line` as an inclusive line range.
    pub start_line: usize,
    /// Last line of the block (inclusive).
    pub end_line: usize,
    /// Whether the block is an ordered list.
    pub is_ordered: bool,
    /// Marker used by the block, when one is recorded.
    pub marker: Option<String>,
    /// Blockquote prefix the block sits behind, if any.
    pub blockquote_prefix: String,
    /// Line numbers of the items in the block.
    pub item_lines: Vec<usize>,
    /// Nesting depth of the block.
    pub nesting_level: usize,
    /// Width of the widest marker in the block.
    pub max_marker_width: usize,
}
422
423use std::sync::{Arc, OnceLock};
424
/// Map from a `usize` key to list-item data.
// NOTE(review): presumably keyed by line index with the tuple being
// (is_ordered, marker, marker_column, content_column, number) — confirm against the user.
type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;
427
/// A list of `(start, end)` byte-offset pairs.
type ByteRanges = Vec<(usize, usize)>;
430
/// Occurrence counts of Markdown-significant characters in a document.
///
/// `LintContext::has_char` / `char_count` answer from these counters for the
/// characters noted on each field.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count reported for `#`.
    pub hash_count: usize,
    /// Count reported for `*`.
    pub asterisk_count: usize,
    /// Count reported for `_`.
    pub underscore_count: usize,
    /// Count reported for `-`.
    pub hyphen_count: usize,
    /// Count reported for `+`.
    pub plus_count: usize,
    /// Count reported for `>`.
    pub gt_count: usize,
    /// Count reported for `|`.
    pub pipe_count: usize,
    /// Count reported for `[`.
    pub bracket_count: usize,
    /// Count reported for a backtick.
    pub backtick_count: usize,
    /// Count reported for `<`.
    pub lt_count: usize,
    /// Count reported for `!`.
    pub exclamation_count: usize,
    /// Count reported for the newline character.
    pub newline_count: usize,
}
459
/// An HTML tag found in the document.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line of the tag; `html_tags_on_line` filters on it.
    pub line: usize,
    /// Start column.
    pub start_col: usize,
    /// End column.
    pub end_col: usize,
    /// Start of the tag as a byte offset (inclusive in `is_in_html_tag`).
    pub byte_offset: usize,
    /// End of the tag as a byte offset (exclusive in `is_in_html_tag`).
    pub byte_end: usize,
    /// Tag name.
    pub tag_name: String,
    /// Whether this is a closing tag.
    pub is_closing: bool,
    /// Whether the tag is self-closing.
    pub is_self_closing: bool,
    /// The tag text as written.
    pub raw_content: String,
}
482
/// An emphasis span found in the document.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line of the span; `emphasis_spans_on_line` filters on it.
    pub line: usize,
    /// Start column.
    pub start_col: usize,
    /// End column.
    pub end_col: usize,
    /// Byte offset of the span start.
    pub byte_offset: usize,
    /// Byte offset of the span end.
    pub byte_end: usize,
    /// The emphasis marker character.
    pub marker: char,
    /// Number of marker characters in the delimiter.
    pub marker_count: usize,
    /// Emphasised text.
    pub content: String,
}
503
/// A table row found in the document.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line of the row; `table_rows_on_line` filters on it.
    pub line: usize,
    /// Whether this row is the separator row.
    pub is_separator: bool,
    /// Number of columns in the row.
    pub column_count: usize,
    /// Column alignments, one string per column.
    pub column_alignments: Vec<String>,
}
516
/// A bare URL (or similar address) found in the document.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line of the URL; `bare_urls_on_line` filters on it.
    pub line: usize,
    /// Start column.
    pub start_col: usize,
    /// End column.
    pub end_col: usize,
    /// Byte offset of the URL start.
    pub byte_offset: usize,
    /// Byte offset of the URL end.
    pub byte_end: usize,
    /// The URL text.
    pub url: String,
    /// Kind of URL as a string tag; the value set is defined by the producer.
    pub url_type: String,
}
535
/// Shared, pre-analysed view of one Markdown document.
///
/// `LintContext::new` fills the eager fields; the `*_cache` fields are
/// `OnceLock`s read through the accessor methods of the same name.
pub struct LintContext<'a> {
    /// The document text.
    pub content: &'a str,
    /// Byte offset at which each line starts (`0`, then one past every `\n`).
    pub line_offsets: Vec<usize>,
    /// Code block ranges from `CodeBlockUtils::detect_code_blocks`.
    pub code_blocks: Vec<(usize, usize)>,
    /// Per-line metadata, indexed by line number minus one.
    pub lines: Vec<LineInfo>,
    /// Links found by `parse_links`.
    pub links: Vec<ParsedLink<'a>>,
    /// Images found by `parse_images`.
    pub images: Vec<ParsedImage<'a>>,
    /// Broken link references reported while parsing links.
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Footnote references collected while parsing links.
    pub footnote_refs: Vec<FootnoteRef>,
    /// Reference definitions found by `parse_reference_defs`.
    pub reference_defs: Vec<ReferenceDef>,
    /// Lower-cased definition id -> index into `reference_defs`.
    reference_defs_map: HashMap<String, usize>,
    /// Code spans; pre-filled during construction.
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>,
    /// Math spans; computed on first call to `math_spans()`.
    math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>,
    /// List blocks found by `parse_list_blocks`.
    pub list_blocks: Vec<ListBlock>,
    /// Character counters backing `has_char`, `char_count` and the `likely_has_*` heuristics.
    pub char_frequency: CharFrequency,
    /// HTML tags; computed on first call to `html_tags()`.
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>,
    /// Emphasis spans; pre-filled during construction.
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>,
    /// Table rows; computed on first call to `table_rows()`.
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>,
    /// Bare URLs; computed on first call to `bare_urls()`.
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>,
    /// Result of `compute_mixed_list_nesting`; computed on first use.
    has_mixed_list_nesting_cache: OnceLock<bool>,
    /// Byte ranges of HTML comments.
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Table blocks found by `TableUtils::find_table_blocks_with_code_info`.
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Line index built over `content`.
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    /// Byte ranges returned by `find_jinja_ranges`.
    jinja_ranges: Vec<(usize, usize)>,
    /// Markdown flavor the document is linted as.
    pub flavor: MarkdownFlavor,
    /// Path of the source file, when known.
    pub source_file: Option<PathBuf>,
    /// JSX expression byte ranges from `detect_jsx_and_mdx_comments`.
    jsx_expression_ranges: Vec<(usize, usize)>,
    /// MDX comment byte ranges from `detect_jsx_and_mdx_comments`.
    mdx_comment_ranges: Vec<(usize, usize)>,
    /// Citation byte ranges; only collected for the Quarto flavor.
    citation_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Byte ranges matched by `HUGO_SHORTCODE_REGEX`.
    shortcode_ranges: Vec<(usize, usize)>,
}
567
/// The four consecutive pieces of a blockquote line, each borrowed from the input.
struct BlockquoteComponents<'a> {
    /// Leading spaces/tabs before the first `>`.
    indent: &'a str,
    /// The run of consecutive `>` characters.
    markers: &'a str,
    /// Spaces/tabs directly after the markers.
    spaces_after: &'a str,
    /// Everything after that whitespace.
    content: &'a str,
}

/// Splits `line` into indent, `>` markers, following whitespace and content.
///
/// Returns `None` when the first character after the leading spaces/tabs is
/// not `>` (including when the line is empty or whitespace only).
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    let is_blank = |c: char| c == ' ' || c == '\t';

    // Peel each component off the front; all separators are ASCII, so the
    // length differences are valid byte boundaries.
    let after_indent = line.trim_start_matches(is_blank);
    if !after_indent.starts_with('>') {
        return None;
    }
    let after_markers = after_indent.trim_start_matches('>');
    let content = after_markers.trim_start_matches(is_blank);

    let indent_len = line.len() - after_indent.len();
    let markers_len = after_indent.len() - after_markers.len();
    let spaces_len = after_markers.len() - content.len();

    Some(BlockquoteComponents {
        indent: &line[..indent_len],
        markers: &after_indent[..markers_len],
        spaces_after: &after_markers[..spaces_len],
        content,
    })
}
612
613impl<'a> LintContext<'a> {
614 pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
615 #[cfg(not(target_arch = "wasm32"))]
616 let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
617 #[cfg(target_arch = "wasm32")]
618 let profile = false;
619
620 let line_offsets = profile_section!("Line offsets", profile, {
621 let mut offsets = vec![0];
622 for (i, c) in content.char_indices() {
623 if c == '\n' {
624 offsets.push(i + 1);
625 }
626 }
627 offsets
628 });
629
630 let code_blocks = profile_section!("Code blocks", profile, CodeBlockUtils::detect_code_blocks(content));
632
633 let html_comment_ranges = profile_section!(
635 "HTML comment ranges",
636 profile,
637 crate::utils::skip_context::compute_html_comment_ranges(content)
638 );
639
640 let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
642 if flavor == MarkdownFlavor::MkDocs {
643 crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
644 } else {
645 Vec::new()
646 }
647 });
648
649 let quarto_div_ranges = profile_section!("Quarto div ranges", profile, {
651 if flavor == MarkdownFlavor::Quarto {
652 crate::utils::quarto_divs::detect_div_block_ranges(content)
653 } else {
654 Vec::new()
655 }
656 });
657
658 let (mut lines, emphasis_spans) = profile_section!(
661 "Basic line info",
662 profile,
663 Self::compute_basic_line_info(
664 content,
665 &line_offsets,
666 &code_blocks,
667 flavor,
668 &html_comment_ranges,
669 &autodoc_ranges,
670 &quarto_div_ranges,
671 )
672 );
673
674 profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));
676
677 profile_section!(
679 "ESM blocks",
680 profile,
681 Self::detect_esm_blocks(content, &mut lines, flavor)
682 );
683
684 let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
686 "JSX/MDX detection",
687 profile,
688 Self::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
689 );
690
691 profile_section!(
693 "MkDocs constructs",
694 profile,
695 Self::detect_mkdocs_line_info(content, &mut lines, flavor)
696 );
697
698 let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));
700
701 profile_section!(
703 "Headings & blockquotes",
704 profile,
705 Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
706 );
707
708 let code_spans = profile_section!("Code spans", profile, Self::parse_code_spans(content, &lines));
710
711 for span in &code_spans {
714 if span.end_line > span.line {
715 for line_num in (span.line + 1)..=span.end_line {
717 if let Some(line_info) = lines.get_mut(line_num - 1) {
718 line_info.in_code_span_continuation = true;
719 }
720 }
721 }
722 }
723
724 let (links, broken_links, footnote_refs) = profile_section!(
726 "Links",
727 profile,
728 Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
729 );
730
731 let images = profile_section!(
732 "Images",
733 profile,
734 Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
735 );
736
737 let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));
738
739 let reference_defs_map: HashMap<String, usize> = reference_defs
741 .iter()
742 .enumerate()
743 .map(|(idx, def)| (def.id.to_lowercase(), idx))
744 .collect();
745
746 let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));
747
748 let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));
750
751 let table_blocks = profile_section!(
753 "Table blocks",
754 profile,
755 crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
756 content,
757 &code_blocks,
758 &code_spans,
759 &html_comment_ranges,
760 )
761 );
762
763 let line_index = profile_section!(
765 "Line index",
766 profile,
767 crate::utils::range_utils::LineIndex::new(content)
768 );
769
770 let jinja_ranges = profile_section!(
772 "Jinja ranges",
773 profile,
774 crate::utils::jinja_utils::find_jinja_ranges(content)
775 );
776
777 let citation_ranges = profile_section!("Citation ranges", profile, {
779 if flavor == MarkdownFlavor::Quarto {
780 crate::utils::quarto_divs::find_citation_ranges(content)
781 } else {
782 Vec::new()
783 }
784 });
785
786 let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
788 use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
789 let mut ranges = Vec::new();
790 for mat in HUGO_SHORTCODE_REGEX.find_iter(content).flatten() {
791 ranges.push((mat.start(), mat.end()));
792 }
793 ranges
794 });
795
796 Self {
797 content,
798 line_offsets,
799 code_blocks,
800 lines,
801 links,
802 images,
803 broken_links,
804 footnote_refs,
805 reference_defs,
806 reference_defs_map,
807 code_spans_cache: OnceLock::from(Arc::new(code_spans)),
808 math_spans_cache: OnceLock::new(), list_blocks,
810 char_frequency,
811 html_tags_cache: OnceLock::new(),
812 emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
813 table_rows_cache: OnceLock::new(),
814 bare_urls_cache: OnceLock::new(),
815 has_mixed_list_nesting_cache: OnceLock::new(),
816 html_comment_ranges,
817 table_blocks,
818 line_index,
819 jinja_ranges,
820 flavor,
821 source_file,
822 jsx_expression_ranges,
823 mdx_comment_ranges,
824 citation_ranges,
825 shortcode_ranges,
826 }
827 }
828
829 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
831 Arc::clone(
832 self.code_spans_cache
833 .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
834 )
835 }
836
837 pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
839 Arc::clone(
840 self.math_spans_cache
841 .get_or_init(|| Arc::new(Self::parse_math_spans(self.content, &self.lines))),
842 )
843 }
844
845 pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
847 let math_spans = self.math_spans();
848 math_spans
849 .iter()
850 .any(|span| byte_pos >= span.byte_offset && byte_pos < span.byte_end)
851 }
852
853 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
855 &self.html_comment_ranges
856 }
857
858 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
860 Arc::clone(self.html_tags_cache.get_or_init(|| {
861 Arc::new(Self::parse_html_tags(
862 self.content,
863 &self.lines,
864 &self.code_blocks,
865 self.flavor,
866 ))
867 }))
868 }
869
870 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
872 Arc::clone(
873 self.emphasis_spans_cache
874 .get()
875 .expect("emphasis_spans_cache initialized during construction"),
876 )
877 }
878
879 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
881 Arc::clone(
882 self.table_rows_cache
883 .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
884 )
885 }
886
887 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
889 Arc::clone(
890 self.bare_urls_cache
891 .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
892 )
893 }
894
895 pub fn has_mixed_list_nesting(&self) -> bool {
899 *self
900 .has_mixed_list_nesting_cache
901 .get_or_init(|| self.compute_mixed_list_nesting())
902 }
903
904 fn compute_mixed_list_nesting(&self) -> bool {
906 let mut stack: Vec<(usize, bool)> = Vec::new();
911 let mut last_was_blank = false;
912
913 for line_info in &self.lines {
914 if line_info.in_code_block
916 || line_info.in_front_matter
917 || line_info.in_mkdocstrings
918 || line_info.in_html_comment
919 || line_info.in_esm_block
920 {
921 continue;
922 }
923
924 if line_info.is_blank {
926 last_was_blank = true;
927 continue;
928 }
929
930 if let Some(list_item) = &line_info.list_item {
931 let current_pos = if list_item.marker_column == 1 {
933 0
934 } else {
935 list_item.marker_column
936 };
937
938 if last_was_blank && current_pos == 0 {
940 stack.clear();
941 }
942 last_was_blank = false;
943
944 while let Some(&(pos, _)) = stack.last() {
946 if pos >= current_pos {
947 stack.pop();
948 } else {
949 break;
950 }
951 }
952
953 if let Some(&(_, parent_is_ordered)) = stack.last()
955 && parent_is_ordered != list_item.is_ordered
956 {
957 return true; }
959
960 stack.push((current_pos, list_item.is_ordered));
961 } else {
962 last_was_blank = false;
964 }
965 }
966
967 false
968 }
969
970 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
972 match self.line_offsets.binary_search(&offset) {
973 Ok(line) => (line + 1, 1),
974 Err(line) => {
975 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
976 (line, offset - line_start + 1)
977 }
978 }
979 }
980
981 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
983 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
985 return true;
986 }
987
988 self.code_spans()
990 .iter()
991 .any(|span| pos >= span.byte_offset && pos < span.byte_end)
992 }
993
994 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
996 if line_num > 0 {
997 self.lines.get(line_num - 1)
998 } else {
999 None
1000 }
1001 }
1002
1003 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
1005 self.line_info(line_num).map(|info| info.byte_offset)
1006 }
1007
1008 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
1010 let normalized_id = ref_id.to_lowercase();
1011 self.reference_defs_map
1012 .get(&normalized_id)
1013 .map(|&idx| self.reference_defs[idx].url.as_str())
1014 }
1015
1016 pub fn get_reference_def(&self, ref_id: &str) -> Option<&ReferenceDef> {
1018 let normalized_id = ref_id.to_lowercase();
1019 self.reference_defs_map
1020 .get(&normalized_id)
1021 .map(|&idx| &self.reference_defs[idx])
1022 }
1023
1024 pub fn has_reference_def(&self, ref_id: &str) -> bool {
1026 let normalized_id = ref_id.to_lowercase();
1027 self.reference_defs_map.contains_key(&normalized_id)
1028 }
1029
1030 pub fn is_in_list_block(&self, line_num: usize) -> bool {
1032 self.list_blocks
1033 .iter()
1034 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
1035 }
1036
1037 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
1039 self.list_blocks
1040 .iter()
1041 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
1042 }
1043
1044 pub fn is_in_code_block(&self, line_num: usize) -> bool {
1048 if line_num == 0 || line_num > self.lines.len() {
1049 return false;
1050 }
1051 self.lines[line_num - 1].in_code_block
1052 }
1053
1054 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
1056 if line_num == 0 || line_num > self.lines.len() {
1057 return false;
1058 }
1059 self.lines[line_num - 1].in_front_matter
1060 }
1061
1062 pub fn is_in_html_block(&self, line_num: usize) -> bool {
1064 if line_num == 0 || line_num > self.lines.len() {
1065 return false;
1066 }
1067 self.lines[line_num - 1].in_html_block
1068 }
1069
1070 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
1072 if line_num == 0 || line_num > self.lines.len() {
1073 return false;
1074 }
1075
1076 let col_0indexed = if col > 0 { col - 1 } else { 0 };
1080 let code_spans = self.code_spans();
1081 code_spans.iter().any(|span| {
1082 if line_num < span.line || line_num > span.end_line {
1084 return false;
1085 }
1086
1087 if span.line == span.end_line {
1088 col_0indexed >= span.start_col && col_0indexed < span.end_col
1090 } else if line_num == span.line {
1091 col_0indexed >= span.start_col
1093 } else if line_num == span.end_line {
1094 col_0indexed < span.end_col
1096 } else {
1097 true
1099 }
1100 })
1101 }
1102
1103 #[inline]
1105 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
1106 let code_spans = self.code_spans();
1107 code_spans
1108 .iter()
1109 .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
1110 }
1111
1112 #[inline]
1115 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
1116 self.reference_defs
1117 .iter()
1118 .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
1119 }
1120
1121 #[inline]
1125 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
1126 self.html_comment_ranges
1127 .iter()
1128 .any(|range| byte_pos >= range.start && byte_pos < range.end)
1129 }
1130
1131 #[inline]
1134 pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1135 self.html_tags()
1136 .iter()
1137 .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
1138 }
1139
1140 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1142 self.jinja_ranges
1143 .iter()
1144 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1145 }
1146
1147 #[inline]
1149 pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
1150 self.jsx_expression_ranges
1151 .iter()
1152 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1153 }
1154
1155 #[inline]
1157 pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
1158 self.mdx_comment_ranges
1159 .iter()
1160 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1161 }
1162
1163 pub fn jsx_expression_ranges(&self) -> &[(usize, usize)] {
1165 &self.jsx_expression_ranges
1166 }
1167
1168 pub fn mdx_comment_ranges(&self) -> &[(usize, usize)] {
1170 &self.mdx_comment_ranges
1171 }
1172
1173 #[inline]
1176 pub fn is_in_citation(&self, byte_pos: usize) -> bool {
1177 self.citation_ranges
1178 .iter()
1179 .any(|range| byte_pos >= range.start && byte_pos < range.end)
1180 }
1181
1182 pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
1184 &self.citation_ranges
1185 }
1186
1187 #[inline]
1189 pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
1190 self.shortcode_ranges
1191 .iter()
1192 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1193 }
1194
1195 pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
1197 &self.shortcode_ranges
1198 }
1199
1200 pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1202 self.reference_defs.iter().any(|def| {
1203 if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1204 byte_pos >= start && byte_pos < end
1205 } else {
1206 false
1207 }
1208 })
1209 }
1210
1211 pub fn has_char(&self, ch: char) -> bool {
1213 match ch {
1214 '#' => self.char_frequency.hash_count > 0,
1215 '*' => self.char_frequency.asterisk_count > 0,
1216 '_' => self.char_frequency.underscore_count > 0,
1217 '-' => self.char_frequency.hyphen_count > 0,
1218 '+' => self.char_frequency.plus_count > 0,
1219 '>' => self.char_frequency.gt_count > 0,
1220 '|' => self.char_frequency.pipe_count > 0,
1221 '[' => self.char_frequency.bracket_count > 0,
1222 '`' => self.char_frequency.backtick_count > 0,
1223 '<' => self.char_frequency.lt_count > 0,
1224 '!' => self.char_frequency.exclamation_count > 0,
1225 '\n' => self.char_frequency.newline_count > 0,
1226 _ => self.content.contains(ch), }
1228 }
1229
1230 pub fn char_count(&self, ch: char) -> usize {
1232 match ch {
1233 '#' => self.char_frequency.hash_count,
1234 '*' => self.char_frequency.asterisk_count,
1235 '_' => self.char_frequency.underscore_count,
1236 '-' => self.char_frequency.hyphen_count,
1237 '+' => self.char_frequency.plus_count,
1238 '>' => self.char_frequency.gt_count,
1239 '|' => self.char_frequency.pipe_count,
1240 '[' => self.char_frequency.bracket_count,
1241 '`' => self.char_frequency.backtick_count,
1242 '<' => self.char_frequency.lt_count,
1243 '!' => self.char_frequency.exclamation_count,
1244 '\n' => self.char_frequency.newline_count,
1245 _ => self.content.matches(ch).count(), }
1247 }
1248
1249 pub fn likely_has_headings(&self) -> bool {
1251 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
1253
1254 pub fn likely_has_lists(&self) -> bool {
1256 self.char_frequency.asterisk_count > 0
1257 || self.char_frequency.hyphen_count > 0
1258 || self.char_frequency.plus_count > 0
1259 }
1260
1261 pub fn likely_has_emphasis(&self) -> bool {
1263 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1264 }
1265
1266 pub fn likely_has_tables(&self) -> bool {
1268 self.char_frequency.pipe_count > 2
1269 }
1270
1271 pub fn likely_has_blockquotes(&self) -> bool {
1273 self.char_frequency.gt_count > 0
1274 }
1275
1276 pub fn likely_has_code(&self) -> bool {
1278 self.char_frequency.backtick_count > 0
1279 }
1280
1281 pub fn likely_has_links_or_images(&self) -> bool {
1283 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1284 }
1285
1286 pub fn likely_has_html(&self) -> bool {
1288 self.char_frequency.lt_count > 0
1289 }
1290
1291 pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1296 if let Some(line_info) = self.lines.get(line_idx)
1297 && let Some(ref bq) = line_info.blockquote
1298 {
1299 bq.prefix.trim_end().to_string()
1300 } else {
1301 String::new()
1302 }
1303 }
1304
1305 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1307 self.html_tags()
1308 .iter()
1309 .filter(|tag| tag.line == line_num)
1310 .cloned()
1311 .collect()
1312 }
1313
1314 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1316 self.emphasis_spans()
1317 .iter()
1318 .filter(|span| span.line == line_num)
1319 .cloned()
1320 .collect()
1321 }
1322
1323 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1325 self.table_rows()
1326 .iter()
1327 .filter(|row| row.line == line_num)
1328 .cloned()
1329 .collect()
1330 }
1331
1332 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1334 self.bare_urls()
1335 .iter()
1336 .filter(|url| url.line == line_num)
1337 .cloned()
1338 .collect()
1339 }
1340
1341 #[inline]
1347 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1348 let idx = match lines.binary_search_by(|line| {
1350 if byte_offset < line.byte_offset {
1351 std::cmp::Ordering::Greater
1352 } else if byte_offset > line.byte_offset + line.byte_len {
1353 std::cmp::Ordering::Less
1354 } else {
1355 std::cmp::Ordering::Equal
1356 }
1357 }) {
1358 Ok(idx) => idx,
1359 Err(idx) => idx.saturating_sub(1),
1360 };
1361
1362 let line = &lines[idx];
1363 let line_num = idx + 1;
1364 let col = byte_offset.saturating_sub(line.byte_offset);
1365
1366 (idx, line_num, col)
1367 }
1368
1369 #[inline]
1371 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1372 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1374
1375 if idx > 0 {
1377 let span = &code_spans[idx - 1];
1378 if offset >= span.byte_offset && offset < span.byte_end {
1379 return true;
1380 }
1381 }
1382
1383 false
1384 }
1385
1386 fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1390 use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1391
1392 let mut link_ranges = Vec::new();
1393 let mut options = Options::empty();
1394 options.insert(Options::ENABLE_WIKILINKS);
1395 options.insert(Options::ENABLE_FOOTNOTES);
1396
1397 let parser = Parser::new_ext(content, options).into_offset_iter();
1398 let mut link_stack: Vec<usize> = Vec::new();
1399
1400 for (event, range) in parser {
1401 match event {
1402 Event::Start(Tag::Link { .. }) => {
1403 link_stack.push(range.start);
1404 }
1405 Event::End(TagEnd::Link) => {
1406 if let Some(start_pos) = link_stack.pop() {
1407 link_ranges.push((start_pos, range.end));
1408 }
1409 }
1410 _ => {}
1411 }
1412 }
1413
1414 link_ranges
1415 }
1416
1417 fn parse_links(
1419 content: &'a str,
1420 lines: &[LineInfo],
1421 code_blocks: &[(usize, usize)],
1422 code_spans: &[CodeSpan],
1423 flavor: MarkdownFlavor,
1424 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1425 ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1426 use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1427 use std::collections::HashSet;
1428
1429 let mut links = Vec::with_capacity(content.len() / 500);
1430 let mut broken_links = Vec::new();
1431 let mut footnote_refs = Vec::new();
1432
1433 let mut found_positions = HashSet::new();
1435
1436 let mut options = Options::empty();
1446 options.insert(Options::ENABLE_WIKILINKS);
1447 options.insert(Options::ENABLE_FOOTNOTES);
1448
1449 let parser = Parser::new_with_broken_link_callback(
1450 content,
1451 options,
1452 Some(|link: BrokenLink<'_>| {
1453 broken_links.push(BrokenLinkInfo {
1454 reference: link.reference.to_string(),
1455 span: link.span.clone(),
1456 });
1457 None
1458 }),
1459 )
1460 .into_offset_iter();
1461
1462 let mut link_stack: Vec<(
1463 usize,
1464 usize,
1465 pulldown_cmark::CowStr<'a>,
1466 LinkType,
1467 pulldown_cmark::CowStr<'a>,
1468 )> = Vec::new();
1469 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1472 match event {
1473 Event::Start(Tag::Link {
1474 link_type,
1475 dest_url,
1476 id,
1477 ..
1478 }) => {
1479 link_stack.push((range.start, range.end, dest_url, link_type, id));
1481 text_chunks.clear();
1482 }
1483 Event::Text(text) if !link_stack.is_empty() => {
1484 text_chunks.push((text.to_string(), range.start, range.end));
1486 }
1487 Event::Code(code) if !link_stack.is_empty() => {
1488 let code_text = format!("`{code}`");
1490 text_chunks.push((code_text, range.start, range.end));
1491 }
1492 Event::End(TagEnd::Link) => {
1493 if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1494 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1496 text_chunks.clear();
1497 continue;
1498 }
1499
1500 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1502
1503 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1505 text_chunks.clear();
1506 continue;
1507 }
1508
1509 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1510
1511 let is_reference = matches!(
1512 link_type,
1513 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1514 );
1515
1516 let link_text = if start_pos < content.len() {
1519 let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1520
1521 let mut close_pos = None;
1525 let mut depth = 0;
1526 let mut in_code_span = false;
1527
1528 for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1529 let mut backslash_count = 0;
1531 let mut j = i;
1532 while j > 0 && link_bytes[j - 1] == b'\\' {
1533 backslash_count += 1;
1534 j -= 1;
1535 }
1536 let is_escaped = backslash_count % 2 != 0;
1537
1538 if byte == b'`' && !is_escaped {
1540 in_code_span = !in_code_span;
1541 }
1542
1543 if !is_escaped && !in_code_span {
1545 if byte == b'[' {
1546 depth += 1;
1547 } else if byte == b']' {
1548 if depth == 0 {
1549 close_pos = Some(i);
1551 break;
1552 } else {
1553 depth -= 1;
1554 }
1555 }
1556 }
1557 }
1558
1559 if let Some(pos) = close_pos {
1560 Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1561 } else {
1562 Cow::Borrowed("")
1563 }
1564 } else {
1565 Cow::Borrowed("")
1566 };
1567
1568 let reference_id = if is_reference && !ref_id.is_empty() {
1570 Some(Cow::Owned(ref_id.to_lowercase()))
1571 } else if is_reference {
1572 Some(Cow::Owned(link_text.to_lowercase()))
1574 } else {
1575 None
1576 };
1577
1578 found_positions.insert(start_pos);
1580
1581 links.push(ParsedLink {
1582 line: line_num,
1583 start_col: col_start,
1584 end_col: col_end,
1585 byte_offset: start_pos,
1586 byte_end: range.end,
1587 text: link_text,
1588 url: Cow::Owned(url.to_string()),
1589 is_reference,
1590 reference_id,
1591 link_type,
1592 });
1593
1594 text_chunks.clear();
1595 }
1596 }
1597 Event::FootnoteReference(footnote_id) => {
1598 if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1601 continue;
1602 }
1603
1604 let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1605 footnote_refs.push(FootnoteRef {
1606 id: footnote_id.to_string(),
1607 line: line_num,
1608 byte_offset: range.start,
1609 byte_end: range.end,
1610 });
1611 }
1612 _ => {}
1613 }
1614 }
1615
1616 for cap in LINK_PATTERN.captures_iter(content) {
1620 let full_match = cap.get(0).unwrap();
1621 let match_start = full_match.start();
1622 let match_end = full_match.end();
1623
1624 if found_positions.contains(&match_start) {
1626 continue;
1627 }
1628
1629 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1631 continue;
1632 }
1633
1634 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1636 continue;
1637 }
1638
1639 if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1641 continue;
1642 }
1643
1644 if Self::is_offset_in_code_span(code_spans, match_start) {
1646 continue;
1647 }
1648
1649 if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1651 continue;
1652 }
1653
1654 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1656
1657 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1659 continue;
1660 }
1661
1662 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1663
1664 let text = cap.get(1).map_or("", |m| m.as_str());
1665
1666 if let Some(ref_id) = cap.get(6) {
1668 let ref_id_str = ref_id.as_str();
1669 let normalized_ref = if ref_id_str.is_empty() {
1670 Cow::Owned(text.to_lowercase()) } else {
1672 Cow::Owned(ref_id_str.to_lowercase())
1673 };
1674
1675 links.push(ParsedLink {
1677 line: line_num,
1678 start_col: col_start,
1679 end_col: col_end,
1680 byte_offset: match_start,
1681 byte_end: match_end,
1682 text: Cow::Borrowed(text),
1683 url: Cow::Borrowed(""), is_reference: true,
1685 reference_id: Some(normalized_ref),
1686 link_type: LinkType::Reference, });
1688 }
1689 }
1690
1691 (links, broken_links, footnote_refs)
1692 }
1693
1694 fn parse_images(
1696 content: &'a str,
1697 lines: &[LineInfo],
1698 code_blocks: &[(usize, usize)],
1699 code_spans: &[CodeSpan],
1700 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1701 ) -> Vec<ParsedImage<'a>> {
1702 use crate::utils::skip_context::is_in_html_comment_ranges;
1703 use std::collections::HashSet;
1704
1705 let mut images = Vec::with_capacity(content.len() / 1000);
1707 let mut found_positions = HashSet::new();
1708
1709 let parser = Parser::new(content).into_offset_iter();
1711 let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1712 Vec::new();
1713 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1716 match event {
1717 Event::Start(Tag::Image {
1718 link_type,
1719 dest_url,
1720 id,
1721 ..
1722 }) => {
1723 image_stack.push((range.start, dest_url, link_type, id));
1724 text_chunks.clear();
1725 }
1726 Event::Text(text) if !image_stack.is_empty() => {
1727 text_chunks.push((text.to_string(), range.start, range.end));
1728 }
1729 Event::Code(code) if !image_stack.is_empty() => {
1730 let code_text = format!("`{code}`");
1731 text_chunks.push((code_text, range.start, range.end));
1732 }
1733 Event::End(TagEnd::Image) => {
1734 if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1735 if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1737 continue;
1738 }
1739
1740 if Self::is_offset_in_code_span(code_spans, start_pos) {
1742 continue;
1743 }
1744
1745 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1747 continue;
1748 }
1749
1750 let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1752 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1753
1754 let is_reference = matches!(
1755 link_type,
1756 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1757 );
1758
1759 let alt_text = if start_pos < content.len() {
1762 let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1763
1764 let mut close_pos = None;
1767 let mut depth = 0;
1768
1769 if image_bytes.len() > 2 {
1770 for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1771 let mut backslash_count = 0;
1773 let mut j = i;
1774 while j > 0 && image_bytes[j - 1] == b'\\' {
1775 backslash_count += 1;
1776 j -= 1;
1777 }
1778 let is_escaped = backslash_count % 2 != 0;
1779
1780 if !is_escaped {
1781 if byte == b'[' {
1782 depth += 1;
1783 } else if byte == b']' {
1784 if depth == 0 {
1785 close_pos = Some(i);
1787 break;
1788 } else {
1789 depth -= 1;
1790 }
1791 }
1792 }
1793 }
1794 }
1795
1796 if let Some(pos) = close_pos {
1797 Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1798 } else {
1799 Cow::Borrowed("")
1800 }
1801 } else {
1802 Cow::Borrowed("")
1803 };
1804
1805 let reference_id = if is_reference && !ref_id.is_empty() {
1806 Some(Cow::Owned(ref_id.to_lowercase()))
1807 } else if is_reference {
1808 Some(Cow::Owned(alt_text.to_lowercase())) } else {
1810 None
1811 };
1812
1813 found_positions.insert(start_pos);
1814 images.push(ParsedImage {
1815 line: line_num,
1816 start_col: col_start,
1817 end_col: col_end,
1818 byte_offset: start_pos,
1819 byte_end: range.end,
1820 alt_text,
1821 url: Cow::Owned(url.to_string()),
1822 is_reference,
1823 reference_id,
1824 link_type,
1825 });
1826 }
1827 }
1828 _ => {}
1829 }
1830 }
1831
1832 for cap in IMAGE_PATTERN.captures_iter(content) {
1834 let full_match = cap.get(0).unwrap();
1835 let match_start = full_match.start();
1836 let match_end = full_match.end();
1837
1838 if found_positions.contains(&match_start) {
1840 continue;
1841 }
1842
1843 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1845 continue;
1846 }
1847
1848 if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1850 || Self::is_offset_in_code_span(code_spans, match_start)
1851 || is_in_html_comment_ranges(html_comment_ranges, match_start)
1852 {
1853 continue;
1854 }
1855
1856 if let Some(ref_id) = cap.get(6) {
1858 let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1859 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1860 let alt_text = cap.get(1).map_or("", |m| m.as_str());
1861 let ref_id_str = ref_id.as_str();
1862 let normalized_ref = if ref_id_str.is_empty() {
1863 Cow::Owned(alt_text.to_lowercase())
1864 } else {
1865 Cow::Owned(ref_id_str.to_lowercase())
1866 };
1867
1868 images.push(ParsedImage {
1869 line: line_num,
1870 start_col: col_start,
1871 end_col: col_end,
1872 byte_offset: match_start,
1873 byte_end: match_end,
1874 alt_text: Cow::Borrowed(alt_text),
1875 url: Cow::Borrowed(""),
1876 is_reference: true,
1877 reference_id: Some(normalized_ref),
1878 link_type: LinkType::Reference, });
1880 }
1881 }
1882
1883 images
1884 }
1885
1886 fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1888 let mut refs = Vec::with_capacity(lines.len() / 20); for (line_idx, line_info) in lines.iter().enumerate() {
1892 if line_info.in_code_block {
1894 continue;
1895 }
1896
1897 let line = line_info.content(content);
1898 let line_num = line_idx + 1;
1899
1900 if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1901 let id_raw = cap.get(1).unwrap().as_str();
1902
1903 if id_raw.starts_with('^') {
1906 continue;
1907 }
1908
1909 let id = id_raw.to_lowercase();
1910 let url = cap.get(2).unwrap().as_str().to_string();
1911 let title_match = cap.get(3).or_else(|| cap.get(4));
1912 let title = title_match.map(|m| m.as_str().to_string());
1913
1914 let match_obj = cap.get(0).unwrap();
1917 let byte_offset = line_info.byte_offset + match_obj.start();
1918 let byte_end = line_info.byte_offset + match_obj.end();
1919
1920 let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1922 let start = line_info.byte_offset + m.start().saturating_sub(1);
1924 let end = line_info.byte_offset + m.end() + 1; (Some(start), Some(end))
1926 } else {
1927 (None, None)
1928 };
1929
1930 refs.push(ReferenceDef {
1931 line: line_num,
1932 id,
1933 url,
1934 title,
1935 byte_offset,
1936 byte_end,
1937 title_byte_start,
1938 title_byte_end,
1939 });
1940 }
1941 }
1942
1943 refs
1944 }
1945
1946 #[inline]
1950 fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
1951 let trimmed_start = line.trim_start();
1952 if !trimmed_start.starts_with('>') {
1953 return None;
1954 }
1955
1956 let mut remaining = line;
1958 let mut total_prefix_len = 0;
1959
1960 loop {
1961 let trimmed = remaining.trim_start();
1962 if !trimmed.starts_with('>') {
1963 break;
1964 }
1965
1966 let leading_ws_len = remaining.len() - trimmed.len();
1968 total_prefix_len += leading_ws_len + 1;
1969
1970 let after_gt = &trimmed[1..];
1971
1972 if let Some(stripped) = after_gt.strip_prefix(' ') {
1974 total_prefix_len += 1;
1975 remaining = stripped;
1976 } else if let Some(stripped) = after_gt.strip_prefix('\t') {
1977 total_prefix_len += 1;
1978 remaining = stripped;
1979 } else {
1980 remaining = after_gt;
1981 }
1982 }
1983
1984 Some((&line[..total_prefix_len], remaining))
1985 }
1986
1987 fn detect_list_items_and_emphasis_with_pulldown(
2011 content: &str,
2012 line_offsets: &[usize],
2013 flavor: MarkdownFlavor,
2014 front_matter_end: usize,
2015 code_blocks: &[(usize, usize)],
2016 ) -> (ListItemMap, Vec<EmphasisSpan>) {
2017 use std::collections::HashMap;
2018
2019 let mut list_items = HashMap::new();
2020 let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
2021
2022 let mut options = Options::empty();
2023 options.insert(Options::ENABLE_TABLES);
2024 options.insert(Options::ENABLE_FOOTNOTES);
2025 options.insert(Options::ENABLE_STRIKETHROUGH);
2026 options.insert(Options::ENABLE_TASKLISTS);
2027 options.insert(Options::ENABLE_GFM);
2029
2030 let _ = flavor;
2032
2033 let parser = Parser::new_ext(content, options).into_offset_iter();
2034 let mut list_depth: usize = 0;
2035 let mut list_stack: Vec<bool> = Vec::new();
2036
2037 for (event, range) in parser {
2038 match event {
2039 Event::Start(Tag::Emphasis) | Event::Start(Tag::Strong) => {
2041 let marker_count = if matches!(event, Event::Start(Tag::Strong)) {
2042 2
2043 } else {
2044 1
2045 };
2046 let match_start = range.start;
2047 let match_end = range.end;
2048
2049 if !CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2051 let marker = content[match_start..].chars().next().unwrap_or('*');
2053 if marker == '*' || marker == '_' {
2054 let content_start = match_start + marker_count;
2056 let content_end = if match_end >= marker_count {
2057 match_end - marker_count
2058 } else {
2059 match_end
2060 };
2061 let content_part = if content_start < content_end && content_end <= content.len() {
2062 &content[content_start..content_end]
2063 } else {
2064 ""
2065 };
2066
2067 let line_idx = match line_offsets.binary_search(&match_start) {
2069 Ok(idx) => idx,
2070 Err(idx) => idx.saturating_sub(1),
2071 };
2072 let line_num = line_idx + 1;
2073 let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
2074 let col_start = match_start - line_start;
2075 let col_end = match_end - line_start;
2076
2077 emphasis_spans.push(EmphasisSpan {
2078 line: line_num,
2079 start_col: col_start,
2080 end_col: col_end,
2081 byte_offset: match_start,
2082 byte_end: match_end,
2083 marker,
2084 marker_count,
2085 content: content_part.to_string(),
2086 });
2087 }
2088 }
2089 }
2090 Event::Start(Tag::List(start_number)) => {
2091 list_depth += 1;
2092 list_stack.push(start_number.is_some());
2093 }
2094 Event::End(TagEnd::List(_)) => {
2095 list_depth = list_depth.saturating_sub(1);
2096 list_stack.pop();
2097 }
2098 Event::Start(Tag::Item) if list_depth > 0 => {
2099 let current_list_is_ordered = list_stack.last().copied().unwrap_or(false);
2101 let item_start = range.start;
2103
2104 let mut line_idx = match line_offsets.binary_search(&item_start) {
2106 Ok(idx) => idx,
2107 Err(idx) => idx.saturating_sub(1),
2108 };
2109
2110 if item_start < content.len() && content.as_bytes()[item_start] == b'\n' {
2114 line_idx += 1;
2115 }
2116
2117 if front_matter_end > 0 && line_idx < front_matter_end {
2119 continue;
2120 }
2121
2122 if line_idx < line_offsets.len() {
2123 let line_start_byte = line_offsets[line_idx];
2124 let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
2125 let line = &content[line_start_byte..line_end.min(content.len())];
2126
2127 let line = line
2129 .strip_suffix('\n')
2130 .or_else(|| line.strip_suffix("\r\n"))
2131 .unwrap_or(line);
2132
2133 let blockquote_parse = Self::parse_blockquote_prefix(line);
2135 let (blockquote_prefix_len, line_to_parse) = if let Some((prefix, content)) = blockquote_parse {
2136 (prefix.len(), content)
2137 } else {
2138 (0, line)
2139 };
2140
2141 if current_list_is_ordered {
2143 if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
2144 Self::parse_ordered_list(line_to_parse)
2145 {
2146 let marker = format!("{number_str}{delimiter}");
2147 let marker_column = blockquote_prefix_len + leading_spaces.len();
2148 let content_column = marker_column + marker.len() + spacing.len();
2149 let number = number_str.parse().ok();
2150
2151 list_items.entry(line_start_byte).or_insert((
2152 true,
2153 marker,
2154 marker_column,
2155 content_column,
2156 number,
2157 ));
2158 }
2159 } else if let Some((leading_spaces, marker, spacing, _content)) =
2160 Self::parse_unordered_list(line_to_parse)
2161 {
2162 let marker_column = blockquote_prefix_len + leading_spaces.len();
2163 let content_column = marker_column + 1 + spacing.len();
2164
2165 list_items.entry(line_start_byte).or_insert((
2166 false,
2167 marker.to_string(),
2168 marker_column,
2169 content_column,
2170 None,
2171 ));
2172 }
2173 }
2174 }
2175 _ => {}
2176 }
2177 }
2178
2179 (list_items, emphasis_spans)
2180 }
2181
2182 #[inline]
2186 fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
2187 let bytes = line.as_bytes();
2188 let mut i = 0;
2189
2190 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2192 i += 1;
2193 }
2194
2195 if i >= bytes.len() {
2197 return None;
2198 }
2199 let marker = bytes[i] as char;
2200 if marker != '-' && marker != '*' && marker != '+' {
2201 return None;
2202 }
2203 let marker_pos = i;
2204 i += 1;
2205
2206 let spacing_start = i;
2208 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2209 i += 1;
2210 }
2211
2212 Some((&line[..marker_pos], marker, &line[spacing_start..i], &line[i..]))
2213 }
2214
2215 #[inline]
2219 fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
2220 let bytes = line.as_bytes();
2221 let mut i = 0;
2222
2223 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2225 i += 1;
2226 }
2227
2228 let number_start = i;
2230 while i < bytes.len() && bytes[i].is_ascii_digit() {
2231 i += 1;
2232 }
2233 if i == number_start {
2234 return None; }
2236
2237 if i >= bytes.len() {
2239 return None;
2240 }
2241 let delimiter = bytes[i] as char;
2242 if delimiter != '.' && delimiter != ')' {
2243 return None;
2244 }
2245 let delimiter_pos = i;
2246 i += 1;
2247
2248 let spacing_start = i;
2250 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2251 i += 1;
2252 }
2253
2254 Some((
2255 &line[..number_start],
2256 &line[number_start..delimiter_pos],
2257 delimiter,
2258 &line[spacing_start..i],
2259 &line[i..],
2260 ))
2261 }
2262
2263 fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
2266 let num_lines = line_offsets.len();
2267 let mut in_code_block = vec![false; num_lines];
2268
2269 for &(start, end) in code_blocks {
2271 let safe_start = if start > 0 && !content.is_char_boundary(start) {
2273 let mut boundary = start;
2274 while boundary > 0 && !content.is_char_boundary(boundary) {
2275 boundary -= 1;
2276 }
2277 boundary
2278 } else {
2279 start
2280 };
2281
2282 let safe_end = if end < content.len() && !content.is_char_boundary(end) {
2283 let mut boundary = end;
2284 while boundary < content.len() && !content.is_char_boundary(boundary) {
2285 boundary += 1;
2286 }
2287 boundary
2288 } else {
2289 end.min(content.len())
2290 };
2291
2292 let first_line_after = line_offsets.partition_point(|&offset| offset <= safe_start);
2311 let first_line = first_line_after.saturating_sub(1);
2312 let last_line = line_offsets.partition_point(|&offset| offset < safe_end);
2313
2314 for flag in in_code_block.iter_mut().take(last_line).skip(first_line) {
2316 *flag = true;
2317 }
2318 }
2319
2320 in_code_block
2321 }
2322
2323 fn compute_math_block_line_map(content: &str, code_block_map: &[bool]) -> Vec<bool> {
2326 let content_lines: Vec<&str> = content.lines().collect();
2327 let num_lines = content_lines.len();
2328 let mut in_math_block = vec![false; num_lines];
2329
2330 let mut inside_math = false;
2331
2332 for (i, line) in content_lines.iter().enumerate() {
2333 if code_block_map.get(i).copied().unwrap_or(false) {
2335 continue;
2336 }
2337
2338 let trimmed = line.trim();
2339
2340 if trimmed == "$$" {
2343 if inside_math {
2344 in_math_block[i] = true;
2346 inside_math = false;
2347 } else {
2348 in_math_block[i] = true;
2350 inside_math = true;
2351 }
2352 } else if inside_math {
2353 in_math_block[i] = true;
2355 }
2356 }
2357
2358 in_math_block
2359 }
2360
2361 fn compute_basic_line_info(
2364 content: &str,
2365 line_offsets: &[usize],
2366 code_blocks: &[(usize, usize)],
2367 flavor: MarkdownFlavor,
2368 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2369 autodoc_ranges: &[crate::utils::skip_context::ByteRange],
2370 quarto_div_ranges: &[crate::utils::skip_context::ByteRange],
2371 ) -> (Vec<LineInfo>, Vec<EmphasisSpan>) {
2372 let content_lines: Vec<&str> = content.lines().collect();
2373 let mut lines = Vec::with_capacity(content_lines.len());
2374
2375 let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
2377
2378 let math_block_map = Self::compute_math_block_line_map(content, &code_block_map);
2380
2381 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2384
2385 let (list_item_map, emphasis_spans) = Self::detect_list_items_and_emphasis_with_pulldown(
2388 content,
2389 line_offsets,
2390 flavor,
2391 front_matter_end,
2392 code_blocks,
2393 );
2394
2395 for (i, line) in content_lines.iter().enumerate() {
2396 let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
2397 let indent = line.len() - line.trim_start().len();
2398 let visual_indent = ElementCache::calculate_indentation_width_default(line);
2400
2401 let blockquote_parse = Self::parse_blockquote_prefix(line);
2403
2404 let is_blank = if let Some((_, content)) = blockquote_parse {
2406 content.trim().is_empty()
2408 } else {
2409 line.trim().is_empty()
2410 };
2411
2412 let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
2414
2415 let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
2417 && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
2418 let line_end_offset = byte_offset + line.len();
2421 let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
2422 html_comment_ranges,
2423 byte_offset,
2424 line_end_offset,
2425 );
2426 let list_item =
2429 list_item_map
2430 .get(&byte_offset)
2431 .map(
2432 |(is_ordered, marker, marker_column, content_column, number)| ListItemInfo {
2433 marker: marker.clone(),
2434 is_ordered: *is_ordered,
2435 number: *number,
2436 marker_column: *marker_column,
2437 content_column: *content_column,
2438 },
2439 );
2440
2441 let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2444 let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2445
2446 let in_math_block = math_block_map.get(i).copied().unwrap_or(false);
2448
2449 let in_quarto_div = flavor == MarkdownFlavor::Quarto
2451 && crate::utils::quarto_divs::is_within_div_block_ranges(quarto_div_ranges, byte_offset);
2452
2453 lines.push(LineInfo {
2454 byte_offset,
2455 byte_len: line.len(),
2456 indent,
2457 visual_indent,
2458 is_blank,
2459 in_code_block,
2460 in_front_matter,
2461 in_html_block: false, in_html_comment,
2463 list_item,
2464 heading: None, blockquote: None, in_mkdocstrings,
2467 in_esm_block: false, in_code_span_continuation: false, is_horizontal_rule: is_hr,
2470 in_math_block,
2471 in_quarto_div,
2472 in_jsx_expression: false, in_mdx_comment: false, in_jsx_component: false, in_jsx_fragment: false, in_admonition: false, in_content_tab: false, in_definition_list: false, });
2480 }
2481
2482 (lines, emphasis_spans)
2483 }
2484
2485 fn detect_headings_and_blockquotes(
2487 content: &str,
2488 lines: &mut [LineInfo],
2489 flavor: MarkdownFlavor,
2490 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2491 link_byte_ranges: &[(usize, usize)],
2492 ) {
2493 static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
2495 LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
2496 static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
2497 LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
2498
2499 let content_lines: Vec<&str> = content.lines().collect();
2500
2501 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2503
2504 for i in 0..lines.len() {
2506 let line = content_lines[i];
2507
2508 if !(front_matter_end > 0 && i < front_matter_end)
2513 && let Some(bq) = parse_blockquote_detailed(line)
2514 {
2515 let nesting_level = bq.markers.len();
2516 let marker_column = bq.indent.len();
2517 let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
2518 let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
2519 let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
2520 let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
2521
2522 lines[i].blockquote = Some(BlockquoteInfo {
2523 nesting_level,
2524 indent: bq.indent.to_string(),
2525 marker_column,
2526 prefix,
2527 content: bq.content.to_string(),
2528 has_no_space_after_marker: has_no_space,
2529 has_multiple_spaces_after_marker: has_multiple_spaces,
2530 needs_md028_fix,
2531 });
2532
2533 if !lines[i].in_code_block && is_horizontal_rule_content(bq.content.trim()) {
2536 lines[i].is_horizontal_rule = true;
2537 }
2538 }
2539
2540 if lines[i].in_code_block {
2542 continue;
2543 }
2544
2545 if front_matter_end > 0 && i < front_matter_end {
2547 continue;
2548 }
2549
2550 if lines[i].in_html_block {
2552 continue;
2553 }
2554
2555 if lines[i].is_blank {
2557 continue;
2558 }
2559
2560 let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
2563 crate::utils::mkdocs_snippets::is_snippet_section_start(line)
2564 || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
2565 } else {
2566 false
2567 };
2568
2569 if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
2570 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
2572 continue;
2573 }
2574 let line_offset = lines[i].byte_offset;
2577 if link_byte_ranges
2578 .iter()
2579 .any(|&(start, end)| line_offset > start && line_offset < end)
2580 {
2581 continue;
2582 }
2583 let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
2584 let hashes = caps.get(2).map_or("", |m| m.as_str());
2585 let spaces_after = caps.get(3).map_or("", |m| m.as_str());
2586 let rest = caps.get(4).map_or("", |m| m.as_str());
2587
2588 let level = hashes.len() as u8;
2589 let marker_column = leading_spaces.len();
2590
2591 let (text, has_closing, closing_seq) = {
2593 let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
2595 if rest[id_start..].trim_end().ends_with('}') {
2597 (&rest[..id_start], &rest[id_start..])
2599 } else {
2600 (rest, "")
2601 }
2602 } else {
2603 (rest, "")
2604 };
2605
2606 let trimmed_rest = rest_without_id.trim_end();
2608 if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
2609 let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();
2612
2613 let last_hash_char_idx = char_positions
2615 .iter()
2616 .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);
2617
2618 if let Some(mut char_idx) = last_hash_char_idx {
2619 while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
2621 char_idx -= 1;
2622 }
2623
2624 let start_of_hashes = char_positions[char_idx].0;
2626
2627 let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();
2629
2630 let potential_closing = &trimmed_rest[start_of_hashes..];
2632 let is_all_hashes = potential_closing.chars().all(|c| c == '#');
2633
2634 if is_all_hashes && has_space_before {
2635 let closing_hashes = potential_closing.to_string();
2637 let text_part = if !custom_id_part.is_empty() {
2640 format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
2643 } else {
2644 trimmed_rest[..start_of_hashes].trim_end().to_string()
2645 };
2646 (text_part, true, closing_hashes)
2647 } else {
2648 (rest.to_string(), false, String::new())
2650 }
2651 } else {
2652 (rest.to_string(), false, String::new())
2654 }
2655 } else {
2656 (rest.to_string(), false, String::new())
2658 }
2659 };
2660
2661 let content_column = marker_column + hashes.len() + spaces_after.len();
2662
2663 let raw_text = text.trim().to_string();
2665 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2666
2667 if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
2669 let next_line = content_lines[i + 1];
2670 if !lines[i + 1].in_code_block
2671 && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
2672 && let Some(next_line_id) =
2673 crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
2674 {
2675 custom_id = Some(next_line_id);
2676 }
2677 }
2678
2679 let is_valid = !spaces_after.is_empty()
2689 || rest.is_empty()
2690 || level > 1
2691 || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());
2692
2693 lines[i].heading = Some(HeadingInfo {
2694 level,
2695 style: HeadingStyle::ATX,
2696 marker: hashes.to_string(),
2697 marker_column,
2698 content_column,
2699 text: clean_text,
2700 custom_id,
2701 raw_text,
2702 has_closing_sequence: has_closing,
2703 closing_sequence: closing_seq,
2704 is_valid,
2705 });
2706 }
2707 else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2709 let next_line = content_lines[i + 1];
2710 if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2711 if front_matter_end > 0 && i < front_matter_end {
2713 continue;
2714 }
2715
2716 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2718 {
2719 continue;
2720 }
2721
2722 let content_line = line.trim();
2725
2726 if content_line.starts_with('-') || content_line.starts_with('*') || content_line.starts_with('+') {
2728 continue;
2729 }
2730
2731 if content_line.starts_with('_') {
2733 let non_ws: String = content_line.chars().filter(|c| !c.is_whitespace()).collect();
2734 if non_ws.len() >= 3 && non_ws.chars().all(|c| c == '_') {
2735 continue;
2736 }
2737 }
2738
2739 if let Some(first_char) = content_line.chars().next()
2741 && first_char.is_ascii_digit()
2742 {
2743 let num_end = content_line.chars().take_while(|c| c.is_ascii_digit()).count();
2744 if num_end < content_line.len() {
2745 let next = content_line.chars().nth(num_end);
2746 if next == Some('.') || next == Some(')') {
2747 continue;
2748 }
2749 }
2750 }
2751
2752 if ATX_HEADING_REGEX.is_match(line) {
2754 continue;
2755 }
2756
2757 if content_line.starts_with('>') {
2759 continue;
2760 }
2761
2762 let trimmed_start = line.trim_start();
2764 if trimmed_start.len() >= 3 {
2765 let first_three: String = trimmed_start.chars().take(3).collect();
2766 if first_three == "```" || first_three == "~~~" {
2767 continue;
2768 }
2769 }
2770
2771 if content_line.starts_with('<') {
2773 continue;
2774 }
2775
2776 let underline = next_line.trim();
2777
2778 let level = if underline.starts_with('=') { 1 } else { 2 };
2779 let style = if level == 1 {
2780 HeadingStyle::Setext1
2781 } else {
2782 HeadingStyle::Setext2
2783 };
2784
2785 let raw_text = line.trim().to_string();
2787 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2788
2789 if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2791 let attr_line = content_lines[i + 2];
2792 if !lines[i + 2].in_code_block
2793 && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2794 && let Some(attr_line_id) =
2795 crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2796 {
2797 custom_id = Some(attr_line_id);
2798 }
2799 }
2800
2801 lines[i].heading = Some(HeadingInfo {
2802 level,
2803 style,
2804 marker: underline.to_string(),
2805 marker_column: next_line.len() - next_line.trim_start().len(),
2806 content_column: lines[i].indent,
2807 text: clean_text,
2808 custom_id,
2809 raw_text,
2810 has_closing_sequence: false,
2811 closing_sequence: String::new(),
2812 is_valid: true, });
2814 }
2815 }
2816 }
2817 }
2818
2819 fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2821 const BLOCK_ELEMENTS: &[&str] = &[
2824 "address",
2825 "article",
2826 "aside",
2827 "audio",
2828 "blockquote",
2829 "canvas",
2830 "details",
2831 "dialog",
2832 "dd",
2833 "div",
2834 "dl",
2835 "dt",
2836 "embed",
2837 "fieldset",
2838 "figcaption",
2839 "figure",
2840 "footer",
2841 "form",
2842 "h1",
2843 "h2",
2844 "h3",
2845 "h4",
2846 "h5",
2847 "h6",
2848 "header",
2849 "hr",
2850 "iframe",
2851 "li",
2852 "main",
2853 "menu",
2854 "nav",
2855 "noscript",
2856 "object",
2857 "ol",
2858 "p",
2859 "picture",
2860 "pre",
2861 "script",
2862 "search",
2863 "section",
2864 "source",
2865 "style",
2866 "summary",
2867 "svg",
2868 "table",
2869 "tbody",
2870 "td",
2871 "template",
2872 "textarea",
2873 "tfoot",
2874 "th",
2875 "thead",
2876 "tr",
2877 "track",
2878 "ul",
2879 "video",
2880 ];
2881
2882 let mut i = 0;
2883 while i < lines.len() {
2884 if lines[i].in_code_block || lines[i].in_front_matter {
2886 i += 1;
2887 continue;
2888 }
2889
2890 let trimmed = lines[i].content(content).trim_start();
2891
2892 if trimmed.starts_with('<') && trimmed.len() > 1 {
2894 let after_bracket = &trimmed[1..];
2896 let is_closing = after_bracket.starts_with('/');
2897 let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2898
2899 let tag_name = tag_start
2901 .chars()
2902 .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2903 .collect::<String>()
2904 .to_lowercase();
2905
2906 if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2908 lines[i].in_html_block = true;
2910
2911 if !is_closing {
2914 let closing_tag = format!("</{tag_name}>");
2915 let allow_blank_lines = tag_name == "style" || tag_name == "script";
2917 let mut j = i + 1;
2918 let mut found_closing_tag = false;
2919 while j < lines.len() && j < i + 100 {
2920 if !allow_blank_lines && lines[j].is_blank {
2923 break;
2924 }
2925
2926 lines[j].in_html_block = true;
2927
2928 if lines[j].content(content).contains(&closing_tag) {
2930 found_closing_tag = true;
2931 }
2932
2933 if found_closing_tag {
2936 j += 1;
2937 while j < lines.len() && j < i + 100 {
2939 if lines[j].is_blank {
2940 break;
2941 }
2942 lines[j].in_html_block = true;
2943 j += 1;
2944 }
2945 break;
2946 }
2947 j += 1;
2948 }
2949 }
2950 }
2951 }
2952
2953 i += 1;
2954 }
2955 }
2956
2957 fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2960 if !flavor.supports_esm_blocks() {
2962 return;
2963 }
2964
2965 let mut in_multiline_import = false;
2966
2967 for line in lines.iter_mut() {
2968 if line.in_code_block || line.in_front_matter || line.in_html_comment {
2970 in_multiline_import = false;
2971 continue;
2972 }
2973
2974 let line_content = line.content(content);
2975 let trimmed = line_content.trim();
2976
2977 if in_multiline_import {
2979 line.in_esm_block = true;
2980 if trimmed.ends_with('\'')
2983 || trimmed.ends_with('"')
2984 || trimmed.ends_with("';")
2985 || trimmed.ends_with("\";")
2986 || line_content.contains(';')
2987 {
2988 in_multiline_import = false;
2989 }
2990 continue;
2991 }
2992
2993 if line.is_blank {
2995 continue;
2996 }
2997
2998 if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
3000 line.in_esm_block = true;
3001
3002 let is_import = trimmed.starts_with("import ");
3010
3011 let is_complete =
3013 trimmed.ends_with(';')
3015 || (trimmed.contains(" from ") && (trimmed.ends_with('\'') || trimmed.ends_with('"')))
3017 || (!is_import && !trimmed.contains(" from ") && (
3019 trimmed.starts_with("export const ")
3020 || trimmed.starts_with("export let ")
3021 || trimmed.starts_with("export var ")
3022 || trimmed.starts_with("export function ")
3023 || trimmed.starts_with("export class ")
3024 || trimmed.starts_with("export default ")
3025 ));
3026
3027 if !is_complete && is_import {
3028 if trimmed.contains('{') && !trimmed.contains('}') {
3032 in_multiline_import = true;
3033 }
3034 }
3035 }
3036 }
3037 }
3038
3039 fn detect_jsx_and_mdx_comments(
3042 content: &str,
3043 lines: &mut [LineInfo],
3044 flavor: MarkdownFlavor,
3045 code_blocks: &[(usize, usize)],
3046 ) -> (ByteRanges, ByteRanges) {
3047 if !flavor.supports_jsx() {
3049 return (Vec::new(), Vec::new());
3050 }
3051
3052 let mut jsx_expression_ranges: Vec<(usize, usize)> = Vec::new();
3053 let mut mdx_comment_ranges: Vec<(usize, usize)> = Vec::new();
3054
3055 if !content.contains('{') {
3057 return (jsx_expression_ranges, mdx_comment_ranges);
3058 }
3059
3060 let bytes = content.as_bytes();
3061 let mut i = 0;
3062
3063 while i < bytes.len() {
3064 if bytes[i] == b'{' {
3065 if code_blocks.iter().any(|(start, end)| i >= *start && i < *end) {
3067 i += 1;
3068 continue;
3069 }
3070
3071 let start = i;
3072
3073 if i + 2 < bytes.len() && &bytes[i + 1..i + 3] == b"/*" {
3075 let mut j = i + 3;
3077 while j + 2 < bytes.len() {
3078 if &bytes[j..j + 2] == b"*/" && j + 2 < bytes.len() && bytes[j + 2] == b'}' {
3079 let end = j + 3;
3080 mdx_comment_ranges.push((start, end));
3081
3082 Self::mark_lines_in_range(lines, content, start, end, |line| {
3084 line.in_mdx_comment = true;
3085 });
3086
3087 i = end;
3088 break;
3089 }
3090 j += 1;
3091 }
3092 if j + 2 >= bytes.len() {
3093 mdx_comment_ranges.push((start, bytes.len()));
3095 Self::mark_lines_in_range(lines, content, start, bytes.len(), |line| {
3096 line.in_mdx_comment = true;
3097 });
3098 break;
3099 }
3100 } else {
3101 let mut brace_depth = 1;
3104 let mut j = i + 1;
3105 let mut in_string = false;
3106 let mut string_char = b'"';
3107
3108 while j < bytes.len() && brace_depth > 0 {
3109 let c = bytes[j];
3110
3111 if !in_string && (c == b'"' || c == b'\'' || c == b'`') {
3113 in_string = true;
3114 string_char = c;
3115 } else if in_string && c == string_char && (j == 0 || bytes[j - 1] != b'\\') {
3116 in_string = false;
3117 } else if !in_string {
3118 if c == b'{' {
3119 brace_depth += 1;
3120 } else if c == b'}' {
3121 brace_depth -= 1;
3122 }
3123 }
3124 j += 1;
3125 }
3126
3127 if brace_depth == 0 {
3128 let end = j;
3129 jsx_expression_ranges.push((start, end));
3130
3131 Self::mark_lines_in_range(lines, content, start, end, |line| {
3133 line.in_jsx_expression = true;
3134 });
3135
3136 i = end;
3137 } else {
3138 i += 1;
3139 }
3140 }
3141 } else {
3142 i += 1;
3143 }
3144 }
3145
3146 (jsx_expression_ranges, mdx_comment_ranges)
3147 }
3148
3149 fn detect_mkdocs_line_info(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
3152 if flavor != MarkdownFlavor::MkDocs {
3153 return;
3154 }
3155
3156 use crate::utils::mkdocs_admonitions;
3157 use crate::utils::mkdocs_definition_lists;
3158 use crate::utils::mkdocs_tabs;
3159
3160 let content_lines: Vec<&str> = content.lines().collect();
3161
3162 let mut in_admonition = false;
3164 let mut admonition_indent = 0;
3165
3166 let mut in_tab = false;
3168 let mut tab_indent = 0;
3169
3170 let mut in_definition = false;
3172
3173 for (i, line) in content_lines.iter().enumerate() {
3174 if i >= lines.len() {
3175 break;
3176 }
3177
3178 if lines[i].in_code_block {
3180 continue;
3181 }
3182
3183 if mkdocs_admonitions::is_admonition_start(line) {
3185 in_admonition = true;
3186 admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3187 lines[i].in_admonition = true;
3188 } else if in_admonition {
3189 if line.trim().is_empty() {
3191 lines[i].in_admonition = true;
3193 } else if mkdocs_admonitions::is_admonition_content(line, admonition_indent) {
3194 lines[i].in_admonition = true;
3195 } else {
3196 in_admonition = false;
3198 if mkdocs_admonitions::is_admonition_start(line) {
3200 in_admonition = true;
3201 admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3202 lines[i].in_admonition = true;
3203 }
3204 }
3205 }
3206
3207 if mkdocs_tabs::is_tab_marker(line) {
3209 in_tab = true;
3210 tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3211 lines[i].in_content_tab = true;
3212 } else if in_tab {
3213 if line.trim().is_empty() {
3215 lines[i].in_content_tab = true;
3217 } else if mkdocs_tabs::is_tab_content(line, tab_indent) {
3218 lines[i].in_content_tab = true;
3219 } else {
3220 in_tab = false;
3222 if mkdocs_tabs::is_tab_marker(line) {
3224 in_tab = true;
3225 tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3226 lines[i].in_content_tab = true;
3227 }
3228 }
3229 }
3230
3231 if mkdocs_definition_lists::is_definition_line(line) {
3233 in_definition = true;
3234 lines[i].in_definition_list = true;
3235 } else if in_definition {
3236 if mkdocs_definition_lists::is_definition_continuation(line) {
3238 lines[i].in_definition_list = true;
3239 } else if line.trim().is_empty() {
3240 lines[i].in_definition_list = true;
3242 } else if mkdocs_definition_lists::could_be_term_line(line) {
3243 if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1])
3245 {
3246 lines[i].in_definition_list = true;
3247 } else {
3248 in_definition = false;
3249 }
3250 } else {
3251 in_definition = false;
3252 }
3253 } else if mkdocs_definition_lists::could_be_term_line(line) {
3254 if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1]) {
3256 lines[i].in_definition_list = true;
3257 in_definition = true;
3258 }
3259 }
3260 }
3261 }
3262
3263 fn mark_lines_in_range<F>(lines: &mut [LineInfo], content: &str, start: usize, end: usize, mut f: F)
3265 where
3266 F: FnMut(&mut LineInfo),
3267 {
3268 for line in lines.iter_mut() {
3270 let line_start = line.byte_offset;
3271 let line_end = line.byte_offset + line.byte_len;
3272
3273 if line_start < end && line_end > start {
3275 f(line);
3276 }
3277 }
3278
3279 let _ = content;
3281 }
3282
3283 fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
3285 let mut code_spans = Vec::new();
3286
3287 if !content.contains('`') {
3289 return code_spans;
3290 }
3291
3292 let parser = Parser::new(content).into_offset_iter();
3294
3295 for (event, range) in parser {
3296 if let Event::Code(_) = event {
3297 let start_pos = range.start;
3298 let end_pos = range.end;
3299
3300 let full_span = &content[start_pos..end_pos];
3302 let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
3303
3304 let content_start = start_pos + backtick_count;
3306 let content_end = end_pos - backtick_count;
3307 let span_content = if content_start < content_end {
3308 content[content_start..content_end].to_string()
3309 } else {
3310 String::new()
3311 };
3312
3313 let line_idx = lines
3316 .partition_point(|line| line.byte_offset <= start_pos)
3317 .saturating_sub(1);
3318 let line_num = line_idx + 1;
3319 let byte_col_start = start_pos - lines[line_idx].byte_offset;
3320
3321 let end_line_idx = lines
3323 .partition_point(|line| line.byte_offset <= end_pos)
3324 .saturating_sub(1);
3325 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3326
3327 let line_content = lines[line_idx].content(content);
3330 let col_start = if byte_col_start <= line_content.len() {
3331 line_content[..byte_col_start].chars().count()
3332 } else {
3333 line_content.chars().count()
3334 };
3335
3336 let end_line_content = lines[end_line_idx].content(content);
3337 let col_end = if byte_col_end <= end_line_content.len() {
3338 end_line_content[..byte_col_end].chars().count()
3339 } else {
3340 end_line_content.chars().count()
3341 };
3342
3343 code_spans.push(CodeSpan {
3344 line: line_num,
3345 end_line: end_line_idx + 1,
3346 start_col: col_start,
3347 end_col: col_end,
3348 byte_offset: start_pos,
3349 byte_end: end_pos,
3350 backtick_count,
3351 content: span_content,
3352 });
3353 }
3354 }
3355
3356 code_spans.sort_by_key(|span| span.byte_offset);
3358
3359 code_spans
3360 }
3361
3362 fn parse_math_spans(content: &str, lines: &[LineInfo]) -> Vec<MathSpan> {
3364 let mut math_spans = Vec::new();
3365
3366 if !content.contains('$') {
3368 return math_spans;
3369 }
3370
3371 let mut options = Options::empty();
3373 options.insert(Options::ENABLE_MATH);
3374 let parser = Parser::new_ext(content, options).into_offset_iter();
3375
3376 for (event, range) in parser {
3377 let (is_display, math_content) = match &event {
3378 Event::InlineMath(text) => (false, text.as_ref()),
3379 Event::DisplayMath(text) => (true, text.as_ref()),
3380 _ => continue,
3381 };
3382
3383 let start_pos = range.start;
3384 let end_pos = range.end;
3385
3386 let line_idx = lines
3388 .partition_point(|line| line.byte_offset <= start_pos)
3389 .saturating_sub(1);
3390 let line_num = line_idx + 1;
3391 let byte_col_start = start_pos - lines[line_idx].byte_offset;
3392
3393 let end_line_idx = lines
3395 .partition_point(|line| line.byte_offset <= end_pos)
3396 .saturating_sub(1);
3397 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3398
3399 let line_content = lines[line_idx].content(content);
3401 let col_start = if byte_col_start <= line_content.len() {
3402 line_content[..byte_col_start].chars().count()
3403 } else {
3404 line_content.chars().count()
3405 };
3406
3407 let end_line_content = lines[end_line_idx].content(content);
3408 let col_end = if byte_col_end <= end_line_content.len() {
3409 end_line_content[..byte_col_end].chars().count()
3410 } else {
3411 end_line_content.chars().count()
3412 };
3413
3414 math_spans.push(MathSpan {
3415 line: line_num,
3416 end_line: end_line_idx + 1,
3417 start_col: col_start,
3418 end_col: col_end,
3419 byte_offset: start_pos,
3420 byte_end: end_pos,
3421 is_display,
3422 content: math_content.to_string(),
3423 });
3424 }
3425
3426 math_spans.sort_by_key(|span| span.byte_offset);
3428
3429 math_spans
3430 }
3431
3432 fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
3443 const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
3445
3446 #[inline]
3449 fn reset_tracking_state(
3450 list_item: &ListItemInfo,
3451 has_list_breaking_content: &mut bool,
3452 min_continuation: &mut usize,
3453 ) {
3454 *has_list_breaking_content = false;
3455 let marker_width = if list_item.is_ordered {
3456 list_item.marker.len() + 1 } else {
3458 list_item.marker.len()
3459 };
3460 *min_continuation = if list_item.is_ordered {
3461 marker_width
3462 } else {
3463 UNORDERED_LIST_MIN_CONTINUATION_INDENT
3464 };
3465 }
3466
3467 let mut list_blocks = Vec::with_capacity(lines.len() / 10); let mut current_block: Option<ListBlock> = None;
3470 let mut last_list_item_line = 0;
3471 let mut current_indent_level = 0;
3472 let mut last_marker_width = 0;
3473
3474 let mut has_list_breaking_content_since_last_item = false;
3476 let mut min_continuation_for_tracking = 0;
3477
3478 for (line_idx, line_info) in lines.iter().enumerate() {
3479 let line_num = line_idx + 1;
3480
3481 if line_info.in_code_block {
3483 if let Some(ref mut block) = current_block {
3484 let min_continuation_indent =
3486 CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
3487
3488 let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
3490
3491 match context {
3492 CodeBlockContext::Indented => {
3493 block.end_line = line_num;
3495 continue;
3496 }
3497 CodeBlockContext::Standalone => {
3498 let completed_block = current_block.take().unwrap();
3500 list_blocks.push(completed_block);
3501 continue;
3502 }
3503 CodeBlockContext::Adjacent => {
3504 block.end_line = line_num;
3506 continue;
3507 }
3508 }
3509 } else {
3510 continue;
3512 }
3513 }
3514
3515 let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
3517 caps.get(0).unwrap().as_str().to_string()
3518 } else {
3519 String::new()
3520 };
3521
3522 if let Some(ref block) = current_block
3525 && line_info.list_item.is_none()
3526 && !line_info.is_blank
3527 && !line_info.in_code_span_continuation
3528 {
3529 let line_content = line_info.content(content).trim();
3530
3531 let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
3536
3537 let blockquote_prefix_changes = blockquote_prefix.trim() != block.blockquote_prefix.trim();
3540
3541 let breaks_list = line_info.heading.is_some()
3542 || line_content.starts_with("---")
3543 || line_content.starts_with("***")
3544 || line_content.starts_with("___")
3545 || crate::utils::skip_context::is_table_line(line_content)
3546 || blockquote_prefix_changes
3547 || (line_info.indent > 0
3548 && line_info.indent < min_continuation_for_tracking
3549 && !is_lazy_continuation);
3550
3551 if breaks_list {
3552 has_list_breaking_content_since_last_item = true;
3553 }
3554 }
3555
3556 if line_info.in_code_span_continuation
3559 && line_info.list_item.is_none()
3560 && let Some(ref mut block) = current_block
3561 {
3562 block.end_line = line_num;
3563 }
3564
3565 let effective_continuation_indent = if let Some(ref block) = current_block {
3571 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3572 let line_content = line_info.content(content);
3573 let line_bq_level = line_content
3574 .chars()
3575 .take_while(|c| *c == '>' || c.is_whitespace())
3576 .filter(|&c| c == '>')
3577 .count();
3578 if line_bq_level > 0 && line_bq_level == block_bq_level {
3579 let mut pos = 0;
3581 let mut found_markers = 0;
3582 for c in line_content.chars() {
3583 pos += c.len_utf8();
3584 if c == '>' {
3585 found_markers += 1;
3586 if found_markers == line_bq_level {
3587 if line_content.get(pos..pos + 1) == Some(" ") {
3588 pos += 1;
3589 }
3590 break;
3591 }
3592 }
3593 }
3594 let after_bq = &line_content[pos..];
3595 after_bq.len() - after_bq.trim_start().len()
3596 } else {
3597 line_info.indent
3598 }
3599 } else {
3600 line_info.indent
3601 };
3602 let adjusted_min_continuation_for_tracking = if let Some(ref block) = current_block {
3603 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3604 if block_bq_level > 0 {
3605 if block.is_ordered { last_marker_width } else { 2 }
3606 } else {
3607 min_continuation_for_tracking
3608 }
3609 } else {
3610 min_continuation_for_tracking
3611 };
3612 let is_valid_continuation = effective_continuation_indent >= adjusted_min_continuation_for_tracking
3613 || (line_info.indent == 0 && !line_info.is_blank); if std::env::var("RUMDL_DEBUG_LIST").is_ok() && line_info.list_item.is_none() && !line_info.is_blank {
3616 eprintln!(
3617 "[DEBUG] Line {}: checking continuation - indent={}, min_cont={}, is_valid={}, in_code_span={}, in_code_block={}, has_block={}",
3618 line_num,
3619 effective_continuation_indent,
3620 adjusted_min_continuation_for_tracking,
3621 is_valid_continuation,
3622 line_info.in_code_span_continuation,
3623 line_info.in_code_block,
3624 current_block.is_some()
3625 );
3626 }
3627
3628 if !line_info.in_code_span_continuation
3629 && line_info.list_item.is_none()
3630 && !line_info.is_blank
3631 && !line_info.in_code_block
3632 && is_valid_continuation
3633 && let Some(ref mut block) = current_block
3634 {
3635 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3636 eprintln!(
3637 "[DEBUG] Line {}: extending block.end_line from {} to {}",
3638 line_num, block.end_line, line_num
3639 );
3640 }
3641 block.end_line = line_num;
3642 }
3643
3644 if let Some(list_item) = &line_info.list_item {
3646 let item_indent = list_item.marker_column;
3648 let nesting = item_indent / 2; if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3651 eprintln!(
3652 "[DEBUG] Line {}: list item found, marker={:?}, indent={}",
3653 line_num, list_item.marker, item_indent
3654 );
3655 }
3656
3657 if let Some(ref mut block) = current_block {
3658 let is_nested = nesting > block.nesting_level;
3662 let same_type =
3663 (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
3664 let same_context = block.blockquote_prefix == blockquote_prefix;
3665 let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
3667
3668 let marker_compatible =
3670 block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
3671
3672 let has_non_list_content = has_list_breaking_content_since_last_item;
3675
3676 let mut continues_list = if is_nested {
3680 same_context && reasonable_distance && !has_non_list_content
3682 } else {
3683 same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
3685 };
3686
3687 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3688 eprintln!(
3689 "[DEBUG] Line {}: continues_list={}, is_nested={}, same_type={}, same_context={}, reasonable_distance={}, marker_compatible={}, has_non_list_content={}, last_item={}, block.end_line={}",
3690 line_num,
3691 continues_list,
3692 is_nested,
3693 same_type,
3694 same_context,
3695 reasonable_distance,
3696 marker_compatible,
3697 has_non_list_content,
3698 last_list_item_line,
3699 block.end_line
3700 );
3701 }
3702
3703 if !continues_list
3707 && (is_nested || same_type)
3708 && reasonable_distance
3709 && line_num > 0
3710 && block.end_line == line_num - 1
3711 {
3712 if block.item_lines.contains(&(line_num - 1)) {
3715 continues_list = true;
3717 } else {
3718 continues_list = true;
3722 }
3723 }
3724
3725 if continues_list {
3726 block.end_line = line_num;
3728 block.item_lines.push(line_num);
3729
3730 block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
3732 list_item.marker.len() + 1
3733 } else {
3734 list_item.marker.len()
3735 });
3736
3737 if !block.is_ordered
3739 && block.marker.is_some()
3740 && block.marker.as_ref() != Some(&list_item.marker)
3741 {
3742 block.marker = None;
3744 }
3745
3746 reset_tracking_state(
3748 list_item,
3749 &mut has_list_breaking_content_since_last_item,
3750 &mut min_continuation_for_tracking,
3751 );
3752 } else {
3753 if !same_type
3758 && !is_nested
3759 && let Some(&last_item) = block.item_lines.last()
3760 {
3761 block.end_line = last_item;
3762 }
3763
3764 list_blocks.push(block.clone());
3765
3766 *block = ListBlock {
3767 start_line: line_num,
3768 end_line: line_num,
3769 is_ordered: list_item.is_ordered,
3770 marker: if list_item.is_ordered {
3771 None
3772 } else {
3773 Some(list_item.marker.clone())
3774 },
3775 blockquote_prefix: blockquote_prefix.clone(),
3776 item_lines: vec![line_num],
3777 nesting_level: nesting,
3778 max_marker_width: if list_item.is_ordered {
3779 list_item.marker.len() + 1
3780 } else {
3781 list_item.marker.len()
3782 },
3783 };
3784
3785 reset_tracking_state(
3787 list_item,
3788 &mut has_list_breaking_content_since_last_item,
3789 &mut min_continuation_for_tracking,
3790 );
3791 }
3792 } else {
3793 current_block = Some(ListBlock {
3795 start_line: line_num,
3796 end_line: line_num,
3797 is_ordered: list_item.is_ordered,
3798 marker: if list_item.is_ordered {
3799 None
3800 } else {
3801 Some(list_item.marker.clone())
3802 },
3803 blockquote_prefix,
3804 item_lines: vec![line_num],
3805 nesting_level: nesting,
3806 max_marker_width: list_item.marker.len(),
3807 });
3808
3809 reset_tracking_state(
3811 list_item,
3812 &mut has_list_breaking_content_since_last_item,
3813 &mut min_continuation_for_tracking,
3814 );
3815 }
3816
3817 last_list_item_line = line_num;
3818 current_indent_level = item_indent;
3819 last_marker_width = if list_item.is_ordered {
3820 list_item.marker.len() + 1 } else {
3822 list_item.marker.len()
3823 };
3824 } else if let Some(ref mut block) = current_block {
3825 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3827 eprintln!(
3828 "[DEBUG] Line {}: non-list-item, is_blank={}, block exists",
3829 line_num, line_info.is_blank
3830 );
3831 }
3832
3833 let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
3841 lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
3842 } else {
3843 false
3844 };
3845
3846 let min_continuation_indent = if block.is_ordered {
3850 current_indent_level + last_marker_width
3851 } else {
3852 current_indent_level + 2 };
3854
3855 if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
3856 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3858 eprintln!(
3859 "[DEBUG] Line {}: indented continuation (indent={}, min={})",
3860 line_num, line_info.indent, min_continuation_indent
3861 );
3862 }
3863 block.end_line = line_num;
3864 } else if line_info.is_blank {
3865 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3868 eprintln!("[DEBUG] Line {line_num}: entering blank line handling");
3869 }
3870 let mut check_idx = line_idx + 1;
3871 let mut found_continuation = false;
3872
3873 while check_idx < lines.len() && lines[check_idx].is_blank {
3875 check_idx += 1;
3876 }
3877
3878 if check_idx < lines.len() {
3879 let next_line = &lines[check_idx];
3880 let next_content = next_line.content(content);
3882 let block_bq_level_for_indent = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3885 let next_bq_level_for_indent = next_content
3886 .chars()
3887 .take_while(|c| *c == '>' || c.is_whitespace())
3888 .filter(|&c| c == '>')
3889 .count();
3890 let effective_indent =
3891 if next_bq_level_for_indent > 0 && next_bq_level_for_indent == block_bq_level_for_indent {
3892 let mut pos = 0;
3895 let mut found_markers = 0;
3896 for c in next_content.chars() {
3897 pos += c.len_utf8();
3898 if c == '>' {
3899 found_markers += 1;
3900 if found_markers == next_bq_level_for_indent {
3901 if next_content.get(pos..pos + 1) == Some(" ") {
3903 pos += 1;
3904 }
3905 break;
3906 }
3907 }
3908 }
3909 let after_blockquote_marker = &next_content[pos..];
3910 after_blockquote_marker.len() - after_blockquote_marker.trim_start().len()
3911 } else {
3912 next_line.indent
3913 };
3914 let adjusted_min_continuation = if block_bq_level_for_indent > 0 {
3917 if block.is_ordered { last_marker_width } else { 2 }
3920 } else {
3921 min_continuation_indent
3922 };
3923 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3925 eprintln!(
3926 "[DEBUG] Blank line {} checking next line {}: effective_indent={}, adjusted_min={}, next_is_list={}, in_code_block={}",
3927 line_num,
3928 check_idx + 1,
3929 effective_indent,
3930 adjusted_min_continuation,
3931 next_line.list_item.is_some(),
3932 next_line.in_code_block
3933 );
3934 }
3935 if !next_line.in_code_block && effective_indent >= adjusted_min_continuation {
3936 found_continuation = true;
3937 }
3938 else if !next_line.in_code_block
3940 && next_line.list_item.is_some()
3941 && let Some(item) = &next_line.list_item
3942 {
3943 let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
3944 .find(next_line.content(content))
3945 .map_or(String::new(), |m| m.as_str().to_string());
3946 if item.marker_column == current_indent_level
3947 && item.is_ordered == block.is_ordered
3948 && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
3949 {
3950 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3954 let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
3955 if let Some(between_line) = lines.get(idx) {
3956 let between_content = between_line.content(content);
3957 let trimmed = between_content.trim();
3958 if trimmed.is_empty() {
3960 return false;
3961 }
3962 let line_indent = between_content.len() - between_content.trim_start().len();
3964
3965 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
3967 .find(between_content)
3968 .map_or(String::new(), |m| m.as_str().to_string());
3969 let between_bq_level = between_bq_prefix.chars().filter(|&c| c == '>').count();
3970 let blockquote_level_changed =
3971 trimmed.starts_with(">") && between_bq_level != block_bq_level;
3972
3973 if trimmed.starts_with("```")
3975 || trimmed.starts_with("~~~")
3976 || trimmed.starts_with("---")
3977 || trimmed.starts_with("***")
3978 || trimmed.starts_with("___")
3979 || blockquote_level_changed
3980 || crate::utils::skip_context::is_table_line(trimmed)
3981 || between_line.heading.is_some()
3982 {
3983 return true; }
3985
3986 line_indent >= min_continuation_indent
3988 } else {
3989 false
3990 }
3991 });
3992
3993 if block.is_ordered {
3994 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
3997 if let Some(between_line) = lines.get(idx) {
3998 let between_content = between_line.content(content);
3999 let trimmed = between_content.trim();
4000 if trimmed.is_empty() {
4001 return false;
4002 }
4003 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4005 .find(between_content)
4006 .map_or(String::new(), |m| m.as_str().to_string());
4007 let between_bq_level =
4008 between_bq_prefix.chars().filter(|&c| c == '>').count();
4009 let blockquote_level_changed =
4010 trimmed.starts_with(">") && between_bq_level != block_bq_level;
4011 trimmed.starts_with("```")
4013 || trimmed.starts_with("~~~")
4014 || trimmed.starts_with("---")
4015 || trimmed.starts_with("***")
4016 || trimmed.starts_with("___")
4017 || blockquote_level_changed
4018 || crate::utils::skip_context::is_table_line(trimmed)
4019 || between_line.heading.is_some()
4020 } else {
4021 false
4022 }
4023 });
4024 found_continuation = !has_structural_separators;
4025 } else {
4026 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4028 if let Some(between_line) = lines.get(idx) {
4029 let between_content = between_line.content(content);
4030 let trimmed = between_content.trim();
4031 if trimmed.is_empty() {
4032 return false;
4033 }
4034 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4036 .find(between_content)
4037 .map_or(String::new(), |m| m.as_str().to_string());
4038 let between_bq_level =
4039 between_bq_prefix.chars().filter(|&c| c == '>').count();
4040 let blockquote_level_changed =
4041 trimmed.starts_with(">") && between_bq_level != block_bq_level;
4042 trimmed.starts_with("```")
4044 || trimmed.starts_with("~~~")
4045 || trimmed.starts_with("---")
4046 || trimmed.starts_with("***")
4047 || trimmed.starts_with("___")
4048 || blockquote_level_changed
4049 || crate::utils::skip_context::is_table_line(trimmed)
4050 || between_line.heading.is_some()
4051 } else {
4052 false
4053 }
4054 });
4055 found_continuation = !has_structural_separators;
4056 }
4057 }
4058 }
4059 }
4060
4061 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4062 eprintln!("[DEBUG] Blank line {line_num} final: found_continuation={found_continuation}");
4063 }
4064 if found_continuation {
4065 block.end_line = line_num;
4067 } else {
4068 list_blocks.push(block.clone());
4070 current_block = None;
4071 }
4072 } else {
4073 let min_required_indent = if block.is_ordered {
4076 current_indent_level + last_marker_width
4077 } else {
4078 current_indent_level + 2
4079 };
4080
4081 let line_content = line_info.content(content).trim();
4086
4087 let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
4089
4090 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4093 let current_bq_level = blockquote_prefix.chars().filter(|&c| c == '>').count();
4094 let blockquote_level_changed = line_content.starts_with(">") && current_bq_level != block_bq_level;
4095
4096 let is_structural_separator = line_info.heading.is_some()
4097 || line_content.starts_with("```")
4098 || line_content.starts_with("~~~")
4099 || line_content.starts_with("---")
4100 || line_content.starts_with("***")
4101 || line_content.starts_with("___")
4102 || blockquote_level_changed
4103 || looks_like_table;
4104
4105 let is_lazy_continuation = !is_structural_separator
4109 && !line_info.is_blank
4110 && (line_info.indent == 0
4111 || line_info.indent >= min_required_indent
4112 || line_info.in_code_span_continuation);
4113
4114 if is_lazy_continuation {
4115 block.end_line = line_num;
4118 } else {
4119 list_blocks.push(block.clone());
4121 current_block = None;
4122 }
4123 }
4124 }
4125 }
4126
4127 if let Some(block) = current_block {
4129 list_blocks.push(block);
4130 }
4131
4132 merge_adjacent_list_blocks(content, &mut list_blocks, lines);
4134
4135 list_blocks
4136 }
4137
4138 fn compute_char_frequency(content: &str) -> CharFrequency {
4140 let mut frequency = CharFrequency::default();
4141
4142 for ch in content.chars() {
4143 match ch {
4144 '#' => frequency.hash_count += 1,
4145 '*' => frequency.asterisk_count += 1,
4146 '_' => frequency.underscore_count += 1,
4147 '-' => frequency.hyphen_count += 1,
4148 '+' => frequency.plus_count += 1,
4149 '>' => frequency.gt_count += 1,
4150 '|' => frequency.pipe_count += 1,
4151 '[' => frequency.bracket_count += 1,
4152 '`' => frequency.backtick_count += 1,
4153 '<' => frequency.lt_count += 1,
4154 '!' => frequency.exclamation_count += 1,
4155 '\n' => frequency.newline_count += 1,
4156 _ => {}
4157 }
4158 }
4159
4160 frequency
4161 }
4162
4163 fn parse_html_tags(
4165 content: &str,
4166 lines: &[LineInfo],
4167 code_blocks: &[(usize, usize)],
4168 flavor: MarkdownFlavor,
4169 ) -> Vec<HtmlTag> {
4170 static HTML_TAG_REGEX: LazyLock<regex::Regex> =
4171 LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
4172
4173 let mut html_tags = Vec::with_capacity(content.matches('<').count());
4174
4175 for cap in HTML_TAG_REGEX.captures_iter(content) {
4176 let full_match = cap.get(0).unwrap();
4177 let match_start = full_match.start();
4178 let match_end = full_match.end();
4179
4180 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4182 continue;
4183 }
4184
4185 let is_closing = !cap.get(1).unwrap().as_str().is_empty();
4186 let tag_name_original = cap.get(2).unwrap().as_str();
4187 let tag_name = tag_name_original.to_lowercase();
4188 let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
4189
4190 if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
4193 continue;
4194 }
4195
4196 let mut line_num = 1;
4198 let mut col_start = match_start;
4199 let mut col_end = match_end;
4200 for (idx, line_info) in lines.iter().enumerate() {
4201 if match_start >= line_info.byte_offset {
4202 line_num = idx + 1;
4203 col_start = match_start - line_info.byte_offset;
4204 col_end = match_end - line_info.byte_offset;
4205 } else {
4206 break;
4207 }
4208 }
4209
4210 html_tags.push(HtmlTag {
4211 line: line_num,
4212 start_col: col_start,
4213 end_col: col_end,
4214 byte_offset: match_start,
4215 byte_end: match_end,
4216 tag_name,
4217 is_closing,
4218 is_self_closing,
4219 raw_content: full_match.as_str().to_string(),
4220 });
4221 }
4222
4223 html_tags
4224 }
4225
4226 fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
4228 let mut table_rows = Vec::with_capacity(lines.len() / 20);
4229
4230 for (line_idx, line_info) in lines.iter().enumerate() {
4231 if line_info.in_code_block || line_info.is_blank {
4233 continue;
4234 }
4235
4236 let line = line_info.content(content);
4237 let line_num = line_idx + 1;
4238
4239 if !line.contains('|') {
4241 continue;
4242 }
4243
4244 let parts: Vec<&str> = line.split('|').collect();
4246 let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
4247
4248 let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
4250 let mut column_alignments = Vec::new();
4251
4252 if is_separator {
4253 for part in &parts[1..parts.len() - 1] {
4254 let trimmed = part.trim();
4256 let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
4257 "center".to_string()
4258 } else if trimmed.ends_with(':') {
4259 "right".to_string()
4260 } else if trimmed.starts_with(':') {
4261 "left".to_string()
4262 } else {
4263 "none".to_string()
4264 };
4265 column_alignments.push(alignment);
4266 }
4267 }
4268
4269 table_rows.push(TableRow {
4270 line: line_num,
4271 is_separator,
4272 column_count,
4273 column_alignments,
4274 });
4275 }
4276
4277 table_rows
4278 }
4279
4280 fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
4282 let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
4283
4284 for cap in URL_SIMPLE_REGEX.captures_iter(content) {
4286 let full_match = cap.get(0).unwrap();
4287 let match_start = full_match.start();
4288 let match_end = full_match.end();
4289
4290 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4292 continue;
4293 }
4294
4295 let preceding_char = if match_start > 0 {
4297 content.chars().nth(match_start - 1)
4298 } else {
4299 None
4300 };
4301 let following_char = content.chars().nth(match_end);
4302
4303 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4304 continue;
4305 }
4306 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4307 continue;
4308 }
4309
4310 let url = full_match.as_str();
4311 let url_type = if url.starts_with("https://") {
4312 "https"
4313 } else if url.starts_with("http://") {
4314 "http"
4315 } else if url.starts_with("ftp://") {
4316 "ftp"
4317 } else {
4318 "other"
4319 };
4320
4321 let mut line_num = 1;
4323 let mut col_start = match_start;
4324 let mut col_end = match_end;
4325 for (idx, line_info) in lines.iter().enumerate() {
4326 if match_start >= line_info.byte_offset {
4327 line_num = idx + 1;
4328 col_start = match_start - line_info.byte_offset;
4329 col_end = match_end - line_info.byte_offset;
4330 } else {
4331 break;
4332 }
4333 }
4334
4335 bare_urls.push(BareUrl {
4336 line: line_num,
4337 start_col: col_start,
4338 end_col: col_end,
4339 byte_offset: match_start,
4340 byte_end: match_end,
4341 url: url.to_string(),
4342 url_type: url_type.to_string(),
4343 });
4344 }
4345
4346 for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
4348 let full_match = cap.get(0).unwrap();
4349 let match_start = full_match.start();
4350 let match_end = full_match.end();
4351
4352 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4354 continue;
4355 }
4356
4357 let preceding_char = if match_start > 0 {
4359 content.chars().nth(match_start - 1)
4360 } else {
4361 None
4362 };
4363 let following_char = content.chars().nth(match_end);
4364
4365 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4366 continue;
4367 }
4368 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4369 continue;
4370 }
4371
4372 let email = full_match.as_str();
4373
4374 let mut line_num = 1;
4376 let mut col_start = match_start;
4377 let mut col_end = match_end;
4378 for (idx, line_info) in lines.iter().enumerate() {
4379 if match_start >= line_info.byte_offset {
4380 line_num = idx + 1;
4381 col_start = match_start - line_info.byte_offset;
4382 col_end = match_end - line_info.byte_offset;
4383 } else {
4384 break;
4385 }
4386 }
4387
4388 bare_urls.push(BareUrl {
4389 line: line_num,
4390 start_col: col_start,
4391 end_col: col_end,
4392 byte_offset: match_start,
4393 byte_end: match_end,
4394 url: email.to_string(),
4395 url_type: "email".to_string(),
4396 });
4397 }
4398
4399 bare_urls
4400 }
4401
    /// Build a `ValidHeadingsIter` over this context's `lines`.
    ///
    /// NOTE(review): judging by the type name, the iterator presumably yields only
    /// headings whose `is_valid` flag is set — confirm against `ValidHeadingsIter`.
    #[must_use]
    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
        ValidHeadingsIter::new(&self.lines)
    }
4425
4426 #[must_use]
4430 pub fn has_valid_headings(&self) -> bool {
4431 self.lines
4432 .iter()
4433 .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
4434 }
4435}
4436
4437fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
4439 if list_blocks.len() < 2 {
4440 return;
4441 }
4442
4443 let mut merger = ListBlockMerger::new(content, lines);
4444 *list_blocks = merger.merge(list_blocks);
4445}
4446
/// Borrowed view of a document used while deciding which neighbouring list
/// blocks can be merged into one.
struct ListBlockMerger<'a> {
    /// Full document text; passed to `LineInfo::content` to read a line's text.
    content: &'a str,
    /// Per-line metadata, looked up by 1-based line number minus one.
    lines: &'a [LineInfo],
}
4452
4453impl<'a> ListBlockMerger<'a> {
4454 fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
4455 Self { content, lines }
4456 }
4457
4458 fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
4459 let mut merged = Vec::with_capacity(list_blocks.len());
4460 let mut current = list_blocks[0].clone();
4461
4462 for next in list_blocks.iter().skip(1) {
4463 if self.should_merge_blocks(¤t, next) {
4464 current = self.merge_two_blocks(current, next);
4465 } else {
4466 merged.push(current);
4467 current = next.clone();
4468 }
4469 }
4470
4471 merged.push(current);
4472 merged
4473 }
4474
4475 fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
4477 if !self.blocks_are_compatible(current, next) {
4479 return false;
4480 }
4481
4482 let spacing = self.analyze_spacing_between(current, next);
4484 match spacing {
4485 BlockSpacing::Consecutive => true,
4486 BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
4487 BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
4488 self.can_merge_with_content_between(current, next)
4489 }
4490 }
4491 }
4492
4493 fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
4495 current.is_ordered == next.is_ordered
4496 && current.blockquote_prefix == next.blockquote_prefix
4497 && current.nesting_level == next.nesting_level
4498 }
4499
4500 fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
4502 let gap = next.start_line - current.end_line;
4503
4504 match gap {
4505 1 => BlockSpacing::Consecutive,
4506 2 => BlockSpacing::SingleBlank,
4507 _ if gap > 2 => {
4508 if self.has_only_blank_lines_between(current, next) {
4509 BlockSpacing::MultipleBlanks
4510 } else {
4511 BlockSpacing::ContentBetween
4512 }
4513 }
4514 _ => BlockSpacing::Consecutive, }
4516 }
4517
4518 fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4520 if has_meaningful_content_between(self.content, current, next, self.lines) {
4523 return false; }
4525
4526 !current.is_ordered && current.marker == next.marker
4528 }
4529
4530 fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4532 if has_meaningful_content_between(self.content, current, next, self.lines) {
4534 return false; }
4536
4537 current.is_ordered && next.is_ordered
4539 }
4540
4541 fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4543 for line_num in (current.end_line + 1)..next.start_line {
4544 if let Some(line_info) = self.lines.get(line_num - 1)
4545 && !line_info.content(self.content).trim().is_empty()
4546 {
4547 return false;
4548 }
4549 }
4550 true
4551 }
4552
4553 fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
4555 current.end_line = next.end_line;
4556 current.item_lines.extend_from_slice(&next.item_lines);
4557
4558 current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
4560
4561 if !current.is_ordered && self.markers_differ(¤t, next) {
4563 current.marker = None; }
4565
4566 current
4567 }
4568
4569 fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
4571 current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
4572 }
4573}
4574
/// How two neighbouring list blocks are separated, as classified by
/// `ListBlockMerger::analyze_spacing_between` from the difference between the
/// next block's start line and the current block's end line.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// The next block starts on the line right after the current block ends
    /// (also used as the fallback for a line difference of 0).
    Consecutive,
    /// Exactly one line lies between the blocks (line difference of 2). Whether
    /// that line is actually blank is not checked at classification time.
    SingleBlank,
    /// More than one line lies between the blocks and all of them are blank.
    MultipleBlanks,
    /// More than one line lies between the blocks and at least one is non-blank.
    ContentBetween,
}
4583
4584fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
4586 for line_num in (current.end_line + 1)..next.start_line {
4588 if let Some(line_info) = lines.get(line_num - 1) {
4589 let trimmed = line_info.content(content).trim();
4591
4592 if trimmed.is_empty() {
4594 continue;
4595 }
4596
4597 if line_info.heading.is_some() {
4601 return true; }
4603
4604 if is_horizontal_rule(trimmed) {
4606 return true; }
4608
4609 if crate::utils::skip_context::is_table_line(trimmed) {
4611 return true; }
4613
4614 if trimmed.starts_with('>') {
4616 return true; }
4618
4619 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
4621 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4622
4623 let min_continuation_indent = if current.is_ordered {
4625 current.nesting_level + current.max_marker_width + 1 } else {
4627 current.nesting_level + 2
4628 };
4629
4630 if line_indent < min_continuation_indent {
4631 return true; }
4634 }
4635
4636 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4638
4639 let min_indent = if current.is_ordered {
4641 current.nesting_level + current.max_marker_width
4642 } else {
4643 current.nesting_level + 2
4644 };
4645
4646 if line_indent < min_indent {
4648 return true; }
4650
4651 }
4654 }
4655
4656 false
4658}
4659
4660pub fn is_horizontal_rule_line(line: &str) -> bool {
4667 let leading_spaces = line.len() - line.trim_start_matches(' ').len();
4669 if leading_spaces > 3 || line.starts_with('\t') {
4670 return false;
4671 }
4672
4673 is_horizontal_rule_content(line.trim())
4674}
4675
/// Check whether already-trimmed text forms a horizontal rule.
///
/// The text must be at least three bytes long, start with `-`, `*` or `_`,
/// contain that same character at least three times, and contain nothing else
/// apart from spaces and tabs.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    if trimmed.len() < 3 {
        return false;
    }

    let mut symbols = trimmed.chars();
    let marker = match symbols.next() {
        Some(first @ ('-' | '*' | '_')) => first,
        _ => return false,
    };

    // The first character has already been consumed and counts as one marker.
    let mut marker_total = 1;
    for symbol in symbols {
        if symbol == marker {
            marker_total += 1;
        } else if symbol != ' ' && symbol != '\t' {
            return false;
        }
    }

    marker_total >= 3
}
4700
/// Thin wrapper around `is_horizontal_rule_content`: reports whether the given
/// (expected to be already trimmed) text forms a horizontal rule.
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}
4705
4706#[cfg(test)]
4708mod tests {
4709 use super::*;
4710
4711 #[test]
4712 fn test_empty_content() {
4713 let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
4714 assert_eq!(ctx.content, "");
4715 assert_eq!(ctx.line_offsets, vec![0]);
4716 assert_eq!(ctx.offset_to_line_col(0), (1, 1));
4717 assert_eq!(ctx.lines.len(), 0);
4718 }
4719
4720 #[test]
4721 fn test_single_line() {
4722 let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
4723 assert_eq!(ctx.content, "# Hello");
4724 assert_eq!(ctx.line_offsets, vec![0]);
4725 assert_eq!(ctx.offset_to_line_col(0), (1, 1));
4726 assert_eq!(ctx.offset_to_line_col(3), (1, 4));
4727 }
4728
4729 #[test]
4730 fn test_multi_line() {
4731 let content = "# Title\n\nSecond line\nThird line";
4732 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4733 assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
4734 assert_eq!(ctx.offset_to_line_col(0), (1, 1)); assert_eq!(ctx.offset_to_line_col(8), (2, 1)); assert_eq!(ctx.offset_to_line_col(9), (3, 1)); assert_eq!(ctx.offset_to_line_col(15), (3, 7)); assert_eq!(ctx.offset_to_line_col(21), (4, 1)); }
4741
4742 #[test]
4743 fn test_line_info() {
4744 let content = "# Title\n indented\n\ncode:\n```rust\nfn main() {}\n```";
4745 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4746
4747 assert_eq!(ctx.lines.len(), 7);
4749
4750 let line1 = &ctx.lines[0];
4752 assert_eq!(line1.content(ctx.content), "# Title");
4753 assert_eq!(line1.byte_offset, 0);
4754 assert_eq!(line1.indent, 0);
4755 assert!(!line1.is_blank);
4756 assert!(!line1.in_code_block);
4757 assert!(line1.list_item.is_none());
4758
4759 let line2 = &ctx.lines[1];
4761 assert_eq!(line2.content(ctx.content), " indented");
4762 assert_eq!(line2.byte_offset, 8);
4763 assert_eq!(line2.indent, 4);
4764 assert!(!line2.is_blank);
4765
4766 let line3 = &ctx.lines[2];
4768 assert_eq!(line3.content(ctx.content), "");
4769 assert!(line3.is_blank);
4770
4771 assert_eq!(ctx.line_to_byte_offset(1), Some(0));
4773 assert_eq!(ctx.line_to_byte_offset(2), Some(8));
4774 assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
4775 assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
4776 }
4777
4778 #[test]
4779 fn test_list_item_detection() {
4780 let content = "- Unordered item\n * Nested item\n1. Ordered item\n 2) Nested ordered\n\nNot a list";
4781 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4782
4783 let line1 = &ctx.lines[0];
4785 assert!(line1.list_item.is_some());
4786 let list1 = line1.list_item.as_ref().unwrap();
4787 assert_eq!(list1.marker, "-");
4788 assert!(!list1.is_ordered);
4789 assert_eq!(list1.marker_column, 0);
4790 assert_eq!(list1.content_column, 2);
4791
4792 let line2 = &ctx.lines[1];
4794 assert!(line2.list_item.is_some());
4795 let list2 = line2.list_item.as_ref().unwrap();
4796 assert_eq!(list2.marker, "*");
4797 assert_eq!(list2.marker_column, 2);
4798
4799 let line3 = &ctx.lines[2];
4801 assert!(line3.list_item.is_some());
4802 let list3 = line3.list_item.as_ref().unwrap();
4803 assert_eq!(list3.marker, "1.");
4804 assert!(list3.is_ordered);
4805 assert_eq!(list3.number, Some(1));
4806
4807 let line6 = &ctx.lines[5];
4809 assert!(line6.list_item.is_none());
4810 }
4811
4812 #[test]
4813 fn test_offset_to_line_col_edge_cases() {
4814 let content = "a\nb\nc";
4815 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4816 assert_eq!(ctx.offset_to_line_col(0), (1, 1)); assert_eq!(ctx.offset_to_line_col(1), (1, 2)); assert_eq!(ctx.offset_to_line_col(2), (2, 1)); assert_eq!(ctx.offset_to_line_col(3), (2, 2)); assert_eq!(ctx.offset_to_line_col(4), (3, 1)); assert_eq!(ctx.offset_to_line_col(5), (3, 2)); }
4824
4825 #[test]
4826 fn test_mdx_esm_blocks() {
4827 let content = r##"import {Chart} from './snowfall.js'
4828export const year = 2023
4829
4830# Last year's snowfall
4831
4832In {year}, the snowfall was above average.
4833It was followed by a warm spring which caused
4834flood conditions in many of the nearby rivers.
4835
4836<Chart color="#fcb32c" year={year} />
4837"##;
4838
4839 let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);
4840
4841 assert_eq!(ctx.lines.len(), 10);
4843 assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
4844 assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
4845 assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
4846 assert!(
4847 !ctx.lines[3].in_esm_block,
4848 "Line 4 (heading) should NOT be in_esm_block"
4849 );
4850 assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
4851 assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
4852 }
4853
4854 #[test]
4855 fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
4856 let content = r#"import {Chart} from './snowfall.js'
4857export const year = 2023
4858
4859# Last year's snowfall
4860"#;
4861
4862 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4863
4864 assert!(
4866 !ctx.lines[0].in_esm_block,
4867 "Line 1 should NOT be in_esm_block in Standard flavor"
4868 );
4869 assert!(
4870 !ctx.lines[1].in_esm_block,
4871 "Line 2 should NOT be in_esm_block in Standard flavor"
4872 );
4873 }
4874
4875 #[test]
4876 fn test_blockquote_with_indented_content() {
4877 let content = r#"# Heading
4881
4882> -S socket-path
4883> More text
4884"#;
4885 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4886
4887 assert!(
4889 ctx.lines.get(2).is_some_and(|l| l.blockquote.is_some()),
4890 "Line 3 should be a blockquote"
4891 );
4892 assert!(
4894 ctx.lines.get(3).is_some_and(|l| l.blockquote.is_some()),
4895 "Line 4 should be a blockquote"
4896 );
4897
4898 let bq3 = ctx.lines.get(2).unwrap().blockquote.as_ref().unwrap();
4901 assert_eq!(bq3.content, "-S socket-path");
4902 assert_eq!(bq3.nesting_level, 1);
4903 assert!(bq3.has_multiple_spaces_after_marker);
4905
4906 let bq4 = ctx.lines.get(3).unwrap().blockquote.as_ref().unwrap();
4907 assert_eq!(bq4.content, "More text");
4908 assert_eq!(bq4.nesting_level, 1);
4909 }
4910
4911 #[test]
4912 fn test_footnote_definitions_not_parsed_as_reference_defs() {
4913 let content = r#"# Title
4915
4916A footnote[^1].
4917
4918[^1]: This is the footnote content.
4919
4920[^note]: Another footnote with [link](https://example.com).
4921
4922[regular]: ./path.md "A real reference definition"
4923"#;
4924 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4925
4926 assert_eq!(
4928 ctx.reference_defs.len(),
4929 1,
4930 "Footnotes should not be parsed as reference definitions"
4931 );
4932
4933 assert_eq!(ctx.reference_defs[0].id, "regular");
4935 assert_eq!(ctx.reference_defs[0].url, "./path.md");
4936 assert_eq!(
4937 ctx.reference_defs[0].title,
4938 Some("A real reference definition".to_string())
4939 );
4940 }
4941
4942 #[test]
4943 fn test_footnote_with_inline_link_not_misidentified() {
4944 let content = r#"# Title
4947
4948A footnote[^1].
4949
4950[^1]: [link](https://www.google.com).
4951"#;
4952 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4953
4954 assert!(
4956 ctx.reference_defs.is_empty(),
4957 "Footnote with inline link should not create a reference definition"
4958 );
4959 }
4960
4961 #[test]
4962 fn test_various_footnote_formats_excluded() {
4963 let content = r#"[^1]: Numeric footnote
4965[^note]: Named footnote
4966[^a]: Single char footnote
4967[^long-footnote-name]: Long named footnote
4968[^123abc]: Mixed alphanumeric
4969
4970[ref1]: ./file1.md
4971[ref2]: ./file2.md
4972"#;
4973 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4974
4975 assert_eq!(
4977 ctx.reference_defs.len(),
4978 2,
4979 "Only regular reference definitions should be parsed"
4980 );
4981
4982 let ids: Vec<&str> = ctx.reference_defs.iter().map(|r| r.id.as_str()).collect();
4983 assert!(ids.contains(&"ref1"));
4984 assert!(ids.contains(&"ref2"));
4985 assert!(!ids.iter().any(|id| id.starts_with('^')));
4986 }
4987
4988 #[test]
4993 fn test_has_char_tracked_characters() {
4994 let content = "# Heading\n* list item\n_emphasis_ and -hyphen-\n+ plus\n> quote\n| table |\n[link]\n`code`\n<html>\n!image";
4996 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4997
4998 assert!(ctx.has_char('#'), "Should detect hash");
5000 assert!(ctx.has_char('*'), "Should detect asterisk");
5001 assert!(ctx.has_char('_'), "Should detect underscore");
5002 assert!(ctx.has_char('-'), "Should detect hyphen");
5003 assert!(ctx.has_char('+'), "Should detect plus");
5004 assert!(ctx.has_char('>'), "Should detect gt");
5005 assert!(ctx.has_char('|'), "Should detect pipe");
5006 assert!(ctx.has_char('['), "Should detect bracket");
5007 assert!(ctx.has_char('`'), "Should detect backtick");
5008 assert!(ctx.has_char('<'), "Should detect lt");
5009 assert!(ctx.has_char('!'), "Should detect exclamation");
5010 assert!(ctx.has_char('\n'), "Should detect newline");
5011 }
5012
5013 #[test]
5014 fn test_has_char_absent_characters() {
5015 let content = "Simple text without special chars";
5016 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5017
5018 assert!(!ctx.has_char('#'), "Should not detect hash");
5020 assert!(!ctx.has_char('*'), "Should not detect asterisk");
5021 assert!(!ctx.has_char('_'), "Should not detect underscore");
5022 assert!(!ctx.has_char('-'), "Should not detect hyphen");
5023 assert!(!ctx.has_char('+'), "Should not detect plus");
5024 assert!(!ctx.has_char('>'), "Should not detect gt");
5025 assert!(!ctx.has_char('|'), "Should not detect pipe");
5026 assert!(!ctx.has_char('['), "Should not detect bracket");
5027 assert!(!ctx.has_char('`'), "Should not detect backtick");
5028 assert!(!ctx.has_char('<'), "Should not detect lt");
5029 assert!(!ctx.has_char('!'), "Should not detect exclamation");
5030 assert!(!ctx.has_char('\n'), "Should not detect newline in single line");
5032 }
5033
5034 #[test]
5035 fn test_has_char_fallback_for_untracked() {
5036 let content = "Text with @mention and $dollar and %percent";
5037 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5038
5039 assert!(ctx.has_char('@'), "Should detect @ via fallback");
5041 assert!(ctx.has_char('$'), "Should detect $ via fallback");
5042 assert!(ctx.has_char('%'), "Should detect % via fallback");
5043 assert!(!ctx.has_char('^'), "Should not detect absent ^ via fallback");
5044 }
5045
5046 #[test]
5047 fn test_char_count_tracked_characters() {
5048 let content = "## Heading ##\n***bold***\n__emphasis__\n---\n+++\n>> nested\n|| table ||\n[[link]]\n``code``\n<<html>>\n!!";
5049 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5050
5051 assert_eq!(ctx.char_count('#'), 4, "Should count 4 hashes");
5053 assert_eq!(ctx.char_count('*'), 6, "Should count 6 asterisks");
5054 assert_eq!(ctx.char_count('_'), 4, "Should count 4 underscores");
5055 assert_eq!(ctx.char_count('-'), 3, "Should count 3 hyphens");
5056 assert_eq!(ctx.char_count('+'), 3, "Should count 3 pluses");
5057 assert_eq!(ctx.char_count('>'), 4, "Should count 4 gt (2 nested + 2 in <<html>>)");
5058 assert_eq!(ctx.char_count('|'), 4, "Should count 4 pipes");
5059 assert_eq!(ctx.char_count('['), 2, "Should count 2 brackets");
5060 assert_eq!(ctx.char_count('`'), 4, "Should count 4 backticks");
5061 assert_eq!(ctx.char_count('<'), 2, "Should count 2 lt");
5062 assert_eq!(ctx.char_count('!'), 2, "Should count 2 exclamations");
5063 assert_eq!(ctx.char_count('\n'), 10, "Should count 10 newlines");
5064 }
5065
5066 #[test]
5067 fn test_char_count_zero_for_absent() {
5068 let content = "Plain text";
5069 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5070
5071 assert_eq!(ctx.char_count('#'), 0);
5072 assert_eq!(ctx.char_count('*'), 0);
5073 assert_eq!(ctx.char_count('_'), 0);
5074 assert_eq!(ctx.char_count('\n'), 0);
5075 }
5076
5077 #[test]
5078 fn test_char_count_fallback_for_untracked() {
5079 let content = "@@@ $$ %%%";
5080 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5081
5082 assert_eq!(ctx.char_count('@'), 3, "Should count 3 @ via fallback");
5083 assert_eq!(ctx.char_count('$'), 2, "Should count 2 $ via fallback");
5084 assert_eq!(ctx.char_count('%'), 3, "Should count 3 % via fallback");
5085 assert_eq!(ctx.char_count('^'), 0, "Should count 0 for absent char");
5086 }
5087
5088 #[test]
5089 fn test_char_count_empty_content() {
5090 let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
5091
5092 assert_eq!(ctx.char_count('#'), 0);
5093 assert_eq!(ctx.char_count('*'), 0);
5094 assert_eq!(ctx.char_count('@'), 0);
5095 assert!(!ctx.has_char('#'));
5096 assert!(!ctx.has_char('@'));
5097 }
5098
5099 #[test]
5104 fn test_is_in_html_tag_simple() {
5105 let content = "<div>content</div>";
5106 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5107
5108 assert!(ctx.is_in_html_tag(0), "Position 0 (<) should be in tag");
5110 assert!(ctx.is_in_html_tag(1), "Position 1 (d) should be in tag");
5111 assert!(ctx.is_in_html_tag(4), "Position 4 (>) should be in tag");
5112
5113 assert!(!ctx.is_in_html_tag(5), "Position 5 (c) should not be in tag");
5115 assert!(!ctx.is_in_html_tag(10), "Position 10 (t) should not be in tag");
5116
5117 assert!(ctx.is_in_html_tag(12), "Position 12 (<) should be in tag");
5119 assert!(ctx.is_in_html_tag(17), "Position 17 (>) should be in tag");
5120 }
5121
5122 #[test]
5123 fn test_is_in_html_tag_self_closing() {
5124 let content = "Text <br/> more text";
5125 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5126
5127 assert!(!ctx.is_in_html_tag(0), "Position 0 should not be in tag");
5129 assert!(!ctx.is_in_html_tag(4), "Position 4 (space) should not be in tag");
5130
5131 assert!(ctx.is_in_html_tag(5), "Position 5 (<) should be in tag");
5133 assert!(ctx.is_in_html_tag(8), "Position 8 (/) should be in tag");
5134 assert!(ctx.is_in_html_tag(9), "Position 9 (>) should be in tag");
5135
5136 assert!(!ctx.is_in_html_tag(10), "Position 10 (space) should not be in tag");
5138 }
5139
5140 #[test]
5141 fn test_is_in_html_tag_with_attributes() {
5142 let content = r#"<a href="url" class="link">text</a>"#;
5143 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5144
5145 assert!(ctx.is_in_html_tag(0), "Start of tag");
5147 assert!(ctx.is_in_html_tag(10), "Inside href attribute");
5148 assert!(ctx.is_in_html_tag(20), "Inside class attribute");
5149 assert!(ctx.is_in_html_tag(26), "End of opening tag");
5150
5151 assert!(!ctx.is_in_html_tag(27), "Start of content");
5153 assert!(!ctx.is_in_html_tag(30), "End of content");
5154
5155 assert!(ctx.is_in_html_tag(31), "Start of closing tag");
5157 }
5158
5159 #[test]
5160 fn test_is_in_html_tag_multiline() {
5161 let content = "<div\n class=\"test\"\n>\ncontent\n</div>";
5162 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5163
5164 assert!(ctx.is_in_html_tag(0), "Start of multiline tag");
5166 assert!(ctx.is_in_html_tag(5), "After first newline in tag");
5167 assert!(ctx.is_in_html_tag(15), "Inside attribute");
5168
5169 let closing_bracket_pos = content.find(">\n").unwrap();
5171 assert!(!ctx.is_in_html_tag(closing_bracket_pos + 2), "Content after tag");
5172 }
5173
5174 #[test]
5175 fn test_is_in_html_tag_no_tags() {
5176 let content = "Plain text without any HTML";
5177 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5178
5179 for i in 0..content.len() {
5181 assert!(!ctx.is_in_html_tag(i), "Position {i} should not be in tag");
5182 }
5183 }
5184
5185 #[test]
5190 fn test_is_in_jinja_range_expression() {
5191 let content = "Hello {{ name }}!";
5192 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5193
5194 assert!(!ctx.is_in_jinja_range(0), "H should not be in Jinja");
5196 assert!(!ctx.is_in_jinja_range(5), "Space before Jinja should not be in Jinja");
5197
5198 assert!(ctx.is_in_jinja_range(6), "First brace should be in Jinja");
5200 assert!(ctx.is_in_jinja_range(7), "Second brace should be in Jinja");
5201 assert!(ctx.is_in_jinja_range(10), "name should be in Jinja");
5202 assert!(ctx.is_in_jinja_range(14), "Closing brace should be in Jinja");
5203 assert!(ctx.is_in_jinja_range(15), "Second closing brace should be in Jinja");
5204
5205 assert!(!ctx.is_in_jinja_range(16), "! should not be in Jinja");
5207 }
5208
5209 #[test]
5210 fn test_is_in_jinja_range_statement() {
5211 let content = "{% if condition %}content{% endif %}";
5212 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5213
5214 assert!(ctx.is_in_jinja_range(0), "Start of Jinja statement");
5216 assert!(ctx.is_in_jinja_range(5), "condition should be in Jinja");
5217 assert!(ctx.is_in_jinja_range(17), "End of opening statement");
5218
5219 assert!(!ctx.is_in_jinja_range(18), "content should not be in Jinja");
5221
5222 assert!(ctx.is_in_jinja_range(25), "Start of endif");
5224 assert!(ctx.is_in_jinja_range(32), "endif should be in Jinja");
5225 }
5226
5227 #[test]
5228 fn test_is_in_jinja_range_multiple() {
5229 let content = "{{ a }} and {{ b }}";
5230 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5231
5232 assert!(ctx.is_in_jinja_range(0));
5234 assert!(ctx.is_in_jinja_range(3));
5235 assert!(ctx.is_in_jinja_range(6));
5236
5237 assert!(!ctx.is_in_jinja_range(8));
5239 assert!(!ctx.is_in_jinja_range(11));
5240
5241 assert!(ctx.is_in_jinja_range(12));
5243 assert!(ctx.is_in_jinja_range(15));
5244 assert!(ctx.is_in_jinja_range(18));
5245 }
5246
5247 #[test]
5248 fn test_is_in_jinja_range_no_jinja() {
5249 let content = "Plain text with single braces but not Jinja";
5250 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5251
5252 for i in 0..content.len() {
5254 assert!(!ctx.is_in_jinja_range(i), "Position {i} should not be in Jinja");
5255 }
5256 }
5257
5258 #[test]
5263 fn test_is_in_link_title_with_title() {
5264 let content = r#"[ref]: https://example.com "Title text"
5265
5266Some content."#;
5267 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5268
5269 assert_eq!(ctx.reference_defs.len(), 1);
5271 let def = &ctx.reference_defs[0];
5272 assert!(def.title_byte_start.is_some());
5273 assert!(def.title_byte_end.is_some());
5274
5275 let title_start = def.title_byte_start.unwrap();
5276 let title_end = def.title_byte_end.unwrap();
5277
5278 assert!(!ctx.is_in_link_title(10), "URL should not be in title");
5280
5281 assert!(ctx.is_in_link_title(title_start), "Title start should be in title");
5283 assert!(
5284 ctx.is_in_link_title(title_start + 5),
5285 "Middle of title should be in title"
5286 );
5287 assert!(ctx.is_in_link_title(title_end - 1), "End of title should be in title");
5288
5289 assert!(
5291 !ctx.is_in_link_title(title_end),
5292 "After title end should not be in title"
5293 );
5294 }
5295
5296 #[test]
5297 fn test_is_in_link_title_without_title() {
5298 let content = "[ref]: https://example.com\n\nSome content.";
5299 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5300
5301 assert_eq!(ctx.reference_defs.len(), 1);
5303 let def = &ctx.reference_defs[0];
5304 assert!(def.title_byte_start.is_none());
5305 assert!(def.title_byte_end.is_none());
5306
5307 for i in 0..content.len() {
5309 assert!(!ctx.is_in_link_title(i), "Position {i} should not be in title");
5310 }
5311 }
5312
5313 #[test]
5314 fn test_is_in_link_title_multiple_refs() {
5315 let content = r#"[ref1]: /url1 "Title One"
5316[ref2]: /url2
5317[ref3]: /url3 "Title Three"
5318"#;
5319 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5320
5321 assert_eq!(ctx.reference_defs.len(), 3);
5323
5324 let ref1 = ctx.reference_defs.iter().find(|r| r.id == "ref1").unwrap();
5326 assert!(ref1.title_byte_start.is_some());
5327
5328 let ref2 = ctx.reference_defs.iter().find(|r| r.id == "ref2").unwrap();
5330 assert!(ref2.title_byte_start.is_none());
5331
5332 let ref3 = ctx.reference_defs.iter().find(|r| r.id == "ref3").unwrap();
5334 assert!(ref3.title_byte_start.is_some());
5335
5336 if let (Some(start), Some(end)) = (ref1.title_byte_start, ref1.title_byte_end) {
5338 assert!(ctx.is_in_link_title(start + 1));
5339 assert!(!ctx.is_in_link_title(end + 5));
5340 }
5341
5342 if let (Some(start), Some(_end)) = (ref3.title_byte_start, ref3.title_byte_end) {
5344 assert!(ctx.is_in_link_title(start + 1));
5345 }
5346 }
5347
5348 #[test]
5349 fn test_is_in_link_title_single_quotes() {
5350 let content = "[ref]: /url 'Single quoted title'\n";
5351 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5352
5353 assert_eq!(ctx.reference_defs.len(), 1);
5354 let def = &ctx.reference_defs[0];
5355
5356 if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
5357 assert!(ctx.is_in_link_title(start));
5358 assert!(ctx.is_in_link_title(start + 5));
5359 assert!(!ctx.is_in_link_title(end));
5360 }
5361 }
5362
5363 #[test]
5364 fn test_is_in_link_title_parentheses() {
5365 let content = "[ref]: /url (Parenthesized title)\n";
5368 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5369
5370 if ctx.reference_defs.is_empty() {
5373 for i in 0..content.len() {
5375 assert!(!ctx.is_in_link_title(i));
5376 }
5377 } else {
5378 let def = &ctx.reference_defs[0];
5379 if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
5380 assert!(ctx.is_in_link_title(start));
5381 assert!(ctx.is_in_link_title(start + 5));
5382 assert!(!ctx.is_in_link_title(end));
5383 } else {
5384 for i in 0..content.len() {
5386 assert!(!ctx.is_in_link_title(i));
5387 }
5388 }
5389 }
5390 }
5391
5392 #[test]
5393 fn test_is_in_link_title_no_refs() {
5394 let content = "Just plain text without any reference definitions.";
5395 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5396
5397 assert!(ctx.reference_defs.is_empty());
5398
5399 for i in 0..content.len() {
5400 assert!(!ctx.is_in_link_title(i));
5401 }
5402 }
5403
5404 #[test]
5409 fn test_math_spans_inline() {
5410 let content = "Text with inline math $[f](x)$ in it.";
5411 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5412
5413 let math_spans = ctx.math_spans();
5414 assert_eq!(math_spans.len(), 1, "Should detect one inline math span");
5415
5416 let span = &math_spans[0];
5417 assert!(!span.is_display, "Should be inline math, not display");
5418 assert_eq!(span.content, "[f](x)", "Content should be extracted correctly");
5419 }
5420
5421 #[test]
5422 fn test_math_spans_display_single_line() {
5423 let content = "$$X(\\zeta) = \\mathcal Z [x](\\zeta)$$";
5424 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5425
5426 let math_spans = ctx.math_spans();
5427 assert_eq!(math_spans.len(), 1, "Should detect one display math span");
5428
5429 let span = &math_spans[0];
5430 assert!(span.is_display, "Should be display math");
5431 assert!(
5432 span.content.contains("[x](\\zeta)"),
5433 "Content should contain the link-like pattern"
5434 );
5435 }
5436
5437 #[test]
5438 fn test_math_spans_display_multiline() {
5439 let content = "Before\n\n$$\n[x](\\zeta) = \\sum_k x(k)\n$$\n\nAfter";
5440 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5441
5442 let math_spans = ctx.math_spans();
5443 assert_eq!(math_spans.len(), 1, "Should detect one display math span");
5444
5445 let span = &math_spans[0];
5446 assert!(span.is_display, "Should be display math");
5447 }
5448
5449 #[test]
5450 fn test_is_in_math_span() {
5451 let content = "Text $[f](x)$ more text";
5452 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5453
5454 let math_start = content.find('$').unwrap();
5456 let math_end = content.rfind('$').unwrap() + 1;
5457
5458 assert!(
5459 ctx.is_in_math_span(math_start + 1),
5460 "Position inside math span should return true"
5461 );
5462 assert!(
5463 ctx.is_in_math_span(math_start + 3),
5464 "Position inside math span should return true"
5465 );
5466
5467 assert!(!ctx.is_in_math_span(0), "Position before math span should return false");
5469 assert!(
5470 !ctx.is_in_math_span(math_end + 1),
5471 "Position after math span should return false"
5472 );
5473 }
5474
5475 #[test]
5476 fn test_math_spans_mixed_with_code() {
5477 let content = "Math $[f](x)$ and code `[g](y)` mixed";
5478 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5479
5480 let math_spans = ctx.math_spans();
5481 let code_spans = ctx.code_spans();
5482
5483 assert_eq!(math_spans.len(), 1, "Should have one math span");
5484 assert_eq!(code_spans.len(), 1, "Should have one code span");
5485
5486 assert_eq!(math_spans[0].content, "[f](x)");
5488 assert_eq!(code_spans[0].content, "[g](y)");
5490 }
5491
5492 #[test]
5493 fn test_math_spans_no_math() {
5494 let content = "Regular text without any math at all.";
5495 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5496
5497 let math_spans = ctx.math_spans();
5498 assert!(math_spans.is_empty(), "Should have no math spans");
5499 }
5500
5501 #[test]
5502 fn test_math_spans_multiple() {
5503 let content = "First $a$ and second $b$ and display $$c$$";
5504 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5505
5506 let math_spans = ctx.math_spans();
5507 assert_eq!(math_spans.len(), 3, "Should detect three math spans");
5508
5509 let inline_count = math_spans.iter().filter(|s| !s.is_display).count();
5511 let display_count = math_spans.iter().filter(|s| s.is_display).count();
5512
5513 assert_eq!(inline_count, 2, "Should have two inline math spans");
5514 assert_eq!(display_count, 1, "Should have one display math span");
5515 }
5516
5517 #[test]
5518 fn test_is_in_math_span_boundary_positions() {
5519 let content = "$[f](x)$";
5522 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5523
5524 let math_spans = ctx.math_spans();
5525 assert_eq!(math_spans.len(), 1, "Should have one math span");
5526
5527 let span = &math_spans[0];
5528
5529 assert!(
5531 ctx.is_in_math_span(span.byte_offset),
5532 "Start position should be in span"
5533 );
5534
5535 assert!(
5537 ctx.is_in_math_span(span.byte_offset + 1),
5538 "Position after start should be in span"
5539 );
5540
5541 assert!(
5543 ctx.is_in_math_span(span.byte_end - 1),
5544 "Position at end-1 should be in span"
5545 );
5546
5547 assert!(
5549 !ctx.is_in_math_span(span.byte_end),
5550 "Position at byte_end should NOT be in span (exclusive)"
5551 );
5552 }
5553
5554 #[test]
5555 fn test_math_spans_at_document_start() {
5556 let content = "$x$ text";
5557 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5558
5559 let math_spans = ctx.math_spans();
5560 assert_eq!(math_spans.len(), 1);
5561 assert_eq!(math_spans[0].byte_offset, 0, "Math should start at byte 0");
5562 }
5563
5564 #[test]
5565 fn test_math_spans_at_document_end() {
5566 let content = "text $x$";
5567 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5568
5569 let math_spans = ctx.math_spans();
5570 assert_eq!(math_spans.len(), 1);
5571 assert_eq!(math_spans[0].byte_end, content.len(), "Math should end at document end");
5572 }
5573
5574 #[test]
5575 fn test_math_spans_consecutive() {
5576 let content = "$a$$b$";
5577 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5578
5579 let math_spans = ctx.math_spans();
5580 assert!(!math_spans.is_empty(), "Should detect at least one math span");
5582
5583 for i in 0..content.len() {
5585 assert!(ctx.is_in_math_span(i), "Position {i} should be in a math span");
5586 }
5587 }
5588
5589 #[test]
5590 fn test_math_spans_currency_not_math() {
5591 let content = "Price is $100";
5593 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5594
5595 let math_spans = ctx.math_spans();
5596 assert!(
5599 math_spans.is_empty() || !math_spans.iter().any(|s| s.content.contains("100")),
5600 "Unbalanced $ should not create math span containing 100"
5601 );
5602 }
5603
5604 #[test]
5609 fn test_reference_lookup_o1_basic() {
5610 let content = r#"[ref1]: /url1
5611[REF2]: /url2 "Title"
5612[Ref3]: /url3
5613
5614Use [link][ref1] and [link][REF2]."#;
5615 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5616
5617 assert_eq!(ctx.reference_defs.len(), 3);
5619
5620 assert_eq!(ctx.get_reference_url("ref1"), Some("/url1"));
5622 assert_eq!(ctx.get_reference_url("REF1"), Some("/url1")); assert_eq!(ctx.get_reference_url("Ref1"), Some("/url1")); assert_eq!(ctx.get_reference_url("ref2"), Some("/url2"));
5625 assert_eq!(ctx.get_reference_url("REF2"), Some("/url2"));
5626 assert_eq!(ctx.get_reference_url("ref3"), Some("/url3"));
5627 assert_eq!(ctx.get_reference_url("nonexistent"), None);
5628 }
5629
5630 #[test]
5631 fn test_reference_lookup_o1_get_reference_def() {
5632 let content = r#"[myref]: https://example.com "My Title"
5633"#;
5634 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5635
5636 let def = ctx.get_reference_def("myref").expect("Should find myref");
5638 assert_eq!(def.url, "https://example.com");
5639 assert_eq!(def.title.as_deref(), Some("My Title"));
5640
5641 let def2 = ctx.get_reference_def("MYREF").expect("Should find MYREF");
5643 assert_eq!(def2.url, "https://example.com");
5644
5645 assert!(ctx.get_reference_def("nonexistent").is_none());
5647 }
5648
5649 #[test]
5650 fn test_reference_lookup_o1_has_reference_def() {
5651 let content = r#"[foo]: /foo
5652[BAR]: /bar
5653"#;
5654 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5655
5656 assert!(ctx.has_reference_def("foo"));
5658 assert!(ctx.has_reference_def("FOO")); assert!(ctx.has_reference_def("bar"));
5660 assert!(ctx.has_reference_def("Bar")); assert!(!ctx.has_reference_def("baz")); }
5663
5664 #[test]
5665 fn test_reference_lookup_o1_empty_content() {
5666 let content = "No references here.";
5667 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5668
5669 assert!(ctx.reference_defs.is_empty());
5670 assert_eq!(ctx.get_reference_url("anything"), None);
5671 assert!(ctx.get_reference_def("anything").is_none());
5672 assert!(!ctx.has_reference_def("anything"));
5673 }
5674
5675 #[test]
5676 fn test_reference_lookup_o1_special_characters_in_id() {
5677 let content = r#"[ref-with-dash]: /url1
5678[ref_with_underscore]: /url2
5679[ref.with.dots]: /url3
5680"#;
5681 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5682
5683 assert_eq!(ctx.get_reference_url("ref-with-dash"), Some("/url1"));
5684 assert_eq!(ctx.get_reference_url("ref_with_underscore"), Some("/url2"));
5685 assert_eq!(ctx.get_reference_url("ref.with.dots"), Some("/url3"));
5686 }
5687
5688 #[test]
5689 fn test_reference_lookup_o1_unicode_id() {
5690 let content = r#"[日本語]: /japanese
5691[émoji]: /emoji
5692"#;
5693 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5694
5695 assert_eq!(ctx.get_reference_url("日本語"), Some("/japanese"));
5696 assert_eq!(ctx.get_reference_url("émoji"), Some("/emoji"));
5697 assert_eq!(ctx.get_reference_url("ÉMOJI"), Some("/emoji")); }
5699}