1use crate::config::MarkdownFlavor;
2use crate::inline_config::InlineConfig;
3use crate::rules::front_matter_utils::FrontMatterUtils;
4use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
5use crate::utils::element_cache::ElementCache;
6use crate::utils::regex_cache::URL_SIMPLE_REGEX;
7use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
8use regex::Regex;
9use std::borrow::Cow;
10use std::collections::HashMap;
11use std::path::PathBuf;
12use std::sync::LazyLock;
13
/// Evaluates `$code`, optionally reporting how long it took.
///
/// The block is always timed; the `[PROFILE] <name>: <elapsed>` line is only
/// written to stderr when `$profile` evaluates to `true`. The value of
/// `$code` is returned unchanged either way.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let timer = std::time::Instant::now();
        let output = $code;
        // Reporting happens after the work so the elapsed time covers all of `$code`.
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, timer.elapsed());
        }
        output
    }};
}
26
/// wasm32 variant of `profile_section!`: expands to `$code` alone.
///
/// `$name` and `$profile` are accepted for call-site compatibility but are
/// neither evaluated nor used, and no timing is performed.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        $code
    }};
}
31
32static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
35 Regex::new(
36 r#"(?sx)
37 \[((?:[^\[\]\\]|\\.)*)\] # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
38 (?:
39 \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
40 |
41 \[([^\]]*)\] # Reference ID in group 6
42 )"#
43 ).unwrap()
44});
45
46static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
49 Regex::new(
50 r#"(?sx)
51 !\[((?:[^\[\]\\]|\\.)*)\] # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
52 (?:
53 \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
54 |
55 \[([^\]]*)\] # Reference ID in group 6
56 )"#
57 ).unwrap()
58});
59
60static REF_DEF_PATTERN: LazyLock<Regex> =
62 LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
63
64static BARE_EMAIL_PATTERN: LazyLock<Regex> =
68 LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
69
70static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
72
73#[derive(Debug, Clone)]
75pub struct LineInfo {
76 pub byte_offset: usize,
78 pub byte_len: usize,
80 pub indent: usize,
82 pub visual_indent: usize,
86 pub is_blank: bool,
88 pub in_code_block: bool,
90 pub in_front_matter: bool,
92 pub in_html_block: bool,
94 pub in_html_comment: bool,
96 pub list_item: Option<ListItemInfo>,
98 pub heading: Option<HeadingInfo>,
100 pub blockquote: Option<BlockquoteInfo>,
102 pub in_mkdocstrings: bool,
104 pub in_esm_block: bool,
106 pub in_code_span_continuation: bool,
108 pub is_horizontal_rule: bool,
111 pub in_math_block: bool,
113 pub in_quarto_div: bool,
115 pub in_jsx_expression: bool,
117 pub in_mdx_comment: bool,
119 pub in_jsx_component: bool,
121 pub in_jsx_fragment: bool,
123 pub in_admonition: bool,
125 pub in_content_tab: bool,
127 pub in_definition_list: bool,
129}
130
131impl LineInfo {
132 pub fn content<'a>(&self, source: &'a str) -> &'a str {
134 &source[self.byte_offset..self.byte_offset + self.byte_len]
135 }
136
137 #[inline]
141 pub fn in_mkdocs_container(&self) -> bool {
142 self.in_admonition || self.in_content_tab
143 }
144}
145
/// Details about the list item that starts on a line.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The list marker text as written in the source.
    pub marker: String,
    /// `true` for ordered (numbered) items, `false` for bullet items.
    pub is_ordered: bool,
    /// The item's number, when one is available.
    pub number: Option<usize>,
    /// Column at which the marker starts.
    pub marker_column: usize,
    /// Column at which the item's content starts.
    pub content_column: usize,
}
160
/// Syntactic form a heading was written in.
///
/// Equality is total for this fieldless enum, so `Eq` is derived alongside
/// `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HeadingStyle {
    /// ATX heading (`#`-prefixed).
    ATX,
    /// Setext heading, first variant (presumably the `=` underline / level 1 — confirm at the producer).
    Setext1,
    /// Setext heading, second variant (presumably the `-` underline / level 2 — confirm at the producer).
    Setext2,
}
171
172#[derive(Debug, Clone)]
174pub struct ParsedLink<'a> {
175 pub line: usize,
177 pub start_col: usize,
179 pub end_col: usize,
181 pub byte_offset: usize,
183 pub byte_end: usize,
185 pub text: Cow<'a, str>,
187 pub url: Cow<'a, str>,
189 pub is_reference: bool,
191 pub reference_id: Option<Cow<'a, str>>,
193 pub link_type: LinkType,
195}
196
/// A reference the Markdown parser could not resolve, as reported through
/// its broken-link callback.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The unresolved reference text.
    pub reference: String,
    /// Byte range of the broken link in the document.
    pub span: std::ops::Range<usize>,
}
205
/// A footnote reference found in the document.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// The footnote identifier.
    pub id: String,
    /// Line on which the reference appears.
    pub line: usize,
    /// Byte offset of the reference's start in the document.
    pub byte_offset: usize,
    /// Byte offset of the reference's end in the document.
    pub byte_end: usize,
}
218
219#[derive(Debug, Clone)]
221pub struct ParsedImage<'a> {
222 pub line: usize,
224 pub start_col: usize,
226 pub end_col: usize,
228 pub byte_offset: usize,
230 pub byte_end: usize,
232 pub alt_text: Cow<'a, str>,
234 pub url: Cow<'a, str>,
236 pub is_reference: bool,
238 pub reference_id: Option<Cow<'a, str>>,
240 pub link_type: LinkType,
242}
243
/// A link reference definition (`[id]: url "title"`).
///
/// Byte ranges are half-open: `start <= pos < end`.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line on which the definition appears.
    pub line: usize,
    /// The reference id as written; lookups lowercase it.
    pub id: String,
    /// The definition's destination URL.
    pub url: String,
    /// The optional title.
    pub title: Option<String>,
    /// Byte offset of the definition's start in the document.
    pub byte_offset: usize,
    /// Byte offset just past the definition's end.
    pub byte_end: usize,
    /// Byte offset of the title's start, when a title is present.
    pub title_byte_start: Option<usize>,
    /// Byte offset just past the title's end, when a title is present.
    pub title_byte_end: Option<usize>,
}
264
/// An inline code span, which may extend over several lines.
///
/// The byte range is half-open: `byte_offset <= pos < byte_end`.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// 1-based line on which the span starts.
    pub line: usize,
    /// 1-based line on which the span ends.
    pub end_line: usize,
    /// Column at which the span starts on `line`.
    pub start_col: usize,
    /// Column at which the span ends on `end_line`.
    pub end_col: usize,
    /// Byte offset of the span's start in the document.
    pub byte_offset: usize,
    /// Byte offset just past the span's end.
    pub byte_end: usize,
    /// Number of backticks in the span's delimiter.
    pub backtick_count: usize,
    /// The span's content.
    pub content: String,
}
285
/// A math span, which may extend over several lines.
///
/// The byte range is half-open: `byte_offset <= pos < byte_end`.
#[derive(Debug, Clone)]
pub struct MathSpan {
    /// Line on which the span starts.
    pub line: usize,
    /// Line on which the span ends.
    pub end_line: usize,
    /// Column at which the span starts.
    pub start_col: usize,
    /// Column at which the span ends.
    pub end_col: usize,
    /// Byte offset of the span's start in the document.
    pub byte_offset: usize,
    /// Byte offset just past the span's end.
    pub byte_end: usize,
    /// `true` for display math, `false` for inline math.
    pub is_display: bool,
    /// The span's content.
    pub content: String,
}
306
307#[derive(Debug, Clone)]
309pub struct HeadingInfo {
310 pub level: u8,
312 pub style: HeadingStyle,
314 pub marker: String,
316 pub marker_column: usize,
318 pub content_column: usize,
320 pub text: String,
322 pub custom_id: Option<String>,
324 pub raw_text: String,
326 pub has_closing_sequence: bool,
328 pub closing_sequence: String,
330 pub is_valid: bool,
333}
334
335#[derive(Debug, Clone)]
340pub struct ValidHeading<'a> {
341 pub line_num: usize,
343 pub heading: &'a HeadingInfo,
345 pub line_info: &'a LineInfo,
347}
348
349pub struct ValidHeadingsIter<'a> {
354 lines: &'a [LineInfo],
355 current_index: usize,
356}
357
358impl<'a> ValidHeadingsIter<'a> {
359 fn new(lines: &'a [LineInfo]) -> Self {
360 Self {
361 lines,
362 current_index: 0,
363 }
364 }
365}
366
367impl<'a> Iterator for ValidHeadingsIter<'a> {
368 type Item = ValidHeading<'a>;
369
370 fn next(&mut self) -> Option<Self::Item> {
371 while self.current_index < self.lines.len() {
372 let idx = self.current_index;
373 self.current_index += 1;
374
375 let line_info = &self.lines[idx];
376 if let Some(heading) = &line_info.heading
377 && heading.is_valid
378 {
379 return Some(ValidHeading {
380 line_num: idx + 1, heading,
382 line_info,
383 });
384 }
385 }
386 None
387 }
388}
389
/// Details about the blockquote syntax found on a line.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Blockquote nesting depth.
    pub nesting_level: usize,
    /// Indentation text before the blockquote marker.
    pub indent: String,
    /// Column at which the marker starts.
    pub marker_column: usize,
    /// The blockquote prefix text of the line.
    pub prefix: String,
    /// The line content after the prefix.
    pub content: String,
    /// Whether the marker is not followed by a space.
    pub has_no_space_after_marker: bool,
    /// Whether the marker is followed by more than one space.
    pub has_multiple_spaces_after_marker: bool,
    /// Whether the line needs an MD028 fix.
    pub needs_md028_fix: bool,
}
410
/// A contiguous list in the document.
///
/// `start_line..=end_line` is an inclusive line range.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// First line of the block.
    pub start_line: usize,
    /// Last line of the block (inclusive).
    pub end_line: usize,
    /// Whether the list is ordered.
    pub is_ordered: bool,
    /// The list marker, when one is recorded for the block.
    pub marker: Option<String>,
    /// Blockquote prefix associated with the list.
    pub blockquote_prefix: String,
    /// Line numbers of the list's items.
    pub item_lines: Vec<usize>,
    /// Nesting depth of the list.
    pub nesting_level: usize,
    /// Width of the widest marker in the list.
    pub max_marker_width: usize,
}
431
432use std::sync::{Arc, OnceLock};
433
/// Map from a `usize` key to a list-item tuple.
///
/// NOTE(review): the key is presumably a line index and the tuple presumably
/// mirrors `ListItemInfo` (ordered flag, marker, two columns, number) — the
/// code that fills it is not visible here, so confirm before relying on it.
type ListItemMap = HashMap<usize, (bool, String, usize, usize, Option<usize>)>;
436
/// A list of `(usize, usize)` pairs, used for byte ranges in the document.
type ByteRanges = Vec<(usize, usize)>;
439
/// Occurrence counts of characters that are significant in Markdown.
///
/// All counters start at zero via `Default`.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count reported for `#`.
    pub hash_count: usize,
    /// Count reported for `*`.
    pub asterisk_count: usize,
    /// Count reported for `_`.
    pub underscore_count: usize,
    /// Count reported for `-`.
    pub hyphen_count: usize,
    /// Count reported for `+`.
    pub plus_count: usize,
    /// Count reported for `>`.
    pub gt_count: usize,
    /// Count reported for `|`.
    pub pipe_count: usize,
    /// Count reported for `[`.
    pub bracket_count: usize,
    /// Count reported for `` ` ``.
    pub backtick_count: usize,
    /// Count reported for `<`.
    pub lt_count: usize,
    /// Count reported for `!`.
    pub exclamation_count: usize,
    /// Count reported for newline characters.
    pub newline_count: usize,
}
468
/// An HTML tag found in the document.
///
/// The byte range is half-open: `byte_offset <= pos < byte_end`.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line on which the tag appears.
    pub line: usize,
    /// Column at which the tag starts.
    pub start_col: usize,
    /// Column at which the tag ends.
    pub end_col: usize,
    /// Byte offset of the tag's start in the document.
    pub byte_offset: usize,
    /// Byte offset just past the tag's end.
    pub byte_end: usize,
    /// The tag name.
    pub tag_name: String,
    /// Whether this is a closing tag.
    pub is_closing: bool,
    /// Whether the tag is self-closing.
    pub is_self_closing: bool,
    /// The tag's raw source text.
    pub raw_content: String,
}
491
/// An emphasis span found in the document.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line on which the span appears.
    pub line: usize,
    /// Column at which the span starts.
    pub start_col: usize,
    /// Column at which the span ends.
    pub end_col: usize,
    /// Byte offset of the span's start in the document.
    pub byte_offset: usize,
    /// Byte offset of the span's end in the document.
    pub byte_end: usize,
    /// The emphasis marker character.
    pub marker: char,
    /// How many marker characters delimit the span.
    pub marker_count: usize,
    /// The emphasized content.
    pub content: String,
}
512
/// A table row found in the document.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line on which the row appears.
    pub line: usize,
    /// Whether the row is a separator row.
    pub is_separator: bool,
    /// Number of columns in the row.
    pub column_count: usize,
    /// Column alignments as strings (the exact values are set by the producer, not shown here).
    pub column_alignments: Vec<String>,
}
525
/// A bare URL (or similar auto-linkable token) found in the document.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line on which the URL appears.
    pub line: usize,
    /// Column at which the URL starts.
    pub start_col: usize,
    /// Column at which the URL ends.
    pub end_col: usize,
    /// Byte offset of the URL's start in the document.
    pub byte_offset: usize,
    /// Byte offset of the URL's end in the document.
    pub byte_end: usize,
    /// The URL text.
    pub url: String,
    /// Kind of URL as a string (the possible values are set by the producer, not shown here).
    pub url_type: String,
}
544
545pub struct LintContext<'a> {
546 pub content: &'a str,
547 pub line_offsets: Vec<usize>,
548 pub code_blocks: Vec<(usize, usize)>, pub lines: Vec<LineInfo>, pub links: Vec<ParsedLink<'a>>, pub images: Vec<ParsedImage<'a>>, pub broken_links: Vec<BrokenLinkInfo>, pub footnote_refs: Vec<FootnoteRef>, pub reference_defs: Vec<ReferenceDef>, reference_defs_map: HashMap<String, usize>, code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>, math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>, pub list_blocks: Vec<ListBlock>, pub char_frequency: CharFrequency, html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>, emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>, table_rows_cache: OnceLock<Arc<Vec<TableRow>>>, bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>, has_mixed_list_nesting_cache: OnceLock<bool>, html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>, pub table_blocks: Vec<crate::utils::table_utils::TableBlock>, pub line_index: crate::utils::range_utils::LineIndex<'a>, jinja_ranges: Vec<(usize, usize)>, pub flavor: MarkdownFlavor, pub source_file: Option<PathBuf>, jsx_expression_ranges: Vec<(usize, usize)>, mdx_comment_ranges: Vec<(usize, usize)>, citation_ranges: Vec<crate::utils::skip_context::ByteRange>, shortcode_ranges: Vec<(usize, usize)>, inline_config: InlineConfig, }
577
/// The four consecutive pieces of a blockquote line, each borrowed from the
/// original line.
struct BlockquoteComponents<'a> {
    /// Leading spaces/tabs before the first `>`.
    indent: &'a str,
    /// The run of consecutive `>` characters.
    markers: &'a str,
    /// Spaces/tabs directly after the markers.
    spaces_after: &'a str,
    /// Everything after `spaces_after`.
    content: &'a str,
}

/// Splits `line` into indent, `>` markers, following whitespace and content.
///
/// Returns `None` when the first non-space/tab byte is not `>` (including
/// when the line is empty or whitespace-only). Only a contiguous run of `>`
/// counts as markers, so for `> > x` the second `>` is part of `content`.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    fn is_space_or_tab(byte: u8) -> bool {
        byte == b' ' || byte == b'\t'
    }

    let bytes = line.as_bytes();

    let indent_len = bytes.iter().take_while(|&&b| is_space_or_tab(b)).count();
    let marker_len = bytes[indent_len..].iter().take_while(|&&b| b == b'>').count();
    if marker_len == 0 {
        return None;
    }
    let markers_end = indent_len + marker_len;
    let gap_len = bytes[markers_end..]
        .iter()
        .take_while(|&&b| is_space_or_tab(b))
        .count();

    // Every split point follows an ASCII byte, so each one is a valid UTF-8
    // character boundary.
    let (indent, rest) = line.split_at(indent_len);
    let (markers, rest) = rest.split_at(marker_len);
    let (spaces_after, content) = rest.split_at(gap_len);

    Some(BlockquoteComponents {
        indent,
        markers,
        spaces_after,
        content,
    })
}
622
623impl<'a> LintContext<'a> {
624 pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
625 #[cfg(not(target_arch = "wasm32"))]
626 let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
627 #[cfg(target_arch = "wasm32")]
628 let profile = false;
629
630 let line_offsets = profile_section!("Line offsets", profile, {
631 let mut offsets = vec![0];
632 for (i, c) in content.char_indices() {
633 if c == '\n' {
634 offsets.push(i + 1);
635 }
636 }
637 offsets
638 });
639
640 let (code_blocks, code_span_ranges) = profile_section!(
642 "Code blocks",
643 profile,
644 CodeBlockUtils::detect_code_blocks_and_spans(content)
645 );
646
647 let html_comment_ranges = profile_section!(
649 "HTML comment ranges",
650 profile,
651 crate::utils::skip_context::compute_html_comment_ranges(content)
652 );
653
654 let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
656 if flavor == MarkdownFlavor::MkDocs {
657 crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
658 } else {
659 Vec::new()
660 }
661 });
662
663 let quarto_div_ranges = profile_section!("Quarto div ranges", profile, {
665 if flavor == MarkdownFlavor::Quarto {
666 crate::utils::quarto_divs::detect_div_block_ranges(content)
667 } else {
668 Vec::new()
669 }
670 });
671
672 let (mut lines, emphasis_spans) = profile_section!(
675 "Basic line info",
676 profile,
677 Self::compute_basic_line_info(
678 content,
679 &line_offsets,
680 &code_blocks,
681 flavor,
682 &html_comment_ranges,
683 &autodoc_ranges,
684 &quarto_div_ranges,
685 )
686 );
687
688 profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));
690
691 profile_section!(
693 "ESM blocks",
694 profile,
695 Self::detect_esm_blocks(content, &mut lines, flavor)
696 );
697
698 let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
700 "JSX/MDX detection",
701 profile,
702 Self::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
703 );
704
705 profile_section!(
707 "MkDocs constructs",
708 profile,
709 Self::detect_mkdocs_line_info(content, &mut lines, flavor)
710 );
711
712 let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));
714
715 profile_section!(
717 "Headings & blockquotes",
718 profile,
719 Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
720 );
721
722 let code_spans = profile_section!(
724 "Code spans",
725 profile,
726 Self::build_code_spans_from_ranges(content, &lines, &code_span_ranges)
727 );
728
729 for span in &code_spans {
732 if span.end_line > span.line {
733 for line_num in (span.line + 1)..=span.end_line {
735 if let Some(line_info) = lines.get_mut(line_num - 1) {
736 line_info.in_code_span_continuation = true;
737 }
738 }
739 }
740 }
741
742 let (links, broken_links, footnote_refs) = profile_section!(
744 "Links",
745 profile,
746 Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
747 );
748
749 let images = profile_section!(
750 "Images",
751 profile,
752 Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
753 );
754
755 let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));
756
757 let reference_defs_map: HashMap<String, usize> = reference_defs
759 .iter()
760 .enumerate()
761 .map(|(idx, def)| (def.id.to_lowercase(), idx))
762 .collect();
763
764 let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));
765
766 let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));
768
769 let table_blocks = profile_section!(
771 "Table blocks",
772 profile,
773 crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
774 content,
775 &code_blocks,
776 &code_spans,
777 &html_comment_ranges,
778 )
779 );
780
781 let line_index = profile_section!(
783 "Line index",
784 profile,
785 crate::utils::range_utils::LineIndex::new(content)
786 );
787
788 let jinja_ranges = profile_section!(
790 "Jinja ranges",
791 profile,
792 crate::utils::jinja_utils::find_jinja_ranges(content)
793 );
794
795 let citation_ranges = profile_section!("Citation ranges", profile, {
797 if flavor == MarkdownFlavor::Quarto {
798 crate::utils::quarto_divs::find_citation_ranges(content)
799 } else {
800 Vec::new()
801 }
802 });
803
804 let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
806 use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
807 let mut ranges = Vec::new();
808 for mat in HUGO_SHORTCODE_REGEX.find_iter(content).flatten() {
809 ranges.push((mat.start(), mat.end()));
810 }
811 ranges
812 });
813
814 let inline_config = InlineConfig::from_content_with_code_blocks(content, &code_blocks);
815
816 Self {
817 content,
818 line_offsets,
819 code_blocks,
820 lines,
821 links,
822 images,
823 broken_links,
824 footnote_refs,
825 reference_defs,
826 reference_defs_map,
827 code_spans_cache: OnceLock::from(Arc::new(code_spans)),
828 math_spans_cache: OnceLock::new(), list_blocks,
830 char_frequency,
831 html_tags_cache: OnceLock::new(),
832 emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
833 table_rows_cache: OnceLock::new(),
834 bare_urls_cache: OnceLock::new(),
835 has_mixed_list_nesting_cache: OnceLock::new(),
836 html_comment_ranges,
837 table_blocks,
838 line_index,
839 jinja_ranges,
840 flavor,
841 source_file,
842 jsx_expression_ranges,
843 mdx_comment_ranges,
844 citation_ranges,
845 shortcode_ranges,
846 inline_config,
847 }
848 }
849
850 pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
855 self.inline_config.is_rule_disabled(rule_name, line_number)
856 }
857
858 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
860 Arc::clone(
861 self.code_spans_cache
862 .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
863 )
864 }
865
866 pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
868 Arc::clone(
869 self.math_spans_cache
870 .get_or_init(|| Arc::new(Self::parse_math_spans(self.content, &self.lines))),
871 )
872 }
873
874 pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
876 let math_spans = self.math_spans();
877 math_spans
878 .iter()
879 .any(|span| byte_pos >= span.byte_offset && byte_pos < span.byte_end)
880 }
881
882 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
884 &self.html_comment_ranges
885 }
886
887 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
889 Arc::clone(self.html_tags_cache.get_or_init(|| {
890 Arc::new(Self::parse_html_tags(
891 self.content,
892 &self.lines,
893 &self.code_blocks,
894 self.flavor,
895 ))
896 }))
897 }
898
899 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
901 Arc::clone(
902 self.emphasis_spans_cache
903 .get()
904 .expect("emphasis_spans_cache initialized during construction"),
905 )
906 }
907
908 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
910 Arc::clone(
911 self.table_rows_cache
912 .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
913 )
914 }
915
916 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
918 Arc::clone(
919 self.bare_urls_cache
920 .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
921 )
922 }
923
924 pub fn has_mixed_list_nesting(&self) -> bool {
928 *self
929 .has_mixed_list_nesting_cache
930 .get_or_init(|| self.compute_mixed_list_nesting())
931 }
932
933 fn compute_mixed_list_nesting(&self) -> bool {
935 let mut stack: Vec<(usize, bool)> = Vec::new();
940 let mut last_was_blank = false;
941
942 for line_info in &self.lines {
943 if line_info.in_code_block
945 || line_info.in_front_matter
946 || line_info.in_mkdocstrings
947 || line_info.in_html_comment
948 || line_info.in_esm_block
949 {
950 continue;
951 }
952
953 if line_info.is_blank {
955 last_was_blank = true;
956 continue;
957 }
958
959 if let Some(list_item) = &line_info.list_item {
960 let current_pos = if list_item.marker_column == 1 {
962 0
963 } else {
964 list_item.marker_column
965 };
966
967 if last_was_blank && current_pos == 0 {
969 stack.clear();
970 }
971 last_was_blank = false;
972
973 while let Some(&(pos, _)) = stack.last() {
975 if pos >= current_pos {
976 stack.pop();
977 } else {
978 break;
979 }
980 }
981
982 if let Some(&(_, parent_is_ordered)) = stack.last()
984 && parent_is_ordered != list_item.is_ordered
985 {
986 return true; }
988
989 stack.push((current_pos, list_item.is_ordered));
990 } else {
991 last_was_blank = false;
993 }
994 }
995
996 false
997 }
998
999 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
1001 match self.line_offsets.binary_search(&offset) {
1002 Ok(line) => (line + 1, 1),
1003 Err(line) => {
1004 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
1005 (line, offset - line_start + 1)
1006 }
1007 }
1008 }
1009
1010 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
1012 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
1014 return true;
1015 }
1016
1017 self.code_spans()
1019 .iter()
1020 .any(|span| pos >= span.byte_offset && pos < span.byte_end)
1021 }
1022
1023 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
1025 if line_num > 0 {
1026 self.lines.get(line_num - 1)
1027 } else {
1028 None
1029 }
1030 }
1031
1032 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
1034 self.line_info(line_num).map(|info| info.byte_offset)
1035 }
1036
1037 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
1039 let normalized_id = ref_id.to_lowercase();
1040 self.reference_defs_map
1041 .get(&normalized_id)
1042 .map(|&idx| self.reference_defs[idx].url.as_str())
1043 }
1044
1045 pub fn get_reference_def(&self, ref_id: &str) -> Option<&ReferenceDef> {
1047 let normalized_id = ref_id.to_lowercase();
1048 self.reference_defs_map
1049 .get(&normalized_id)
1050 .map(|&idx| &self.reference_defs[idx])
1051 }
1052
1053 pub fn has_reference_def(&self, ref_id: &str) -> bool {
1055 let normalized_id = ref_id.to_lowercase();
1056 self.reference_defs_map.contains_key(&normalized_id)
1057 }
1058
1059 pub fn is_in_list_block(&self, line_num: usize) -> bool {
1061 self.list_blocks
1062 .iter()
1063 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
1064 }
1065
1066 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
1068 self.list_blocks
1069 .iter()
1070 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
1071 }
1072
1073 pub fn is_in_code_block(&self, line_num: usize) -> bool {
1077 if line_num == 0 || line_num > self.lines.len() {
1078 return false;
1079 }
1080 self.lines[line_num - 1].in_code_block
1081 }
1082
1083 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
1085 if line_num == 0 || line_num > self.lines.len() {
1086 return false;
1087 }
1088 self.lines[line_num - 1].in_front_matter
1089 }
1090
1091 pub fn is_in_html_block(&self, line_num: usize) -> bool {
1093 if line_num == 0 || line_num > self.lines.len() {
1094 return false;
1095 }
1096 self.lines[line_num - 1].in_html_block
1097 }
1098
1099 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
1101 if line_num == 0 || line_num > self.lines.len() {
1102 return false;
1103 }
1104
1105 let col_0indexed = if col > 0 { col - 1 } else { 0 };
1109 let code_spans = self.code_spans();
1110 code_spans.iter().any(|span| {
1111 if line_num < span.line || line_num > span.end_line {
1113 return false;
1114 }
1115
1116 if span.line == span.end_line {
1117 col_0indexed >= span.start_col && col_0indexed < span.end_col
1119 } else if line_num == span.line {
1120 col_0indexed >= span.start_col
1122 } else if line_num == span.end_line {
1123 col_0indexed < span.end_col
1125 } else {
1126 true
1128 }
1129 })
1130 }
1131
1132 #[inline]
1134 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
1135 let code_spans = self.code_spans();
1136 code_spans
1137 .iter()
1138 .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
1139 }
1140
1141 #[inline]
1144 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
1145 self.reference_defs
1146 .iter()
1147 .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
1148 }
1149
1150 #[inline]
1154 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
1155 self.html_comment_ranges
1156 .iter()
1157 .any(|range| byte_pos >= range.start && byte_pos < range.end)
1158 }
1159
1160 #[inline]
1163 pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1164 self.html_tags()
1165 .iter()
1166 .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
1167 }
1168
1169 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1171 self.jinja_ranges
1172 .iter()
1173 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1174 }
1175
1176 #[inline]
1178 pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
1179 self.jsx_expression_ranges
1180 .iter()
1181 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1182 }
1183
1184 #[inline]
1186 pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
1187 self.mdx_comment_ranges
1188 .iter()
1189 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1190 }
1191
1192 pub fn jsx_expression_ranges(&self) -> &[(usize, usize)] {
1194 &self.jsx_expression_ranges
1195 }
1196
1197 pub fn mdx_comment_ranges(&self) -> &[(usize, usize)] {
1199 &self.mdx_comment_ranges
1200 }
1201
1202 #[inline]
1205 pub fn is_in_citation(&self, byte_pos: usize) -> bool {
1206 self.citation_ranges
1207 .iter()
1208 .any(|range| byte_pos >= range.start && byte_pos < range.end)
1209 }
1210
1211 pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
1213 &self.citation_ranges
1214 }
1215
1216 #[inline]
1218 pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
1219 self.shortcode_ranges
1220 .iter()
1221 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1222 }
1223
1224 pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
1226 &self.shortcode_ranges
1227 }
1228
1229 pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1231 self.reference_defs.iter().any(|def| {
1232 if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1233 byte_pos >= start && byte_pos < end
1234 } else {
1235 false
1236 }
1237 })
1238 }
1239
1240 pub fn has_char(&self, ch: char) -> bool {
1242 match ch {
1243 '#' => self.char_frequency.hash_count > 0,
1244 '*' => self.char_frequency.asterisk_count > 0,
1245 '_' => self.char_frequency.underscore_count > 0,
1246 '-' => self.char_frequency.hyphen_count > 0,
1247 '+' => self.char_frequency.plus_count > 0,
1248 '>' => self.char_frequency.gt_count > 0,
1249 '|' => self.char_frequency.pipe_count > 0,
1250 '[' => self.char_frequency.bracket_count > 0,
1251 '`' => self.char_frequency.backtick_count > 0,
1252 '<' => self.char_frequency.lt_count > 0,
1253 '!' => self.char_frequency.exclamation_count > 0,
1254 '\n' => self.char_frequency.newline_count > 0,
1255 _ => self.content.contains(ch), }
1257 }
1258
1259 pub fn char_count(&self, ch: char) -> usize {
1261 match ch {
1262 '#' => self.char_frequency.hash_count,
1263 '*' => self.char_frequency.asterisk_count,
1264 '_' => self.char_frequency.underscore_count,
1265 '-' => self.char_frequency.hyphen_count,
1266 '+' => self.char_frequency.plus_count,
1267 '>' => self.char_frequency.gt_count,
1268 '|' => self.char_frequency.pipe_count,
1269 '[' => self.char_frequency.bracket_count,
1270 '`' => self.char_frequency.backtick_count,
1271 '<' => self.char_frequency.lt_count,
1272 '!' => self.char_frequency.exclamation_count,
1273 '\n' => self.char_frequency.newline_count,
1274 _ => self.content.matches(ch).count(), }
1276 }
1277
1278 pub fn likely_has_headings(&self) -> bool {
1280 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
1282
1283 pub fn likely_has_lists(&self) -> bool {
1285 self.char_frequency.asterisk_count > 0
1286 || self.char_frequency.hyphen_count > 0
1287 || self.char_frequency.plus_count > 0
1288 }
1289
1290 pub fn likely_has_emphasis(&self) -> bool {
1292 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1293 }
1294
1295 pub fn likely_has_tables(&self) -> bool {
1297 self.char_frequency.pipe_count > 2
1298 }
1299
1300 pub fn likely_has_blockquotes(&self) -> bool {
1302 self.char_frequency.gt_count > 0
1303 }
1304
1305 pub fn likely_has_code(&self) -> bool {
1307 self.char_frequency.backtick_count > 0
1308 }
1309
1310 pub fn likely_has_links_or_images(&self) -> bool {
1312 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1313 }
1314
1315 pub fn likely_has_html(&self) -> bool {
1317 self.char_frequency.lt_count > 0
1318 }
1319
1320 pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1325 if let Some(line_info) = self.lines.get(line_idx)
1326 && let Some(ref bq) = line_info.blockquote
1327 {
1328 bq.prefix.trim_end().to_string()
1329 } else {
1330 String::new()
1331 }
1332 }
1333
1334 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1336 self.html_tags()
1337 .iter()
1338 .filter(|tag| tag.line == line_num)
1339 .cloned()
1340 .collect()
1341 }
1342
1343 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1345 self.emphasis_spans()
1346 .iter()
1347 .filter(|span| span.line == line_num)
1348 .cloned()
1349 .collect()
1350 }
1351
1352 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1354 self.table_rows()
1355 .iter()
1356 .filter(|row| row.line == line_num)
1357 .cloned()
1358 .collect()
1359 }
1360
1361 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1363 self.bare_urls()
1364 .iter()
1365 .filter(|url| url.line == line_num)
1366 .cloned()
1367 .collect()
1368 }
1369
1370 #[inline]
1376 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1377 let idx = match lines.binary_search_by(|line| {
1379 if byte_offset < line.byte_offset {
1380 std::cmp::Ordering::Greater
1381 } else if byte_offset > line.byte_offset + line.byte_len {
1382 std::cmp::Ordering::Less
1383 } else {
1384 std::cmp::Ordering::Equal
1385 }
1386 }) {
1387 Ok(idx) => idx,
1388 Err(idx) => idx.saturating_sub(1),
1389 };
1390
1391 let line = &lines[idx];
1392 let line_num = idx + 1;
1393 let col = byte_offset.saturating_sub(line.byte_offset);
1394
1395 (idx, line_num, col)
1396 }
1397
1398 #[inline]
1400 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1401 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1403
1404 if idx > 0 {
1406 let span = &code_spans[idx - 1];
1407 if offset >= span.byte_offset && offset < span.byte_end {
1408 return true;
1409 }
1410 }
1411
1412 false
1413 }
1414
1415 fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1419 use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1420
1421 let mut link_ranges = Vec::new();
1422 let mut options = Options::empty();
1423 options.insert(Options::ENABLE_WIKILINKS);
1424 options.insert(Options::ENABLE_FOOTNOTES);
1425
1426 let parser = Parser::new_ext(content, options).into_offset_iter();
1427 let mut link_stack: Vec<usize> = Vec::new();
1428
1429 for (event, range) in parser {
1430 match event {
1431 Event::Start(Tag::Link { .. }) => {
1432 link_stack.push(range.start);
1433 }
1434 Event::End(TagEnd::Link) => {
1435 if let Some(start_pos) = link_stack.pop() {
1436 link_ranges.push((start_pos, range.end));
1437 }
1438 }
1439 _ => {}
1440 }
1441 }
1442
1443 link_ranges
1444 }
1445
1446 fn parse_links(
1448 content: &'a str,
1449 lines: &[LineInfo],
1450 code_blocks: &[(usize, usize)],
1451 code_spans: &[CodeSpan],
1452 flavor: MarkdownFlavor,
1453 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1454 ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1455 use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1456 use std::collections::HashSet;
1457
1458 let mut links = Vec::with_capacity(content.len() / 500);
1459 let mut broken_links = Vec::new();
1460 let mut footnote_refs = Vec::new();
1461
1462 let mut found_positions = HashSet::new();
1464
1465 let mut options = Options::empty();
1475 options.insert(Options::ENABLE_WIKILINKS);
1476 options.insert(Options::ENABLE_FOOTNOTES);
1477
1478 let parser = Parser::new_with_broken_link_callback(
1479 content,
1480 options,
1481 Some(|link: BrokenLink<'_>| {
1482 broken_links.push(BrokenLinkInfo {
1483 reference: link.reference.to_string(),
1484 span: link.span.clone(),
1485 });
1486 None
1487 }),
1488 )
1489 .into_offset_iter();
1490
1491 let mut link_stack: Vec<(
1492 usize,
1493 usize,
1494 pulldown_cmark::CowStr<'a>,
1495 LinkType,
1496 pulldown_cmark::CowStr<'a>,
1497 )> = Vec::new();
1498 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1501 match event {
1502 Event::Start(Tag::Link {
1503 link_type,
1504 dest_url,
1505 id,
1506 ..
1507 }) => {
1508 link_stack.push((range.start, range.end, dest_url, link_type, id));
1510 text_chunks.clear();
1511 }
1512 Event::Text(text) if !link_stack.is_empty() => {
1513 text_chunks.push((text.to_string(), range.start, range.end));
1515 }
1516 Event::Code(code) if !link_stack.is_empty() => {
1517 let code_text = format!("`{code}`");
1519 text_chunks.push((code_text, range.start, range.end));
1520 }
1521 Event::End(TagEnd::Link) => {
1522 if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1523 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1525 text_chunks.clear();
1526 continue;
1527 }
1528
1529 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1531
1532 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1534 text_chunks.clear();
1535 continue;
1536 }
1537
1538 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1539
1540 let is_reference = matches!(
1541 link_type,
1542 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1543 );
1544
1545 let link_text = if start_pos < content.len() {
1548 let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1549
1550 let mut close_pos = None;
1554 let mut depth = 0;
1555 let mut in_code_span = false;
1556
1557 for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1558 let mut backslash_count = 0;
1560 let mut j = i;
1561 while j > 0 && link_bytes[j - 1] == b'\\' {
1562 backslash_count += 1;
1563 j -= 1;
1564 }
1565 let is_escaped = backslash_count % 2 != 0;
1566
1567 if byte == b'`' && !is_escaped {
1569 in_code_span = !in_code_span;
1570 }
1571
1572 if !is_escaped && !in_code_span {
1574 if byte == b'[' {
1575 depth += 1;
1576 } else if byte == b']' {
1577 if depth == 0 {
1578 close_pos = Some(i);
1580 break;
1581 } else {
1582 depth -= 1;
1583 }
1584 }
1585 }
1586 }
1587
1588 if let Some(pos) = close_pos {
1589 Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1590 } else {
1591 Cow::Borrowed("")
1592 }
1593 } else {
1594 Cow::Borrowed("")
1595 };
1596
1597 let reference_id = if is_reference && !ref_id.is_empty() {
1599 Some(Cow::Owned(ref_id.to_lowercase()))
1600 } else if is_reference {
1601 Some(Cow::Owned(link_text.to_lowercase()))
1603 } else {
1604 None
1605 };
1606
1607 found_positions.insert(start_pos);
1609
1610 links.push(ParsedLink {
1611 line: line_num,
1612 start_col: col_start,
1613 end_col: col_end,
1614 byte_offset: start_pos,
1615 byte_end: range.end,
1616 text: link_text,
1617 url: Cow::Owned(url.to_string()),
1618 is_reference,
1619 reference_id,
1620 link_type,
1621 });
1622
1623 text_chunks.clear();
1624 }
1625 }
1626 Event::FootnoteReference(footnote_id) => {
1627 if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1630 continue;
1631 }
1632
1633 let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1634 footnote_refs.push(FootnoteRef {
1635 id: footnote_id.to_string(),
1636 line: line_num,
1637 byte_offset: range.start,
1638 byte_end: range.end,
1639 });
1640 }
1641 _ => {}
1642 }
1643 }
1644
1645 for cap in LINK_PATTERN.captures_iter(content) {
1649 let full_match = cap.get(0).unwrap();
1650 let match_start = full_match.start();
1651 let match_end = full_match.end();
1652
1653 if found_positions.contains(&match_start) {
1655 continue;
1656 }
1657
1658 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1660 continue;
1661 }
1662
1663 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1665 continue;
1666 }
1667
1668 if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1670 continue;
1671 }
1672
1673 if Self::is_offset_in_code_span(code_spans, match_start) {
1675 continue;
1676 }
1677
1678 if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1680 continue;
1681 }
1682
1683 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1685
1686 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1688 continue;
1689 }
1690
1691 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1692
1693 let text = cap.get(1).map_or("", |m| m.as_str());
1694
1695 if let Some(ref_id) = cap.get(6) {
1697 let ref_id_str = ref_id.as_str();
1698 let normalized_ref = if ref_id_str.is_empty() {
1699 Cow::Owned(text.to_lowercase()) } else {
1701 Cow::Owned(ref_id_str.to_lowercase())
1702 };
1703
1704 links.push(ParsedLink {
1706 line: line_num,
1707 start_col: col_start,
1708 end_col: col_end,
1709 byte_offset: match_start,
1710 byte_end: match_end,
1711 text: Cow::Borrowed(text),
1712 url: Cow::Borrowed(""), is_reference: true,
1714 reference_id: Some(normalized_ref),
1715 link_type: LinkType::Reference, });
1717 }
1718 }
1719
1720 (links, broken_links, footnote_refs)
1721 }
1722
1723 fn parse_images(
1725 content: &'a str,
1726 lines: &[LineInfo],
1727 code_blocks: &[(usize, usize)],
1728 code_spans: &[CodeSpan],
1729 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1730 ) -> Vec<ParsedImage<'a>> {
1731 use crate::utils::skip_context::is_in_html_comment_ranges;
1732 use std::collections::HashSet;
1733
1734 let mut images = Vec::with_capacity(content.len() / 1000);
1736 let mut found_positions = HashSet::new();
1737
1738 let parser = Parser::new(content).into_offset_iter();
1740 let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1741 Vec::new();
1742 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1745 match event {
1746 Event::Start(Tag::Image {
1747 link_type,
1748 dest_url,
1749 id,
1750 ..
1751 }) => {
1752 image_stack.push((range.start, dest_url, link_type, id));
1753 text_chunks.clear();
1754 }
1755 Event::Text(text) if !image_stack.is_empty() => {
1756 text_chunks.push((text.to_string(), range.start, range.end));
1757 }
1758 Event::Code(code) if !image_stack.is_empty() => {
1759 let code_text = format!("`{code}`");
1760 text_chunks.push((code_text, range.start, range.end));
1761 }
1762 Event::End(TagEnd::Image) => {
1763 if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1764 if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1766 continue;
1767 }
1768
1769 if Self::is_offset_in_code_span(code_spans, start_pos) {
1771 continue;
1772 }
1773
1774 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1776 continue;
1777 }
1778
1779 let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1781 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1782
1783 let is_reference = matches!(
1784 link_type,
1785 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1786 );
1787
1788 let alt_text = if start_pos < content.len() {
1791 let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1792
1793 let mut close_pos = None;
1796 let mut depth = 0;
1797
1798 if image_bytes.len() > 2 {
1799 for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1800 let mut backslash_count = 0;
1802 let mut j = i;
1803 while j > 0 && image_bytes[j - 1] == b'\\' {
1804 backslash_count += 1;
1805 j -= 1;
1806 }
1807 let is_escaped = backslash_count % 2 != 0;
1808
1809 if !is_escaped {
1810 if byte == b'[' {
1811 depth += 1;
1812 } else if byte == b']' {
1813 if depth == 0 {
1814 close_pos = Some(i);
1816 break;
1817 } else {
1818 depth -= 1;
1819 }
1820 }
1821 }
1822 }
1823 }
1824
1825 if let Some(pos) = close_pos {
1826 Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1827 } else {
1828 Cow::Borrowed("")
1829 }
1830 } else {
1831 Cow::Borrowed("")
1832 };
1833
1834 let reference_id = if is_reference && !ref_id.is_empty() {
1835 Some(Cow::Owned(ref_id.to_lowercase()))
1836 } else if is_reference {
1837 Some(Cow::Owned(alt_text.to_lowercase())) } else {
1839 None
1840 };
1841
1842 found_positions.insert(start_pos);
1843 images.push(ParsedImage {
1844 line: line_num,
1845 start_col: col_start,
1846 end_col: col_end,
1847 byte_offset: start_pos,
1848 byte_end: range.end,
1849 alt_text,
1850 url: Cow::Owned(url.to_string()),
1851 is_reference,
1852 reference_id,
1853 link_type,
1854 });
1855 }
1856 }
1857 _ => {}
1858 }
1859 }
1860
1861 for cap in IMAGE_PATTERN.captures_iter(content) {
1863 let full_match = cap.get(0).unwrap();
1864 let match_start = full_match.start();
1865 let match_end = full_match.end();
1866
1867 if found_positions.contains(&match_start) {
1869 continue;
1870 }
1871
1872 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1874 continue;
1875 }
1876
1877 if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1879 || Self::is_offset_in_code_span(code_spans, match_start)
1880 || is_in_html_comment_ranges(html_comment_ranges, match_start)
1881 {
1882 continue;
1883 }
1884
1885 if let Some(ref_id) = cap.get(6) {
1887 let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1888 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1889 let alt_text = cap.get(1).map_or("", |m| m.as_str());
1890 let ref_id_str = ref_id.as_str();
1891 let normalized_ref = if ref_id_str.is_empty() {
1892 Cow::Owned(alt_text.to_lowercase())
1893 } else {
1894 Cow::Owned(ref_id_str.to_lowercase())
1895 };
1896
1897 images.push(ParsedImage {
1898 line: line_num,
1899 start_col: col_start,
1900 end_col: col_end,
1901 byte_offset: match_start,
1902 byte_end: match_end,
1903 alt_text: Cow::Borrowed(alt_text),
1904 url: Cow::Borrowed(""),
1905 is_reference: true,
1906 reference_id: Some(normalized_ref),
1907 link_type: LinkType::Reference, });
1909 }
1910 }
1911
1912 images
1913 }
1914
1915 fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1917 let mut refs = Vec::with_capacity(lines.len() / 20); for (line_idx, line_info) in lines.iter().enumerate() {
1921 if line_info.in_code_block {
1923 continue;
1924 }
1925
1926 let line = line_info.content(content);
1927 let line_num = line_idx + 1;
1928
1929 if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1930 let id_raw = cap.get(1).unwrap().as_str();
1931
1932 if id_raw.starts_with('^') {
1935 continue;
1936 }
1937
1938 let id = id_raw.to_lowercase();
1939 let url = cap.get(2).unwrap().as_str().to_string();
1940 let title_match = cap.get(3).or_else(|| cap.get(4));
1941 let title = title_match.map(|m| m.as_str().to_string());
1942
1943 let match_obj = cap.get(0).unwrap();
1946 let byte_offset = line_info.byte_offset + match_obj.start();
1947 let byte_end = line_info.byte_offset + match_obj.end();
1948
1949 let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1951 let start = line_info.byte_offset + m.start().saturating_sub(1);
1953 let end = line_info.byte_offset + m.end() + 1; (Some(start), Some(end))
1955 } else {
1956 (None, None)
1957 };
1958
1959 refs.push(ReferenceDef {
1960 line: line_num,
1961 id,
1962 url,
1963 title,
1964 byte_offset,
1965 byte_end,
1966 title_byte_start,
1967 title_byte_end,
1968 });
1969 }
1970 }
1971
1972 refs
1973 }
1974
/// Splits a line into its blockquote prefix and the text that follows it.
///
/// The prefix is every leading run of "optional whitespace, `>`, at most one space or
/// tab", repeated for nested quotes. Returns `None` when the line, after leading
/// whitespace, does not start with `>`; otherwise `Some((prefix, rest))`, where the two
/// slices together make up `line`.
#[inline]
fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
    let mut rest = line;
    let mut saw_marker = false;

    // Peel one `>` level per iteration; `rest` is always a suffix of `line`.
    while let Some(after_marker) = rest.trim_start().strip_prefix('>') {
        saw_marker = true;
        // A single space or tab directly after the marker belongs to the prefix.
        rest = after_marker
            .strip_prefix(' ')
            .or_else(|| after_marker.strip_prefix('\t'))
            .unwrap_or(after_marker);
    }

    if !saw_marker {
        return None;
    }

    let prefix_len = line.len() - rest.len();
    Some((&line[..prefix_len], rest))
}
2015
2016 fn detect_list_items_and_emphasis_with_pulldown(
2040 content: &str,
2041 line_offsets: &[usize],
2042 flavor: MarkdownFlavor,
2043 front_matter_end: usize,
2044 code_blocks: &[(usize, usize)],
2045 ) -> (ListItemMap, Vec<EmphasisSpan>) {
2046 use std::collections::HashMap;
2047
2048 let mut list_items = HashMap::new();
2049 let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
2050
2051 let mut options = Options::empty();
2052 options.insert(Options::ENABLE_TABLES);
2053 options.insert(Options::ENABLE_FOOTNOTES);
2054 options.insert(Options::ENABLE_STRIKETHROUGH);
2055 options.insert(Options::ENABLE_TASKLISTS);
2056 options.insert(Options::ENABLE_GFM);
2058
2059 let _ = flavor;
2061
2062 let parser = Parser::new_ext(content, options).into_offset_iter();
2063 let mut list_depth: usize = 0;
2064 let mut list_stack: Vec<bool> = Vec::new();
2065
2066 for (event, range) in parser {
2067 match event {
2068 Event::Start(Tag::Emphasis) | Event::Start(Tag::Strong) => {
2070 let marker_count = if matches!(event, Event::Start(Tag::Strong)) {
2071 2
2072 } else {
2073 1
2074 };
2075 let match_start = range.start;
2076 let match_end = range.end;
2077
2078 if !CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2080 let marker = content[match_start..].chars().next().unwrap_or('*');
2082 if marker == '*' || marker == '_' {
2083 let content_start = match_start + marker_count;
2085 let content_end = if match_end >= marker_count {
2086 match_end - marker_count
2087 } else {
2088 match_end
2089 };
2090 let content_part = if content_start < content_end && content_end <= content.len() {
2091 &content[content_start..content_end]
2092 } else {
2093 ""
2094 };
2095
2096 let line_idx = match line_offsets.binary_search(&match_start) {
2098 Ok(idx) => idx,
2099 Err(idx) => idx.saturating_sub(1),
2100 };
2101 let line_num = line_idx + 1;
2102 let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
2103 let col_start = match_start - line_start;
2104 let col_end = match_end - line_start;
2105
2106 emphasis_spans.push(EmphasisSpan {
2107 line: line_num,
2108 start_col: col_start,
2109 end_col: col_end,
2110 byte_offset: match_start,
2111 byte_end: match_end,
2112 marker,
2113 marker_count,
2114 content: content_part.to_string(),
2115 });
2116 }
2117 }
2118 }
2119 Event::Start(Tag::List(start_number)) => {
2120 list_depth += 1;
2121 list_stack.push(start_number.is_some());
2122 }
2123 Event::End(TagEnd::List(_)) => {
2124 list_depth = list_depth.saturating_sub(1);
2125 list_stack.pop();
2126 }
2127 Event::Start(Tag::Item) if list_depth > 0 => {
2128 let current_list_is_ordered = list_stack.last().copied().unwrap_or(false);
2130 let item_start = range.start;
2132
2133 let mut line_idx = match line_offsets.binary_search(&item_start) {
2135 Ok(idx) => idx,
2136 Err(idx) => idx.saturating_sub(1),
2137 };
2138
2139 if item_start < content.len() && content.as_bytes()[item_start] == b'\n' {
2143 line_idx += 1;
2144 }
2145
2146 if front_matter_end > 0 && line_idx < front_matter_end {
2148 continue;
2149 }
2150
2151 if line_idx < line_offsets.len() {
2152 let line_start_byte = line_offsets[line_idx];
2153 let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
2154 let line = &content[line_start_byte..line_end.min(content.len())];
2155
2156 let line = line
2158 .strip_suffix('\n')
2159 .or_else(|| line.strip_suffix("\r\n"))
2160 .unwrap_or(line);
2161
2162 let blockquote_parse = Self::parse_blockquote_prefix(line);
2164 let (blockquote_prefix_len, line_to_parse) = if let Some((prefix, content)) = blockquote_parse {
2165 (prefix.len(), content)
2166 } else {
2167 (0, line)
2168 };
2169
2170 if current_list_is_ordered {
2172 if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
2173 Self::parse_ordered_list(line_to_parse)
2174 {
2175 let marker = format!("{number_str}{delimiter}");
2176 let marker_column = blockquote_prefix_len + leading_spaces.len();
2177 let content_column = marker_column + marker.len() + spacing.len();
2178 let number = number_str.parse().ok();
2179
2180 list_items.entry(line_start_byte).or_insert((
2181 true,
2182 marker,
2183 marker_column,
2184 content_column,
2185 number,
2186 ));
2187 }
2188 } else if let Some((leading_spaces, marker, spacing, _content)) =
2189 Self::parse_unordered_list(line_to_parse)
2190 {
2191 let marker_column = blockquote_prefix_len + leading_spaces.len();
2192 let content_column = marker_column + 1 + spacing.len();
2193
2194 list_items.entry(line_start_byte).or_insert((
2195 false,
2196 marker.to_string(),
2197 marker_column,
2198 content_column,
2199 None,
2200 ));
2201 }
2202 }
2203 }
2204 _ => {}
2205 }
2206 }
2207
2208 (list_items, emphasis_spans)
2209 }
2210
/// Splits a line that starts with a bullet marker into its parts.
///
/// Returns `Some((indent, marker, spacing, content))`, where `indent` is the leading run
/// of spaces/tabs, `marker` is `-`, `*` or `+`, `spacing` is the run of spaces/tabs after
/// the marker (possibly empty) and `content` is the rest of the line. Returns `None` when
/// the first character after the indent is not one of the three markers.
#[inline]
fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
    let is_blank = |c: char| c == ' ' || c == '\t';

    let after_indent = line.trim_start_matches(is_blank);
    let indent = &line[..line.len() - after_indent.len()];

    let marker = after_indent
        .chars()
        .next()
        .filter(|c| matches!(c, '-' | '*' | '+'))?;

    // All three markers are single-byte ASCII, so byte index 1 is a char boundary.
    let after_marker = &after_indent[1..];
    let content = after_marker.trim_start_matches(is_blank);
    let spacing = &after_marker[..after_marker.len() - content.len()];

    Some((indent, marker, spacing, content))
}
2243
/// Splits a line that starts with an ordered-list marker into its parts.
///
/// Returns `Some((indent, number, delimiter, spacing, content))`, where `indent` is the
/// leading run of spaces/tabs, `number` is one or more ASCII digits, `delimiter` is `.` or
/// `)`, `spacing` is the run of spaces/tabs after the delimiter (possibly empty) and
/// `content` is the rest of the line. Returns `None` when there are no digits or the
/// digits are not followed by one of the two delimiters.
#[inline]
fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
    let is_blank = |c: char| c == ' ' || c == '\t';

    let after_indent = line.trim_start_matches(is_blank);
    let indent = &line[..line.len() - after_indent.len()];

    let after_number = after_indent.trim_start_matches(|c: char| c.is_ascii_digit());
    let number = &after_indent[..after_indent.len() - after_number.len()];
    if number.is_empty() {
        return None;
    }

    let delimiter = after_number
        .chars()
        .next()
        .filter(|c| matches!(c, '.' | ')'))?;

    // Both delimiters are single-byte ASCII, so byte index 1 is a char boundary.
    let after_delimiter = &after_number[1..];
    let content = after_delimiter.trim_start_matches(is_blank);
    let spacing = &after_delimiter[..after_delimiter.len() - content.len()];

    Some((indent, number, delimiter, spacing, content))
}
2291
/// Builds a per-line flag vector marking the lines covered by any code block range.
///
/// `line_offsets` holds the starting byte offset of each line in ascending order and
/// determines the length of the result. Each `(start, end)` in `code_blocks` is first
/// snapped to UTF-8 character boundaries (start moves backwards, end moves forwards and
/// is capped at `content.len()`). The line containing the snapped start, and every later
/// line that begins before the snapped end, is flagged `true`.
fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
    let total_len = content.len();
    let mut flags = vec![false; line_offsets.len()];

    for &(block_start, block_end) in code_blocks {
        // Nearest char boundary at or before the start; offset 0 is always a boundary.
        let lower = (0..=block_start)
            .rev()
            .find(|&pos| content.is_char_boundary(pos))
            .unwrap_or(0);

        // Nearest char boundary at or after the end, never beyond the end of the content.
        let upper = (block_end..total_len)
            .find(|&pos| content.is_char_boundary(pos))
            .unwrap_or(total_len);

        // Last line starting at or before `lower`, i.e. the line that contains it.
        let first_line = line_offsets
            .partition_point(|&offset| offset <= lower)
            .saturating_sub(1);
        // One past the last line that starts before `upper`.
        let line_limit = line_offsets.partition_point(|&offset| offset < upper);

        // `get_mut` yields `None` for an inverted range, which then flags nothing.
        if let Some(covered) = flags.get_mut(first_line..line_limit) {
            covered.fill(true);
        }
    }

    flags
}
2351
/// Builds a per-line flag vector marking lines that belong to a `$$ … $$` math block.
///
/// A line whose trimmed text is exactly `$$` toggles the "inside math" state and is itself
/// flagged, as is every line seen while the state is on. Lines flagged in
/// `code_block_map` are never marked and never toggle the state. The result has one entry
/// per line of `content.lines()`.
fn compute_math_block_line_map(content: &str, code_block_map: &[bool]) -> Vec<bool> {
    let mut math_open = false;

    content
        .lines()
        .enumerate()
        .map(|(index, text)| {
            if code_block_map.get(index).copied().unwrap_or(false) {
                return false;
            }

            let is_delimiter = text.trim() == "$$";
            // Both delimiters and everything between them count as part of the block.
            let flagged = is_delimiter || math_open;
            math_open ^= is_delimiter;
            flagged
        })
        .collect()
}
2389
2390 fn compute_basic_line_info(
2393 content: &str,
2394 line_offsets: &[usize],
2395 code_blocks: &[(usize, usize)],
2396 flavor: MarkdownFlavor,
2397 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2398 autodoc_ranges: &[crate::utils::skip_context::ByteRange],
2399 quarto_div_ranges: &[crate::utils::skip_context::ByteRange],
2400 ) -> (Vec<LineInfo>, Vec<EmphasisSpan>) {
2401 let content_lines: Vec<&str> = content.lines().collect();
2402 let mut lines = Vec::with_capacity(content_lines.len());
2403
2404 let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
2406
2407 let math_block_map = Self::compute_math_block_line_map(content, &code_block_map);
2409
2410 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2413
2414 let (list_item_map, emphasis_spans) = Self::detect_list_items_and_emphasis_with_pulldown(
2417 content,
2418 line_offsets,
2419 flavor,
2420 front_matter_end,
2421 code_blocks,
2422 );
2423
2424 for (i, line) in content_lines.iter().enumerate() {
2425 let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
2426 let indent = line.len() - line.trim_start().len();
2427 let visual_indent = ElementCache::calculate_indentation_width_default(line);
2429
2430 let blockquote_parse = Self::parse_blockquote_prefix(line);
2432
2433 let is_blank = if let Some((_, content)) = blockquote_parse {
2435 content.trim().is_empty()
2437 } else {
2438 line.trim().is_empty()
2439 };
2440
2441 let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
2443
2444 let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
2446 && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
2447 let line_end_offset = byte_offset + line.len();
2450 let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
2451 html_comment_ranges,
2452 byte_offset,
2453 line_end_offset,
2454 );
2455 let list_item =
2458 list_item_map
2459 .get(&byte_offset)
2460 .map(
2461 |(is_ordered, marker, marker_column, content_column, number)| ListItemInfo {
2462 marker: marker.clone(),
2463 is_ordered: *is_ordered,
2464 number: *number,
2465 marker_column: *marker_column,
2466 content_column: *content_column,
2467 },
2468 );
2469
2470 let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2473 let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2474
2475 let in_math_block = math_block_map.get(i).copied().unwrap_or(false);
2477
2478 let in_quarto_div = flavor == MarkdownFlavor::Quarto
2480 && crate::utils::quarto_divs::is_within_div_block_ranges(quarto_div_ranges, byte_offset);
2481
2482 lines.push(LineInfo {
2483 byte_offset,
2484 byte_len: line.len(),
2485 indent,
2486 visual_indent,
2487 is_blank,
2488 in_code_block,
2489 in_front_matter,
2490 in_html_block: false, in_html_comment,
2492 list_item,
2493 heading: None, blockquote: None, in_mkdocstrings,
2496 in_esm_block: false, in_code_span_continuation: false, is_horizontal_rule: is_hr,
2499 in_math_block,
2500 in_quarto_div,
2501 in_jsx_expression: false, in_mdx_comment: false, in_jsx_component: false, in_jsx_fragment: false, in_admonition: false, in_content_tab: false, in_definition_list: false, });
2509 }
2510
2511 (lines, emphasis_spans)
2512 }
2513
/// Fills in `blockquote` and `heading` on the entries of `lines` (and sets
/// `is_horizontal_rule` for rules found inside blockquotes).
///
/// For each line, in order:
///
/// 1. Outside front matter, if `parse_blockquote_detailed` recognises the line, a
///    `BlockquoteInfo` is stored. This happens even for lines inside code blocks.
/// 2. Lines in code blocks, front matter or HTML blocks, and blank lines, get no heading.
/// 3. An ATX heading is recorded when `ATX_HEADING_REGEX` matches, unless the line is a
///    MkDocs snippet section marker, starts inside an HTML comment, or starts strictly
///    inside one of `link_byte_ranges`.
/// 4. Otherwise a Setext heading is recorded when the following line matches
///    `SETEXT_UNDERLINE_REGEX` and the current line passes a series of exclusions
///    (list-like, rule-like, quote, fence and HTML starts).
///
/// `lines` is expected to have one entry per line of `content.lines()`; it is indexed in
/// lock-step with that iterator.
fn detect_headings_and_blockquotes(
    content: &str,
    lines: &mut [LineInfo],
    flavor: MarkdownFlavor,
    html_comment_ranges: &[crate::utils::skip_context::ByteRange],
    link_byte_ranges: &[(usize, usize)],
) {
    // Groups: 1 = leading whitespace, 2 = one to six `#`, 3 = whitespace after the
    // hashes, 4 = rest of the line.
    static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
        LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
    // A line made only of `=` or only of `-`, with optional surrounding whitespace.
    static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
        LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());

    let content_lines: Vec<&str> = content.lines().collect();

    let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);

    for i in 0..lines.len() {
        let line = content_lines[i];

        // Blockquote detection runs for every line outside front matter; the code-block
        // check only comes afterwards, so quoted lines in code blocks are recorded too.
        if !(front_matter_end > 0 && i < front_matter_end)
            && let Some(bq) = parse_blockquote_detailed(line)
        {
            let nesting_level = bq.markers.len();
            let marker_column = bq.indent.len();
            let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
            let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
            let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
            // A bare marker: nothing after it at all, not even a space.
            let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();

            lines[i].blockquote = Some(BlockquoteInfo {
                nesting_level,
                indent: bq.indent.to_string(),
                marker_column,
                prefix,
                content: bq.content.to_string(),
                has_no_space_after_marker: has_no_space,
                has_multiple_spaces_after_marker: has_multiple_spaces,
                needs_md028_fix,
            });

            // A horizontal rule inside the quote also marks the line as a rule.
            if !lines[i].in_code_block && is_horizontal_rule_content(bq.content.trim()) {
                lines[i].is_horizontal_rule = true;
            }
        }

        // From here on only heading detection remains; skip lines that cannot hold one.
        if lines[i].in_code_block {
            continue;
        }

        if front_matter_end > 0 && i < front_matter_end {
            continue;
        }

        if lines[i].in_html_block {
            continue;
        }

        if lines[i].is_blank {
            continue;
        }

        // MkDocs snippet section markers are never treated as ATX headings.
        let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
            crate::utils::mkdocs_snippets::is_snippet_section_start(line)
                || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
        } else {
            false
        };

        if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
            if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
                continue;
            }
            // Skip lines that begin strictly inside a link span (the strict comparison
            // excludes a line that starts exactly where the link starts).
            let line_offset = lines[i].byte_offset;
            if link_byte_ranges
                .iter()
                .any(|&(start, end)| line_offset > start && line_offset < end)
            {
                continue;
            }
            let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
            let hashes = caps.get(2).map_or("", |m| m.as_str());
            let spaces_after = caps.get(3).map_or("", |m| m.as_str());
            let rest = caps.get(4).map_or("", |m| m.as_str());

            let level = hashes.len() as u8;
            let marker_column = leading_spaces.len();

            // Split `rest` into heading text and an optional closing `#` sequence.
            let (text, has_closing, closing_seq) = {
                // A trailing ` {#...}` custom id is set aside first so that it does not
                // hide a closing sequence placed before it.
                let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
                    if rest[id_start..].trim_end().ends_with('}') {
                        (&rest[..id_start], &rest[id_start..])
                    } else {
                        (rest, "")
                    }
                } else {
                    (rest, "")
                };

                let trimmed_rest = rest_without_id.trim_end();
                if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
                    // Work in (byte offset, char) pairs so that stepping backwards stays
                    // on character boundaries.
                    let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();

                    let last_hash_char_idx = char_positions
                        .iter()
                        .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);

                    if let Some(mut char_idx) = last_hash_char_idx {
                        // Walk back to the first `#` of the final run of hashes.
                        while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
                            char_idx -= 1;
                        }

                        let start_of_hashes = char_positions[char_idx].0;

                        let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();

                        let potential_closing = &trimmed_rest[start_of_hashes..];
                        let is_all_hashes = potential_closing.chars().all(|c| c == '#');

                        // Only a hash run that ends the text and is preceded by
                        // whitespace (or starts the text) counts as a closing sequence.
                        if is_all_hashes && has_space_before {
                            let closing_hashes = potential_closing.to_string();
                            // Re-attach the custom id after the text when one was set aside.
                            let text_part = if !custom_id_part.is_empty() {
                                format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
                            } else {
                                trimmed_rest[..start_of_hashes].trim_end().to_string()
                            };
                            (text_part, true, closing_hashes)
                        } else {
                            (rest.to_string(), false, String::new())
                        }
                    } else {
                        (rest.to_string(), false, String::new())
                    }
                } else {
                    (rest.to_string(), false, String::new())
                }
            };

            let content_column = marker_column + hashes.len() + spaces_after.len();

            let raw_text = text.trim().to_string();
            let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);

            // No inline id: a standalone attribute list on the next line may supply one.
            if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
                let next_line = content_lines[i + 1];
                if !lines[i + 1].in_code_block
                    && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
                    && let Some(next_line_id) =
                        crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
                {
                    custom_id = Some(next_line_id);
                }
            }

            // Valid when there is whitespace after the hashes, the heading is empty, the
            // level is above 1, or the text starts with an uppercase character.
            // NOTE(review): presumably this keeps `#tag`-like text from counting as a
            // valid level-1 heading; confirm the intent.
            let is_valid = !spaces_after.is_empty()
                || rest.is_empty()
                || level > 1
                || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());

            lines[i].heading = Some(HeadingInfo {
                level,
                style: HeadingStyle::ATX,
                marker: hashes.to_string(),
                marker_column,
                content_column,
                text: clean_text,
                custom_id,
                raw_text,
                has_closing_sequence: has_closing,
                closing_sequence: closing_seq,
                is_valid,
            });
        }
        // Setext candidate: reached when the line is a snippet marker or not ATX-shaped.
        else if i + 1 < content_lines.len() && i + 1 < lines.len() {
            let next_line = content_lines[i + 1];
            if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
                if front_matter_end > 0 && i < front_matter_end {
                    continue;
                }

                if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
                {
                    continue;
                }

                let content_line = line.trim();

                // Lines starting with a bullet character are not Setext heading text.
                if content_line.starts_with('-') || content_line.starts_with('*') || content_line.starts_with('+') {
                    continue;
                }

                // Three or more underscores and nothing else (ignoring whitespace).
                if content_line.starts_with('_') {
                    let non_ws: String = content_line.chars().filter(|c| !c.is_whitespace()).collect();
                    if non_ws.len() >= 3 && non_ws.chars().all(|c| c == '_') {
                        continue;
                    }
                }

                // Digits followed by `.` or `)` look like an ordered list marker.
                if let Some(first_char) = content_line.chars().next()
                    && first_char.is_ascii_digit()
                {
                    let num_end = content_line.chars().take_while(|c| c.is_ascii_digit()).count();
                    if num_end < content_line.len() {
                        let next = content_line.chars().nth(num_end);
                        if next == Some('.') || next == Some(')') {
                            continue;
                        }
                    }
                }

                // ATX-shaped lines that reach this branch are not promoted to Setext.
                if ATX_HEADING_REGEX.is_match(line) {
                    continue;
                }

                if content_line.starts_with('>') {
                    continue;
                }

                // Code fence openers.
                let trimmed_start = line.trim_start();
                if trimmed_start.len() >= 3 {
                    let first_three: String = trimmed_start.chars().take(3).collect();
                    if first_three == "```" || first_three == "~~~" {
                        continue;
                    }
                }

                if content_line.starts_with('<') {
                    continue;
                }

                let underline = next_line.trim();

                // `=` underlines make level 1; anything else the regex allows (`-`) makes level 2.
                let level = if underline.starts_with('=') { 1 } else { 2 };
                let style = if level == 1 {
                    HeadingStyle::Setext1
                } else {
                    HeadingStyle::Setext2
                };

                let raw_text = line.trim().to_string();
                let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);

                // No inline id: a standalone attribute list after the underline may supply one.
                if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
                    let attr_line = content_lines[i + 2];
                    if !lines[i + 2].in_code_block
                        && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
                        && let Some(attr_line_id) =
                            crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
                    {
                        custom_id = Some(attr_line_id);
                    }
                }

                // For Setext headings the marker is the underline text, `marker_column`
                // is the underline's indentation, and `content_column` is the text
                // line's indentation.
                lines[i].heading = Some(HeadingInfo {
                    level,
                    style,
                    marker: underline.to_string(),
                    marker_column: next_line.len() - next_line.trim_start().len(),
                    content_column: lines[i].indent,
                    text: clean_text,
                    custom_id,
                    raw_text,
                    has_closing_sequence: false,
                    closing_sequence: String::new(),
                    is_valid: true,
                });
            }
        }
    }
}
2847
2848 fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2850 const BLOCK_ELEMENTS: &[&str] = &[
2853 "address",
2854 "article",
2855 "aside",
2856 "audio",
2857 "blockquote",
2858 "canvas",
2859 "details",
2860 "dialog",
2861 "dd",
2862 "div",
2863 "dl",
2864 "dt",
2865 "embed",
2866 "fieldset",
2867 "figcaption",
2868 "figure",
2869 "footer",
2870 "form",
2871 "h1",
2872 "h2",
2873 "h3",
2874 "h4",
2875 "h5",
2876 "h6",
2877 "header",
2878 "hr",
2879 "iframe",
2880 "li",
2881 "main",
2882 "menu",
2883 "nav",
2884 "noscript",
2885 "object",
2886 "ol",
2887 "p",
2888 "picture",
2889 "pre",
2890 "script",
2891 "search",
2892 "section",
2893 "source",
2894 "style",
2895 "summary",
2896 "svg",
2897 "table",
2898 "tbody",
2899 "td",
2900 "template",
2901 "textarea",
2902 "tfoot",
2903 "th",
2904 "thead",
2905 "tr",
2906 "track",
2907 "ul",
2908 "video",
2909 ];
2910
2911 let mut i = 0;
2912 while i < lines.len() {
2913 if lines[i].in_code_block || lines[i].in_front_matter {
2915 i += 1;
2916 continue;
2917 }
2918
2919 let trimmed = lines[i].content(content).trim_start();
2920
2921 if trimmed.starts_with('<') && trimmed.len() > 1 {
2923 let after_bracket = &trimmed[1..];
2925 let is_closing = after_bracket.starts_with('/');
2926 let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2927
2928 let tag_name = tag_start
2930 .chars()
2931 .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2932 .collect::<String>()
2933 .to_lowercase();
2934
2935 if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2937 lines[i].in_html_block = true;
2939
2940 if !is_closing {
2945 let closing_tag = format!("</{tag_name}>");
2946
2947 let same_line_close = lines[i].content(content).contains(&closing_tag);
2950
2951 if !same_line_close {
2953 let allow_blank_lines = tag_name == "style" || tag_name == "script";
2955 let mut j = i + 1;
2956 let mut found_closing_tag = false;
2957 while j < lines.len() && j < i + 100 {
2958 if !allow_blank_lines && lines[j].is_blank {
2961 break;
2962 }
2963
2964 lines[j].in_html_block = true;
2965
2966 if lines[j].content(content).contains(&closing_tag) {
2968 found_closing_tag = true;
2969 }
2970
2971 if found_closing_tag {
2974 j += 1;
2975 while j < lines.len() && j < i + 100 {
2977 if lines[j].is_blank {
2978 break;
2979 }
2980 lines[j].in_html_block = true;
2981 j += 1;
2982 }
2983 break;
2984 }
2985 j += 1;
2986 }
2987 }
2988 }
2989 }
2990 }
2991
2992 i += 1;
2993 }
2994 }
2995
2996 fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2999 if !flavor.supports_esm_blocks() {
3001 return;
3002 }
3003
3004 let mut in_multiline_import = false;
3005
3006 for line in lines.iter_mut() {
3007 if line.in_code_block || line.in_front_matter || line.in_html_comment {
3009 in_multiline_import = false;
3010 continue;
3011 }
3012
3013 let line_content = line.content(content);
3014 let trimmed = line_content.trim();
3015
3016 if in_multiline_import {
3018 line.in_esm_block = true;
3019 if trimmed.ends_with('\'')
3022 || trimmed.ends_with('"')
3023 || trimmed.ends_with("';")
3024 || trimmed.ends_with("\";")
3025 || line_content.contains(';')
3026 {
3027 in_multiline_import = false;
3028 }
3029 continue;
3030 }
3031
3032 if line.is_blank {
3034 continue;
3035 }
3036
3037 if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
3039 line.in_esm_block = true;
3040
3041 let is_import = trimmed.starts_with("import ");
3049
3050 let is_complete =
3052 trimmed.ends_with(';')
3054 || (trimmed.contains(" from ") && (trimmed.ends_with('\'') || trimmed.ends_with('"')))
3056 || (!is_import && !trimmed.contains(" from ") && (
3058 trimmed.starts_with("export const ")
3059 || trimmed.starts_with("export let ")
3060 || trimmed.starts_with("export var ")
3061 || trimmed.starts_with("export function ")
3062 || trimmed.starts_with("export class ")
3063 || trimmed.starts_with("export default ")
3064 ));
3065
3066 if !is_complete && is_import {
3067 if trimmed.contains('{') && !trimmed.contains('}') {
3071 in_multiline_import = true;
3072 }
3073 }
3074 }
3075 }
3076 }
3077
3078 fn detect_jsx_and_mdx_comments(
3081 content: &str,
3082 lines: &mut [LineInfo],
3083 flavor: MarkdownFlavor,
3084 code_blocks: &[(usize, usize)],
3085 ) -> (ByteRanges, ByteRanges) {
3086 if !flavor.supports_jsx() {
3088 return (Vec::new(), Vec::new());
3089 }
3090
3091 let mut jsx_expression_ranges: Vec<(usize, usize)> = Vec::new();
3092 let mut mdx_comment_ranges: Vec<(usize, usize)> = Vec::new();
3093
3094 if !content.contains('{') {
3096 return (jsx_expression_ranges, mdx_comment_ranges);
3097 }
3098
3099 let bytes = content.as_bytes();
3100 let mut i = 0;
3101
3102 while i < bytes.len() {
3103 if bytes[i] == b'{' {
3104 if code_blocks.iter().any(|(start, end)| i >= *start && i < *end) {
3106 i += 1;
3107 continue;
3108 }
3109
3110 let start = i;
3111
3112 if i + 2 < bytes.len() && &bytes[i + 1..i + 3] == b"/*" {
3114 let mut j = i + 3;
3116 while j + 2 < bytes.len() {
3117 if &bytes[j..j + 2] == b"*/" && j + 2 < bytes.len() && bytes[j + 2] == b'}' {
3118 let end = j + 3;
3119 mdx_comment_ranges.push((start, end));
3120
3121 Self::mark_lines_in_range(lines, content, start, end, |line| {
3123 line.in_mdx_comment = true;
3124 });
3125
3126 i = end;
3127 break;
3128 }
3129 j += 1;
3130 }
3131 if j + 2 >= bytes.len() {
3132 mdx_comment_ranges.push((start, bytes.len()));
3134 Self::mark_lines_in_range(lines, content, start, bytes.len(), |line| {
3135 line.in_mdx_comment = true;
3136 });
3137 break;
3138 }
3139 } else {
3140 let mut brace_depth = 1;
3143 let mut j = i + 1;
3144 let mut in_string = false;
3145 let mut string_char = b'"';
3146
3147 while j < bytes.len() && brace_depth > 0 {
3148 let c = bytes[j];
3149
3150 if !in_string && (c == b'"' || c == b'\'' || c == b'`') {
3152 in_string = true;
3153 string_char = c;
3154 } else if in_string && c == string_char && (j == 0 || bytes[j - 1] != b'\\') {
3155 in_string = false;
3156 } else if !in_string {
3157 if c == b'{' {
3158 brace_depth += 1;
3159 } else if c == b'}' {
3160 brace_depth -= 1;
3161 }
3162 }
3163 j += 1;
3164 }
3165
3166 if brace_depth == 0 {
3167 let end = j;
3168 jsx_expression_ranges.push((start, end));
3169
3170 Self::mark_lines_in_range(lines, content, start, end, |line| {
3172 line.in_jsx_expression = true;
3173 });
3174
3175 i = end;
3176 } else {
3177 i += 1;
3178 }
3179 }
3180 } else {
3181 i += 1;
3182 }
3183 }
3184
3185 (jsx_expression_ranges, mdx_comment_ranges)
3186 }
3187
3188 fn detect_mkdocs_line_info(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
3191 if flavor != MarkdownFlavor::MkDocs {
3192 return;
3193 }
3194
3195 use crate::utils::mkdocs_admonitions;
3196 use crate::utils::mkdocs_definition_lists;
3197 use crate::utils::mkdocs_tabs;
3198
3199 let content_lines: Vec<&str> = content.lines().collect();
3200
3201 let mut in_admonition = false;
3203 let mut admonition_indent = 0;
3204
3205 let mut in_tab = false;
3207 let mut tab_indent = 0;
3208
3209 let mut in_definition = false;
3211
3212 for (i, line) in content_lines.iter().enumerate() {
3213 if i >= lines.len() {
3214 break;
3215 }
3216
3217 if mkdocs_admonitions::is_admonition_start(line) {
3221 in_admonition = true;
3222 admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3223 lines[i].in_admonition = true;
3224 } else if in_admonition {
3225 if line.trim().is_empty() {
3227 lines[i].in_admonition = true;
3229 lines[i].in_code_block = false;
3231 } else if mkdocs_admonitions::is_admonition_content(line, admonition_indent) {
3232 lines[i].in_admonition = true;
3233 lines[i].in_code_block = false;
3235 } else {
3236 in_admonition = false;
3238 if mkdocs_admonitions::is_admonition_start(line) {
3240 in_admonition = true;
3241 admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3242 lines[i].in_admonition = true;
3243 }
3244 }
3245 }
3246
3247 if mkdocs_tabs::is_tab_marker(line) {
3250 in_tab = true;
3251 tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3252 lines[i].in_content_tab = true;
3253 } else if in_tab {
3254 if line.trim().is_empty() {
3256 lines[i].in_content_tab = true;
3258 lines[i].in_code_block = false;
3259 } else if mkdocs_tabs::is_tab_content(line, tab_indent) {
3260 lines[i].in_content_tab = true;
3261 lines[i].in_code_block = false;
3263 } else {
3264 in_tab = false;
3266 if mkdocs_tabs::is_tab_marker(line) {
3268 in_tab = true;
3269 tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3270 lines[i].in_content_tab = true;
3271 }
3272 }
3273 }
3274
3275 if lines[i].in_code_block {
3277 continue;
3278 }
3279
3280 if mkdocs_definition_lists::is_definition_line(line) {
3282 in_definition = true;
3283 lines[i].in_definition_list = true;
3284 } else if in_definition {
3285 if mkdocs_definition_lists::is_definition_continuation(line) {
3287 lines[i].in_definition_list = true;
3288 } else if line.trim().is_empty() {
3289 lines[i].in_definition_list = true;
3291 } else if mkdocs_definition_lists::could_be_term_line(line) {
3292 if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1])
3294 {
3295 lines[i].in_definition_list = true;
3296 } else {
3297 in_definition = false;
3298 }
3299 } else {
3300 in_definition = false;
3301 }
3302 } else if mkdocs_definition_lists::could_be_term_line(line) {
3303 if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1]) {
3305 lines[i].in_definition_list = true;
3306 in_definition = true;
3307 }
3308 }
3309 }
3310 }
3311
3312 fn mark_lines_in_range<F>(lines: &mut [LineInfo], content: &str, start: usize, end: usize, mut f: F)
3314 where
3315 F: FnMut(&mut LineInfo),
3316 {
3317 for line in lines.iter_mut() {
3319 let line_start = line.byte_offset;
3320 let line_end = line.byte_offset + line.byte_len;
3321
3322 if line_start < end && line_end > start {
3324 f(line);
3325 }
3326 }
3327
3328 let _ = content;
3330 }
3331
3332 fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
3334 if !content.contains('`') {
3336 return Vec::new();
3337 }
3338
3339 let parser = Parser::new(content).into_offset_iter();
3341 let mut ranges = Vec::new();
3342
3343 for (event, range) in parser {
3344 if let Event::Code(_) = event {
3345 ranges.push((range.start, range.end));
3346 }
3347 }
3348
3349 Self::build_code_spans_from_ranges(content, lines, &ranges)
3350 }
3351
3352 fn build_code_spans_from_ranges(content: &str, lines: &[LineInfo], ranges: &[(usize, usize)]) -> Vec<CodeSpan> {
3353 let mut code_spans = Vec::new();
3354 if ranges.is_empty() {
3355 return code_spans;
3356 }
3357
3358 for &(start_pos, end_pos) in ranges {
3359 let full_span = &content[start_pos..end_pos];
3361 let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
3362
3363 let content_start = start_pos + backtick_count;
3365 let content_end = end_pos - backtick_count;
3366 let span_content = if content_start < content_end {
3367 content[content_start..content_end].to_string()
3368 } else {
3369 String::new()
3370 };
3371
3372 let line_idx = lines
3375 .partition_point(|line| line.byte_offset <= start_pos)
3376 .saturating_sub(1);
3377 let line_num = line_idx + 1;
3378 let byte_col_start = start_pos - lines[line_idx].byte_offset;
3379
3380 let end_line_idx = lines
3382 .partition_point(|line| line.byte_offset <= end_pos)
3383 .saturating_sub(1);
3384 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3385
3386 let line_content = lines[line_idx].content(content);
3389 let col_start = if byte_col_start <= line_content.len() {
3390 line_content[..byte_col_start].chars().count()
3391 } else {
3392 line_content.chars().count()
3393 };
3394
3395 let end_line_content = lines[end_line_idx].content(content);
3396 let col_end = if byte_col_end <= end_line_content.len() {
3397 end_line_content[..byte_col_end].chars().count()
3398 } else {
3399 end_line_content.chars().count()
3400 };
3401
3402 code_spans.push(CodeSpan {
3403 line: line_num,
3404 end_line: end_line_idx + 1,
3405 start_col: col_start,
3406 end_col: col_end,
3407 byte_offset: start_pos,
3408 byte_end: end_pos,
3409 backtick_count,
3410 content: span_content,
3411 });
3412 }
3413
3414 code_spans.sort_by_key(|span| span.byte_offset);
3416
3417 code_spans
3418 }
3419
3420 fn parse_math_spans(content: &str, lines: &[LineInfo]) -> Vec<MathSpan> {
3422 let mut math_spans = Vec::new();
3423
3424 if !content.contains('$') {
3426 return math_spans;
3427 }
3428
3429 let mut options = Options::empty();
3431 options.insert(Options::ENABLE_MATH);
3432 let parser = Parser::new_ext(content, options).into_offset_iter();
3433
3434 for (event, range) in parser {
3435 let (is_display, math_content) = match &event {
3436 Event::InlineMath(text) => (false, text.as_ref()),
3437 Event::DisplayMath(text) => (true, text.as_ref()),
3438 _ => continue,
3439 };
3440
3441 let start_pos = range.start;
3442 let end_pos = range.end;
3443
3444 let line_idx = lines
3446 .partition_point(|line| line.byte_offset <= start_pos)
3447 .saturating_sub(1);
3448 let line_num = line_idx + 1;
3449 let byte_col_start = start_pos - lines[line_idx].byte_offset;
3450
3451 let end_line_idx = lines
3453 .partition_point(|line| line.byte_offset <= end_pos)
3454 .saturating_sub(1);
3455 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3456
3457 let line_content = lines[line_idx].content(content);
3459 let col_start = if byte_col_start <= line_content.len() {
3460 line_content[..byte_col_start].chars().count()
3461 } else {
3462 line_content.chars().count()
3463 };
3464
3465 let end_line_content = lines[end_line_idx].content(content);
3466 let col_end = if byte_col_end <= end_line_content.len() {
3467 end_line_content[..byte_col_end].chars().count()
3468 } else {
3469 end_line_content.chars().count()
3470 };
3471
3472 math_spans.push(MathSpan {
3473 line: line_num,
3474 end_line: end_line_idx + 1,
3475 start_col: col_start,
3476 end_col: col_end,
3477 byte_offset: start_pos,
3478 byte_end: end_pos,
3479 is_display,
3480 content: math_content.to_string(),
3481 });
3482 }
3483
3484 math_spans.sort_by_key(|span| span.byte_offset);
3486
3487 math_spans
3488 }
3489
3490 fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
3501 const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
3503
3504 #[inline]
3507 fn reset_tracking_state(
3508 list_item: &ListItemInfo,
3509 has_list_breaking_content: &mut bool,
3510 min_continuation: &mut usize,
3511 ) {
3512 *has_list_breaking_content = false;
3513 let marker_width = if list_item.is_ordered {
3514 list_item.marker.len() + 1 } else {
3516 list_item.marker.len()
3517 };
3518 *min_continuation = if list_item.is_ordered {
3519 marker_width
3520 } else {
3521 UNORDERED_LIST_MIN_CONTINUATION_INDENT
3522 };
3523 }
3524
3525 let mut list_blocks = Vec::with_capacity(lines.len() / 10); let mut current_block: Option<ListBlock> = None;
3528 let mut last_list_item_line = 0;
3529 let mut current_indent_level = 0;
3530 let mut last_marker_width = 0;
3531
3532 let mut has_list_breaking_content_since_last_item = false;
3534 let mut min_continuation_for_tracking = 0;
3535
3536 for (line_idx, line_info) in lines.iter().enumerate() {
3537 let line_num = line_idx + 1;
3538
3539 if line_info.in_code_block {
3541 if let Some(ref mut block) = current_block {
3542 let min_continuation_indent =
3544 CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
3545
3546 let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
3548
3549 match context {
3550 CodeBlockContext::Indented => {
3551 block.end_line = line_num;
3553 continue;
3554 }
3555 CodeBlockContext::Standalone => {
3556 let completed_block = current_block.take().unwrap();
3558 list_blocks.push(completed_block);
3559 continue;
3560 }
3561 CodeBlockContext::Adjacent => {
3562 block.end_line = line_num;
3564 continue;
3565 }
3566 }
3567 } else {
3568 continue;
3570 }
3571 }
3572
3573 let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
3575 caps.get(0).unwrap().as_str().to_string()
3576 } else {
3577 String::new()
3578 };
3579
3580 if let Some(ref block) = current_block
3583 && line_info.list_item.is_none()
3584 && !line_info.is_blank
3585 && !line_info.in_code_span_continuation
3586 {
3587 let line_content = line_info.content(content).trim();
3588
3589 let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
3594
3595 let blockquote_prefix_changes = blockquote_prefix.trim() != block.blockquote_prefix.trim();
3598
3599 let breaks_list = line_info.heading.is_some()
3600 || line_content.starts_with("---")
3601 || line_content.starts_with("***")
3602 || line_content.starts_with("___")
3603 || crate::utils::skip_context::is_table_line(line_content)
3604 || blockquote_prefix_changes
3605 || (line_info.indent > 0
3606 && line_info.indent < min_continuation_for_tracking
3607 && !is_lazy_continuation);
3608
3609 if breaks_list {
3610 has_list_breaking_content_since_last_item = true;
3611 }
3612 }
3613
3614 if line_info.in_code_span_continuation
3617 && line_info.list_item.is_none()
3618 && let Some(ref mut block) = current_block
3619 {
3620 block.end_line = line_num;
3621 }
3622
3623 let effective_continuation_indent = if let Some(ref block) = current_block {
3629 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3630 let line_content = line_info.content(content);
3631 let line_bq_level = line_content
3632 .chars()
3633 .take_while(|c| *c == '>' || c.is_whitespace())
3634 .filter(|&c| c == '>')
3635 .count();
3636 if line_bq_level > 0 && line_bq_level == block_bq_level {
3637 let mut pos = 0;
3639 let mut found_markers = 0;
3640 for c in line_content.chars() {
3641 pos += c.len_utf8();
3642 if c == '>' {
3643 found_markers += 1;
3644 if found_markers == line_bq_level {
3645 if line_content.get(pos..pos + 1) == Some(" ") {
3646 pos += 1;
3647 }
3648 break;
3649 }
3650 }
3651 }
3652 let after_bq = &line_content[pos..];
3653 after_bq.len() - after_bq.trim_start().len()
3654 } else {
3655 line_info.indent
3656 }
3657 } else {
3658 line_info.indent
3659 };
3660 let adjusted_min_continuation_for_tracking = if let Some(ref block) = current_block {
3661 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3662 if block_bq_level > 0 {
3663 if block.is_ordered { last_marker_width } else { 2 }
3664 } else {
3665 min_continuation_for_tracking
3666 }
3667 } else {
3668 min_continuation_for_tracking
3669 };
3670 let is_structural_element = line_info.heading.is_some()
3673 || line_info.content(content).trim().starts_with("```")
3674 || line_info.content(content).trim().starts_with("~~~");
3675 let is_valid_continuation = effective_continuation_indent >= adjusted_min_continuation_for_tracking
3676 || (line_info.indent == 0 && !line_info.is_blank && !is_structural_element);
3677
3678 if std::env::var("RUMDL_DEBUG_LIST").is_ok() && line_info.list_item.is_none() && !line_info.is_blank {
3679 eprintln!(
3680 "[DEBUG] Line {}: checking continuation - indent={}, min_cont={}, is_valid={}, in_code_span={}, in_code_block={}, has_block={}",
3681 line_num,
3682 effective_continuation_indent,
3683 adjusted_min_continuation_for_tracking,
3684 is_valid_continuation,
3685 line_info.in_code_span_continuation,
3686 line_info.in_code_block,
3687 current_block.is_some()
3688 );
3689 }
3690
3691 if !line_info.in_code_span_continuation
3692 && line_info.list_item.is_none()
3693 && !line_info.is_blank
3694 && !line_info.in_code_block
3695 && is_valid_continuation
3696 && let Some(ref mut block) = current_block
3697 {
3698 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3699 eprintln!(
3700 "[DEBUG] Line {}: extending block.end_line from {} to {}",
3701 line_num, block.end_line, line_num
3702 );
3703 }
3704 block.end_line = line_num;
3705 }
3706
3707 if let Some(list_item) = &line_info.list_item {
3709 let item_indent = list_item.marker_column;
3711 let nesting = item_indent / 2; if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3714 eprintln!(
3715 "[DEBUG] Line {}: list item found, marker={:?}, indent={}",
3716 line_num, list_item.marker, item_indent
3717 );
3718 }
3719
3720 if let Some(ref mut block) = current_block {
3721 let is_nested = nesting > block.nesting_level;
3725 let same_type =
3726 (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
3727 let same_context = block.blockquote_prefix == blockquote_prefix;
3728 let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
3730
3731 let marker_compatible =
3733 block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
3734
3735 let has_non_list_content = has_list_breaking_content_since_last_item;
3738
3739 let mut continues_list = if is_nested {
3743 same_context && reasonable_distance && !has_non_list_content
3745 } else {
3746 same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
3748 };
3749
3750 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3751 eprintln!(
3752 "[DEBUG] Line {}: continues_list={}, is_nested={}, same_type={}, same_context={}, reasonable_distance={}, marker_compatible={}, has_non_list_content={}, last_item={}, block.end_line={}",
3753 line_num,
3754 continues_list,
3755 is_nested,
3756 same_type,
3757 same_context,
3758 reasonable_distance,
3759 marker_compatible,
3760 has_non_list_content,
3761 last_list_item_line,
3762 block.end_line
3763 );
3764 }
3765
3766 if !continues_list
3770 && (is_nested || same_type)
3771 && reasonable_distance
3772 && line_num > 0
3773 && block.end_line == line_num - 1
3774 {
3775 if block.item_lines.contains(&(line_num - 1)) {
3778 continues_list = true;
3780 } else {
3781 continues_list = true;
3785 }
3786 }
3787
3788 if continues_list {
3789 block.end_line = line_num;
3791 block.item_lines.push(line_num);
3792
3793 block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
3795 list_item.marker.len() + 1
3796 } else {
3797 list_item.marker.len()
3798 });
3799
3800 if !block.is_ordered
3802 && block.marker.is_some()
3803 && block.marker.as_ref() != Some(&list_item.marker)
3804 {
3805 block.marker = None;
3807 }
3808
3809 reset_tracking_state(
3811 list_item,
3812 &mut has_list_breaking_content_since_last_item,
3813 &mut min_continuation_for_tracking,
3814 );
3815 } else {
3816 if !same_type
3821 && !is_nested
3822 && let Some(&last_item) = block.item_lines.last()
3823 {
3824 block.end_line = last_item;
3825 }
3826
3827 list_blocks.push(block.clone());
3828
3829 *block = ListBlock {
3830 start_line: line_num,
3831 end_line: line_num,
3832 is_ordered: list_item.is_ordered,
3833 marker: if list_item.is_ordered {
3834 None
3835 } else {
3836 Some(list_item.marker.clone())
3837 },
3838 blockquote_prefix: blockquote_prefix.clone(),
3839 item_lines: vec![line_num],
3840 nesting_level: nesting,
3841 max_marker_width: if list_item.is_ordered {
3842 list_item.marker.len() + 1
3843 } else {
3844 list_item.marker.len()
3845 },
3846 };
3847
3848 reset_tracking_state(
3850 list_item,
3851 &mut has_list_breaking_content_since_last_item,
3852 &mut min_continuation_for_tracking,
3853 );
3854 }
3855 } else {
3856 current_block = Some(ListBlock {
3858 start_line: line_num,
3859 end_line: line_num,
3860 is_ordered: list_item.is_ordered,
3861 marker: if list_item.is_ordered {
3862 None
3863 } else {
3864 Some(list_item.marker.clone())
3865 },
3866 blockquote_prefix,
3867 item_lines: vec![line_num],
3868 nesting_level: nesting,
3869 max_marker_width: list_item.marker.len(),
3870 });
3871
3872 reset_tracking_state(
3874 list_item,
3875 &mut has_list_breaking_content_since_last_item,
3876 &mut min_continuation_for_tracking,
3877 );
3878 }
3879
3880 last_list_item_line = line_num;
3881 current_indent_level = item_indent;
3882 last_marker_width = if list_item.is_ordered {
3883 list_item.marker.len() + 1 } else {
3885 list_item.marker.len()
3886 };
3887 } else if let Some(ref mut block) = current_block {
3888 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3890 eprintln!(
3891 "[DEBUG] Line {}: non-list-item, is_blank={}, block exists",
3892 line_num, line_info.is_blank
3893 );
3894 }
3895
3896 let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
3904 lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
3905 } else {
3906 false
3907 };
3908
3909 let min_continuation_indent = if block.is_ordered {
3913 current_indent_level + last_marker_width
3914 } else {
3915 current_indent_level + 2 };
3917
3918 if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
3919 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3921 eprintln!(
3922 "[DEBUG] Line {}: indented continuation (indent={}, min={})",
3923 line_num, line_info.indent, min_continuation_indent
3924 );
3925 }
3926 block.end_line = line_num;
3927 } else if line_info.is_blank {
3928 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3931 eprintln!("[DEBUG] Line {line_num}: entering blank line handling");
3932 }
3933 let mut check_idx = line_idx + 1;
3934 let mut found_continuation = false;
3935
3936 while check_idx < lines.len() && lines[check_idx].is_blank {
3938 check_idx += 1;
3939 }
3940
3941 if check_idx < lines.len() {
3942 let next_line = &lines[check_idx];
3943 let next_content = next_line.content(content);
3945 let block_bq_level_for_indent = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3948 let next_bq_level_for_indent = next_content
3949 .chars()
3950 .take_while(|c| *c == '>' || c.is_whitespace())
3951 .filter(|&c| c == '>')
3952 .count();
3953 let effective_indent =
3954 if next_bq_level_for_indent > 0 && next_bq_level_for_indent == block_bq_level_for_indent {
3955 let mut pos = 0;
3958 let mut found_markers = 0;
3959 for c in next_content.chars() {
3960 pos += c.len_utf8();
3961 if c == '>' {
3962 found_markers += 1;
3963 if found_markers == next_bq_level_for_indent {
3964 if next_content.get(pos..pos + 1) == Some(" ") {
3966 pos += 1;
3967 }
3968 break;
3969 }
3970 }
3971 }
3972 let after_blockquote_marker = &next_content[pos..];
3973 after_blockquote_marker.len() - after_blockquote_marker.trim_start().len()
3974 } else {
3975 next_line.indent
3976 };
3977 let adjusted_min_continuation = if block_bq_level_for_indent > 0 {
3980 if block.is_ordered { last_marker_width } else { 2 }
3983 } else {
3984 min_continuation_indent
3985 };
3986 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3988 eprintln!(
3989 "[DEBUG] Blank line {} checking next line {}: effective_indent={}, adjusted_min={}, next_is_list={}, in_code_block={}",
3990 line_num,
3991 check_idx + 1,
3992 effective_indent,
3993 adjusted_min_continuation,
3994 next_line.list_item.is_some(),
3995 next_line.in_code_block
3996 );
3997 }
3998 if !next_line.in_code_block && effective_indent >= adjusted_min_continuation {
3999 found_continuation = true;
4000 }
4001 else if !next_line.in_code_block
4003 && next_line.list_item.is_some()
4004 && let Some(item) = &next_line.list_item
4005 {
4006 let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
4007 .find(next_line.content(content))
4008 .map_or(String::new(), |m| m.as_str().to_string());
4009 if item.marker_column == current_indent_level
4010 && item.is_ordered == block.is_ordered
4011 && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
4012 {
4013 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4017 let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
4018 if let Some(between_line) = lines.get(idx) {
4019 let between_content = between_line.content(content);
4020 let trimmed = between_content.trim();
4021 if trimmed.is_empty() {
4023 return false;
4024 }
4025 let line_indent = between_content.len() - between_content.trim_start().len();
4027
4028 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4030 .find(between_content)
4031 .map_or(String::new(), |m| m.as_str().to_string());
4032 let between_bq_level = between_bq_prefix.chars().filter(|&c| c == '>').count();
4033 let blockquote_level_changed =
4034 trimmed.starts_with(">") && between_bq_level != block_bq_level;
4035
4036 if trimmed.starts_with("```")
4038 || trimmed.starts_with("~~~")
4039 || trimmed.starts_with("---")
4040 || trimmed.starts_with("***")
4041 || trimmed.starts_with("___")
4042 || blockquote_level_changed
4043 || crate::utils::skip_context::is_table_line(trimmed)
4044 || between_line.heading.is_some()
4045 {
4046 return true; }
4048
4049 line_indent >= min_continuation_indent
4051 } else {
4052 false
4053 }
4054 });
4055
4056 if block.is_ordered {
4057 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4060 if let Some(between_line) = lines.get(idx) {
4061 let between_content = between_line.content(content);
4062 let trimmed = between_content.trim();
4063 if trimmed.is_empty() {
4064 return false;
4065 }
4066 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4068 .find(between_content)
4069 .map_or(String::new(), |m| m.as_str().to_string());
4070 let between_bq_level =
4071 between_bq_prefix.chars().filter(|&c| c == '>').count();
4072 let blockquote_level_changed =
4073 trimmed.starts_with(">") && between_bq_level != block_bq_level;
4074 trimmed.starts_with("```")
4076 || trimmed.starts_with("~~~")
4077 || trimmed.starts_with("---")
4078 || trimmed.starts_with("***")
4079 || trimmed.starts_with("___")
4080 || blockquote_level_changed
4081 || crate::utils::skip_context::is_table_line(trimmed)
4082 || between_line.heading.is_some()
4083 } else {
4084 false
4085 }
4086 });
4087 found_continuation = !has_structural_separators;
4088 } else {
4089 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4091 if let Some(between_line) = lines.get(idx) {
4092 let between_content = between_line.content(content);
4093 let trimmed = between_content.trim();
4094 if trimmed.is_empty() {
4095 return false;
4096 }
4097 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4099 .find(between_content)
4100 .map_or(String::new(), |m| m.as_str().to_string());
4101 let between_bq_level =
4102 between_bq_prefix.chars().filter(|&c| c == '>').count();
4103 let blockquote_level_changed =
4104 trimmed.starts_with(">") && between_bq_level != block_bq_level;
4105 trimmed.starts_with("```")
4107 || trimmed.starts_with("~~~")
4108 || trimmed.starts_with("---")
4109 || trimmed.starts_with("***")
4110 || trimmed.starts_with("___")
4111 || blockquote_level_changed
4112 || crate::utils::skip_context::is_table_line(trimmed)
4113 || between_line.heading.is_some()
4114 } else {
4115 false
4116 }
4117 });
4118 found_continuation = !has_structural_separators;
4119 }
4120 }
4121 }
4122 }
4123
4124 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4125 eprintln!("[DEBUG] Blank line {line_num} final: found_continuation={found_continuation}");
4126 }
4127 if found_continuation {
4128 block.end_line = line_num;
4130 } else {
4131 list_blocks.push(block.clone());
4133 current_block = None;
4134 }
4135 } else {
4136 let min_required_indent = if block.is_ordered {
4139 current_indent_level + last_marker_width
4140 } else {
4141 current_indent_level + 2
4142 };
4143
4144 let line_content = line_info.content(content).trim();
4149
4150 let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
4152
4153 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4156 let current_bq_level = blockquote_prefix.chars().filter(|&c| c == '>').count();
4157 let blockquote_level_changed = line_content.starts_with(">") && current_bq_level != block_bq_level;
4158
4159 let is_structural_separator = line_info.heading.is_some()
4160 || line_content.starts_with("```")
4161 || line_content.starts_with("~~~")
4162 || line_content.starts_with("---")
4163 || line_content.starts_with("***")
4164 || line_content.starts_with("___")
4165 || blockquote_level_changed
4166 || looks_like_table;
4167
4168 let is_lazy_continuation = !is_structural_separator
4172 && !line_info.is_blank
4173 && (line_info.indent == 0
4174 || line_info.indent >= min_required_indent
4175 || line_info.in_code_span_continuation);
4176
4177 if is_lazy_continuation {
4178 block.end_line = line_num;
4181 } else {
4182 list_blocks.push(block.clone());
4184 current_block = None;
4185 }
4186 }
4187 }
4188 }
4189
4190 if let Some(block) = current_block {
4192 list_blocks.push(block);
4193 }
4194
4195 merge_adjacent_list_blocks(content, &mut list_blocks, lines);
4197
4198 list_blocks
4199 }
4200
4201 fn compute_char_frequency(content: &str) -> CharFrequency {
4203 let mut frequency = CharFrequency::default();
4204
4205 for ch in content.chars() {
4206 match ch {
4207 '#' => frequency.hash_count += 1,
4208 '*' => frequency.asterisk_count += 1,
4209 '_' => frequency.underscore_count += 1,
4210 '-' => frequency.hyphen_count += 1,
4211 '+' => frequency.plus_count += 1,
4212 '>' => frequency.gt_count += 1,
4213 '|' => frequency.pipe_count += 1,
4214 '[' => frequency.bracket_count += 1,
4215 '`' => frequency.backtick_count += 1,
4216 '<' => frequency.lt_count += 1,
4217 '!' => frequency.exclamation_count += 1,
4218 '\n' => frequency.newline_count += 1,
4219 _ => {}
4220 }
4221 }
4222
4223 frequency
4224 }
4225
4226 fn parse_html_tags(
4228 content: &str,
4229 lines: &[LineInfo],
4230 code_blocks: &[(usize, usize)],
4231 flavor: MarkdownFlavor,
4232 ) -> Vec<HtmlTag> {
4233 static HTML_TAG_REGEX: LazyLock<regex::Regex> =
4234 LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
4235
4236 let mut html_tags = Vec::with_capacity(content.matches('<').count());
4237
4238 for cap in HTML_TAG_REGEX.captures_iter(content) {
4239 let full_match = cap.get(0).unwrap();
4240 let match_start = full_match.start();
4241 let match_end = full_match.end();
4242
4243 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4245 continue;
4246 }
4247
4248 let is_closing = !cap.get(1).unwrap().as_str().is_empty();
4249 let tag_name_original = cap.get(2).unwrap().as_str();
4250 let tag_name = tag_name_original.to_lowercase();
4251 let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
4252
4253 if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
4256 continue;
4257 }
4258
4259 let mut line_num = 1;
4261 let mut col_start = match_start;
4262 let mut col_end = match_end;
4263 for (idx, line_info) in lines.iter().enumerate() {
4264 if match_start >= line_info.byte_offset {
4265 line_num = idx + 1;
4266 col_start = match_start - line_info.byte_offset;
4267 col_end = match_end - line_info.byte_offset;
4268 } else {
4269 break;
4270 }
4271 }
4272
4273 html_tags.push(HtmlTag {
4274 line: line_num,
4275 start_col: col_start,
4276 end_col: col_end,
4277 byte_offset: match_start,
4278 byte_end: match_end,
4279 tag_name,
4280 is_closing,
4281 is_self_closing,
4282 raw_content: full_match.as_str().to_string(),
4283 });
4284 }
4285
4286 html_tags
4287 }
4288
4289 fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
4291 let mut table_rows = Vec::with_capacity(lines.len() / 20);
4292
4293 for (line_idx, line_info) in lines.iter().enumerate() {
4294 if line_info.in_code_block || line_info.is_blank {
4296 continue;
4297 }
4298
4299 let line = line_info.content(content);
4300 let line_num = line_idx + 1;
4301
4302 if !line.contains('|') {
4304 continue;
4305 }
4306
4307 let parts: Vec<&str> = line.split('|').collect();
4309 let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
4310
4311 let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
4313 let mut column_alignments = Vec::new();
4314
4315 if is_separator {
4316 for part in &parts[1..parts.len() - 1] {
4317 let trimmed = part.trim();
4319 let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
4320 "center".to_string()
4321 } else if trimmed.ends_with(':') {
4322 "right".to_string()
4323 } else if trimmed.starts_with(':') {
4324 "left".to_string()
4325 } else {
4326 "none".to_string()
4327 };
4328 column_alignments.push(alignment);
4329 }
4330 }
4331
4332 table_rows.push(TableRow {
4333 line: line_num,
4334 is_separator,
4335 column_count,
4336 column_alignments,
4337 });
4338 }
4339
4340 table_rows
4341 }
4342
4343 fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
4345 let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
4346
4347 for cap in URL_SIMPLE_REGEX.captures_iter(content) {
4349 let full_match = cap.get(0).unwrap();
4350 let match_start = full_match.start();
4351 let match_end = full_match.end();
4352
4353 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4355 continue;
4356 }
4357
4358 let preceding_char = if match_start > 0 {
4360 content.chars().nth(match_start - 1)
4361 } else {
4362 None
4363 };
4364 let following_char = content.chars().nth(match_end);
4365
4366 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4367 continue;
4368 }
4369 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4370 continue;
4371 }
4372
4373 let url = full_match.as_str();
4374 let url_type = if url.starts_with("https://") {
4375 "https"
4376 } else if url.starts_with("http://") {
4377 "http"
4378 } else if url.starts_with("ftp://") {
4379 "ftp"
4380 } else {
4381 "other"
4382 };
4383
4384 let mut line_num = 1;
4386 let mut col_start = match_start;
4387 let mut col_end = match_end;
4388 for (idx, line_info) in lines.iter().enumerate() {
4389 if match_start >= line_info.byte_offset {
4390 line_num = idx + 1;
4391 col_start = match_start - line_info.byte_offset;
4392 col_end = match_end - line_info.byte_offset;
4393 } else {
4394 break;
4395 }
4396 }
4397
4398 bare_urls.push(BareUrl {
4399 line: line_num,
4400 start_col: col_start,
4401 end_col: col_end,
4402 byte_offset: match_start,
4403 byte_end: match_end,
4404 url: url.to_string(),
4405 url_type: url_type.to_string(),
4406 });
4407 }
4408
4409 for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
4411 let full_match = cap.get(0).unwrap();
4412 let match_start = full_match.start();
4413 let match_end = full_match.end();
4414
4415 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4417 continue;
4418 }
4419
4420 let preceding_char = if match_start > 0 {
4422 content.chars().nth(match_start - 1)
4423 } else {
4424 None
4425 };
4426 let following_char = content.chars().nth(match_end);
4427
4428 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4429 continue;
4430 }
4431 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4432 continue;
4433 }
4434
4435 let email = full_match.as_str();
4436
4437 let mut line_num = 1;
4439 let mut col_start = match_start;
4440 let mut col_end = match_end;
4441 for (idx, line_info) in lines.iter().enumerate() {
4442 if match_start >= line_info.byte_offset {
4443 line_num = idx + 1;
4444 col_start = match_start - line_info.byte_offset;
4445 col_end = match_end - line_info.byte_offset;
4446 } else {
4447 break;
4448 }
4449 }
4450
4451 bare_urls.push(BareUrl {
4452 line: line_num,
4453 start_col: col_start,
4454 end_col: col_end,
4455 byte_offset: match_start,
4456 byte_end: match_end,
4457 url: email.to_string(),
4458 url_type: "email".to_string(),
4459 });
4460 }
4461
4462 bare_urls
4463 }
4464
    /// Returns the iterator produced by `ValidHeadingsIter::new` over this
    /// context's `lines`.
    ///
    /// NOTE(review): presumably yields only headings whose `is_valid` flag is
    /// set — confirm against `ValidHeadingsIter`.
    #[must_use]
    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
        ValidHeadingsIter::new(&self.lines)
    }
4488
4489 #[must_use]
4493 pub fn has_valid_headings(&self) -> bool {
4494 self.lines
4495 .iter()
4496 .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
4497 }
4498}
4499
4500fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
4502 if list_blocks.len() < 2 {
4503 return;
4504 }
4505
4506 let mut merger = ListBlockMerger::new(content, lines);
4507 *list_blocks = merger.merge(list_blocks);
4508}
4509
/// Helper that decides which neighbouring `ListBlock`s belong together and
/// merges them. It only borrows the document data its checks need.
struct ListBlockMerger<'a> {
    /// Full document text; passed to `LineInfo::content` to obtain a line's text.
    content: &'a str,
    /// Per-line metadata; the merge checks index it with `line_number - 1`.
    lines: &'a [LineInfo],
}
4515
4516impl<'a> ListBlockMerger<'a> {
4517 fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
4518 Self { content, lines }
4519 }
4520
4521 fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
4522 let mut merged = Vec::with_capacity(list_blocks.len());
4523 let mut current = list_blocks[0].clone();
4524
4525 for next in list_blocks.iter().skip(1) {
4526 if self.should_merge_blocks(¤t, next) {
4527 current = self.merge_two_blocks(current, next);
4528 } else {
4529 merged.push(current);
4530 current = next.clone();
4531 }
4532 }
4533
4534 merged.push(current);
4535 merged
4536 }
4537
4538 fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
4540 if !self.blocks_are_compatible(current, next) {
4542 return false;
4543 }
4544
4545 let spacing = self.analyze_spacing_between(current, next);
4547 match spacing {
4548 BlockSpacing::Consecutive => true,
4549 BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
4550 BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
4551 self.can_merge_with_content_between(current, next)
4552 }
4553 }
4554 }
4555
4556 fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
4558 current.is_ordered == next.is_ordered
4559 && current.blockquote_prefix == next.blockquote_prefix
4560 && current.nesting_level == next.nesting_level
4561 }
4562
4563 fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
4565 let gap = next.start_line - current.end_line;
4566
4567 match gap {
4568 1 => BlockSpacing::Consecutive,
4569 2 => BlockSpacing::SingleBlank,
4570 _ if gap > 2 => {
4571 if self.has_only_blank_lines_between(current, next) {
4572 BlockSpacing::MultipleBlanks
4573 } else {
4574 BlockSpacing::ContentBetween
4575 }
4576 }
4577 _ => BlockSpacing::Consecutive, }
4579 }
4580
4581 fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4583 if has_meaningful_content_between(self.content, current, next, self.lines) {
4586 return false; }
4588
4589 !current.is_ordered && current.marker == next.marker
4591 }
4592
4593 fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4595 if has_meaningful_content_between(self.content, current, next, self.lines) {
4597 return false; }
4599
4600 current.is_ordered && next.is_ordered
4602 }
4603
4604 fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4606 for line_num in (current.end_line + 1)..next.start_line {
4607 if let Some(line_info) = self.lines.get(line_num - 1)
4608 && !line_info.content(self.content).trim().is_empty()
4609 {
4610 return false;
4611 }
4612 }
4613 true
4614 }
4615
4616 fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
4618 current.end_line = next.end_line;
4619 current.item_lines.extend_from_slice(&next.item_lines);
4620
4621 current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
4623
4624 if !current.is_ordered && self.markers_differ(¤t, next) {
4626 current.marker = None; }
4628
4629 current
4630 }
4631
4632 fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
4634 current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
4635 }
4636}
4637
/// How two list blocks are separated, as classified by
/// `ListBlockMerger::analyze_spacing_between` from the difference between the
/// next block's start line and the current block's end line.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// Line difference of 1 (the next block starts right after the current
    /// one); the classifier's fallback arm maps a difference of 0 here too.
    Consecutive,
    /// Line difference of 2, i.e. exactly one line in between. The classifier
    /// does not inspect that line itself.
    SingleBlank,
    /// Line difference above 2 where every line in between is empty after trimming.
    MultipleBlanks,
    /// Line difference above 2 with at least one non-empty line in between.
    ContentBetween,
}
4646
4647fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
4649 for line_num in (current.end_line + 1)..next.start_line {
4651 if let Some(line_info) = lines.get(line_num - 1) {
4652 let trimmed = line_info.content(content).trim();
4654
4655 if trimmed.is_empty() {
4657 continue;
4658 }
4659
4660 if line_info.heading.is_some() {
4664 return true; }
4666
4667 if is_horizontal_rule(trimmed) {
4669 return true; }
4671
4672 if crate::utils::skip_context::is_table_line(trimmed) {
4674 return true; }
4676
4677 if trimmed.starts_with('>') {
4679 return true; }
4681
4682 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
4684 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4685
4686 let min_continuation_indent = if current.is_ordered {
4688 current.nesting_level + current.max_marker_width + 1 } else {
4690 current.nesting_level + 2
4691 };
4692
4693 if line_indent < min_continuation_indent {
4694 return true; }
4697 }
4698
4699 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4701
4702 let min_indent = if current.is_ordered {
4704 current.nesting_level + current.max_marker_width
4705 } else {
4706 current.nesting_level + 2
4707 };
4708
4709 if line_indent < min_indent {
4711 return true; }
4713
4714 }
4717 }
4718
4719 false
4721}
4722
4723pub fn is_horizontal_rule_line(line: &str) -> bool {
4730 let leading_spaces = line.len() - line.trim_start_matches(' ').len();
4732 if leading_spaces > 3 || line.starts_with('\t') {
4733 return false;
4734 }
4735
4736 is_horizontal_rule_content(line.trim())
4737}
4738
/// Checks whether already-trimmed text forms a horizontal rule.
///
/// The text must be at least three bytes long, start with `-`, `*` or `_`,
/// contain that same character at least three times, and contain nothing
/// else except spaces and tabs.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    if trimmed.len() < 3 {
        return false;
    }

    // The first character selects the rule marker; anything else disqualifies.
    let marker = match trimmed.chars().next() {
        Some(c @ ('-' | '*' | '_')) => c,
        _ => return false,
    };

    // Walk the text directly rather than collecting it into a Vec first.
    let mut marker_count = 0usize;
    for ch in trimmed.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' && ch != '\t' {
            // Any character other than the marker or blank padding breaks the rule.
            return false;
        }
    }

    marker_count >= 3
}
4763
/// Alias for `is_horizontal_rule_content`: forwards `trimmed` unchanged and
/// returns that function's result.
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}
4768
4769#[cfg(test)]
4771mod tests {
4772 use super::*;
4773
4774 #[test]
4775 fn test_empty_content() {
4776 let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
4777 assert_eq!(ctx.content, "");
4778 assert_eq!(ctx.line_offsets, vec![0]);
4779 assert_eq!(ctx.offset_to_line_col(0), (1, 1));
4780 assert_eq!(ctx.lines.len(), 0);
4781 }
4782
4783 #[test]
4784 fn test_single_line() {
4785 let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
4786 assert_eq!(ctx.content, "# Hello");
4787 assert_eq!(ctx.line_offsets, vec![0]);
4788 assert_eq!(ctx.offset_to_line_col(0), (1, 1));
4789 assert_eq!(ctx.offset_to_line_col(3), (1, 4));
4790 }
4791
4792 #[test]
4793 fn test_multi_line() {
4794 let content = "# Title\n\nSecond line\nThird line";
4795 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4796 assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
4797 assert_eq!(ctx.offset_to_line_col(0), (1, 1)); assert_eq!(ctx.offset_to_line_col(8), (2, 1)); assert_eq!(ctx.offset_to_line_col(9), (3, 1)); assert_eq!(ctx.offset_to_line_col(15), (3, 7)); assert_eq!(ctx.offset_to_line_col(21), (4, 1)); }
4804
4805 #[test]
4806 fn test_line_info() {
4807 let content = "# Title\n indented\n\ncode:\n```rust\nfn main() {}\n```";
4808 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4809
4810 assert_eq!(ctx.lines.len(), 7);
4812
4813 let line1 = &ctx.lines[0];
4815 assert_eq!(line1.content(ctx.content), "# Title");
4816 assert_eq!(line1.byte_offset, 0);
4817 assert_eq!(line1.indent, 0);
4818 assert!(!line1.is_blank);
4819 assert!(!line1.in_code_block);
4820 assert!(line1.list_item.is_none());
4821
4822 let line2 = &ctx.lines[1];
4824 assert_eq!(line2.content(ctx.content), " indented");
4825 assert_eq!(line2.byte_offset, 8);
4826 assert_eq!(line2.indent, 4);
4827 assert!(!line2.is_blank);
4828
4829 let line3 = &ctx.lines[2];
4831 assert_eq!(line3.content(ctx.content), "");
4832 assert!(line3.is_blank);
4833
4834 assert_eq!(ctx.line_to_byte_offset(1), Some(0));
4836 assert_eq!(ctx.line_to_byte_offset(2), Some(8));
4837 assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
4838 assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
4839 }
4840
4841 #[test]
4842 fn test_list_item_detection() {
4843 let content = "- Unordered item\n * Nested item\n1. Ordered item\n 2) Nested ordered\n\nNot a list";
4844 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4845
4846 let line1 = &ctx.lines[0];
4848 assert!(line1.list_item.is_some());
4849 let list1 = line1.list_item.as_ref().unwrap();
4850 assert_eq!(list1.marker, "-");
4851 assert!(!list1.is_ordered);
4852 assert_eq!(list1.marker_column, 0);
4853 assert_eq!(list1.content_column, 2);
4854
4855 let line2 = &ctx.lines[1];
4857 assert!(line2.list_item.is_some());
4858 let list2 = line2.list_item.as_ref().unwrap();
4859 assert_eq!(list2.marker, "*");
4860 assert_eq!(list2.marker_column, 2);
4861
4862 let line3 = &ctx.lines[2];
4864 assert!(line3.list_item.is_some());
4865 let list3 = line3.list_item.as_ref().unwrap();
4866 assert_eq!(list3.marker, "1.");
4867 assert!(list3.is_ordered);
4868 assert_eq!(list3.number, Some(1));
4869
4870 let line6 = &ctx.lines[5];
4872 assert!(line6.list_item.is_none());
4873 }
4874
4875 #[test]
4876 fn test_offset_to_line_col_edge_cases() {
4877 let content = "a\nb\nc";
4878 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4879 assert_eq!(ctx.offset_to_line_col(0), (1, 1)); assert_eq!(ctx.offset_to_line_col(1), (1, 2)); assert_eq!(ctx.offset_to_line_col(2), (2, 1)); assert_eq!(ctx.offset_to_line_col(3), (2, 2)); assert_eq!(ctx.offset_to_line_col(4), (3, 1)); assert_eq!(ctx.offset_to_line_col(5), (3, 2)); }
4887
4888 #[test]
4889 fn test_mdx_esm_blocks() {
4890 let content = r##"import {Chart} from './snowfall.js'
4891export const year = 2023
4892
4893# Last year's snowfall
4894
4895In {year}, the snowfall was above average.
4896It was followed by a warm spring which caused
4897flood conditions in many of the nearby rivers.
4898
4899<Chart color="#fcb32c" year={year} />
4900"##;
4901
4902 let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);
4903
4904 assert_eq!(ctx.lines.len(), 10);
4906 assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
4907 assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
4908 assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
4909 assert!(
4910 !ctx.lines[3].in_esm_block,
4911 "Line 4 (heading) should NOT be in_esm_block"
4912 );
4913 assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
4914 assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
4915 }
4916
4917 #[test]
4918 fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
4919 let content = r#"import {Chart} from './snowfall.js'
4920export const year = 2023
4921
4922# Last year's snowfall
4923"#;
4924
4925 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4926
4927 assert!(
4929 !ctx.lines[0].in_esm_block,
4930 "Line 1 should NOT be in_esm_block in Standard flavor"
4931 );
4932 assert!(
4933 !ctx.lines[1].in_esm_block,
4934 "Line 2 should NOT be in_esm_block in Standard flavor"
4935 );
4936 }
4937
4938 #[test]
4939 fn test_blockquote_with_indented_content() {
4940 let content = r#"# Heading
4944
4945> -S socket-path
4946> More text
4947"#;
4948 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4949
4950 assert!(
4952 ctx.lines.get(2).is_some_and(|l| l.blockquote.is_some()),
4953 "Line 3 should be a blockquote"
4954 );
4955 assert!(
4957 ctx.lines.get(3).is_some_and(|l| l.blockquote.is_some()),
4958 "Line 4 should be a blockquote"
4959 );
4960
4961 let bq3 = ctx.lines.get(2).unwrap().blockquote.as_ref().unwrap();
4964 assert_eq!(bq3.content, "-S socket-path");
4965 assert_eq!(bq3.nesting_level, 1);
4966 assert!(bq3.has_multiple_spaces_after_marker);
4968
4969 let bq4 = ctx.lines.get(3).unwrap().blockquote.as_ref().unwrap();
4970 assert_eq!(bq4.content, "More text");
4971 assert_eq!(bq4.nesting_level, 1);
4972 }
4973
4974 #[test]
4975 fn test_footnote_definitions_not_parsed_as_reference_defs() {
4976 let content = r#"# Title
4978
4979A footnote[^1].
4980
4981[^1]: This is the footnote content.
4982
4983[^note]: Another footnote with [link](https://example.com).
4984
4985[regular]: ./path.md "A real reference definition"
4986"#;
4987 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4988
4989 assert_eq!(
4991 ctx.reference_defs.len(),
4992 1,
4993 "Footnotes should not be parsed as reference definitions"
4994 );
4995
4996 assert_eq!(ctx.reference_defs[0].id, "regular");
4998 assert_eq!(ctx.reference_defs[0].url, "./path.md");
4999 assert_eq!(
5000 ctx.reference_defs[0].title,
5001 Some("A real reference definition".to_string())
5002 );
5003 }
5004
5005 #[test]
5006 fn test_footnote_with_inline_link_not_misidentified() {
5007 let content = r#"# Title
5010
5011A footnote[^1].
5012
5013[^1]: [link](https://www.google.com).
5014"#;
5015 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5016
5017 assert!(
5019 ctx.reference_defs.is_empty(),
5020 "Footnote with inline link should not create a reference definition"
5021 );
5022 }
5023
5024 #[test]
5025 fn test_various_footnote_formats_excluded() {
5026 let content = r#"[^1]: Numeric footnote
5028[^note]: Named footnote
5029[^a]: Single char footnote
5030[^long-footnote-name]: Long named footnote
5031[^123abc]: Mixed alphanumeric
5032
5033[ref1]: ./file1.md
5034[ref2]: ./file2.md
5035"#;
5036 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5037
5038 assert_eq!(
5040 ctx.reference_defs.len(),
5041 2,
5042 "Only regular reference definitions should be parsed"
5043 );
5044
5045 let ids: Vec<&str> = ctx.reference_defs.iter().map(|r| r.id.as_str()).collect();
5046 assert!(ids.contains(&"ref1"));
5047 assert!(ids.contains(&"ref2"));
5048 assert!(!ids.iter().any(|id| id.starts_with('^')));
5049 }
5050
5051 #[test]
5056 fn test_has_char_tracked_characters() {
5057 let content = "# Heading\n* list item\n_emphasis_ and -hyphen-\n+ plus\n> quote\n| table |\n[link]\n`code`\n<html>\n!image";
5059 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5060
5061 assert!(ctx.has_char('#'), "Should detect hash");
5063 assert!(ctx.has_char('*'), "Should detect asterisk");
5064 assert!(ctx.has_char('_'), "Should detect underscore");
5065 assert!(ctx.has_char('-'), "Should detect hyphen");
5066 assert!(ctx.has_char('+'), "Should detect plus");
5067 assert!(ctx.has_char('>'), "Should detect gt");
5068 assert!(ctx.has_char('|'), "Should detect pipe");
5069 assert!(ctx.has_char('['), "Should detect bracket");
5070 assert!(ctx.has_char('`'), "Should detect backtick");
5071 assert!(ctx.has_char('<'), "Should detect lt");
5072 assert!(ctx.has_char('!'), "Should detect exclamation");
5073 assert!(ctx.has_char('\n'), "Should detect newline");
5074 }
5075
5076 #[test]
5077 fn test_has_char_absent_characters() {
5078 let content = "Simple text without special chars";
5079 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5080
5081 assert!(!ctx.has_char('#'), "Should not detect hash");
5083 assert!(!ctx.has_char('*'), "Should not detect asterisk");
5084 assert!(!ctx.has_char('_'), "Should not detect underscore");
5085 assert!(!ctx.has_char('-'), "Should not detect hyphen");
5086 assert!(!ctx.has_char('+'), "Should not detect plus");
5087 assert!(!ctx.has_char('>'), "Should not detect gt");
5088 assert!(!ctx.has_char('|'), "Should not detect pipe");
5089 assert!(!ctx.has_char('['), "Should not detect bracket");
5090 assert!(!ctx.has_char('`'), "Should not detect backtick");
5091 assert!(!ctx.has_char('<'), "Should not detect lt");
5092 assert!(!ctx.has_char('!'), "Should not detect exclamation");
5093 assert!(!ctx.has_char('\n'), "Should not detect newline in single line");
5095 }
5096
5097 #[test]
5098 fn test_has_char_fallback_for_untracked() {
5099 let content = "Text with @mention and $dollar and %percent";
5100 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5101
5102 assert!(ctx.has_char('@'), "Should detect @ via fallback");
5104 assert!(ctx.has_char('$'), "Should detect $ via fallback");
5105 assert!(ctx.has_char('%'), "Should detect % via fallback");
5106 assert!(!ctx.has_char('^'), "Should not detect absent ^ via fallback");
5107 }
5108
5109 #[test]
5110 fn test_char_count_tracked_characters() {
5111 let content = "## Heading ##\n***bold***\n__emphasis__\n---\n+++\n>> nested\n|| table ||\n[[link]]\n``code``\n<<html>>\n!!";
5112 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5113
5114 assert_eq!(ctx.char_count('#'), 4, "Should count 4 hashes");
5116 assert_eq!(ctx.char_count('*'), 6, "Should count 6 asterisks");
5117 assert_eq!(ctx.char_count('_'), 4, "Should count 4 underscores");
5118 assert_eq!(ctx.char_count('-'), 3, "Should count 3 hyphens");
5119 assert_eq!(ctx.char_count('+'), 3, "Should count 3 pluses");
5120 assert_eq!(ctx.char_count('>'), 4, "Should count 4 gt (2 nested + 2 in <<html>>)");
5121 assert_eq!(ctx.char_count('|'), 4, "Should count 4 pipes");
5122 assert_eq!(ctx.char_count('['), 2, "Should count 2 brackets");
5123 assert_eq!(ctx.char_count('`'), 4, "Should count 4 backticks");
5124 assert_eq!(ctx.char_count('<'), 2, "Should count 2 lt");
5125 assert_eq!(ctx.char_count('!'), 2, "Should count 2 exclamations");
5126 assert_eq!(ctx.char_count('\n'), 10, "Should count 10 newlines");
5127 }
5128
5129 #[test]
5130 fn test_char_count_zero_for_absent() {
5131 let content = "Plain text";
5132 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5133
5134 assert_eq!(ctx.char_count('#'), 0);
5135 assert_eq!(ctx.char_count('*'), 0);
5136 assert_eq!(ctx.char_count('_'), 0);
5137 assert_eq!(ctx.char_count('\n'), 0);
5138 }
5139
5140 #[test]
5141 fn test_char_count_fallback_for_untracked() {
5142 let content = "@@@ $$ %%%";
5143 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5144
5145 assert_eq!(ctx.char_count('@'), 3, "Should count 3 @ via fallback");
5146 assert_eq!(ctx.char_count('$'), 2, "Should count 2 $ via fallback");
5147 assert_eq!(ctx.char_count('%'), 3, "Should count 3 % via fallback");
5148 assert_eq!(ctx.char_count('^'), 0, "Should count 0 for absent char");
5149 }
5150
5151 #[test]
5152 fn test_char_count_empty_content() {
5153 let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
5154
5155 assert_eq!(ctx.char_count('#'), 0);
5156 assert_eq!(ctx.char_count('*'), 0);
5157 assert_eq!(ctx.char_count('@'), 0);
5158 assert!(!ctx.has_char('#'));
5159 assert!(!ctx.has_char('@'));
5160 }
5161
5162 #[test]
5167 fn test_is_in_html_tag_simple() {
5168 let content = "<div>content</div>";
5169 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5170
5171 assert!(ctx.is_in_html_tag(0), "Position 0 (<) should be in tag");
5173 assert!(ctx.is_in_html_tag(1), "Position 1 (d) should be in tag");
5174 assert!(ctx.is_in_html_tag(4), "Position 4 (>) should be in tag");
5175
5176 assert!(!ctx.is_in_html_tag(5), "Position 5 (c) should not be in tag");
5178 assert!(!ctx.is_in_html_tag(10), "Position 10 (t) should not be in tag");
5179
5180 assert!(ctx.is_in_html_tag(12), "Position 12 (<) should be in tag");
5182 assert!(ctx.is_in_html_tag(17), "Position 17 (>) should be in tag");
5183 }
5184
5185 #[test]
5186 fn test_is_in_html_tag_self_closing() {
5187 let content = "Text <br/> more text";
5188 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5189
5190 assert!(!ctx.is_in_html_tag(0), "Position 0 should not be in tag");
5192 assert!(!ctx.is_in_html_tag(4), "Position 4 (space) should not be in tag");
5193
5194 assert!(ctx.is_in_html_tag(5), "Position 5 (<) should be in tag");
5196 assert!(ctx.is_in_html_tag(8), "Position 8 (/) should be in tag");
5197 assert!(ctx.is_in_html_tag(9), "Position 9 (>) should be in tag");
5198
5199 assert!(!ctx.is_in_html_tag(10), "Position 10 (space) should not be in tag");
5201 }
5202
5203 #[test]
5204 fn test_is_in_html_tag_with_attributes() {
5205 let content = r#"<a href="url" class="link">text</a>"#;
5206 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5207
5208 assert!(ctx.is_in_html_tag(0), "Start of tag");
5210 assert!(ctx.is_in_html_tag(10), "Inside href attribute");
5211 assert!(ctx.is_in_html_tag(20), "Inside class attribute");
5212 assert!(ctx.is_in_html_tag(26), "End of opening tag");
5213
5214 assert!(!ctx.is_in_html_tag(27), "Start of content");
5216 assert!(!ctx.is_in_html_tag(30), "End of content");
5217
5218 assert!(ctx.is_in_html_tag(31), "Start of closing tag");
5220 }
5221
5222 #[test]
5223 fn test_is_in_html_tag_multiline() {
5224 let content = "<div\n class=\"test\"\n>\ncontent\n</div>";
5225 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5226
5227 assert!(ctx.is_in_html_tag(0), "Start of multiline tag");
5229 assert!(ctx.is_in_html_tag(5), "After first newline in tag");
5230 assert!(ctx.is_in_html_tag(15), "Inside attribute");
5231
5232 let closing_bracket_pos = content.find(">\n").unwrap();
5234 assert!(!ctx.is_in_html_tag(closing_bracket_pos + 2), "Content after tag");
5235 }
5236
5237 #[test]
5238 fn test_is_in_html_tag_no_tags() {
5239 let content = "Plain text without any HTML";
5240 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5241
5242 for i in 0..content.len() {
5244 assert!(!ctx.is_in_html_tag(i), "Position {i} should not be in tag");
5245 }
5246 }
5247
5248 #[test]
5253 fn test_is_in_jinja_range_expression() {
5254 let content = "Hello {{ name }}!";
5255 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5256
5257 assert!(!ctx.is_in_jinja_range(0), "H should not be in Jinja");
5259 assert!(!ctx.is_in_jinja_range(5), "Space before Jinja should not be in Jinja");
5260
5261 assert!(ctx.is_in_jinja_range(6), "First brace should be in Jinja");
5263 assert!(ctx.is_in_jinja_range(7), "Second brace should be in Jinja");
5264 assert!(ctx.is_in_jinja_range(10), "name should be in Jinja");
5265 assert!(ctx.is_in_jinja_range(14), "Closing brace should be in Jinja");
5266 assert!(ctx.is_in_jinja_range(15), "Second closing brace should be in Jinja");
5267
5268 assert!(!ctx.is_in_jinja_range(16), "! should not be in Jinja");
5270 }
5271
5272 #[test]
5273 fn test_is_in_jinja_range_statement() {
5274 let content = "{% if condition %}content{% endif %}";
5275 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5276
5277 assert!(ctx.is_in_jinja_range(0), "Start of Jinja statement");
5279 assert!(ctx.is_in_jinja_range(5), "condition should be in Jinja");
5280 assert!(ctx.is_in_jinja_range(17), "End of opening statement");
5281
5282 assert!(!ctx.is_in_jinja_range(18), "content should not be in Jinja");
5284
5285 assert!(ctx.is_in_jinja_range(25), "Start of endif");
5287 assert!(ctx.is_in_jinja_range(32), "endif should be in Jinja");
5288 }
5289
/// Exercises `is_in_jinja_range` with two expressions on the same line.
///
/// `{{ a }}` covers bytes 0..=6 and `{{ b }}` covers bytes 12..=18; the
/// ` and ` separator in between must not be reported as Jinja.
#[test]
fn test_is_in_jinja_range_multiple() {
    let source = "{{ a }} and {{ b }}";
    let lint_ctx = LintContext::new(source, MarkdownFlavor::Standard, None);
    let in_jinja = |offset: usize| lint_ctx.is_in_jinja_range(offset);

    // First expression: start, middle, end.
    assert!(in_jinja(0));
    assert!(in_jinja(3));
    assert!(in_jinja(6));

    // Separator text between the expressions.
    assert!(!in_jinja(8));
    assert!(!in_jinja(11));

    // Second expression: start, middle, end.
    assert!(in_jinja(12));
    assert!(in_jinja(15));
    assert!(in_jinja(18));
}
5309
/// Ensures that text without Jinja delimiters never registers as a Jinja range.
///
/// The fixture deliberately contains a `{single braces}` pair so that the
/// negative case described by its own wording is actually exercised: a lone
/// `{` or `}` is not one of the two-character delimiters (`{{`, `{%`) that
/// the other Jinja tests in this module rely on.
#[test]
fn test_is_in_jinja_range_no_jinja() {
    let content = "Plain text with {single braces} but not Jinja";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // Every byte offset of the document must be outside any Jinja range.
    for i in 0..content.len() {
        assert!(!ctx.is_in_jinja_range(i), "Position {i} should not be in Jinja");
    }
}
5320
/// Exercises `is_in_link_title` on a reference definition that has a
/// double-quoted title.
///
/// The title range is read from the parsed definition and is treated as
/// half-open: `title_byte_start` is inside, `title_byte_end` is outside.
#[test]
fn test_is_in_link_title_with_title() {
    let markdown = r#"[ref]: https://example.com "Title text"

Some content."#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    assert_eq!(lint_ctx.reference_defs.len(), 1);
    let only_def = &lint_ctx.reference_defs[0];
    assert!(only_def.title_byte_start.is_some());
    assert!(only_def.title_byte_end.is_some());

    // Both offsets were just asserted to be present.
    let (title_start, title_end) = only_def
        .title_byte_start
        .zip(only_def.title_byte_end)
        .unwrap();

    // Byte 10 falls inside the URL portion of the definition.
    assert!(!lint_ctx.is_in_link_title(10), "URL should not be in title");

    // First, interior and last byte of the title range.
    assert!(
        lint_ctx.is_in_link_title(title_start),
        "Title start should be in title"
    );
    assert!(
        lint_ctx.is_in_link_title(title_start + 5),
        "Middle of title should be in title"
    );
    assert!(
        lint_ctx.is_in_link_title(title_end - 1),
        "End of title should be in title"
    );

    // The end offset itself is exclusive.
    assert!(
        !lint_ctx.is_in_link_title(title_end),
        "After title end should not be in title"
    );
}
5358
/// Exercises `is_in_link_title` on a reference definition that has no title.
///
/// The definition must be parsed with both title offsets unset, and no byte
/// of the document may be reported as being inside a title.
#[test]
fn test_is_in_link_title_without_title() {
    let markdown = "[ref]: https://example.com\n\nSome content.";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    assert_eq!(lint_ctx.reference_defs.len(), 1);
    let only_def = &lint_ctx.reference_defs[0];
    assert!(only_def.title_byte_start.is_none());
    assert!(only_def.title_byte_end.is_none());

    // Sweep every byte offset of the document.
    for i in 0..markdown.len() {
        assert!(!lint_ctx.is_in_link_title(i), "Position {i} should not be in title");
    }
}
5375
/// Exercises `is_in_link_title` with several reference definitions, only
/// some of which carry a title.
///
/// `ref1` and `ref3` have double-quoted titles, `ref2` has none. The title
/// offsets of the titled definitions are required (the test fails instead of
/// silently skipping its checks when one is missing), and the end offset of
/// `ref3` is checked as an exclusive bound.
#[test]
fn test_is_in_link_title_multiple_refs() {
    let content = r#"[ref1]: /url1 "Title One"
[ref2]: /url2
[ref3]: /url3 "Title Three"
"#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    assert_eq!(ctx.reference_defs.len(), 3);

    let ref1 = ctx.reference_defs.iter().find(|r| r.id == "ref1").unwrap();
    let ref2 = ctx.reference_defs.iter().find(|r| r.id == "ref2").unwrap();
    let ref3 = ctx.reference_defs.iter().find(|r| r.id == "ref3").unwrap();

    // The untitled definition must not record a title start.
    assert!(ref2.title_byte_start.is_none());

    // First titled definition: one byte past the start is inside the title;
    // five bytes past the end lands in the following `[ref2]` line.
    let ref1_start = ref1.title_byte_start.expect("ref1 should record a title start");
    let ref1_end = ref1.title_byte_end.expect("ref1 should record a title end");
    assert!(ctx.is_in_link_title(ref1_start + 1));
    assert!(!ctx.is_in_link_title(ref1_end + 5));

    // Second titled definition: same interior check, plus the exclusive end.
    let ref3_start = ref3.title_byte_start.expect("ref3 should record a title start");
    let ref3_end = ref3.title_byte_end.expect("ref3 should record a title end");
    assert!(ctx.is_in_link_title(ref3_start + 1));
    assert!(!ctx.is_in_link_title(ref3_end));
}
5410
/// Exercises `is_in_link_title` on a reference definition whose title is
/// written with single quotes.
///
/// The range checks only run when the parsed definition exposes both title
/// offsets; otherwise the test ends after the definition-count assertion.
#[test]
fn test_is_in_link_title_single_quotes() {
    let markdown = "[ref]: /url 'Single quoted title'\n";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    assert_eq!(lint_ctx.reference_defs.len(), 1);
    let only_def = &lint_ctx.reference_defs[0];

    // Nothing further to verify when no title range was recorded.
    let Some((title_start, title_end)) = only_def.title_byte_start.zip(only_def.title_byte_end)
    else {
        return;
    };

    assert!(lint_ctx.is_in_link_title(title_start));
    assert!(lint_ctx.is_in_link_title(title_start + 5));
    // The end offset is exclusive.
    assert!(!lint_ctx.is_in_link_title(title_end));
}
5425
/// Exercises `is_in_link_title` on a reference definition whose title is
/// written in parentheses.
///
/// The test accepts either outcome of parsing: when the first definition
/// exposes a title range, that range is checked; when there is no definition
/// or no title range, no byte of the document may be reported as a title.
#[test]
fn test_is_in_link_title_parentheses() {
    let markdown = "[ref]: /url (Parenthesized title)\n";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    // `None` covers both "no definition parsed" and "definition without title offsets".
    let title_range = lint_ctx
        .reference_defs
        .first()
        .and_then(|def| def.title_byte_start.zip(def.title_byte_end));

    match title_range {
        Some((title_start, title_end)) => {
            assert!(lint_ctx.is_in_link_title(title_start));
            assert!(lint_ctx.is_in_link_title(title_start + 5));
            // The end offset is exclusive.
            assert!(!lint_ctx.is_in_link_title(title_end));
        }
        None => {
            for offset in 0..markdown.len() {
                assert!(!lint_ctx.is_in_link_title(offset));
            }
        }
    }
}
5454
/// Exercises `is_in_link_title` on a document that has no reference
/// definitions at all: no byte offset may be reported as a title.
#[test]
fn test_is_in_link_title_no_refs() {
    let markdown = "Just plain text without any reference definitions.";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    assert!(lint_ctx.reference_defs.is_empty());

    // Sweep every byte offset of the document.
    for offset in 0..markdown.len() {
        assert!(!lint_ctx.is_in_link_title(offset));
    }
}
5466
/// Checks that a single `$...$` span is detected as inline math and that its
/// content (which looks like a Markdown link) is extracted without the
/// dollar delimiters.
#[test]
fn test_math_spans_inline() {
    let markdown = "Text with inline math $[f](x)$ in it.";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should detect one inline math span");

    let inline = &spans[0];
    assert!(!inline.is_display, "Should be inline math, not display");
    assert_eq!(inline.content, "[f](x)", "Content should be extracted correctly");
}
5483
/// Checks that a `$$...$$` block written on a single line is detected as one
/// display math span whose content keeps the link-like `[x](\zeta)` text.
#[test]
fn test_math_spans_display_single_line() {
    let markdown = "$$X(\\zeta) = \\mathcal Z [x](\\zeta)$$";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should detect one display math span");

    let display = &spans[0];
    assert!(display.is_display, "Should be display math");

    let keeps_link_like_text = display.content.contains("[x](\\zeta)");
    assert!(keeps_link_like_text, "Content should contain the link-like pattern");
}
5499
/// Checks that a `$$` block whose delimiters sit on their own lines is
/// detected as exactly one display math span.
#[test]
fn test_math_spans_display_multiline() {
    let markdown = "Before\n\n$$\n[x](\\zeta) = \\sum_k x(k)\n$$\n\nAfter";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should detect one display math span");

    let display = &spans[0];
    assert!(display.is_display, "Should be display math");
}
5511
/// Exercises `is_in_math_span` for offsets inside, before and after an
/// inline math span.
///
/// The span bounds are derived from the positions of the first and last `$`
/// in the fixture rather than from the parsed span.
#[test]
fn test_is_in_math_span() {
    let markdown = "Text $[f](x)$ more text";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    // Offset of the opening `$`, and the offset one past the closing `$`.
    let opening = markdown.find('$').unwrap();
    let past_closing = markdown.rfind('$').map(|idx| idx + 1).unwrap();

    // Two offsets within the math content.
    assert!(
        lint_ctx.is_in_math_span(opening + 1),
        "Position inside math span should return true"
    );
    assert!(
        lint_ctx.is_in_math_span(opening + 3),
        "Position inside math span should return true"
    );

    // Offsets on either side of the span.
    assert!(
        !lint_ctx.is_in_math_span(0),
        "Position before math span should return false"
    );
    assert!(
        !lint_ctx.is_in_math_span(past_closing + 1),
        "Position after math span should return false"
    );
}
5537
/// Checks that an inline math span and an inline code span on the same line
/// are each detected once and keep their own content.
#[test]
fn test_math_spans_mixed_with_code() {
    let markdown = "Math $[f](x)$ and code `[g](y)` mixed";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let math = lint_ctx.math_spans();
    let code = lint_ctx.code_spans();

    assert_eq!(math.len(), 1, "Should have one math span");
    assert_eq!(code.len(), 1, "Should have one code span");

    // Each span holds only the text between its own delimiters.
    assert_eq!(math[0].content, "[f](x)");
    assert_eq!(code[0].content, "[g](y)");
}
5554
/// Checks that a document without any `$` produces no math spans.
#[test]
fn test_math_spans_no_math() {
    let markdown = "Regular text without any math at all.";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    assert!(lint_ctx.math_spans().is_empty(), "Should have no math spans");
}
5563
/// Checks that two inline spans and one display span on the same line are
/// all detected and classified by their `is_display` flag.
#[test]
fn test_math_spans_multiple() {
    let markdown = "First $a$ and second $b$ and display $$c$$";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 3, "Should detect three math spans");

    // Every span is either display or inline, so the inline count is the remainder.
    let display_count = spans.iter().filter(|span| span.is_display).count();
    let inline_count = spans.len() - display_count;

    assert_eq!(inline_count, 2, "Should have two inline math spans");
    assert_eq!(display_count, 1, "Should have one display math span");
}
5579
/// Pins the boundary semantics of `is_in_math_span` against the parsed
/// span's own offsets: `byte_offset` is inclusive, `byte_end` is exclusive.
#[test]
fn test_is_in_math_span_boundary_positions() {
    let markdown = "$[f](x)$";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should have one math span");

    let first = spans[0].byte_offset;
    let past_last = spans[0].byte_end;
    let in_math = |offset: usize| lint_ctx.is_in_math_span(offset);

    // Inclusive lower bound and its neighbour.
    assert!(in_math(first), "Start position should be in span");
    assert!(in_math(first + 1), "Position after start should be in span");

    // Last covered byte, then the exclusive upper bound.
    assert!(in_math(past_last - 1), "Position at end-1 should be in span");
    assert!(
        !in_math(past_last),
        "Position at byte_end should NOT be in span (exclusive)"
    );
}
5616
/// Checks that a math span opening on the very first byte of the document
/// is detected with `byte_offset == 0`.
#[test]
fn test_math_spans_at_document_start() {
    let markdown = "$x$ text";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1);

    let leading = &spans[0];
    assert_eq!(leading.byte_offset, 0, "Math should start at byte 0");
}
5626
/// Checks that a math span closing on the very last byte of the document is
/// detected with `byte_end` equal to the document length.
#[test]
fn test_math_spans_at_document_end() {
    let markdown = "text $x$";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1);

    let trailing = &spans[0];
    assert_eq!(trailing.byte_end, markdown.len(), "Math should end at document end");
}
5636
/// Exercises math detection on `$a$$b$`, where two spans touch each other.
///
/// The test does not fix how many spans are produced; it only requires at
/// least one span and that every byte of the document is covered by one.
#[test]
fn test_math_spans_consecutive() {
    let markdown = "$a$$b$";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let found_any = !lint_ctx.math_spans().is_empty();
    assert!(found_any, "Should detect at least one math span");

    // Sweep every byte offset of the document.
    for i in 0..markdown.len() {
        assert!(lint_ctx.is_in_math_span(i), "Position {i} should be in a math span");
    }
}
5651
/// Checks that a lone `$` used as a currency sign does not yield a math span
/// whose content contains the amount that follows it.
#[test]
fn test_math_spans_currency_not_math() {
    let markdown = "Price is $100";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    // Holds trivially when no span is detected at all.
    let no_span_holds_amount = lint_ctx
        .math_spans()
        .iter()
        .all(|span| !span.content.contains("100"));

    assert!(
        no_span_holds_amount,
        "Unbalanced $ should not create math span containing 100"
    );
}
5666
/// Exercises `get_reference_url` with ids that differ from the defining
/// label only by letter case, plus one id that is not defined.
#[test]
fn test_reference_lookup_o1_basic() {
    let markdown = r#"[ref1]: /url1
[REF2]: /url2 "Title"
[Ref3]: /url3

Use [link][ref1] and [link][REF2]."#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    assert_eq!(lint_ctx.reference_defs.len(), 3);

    // Label defined in lowercase, queried in three casings.
    assert_eq!(lint_ctx.get_reference_url("ref1"), Some("/url1"));
    assert_eq!(lint_ctx.get_reference_url("REF1"), Some("/url1"));
    assert_eq!(lint_ctx.get_reference_url("Ref1"), Some("/url1"));

    // Label defined in uppercase, queried in lowercase and as written.
    assert_eq!(lint_ctx.get_reference_url("ref2"), Some("/url2"));
    assert_eq!(lint_ctx.get_reference_url("REF2"), Some("/url2"));

    // Label defined in mixed case, queried in lowercase.
    assert_eq!(lint_ctx.get_reference_url("ref3"), Some("/url3"));

    // Unknown label.
    assert_eq!(lint_ctx.get_reference_url("nonexistent"), None);
}
5692
/// Exercises `get_reference_def`: the returned definition exposes the URL
/// and title, an uppercase id finds the same definition, and an unknown id
/// yields `None`.
#[test]
fn test_reference_lookup_o1_get_reference_def() {
    let markdown = r#"[myref]: https://example.com "My Title"
"#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    // Lookup with the id exactly as written in the definition.
    let exact = lint_ctx.get_reference_def("myref").expect("Should find myref");
    assert_eq!(exact.url, "https://example.com");
    assert_eq!(exact.title.as_deref(), Some("My Title"));

    // Lookup with the id in uppercase.
    let upper = lint_ctx.get_reference_def("MYREF").expect("Should find MYREF");
    assert_eq!(upper.url, "https://example.com");

    // Lookup of an id that was never defined.
    assert!(lint_ctx.get_reference_def("nonexistent").is_none());
}
5711
/// Exercises `has_reference_def` with ids that match a definition exactly,
/// ids that differ only by letter case, and an id that is not defined.
#[test]
fn test_reference_lookup_o1_has_reference_def() {
    let markdown = r#"[foo]: /foo
[BAR]: /bar
"#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    // `foo` is defined in lowercase.
    assert!(lint_ctx.has_reference_def("foo"));
    assert!(lint_ctx.has_reference_def("FOO"));

    // `BAR` is defined in uppercase.
    assert!(lint_ctx.has_reference_def("bar"));
    assert!(lint_ctx.has_reference_def("Bar"));

    // `baz` is not defined anywhere.
    assert!(!lint_ctx.has_reference_def("baz"));
}
5726
/// Checks that all three lookup helpers report "not found" when the document
/// contains no reference definitions.
#[test]
fn test_reference_lookup_o1_empty_content() {
    let markdown = "No references here.";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    assert!(lint_ctx.reference_defs.is_empty());

    let missing_id = "anything";
    assert_eq!(lint_ctx.get_reference_url(missing_id), None);
    assert!(lint_ctx.get_reference_def(missing_id).is_none());
    assert!(!lint_ctx.has_reference_def(missing_id));
}
5737
/// Checks that reference ids containing `-`, `_` and `.` resolve to the URL
/// of their own definition.
#[test]
fn test_reference_lookup_o1_special_characters_in_id() {
    let markdown = r#"[ref-with-dash]: /url1
[ref_with_underscore]: /url2
[ref.with.dots]: /url3
"#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    // Each id maps to a distinct URL, so a failure identifies the case by its URL.
    let cases = [
        ("ref-with-dash", "/url1"),
        ("ref_with_underscore", "/url2"),
        ("ref.with.dots", "/url3"),
    ];
    for (id, url) in cases {
        assert_eq!(lint_ctx.get_reference_url(id), Some(url));
    }
}
5750
/// Checks that non-ASCII reference ids resolve, including an id queried in
/// uppercase (`ÉMOJI`) against a definition written in lowercase (`émoji`).
#[test]
fn test_reference_lookup_o1_unicode_id() {
    let markdown = r#"[日本語]: /japanese
[émoji]: /emoji
"#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    // Ids queried exactly as written.
    assert_eq!(lint_ctx.get_reference_url("日本語"), Some("/japanese"));
    assert_eq!(lint_ctx.get_reference_url("émoji"), Some("/emoji"));

    // Same id with its letters uppercased, including the accented one.
    assert_eq!(lint_ctx.get_reference_url("ÉMOJI"), Some("/emoji"));
}
5762}