1use crate::config::MarkdownFlavor;
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
4use crate::utils::element_cache::ElementCache;
5use crate::utils::regex_cache::URL_SIMPLE_REGEX;
6use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
7use regex::Regex;
8use std::borrow::Cow;
9use std::collections::HashMap;
10use std::path::PathBuf;
11use std::sync::LazyLock;
12
/// Evaluates `$code`, and when `$profile` is true prints the elapsed wall-clock
/// time to stderr as `[PROFILE] <name>: <duration>`. The value of `$code` is
/// the value of the macro invocation.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        // The timer is started unconditionally so that `$code` is evaluated
        // exactly once, whether or not profiling is enabled.
        let timer = std::time::Instant::now();
        let output = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, timer.elapsed());
        }
        output
    }};
}
25
/// wasm32 variant of `profile_section!`: expands to `$code` alone, with no
/// timing and no output. `$name` and `$profile` are accepted but not used.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
30
/// Regex matching a link-shaped span: `[text](url "title")` or `[text][ref]`.
///
/// Capture groups: 1 = link text, 2 = URL in angle brackets, 3 = bare URL,
/// 4/5 = double-/single-quoted title, 6 = reference id.
/// Compiled lazily on first use; `(?sx)` enables dot-matches-newline and
/// verbose mode, so the whitespace and `#` comments in the pattern are ignored.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\] # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
44
/// Regex matching an image-shaped span: `![alt](url "title")` or `![alt][ref]`.
///
/// Same structure as the link pattern with a leading `!`. Capture groups:
/// 1 = alt text, 2 = URL in angle brackets, 3 = bare URL,
/// 4/5 = double-/single-quoted title, 6 = reference id.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\] # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
58
/// Regex matching a reference definition line: `[id]: url "optional title"`.
///
/// Multi-line mode (`(?m)`), so `^`/`$` anchor at line boundaries. Up to three
/// leading spaces are allowed. Groups: 1 = id, 2 = URL (no whitespace),
/// 3/4 = double-/single-quoted title.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
62
/// Regex matching an email-address-shaped token: `local@domain.tld`, where the
/// final label is at least two ASCII letters. This is a shape check only.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
68
/// Regex capturing a leading blockquote prefix: optional whitespace, one or
/// more `>` characters, and any whitespace that follows (group 1).
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
71
/// Facts recorded for a single line of the document; `LintContext::lines`
/// holds one entry per line, so index `i` corresponds to line number `i + 1`.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset in the source at which this line starts.
    pub byte_offset: usize,
    /// Length of the line in bytes; `content()` slices `byte_offset..byte_offset + byte_len`.
    pub byte_len: usize,
    pub indent: usize,
    pub visual_indent: usize,
    pub is_blank: bool,
    pub in_code_block: bool,
    pub in_front_matter: bool,
    pub in_html_block: bool,
    pub in_html_comment: bool,
    /// List item data when the line was recognised as a list item.
    pub list_item: Option<ListItemInfo>,
    /// Heading data when the line was recognised as a heading.
    pub heading: Option<HeadingInfo>,
    /// Blockquote data when the line was recognised as part of a blockquote.
    pub blockquote: Option<BlockquoteInfo>,
    pub in_mkdocstrings: bool,
    pub in_esm_block: bool,
    /// Set by `LintContext::new` on every line after the first line of a
    /// code span that extends over more than one line.
    pub in_code_span_continuation: bool,
    pub is_horizontal_rule: bool,
    pub in_math_block: bool,
}
113
114impl LineInfo {
115 pub fn content<'a>(&self, source: &'a str) -> &'a str {
117 &source[self.byte_offset..self.byte_offset + self.byte_len]
118 }
119}
120
/// Data attached to a line that was recognised as a list item.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The list marker text as found on the line.
    pub marker: String,
    /// Whether the item belongs to an ordered list.
    pub is_ordered: bool,
    /// NOTE(review): presumably the parsed number of an ordered item, `None` otherwise — confirm.
    pub number: Option<usize>,
    /// Column of the marker. `compute_mixed_list_nesting` treats the value 1
    /// the same as position 0.
    pub marker_column: usize,
    pub content_column: usize,
}
135
/// Syntactic style of a heading.
///
/// NOTE(review): presumably `ATX` is the `#`-prefixed form and `Setext1` /
/// `Setext2` the underlined level-1 / level-2 forms — confirm against the
/// heading detection code, which is not visible here.
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    ATX,
    Setext1,
    Setext2,
}
146
/// A link found by `LintContext::parse_links`.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// 1-based line number of the line containing the link start.
    pub line: usize,
    /// Byte distance from the start of that line to the link start.
    pub start_col: usize,
    /// Byte distance from the start of the line containing the link end to the link end.
    pub end_col: usize,
    /// Byte offset of the link start in the document.
    pub byte_offset: usize,
    /// Byte offset just past the link end in the document.
    pub byte_end: usize,
    /// Text between the opening `[` and its matching `]`.
    pub text: Cow<'a, str>,
    /// Destination URL; empty for links produced by the regex fallback pass.
    pub url: Cow<'a, str>,
    /// True for reference, collapsed and shortcut links.
    pub is_reference: bool,
    /// Lower-cased reference id for reference-style links, `None` otherwise.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type as reported by pulldown-cmark (`Reference` for regex fallback matches).
    pub link_type: LinkType,
}
171
/// A link reference that pulldown-cmark reported through its broken-link
/// callback while `parse_links` was running.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text as reported by the parser.
    pub reference: String,
    /// Byte range of the broken link in the document.
    pub span: std::ops::Range<usize>,
}
180
/// A footnote reference event reported by pulldown-cmark during `parse_links`.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// Footnote identifier as reported by the parser.
    pub id: String,
    /// 1-based line number containing the start of the reference.
    pub line: usize,
    /// Byte offset of the start of the reference.
    pub byte_offset: usize,
    /// Byte offset just past the end of the reference.
    pub byte_end: usize,
}
193
/// An image found by `LintContext::parse_images`.
///
/// NOTE(review): the fields parallel `ParsedLink`; the image parser is not
/// fully visible here, so the exact column/offset conventions are presumed to
/// match those of links — confirm.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    pub line: usize,
    pub start_col: usize,
    pub end_col: usize,
    pub byte_offset: usize,
    pub byte_end: usize,
    pub alt_text: Cow<'a, str>,
    pub url: Cow<'a, str>,
    pub is_reference: bool,
    pub reference_id: Option<Cow<'a, str>>,
    pub link_type: LinkType,
}
218
/// A reference definition (`[id]: url "title"`) found in the document.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    pub line: usize,
    /// Definition id; `LintContext` indexes definitions by the lower-cased id.
    pub id: String,
    pub url: String,
    pub title: Option<String>,
    /// Start of the definition; `is_in_reference_def` treats
    /// `byte_offset..byte_end` as a half-open range.
    pub byte_offset: usize,
    pub byte_end: usize,
    /// Start of the title, when present; `is_in_link_title` treats
    /// `title_byte_start..title_byte_end` as a half-open range.
    pub title_byte_start: Option<usize>,
    pub title_byte_end: Option<usize>,
}
239
/// An inline code span, possibly extending over several lines.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// 1-based line number on which the span starts.
    pub line: usize,
    /// 1-based line number on which the span ends (equal to `line` for single-line spans).
    pub end_line: usize,
    /// Start column; `is_in_code_span` compares it against a 0-based column.
    pub start_col: usize,
    /// End column (exclusive in `is_in_code_span`).
    pub end_col: usize,
    /// Byte offset of the span start; `byte_offset..byte_end` is used as a half-open range.
    pub byte_offset: usize,
    pub byte_end: usize,
    pub backtick_count: usize,
    pub content: String,
}
260
/// A math span found in the document.
#[derive(Debug, Clone)]
pub struct MathSpan {
    pub line: usize,
    pub end_line: usize,
    pub start_col: usize,
    pub end_col: usize,
    /// Byte offset of the span start; `is_in_math_span` treats
    /// `byte_offset..byte_end` as a half-open range.
    pub byte_offset: usize,
    pub byte_end: usize,
    /// NOTE(review): presumably true for display math and false for inline math — confirm.
    pub is_display: bool,
    pub content: String,
}
281
/// Data attached to a line that was recognised as a heading.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    pub level: u8,
    pub style: HeadingStyle,
    pub marker: String,
    pub marker_column: usize,
    pub content_column: usize,
    pub text: String,
    pub custom_id: Option<String>,
    pub raw_text: String,
    pub has_closing_sequence: bool,
    pub closing_sequence: String,
    /// Headings with `is_valid == false` are skipped by `ValidHeadingsIter`.
    pub is_valid: bool,
}
309
/// One item yielded by `ValidHeadingsIter`: a heading whose `is_valid` flag is
/// set, together with the line it was found on.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// 1-based line number of the heading.
    pub line_num: usize,
    /// The heading data stored on that line.
    pub heading: &'a HeadingInfo,
    /// The full line record the heading belongs to.
    pub line_info: &'a LineInfo,
}
323
324pub struct ValidHeadingsIter<'a> {
329 lines: &'a [LineInfo],
330 current_index: usize,
331}
332
333impl<'a> ValidHeadingsIter<'a> {
334 fn new(lines: &'a [LineInfo]) -> Self {
335 Self {
336 lines,
337 current_index: 0,
338 }
339 }
340}
341
342impl<'a> Iterator for ValidHeadingsIter<'a> {
343 type Item = ValidHeading<'a>;
344
345 fn next(&mut self) -> Option<Self::Item> {
346 while self.current_index < self.lines.len() {
347 let idx = self.current_index;
348 self.current_index += 1;
349
350 let line_info = &self.lines[idx];
351 if let Some(heading) = &line_info.heading
352 && heading.is_valid
353 {
354 return Some(ValidHeading {
355 line_num: idx + 1, heading,
357 line_info,
358 });
359 }
360 }
361 None
362 }
363}
364
/// Data attached to a line that was recognised as part of a blockquote.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    pub nesting_level: usize,
    pub indent: String,
    pub marker_column: usize,
    /// The blockquote prefix of the line; `blockquote_prefix_for_blank_line`
    /// returns it with trailing whitespace trimmed.
    pub prefix: String,
    pub content: String,
    pub has_no_space_after_marker: bool,
    pub has_multiple_spaces_after_marker: bool,
    pub needs_md028_fix: bool,
}
385
/// A contiguous block of list lines.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// First line of the block; `is_in_list_block` treats
    /// `start_line..=end_line` as an inclusive range of line numbers.
    pub start_line: usize,
    /// Last line of the block (inclusive).
    pub end_line: usize,
    pub is_ordered: bool,
    pub marker: Option<String>,
    pub blockquote_prefix: String,
    pub item_lines: Vec<usize>,
    pub nesting_level: usize,
    pub max_marker_width: usize,
}
406
407use std::sync::{Arc, OnceLock};
408
/// Map from a `usize` key to per-item list data.
///
/// NOTE(review): neither the key nor the tuple layout is established by the
/// code visible here — presumably keyed by line and holding
/// `(is_ordered, marker, marker_column, content_column, number)`; confirm at
/// the use site.
type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;
411
/// Precomputed character counters for the document, used by
/// `LintContext::has_char`, `char_count` and the `likely_has_*` helpers to
/// answer "does this character occur" without rescanning the content.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Counter reported for `'#'`.
    pub hash_count: usize,
    /// Counter reported for `'*'`.
    pub asterisk_count: usize,
    /// Counter reported for `'_'`.
    pub underscore_count: usize,
    /// Counter reported for `'-'`.
    pub hyphen_count: usize,
    /// Counter reported for `'+'`.
    pub plus_count: usize,
    /// Counter reported for `'>'`.
    pub gt_count: usize,
    /// Counter reported for `'|'`.
    pub pipe_count: usize,
    /// Counter reported for `'['`.
    pub bracket_count: usize,
    /// Counter reported for `` '`' ``.
    pub backtick_count: usize,
    /// Counter reported for `'<'`.
    pub lt_count: usize,
    /// Counter reported for `'!'`.
    pub exclamation_count: usize,
    /// Counter reported for `'\n'`.
    pub newline_count: usize,
}
440
/// An HTML tag found in the document.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number of the tag; `html_tags_on_line` filters on this value.
    pub line: usize,
    pub start_col: usize,
    pub end_col: usize,
    /// Byte offset of the tag start; `is_in_html_tag` treats
    /// `byte_offset..byte_end` as a half-open range.
    pub byte_offset: usize,
    pub byte_end: usize,
    pub tag_name: String,
    pub is_closing: bool,
    pub is_self_closing: bool,
    pub raw_content: String,
}
463
/// An emphasis span found in the document.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number of the span; `emphasis_spans_on_line` filters on this value.
    pub line: usize,
    pub start_col: usize,
    pub end_col: usize,
    pub byte_offset: usize,
    pub byte_end: usize,
    /// The emphasis marker character.
    pub marker: char,
    /// Number of marker characters used.
    pub marker_count: usize,
    pub content: String,
}
484
/// A table row found in the document.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number of the row; `table_rows_on_line` filters on this value.
    pub line: usize,
    pub is_separator: bool,
    pub column_count: usize,
    pub column_alignments: Vec<String>,
}
497
/// A bare URL found in the document.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number of the URL; `bare_urls_on_line` filters on this value.
    pub line: usize,
    pub start_col: usize,
    pub end_col: usize,
    pub byte_offset: usize,
    pub byte_end: usize,
    pub url: String,
    pub url_type: String,
}
516
/// Parsed view of one Markdown document.
///
/// `LintContext::new` computes most fields eagerly; the `*_cache` fields are
/// `OnceLock`s that are either pre-seeded by `new` (code spans, emphasis
/// spans) or filled on first access by the matching accessor method.
pub struct LintContext<'a> {
    /// The document text all byte offsets refer to.
    pub content: &'a str,
    /// Byte offset of the start of each line; the first entry is 0.
    pub line_offsets: Vec<usize>,
    /// Code block ranges as `(usize, usize)` pairs from `CodeBlockUtils::detect_code_blocks`.
    pub code_blocks: Vec<(usize, usize)>,
    /// One record per line of the document.
    pub lines: Vec<LineInfo>,
    /// Links found by `parse_links`.
    pub links: Vec<ParsedLink<'a>>,
    /// Images found by `parse_images`.
    pub images: Vec<ParsedImage<'a>>,
    /// Broken link references reported by the Markdown parser.
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Footnote references reported by the Markdown parser.
    pub footnote_refs: Vec<FootnoteRef>,
    /// Reference definitions found in the document.
    pub reference_defs: Vec<ReferenceDef>,
    /// Lower-cased definition id -> index into `reference_defs`.
    reference_defs_map: HashMap<String, usize>,
    /// Code spans; seeded during construction.
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>,
    /// Math spans; filled on first call to `math_spans()`.
    math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>,
    /// List blocks found by `parse_list_blocks`.
    pub list_blocks: Vec<ListBlock>,
    /// Character counters for quick presence checks.
    pub char_frequency: CharFrequency,
    /// HTML tags; filled on first call to `html_tags()`.
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>,
    /// Emphasis spans; seeded during construction.
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>,
    /// Table rows; filled on first call to `table_rows()`.
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>,
    /// Bare URLs; filled on first call to `bare_urls()`.
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>,
    /// Result of `compute_mixed_list_nesting`; filled on first call to `has_mixed_list_nesting()`.
    has_mixed_list_nesting_cache: OnceLock<bool>,
    /// Byte ranges of HTML comments.
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Table blocks found by `TableUtils::find_table_blocks_with_code_info`.
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Line index built over `content`.
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    /// Jinja ranges as `(start, end)` byte pairs, used as half-open ranges.
    jinja_ranges: Vec<(usize, usize)>,
    /// Markdown flavor the document is parsed as.
    pub flavor: MarkdownFlavor,
    /// Path of the source file, when known.
    pub source_file: Option<PathBuf>,
}
544
/// The four consecutive pieces of a blockquote line, as produced by
/// `parse_blockquote_detailed`. Concatenated in order they give back the line.
struct BlockquoteComponents<'a> {
    /// Leading spaces/tabs before the first `>`.
    indent: &'a str,
    /// The run of consecutive `>` characters.
    markers: &'a str,
    /// Spaces/tabs directly after the markers.
    spaces_after: &'a str,
    /// Everything after `spaces_after`.
    content: &'a str,
}
552
553#[inline]
555fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
556 let bytes = line.as_bytes();
557 let mut pos = 0;
558
559 while pos < bytes.len() && (bytes[pos] == b' ' || bytes[pos] == b'\t') {
561 pos += 1;
562 }
563 let indent_end = pos;
564
565 if pos >= bytes.len() || bytes[pos] != b'>' {
567 return None;
568 }
569
570 while pos < bytes.len() && bytes[pos] == b'>' {
572 pos += 1;
573 }
574 let markers_end = pos;
575
576 while pos < bytes.len() && (bytes[pos] == b' ' || bytes[pos] == b'\t') {
578 pos += 1;
579 }
580 let spaces_end = pos;
581
582 Some(BlockquoteComponents {
583 indent: &line[0..indent_end],
584 markers: &line[indent_end..markers_end],
585 spaces_after: &line[markers_end..spaces_end],
586 content: &line[spaces_end..],
587 })
588}
589
590impl<'a> LintContext<'a> {
/// Builds a `LintContext` for `content`, running every eager analysis pass.
///
/// * `content` - the document text; all byte offsets refer to it.
/// * `flavor` - Markdown flavor; passed to several passes and stored.
/// * `source_file` - optional path of the document; stored as-is.
///
/// On non-wasm targets, setting the `RUMDL_PROFILE_QUADRATIC` environment
/// variable (to any value) makes each pass print its elapsed time to stderr.
/// Later passes consume the results of earlier ones, so the order of the
/// sections below matters.
pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
    #[cfg(not(target_arch = "wasm32"))]
    let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
    #[cfg(target_arch = "wasm32")]
    let profile = false;

    // Byte offset of the start of every line: 0, then one past each '\n'.
    let line_offsets = profile_section!("Line offsets", profile, {
        let mut offsets = vec![0];
        for (i, c) in content.char_indices() {
            if c == '\n' {
                offsets.push(i + 1);
            }
        }
        offsets
    });

    let code_blocks = profile_section!("Code blocks", profile, CodeBlockUtils::detect_code_blocks(content));

    let html_comment_ranges = profile_section!(
        "HTML comment ranges",
        profile,
        crate::utils::skip_context::compute_html_comment_ranges(content)
    );

    // Autodoc block ranges are only computed for the MkDocs flavor.
    let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
        if flavor == MarkdownFlavor::MkDocs {
            crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
        } else {
            Vec::new()
        }
    });

    // This pass yields both the per-line records and the emphasis spans.
    let (mut lines, emphasis_spans) = profile_section!(
        "Basic line info",
        profile,
        Self::compute_basic_line_info(
            content,
            &line_offsets,
            &code_blocks,
            flavor,
            &html_comment_ranges,
            &autodoc_ranges,
        )
    );

    // The next passes update `lines` in place.
    profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));

    profile_section!(
        "ESM blocks",
        profile,
        Self::detect_esm_blocks(content, &mut lines, flavor)
    );

    let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));

    profile_section!(
        "Headings & blockquotes",
        profile,
        Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
    );

    let code_spans = profile_section!("Code spans", profile, Self::parse_code_spans(content, &lines));

    // Flag every line after the first line of a multi-line code span.
    // Span line numbers are 1-based, hence the `- 1` when indexing `lines`.
    for span in &code_spans {
        if span.end_line > span.line {
            for line_num in (span.line + 1)..=span.end_line {
                if let Some(line_info) = lines.get_mut(line_num - 1) {
                    line_info.in_code_span_continuation = true;
                }
            }
        }
    }

    let (links, broken_links, footnote_refs) = profile_section!(
        "Links",
        profile,
        Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
    );

    let images = profile_section!(
        "Images",
        profile,
        Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
    );

    let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));

    // Index definitions by lower-cased id; when two ids collide, the later
    // definition's index replaces the earlier one in the map.
    let reference_defs_map: HashMap<String, usize> = reference_defs
        .iter()
        .enumerate()
        .map(|(idx, def)| (def.id.to_lowercase(), idx))
        .collect();

    let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));

    let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));

    let table_blocks = profile_section!(
        "Table blocks",
        profile,
        crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
            content,
            &code_blocks,
            &code_spans,
            &html_comment_ranges,
        )
    );

    let line_index = profile_section!(
        "Line index",
        profile,
        crate::utils::range_utils::LineIndex::new(content)
    );

    let jinja_ranges = profile_section!(
        "Jinja ranges",
        profile,
        crate::utils::jinja_utils::find_jinja_ranges(content)
    );

    Self {
        content,
        line_offsets,
        code_blocks,
        lines,
        links,
        images,
        broken_links,
        footnote_refs,
        reference_defs,
        reference_defs_map,
        // Already computed above, so the cache starts out filled.
        code_spans_cache: OnceLock::from(Arc::new(code_spans)),
        math_spans_cache: OnceLock::new(),
        list_blocks,
        char_frequency,
        html_tags_cache: OnceLock::new(),
        // Produced by the basic line info pass, so the cache starts out filled.
        emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
        table_rows_cache: OnceLock::new(),
        bare_urls_cache: OnceLock::new(),
        has_mixed_list_nesting_cache: OnceLock::new(),
        html_comment_ranges,
        table_blocks,
        line_index,
        jinja_ranges,
        flavor,
        source_file,
    }
}
758
759 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
761 Arc::clone(
762 self.code_spans_cache
763 .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
764 )
765 }
766
767 pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
769 Arc::clone(
770 self.math_spans_cache
771 .get_or_init(|| Arc::new(Self::parse_math_spans(self.content, &self.lines))),
772 )
773 }
774
775 pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
777 let math_spans = self.math_spans();
778 math_spans
779 .iter()
780 .any(|span| byte_pos >= span.byte_offset && byte_pos < span.byte_end)
781 }
782
783 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
785 &self.html_comment_ranges
786 }
787
788 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
790 Arc::clone(self.html_tags_cache.get_or_init(|| {
791 Arc::new(Self::parse_html_tags(
792 self.content,
793 &self.lines,
794 &self.code_blocks,
795 self.flavor,
796 ))
797 }))
798 }
799
800 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
802 Arc::clone(
803 self.emphasis_spans_cache
804 .get()
805 .expect("emphasis_spans_cache initialized during construction"),
806 )
807 }
808
809 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
811 Arc::clone(
812 self.table_rows_cache
813 .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
814 )
815 }
816
817 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
819 Arc::clone(
820 self.bare_urls_cache
821 .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
822 )
823 }
824
825 pub fn has_mixed_list_nesting(&self) -> bool {
829 *self
830 .has_mixed_list_nesting_cache
831 .get_or_init(|| self.compute_mixed_list_nesting())
832 }
833
834 fn compute_mixed_list_nesting(&self) -> bool {
836 let mut stack: Vec<(usize, bool)> = Vec::new();
841 let mut last_was_blank = false;
842
843 for line_info in &self.lines {
844 if line_info.in_code_block
846 || line_info.in_front_matter
847 || line_info.in_mkdocstrings
848 || line_info.in_html_comment
849 || line_info.in_esm_block
850 {
851 continue;
852 }
853
854 if line_info.is_blank {
856 last_was_blank = true;
857 continue;
858 }
859
860 if let Some(list_item) = &line_info.list_item {
861 let current_pos = if list_item.marker_column == 1 {
863 0
864 } else {
865 list_item.marker_column
866 };
867
868 if last_was_blank && current_pos == 0 {
870 stack.clear();
871 }
872 last_was_blank = false;
873
874 while let Some(&(pos, _)) = stack.last() {
876 if pos >= current_pos {
877 stack.pop();
878 } else {
879 break;
880 }
881 }
882
883 if let Some(&(_, parent_is_ordered)) = stack.last()
885 && parent_is_ordered != list_item.is_ordered
886 {
887 return true; }
889
890 stack.push((current_pos, list_item.is_ordered));
891 } else {
892 last_was_blank = false;
894 }
895 }
896
897 false
898 }
899
900 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
902 match self.line_offsets.binary_search(&offset) {
903 Ok(line) => (line + 1, 1),
904 Err(line) => {
905 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
906 (line, offset - line_start + 1)
907 }
908 }
909 }
910
911 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
913 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
915 return true;
916 }
917
918 self.code_spans()
920 .iter()
921 .any(|span| pos >= span.byte_offset && pos < span.byte_end)
922 }
923
924 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
926 if line_num > 0 {
927 self.lines.get(line_num - 1)
928 } else {
929 None
930 }
931 }
932
933 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
935 self.line_info(line_num).map(|info| info.byte_offset)
936 }
937
938 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
940 let normalized_id = ref_id.to_lowercase();
941 self.reference_defs_map
942 .get(&normalized_id)
943 .map(|&idx| self.reference_defs[idx].url.as_str())
944 }
945
946 pub fn get_reference_def(&self, ref_id: &str) -> Option<&ReferenceDef> {
948 let normalized_id = ref_id.to_lowercase();
949 self.reference_defs_map
950 .get(&normalized_id)
951 .map(|&idx| &self.reference_defs[idx])
952 }
953
954 pub fn has_reference_def(&self, ref_id: &str) -> bool {
956 let normalized_id = ref_id.to_lowercase();
957 self.reference_defs_map.contains_key(&normalized_id)
958 }
959
960 pub fn is_in_list_block(&self, line_num: usize) -> bool {
962 self.list_blocks
963 .iter()
964 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
965 }
966
967 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
969 self.list_blocks
970 .iter()
971 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
972 }
973
974 pub fn is_in_code_block(&self, line_num: usize) -> bool {
978 if line_num == 0 || line_num > self.lines.len() {
979 return false;
980 }
981 self.lines[line_num - 1].in_code_block
982 }
983
984 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
986 if line_num == 0 || line_num > self.lines.len() {
987 return false;
988 }
989 self.lines[line_num - 1].in_front_matter
990 }
991
992 pub fn is_in_html_block(&self, line_num: usize) -> bool {
994 if line_num == 0 || line_num > self.lines.len() {
995 return false;
996 }
997 self.lines[line_num - 1].in_html_block
998 }
999
1000 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
1002 if line_num == 0 || line_num > self.lines.len() {
1003 return false;
1004 }
1005
1006 let col_0indexed = if col > 0 { col - 1 } else { 0 };
1010 let code_spans = self.code_spans();
1011 code_spans.iter().any(|span| {
1012 if line_num < span.line || line_num > span.end_line {
1014 return false;
1015 }
1016
1017 if span.line == span.end_line {
1018 col_0indexed >= span.start_col && col_0indexed < span.end_col
1020 } else if line_num == span.line {
1021 col_0indexed >= span.start_col
1023 } else if line_num == span.end_line {
1024 col_0indexed < span.end_col
1026 } else {
1027 true
1029 }
1030 })
1031 }
1032
1033 #[inline]
1035 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
1036 let code_spans = self.code_spans();
1037 code_spans
1038 .iter()
1039 .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
1040 }
1041
1042 #[inline]
1045 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
1046 self.reference_defs
1047 .iter()
1048 .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
1049 }
1050
1051 #[inline]
1055 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
1056 self.html_comment_ranges
1057 .iter()
1058 .any(|range| byte_pos >= range.start && byte_pos < range.end)
1059 }
1060
1061 #[inline]
1064 pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1065 self.html_tags()
1066 .iter()
1067 .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
1068 }
1069
1070 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1072 self.jinja_ranges
1073 .iter()
1074 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1075 }
1076
1077 pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1079 self.reference_defs.iter().any(|def| {
1080 if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1081 byte_pos >= start && byte_pos < end
1082 } else {
1083 false
1084 }
1085 })
1086 }
1087
1088 pub fn has_char(&self, ch: char) -> bool {
1090 match ch {
1091 '#' => self.char_frequency.hash_count > 0,
1092 '*' => self.char_frequency.asterisk_count > 0,
1093 '_' => self.char_frequency.underscore_count > 0,
1094 '-' => self.char_frequency.hyphen_count > 0,
1095 '+' => self.char_frequency.plus_count > 0,
1096 '>' => self.char_frequency.gt_count > 0,
1097 '|' => self.char_frequency.pipe_count > 0,
1098 '[' => self.char_frequency.bracket_count > 0,
1099 '`' => self.char_frequency.backtick_count > 0,
1100 '<' => self.char_frequency.lt_count > 0,
1101 '!' => self.char_frequency.exclamation_count > 0,
1102 '\n' => self.char_frequency.newline_count > 0,
1103 _ => self.content.contains(ch), }
1105 }
1106
1107 pub fn char_count(&self, ch: char) -> usize {
1109 match ch {
1110 '#' => self.char_frequency.hash_count,
1111 '*' => self.char_frequency.asterisk_count,
1112 '_' => self.char_frequency.underscore_count,
1113 '-' => self.char_frequency.hyphen_count,
1114 '+' => self.char_frequency.plus_count,
1115 '>' => self.char_frequency.gt_count,
1116 '|' => self.char_frequency.pipe_count,
1117 '[' => self.char_frequency.bracket_count,
1118 '`' => self.char_frequency.backtick_count,
1119 '<' => self.char_frequency.lt_count,
1120 '!' => self.char_frequency.exclamation_count,
1121 '\n' => self.char_frequency.newline_count,
1122 _ => self.content.matches(ch).count(), }
1124 }
1125
1126 pub fn likely_has_headings(&self) -> bool {
1128 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
1130
1131 pub fn likely_has_lists(&self) -> bool {
1133 self.char_frequency.asterisk_count > 0
1134 || self.char_frequency.hyphen_count > 0
1135 || self.char_frequency.plus_count > 0
1136 }
1137
1138 pub fn likely_has_emphasis(&self) -> bool {
1140 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1141 }
1142
1143 pub fn likely_has_tables(&self) -> bool {
1145 self.char_frequency.pipe_count > 2
1146 }
1147
1148 pub fn likely_has_blockquotes(&self) -> bool {
1150 self.char_frequency.gt_count > 0
1151 }
1152
1153 pub fn likely_has_code(&self) -> bool {
1155 self.char_frequency.backtick_count > 0
1156 }
1157
1158 pub fn likely_has_links_or_images(&self) -> bool {
1160 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1161 }
1162
1163 pub fn likely_has_html(&self) -> bool {
1165 self.char_frequency.lt_count > 0
1166 }
1167
1168 pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1173 if let Some(line_info) = self.lines.get(line_idx)
1174 && let Some(ref bq) = line_info.blockquote
1175 {
1176 bq.prefix.trim_end().to_string()
1177 } else {
1178 String::new()
1179 }
1180 }
1181
1182 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1184 self.html_tags()
1185 .iter()
1186 .filter(|tag| tag.line == line_num)
1187 .cloned()
1188 .collect()
1189 }
1190
1191 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1193 self.emphasis_spans()
1194 .iter()
1195 .filter(|span| span.line == line_num)
1196 .cloned()
1197 .collect()
1198 }
1199
1200 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1202 self.table_rows()
1203 .iter()
1204 .filter(|row| row.line == line_num)
1205 .cloned()
1206 .collect()
1207 }
1208
1209 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1211 self.bare_urls()
1212 .iter()
1213 .filter(|url| url.line == line_num)
1214 .cloned()
1215 .collect()
1216 }
1217
1218 #[inline]
1224 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1225 let idx = match lines.binary_search_by(|line| {
1227 if byte_offset < line.byte_offset {
1228 std::cmp::Ordering::Greater
1229 } else if byte_offset > line.byte_offset + line.byte_len {
1230 std::cmp::Ordering::Less
1231 } else {
1232 std::cmp::Ordering::Equal
1233 }
1234 }) {
1235 Ok(idx) => idx,
1236 Err(idx) => idx.saturating_sub(1),
1237 };
1238
1239 let line = &lines[idx];
1240 let line_num = idx + 1;
1241 let col = byte_offset.saturating_sub(line.byte_offset);
1242
1243 (idx, line_num, col)
1244 }
1245
1246 #[inline]
1248 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1249 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1251
1252 if idx > 0 {
1254 let span = &code_spans[idx - 1];
1255 if offset >= span.byte_offset && offset < span.byte_end {
1256 return true;
1257 }
1258 }
1259
1260 false
1261 }
1262
1263 fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1267 use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1268
1269 let mut link_ranges = Vec::new();
1270 let mut options = Options::empty();
1271 options.insert(Options::ENABLE_WIKILINKS);
1272 options.insert(Options::ENABLE_FOOTNOTES);
1273
1274 let parser = Parser::new_ext(content, options).into_offset_iter();
1275 let mut link_stack: Vec<usize> = Vec::new();
1276
1277 for (event, range) in parser {
1278 match event {
1279 Event::Start(Tag::Link { .. }) => {
1280 link_stack.push(range.start);
1281 }
1282 Event::End(TagEnd::Link) => {
1283 if let Some(start_pos) = link_stack.pop() {
1284 link_ranges.push((start_pos, range.end));
1285 }
1286 }
1287 _ => {}
1288 }
1289 }
1290
1291 link_ranges
1292 }
1293
1294 fn parse_links(
1296 content: &'a str,
1297 lines: &[LineInfo],
1298 code_blocks: &[(usize, usize)],
1299 code_spans: &[CodeSpan],
1300 flavor: MarkdownFlavor,
1301 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1302 ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1303 use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1304 use std::collections::HashSet;
1305
1306 let mut links = Vec::with_capacity(content.len() / 500);
1307 let mut broken_links = Vec::new();
1308 let mut footnote_refs = Vec::new();
1309
1310 let mut found_positions = HashSet::new();
1312
1313 let mut options = Options::empty();
1323 options.insert(Options::ENABLE_WIKILINKS);
1324 options.insert(Options::ENABLE_FOOTNOTES);
1325
1326 let parser = Parser::new_with_broken_link_callback(
1327 content,
1328 options,
1329 Some(|link: BrokenLink<'_>| {
1330 broken_links.push(BrokenLinkInfo {
1331 reference: link.reference.to_string(),
1332 span: link.span.clone(),
1333 });
1334 None
1335 }),
1336 )
1337 .into_offset_iter();
1338
1339 let mut link_stack: Vec<(
1340 usize,
1341 usize,
1342 pulldown_cmark::CowStr<'a>,
1343 LinkType,
1344 pulldown_cmark::CowStr<'a>,
1345 )> = Vec::new();
1346 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1349 match event {
1350 Event::Start(Tag::Link {
1351 link_type,
1352 dest_url,
1353 id,
1354 ..
1355 }) => {
1356 link_stack.push((range.start, range.end, dest_url, link_type, id));
1358 text_chunks.clear();
1359 }
1360 Event::Text(text) if !link_stack.is_empty() => {
1361 text_chunks.push((text.to_string(), range.start, range.end));
1363 }
1364 Event::Code(code) if !link_stack.is_empty() => {
1365 let code_text = format!("`{code}`");
1367 text_chunks.push((code_text, range.start, range.end));
1368 }
1369 Event::End(TagEnd::Link) => {
1370 if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1371 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1373 text_chunks.clear();
1374 continue;
1375 }
1376
1377 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1379
1380 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1382 text_chunks.clear();
1383 continue;
1384 }
1385
1386 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1387
1388 let is_reference = matches!(
1389 link_type,
1390 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1391 );
1392
1393 let link_text = if start_pos < content.len() {
1396 let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1397
1398 let mut close_pos = None;
1402 let mut depth = 0;
1403 let mut in_code_span = false;
1404
1405 for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1406 let mut backslash_count = 0;
1408 let mut j = i;
1409 while j > 0 && link_bytes[j - 1] == b'\\' {
1410 backslash_count += 1;
1411 j -= 1;
1412 }
1413 let is_escaped = backslash_count % 2 != 0;
1414
1415 if byte == b'`' && !is_escaped {
1417 in_code_span = !in_code_span;
1418 }
1419
1420 if !is_escaped && !in_code_span {
1422 if byte == b'[' {
1423 depth += 1;
1424 } else if byte == b']' {
1425 if depth == 0 {
1426 close_pos = Some(i);
1428 break;
1429 } else {
1430 depth -= 1;
1431 }
1432 }
1433 }
1434 }
1435
1436 if let Some(pos) = close_pos {
1437 Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1438 } else {
1439 Cow::Borrowed("")
1440 }
1441 } else {
1442 Cow::Borrowed("")
1443 };
1444
1445 let reference_id = if is_reference && !ref_id.is_empty() {
1447 Some(Cow::Owned(ref_id.to_lowercase()))
1448 } else if is_reference {
1449 Some(Cow::Owned(link_text.to_lowercase()))
1451 } else {
1452 None
1453 };
1454
1455 found_positions.insert(start_pos);
1457
1458 links.push(ParsedLink {
1459 line: line_num,
1460 start_col: col_start,
1461 end_col: col_end,
1462 byte_offset: start_pos,
1463 byte_end: range.end,
1464 text: link_text,
1465 url: Cow::Owned(url.to_string()),
1466 is_reference,
1467 reference_id,
1468 link_type,
1469 });
1470
1471 text_chunks.clear();
1472 }
1473 }
1474 Event::FootnoteReference(footnote_id) => {
1475 if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1478 continue;
1479 }
1480
1481 let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1482 footnote_refs.push(FootnoteRef {
1483 id: footnote_id.to_string(),
1484 line: line_num,
1485 byte_offset: range.start,
1486 byte_end: range.end,
1487 });
1488 }
1489 _ => {}
1490 }
1491 }
1492
1493 for cap in LINK_PATTERN.captures_iter(content) {
1497 let full_match = cap.get(0).unwrap();
1498 let match_start = full_match.start();
1499 let match_end = full_match.end();
1500
1501 if found_positions.contains(&match_start) {
1503 continue;
1504 }
1505
1506 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1508 continue;
1509 }
1510
1511 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1513 continue;
1514 }
1515
1516 if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1518 continue;
1519 }
1520
1521 if Self::is_offset_in_code_span(code_spans, match_start) {
1523 continue;
1524 }
1525
1526 if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1528 continue;
1529 }
1530
1531 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1533
1534 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1536 continue;
1537 }
1538
1539 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1540
1541 let text = cap.get(1).map_or("", |m| m.as_str());
1542
1543 if let Some(ref_id) = cap.get(6) {
1545 let ref_id_str = ref_id.as_str();
1546 let normalized_ref = if ref_id_str.is_empty() {
1547 Cow::Owned(text.to_lowercase()) } else {
1549 Cow::Owned(ref_id_str.to_lowercase())
1550 };
1551
1552 links.push(ParsedLink {
1554 line: line_num,
1555 start_col: col_start,
1556 end_col: col_end,
1557 byte_offset: match_start,
1558 byte_end: match_end,
1559 text: Cow::Borrowed(text),
1560 url: Cow::Borrowed(""), is_reference: true,
1562 reference_id: Some(normalized_ref),
1563 link_type: LinkType::Reference, });
1565 }
1566 }
1567
1568 (links, broken_links, footnote_refs)
1569 }
1570
1571 fn parse_images(
1573 content: &'a str,
1574 lines: &[LineInfo],
1575 code_blocks: &[(usize, usize)],
1576 code_spans: &[CodeSpan],
1577 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1578 ) -> Vec<ParsedImage<'a>> {
1579 use crate::utils::skip_context::is_in_html_comment_ranges;
1580 use std::collections::HashSet;
1581
1582 let mut images = Vec::with_capacity(content.len() / 1000);
1584 let mut found_positions = HashSet::new();
1585
1586 let parser = Parser::new(content).into_offset_iter();
1588 let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1589 Vec::new();
1590 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1593 match event {
1594 Event::Start(Tag::Image {
1595 link_type,
1596 dest_url,
1597 id,
1598 ..
1599 }) => {
1600 image_stack.push((range.start, dest_url, link_type, id));
1601 text_chunks.clear();
1602 }
1603 Event::Text(text) if !image_stack.is_empty() => {
1604 text_chunks.push((text.to_string(), range.start, range.end));
1605 }
1606 Event::Code(code) if !image_stack.is_empty() => {
1607 let code_text = format!("`{code}`");
1608 text_chunks.push((code_text, range.start, range.end));
1609 }
1610 Event::End(TagEnd::Image) => {
1611 if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1612 if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1614 continue;
1615 }
1616
1617 if Self::is_offset_in_code_span(code_spans, start_pos) {
1619 continue;
1620 }
1621
1622 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1624 continue;
1625 }
1626
1627 let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1629 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1630
1631 let is_reference = matches!(
1632 link_type,
1633 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1634 );
1635
1636 let alt_text = if start_pos < content.len() {
1639 let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1640
1641 let mut close_pos = None;
1644 let mut depth = 0;
1645
1646 if image_bytes.len() > 2 {
1647 for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1648 let mut backslash_count = 0;
1650 let mut j = i;
1651 while j > 0 && image_bytes[j - 1] == b'\\' {
1652 backslash_count += 1;
1653 j -= 1;
1654 }
1655 let is_escaped = backslash_count % 2 != 0;
1656
1657 if !is_escaped {
1658 if byte == b'[' {
1659 depth += 1;
1660 } else if byte == b']' {
1661 if depth == 0 {
1662 close_pos = Some(i);
1664 break;
1665 } else {
1666 depth -= 1;
1667 }
1668 }
1669 }
1670 }
1671 }
1672
1673 if let Some(pos) = close_pos {
1674 Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1675 } else {
1676 Cow::Borrowed("")
1677 }
1678 } else {
1679 Cow::Borrowed("")
1680 };
1681
1682 let reference_id = if is_reference && !ref_id.is_empty() {
1683 Some(Cow::Owned(ref_id.to_lowercase()))
1684 } else if is_reference {
1685 Some(Cow::Owned(alt_text.to_lowercase())) } else {
1687 None
1688 };
1689
1690 found_positions.insert(start_pos);
1691 images.push(ParsedImage {
1692 line: line_num,
1693 start_col: col_start,
1694 end_col: col_end,
1695 byte_offset: start_pos,
1696 byte_end: range.end,
1697 alt_text,
1698 url: Cow::Owned(url.to_string()),
1699 is_reference,
1700 reference_id,
1701 link_type,
1702 });
1703 }
1704 }
1705 _ => {}
1706 }
1707 }
1708
1709 for cap in IMAGE_PATTERN.captures_iter(content) {
1711 let full_match = cap.get(0).unwrap();
1712 let match_start = full_match.start();
1713 let match_end = full_match.end();
1714
1715 if found_positions.contains(&match_start) {
1717 continue;
1718 }
1719
1720 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1722 continue;
1723 }
1724
1725 if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1727 || Self::is_offset_in_code_span(code_spans, match_start)
1728 || is_in_html_comment_ranges(html_comment_ranges, match_start)
1729 {
1730 continue;
1731 }
1732
1733 if let Some(ref_id) = cap.get(6) {
1735 let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1736 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1737 let alt_text = cap.get(1).map_or("", |m| m.as_str());
1738 let ref_id_str = ref_id.as_str();
1739 let normalized_ref = if ref_id_str.is_empty() {
1740 Cow::Owned(alt_text.to_lowercase())
1741 } else {
1742 Cow::Owned(ref_id_str.to_lowercase())
1743 };
1744
1745 images.push(ParsedImage {
1746 line: line_num,
1747 start_col: col_start,
1748 end_col: col_end,
1749 byte_offset: match_start,
1750 byte_end: match_end,
1751 alt_text: Cow::Borrowed(alt_text),
1752 url: Cow::Borrowed(""),
1753 is_reference: true,
1754 reference_id: Some(normalized_ref),
1755 link_type: LinkType::Reference, });
1757 }
1758 }
1759
1760 images
1761 }
1762
1763 fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1765 let mut refs = Vec::with_capacity(lines.len() / 20); for (line_idx, line_info) in lines.iter().enumerate() {
1769 if line_info.in_code_block {
1771 continue;
1772 }
1773
1774 let line = line_info.content(content);
1775 let line_num = line_idx + 1;
1776
1777 if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1778 let id_raw = cap.get(1).unwrap().as_str();
1779
1780 if id_raw.starts_with('^') {
1783 continue;
1784 }
1785
1786 let id = id_raw.to_lowercase();
1787 let url = cap.get(2).unwrap().as_str().to_string();
1788 let title_match = cap.get(3).or_else(|| cap.get(4));
1789 let title = title_match.map(|m| m.as_str().to_string());
1790
1791 let match_obj = cap.get(0).unwrap();
1794 let byte_offset = line_info.byte_offset + match_obj.start();
1795 let byte_end = line_info.byte_offset + match_obj.end();
1796
1797 let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1799 let start = line_info.byte_offset + m.start().saturating_sub(1);
1801 let end = line_info.byte_offset + m.end() + 1; (Some(start), Some(end))
1803 } else {
1804 (None, None)
1805 };
1806
1807 refs.push(ReferenceDef {
1808 line: line_num,
1809 id,
1810 url,
1811 title,
1812 byte_offset,
1813 byte_end,
1814 title_byte_start,
1815 title_byte_end,
1816 });
1817 }
1818 }
1819
1820 refs
1821 }
1822
#[inline]
/// Splits a line into its blockquote prefix and the text that follows it.
///
/// Each nesting level consumes optional leading whitespace, one `>`, and at most one
/// following space or tab. Returns `None` when the line, ignoring leading whitespace,
/// does not begin with `>`.
fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
    let mut rest = line;
    let mut consumed = 0usize;

    while let Some(after_marker) = rest.trim_start().strip_prefix('>') {
        // Leading whitespace plus the `>` itself.
        consumed += rest.len() - after_marker.len();

        rest = match after_marker.strip_prefix([' ', '\t']) {
            Some(tail) => {
                consumed += 1;
                tail
            }
            None => after_marker,
        };
    }

    // Nothing consumed means no marker was ever seen.
    if consumed == 0 {
        return None;
    }

    Some((&line[..consumed], rest))
}
1863
1864 fn detect_list_items_and_emphasis_with_pulldown(
1888 content: &str,
1889 line_offsets: &[usize],
1890 flavor: MarkdownFlavor,
1891 front_matter_end: usize,
1892 code_blocks: &[(usize, usize)],
1893 ) -> (ListItemMap, Vec<EmphasisSpan>) {
1894 use std::collections::HashMap;
1895
1896 let mut list_items = HashMap::new();
1897 let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
1898
1899 let mut options = Options::empty();
1900 options.insert(Options::ENABLE_TABLES);
1901 options.insert(Options::ENABLE_FOOTNOTES);
1902 options.insert(Options::ENABLE_STRIKETHROUGH);
1903 options.insert(Options::ENABLE_TASKLISTS);
1904 options.insert(Options::ENABLE_GFM);
1906
1907 let _ = flavor;
1909
1910 let parser = Parser::new_ext(content, options).into_offset_iter();
1911 let mut list_depth: usize = 0;
1912 let mut list_stack: Vec<bool> = Vec::new();
1913
1914 for (event, range) in parser {
1915 match event {
1916 Event::Start(Tag::Emphasis) | Event::Start(Tag::Strong) => {
1918 let marker_count = if matches!(event, Event::Start(Tag::Strong)) {
1919 2
1920 } else {
1921 1
1922 };
1923 let match_start = range.start;
1924 let match_end = range.end;
1925
1926 if !CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
1928 let marker = content[match_start..].chars().next().unwrap_or('*');
1930 if marker == '*' || marker == '_' {
1931 let content_start = match_start + marker_count;
1933 let content_end = if match_end >= marker_count {
1934 match_end - marker_count
1935 } else {
1936 match_end
1937 };
1938 let content_part = if content_start < content_end && content_end <= content.len() {
1939 &content[content_start..content_end]
1940 } else {
1941 ""
1942 };
1943
1944 let line_idx = match line_offsets.binary_search(&match_start) {
1946 Ok(idx) => idx,
1947 Err(idx) => idx.saturating_sub(1),
1948 };
1949 let line_num = line_idx + 1;
1950 let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
1951 let col_start = match_start - line_start;
1952 let col_end = match_end - line_start;
1953
1954 emphasis_spans.push(EmphasisSpan {
1955 line: line_num,
1956 start_col: col_start,
1957 end_col: col_end,
1958 byte_offset: match_start,
1959 byte_end: match_end,
1960 marker,
1961 marker_count,
1962 content: content_part.to_string(),
1963 });
1964 }
1965 }
1966 }
1967 Event::Start(Tag::List(start_number)) => {
1968 list_depth += 1;
1969 list_stack.push(start_number.is_some());
1970 }
1971 Event::End(TagEnd::List(_)) => {
1972 list_depth = list_depth.saturating_sub(1);
1973 list_stack.pop();
1974 }
1975 Event::Start(Tag::Item) if list_depth > 0 => {
1976 let current_list_is_ordered = list_stack.last().copied().unwrap_or(false);
1978 let item_start = range.start;
1980
1981 let mut line_idx = match line_offsets.binary_search(&item_start) {
1983 Ok(idx) => idx,
1984 Err(idx) => idx.saturating_sub(1),
1985 };
1986
1987 if item_start < content.len() && content.as_bytes()[item_start] == b'\n' {
1991 line_idx += 1;
1992 }
1993
1994 if front_matter_end > 0 && line_idx < front_matter_end {
1996 continue;
1997 }
1998
1999 if line_idx < line_offsets.len() {
2000 let line_start_byte = line_offsets[line_idx];
2001 let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
2002 let line = &content[line_start_byte..line_end.min(content.len())];
2003
2004 let line = line
2006 .strip_suffix('\n')
2007 .or_else(|| line.strip_suffix("\r\n"))
2008 .unwrap_or(line);
2009
2010 let blockquote_parse = Self::parse_blockquote_prefix(line);
2012 let (blockquote_prefix_len, line_to_parse) = if let Some((prefix, content)) = blockquote_parse {
2013 (prefix.len(), content)
2014 } else {
2015 (0, line)
2016 };
2017
2018 if current_list_is_ordered {
2020 if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
2021 Self::parse_ordered_list(line_to_parse)
2022 {
2023 let marker = format!("{number_str}{delimiter}");
2024 let marker_column = blockquote_prefix_len + leading_spaces.len();
2025 let content_column = marker_column + marker.len() + spacing.len();
2026 let number = number_str.parse().ok();
2027
2028 list_items.entry(line_start_byte).or_insert((
2029 true,
2030 marker,
2031 marker_column,
2032 content_column,
2033 number,
2034 ));
2035 }
2036 } else if let Some((leading_spaces, marker, spacing, _content)) =
2037 Self::parse_unordered_list(line_to_parse)
2038 {
2039 let marker_column = blockquote_prefix_len + leading_spaces.len();
2040 let content_column = marker_column + 1 + spacing.len();
2041
2042 list_items.entry(line_start_byte).or_insert((
2043 false,
2044 marker.to_string(),
2045 marker_column,
2046 content_column,
2047 None,
2048 ));
2049 }
2050 }
2051 }
2052 _ => {}
2053 }
2054 }
2055
2056 (list_items, emphasis_spans)
2057 }
2058
#[inline]
/// Splits a line that starts with an unordered list marker (`-`, `*` or `+`).
///
/// Returns `(indent, marker, spacing, text)` where `indent` and `spacing` are runs of
/// spaces/tabs before and after the marker, or `None` when the first non-indent
/// character is not a marker (including blank lines).
fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
    let is_blank = |c: char| c == ' ' || c == '\t';

    let after_indent = line.trim_start_matches(is_blank);
    let (indent, _) = line.split_at(line.len() - after_indent.len());

    let mut chars = after_indent.chars();
    let marker = chars.next().filter(|c| matches!(c, '-' | '*' | '+'))?;

    let after_marker = chars.as_str();
    let text = after_marker.trim_start_matches(is_blank);
    let spacing = &after_marker[..after_marker.len() - text.len()];

    Some((indent, marker, spacing, text))
}
2091
#[inline]
/// Splits a line that starts with an ordered list marker (`<digits>.` or `<digits>)`).
///
/// Returns `(indent, number, delimiter, spacing, text)`; `indent` and `spacing` are
/// runs of spaces/tabs. Yields `None` when there are no digits or the digits are not
/// immediately followed by `.` or `)`.
fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
    let is_blank = |c: char| c == ' ' || c == '\t';

    let after_indent = line.trim_start_matches(is_blank);
    let indent = &line[..line.len() - after_indent.len()];

    let after_number = after_indent.trim_start_matches(|c: char| c.is_ascii_digit());
    let number = &after_indent[..after_indent.len() - after_number.len()];
    if number.is_empty() {
        return None;
    }

    let mut chars = after_number.chars();
    let delimiter = chars.next().filter(|c| matches!(c, '.' | ')'))?;

    let after_delimiter = chars.as_str();
    let text = after_delimiter.trim_start_matches(is_blank);
    let spacing = &after_delimiter[..after_delimiter.len() - text.len()];

    Some((indent, number, delimiter, spacing, text))
}
2139
/// Builds a per-line flag vector telling which lines overlap a code block.
///
/// For every `(start, end)` byte range the start is moved back and the end moved
/// forward to the nearest char boundary of `content`, then every line from the one
/// containing the start up to (but excluding) the first line starting at or after
/// the end is flagged. The result has one entry per element of `line_offsets`.
fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
    let total = content.len();
    let mut flags = vec![false; line_offsets.len()];

    for &(block_start, block_end) in code_blocks {
        // Nearest char boundary at or before the start (offset 0 always qualifies).
        let lo = (0..=block_start)
            .rev()
            .find(|&pos| content.is_char_boundary(pos))
            .unwrap_or(0);

        // Nearest char boundary at or after the end, clamped to the content length.
        let hi = (block_end.min(total)..=total)
            .find(|&pos| content.is_char_boundary(pos))
            .unwrap_or(total);

        let first = line_offsets.partition_point(|&offset| offset <= lo).saturating_sub(1);
        let past_last = line_offsets.partition_point(|&offset| offset < hi);

        // An inverted range yields `None` here and therefore marks nothing.
        if let Some(span) = flags.get_mut(first..past_last) {
            span.fill(true);
        }
    }

    flags
}
2199
/// Flags every line that belongs to a `$$ … $$` display-math block.
///
/// A line whose trimmed text is exactly `$$` toggles the state and is itself flagged;
/// lines in between are flagged too. Lines marked in `code_block_map` are never
/// flagged and never toggle the state. The result has one entry per `content.lines()`.
fn compute_math_block_line_map(content: &str, code_block_map: &[bool]) -> Vec<bool> {
    let mut math_open = false;

    content
        .lines()
        .enumerate()
        .map(|(idx, text)| {
            let in_code = code_block_map.get(idx).copied().unwrap_or(false);
            if in_code {
                return false;
            }

            if text.trim() == "$$" {
                // Both the opening and the closing delimiter count as math lines.
                math_open = !math_open;
                true
            } else {
                math_open
            }
        })
        .collect()
}
2237
2238 fn compute_basic_line_info(
2241 content: &str,
2242 line_offsets: &[usize],
2243 code_blocks: &[(usize, usize)],
2244 flavor: MarkdownFlavor,
2245 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2246 autodoc_ranges: &[crate::utils::skip_context::ByteRange],
2247 ) -> (Vec<LineInfo>, Vec<EmphasisSpan>) {
2248 let content_lines: Vec<&str> = content.lines().collect();
2249 let mut lines = Vec::with_capacity(content_lines.len());
2250
2251 let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
2253
2254 let math_block_map = Self::compute_math_block_line_map(content, &code_block_map);
2256
2257 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2260
2261 let (list_item_map, emphasis_spans) = Self::detect_list_items_and_emphasis_with_pulldown(
2264 content,
2265 line_offsets,
2266 flavor,
2267 front_matter_end,
2268 code_blocks,
2269 );
2270
2271 for (i, line) in content_lines.iter().enumerate() {
2272 let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
2273 let indent = line.len() - line.trim_start().len();
2274 let visual_indent = ElementCache::calculate_indentation_width_default(line);
2276
2277 let blockquote_parse = Self::parse_blockquote_prefix(line);
2279
2280 let is_blank = if let Some((_, content)) = blockquote_parse {
2282 content.trim().is_empty()
2284 } else {
2285 line.trim().is_empty()
2286 };
2287
2288 let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
2290
2291 let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
2293 && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
2294 let line_end_offset = byte_offset + line.len();
2297 let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
2298 html_comment_ranges,
2299 byte_offset,
2300 line_end_offset,
2301 );
2302 let list_item =
2305 list_item_map
2306 .get(&byte_offset)
2307 .map(
2308 |(is_ordered, marker, marker_column, content_column, number)| ListItemInfo {
2309 marker: marker.clone(),
2310 is_ordered: *is_ordered,
2311 number: *number,
2312 marker_column: *marker_column,
2313 content_column: *content_column,
2314 },
2315 );
2316
2317 let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2320 let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2321
2322 let in_math_block = math_block_map.get(i).copied().unwrap_or(false);
2324
2325 lines.push(LineInfo {
2326 byte_offset,
2327 byte_len: line.len(),
2328 indent,
2329 visual_indent,
2330 is_blank,
2331 in_code_block,
2332 in_front_matter,
2333 in_html_block: false, in_html_comment,
2335 list_item,
2336 heading: None, blockquote: None, in_mkdocstrings,
2339 in_esm_block: false, in_code_span_continuation: false, is_horizontal_rule: is_hr,
2342 in_math_block,
2343 });
2344 }
2345
2346 (lines, emphasis_spans)
2347 }
2348
/// Second pass over the lines: fills in `blockquote` and `heading` on each `LineInfo`.
///
/// For every line outside front matter a blockquote record is attached when
/// `parse_blockquote_detailed` recognises one. Heading detection (ATX first, then
/// Setext via the following line) is skipped for lines in code blocks, front matter,
/// HTML blocks and blank lines.
///
/// - `content`: full document text; split with `lines()` and indexed in parallel with `lines`.
/// - `lines`: line records to update in place.
/// - `flavor`: for `MkDocs`, snippet section start/end lines are not treated as ATX headings.
/// - `html_comment_ranges`: lines starting inside one of these ranges get no heading.
/// - `link_byte_ranges`: a line whose start offset lies strictly inside one of these
///   ranges is not treated as an ATX heading.
fn detect_headings_and_blockquotes(
    content: &str,
    lines: &mut [LineInfo],
    flavor: MarkdownFlavor,
    html_comment_ranges: &[crate::utils::skip_context::ByteRange],
    link_byte_ranges: &[(usize, usize)],
) {
    // Groups: 1 = leading whitespace, 2 = one to six `#`, 3 = whitespace after, 4 = rest.
    static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
        LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
    // A line made only of `=` or only of `-`, with optional surrounding whitespace.
    static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
        LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());

    let content_lines: Vec<&str> = content.lines().collect();

    let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);

    for i in 0..lines.len() {
        // NOTE(review): assumes `lines` has no more entries than `content.lines()` — confirm with caller.
        let line = content_lines[i];

        // Blockquote info is recorded even for lines in code blocks; only front matter is excluded.
        if !(front_matter_end > 0 && i < front_matter_end)
            && let Some(bq) = parse_blockquote_detailed(line)
        {
            let nesting_level = bq.markers.len();
            let marker_column = bq.indent.len();
            let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
            let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
            let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
            let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();

            lines[i].blockquote = Some(BlockquoteInfo {
                nesting_level,
                indent: bq.indent.to_string(),
                marker_column,
                prefix,
                content: bq.content.to_string(),
                has_no_space_after_marker: has_no_space,
                has_multiple_spaces_after_marker: has_multiple_spaces,
                needs_md028_fix,
            });

            // A horizontal rule inside a blockquote still counts as a horizontal rule.
            if !lines[i].in_code_block && is_horizontal_rule_content(bq.content.trim()) {
                lines[i].is_horizontal_rule = true;
            }
        }

        // Everything below is heading detection; skip contexts where headings cannot occur.
        if lines[i].in_code_block {
            continue;
        }

        if front_matter_end > 0 && i < front_matter_end {
            continue;
        }

        if lines[i].in_html_block {
            continue;
        }

        if lines[i].is_blank {
            continue;
        }

        let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
            crate::utils::mkdocs_snippets::is_snippet_section_start(line)
                || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
        } else {
            false
        };

        // --- ATX headings (`# Title`) ---
        if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
            if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
                continue;
            }
            let line_offset = lines[i].byte_offset;
            // A line that starts strictly inside a link's byte range is part of that link.
            if link_byte_ranges
                .iter()
                .any(|&(start, end)| line_offset > start && line_offset < end)
            {
                continue;
            }
            let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
            let hashes = caps.get(2).map_or("", |m| m.as_str());
            let spaces_after = caps.get(3).map_or("", |m| m.as_str());
            let rest = caps.get(4).map_or("", |m| m.as_str());

            let level = hashes.len() as u8;
            let marker_column = leading_spaces.len();

            // Split `rest` into heading text and an optional closing `#` sequence.
            let (text, has_closing, closing_seq) = {
                // Set aside a trailing ` {#id}` so it is not mistaken for closing hashes.
                let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
                    if rest[id_start..].trim_end().ends_with('}') {
                        (&rest[..id_start], &rest[id_start..])
                    } else {
                        (rest, "")
                    }
                } else {
                    (rest, "")
                };

                let trimmed_rest = rest_without_id.trim_end();
                if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
                    let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();

                    let last_hash_char_idx = char_positions
                        .iter()
                        .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);

                    if let Some(mut char_idx) = last_hash_char_idx {
                        // Walk left to the first `#` of the final run of hashes.
                        while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
                            char_idx -= 1;
                        }

                        let start_of_hashes = char_positions[char_idx].0;

                        let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();

                        let potential_closing = &trimmed_rest[start_of_hashes..];
                        let is_all_hashes = potential_closing.chars().all(|c| c == '#');

                        // Only a hash run at the very end, preceded by whitespace (or at
                        // the start of `rest`), is treated as a closing sequence.
                        if is_all_hashes && has_space_before {
                            let closing_hashes = potential_closing.to_string();
                            // Re-attach the custom id after the text so it can be extracted below.
                            let text_part = if !custom_id_part.is_empty() {
                                format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
                            } else {
                                trimmed_rest[..start_of_hashes].trim_end().to_string()
                            };
                            (text_part, true, closing_hashes)
                        } else {
                            (rest.to_string(), false, String::new())
                        }
                    } else {
                        (rest.to_string(), false, String::new())
                    }
                } else {
                    (rest.to_string(), false, String::new())
                }
            };

            let content_column = marker_column + hashes.len() + spaces_after.len();

            let raw_text = text.trim().to_string();
            let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);

            // Without an inline id, accept a standalone attribute list on the next line.
            if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
                let next_line = content_lines[i + 1];
                if !lines[i + 1].in_code_block
                    && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
                    && let Some(next_line_id) =
                        crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
                {
                    custom_id = Some(next_line_id);
                }
            }

            // Valid when whitespace follows the hashes, the heading is empty, the level
            // is above 1, or the text after a lone `#` starts with an uppercase letter.
            let is_valid = !spaces_after.is_empty()
                || rest.is_empty()
                || level > 1
                || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());

            lines[i].heading = Some(HeadingInfo {
                level,
                style: HeadingStyle::ATX,
                marker: hashes.to_string(),
                marker_column,
                content_column,
                text: clean_text,
                custom_id,
                raw_text,
                has_closing_sequence: has_closing,
                closing_sequence: closing_seq,
                is_valid,
            });
        }
        // --- Setext headings (text line followed by `===` or `---`) ---
        else if i + 1 < content_lines.len() && i + 1 < lines.len() {
            let next_line = content_lines[i + 1];
            if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
                if front_matter_end > 0 && i < front_matter_end {
                    continue;
                }

                if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
                {
                    continue;
                }

                let content_line = line.trim();

                // The candidate text line must not itself look like another block
                // construct; each check below rejects one such case.

                // Starts with an unordered list marker character.
                if content_line.starts_with('-') || content_line.starts_with('*') || content_line.starts_with('+') {
                    continue;
                }

                // Three or more underscores and nothing else (ignoring whitespace).
                if content_line.starts_with('_') {
                    let non_ws: String = content_line.chars().filter(|c| !c.is_whitespace()).collect();
                    if non_ws.len() >= 3 && non_ws.chars().all(|c| c == '_') {
                        continue;
                    }
                }

                // Digits followed by `.` or `)`, i.e. an ordered list marker.
                if let Some(first_char) = content_line.chars().next()
                    && first_char.is_ascii_digit()
                {
                    let num_end = content_line.chars().take_while(|c| c.is_ascii_digit()).count();
                    if num_end < content_line.len() {
                        let next = content_line.chars().nth(num_end);
                        if next == Some('.') || next == Some(')') {
                            continue;
                        }
                    }
                }

                // Matches the ATX pattern (reachable here for MkDocs snippet lines).
                if ATX_HEADING_REGEX.is_match(line) {
                    continue;
                }

                // Blockquote line.
                if content_line.starts_with('>') {
                    continue;
                }

                // Code fence opener.
                let trimmed_start = line.trim_start();
                if trimmed_start.len() >= 3 {
                    let first_three: String = trimmed_start.chars().take(3).collect();
                    if first_three == "```" || first_three == "~~~" {
                        continue;
                    }
                }

                // Line starting with `<`.
                if content_line.starts_with('<') {
                    continue;
                }

                let underline = next_line.trim();

                // `=` underline means level 1, `-` underline means level 2.
                let level = if underline.starts_with('=') { 1 } else { 2 };
                let style = if level == 1 {
                    HeadingStyle::Setext1
                } else {
                    HeadingStyle::Setext2
                };

                let raw_text = line.trim().to_string();
                let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);

                // For Setext the standalone attribute list is looked for after the underline.
                if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
                    let attr_line = content_lines[i + 2];
                    if !lines[i + 2].in_code_block
                        && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
                        && let Some(attr_line_id) =
                            crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
                    {
                        custom_id = Some(attr_line_id);
                    }
                }

                lines[i].heading = Some(HeadingInfo {
                    level,
                    style,
                    // The marker is the underline text; its column is the underline's indent.
                    marker: underline.to_string(),
                    marker_column: next_line.len() - next_line.trim_start().len(),
                    content_column: lines[i].indent,
                    text: clean_text,
                    custom_id,
                    raw_text,
                    has_closing_sequence: false,
                    closing_sequence: String::new(),
                    is_valid: true,
                });
            }
        }
    }
}
2682
2683 fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2685 const BLOCK_ELEMENTS: &[&str] = &[
2688 "address",
2689 "article",
2690 "aside",
2691 "audio",
2692 "blockquote",
2693 "canvas",
2694 "details",
2695 "dialog",
2696 "dd",
2697 "div",
2698 "dl",
2699 "dt",
2700 "embed",
2701 "fieldset",
2702 "figcaption",
2703 "figure",
2704 "footer",
2705 "form",
2706 "h1",
2707 "h2",
2708 "h3",
2709 "h4",
2710 "h5",
2711 "h6",
2712 "header",
2713 "hr",
2714 "iframe",
2715 "li",
2716 "main",
2717 "menu",
2718 "nav",
2719 "noscript",
2720 "object",
2721 "ol",
2722 "p",
2723 "picture",
2724 "pre",
2725 "script",
2726 "search",
2727 "section",
2728 "source",
2729 "style",
2730 "summary",
2731 "svg",
2732 "table",
2733 "tbody",
2734 "td",
2735 "template",
2736 "textarea",
2737 "tfoot",
2738 "th",
2739 "thead",
2740 "tr",
2741 "track",
2742 "ul",
2743 "video",
2744 ];
2745
2746 let mut i = 0;
2747 while i < lines.len() {
2748 if lines[i].in_code_block || lines[i].in_front_matter {
2750 i += 1;
2751 continue;
2752 }
2753
2754 let trimmed = lines[i].content(content).trim_start();
2755
2756 if trimmed.starts_with('<') && trimmed.len() > 1 {
2758 let after_bracket = &trimmed[1..];
2760 let is_closing = after_bracket.starts_with('/');
2761 let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2762
2763 let tag_name = tag_start
2765 .chars()
2766 .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2767 .collect::<String>()
2768 .to_lowercase();
2769
2770 if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2772 lines[i].in_html_block = true;
2774
2775 if !is_closing {
2778 let closing_tag = format!("</{tag_name}>");
2779 let allow_blank_lines = tag_name == "style" || tag_name == "script";
2781 let mut j = i + 1;
2782 let mut found_closing_tag = false;
2783 while j < lines.len() && j < i + 100 {
2784 if !allow_blank_lines && lines[j].is_blank {
2787 break;
2788 }
2789
2790 lines[j].in_html_block = true;
2791
2792 if lines[j].content(content).contains(&closing_tag) {
2794 found_closing_tag = true;
2795 }
2796
2797 if found_closing_tag {
2800 j += 1;
2801 while j < lines.len() && j < i + 100 {
2803 if lines[j].is_blank {
2804 break;
2805 }
2806 lines[j].in_html_block = true;
2807 j += 1;
2808 }
2809 break;
2810 }
2811 j += 1;
2812 }
2813 }
2814 }
2815 }
2816
2817 i += 1;
2818 }
2819 }
2820
2821 fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2824 if !flavor.supports_esm_blocks() {
2826 return;
2827 }
2828
2829 let mut in_multiline_comment = false;
2830
2831 for line in lines.iter_mut() {
2832 if line.is_blank || line.in_html_comment {
2834 continue;
2835 }
2836
2837 let trimmed = line.content(content).trim_start();
2838
2839 if in_multiline_comment {
2841 if trimmed.contains("*/") {
2842 in_multiline_comment = false;
2843 }
2844 continue;
2845 }
2846
2847 if trimmed.starts_with("//") {
2849 continue;
2850 }
2851
2852 if trimmed.starts_with("/*") {
2854 if !trimmed.contains("*/") {
2855 in_multiline_comment = true;
2856 }
2857 continue;
2858 }
2859
2860 if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
2862 line.in_esm_block = true;
2863 } else {
2864 break;
2866 }
2867 }
2868 }
2869
2870 fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
2872 let mut code_spans = Vec::new();
2873
2874 if !content.contains('`') {
2876 return code_spans;
2877 }
2878
2879 let parser = Parser::new(content).into_offset_iter();
2881
2882 for (event, range) in parser {
2883 if let Event::Code(_) = event {
2884 let start_pos = range.start;
2885 let end_pos = range.end;
2886
2887 let full_span = &content[start_pos..end_pos];
2889 let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
2890
2891 let content_start = start_pos + backtick_count;
2893 let content_end = end_pos - backtick_count;
2894 let span_content = if content_start < content_end {
2895 content[content_start..content_end].to_string()
2896 } else {
2897 String::new()
2898 };
2899
2900 let line_idx = lines
2903 .partition_point(|line| line.byte_offset <= start_pos)
2904 .saturating_sub(1);
2905 let line_num = line_idx + 1;
2906 let byte_col_start = start_pos - lines[line_idx].byte_offset;
2907
2908 let end_line_idx = lines
2910 .partition_point(|line| line.byte_offset <= end_pos)
2911 .saturating_sub(1);
2912 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
2913
2914 let line_content = lines[line_idx].content(content);
2917 let col_start = if byte_col_start <= line_content.len() {
2918 line_content[..byte_col_start].chars().count()
2919 } else {
2920 line_content.chars().count()
2921 };
2922
2923 let end_line_content = lines[end_line_idx].content(content);
2924 let col_end = if byte_col_end <= end_line_content.len() {
2925 end_line_content[..byte_col_end].chars().count()
2926 } else {
2927 end_line_content.chars().count()
2928 };
2929
2930 code_spans.push(CodeSpan {
2931 line: line_num,
2932 end_line: end_line_idx + 1,
2933 start_col: col_start,
2934 end_col: col_end,
2935 byte_offset: start_pos,
2936 byte_end: end_pos,
2937 backtick_count,
2938 content: span_content,
2939 });
2940 }
2941 }
2942
2943 code_spans.sort_by_key(|span| span.byte_offset);
2945
2946 code_spans
2947 }
2948
2949 fn parse_math_spans(content: &str, lines: &[LineInfo]) -> Vec<MathSpan> {
2951 let mut math_spans = Vec::new();
2952
2953 if !content.contains('$') {
2955 return math_spans;
2956 }
2957
2958 let mut options = Options::empty();
2960 options.insert(Options::ENABLE_MATH);
2961 let parser = Parser::new_ext(content, options).into_offset_iter();
2962
2963 for (event, range) in parser {
2964 let (is_display, math_content) = match &event {
2965 Event::InlineMath(text) => (false, text.as_ref()),
2966 Event::DisplayMath(text) => (true, text.as_ref()),
2967 _ => continue,
2968 };
2969
2970 let start_pos = range.start;
2971 let end_pos = range.end;
2972
2973 let line_idx = lines
2975 .partition_point(|line| line.byte_offset <= start_pos)
2976 .saturating_sub(1);
2977 let line_num = line_idx + 1;
2978 let byte_col_start = start_pos - lines[line_idx].byte_offset;
2979
2980 let end_line_idx = lines
2982 .partition_point(|line| line.byte_offset <= end_pos)
2983 .saturating_sub(1);
2984 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
2985
2986 let line_content = lines[line_idx].content(content);
2988 let col_start = if byte_col_start <= line_content.len() {
2989 line_content[..byte_col_start].chars().count()
2990 } else {
2991 line_content.chars().count()
2992 };
2993
2994 let end_line_content = lines[end_line_idx].content(content);
2995 let col_end = if byte_col_end <= end_line_content.len() {
2996 end_line_content[..byte_col_end].chars().count()
2997 } else {
2998 end_line_content.chars().count()
2999 };
3000
3001 math_spans.push(MathSpan {
3002 line: line_num,
3003 end_line: end_line_idx + 1,
3004 start_col: col_start,
3005 end_col: col_end,
3006 byte_offset: start_pos,
3007 byte_end: end_pos,
3008 is_display,
3009 content: math_content.to_string(),
3010 });
3011 }
3012
3013 math_spans.sort_by_key(|span| span.byte_offset);
3015
3016 math_spans
3017 }
3018
3019 fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
3030 const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
3032
3033 #[inline]
3036 fn reset_tracking_state(
3037 list_item: &ListItemInfo,
3038 has_list_breaking_content: &mut bool,
3039 min_continuation: &mut usize,
3040 ) {
3041 *has_list_breaking_content = false;
3042 let marker_width = if list_item.is_ordered {
3043 list_item.marker.len() + 1 } else {
3045 list_item.marker.len()
3046 };
3047 *min_continuation = if list_item.is_ordered {
3048 marker_width
3049 } else {
3050 UNORDERED_LIST_MIN_CONTINUATION_INDENT
3051 };
3052 }
3053
3054 let mut list_blocks = Vec::with_capacity(lines.len() / 10); let mut current_block: Option<ListBlock> = None;
3057 let mut last_list_item_line = 0;
3058 let mut current_indent_level = 0;
3059 let mut last_marker_width = 0;
3060
3061 let mut has_list_breaking_content_since_last_item = false;
3063 let mut min_continuation_for_tracking = 0;
3064
3065 for (line_idx, line_info) in lines.iter().enumerate() {
3066 let line_num = line_idx + 1;
3067
3068 if line_info.in_code_block {
3070 if let Some(ref mut block) = current_block {
3071 let min_continuation_indent =
3073 CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
3074
3075 let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
3077
3078 match context {
3079 CodeBlockContext::Indented => {
3080 block.end_line = line_num;
3082 continue;
3083 }
3084 CodeBlockContext::Standalone => {
3085 let completed_block = current_block.take().unwrap();
3087 list_blocks.push(completed_block);
3088 continue;
3089 }
3090 CodeBlockContext::Adjacent => {
3091 block.end_line = line_num;
3093 continue;
3094 }
3095 }
3096 } else {
3097 continue;
3099 }
3100 }
3101
3102 let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
3104 caps.get(0).unwrap().as_str().to_string()
3105 } else {
3106 String::new()
3107 };
3108
3109 if let Some(ref block) = current_block
3112 && line_info.list_item.is_none()
3113 && !line_info.is_blank
3114 && !line_info.in_code_span_continuation
3115 {
3116 let line_content = line_info.content(content).trim();
3117
3118 let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
3123
3124 let blockquote_prefix_changes = blockquote_prefix.trim() != block.blockquote_prefix.trim();
3127
3128 let breaks_list = line_info.heading.is_some()
3129 || line_content.starts_with("---")
3130 || line_content.starts_with("***")
3131 || line_content.starts_with("___")
3132 || crate::utils::skip_context::is_table_line(line_content)
3133 || blockquote_prefix_changes
3134 || (line_info.indent > 0
3135 && line_info.indent < min_continuation_for_tracking
3136 && !is_lazy_continuation);
3137
3138 if breaks_list {
3139 has_list_breaking_content_since_last_item = true;
3140 }
3141 }
3142
3143 if line_info.in_code_span_continuation
3146 && line_info.list_item.is_none()
3147 && let Some(ref mut block) = current_block
3148 {
3149 block.end_line = line_num;
3150 }
3151
3152 let effective_continuation_indent = if let Some(ref block) = current_block {
3158 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3159 let line_content = line_info.content(content);
3160 let line_bq_level = line_content
3161 .chars()
3162 .take_while(|c| *c == '>' || c.is_whitespace())
3163 .filter(|&c| c == '>')
3164 .count();
3165 if line_bq_level > 0 && line_bq_level == block_bq_level {
3166 let mut pos = 0;
3168 let mut found_markers = 0;
3169 for c in line_content.chars() {
3170 pos += c.len_utf8();
3171 if c == '>' {
3172 found_markers += 1;
3173 if found_markers == line_bq_level {
3174 if line_content.get(pos..pos + 1) == Some(" ") {
3175 pos += 1;
3176 }
3177 break;
3178 }
3179 }
3180 }
3181 let after_bq = &line_content[pos..];
3182 after_bq.len() - after_bq.trim_start().len()
3183 } else {
3184 line_info.indent
3185 }
3186 } else {
3187 line_info.indent
3188 };
3189 let adjusted_min_continuation_for_tracking = if let Some(ref block) = current_block {
3190 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3191 if block_bq_level > 0 {
3192 if block.is_ordered { last_marker_width } else { 2 }
3193 } else {
3194 min_continuation_for_tracking
3195 }
3196 } else {
3197 min_continuation_for_tracking
3198 };
3199 let is_valid_continuation = effective_continuation_indent >= adjusted_min_continuation_for_tracking
3200 || (line_info.indent == 0 && !line_info.is_blank); if std::env::var("RUMDL_DEBUG_LIST").is_ok() && line_info.list_item.is_none() && !line_info.is_blank {
3203 eprintln!(
3204 "[DEBUG] Line {}: checking continuation - indent={}, min_cont={}, is_valid={}, in_code_span={}, in_code_block={}, has_block={}",
3205 line_num,
3206 effective_continuation_indent,
3207 adjusted_min_continuation_for_tracking,
3208 is_valid_continuation,
3209 line_info.in_code_span_continuation,
3210 line_info.in_code_block,
3211 current_block.is_some()
3212 );
3213 }
3214
3215 if !line_info.in_code_span_continuation
3216 && line_info.list_item.is_none()
3217 && !line_info.is_blank
3218 && !line_info.in_code_block
3219 && is_valid_continuation
3220 && let Some(ref mut block) = current_block
3221 {
3222 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3223 eprintln!(
3224 "[DEBUG] Line {}: extending block.end_line from {} to {}",
3225 line_num, block.end_line, line_num
3226 );
3227 }
3228 block.end_line = line_num;
3229 }
3230
3231 if let Some(list_item) = &line_info.list_item {
3233 let item_indent = list_item.marker_column;
3235 let nesting = item_indent / 2; if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3238 eprintln!(
3239 "[DEBUG] Line {}: list item found, marker={:?}, indent={}",
3240 line_num, list_item.marker, item_indent
3241 );
3242 }
3243
3244 if let Some(ref mut block) = current_block {
3245 let is_nested = nesting > block.nesting_level;
3249 let same_type =
3250 (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
3251 let same_context = block.blockquote_prefix == blockquote_prefix;
3252 let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
3254
3255 let marker_compatible =
3257 block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
3258
3259 let has_non_list_content = has_list_breaking_content_since_last_item;
3262
3263 let mut continues_list = if is_nested {
3267 same_context && reasonable_distance && !has_non_list_content
3269 } else {
3270 same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
3272 };
3273
3274 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3275 eprintln!(
3276 "[DEBUG] Line {}: continues_list={}, is_nested={}, same_type={}, same_context={}, reasonable_distance={}, marker_compatible={}, has_non_list_content={}, last_item={}, block.end_line={}",
3277 line_num,
3278 continues_list,
3279 is_nested,
3280 same_type,
3281 same_context,
3282 reasonable_distance,
3283 marker_compatible,
3284 has_non_list_content,
3285 last_list_item_line,
3286 block.end_line
3287 );
3288 }
3289
3290 if !continues_list && reasonable_distance && line_num > 0 && block.end_line == line_num - 1 {
3293 if block.item_lines.contains(&(line_num - 1)) {
3296 continues_list = true;
3298 } else {
3299 continues_list = true;
3303 }
3304 }
3305
3306 if continues_list {
3307 block.end_line = line_num;
3309 block.item_lines.push(line_num);
3310
3311 block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
3313 list_item.marker.len() + 1
3314 } else {
3315 list_item.marker.len()
3316 });
3317
3318 if !block.is_ordered
3320 && block.marker.is_some()
3321 && block.marker.as_ref() != Some(&list_item.marker)
3322 {
3323 block.marker = None;
3325 }
3326
3327 reset_tracking_state(
3329 list_item,
3330 &mut has_list_breaking_content_since_last_item,
3331 &mut min_continuation_for_tracking,
3332 );
3333 } else {
3334 list_blocks.push(block.clone());
3337
3338 *block = ListBlock {
3339 start_line: line_num,
3340 end_line: line_num,
3341 is_ordered: list_item.is_ordered,
3342 marker: if list_item.is_ordered {
3343 None
3344 } else {
3345 Some(list_item.marker.clone())
3346 },
3347 blockquote_prefix: blockquote_prefix.clone(),
3348 item_lines: vec![line_num],
3349 nesting_level: nesting,
3350 max_marker_width: if list_item.is_ordered {
3351 list_item.marker.len() + 1
3352 } else {
3353 list_item.marker.len()
3354 },
3355 };
3356
3357 reset_tracking_state(
3359 list_item,
3360 &mut has_list_breaking_content_since_last_item,
3361 &mut min_continuation_for_tracking,
3362 );
3363 }
3364 } else {
3365 current_block = Some(ListBlock {
3367 start_line: line_num,
3368 end_line: line_num,
3369 is_ordered: list_item.is_ordered,
3370 marker: if list_item.is_ordered {
3371 None
3372 } else {
3373 Some(list_item.marker.clone())
3374 },
3375 blockquote_prefix,
3376 item_lines: vec![line_num],
3377 nesting_level: nesting,
3378 max_marker_width: list_item.marker.len(),
3379 });
3380
3381 reset_tracking_state(
3383 list_item,
3384 &mut has_list_breaking_content_since_last_item,
3385 &mut min_continuation_for_tracking,
3386 );
3387 }
3388
3389 last_list_item_line = line_num;
3390 current_indent_level = item_indent;
3391 last_marker_width = if list_item.is_ordered {
3392 list_item.marker.len() + 1 } else {
3394 list_item.marker.len()
3395 };
3396 } else if let Some(ref mut block) = current_block {
3397 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3399 eprintln!(
3400 "[DEBUG] Line {}: non-list-item, is_blank={}, block exists",
3401 line_num, line_info.is_blank
3402 );
3403 }
3404
3405 let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
3413 lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
3414 } else {
3415 false
3416 };
3417
3418 let min_continuation_indent = if block.is_ordered {
3422 current_indent_level + last_marker_width
3423 } else {
3424 current_indent_level + 2 };
3426
3427 if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
3428 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3430 eprintln!(
3431 "[DEBUG] Line {}: indented continuation (indent={}, min={})",
3432 line_num, line_info.indent, min_continuation_indent
3433 );
3434 }
3435 block.end_line = line_num;
3436 } else if line_info.is_blank {
3437 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3440 eprintln!("[DEBUG] Line {line_num}: entering blank line handling");
3441 }
3442 let mut check_idx = line_idx + 1;
3443 let mut found_continuation = false;
3444
3445 while check_idx < lines.len() && lines[check_idx].is_blank {
3447 check_idx += 1;
3448 }
3449
3450 if check_idx < lines.len() {
3451 let next_line = &lines[check_idx];
3452 let next_content = next_line.content(content);
3454 let block_bq_level_for_indent = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3457 let next_bq_level_for_indent = next_content
3458 .chars()
3459 .take_while(|c| *c == '>' || c.is_whitespace())
3460 .filter(|&c| c == '>')
3461 .count();
3462 let effective_indent =
3463 if next_bq_level_for_indent > 0 && next_bq_level_for_indent == block_bq_level_for_indent {
3464 let mut pos = 0;
3467 let mut found_markers = 0;
3468 for c in next_content.chars() {
3469 pos += c.len_utf8();
3470 if c == '>' {
3471 found_markers += 1;
3472 if found_markers == next_bq_level_for_indent {
3473 if next_content.get(pos..pos + 1) == Some(" ") {
3475 pos += 1;
3476 }
3477 break;
3478 }
3479 }
3480 }
3481 let after_blockquote_marker = &next_content[pos..];
3482 after_blockquote_marker.len() - after_blockquote_marker.trim_start().len()
3483 } else {
3484 next_line.indent
3485 };
3486 let adjusted_min_continuation = if block_bq_level_for_indent > 0 {
3489 if block.is_ordered { last_marker_width } else { 2 }
3492 } else {
3493 min_continuation_indent
3494 };
3495 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3497 eprintln!(
3498 "[DEBUG] Blank line {} checking next line {}: effective_indent={}, adjusted_min={}, next_is_list={}, in_code_block={}",
3499 line_num,
3500 check_idx + 1,
3501 effective_indent,
3502 adjusted_min_continuation,
3503 next_line.list_item.is_some(),
3504 next_line.in_code_block
3505 );
3506 }
3507 if !next_line.in_code_block && effective_indent >= adjusted_min_continuation {
3508 found_continuation = true;
3509 }
3510 else if !next_line.in_code_block
3512 && next_line.list_item.is_some()
3513 && let Some(item) = &next_line.list_item
3514 {
3515 let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
3516 .find(next_line.content(content))
3517 .map_or(String::new(), |m| m.as_str().to_string());
3518 if item.marker_column == current_indent_level
3519 && item.is_ordered == block.is_ordered
3520 && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
3521 {
3522 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3526 let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
3527 if let Some(between_line) = lines.get(idx) {
3528 let between_content = between_line.content(content);
3529 let trimmed = between_content.trim();
3530 if trimmed.is_empty() {
3532 return false;
3533 }
3534 let line_indent = between_content.len() - between_content.trim_start().len();
3536
3537 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
3539 .find(between_content)
3540 .map_or(String::new(), |m| m.as_str().to_string());
3541 let between_bq_level = between_bq_prefix.chars().filter(|&c| c == '>').count();
3542 let blockquote_level_changed =
3543 trimmed.starts_with(">") && between_bq_level != block_bq_level;
3544
3545 if trimmed.starts_with("```")
3547 || trimmed.starts_with("~~~")
3548 || trimmed.starts_with("---")
3549 || trimmed.starts_with("***")
3550 || trimmed.starts_with("___")
3551 || blockquote_level_changed
3552 || crate::utils::skip_context::is_table_line(trimmed)
3553 || between_line.heading.is_some()
3554 {
3555 return true; }
3557
3558 line_indent >= min_continuation_indent
3560 } else {
3561 false
3562 }
3563 });
3564
3565 if block.is_ordered {
3566 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
3569 if let Some(between_line) = lines.get(idx) {
3570 let between_content = between_line.content(content);
3571 let trimmed = between_content.trim();
3572 if trimmed.is_empty() {
3573 return false;
3574 }
3575 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
3577 .find(between_content)
3578 .map_or(String::new(), |m| m.as_str().to_string());
3579 let between_bq_level =
3580 between_bq_prefix.chars().filter(|&c| c == '>').count();
3581 let blockquote_level_changed =
3582 trimmed.starts_with(">") && between_bq_level != block_bq_level;
3583 trimmed.starts_with("```")
3585 || trimmed.starts_with("~~~")
3586 || trimmed.starts_with("---")
3587 || trimmed.starts_with("***")
3588 || trimmed.starts_with("___")
3589 || blockquote_level_changed
3590 || crate::utils::skip_context::is_table_line(trimmed)
3591 || between_line.heading.is_some()
3592 } else {
3593 false
3594 }
3595 });
3596 found_continuation = !has_structural_separators;
3597 } else {
3598 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
3600 if let Some(between_line) = lines.get(idx) {
3601 let between_content = between_line.content(content);
3602 let trimmed = between_content.trim();
3603 if trimmed.is_empty() {
3604 return false;
3605 }
3606 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
3608 .find(between_content)
3609 .map_or(String::new(), |m| m.as_str().to_string());
3610 let between_bq_level =
3611 between_bq_prefix.chars().filter(|&c| c == '>').count();
3612 let blockquote_level_changed =
3613 trimmed.starts_with(">") && between_bq_level != block_bq_level;
3614 trimmed.starts_with("```")
3616 || trimmed.starts_with("~~~")
3617 || trimmed.starts_with("---")
3618 || trimmed.starts_with("***")
3619 || trimmed.starts_with("___")
3620 || blockquote_level_changed
3621 || crate::utils::skip_context::is_table_line(trimmed)
3622 || between_line.heading.is_some()
3623 } else {
3624 false
3625 }
3626 });
3627 found_continuation = !has_structural_separators;
3628 }
3629 }
3630 }
3631 }
3632
3633 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3634 eprintln!("[DEBUG] Blank line {line_num} final: found_continuation={found_continuation}");
3635 }
3636 if found_continuation {
3637 block.end_line = line_num;
3639 } else {
3640 list_blocks.push(block.clone());
3642 current_block = None;
3643 }
3644 } else {
3645 let min_required_indent = if block.is_ordered {
3648 current_indent_level + last_marker_width
3649 } else {
3650 current_indent_level + 2
3651 };
3652
3653 let line_content = line_info.content(content).trim();
3658
3659 let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
3661
3662 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3665 let current_bq_level = blockquote_prefix.chars().filter(|&c| c == '>').count();
3666 let blockquote_level_changed = line_content.starts_with(">") && current_bq_level != block_bq_level;
3667
3668 let is_structural_separator = line_info.heading.is_some()
3669 || line_content.starts_with("```")
3670 || line_content.starts_with("~~~")
3671 || line_content.starts_with("---")
3672 || line_content.starts_with("***")
3673 || line_content.starts_with("___")
3674 || blockquote_level_changed
3675 || looks_like_table;
3676
3677 let is_lazy_continuation = !is_structural_separator
3680 && !line_info.is_blank
3681 && (line_info.indent == 0 || line_info.indent >= min_required_indent);
3682
3683 if is_lazy_continuation {
3684 let line_content_raw = line_info.content(content);
3688 let block_bq_level_lazy = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3689 let line_bq_level_lazy = line_content_raw
3690 .chars()
3691 .take_while(|c| *c == '>' || c.is_whitespace())
3692 .filter(|&c| c == '>')
3693 .count();
3694 let has_proper_blockquote_indent =
3695 if line_bq_level_lazy > 0 && line_bq_level_lazy == block_bq_level_lazy {
3696 let mut pos = 0;
3698 let mut found_markers = 0;
3699 for c in line_content_raw.chars() {
3700 pos += c.len_utf8();
3701 if c == '>' {
3702 found_markers += 1;
3703 if found_markers == line_bq_level_lazy {
3704 if line_content_raw.get(pos..pos + 1) == Some(" ") {
3705 pos += 1;
3706 }
3707 break;
3708 }
3709 }
3710 }
3711 let after_bq = &line_content_raw[pos..];
3712 let effective_indent_lazy = after_bq.len() - after_bq.trim_start().len();
3713 let min_required_for_bq = if block.is_ordered { last_marker_width } else { 2 };
3714 effective_indent_lazy >= min_required_for_bq
3715 } else {
3716 false
3717 };
3718
3719 if has_proper_blockquote_indent {
3721 block.end_line = line_num;
3722 } else {
3723 let content_to_check = if !blockquote_prefix.is_empty() {
3724 line_info
3726 .content(content)
3727 .strip_prefix(&blockquote_prefix)
3728 .unwrap_or(line_info.content(content))
3729 .trim()
3730 } else {
3731 line_info.content(content).trim()
3732 };
3733
3734 let starts_with_uppercase =
3735 content_to_check.chars().next().is_some_and(|c| c.is_uppercase());
3736
3737 if starts_with_uppercase && last_list_item_line > 0 {
3740 list_blocks.push(block.clone());
3742 current_block = None;
3743 } else {
3744 block.end_line = line_num;
3746 }
3747 }
3748 } else {
3749 list_blocks.push(block.clone());
3751 current_block = None;
3752 }
3753 }
3754 }
3755 }
3756
3757 if let Some(block) = current_block {
3759 list_blocks.push(block);
3760 }
3761
3762 merge_adjacent_list_blocks(content, &mut list_blocks, lines);
3764
3765 list_blocks
3766 }
3767
3768 fn compute_char_frequency(content: &str) -> CharFrequency {
3770 let mut frequency = CharFrequency::default();
3771
3772 for ch in content.chars() {
3773 match ch {
3774 '#' => frequency.hash_count += 1,
3775 '*' => frequency.asterisk_count += 1,
3776 '_' => frequency.underscore_count += 1,
3777 '-' => frequency.hyphen_count += 1,
3778 '+' => frequency.plus_count += 1,
3779 '>' => frequency.gt_count += 1,
3780 '|' => frequency.pipe_count += 1,
3781 '[' => frequency.bracket_count += 1,
3782 '`' => frequency.backtick_count += 1,
3783 '<' => frequency.lt_count += 1,
3784 '!' => frequency.exclamation_count += 1,
3785 '\n' => frequency.newline_count += 1,
3786 _ => {}
3787 }
3788 }
3789
3790 frequency
3791 }
3792
3793 fn parse_html_tags(
3795 content: &str,
3796 lines: &[LineInfo],
3797 code_blocks: &[(usize, usize)],
3798 flavor: MarkdownFlavor,
3799 ) -> Vec<HtmlTag> {
3800 static HTML_TAG_REGEX: LazyLock<regex::Regex> =
3801 LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
3802
3803 let mut html_tags = Vec::with_capacity(content.matches('<').count());
3804
3805 for cap in HTML_TAG_REGEX.captures_iter(content) {
3806 let full_match = cap.get(0).unwrap();
3807 let match_start = full_match.start();
3808 let match_end = full_match.end();
3809
3810 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3812 continue;
3813 }
3814
3815 let is_closing = !cap.get(1).unwrap().as_str().is_empty();
3816 let tag_name_original = cap.get(2).unwrap().as_str();
3817 let tag_name = tag_name_original.to_lowercase();
3818 let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
3819
3820 if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
3823 continue;
3824 }
3825
3826 let mut line_num = 1;
3828 let mut col_start = match_start;
3829 let mut col_end = match_end;
3830 for (idx, line_info) in lines.iter().enumerate() {
3831 if match_start >= line_info.byte_offset {
3832 line_num = idx + 1;
3833 col_start = match_start - line_info.byte_offset;
3834 col_end = match_end - line_info.byte_offset;
3835 } else {
3836 break;
3837 }
3838 }
3839
3840 html_tags.push(HtmlTag {
3841 line: line_num,
3842 start_col: col_start,
3843 end_col: col_end,
3844 byte_offset: match_start,
3845 byte_end: match_end,
3846 tag_name,
3847 is_closing,
3848 is_self_closing,
3849 raw_content: full_match.as_str().to_string(),
3850 });
3851 }
3852
3853 html_tags
3854 }
3855
3856 fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
3858 let mut table_rows = Vec::with_capacity(lines.len() / 20);
3859
3860 for (line_idx, line_info) in lines.iter().enumerate() {
3861 if line_info.in_code_block || line_info.is_blank {
3863 continue;
3864 }
3865
3866 let line = line_info.content(content);
3867 let line_num = line_idx + 1;
3868
3869 if !line.contains('|') {
3871 continue;
3872 }
3873
3874 let parts: Vec<&str> = line.split('|').collect();
3876 let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
3877
3878 let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
3880 let mut column_alignments = Vec::new();
3881
3882 if is_separator {
3883 for part in &parts[1..parts.len() - 1] {
3884 let trimmed = part.trim();
3886 let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
3887 "center".to_string()
3888 } else if trimmed.ends_with(':') {
3889 "right".to_string()
3890 } else if trimmed.starts_with(':') {
3891 "left".to_string()
3892 } else {
3893 "none".to_string()
3894 };
3895 column_alignments.push(alignment);
3896 }
3897 }
3898
3899 table_rows.push(TableRow {
3900 line: line_num,
3901 is_separator,
3902 column_count,
3903 column_alignments,
3904 });
3905 }
3906
3907 table_rows
3908 }
3909
3910 fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
3912 let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
3913
3914 for cap in URL_SIMPLE_REGEX.captures_iter(content) {
3916 let full_match = cap.get(0).unwrap();
3917 let match_start = full_match.start();
3918 let match_end = full_match.end();
3919
3920 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3922 continue;
3923 }
3924
3925 let preceding_char = if match_start > 0 {
3927 content.chars().nth(match_start - 1)
3928 } else {
3929 None
3930 };
3931 let following_char = content.chars().nth(match_end);
3932
3933 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3934 continue;
3935 }
3936 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3937 continue;
3938 }
3939
3940 let url = full_match.as_str();
3941 let url_type = if url.starts_with("https://") {
3942 "https"
3943 } else if url.starts_with("http://") {
3944 "http"
3945 } else if url.starts_with("ftp://") {
3946 "ftp"
3947 } else {
3948 "other"
3949 };
3950
3951 let mut line_num = 1;
3953 let mut col_start = match_start;
3954 let mut col_end = match_end;
3955 for (idx, line_info) in lines.iter().enumerate() {
3956 if match_start >= line_info.byte_offset {
3957 line_num = idx + 1;
3958 col_start = match_start - line_info.byte_offset;
3959 col_end = match_end - line_info.byte_offset;
3960 } else {
3961 break;
3962 }
3963 }
3964
3965 bare_urls.push(BareUrl {
3966 line: line_num,
3967 start_col: col_start,
3968 end_col: col_end,
3969 byte_offset: match_start,
3970 byte_end: match_end,
3971 url: url.to_string(),
3972 url_type: url_type.to_string(),
3973 });
3974 }
3975
3976 for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
3978 let full_match = cap.get(0).unwrap();
3979 let match_start = full_match.start();
3980 let match_end = full_match.end();
3981
3982 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3984 continue;
3985 }
3986
3987 let preceding_char = if match_start > 0 {
3989 content.chars().nth(match_start - 1)
3990 } else {
3991 None
3992 };
3993 let following_char = content.chars().nth(match_end);
3994
3995 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3996 continue;
3997 }
3998 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3999 continue;
4000 }
4001
4002 let email = full_match.as_str();
4003
4004 let mut line_num = 1;
4006 let mut col_start = match_start;
4007 let mut col_end = match_end;
4008 for (idx, line_info) in lines.iter().enumerate() {
4009 if match_start >= line_info.byte_offset {
4010 line_num = idx + 1;
4011 col_start = match_start - line_info.byte_offset;
4012 col_end = match_end - line_info.byte_offset;
4013 } else {
4014 break;
4015 }
4016 }
4017
4018 bare_urls.push(BareUrl {
4019 line: line_num,
4020 start_col: col_start,
4021 end_col: col_end,
4022 byte_offset: match_start,
4023 byte_end: match_end,
4024 url: email.to_string(),
4025 url_type: "email".to_string(),
4026 });
4027 }
4028
4029 bare_urls
4030 }
4031
/// Returns a [`ValidHeadingsIter`] constructed over this context's lines.
///
/// NOTE(review): presumably the iterator yields only headings whose
/// `is_valid` flag is set — confirm against `ValidHeadingsIter`.
#[must_use]
pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
    ValidHeadingsIter::new(&self.lines)
}
4055
4056 #[must_use]
4060 pub fn has_valid_headings(&self) -> bool {
4061 self.lines
4062 .iter()
4063 .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
4064 }
4065}
4066
4067fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
4069 if list_blocks.len() < 2 {
4070 return;
4071 }
4072
4073 let mut merger = ListBlockMerger::new(content, lines);
4074 *list_blocks = merger.merge(list_blocks);
4075}
4076
/// Helper that decides which neighbouring [`ListBlock`]s belong together and
/// merges them. It only borrows the document; it owns no data itself.
struct ListBlockMerger<'a> {
    /// Document text that the entries of `lines` are resolved against.
    content: &'a str,
    /// Per-line metadata; block line numbers are 1-based positions in this slice.
    lines: &'a [LineInfo],
}
4082
4083impl<'a> ListBlockMerger<'a> {
4084 fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
4085 Self { content, lines }
4086 }
4087
4088 fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
4089 let mut merged = Vec::with_capacity(list_blocks.len());
4090 let mut current = list_blocks[0].clone();
4091
4092 for next in list_blocks.iter().skip(1) {
4093 if self.should_merge_blocks(¤t, next) {
4094 current = self.merge_two_blocks(current, next);
4095 } else {
4096 merged.push(current);
4097 current = next.clone();
4098 }
4099 }
4100
4101 merged.push(current);
4102 merged
4103 }
4104
4105 fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
4107 if !self.blocks_are_compatible(current, next) {
4109 return false;
4110 }
4111
4112 let spacing = self.analyze_spacing_between(current, next);
4114 match spacing {
4115 BlockSpacing::Consecutive => true,
4116 BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
4117 BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
4118 self.can_merge_with_content_between(current, next)
4119 }
4120 }
4121 }
4122
4123 fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
4125 current.is_ordered == next.is_ordered
4126 && current.blockquote_prefix == next.blockquote_prefix
4127 && current.nesting_level == next.nesting_level
4128 }
4129
4130 fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
4132 let gap = next.start_line - current.end_line;
4133
4134 match gap {
4135 1 => BlockSpacing::Consecutive,
4136 2 => BlockSpacing::SingleBlank,
4137 _ if gap > 2 => {
4138 if self.has_only_blank_lines_between(current, next) {
4139 BlockSpacing::MultipleBlanks
4140 } else {
4141 BlockSpacing::ContentBetween
4142 }
4143 }
4144 _ => BlockSpacing::Consecutive, }
4146 }
4147
4148 fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4150 if has_meaningful_content_between(self.content, current, next, self.lines) {
4153 return false; }
4155
4156 !current.is_ordered && current.marker == next.marker
4158 }
4159
4160 fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4162 if has_meaningful_content_between(self.content, current, next, self.lines) {
4164 return false; }
4166
4167 current.is_ordered && next.is_ordered
4169 }
4170
4171 fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4173 for line_num in (current.end_line + 1)..next.start_line {
4174 if let Some(line_info) = self.lines.get(line_num - 1)
4175 && !line_info.content(self.content).trim().is_empty()
4176 {
4177 return false;
4178 }
4179 }
4180 true
4181 }
4182
4183 fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
4185 current.end_line = next.end_line;
4186 current.item_lines.extend_from_slice(&next.item_lines);
4187
4188 current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
4190
4191 if !current.is_ordered && self.markers_differ(¤t, next) {
4193 current.marker = None; }
4195
4196 current
4197 }
4198
4199 fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
4201 current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
4202 }
4203}
4204
/// How two list blocks are separated, as classified by
/// `ListBlockMerger::analyze_spacing_between` from
/// `next.start_line - current.end_line`.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// The difference is 1 (also the fallback for any difference below 1).
    Consecutive,
    /// The difference is 2, i.e. exactly one line sits between the blocks.
    SingleBlank,
    /// The difference exceeds 2 and every line in between is blank.
    MultipleBlanks,
    /// The difference exceeds 2 and at least one line in between is not blank.
    ContentBetween,
}
4213
4214fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
4216 for line_num in (current.end_line + 1)..next.start_line {
4218 if let Some(line_info) = lines.get(line_num - 1) {
4219 let trimmed = line_info.content(content).trim();
4221
4222 if trimmed.is_empty() {
4224 continue;
4225 }
4226
4227 if line_info.heading.is_some() {
4231 return true; }
4233
4234 if is_horizontal_rule(trimmed) {
4236 return true; }
4238
4239 if crate::utils::skip_context::is_table_line(trimmed) {
4241 return true; }
4243
4244 if trimmed.starts_with('>') {
4246 return true; }
4248
4249 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
4251 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4252
4253 let min_continuation_indent = if current.is_ordered {
4255 current.nesting_level + current.max_marker_width + 1 } else {
4257 current.nesting_level + 2
4258 };
4259
4260 if line_indent < min_continuation_indent {
4261 return true; }
4264 }
4265
4266 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4268
4269 let min_indent = if current.is_ordered {
4271 current.nesting_level + current.max_marker_width
4272 } else {
4273 current.nesting_level + 2
4274 };
4275
4276 if line_indent < min_indent {
4278 return true; }
4280
4281 }
4284 }
4285
4286 false
4288}
4289
4290pub fn is_horizontal_rule_line(line: &str) -> bool {
4297 let leading_spaces = line.len() - line.trim_start_matches(' ').len();
4299 if leading_spaces > 3 || line.starts_with('\t') {
4300 return false;
4301 }
4302
4303 is_horizontal_rule_content(line.trim())
4304}
4305
/// Checks whether already-trimmed text forms a horizontal rule.
///
/// The text must start with `-`, `*` or `_`, contain at least three copies of
/// that character, and otherwise consist only of spaces and tabs.
///
/// Runs in a single pass over the characters without allocating.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    // Cheap early exit: three marker characters need at least three bytes.
    if trimmed.len() < 3 {
        return false;
    }

    let Some(marker) = trimmed.chars().next().filter(|ch| matches!(ch, '-' | '*' | '_')) else {
        return false;
    };

    let mut marker_count = 0usize;
    for ch in trimmed.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' && ch != '\t' {
            // Any other character disqualifies the line.
            return false;
        }
    }

    marker_count >= 3
}
4330
/// Alias for `is_horizontal_rule_content`: expects text that has already been
/// trimmed and forwards it unchanged.
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}
4335
4336#[cfg(test)]
4338mod tests {
4339 use super::*;
4340
    #[test]
    fn test_empty_content() {
        // Empty input still has one line offset (0) but no line records,
        // and offset 0 maps to line 1, column 1.
        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }
4349
    #[test]
    fn test_single_line() {
        // A single line without a trailing newline has exactly one offset.
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        // Lines and columns are reported 1-based.
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
    }
4358
    #[test]
    fn test_multi_line() {
        // Line starts: "# Title" at 0, the blank line at 8, "Second line" at 9,
        // "Third line" at 21.
        let content = "# Title\n\nSecond line\nThird line";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
        // Start of line 1.
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        // Start of the blank line 2.
        assert_eq!(ctx.offset_to_line_col(8), (2, 1));
        // Start of line 3.
        assert_eq!(ctx.offset_to_line_col(9), (3, 1));
        // Six bytes into line 3.
        assert_eq!(ctx.offset_to_line_col(15), (3, 7));
        // Start of line 4.
        assert_eq!(ctx.offset_to_line_col(21), (4, 1));
    }
4371
    #[test]
    fn test_line_info() {
        // Checks the per-line records (content, byte offset, indent, blank flag)
        // and the line-number based accessors.
        let content = "# Title\n indented\n\ncode:\n```rust\nfn main() {}\n```";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Seven lines: heading, indented text, blank, "code:", fence, code, fence.
        assert_eq!(ctx.lines.len(), 7);

        // Line 1: the heading.
        let line1 = &ctx.lines[0];
        assert_eq!(line1.content(ctx.content), "# Title");
        assert_eq!(line1.byte_offset, 0);
        assert_eq!(line1.indent, 0);
        assert!(!line1.is_blank);
        assert!(!line1.in_code_block);
        assert!(line1.list_item.is_none());

        // Line 2: the indented text.
        // NOTE(review): an indent of 4 is expected here; confirm the literal above
        // carries four leading spaces on this line.
        let line2 = &ctx.lines[1];
        assert_eq!(line2.content(ctx.content), " indented");
        assert_eq!(line2.byte_offset, 8);
        assert_eq!(line2.indent, 4);
        assert!(!line2.is_blank);

        // Line 3: the blank line.
        let line3 = &ctx.lines[2];
        assert_eq!(line3.content(ctx.content), "");
        assert!(line3.is_blank);

        // Accessors take 1-based line numbers.
        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
    }
4407
    #[test]
    fn test_list_item_detection() {
        // Covers unordered, nested unordered and ordered items, plus a plain
        // paragraph that must not be detected as a list item.
        let content = "- Unordered item\n * Nested item\n1. Ordered item\n 2) Nested ordered\n\nNot a list";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Line 1: "-" marker at column 0, content starting at column 2.
        let line1 = &ctx.lines[0];
        assert!(line1.list_item.is_some());
        let list1 = line1.list_item.as_ref().unwrap();
        assert_eq!(list1.marker, "-");
        assert!(!list1.is_ordered);
        assert_eq!(list1.marker_column, 0);
        assert_eq!(list1.content_column, 2);

        // Line 2: nested "*" item; the expected marker column is 2.
        let line2 = &ctx.lines[1];
        assert!(line2.list_item.is_some());
        let list2 = line2.list_item.as_ref().unwrap();
        assert_eq!(list2.marker, "*");
        assert_eq!(list2.marker_column, 2);

        // Line 3: ordered item; the marker keeps its delimiter and the number is parsed.
        let line3 = &ctx.lines[2];
        assert!(line3.list_item.is_some());
        let list3 = line3.list_item.as_ref().unwrap();
        assert_eq!(list3.marker, "1.");
        assert!(list3.is_ordered);
        assert_eq!(list3.number, Some(1));

        // Line 6: plain text.
        let line6 = &ctx.lines[5];
        assert!(line6.list_item.is_none());
    }
4441
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        // Three one-character lines; newline bytes count as column 2 of their line.
        let content = "a\nb\nc";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        // 'a'
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        // newline ending line 1
        assert_eq!(ctx.offset_to_line_col(1), (1, 2));
        // 'b'
        assert_eq!(ctx.offset_to_line_col(2), (2, 1));
        // newline ending line 2
        assert_eq!(ctx.offset_to_line_col(3), (2, 2));
        // 'c'
        assert_eq!(ctx.offset_to_line_col(4), (3, 1));
        // one past the last byte
        assert_eq!(ctx.offset_to_line_col(5), (3, 2));
    }
4454
    #[test]
    fn test_mdx_esm_blocks() {
        // With the MDX flavor, the leading import/export lines are flagged as an
        // ESM block; everything from the first blank line on is not.
        let content = r##"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall

In {year}, the snowfall was above average.
It was followed by a warm spring which caused
flood conditions in many of the nearby rivers.

<Chart color="#fcb32c" year={year} />
"##;

        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);

        assert_eq!(ctx.lines.len(), 10);
        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
        assert!(
            !ctx.lines[3].in_esm_block,
            "Line 4 (heading) should NOT be in_esm_block"
        );
        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
    }
4483
    #[test]
    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
        // The same import/export lines must not be flagged when the flavor is Standard.
        let content = r#"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(
            !ctx.lines[0].in_esm_block,
            "Line 1 should NOT be in_esm_block in Standard flavor"
        );
        assert!(
            !ctx.lines[1].in_esm_block,
            "Line 2 should NOT be in_esm_block in Standard flavor"
        );
    }
4504
    #[test]
    fn test_blockquote_with_indented_content() {
        // Lines starting with `>` must be recorded as blockquotes even when the
        // quoted text begins with something that looks like a list marker ("-S").
        let content = r#"# Heading

> -S socket-path
> More text
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Line 3 (index 2).
        assert!(
            ctx.lines.get(2).is_some_and(|l| l.blockquote.is_some()),
            "Line 3 should be a blockquote"
        );
        // Line 4 (index 3).
        assert!(
            ctx.lines.get(3).is_some_and(|l| l.blockquote.is_some()),
            "Line 4 should be a blockquote"
        );

        // The recorded content excludes the marker and the whitespace after it.
        let bq3 = ctx.lines.get(2).unwrap().blockquote.as_ref().unwrap();
        assert_eq!(bq3.content, "-S socket-path");
        assert_eq!(bq3.nesting_level, 1);
        // NOTE(review): expects more than one space after `>` on line 3; confirm
        // the literal above carries that spacing.
        assert!(bq3.has_multiple_spaces_after_marker);

        let bq4 = ctx.lines.get(3).unwrap().blockquote.as_ref().unwrap();
        assert_eq!(bq4.content, "More text");
        assert_eq!(bq4.nesting_level, 1);
    }
4540
    #[test]
    fn test_footnote_definitions_not_parsed_as_reference_defs() {
        // `[^id]: ...` footnote definitions must be ignored; only `[regular]`
        // counts as a reference definition.
        let content = r#"# Title

A footnote[^1].

[^1]: This is the footnote content.

[^note]: Another footnote with [link](https://example.com).

[regular]: ./path.md "A real reference definition"
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(
            ctx.reference_defs.len(),
            1,
            "Footnotes should not be parsed as reference definitions"
        );

        // The surviving definition keeps its id, URL and quoted title.
        assert_eq!(ctx.reference_defs[0].id, "regular");
        assert_eq!(ctx.reference_defs[0].url, "./path.md");
        assert_eq!(
            ctx.reference_defs[0].title,
            Some("A real reference definition".to_string())
        );
    }
4571
    #[test]
    fn test_footnote_with_inline_link_not_misidentified() {
        // A footnote whose body starts with an inline link must not be taken
        // for a reference definition.
        let content = r#"# Title

A footnote[^1].

[^1]: [link](https://www.google.com).
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(
            ctx.reference_defs.is_empty(),
            "Footnote with inline link should not create a reference definition"
        );
    }
4590
    #[test]
    fn test_various_footnote_formats_excluded() {
        // Numeric, named, single-character, hyphenated and mixed footnote ids
        // are all excluded; the two plain references remain.
        let content = r#"[^1]: Numeric footnote
[^note]: Named footnote
[^a]: Single char footnote
[^long-footnote-name]: Long named footnote
[^123abc]: Mixed alphanumeric

[ref1]: ./file1.md
[ref2]: ./file2.md
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(
            ctx.reference_defs.len(),
            2,
            "Only regular reference definitions should be parsed"
        );

        // No collected id may start with the footnote caret.
        let ids: Vec<&str> = ctx.reference_defs.iter().map(|r| r.id.as_str()).collect();
        assert!(ids.contains(&"ref1"));
        assert!(ids.contains(&"ref2"));
        assert!(!ids.iter().any(|id| id.starts_with('^')));
    }
4617
    #[test]
    fn test_has_char_tracked_characters() {
        // The content contains every character queried below at least once.
        let content = "# Heading\n* list item\n_emphasis_ and -hyphen-\n+ plus\n> quote\n| table |\n[link]\n`code`\n<html>\n!image";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(ctx.has_char('#'), "Should detect hash");
        assert!(ctx.has_char('*'), "Should detect asterisk");
        assert!(ctx.has_char('_'), "Should detect underscore");
        assert!(ctx.has_char('-'), "Should detect hyphen");
        assert!(ctx.has_char('+'), "Should detect plus");
        assert!(ctx.has_char('>'), "Should detect gt");
        assert!(ctx.has_char('|'), "Should detect pipe");
        assert!(ctx.has_char('['), "Should detect bracket");
        assert!(ctx.has_char('`'), "Should detect backtick");
        assert!(ctx.has_char('<'), "Should detect lt");
        assert!(ctx.has_char('!'), "Should detect exclamation");
        assert!(ctx.has_char('\n'), "Should detect newline");
    }
4642
    #[test]
    fn test_has_char_absent_characters() {
        // Plain single-line text: none of the queried characters occur.
        let content = "Simple text without special chars";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(!ctx.has_char('#'), "Should not detect hash");
        assert!(!ctx.has_char('*'), "Should not detect asterisk");
        assert!(!ctx.has_char('_'), "Should not detect underscore");
        assert!(!ctx.has_char('-'), "Should not detect hyphen");
        assert!(!ctx.has_char('+'), "Should not detect plus");
        assert!(!ctx.has_char('>'), "Should not detect gt");
        assert!(!ctx.has_char('|'), "Should not detect pipe");
        assert!(!ctx.has_char('['), "Should not detect bracket");
        assert!(!ctx.has_char('`'), "Should not detect backtick");
        assert!(!ctx.has_char('<'), "Should not detect lt");
        assert!(!ctx.has_char('!'), "Should not detect exclamation");
        assert!(!ctx.has_char('\n'), "Should not detect newline in single line");
    }
4663
    #[test]
    fn test_has_char_fallback_for_untracked() {
        // '@', '$' and '%' are queried through what the assertion messages call
        // the fallback path; '^' is absent from the content.
        let content = "Text with @mention and $dollar and %percent";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(ctx.has_char('@'), "Should detect @ via fallback");
        assert!(ctx.has_char('$'), "Should detect $ via fallback");
        assert!(ctx.has_char('%'), "Should detect % via fallback");
        assert!(!ctx.has_char('^'), "Should not detect absent ^ via fallback");
    }
4675
    #[test]
    fn test_char_count_tracked_characters() {
        // Eleven lines, each repeating one special character a known number of times.
        let content = "## Heading ##\n***bold***\n__emphasis__\n---\n+++\n>> nested\n|| table ||\n[[link]]\n``code``\n<<html>>\n!!";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.char_count('#'), 4, "Should count 4 hashes");
        assert_eq!(ctx.char_count('*'), 6, "Should count 6 asterisks");
        assert_eq!(ctx.char_count('_'), 4, "Should count 4 underscores");
        assert_eq!(ctx.char_count('-'), 3, "Should count 3 hyphens");
        assert_eq!(ctx.char_count('+'), 3, "Should count 3 pluses");
        assert_eq!(ctx.char_count('>'), 4, "Should count 4 gt (2 nested + 2 in <<html>>)");
        assert_eq!(ctx.char_count('|'), 4, "Should count 4 pipes");
        // Only opening brackets are counted for '['.
        assert_eq!(ctx.char_count('['), 2, "Should count 2 brackets");
        assert_eq!(ctx.char_count('`'), 4, "Should count 4 backticks");
        assert_eq!(ctx.char_count('<'), 2, "Should count 2 lt");
        assert_eq!(ctx.char_count('!'), 2, "Should count 2 exclamations");
        // Eleven lines are joined by ten newlines.
        assert_eq!(ctx.char_count('\n'), 10, "Should count 10 newlines");
    }
4695
    #[test]
    fn test_char_count_zero_for_absent() {
        // Characters that never occur must report a count of zero.
        let content = "Plain text";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.char_count('#'), 0);
        assert_eq!(ctx.char_count('*'), 0);
        assert_eq!(ctx.char_count('_'), 0);
        assert_eq!(ctx.char_count('\n'), 0);
    }
4706
    #[test]
    fn test_char_count_fallback_for_untracked() {
        // Counts for '@', '$' and '%' must be exact; '^' is absent from the content.
        let content = "@@@ $$ %%%";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.char_count('@'), 3, "Should count 3 @ via fallback");
        assert_eq!(ctx.char_count('$'), 2, "Should count 2 $ via fallback");
        assert_eq!(ctx.char_count('%'), 3, "Should count 3 % via fallback");
        assert_eq!(ctx.char_count('^'), 0, "Should count 0 for absent char");
    }
4717
    #[test]
    fn test_char_count_empty_content() {
        // Empty content: every count is zero and every presence check is false.
        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);

        assert_eq!(ctx.char_count('#'), 0);
        assert_eq!(ctx.char_count('*'), 0);
        assert_eq!(ctx.char_count('@'), 0);
        assert!(!ctx.has_char('#'));
        assert!(!ctx.has_char('@'));
    }
4728
    #[test]
    fn test_is_in_html_tag_simple() {
        // Byte layout: "<div>" is 0..=4, "content" is 5..=11, "</div>" is 12..=17.
        let content = "<div>content</div>";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Opening tag, angle brackets included.
        assert!(ctx.is_in_html_tag(0), "Position 0 (<) should be in tag");
        assert!(ctx.is_in_html_tag(1), "Position 1 (d) should be in tag");
        assert!(ctx.is_in_html_tag(4), "Position 4 (>) should be in tag");

        // Text between the tags.
        assert!(!ctx.is_in_html_tag(5), "Position 5 (c) should not be in tag");
        assert!(!ctx.is_in_html_tag(10), "Position 10 (t) should not be in tag");

        // Closing tag.
        assert!(ctx.is_in_html_tag(12), "Position 12 (<) should be in tag");
        assert!(ctx.is_in_html_tag(17), "Position 17 (>) should be in tag");
    }
4751
    #[test]
    fn test_is_in_html_tag_self_closing() {
        // "<br/>" occupies bytes 5..=9 of the content.
        let content = "Text <br/> more text";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Before the tag.
        assert!(!ctx.is_in_html_tag(0), "Position 0 should not be in tag");
        assert!(!ctx.is_in_html_tag(4), "Position 4 (space) should not be in tag");

        // Inside the self-closing tag.
        assert!(ctx.is_in_html_tag(5), "Position 5 (<) should be in tag");
        assert!(ctx.is_in_html_tag(8), "Position 8 (/) should be in tag");
        assert!(ctx.is_in_html_tag(9), "Position 9 (>) should be in tag");

        // After the tag.
        assert!(!ctx.is_in_html_tag(10), "Position 10 (space) should not be in tag");
    }
4769
    #[test]
    fn test_is_in_html_tag_with_attributes() {
        // The opening tag with its attributes spans bytes 0..=26; "text" is 27..=30.
        let content = r#"<a href="url" class="link">text</a>"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Attribute values are part of the tag.
        assert!(ctx.is_in_html_tag(0), "Start of tag");
        assert!(ctx.is_in_html_tag(10), "Inside href attribute");
        assert!(ctx.is_in_html_tag(20), "Inside class attribute");
        assert!(ctx.is_in_html_tag(26), "End of opening tag");

        // Link text.
        assert!(!ctx.is_in_html_tag(27), "Start of content");
        assert!(!ctx.is_in_html_tag(30), "End of content");

        // Closing tag.
        assert!(ctx.is_in_html_tag(31), "Start of closing tag");
    }
4788
    #[test]
    fn test_is_in_html_tag_multiline() {
        // The opening tag is spread over three lines.
        let content = "<div\n class=\"test\"\n>\ncontent\n</div>";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(ctx.is_in_html_tag(0), "Start of multiline tag");
        assert!(ctx.is_in_html_tag(5), "After first newline in tag");
        assert!(ctx.is_in_html_tag(15), "Inside attribute");

        // Two bytes past the tag's closing `>` is the first byte of "content".
        let closing_bracket_pos = content.find(">\n").unwrap();
        assert!(!ctx.is_in_html_tag(closing_bracket_pos + 2), "Content after tag");
    }
4803
    #[test]
    fn test_is_in_html_tag_no_tags() {
        // Without any tag, no byte position may be reported as inside one.
        let content = "Plain text without any HTML";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        for i in 0..content.len() {
            assert!(!ctx.is_in_html_tag(i), "Position {i} should not be in tag");
        }
    }
4814
    #[test]
    fn test_is_in_jinja_range_expression() {
        // "{{ name }}" occupies bytes 6..=15 of the content.
        let content = "Hello {{ name }}!";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Before the expression.
        assert!(!ctx.is_in_jinja_range(0), "H should not be in Jinja");
        assert!(!ctx.is_in_jinja_range(5), "Space before Jinja should not be in Jinja");

        // Delimiters and body are all inside the range.
        assert!(ctx.is_in_jinja_range(6), "First brace should be in Jinja");
        assert!(ctx.is_in_jinja_range(7), "Second brace should be in Jinja");
        assert!(ctx.is_in_jinja_range(10), "name should be in Jinja");
        assert!(ctx.is_in_jinja_range(14), "Closing brace should be in Jinja");
        assert!(ctx.is_in_jinja_range(15), "Second closing brace should be in Jinja");

        // After the expression.
        assert!(!ctx.is_in_jinja_range(16), "! should not be in Jinja");
    }
4838
    #[test]
    fn test_is_in_jinja_range_statement() {
        // "{% if condition %}" is bytes 0..=17, "content" is 18..=24 and
        // "{% endif %}" starts at 25.
        let content = "{% if condition %}content{% endif %}";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Opening statement.
        assert!(ctx.is_in_jinja_range(0), "Start of Jinja statement");
        assert!(ctx.is_in_jinja_range(5), "condition should be in Jinja");
        assert!(ctx.is_in_jinja_range(17), "End of opening statement");

        // Text between the statements.
        assert!(!ctx.is_in_jinja_range(18), "content should not be in Jinja");

        // Closing statement.
        assert!(ctx.is_in_jinja_range(25), "Start of endif");
        assert!(ctx.is_in_jinja_range(32), "endif should be in Jinja");
    }
4856
    #[test]
    fn test_is_in_jinja_range_multiple() {
        // Two expressions: bytes 0..=6 and 12..=18, separated by " and ".
        let content = "{{ a }} and {{ b }}";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // First expression.
        assert!(ctx.is_in_jinja_range(0));
        assert!(ctx.is_in_jinja_range(3));
        assert!(ctx.is_in_jinja_range(6));

        // Text between the expressions.
        assert!(!ctx.is_in_jinja_range(8));
        assert!(!ctx.is_in_jinja_range(11));

        // Second expression.
        assert!(ctx.is_in_jinja_range(12));
        assert!(ctx.is_in_jinja_range(15));
        assert!(ctx.is_in_jinja_range(18));
    }
4876
    #[test]
    fn test_is_in_jinja_range_no_jinja() {
        // Text without Jinja delimiters: no position may fall inside a Jinja range.
        let content = "Plain text with single braces but not Jinja";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        for i in 0..content.len() {
            assert!(!ctx.is_in_jinja_range(i), "Position {i} should not be in Jinja");
        }
    }
4887
    #[test]
    fn test_is_in_link_title_with_title() {
        // A reference definition with a double-quoted title records the title's
        // byte range, which `is_in_link_title` must honour.
        let content = r#"[ref]: https://example.com "Title text"

Some content."#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.reference_defs.len(), 1);
        let def = &ctx.reference_defs[0];
        assert!(def.title_byte_start.is_some());
        assert!(def.title_byte_end.is_some());

        let title_start = def.title_byte_start.unwrap();
        let title_end = def.title_byte_end.unwrap();

        // Byte 10 lies inside the URL.
        assert!(!ctx.is_in_link_title(10), "URL should not be in title");

        // The range is inclusive at the start...
        assert!(ctx.is_in_link_title(title_start), "Title start should be in title");
        assert!(
            ctx.is_in_link_title(title_start + 5),
            "Middle of title should be in title"
        );
        assert!(ctx.is_in_link_title(title_end - 1), "End of title should be in title");

        // ...and exclusive at the end.
        assert!(
            !ctx.is_in_link_title(title_end),
            "After title end should not be in title"
        );
    }
4925
    #[test]
    fn test_is_in_link_title_without_title() {
        // A definition without a title records no title range.
        let content = "[ref]: https://example.com\n\nSome content.";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.reference_defs.len(), 1);
        let def = &ctx.reference_defs[0];
        assert!(def.title_byte_start.is_none());
        assert!(def.title_byte_end.is_none());

        // Consequently no position is inside a title.
        for i in 0..content.len() {
            assert!(!ctx.is_in_link_title(i), "Position {i} should not be in title");
        }
    }
4942
    #[test]
    fn test_is_in_link_title_multiple_refs() {
        // Three definitions; only the first and third carry a title.
        let content = r#"[ref1]: /url1 "Title One"
[ref2]: /url2
[ref3]: /url3 "Title Three"
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.reference_defs.len(), 3);

        // ref1 has a title.
        let ref1 = ctx.reference_defs.iter().find(|r| r.id == "ref1").unwrap();
        assert!(ref1.title_byte_start.is_some());

        // ref2 has none.
        let ref2 = ctx.reference_defs.iter().find(|r| r.id == "ref2").unwrap();
        assert!(ref2.title_byte_start.is_none());

        // ref3 has a title.
        let ref3 = ctx.reference_defs.iter().find(|r| r.id == "ref3").unwrap();
        assert!(ref3.title_byte_start.is_some());

        // Inside ref1's title, and a few bytes past its end (inside ref2's line).
        if let (Some(start), Some(end)) = (ref1.title_byte_start, ref1.title_byte_end) {
            assert!(ctx.is_in_link_title(start + 1));
            assert!(!ctx.is_in_link_title(end + 5));
        }

        // Inside ref3's title.
        if let (Some(start), Some(_end)) = (ref3.title_byte_start, ref3.title_byte_end) {
            assert!(ctx.is_in_link_title(start + 1));
        }
    }
4977
    #[test]
    fn test_is_in_link_title_single_quotes() {
        // Single-quoted titles: when a title range is recorded it must behave
        // like the double-quoted case (inclusive start, exclusive end).
        let content = "[ref]: /url 'Single quoted title'\n";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.reference_defs.len(), 1);
        let def = &ctx.reference_defs[0];

        if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
            assert!(ctx.is_in_link_title(start));
            assert!(ctx.is_in_link_title(start + 5));
            assert!(!ctx.is_in_link_title(end));
        }
    }
4992
    #[test]
    fn test_is_in_link_title_parentheses() {
        // Parenthesized titles: the test accepts either parser outcome and only
        // checks that `is_in_link_title` is consistent with what was recorded.
        let content = "[ref]: /url (Parenthesized title)\n";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        if ctx.reference_defs.is_empty() {
            // Definition not recognized at all: nothing can be in a title.
            for i in 0..content.len() {
                assert!(!ctx.is_in_link_title(i));
            }
        } else {
            let def = &ctx.reference_defs[0];
            if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
                // Title recognized: the usual half-open range semantics apply.
                assert!(ctx.is_in_link_title(start));
                assert!(ctx.is_in_link_title(start + 5));
                assert!(!ctx.is_in_link_title(end));
            } else {
                // Definition recognized without a title range.
                for i in 0..content.len() {
                    assert!(!ctx.is_in_link_title(i));
                }
            }
        }
    }
5021
    #[test]
    fn test_is_in_link_title_no_refs() {
        // No reference definitions at all: no position is inside a title.
        let content = "Just plain text without any reference definitions.";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(ctx.reference_defs.is_empty());

        for i in 0..content.len() {
            assert!(!ctx.is_in_link_title(i));
        }
    }
5033
    #[test]
    fn test_math_spans_inline() {
        // Inline math whose body looks like a markdown link.
        let content = "Text with inline math $[f](x)$ in it.";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let math_spans = ctx.math_spans();
        assert_eq!(math_spans.len(), 1, "Should detect one inline math span");

        // The recorded content excludes the `$` delimiters.
        let span = &math_spans[0];
        assert!(!span.is_display, "Should be inline math, not display");
        assert_eq!(span.content, "[f](x)", "Content should be extracted correctly");
    }
5050
    #[test]
    fn test_math_spans_display_single_line() {
        // `$$...$$` on a single line is display math.
        let content = "$$X(\\zeta) = \\mathcal Z [x](\\zeta)$$";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let math_spans = ctx.math_spans();
        assert_eq!(math_spans.len(), 1, "Should detect one display math span");

        let span = &math_spans[0];
        assert!(span.is_display, "Should be display math");
        assert!(
            span.content.contains("[x](\\zeta)"),
            "Content should contain the link-like pattern"
        );
    }
5066
    #[test]
    fn test_math_spans_display_multiline() {
        // `$$` delimiters on their own lines enclose a single display span.
        let content = "Before\n\n$$\n[x](\\zeta) = \\sum_k x(k)\n$$\n\nAfter";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let math_spans = ctx.math_spans();
        assert_eq!(math_spans.len(), 1, "Should detect one display math span");

        let span = &math_spans[0];
        assert!(span.is_display, "Should be display math");
    }
5078
    #[test]
    fn test_is_in_math_span() {
        // Positions are derived from the `$` delimiters in the content.
        let content = "Text $[f](x)$ more text";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // `math_start` is the opening `$`; `math_end` is one past the closing `$`.
        let math_start = content.find('$').unwrap();
        let math_end = content.rfind('$').unwrap() + 1;

        assert!(
            ctx.is_in_math_span(math_start + 1),
            "Position inside math span should return true"
        );
        assert!(
            ctx.is_in_math_span(math_start + 3),
            "Position inside math span should return true"
        );

        assert!(!ctx.is_in_math_span(0), "Position before math span should return false");
        assert!(
            !ctx.is_in_math_span(math_end + 1),
            "Position after math span should return false"
        );
    }
5104
    #[test]
    fn test_math_spans_mixed_with_code() {
        // Math and code spans on the same line are tracked independently.
        let content = "Math $[f](x)$ and code `[g](y)` mixed";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let math_spans = ctx.math_spans();
        let code_spans = ctx.code_spans();

        assert_eq!(math_spans.len(), 1, "Should have one math span");
        assert_eq!(code_spans.len(), 1, "Should have one code span");

        // Each span reports its own content without delimiters.
        assert_eq!(math_spans[0].content, "[f](x)");
        assert_eq!(code_spans[0].content, "[g](y)");
    }
5121
    #[test]
    fn test_math_spans_no_math() {
        // Content without any `$` yields no math spans.
        let content = "Regular text without any math at all.";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let math_spans = ctx.math_spans();
        assert!(math_spans.is_empty(), "Should have no math spans");
    }
5130
    #[test]
    fn test_math_spans_multiple() {
        // Two inline spans followed by one display span on the same line.
        let content = "First $a$ and second $b$ and display $$c$$";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let math_spans = ctx.math_spans();
        assert_eq!(math_spans.len(), 3, "Should detect three math spans");

        // Split the spans by kind.
        let inline_count = math_spans.iter().filter(|s| !s.is_display).count();
        let display_count = math_spans.iter().filter(|s| s.is_display).count();

        assert_eq!(inline_count, 2, "Should have two inline math spans");
        assert_eq!(display_count, 1, "Should have one display math span");
    }
5146
    #[test]
    fn test_is_in_math_span_boundary_positions() {
        // The span covers the whole content; checks the half-open range
        // [byte_offset, byte_end).
        let content = "$[f](x)$";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let math_spans = ctx.math_spans();
        assert_eq!(math_spans.len(), 1, "Should have one math span");

        let span = &math_spans[0];

        // Inclusive start.
        assert!(
            ctx.is_in_math_span(span.byte_offset),
            "Start position should be in span"
        );

        // Just after the start.
        assert!(
            ctx.is_in_math_span(span.byte_offset + 1),
            "Position after start should be in span"
        );

        // Last byte of the span.
        assert!(
            ctx.is_in_math_span(span.byte_end - 1),
            "Position at end-1 should be in span"
        );

        // Exclusive end.
        assert!(
            !ctx.is_in_math_span(span.byte_end),
            "Position at byte_end should NOT be in span (exclusive)"
        );
    }
5183
    #[test]
    fn test_math_spans_at_document_start() {
        // A span opening on the very first byte must report byte_offset 0.
        let content = "$x$ text";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let math_spans = ctx.math_spans();
        assert_eq!(math_spans.len(), 1);
        assert_eq!(math_spans[0].byte_offset, 0, "Math should start at byte 0");
    }
5193
    #[test]
    fn test_math_spans_at_document_end() {
        // A span closing on the last byte must report byte_end == content length.
        let content = "text $x$";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let math_spans = ctx.math_spans();
        assert_eq!(math_spans.len(), 1);
        assert_eq!(math_spans[0].byte_end, content.len(), "Math should end at document end");
    }
5203
    #[test]
    fn test_math_spans_consecutive() {
        // Back-to-back spans with no text between them. The number of spans is
        // left open; only full coverage of the content is required.
        let content = "$a$$b$";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let math_spans = ctx.math_spans();
        assert!(!math_spans.is_empty(), "Should detect at least one math span");

        // Every byte must fall inside some math span.
        for i in 0..content.len() {
            assert!(ctx.is_in_math_span(i), "Position {i} should be in a math span");
        }
    }
5218
5219 #[test]
5220 fn test_math_spans_currency_not_math() {
5221 let content = "Price is $100";
5223 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5224
5225 let math_spans = ctx.math_spans();
5226 assert!(
5229 math_spans.is_empty() || !math_spans.iter().any(|s| s.content.contains("100")),
5230 "Unbalanced $ should not create math span containing 100"
5231 );
5232 }
5233
5234 #[test]
5239 fn test_reference_lookup_o1_basic() {
5240 let content = r#"[ref1]: /url1
5241[REF2]: /url2 "Title"
5242[Ref3]: /url3
5243
5244Use [link][ref1] and [link][REF2]."#;
5245 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5246
5247 assert_eq!(ctx.reference_defs.len(), 3);
5249
5250 assert_eq!(ctx.get_reference_url("ref1"), Some("/url1"));
5252 assert_eq!(ctx.get_reference_url("REF1"), Some("/url1")); assert_eq!(ctx.get_reference_url("Ref1"), Some("/url1")); assert_eq!(ctx.get_reference_url("ref2"), Some("/url2"));
5255 assert_eq!(ctx.get_reference_url("REF2"), Some("/url2"));
5256 assert_eq!(ctx.get_reference_url("ref3"), Some("/url3"));
5257 assert_eq!(ctx.get_reference_url("nonexistent"), None);
5258 }
5259
5260 #[test]
5261 fn test_reference_lookup_o1_get_reference_def() {
5262 let content = r#"[myref]: https://example.com "My Title"
5263"#;
5264 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5265
5266 let def = ctx.get_reference_def("myref").expect("Should find myref");
5268 assert_eq!(def.url, "https://example.com");
5269 assert_eq!(def.title.as_deref(), Some("My Title"));
5270
5271 let def2 = ctx.get_reference_def("MYREF").expect("Should find MYREF");
5273 assert_eq!(def2.url, "https://example.com");
5274
5275 assert!(ctx.get_reference_def("nonexistent").is_none());
5277 }
5278
5279 #[test]
5280 fn test_reference_lookup_o1_has_reference_def() {
5281 let content = r#"[foo]: /foo
5282[BAR]: /bar
5283"#;
5284 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5285
5286 assert!(ctx.has_reference_def("foo"));
5288 assert!(ctx.has_reference_def("FOO")); assert!(ctx.has_reference_def("bar"));
5290 assert!(ctx.has_reference_def("Bar")); assert!(!ctx.has_reference_def("baz")); }
5293
5294 #[test]
5295 fn test_reference_lookup_o1_empty_content() {
5296 let content = "No references here.";
5297 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5298
5299 assert!(ctx.reference_defs.is_empty());
5300 assert_eq!(ctx.get_reference_url("anything"), None);
5301 assert!(ctx.get_reference_def("anything").is_none());
5302 assert!(!ctx.has_reference_def("anything"));
5303 }
5304
5305 #[test]
5306 fn test_reference_lookup_o1_special_characters_in_id() {
5307 let content = r#"[ref-with-dash]: /url1
5308[ref_with_underscore]: /url2
5309[ref.with.dots]: /url3
5310"#;
5311 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5312
5313 assert_eq!(ctx.get_reference_url("ref-with-dash"), Some("/url1"));
5314 assert_eq!(ctx.get_reference_url("ref_with_underscore"), Some("/url2"));
5315 assert_eq!(ctx.get_reference_url("ref.with.dots"), Some("/url3"));
5316 }
5317
5318 #[test]
5319 fn test_reference_lookup_o1_unicode_id() {
5320 let content = r#"[日本語]: /japanese
5321[émoji]: /emoji
5322"#;
5323 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5324
5325 assert_eq!(ctx.get_reference_url("日本語"), Some("/japanese"));
5326 assert_eq!(ctx.get_reference_url("émoji"), Some("/emoji"));
5327 assert_eq!(ctx.get_reference_url("ÉMOJI"), Some("/emoji")); }
5329}