1use crate::config::MarkdownFlavor;
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
4use crate::utils::element_cache::ElementCache;
5use crate::utils::regex_cache::URL_SIMPLE_REGEX;
6use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
7use regex::Regex;
8use std::borrow::Cow;
9use std::collections::HashMap;
10use std::path::PathBuf;
11use std::sync::LazyLock;
12
/// Evaluates `$code` and yields its value. When `$profile` is true, the
/// elapsed wall-clock time is printed to stderr as `[PROFILE] <name>: <time>`.
///
/// The timer is started unconditionally; only the printing is gated.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let start = std::time::Instant::now();
        let result = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, start.elapsed());
        }
        result
    }};
}
25
/// wasm32 variant of `profile_section!`: evaluates `$code` and yields its
/// value without any timing. `$name` and `$profile` are accepted but unused.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
30
31static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
34 Regex::new(
35 r#"(?sx)
36 \[((?:[^\[\]\\]|\\.)*)\] # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
37 (?:
38 \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
39 |
40 \[([^\]]*)\] # Reference ID in group 6
41 )"#
42 ).unwrap()
43});
44
45static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
48 Regex::new(
49 r#"(?sx)
50 !\[((?:[^\[\]\\]|\\.)*)\] # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
51 (?:
52 \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
53 |
54 \[([^\]]*)\] # Reference ID in group 6
55 )"#
56 ).unwrap()
57});
58
59static REF_DEF_PATTERN: LazyLock<Regex> =
61 LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
62
63static BARE_EMAIL_PATTERN: LazyLock<Regex> =
67 LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
68
69static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
71
72#[derive(Debug, Clone)]
74pub struct LineInfo {
75 pub byte_offset: usize,
77 pub byte_len: usize,
79 pub indent: usize,
81 pub visual_indent: usize,
85 pub is_blank: bool,
87 pub in_code_block: bool,
89 pub in_front_matter: bool,
91 pub in_html_block: bool,
93 pub in_html_comment: bool,
95 pub list_item: Option<ListItemInfo>,
97 pub heading: Option<HeadingInfo>,
99 pub blockquote: Option<BlockquoteInfo>,
101 pub in_mkdocstrings: bool,
103 pub in_esm_block: bool,
105 pub in_code_span_continuation: bool,
107 pub is_horizontal_rule: bool,
110 pub in_math_block: bool,
112}
113
114impl LineInfo {
115 pub fn content<'a>(&self, source: &'a str) -> &'a str {
117 &source[self.byte_offset..self.byte_offset + self.byte_len]
118 }
119}
120
/// Details of a list item found on a line.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker text as written.
    pub marker: String,
    /// `true` for ordered (numbered) items, `false` for bullet items.
    pub is_ordered: bool,
    /// The item number, when one applies.
    pub number: Option<usize>,
    /// Column of the marker. NOTE(review): indexing base not visible here — confirm.
    pub marker_column: usize,
    /// Column where the item content starts.
    pub content_column: usize,
}
135
/// Syntactic form of a heading.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HeadingStyle {
    /// `#`-prefixed heading.
    ATX,
    /// Setext heading, first underline form. NOTE(review): presumably `=` — confirm.
    Setext1,
    /// Setext heading, second underline form. NOTE(review): presumably `-` — confirm.
    Setext2,
}
146
147#[derive(Debug, Clone)]
149pub struct ParsedLink<'a> {
150 pub line: usize,
152 pub start_col: usize,
154 pub end_col: usize,
156 pub byte_offset: usize,
158 pub byte_end: usize,
160 pub text: Cow<'a, str>,
162 pub url: Cow<'a, str>,
164 pub is_reference: bool,
166 pub reference_id: Option<Cow<'a, str>>,
168 pub link_type: LinkType,
170}
171
/// A reference that the Markdown parser reported as unresolved.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text as reported by the parser.
    pub reference: String,
    /// Byte span of the broken link in the document.
    pub span: std::ops::Range<usize>,
}
180
/// A footnote reference (e.g. `[^id]`) found in the document.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// Footnote identifier as reported by the parser.
    pub id: String,
    /// 1-based number of the line containing the reference.
    pub line: usize,
    /// Byte offset of the reference start within the document.
    pub byte_offset: usize,
    /// Byte offset just past the reference.
    pub byte_end: usize,
}
193
194#[derive(Debug, Clone)]
196pub struct ParsedImage<'a> {
197 pub line: usize,
199 pub start_col: usize,
201 pub end_col: usize,
203 pub byte_offset: usize,
205 pub byte_end: usize,
207 pub alt_text: Cow<'a, str>,
209 pub url: Cow<'a, str>,
211 pub is_reference: bool,
213 pub reference_id: Option<Cow<'a, str>>,
215 pub link_type: LinkType,
217}
218
/// A link reference definition (`[id]: url "title"`).
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line on which the definition appears.
    pub line: usize,
    /// Reference id; lookups compare it case-insensitively via lower-casing.
    pub id: String,
    /// Destination URL.
    pub url: String,
    /// Optional title.
    pub title: Option<String>,
    /// Byte offset of the definition start (inclusive).
    pub byte_offset: usize,
    /// Byte offset of the definition end (exclusive).
    pub byte_end: usize,
    /// Byte offset of the title start (inclusive), when a title is present.
    pub title_byte_start: Option<usize>,
    /// Byte offset of the title end (exclusive), when a title is present.
    pub title_byte_end: Option<usize>,
}
239
/// An inline code span, possibly spanning several lines.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// 1-based line on which the span starts.
    pub line: usize,
    /// 1-based line on which the span ends (equal to `line` for single-line spans).
    pub end_line: usize,
    /// 0-based start column on `line` (inclusive).
    pub start_col: usize,
    /// 0-based end column on `end_line` (exclusive).
    pub end_col: usize,
    /// Byte offset of the span start (inclusive).
    pub byte_offset: usize,
    /// Byte offset of the span end (exclusive).
    pub byte_end: usize,
    /// Number of backticks in the delimiter.
    pub backtick_count: usize,
    /// Content of the span.
    pub content: String,
}
260
/// A math span, possibly spanning several lines.
#[derive(Debug, Clone)]
pub struct MathSpan {
    /// Line on which the span starts.
    pub line: usize,
    /// Line on which the span ends.
    pub end_line: usize,
    /// Start column of the span.
    pub start_col: usize,
    /// End column of the span.
    pub end_col: usize,
    /// Byte offset of the span start (inclusive).
    pub byte_offset: usize,
    /// Byte offset of the span end (exclusive).
    pub byte_end: usize,
    /// Whether this is display math rather than inline math.
    pub is_display: bool,
    /// Content of the span.
    pub content: String,
}
281
282#[derive(Debug, Clone)]
284pub struct HeadingInfo {
285 pub level: u8,
287 pub style: HeadingStyle,
289 pub marker: String,
291 pub marker_column: usize,
293 pub content_column: usize,
295 pub text: String,
297 pub custom_id: Option<String>,
299 pub raw_text: String,
301 pub has_closing_sequence: bool,
303 pub closing_sequence: String,
305 pub is_valid: bool,
308}
309
310#[derive(Debug, Clone)]
315pub struct ValidHeading<'a> {
316 pub line_num: usize,
318 pub heading: &'a HeadingInfo,
320 pub line_info: &'a LineInfo,
322}
323
324pub struct ValidHeadingsIter<'a> {
329 lines: &'a [LineInfo],
330 current_index: usize,
331}
332
333impl<'a> ValidHeadingsIter<'a> {
334 fn new(lines: &'a [LineInfo]) -> Self {
335 Self {
336 lines,
337 current_index: 0,
338 }
339 }
340}
341
342impl<'a> Iterator for ValidHeadingsIter<'a> {
343 type Item = ValidHeading<'a>;
344
345 fn next(&mut self) -> Option<Self::Item> {
346 while self.current_index < self.lines.len() {
347 let idx = self.current_index;
348 self.current_index += 1;
349
350 let line_info = &self.lines[idx];
351 if let Some(heading) = &line_info.heading
352 && heading.is_valid
353 {
354 return Some(ValidHeading {
355 line_num: idx + 1, heading,
357 line_info,
358 });
359 }
360 }
361 None
362 }
363}
364
/// Details of a blockquote prefix found on a line.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting depth of the blockquote.
    pub nesting_level: usize,
    /// Whitespace preceding the first `>` marker.
    pub indent: String,
    /// Column of the first marker.
    pub marker_column: usize,
    /// The blockquote prefix as written; `blockquote_prefix_for_blank_line`
    /// trims trailing whitespace from it.
    pub prefix: String,
    /// Line content following the prefix.
    pub content: String,
    /// Whether the marker is followed directly by content with no space.
    pub has_no_space_after_marker: bool,
    /// Whether more than one space follows the marker.
    pub has_multiple_spaces_after_marker: bool,
    /// Flag consumed by rule MD028. NOTE(review): exact criterion not visible here.
    pub needs_md028_fix: bool,
}
385
/// A contiguous list in the document.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// First line of the block (inclusive; compared directly with line numbers).
    pub start_line: usize,
    /// Last line of the block (inclusive).
    pub end_line: usize,
    /// Whether the list is ordered.
    pub is_ordered: bool,
    /// Marker used by the list, when a single marker applies.
    pub marker: Option<String>,
    /// Blockquote prefix shared by the block (empty when not quoted).
    pub blockquote_prefix: String,
    /// Line numbers of the list items in this block.
    pub item_lines: Vec<usize>,
    /// Nesting depth of the block.
    pub nesting_level: usize,
    /// Width of the widest marker in the block.
    pub max_marker_width: usize,
}
406
407use std::sync::{Arc, OnceLock};
408
/// Map from a `usize` key to list-item data stored as a tuple.
///
/// NOTE(review): the key is presumably a line index and the tuple presumably
/// `(is_ordered, marker, marker_column, content_column, number)` — confirm
/// against the code that fills it (not visible here).
type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;
411
/// Occurrence counts of Markdown-significant characters in a document.
///
/// `LintContext::has_char` / `char_count` map each character below to its field.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count reported for `#`.
    pub hash_count: usize,
    /// Count reported for `*`.
    pub asterisk_count: usize,
    /// Count reported for `_`.
    pub underscore_count: usize,
    /// Count reported for `-`.
    pub hyphen_count: usize,
    /// Count reported for `+`.
    pub plus_count: usize,
    /// Count reported for `>`.
    pub gt_count: usize,
    /// Count reported for `|`.
    pub pipe_count: usize,
    /// Count reported for `[`. NOTE(review): may also include `]` — confirm in the counter.
    pub bracket_count: usize,
    /// Count reported for `` ` ``.
    pub backtick_count: usize,
    /// Count reported for `<`.
    pub lt_count: usize,
    /// Count reported for `!`.
    pub exclamation_count: usize,
    /// Count reported for `\n`.
    pub newline_count: usize,
}
440
/// An HTML tag found in the document.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line containing the tag (matched against line numbers by `html_tags_on_line`).
    pub line: usize,
    /// Start column of the tag.
    pub start_col: usize,
    /// End column of the tag.
    pub end_col: usize,
    /// Byte offset of the tag start (inclusive).
    pub byte_offset: usize,
    /// Byte offset of the tag end (exclusive).
    pub byte_end: usize,
    /// Tag name.
    pub tag_name: String,
    /// Whether this is a closing tag.
    pub is_closing: bool,
    /// Whether the tag is self-closing.
    pub is_self_closing: bool,
    /// The tag text as written.
    pub raw_content: String,
}
463
/// An emphasis span (e.g. `*text*` or `__text__`) found in the document.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line containing the span (matched against line numbers by `emphasis_spans_on_line`).
    pub line: usize,
    /// Start column of the span.
    pub start_col: usize,
    /// End column of the span.
    pub end_col: usize,
    /// Byte offset of the span start.
    pub byte_offset: usize,
    /// Byte offset of the span end.
    pub byte_end: usize,
    /// The marker character used.
    pub marker: char,
    /// Number of marker characters on each side.
    pub marker_count: usize,
    /// Text inside the markers.
    pub content: String,
}
484
/// A table row found in the document.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line containing the row (matched against line numbers by `table_rows_on_line`).
    pub line: usize,
    /// Whether this row is the header separator row.
    pub is_separator: bool,
    /// Number of columns in the row.
    pub column_count: usize,
    /// Per-column alignment labels. NOTE(review): the label vocabulary is not visible here.
    pub column_alignments: Vec<String>,
}
497
/// A bare (un-bracketed) URL or e-mail address found in the document.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line containing the URL (matched against line numbers by `bare_urls_on_line`).
    pub line: usize,
    /// Start column of the URL.
    pub start_col: usize,
    /// End column of the URL.
    pub end_col: usize,
    /// Byte offset of the URL start.
    pub byte_offset: usize,
    /// Byte offset of the URL end.
    pub byte_end: usize,
    /// The URL text.
    pub url: String,
    /// Kind label for the URL. NOTE(review): the set of values is not visible here.
    pub url_type: String,
}
516
517pub struct LintContext<'a> {
518 pub content: &'a str,
519 pub line_offsets: Vec<usize>,
520 pub code_blocks: Vec<(usize, usize)>, pub lines: Vec<LineInfo>, pub links: Vec<ParsedLink<'a>>, pub images: Vec<ParsedImage<'a>>, pub broken_links: Vec<BrokenLinkInfo>, pub footnote_refs: Vec<FootnoteRef>, pub reference_defs: Vec<ReferenceDef>, reference_defs_map: HashMap<String, usize>, code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>, math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>, pub list_blocks: Vec<ListBlock>, pub char_frequency: CharFrequency, html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>, emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>, table_rows_cache: OnceLock<Arc<Vec<TableRow>>>, bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>, has_mixed_list_nesting_cache: OnceLock<bool>, html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>, pub table_blocks: Vec<crate::utils::table_utils::TableBlock>, pub line_index: crate::utils::range_utils::LineIndex<'a>, jinja_ranges: Vec<(usize, usize)>, pub flavor: MarkdownFlavor, pub source_file: Option<PathBuf>, }
544
/// The four consecutive pieces of a blockquote line, each borrowed from it.
struct BlockquoteComponents<'a> {
    /// Spaces/tabs before the first `>`.
    indent: &'a str,
    /// The run of `>` markers.
    markers: &'a str,
    /// Spaces/tabs directly after the markers.
    spaces_after: &'a str,
    /// Everything after `spaces_after`.
    content: &'a str,
}

/// Splits `line` into blockquote indent, markers, following whitespace and
/// content.
///
/// Returns `None` when the first character after any leading spaces/tabs is
/// not `>`. Only one consecutive run of `>` is consumed, so for `> > x` the
/// markers are `>` and the content is `> x`.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    fn is_blank(byte: u8) -> bool {
        byte == b' ' || byte == b'\t'
    }
    fn is_marker(byte: u8) -> bool {
        byte == b'>'
    }

    let bytes = line.as_bytes();
    // End (exclusive) of the run of bytes satisfying `keep`, starting at `from`.
    let run_end = |from: usize, keep: fn(u8) -> bool| -> usize {
        from + bytes[from..].iter().take_while(|&&b| keep(b)).count()
    };

    let indent_end = run_end(0, is_blank);
    if bytes.get(indent_end) != Some(&b'>') {
        return None;
    }
    let markers_end = run_end(indent_end, is_marker);
    let spaces_end = run_end(markers_end, is_blank);

    // Every split point follows an ASCII byte (or is 0), so it is always a
    // valid UTF-8 boundary and the slices below cannot panic.
    Some(BlockquoteComponents {
        indent: &line[..indent_end],
        markers: &line[indent_end..markers_end],
        spaces_after: &line[markers_end..spaces_end],
        content: &line[spaces_end..],
    })
}
589
590impl<'a> LintContext<'a> {
591 pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
592 #[cfg(not(target_arch = "wasm32"))]
593 let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
594 #[cfg(target_arch = "wasm32")]
595 let profile = false;
596
597 let line_offsets = profile_section!("Line offsets", profile, {
598 let mut offsets = vec![0];
599 for (i, c) in content.char_indices() {
600 if c == '\n' {
601 offsets.push(i + 1);
602 }
603 }
604 offsets
605 });
606
607 let code_blocks = profile_section!("Code blocks", profile, CodeBlockUtils::detect_code_blocks(content));
609
610 let html_comment_ranges = profile_section!(
612 "HTML comment ranges",
613 profile,
614 crate::utils::skip_context::compute_html_comment_ranges(content)
615 );
616
617 let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
619 if flavor == MarkdownFlavor::MkDocs {
620 crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
621 } else {
622 Vec::new()
623 }
624 });
625
626 let (mut lines, emphasis_spans) = profile_section!(
629 "Basic line info",
630 profile,
631 Self::compute_basic_line_info(
632 content,
633 &line_offsets,
634 &code_blocks,
635 flavor,
636 &html_comment_ranges,
637 &autodoc_ranges,
638 )
639 );
640
641 profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));
643
644 profile_section!(
646 "ESM blocks",
647 profile,
648 Self::detect_esm_blocks(content, &mut lines, flavor)
649 );
650
651 let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));
653
654 profile_section!(
656 "Headings & blockquotes",
657 profile,
658 Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
659 );
660
661 let code_spans = profile_section!("Code spans", profile, Self::parse_code_spans(content, &lines));
663
664 for span in &code_spans {
667 if span.end_line > span.line {
668 for line_num in (span.line + 1)..=span.end_line {
670 if let Some(line_info) = lines.get_mut(line_num - 1) {
671 line_info.in_code_span_continuation = true;
672 }
673 }
674 }
675 }
676
677 let (links, broken_links, footnote_refs) = profile_section!(
679 "Links",
680 profile,
681 Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
682 );
683
684 let images = profile_section!(
685 "Images",
686 profile,
687 Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
688 );
689
690 let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));
691
692 let reference_defs_map: HashMap<String, usize> = reference_defs
694 .iter()
695 .enumerate()
696 .map(|(idx, def)| (def.id.to_lowercase(), idx))
697 .collect();
698
699 let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));
700
701 let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));
703
704 let table_blocks = profile_section!(
706 "Table blocks",
707 profile,
708 crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
709 content,
710 &code_blocks,
711 &code_spans,
712 &html_comment_ranges,
713 )
714 );
715
716 let line_index = profile_section!(
718 "Line index",
719 profile,
720 crate::utils::range_utils::LineIndex::new(content)
721 );
722
723 let jinja_ranges = profile_section!(
725 "Jinja ranges",
726 profile,
727 crate::utils::jinja_utils::find_jinja_ranges(content)
728 );
729
730 Self {
731 content,
732 line_offsets,
733 code_blocks,
734 lines,
735 links,
736 images,
737 broken_links,
738 footnote_refs,
739 reference_defs,
740 reference_defs_map,
741 code_spans_cache: OnceLock::from(Arc::new(code_spans)),
742 math_spans_cache: OnceLock::new(), list_blocks,
744 char_frequency,
745 html_tags_cache: OnceLock::new(),
746 emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
747 table_rows_cache: OnceLock::new(),
748 bare_urls_cache: OnceLock::new(),
749 has_mixed_list_nesting_cache: OnceLock::new(),
750 html_comment_ranges,
751 table_blocks,
752 line_index,
753 jinja_ranges,
754 flavor,
755 source_file,
756 }
757 }
758
759 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
761 Arc::clone(
762 self.code_spans_cache
763 .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
764 )
765 }
766
767 pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
769 Arc::clone(
770 self.math_spans_cache
771 .get_or_init(|| Arc::new(Self::parse_math_spans(self.content, &self.lines))),
772 )
773 }
774
775 pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
777 let math_spans = self.math_spans();
778 math_spans
779 .iter()
780 .any(|span| byte_pos >= span.byte_offset && byte_pos < span.byte_end)
781 }
782
783 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
785 &self.html_comment_ranges
786 }
787
788 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
790 Arc::clone(self.html_tags_cache.get_or_init(|| {
791 Arc::new(Self::parse_html_tags(
792 self.content,
793 &self.lines,
794 &self.code_blocks,
795 self.flavor,
796 ))
797 }))
798 }
799
800 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
802 Arc::clone(
803 self.emphasis_spans_cache
804 .get()
805 .expect("emphasis_spans_cache initialized during construction"),
806 )
807 }
808
809 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
811 Arc::clone(
812 self.table_rows_cache
813 .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
814 )
815 }
816
817 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
819 Arc::clone(
820 self.bare_urls_cache
821 .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
822 )
823 }
824
825 pub fn has_mixed_list_nesting(&self) -> bool {
829 *self
830 .has_mixed_list_nesting_cache
831 .get_or_init(|| self.compute_mixed_list_nesting())
832 }
833
834 fn compute_mixed_list_nesting(&self) -> bool {
836 let mut stack: Vec<(usize, bool)> = Vec::new();
841 let mut last_was_blank = false;
842
843 for line_info in &self.lines {
844 if line_info.in_code_block
846 || line_info.in_front_matter
847 || line_info.in_mkdocstrings
848 || line_info.in_html_comment
849 || line_info.in_esm_block
850 {
851 continue;
852 }
853
854 if line_info.is_blank {
856 last_was_blank = true;
857 continue;
858 }
859
860 if let Some(list_item) = &line_info.list_item {
861 let current_pos = if list_item.marker_column == 1 {
863 0
864 } else {
865 list_item.marker_column
866 };
867
868 if last_was_blank && current_pos == 0 {
870 stack.clear();
871 }
872 last_was_blank = false;
873
874 while let Some(&(pos, _)) = stack.last() {
876 if pos >= current_pos {
877 stack.pop();
878 } else {
879 break;
880 }
881 }
882
883 if let Some(&(_, parent_is_ordered)) = stack.last()
885 && parent_is_ordered != list_item.is_ordered
886 {
887 return true; }
889
890 stack.push((current_pos, list_item.is_ordered));
891 } else {
892 last_was_blank = false;
894 }
895 }
896
897 false
898 }
899
900 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
902 match self.line_offsets.binary_search(&offset) {
903 Ok(line) => (line + 1, 1),
904 Err(line) => {
905 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
906 (line, offset - line_start + 1)
907 }
908 }
909 }
910
911 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
913 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
915 return true;
916 }
917
918 self.code_spans()
920 .iter()
921 .any(|span| pos >= span.byte_offset && pos < span.byte_end)
922 }
923
924 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
926 if line_num > 0 {
927 self.lines.get(line_num - 1)
928 } else {
929 None
930 }
931 }
932
933 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
935 self.line_info(line_num).map(|info| info.byte_offset)
936 }
937
938 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
940 let normalized_id = ref_id.to_lowercase();
941 self.reference_defs_map
942 .get(&normalized_id)
943 .map(|&idx| self.reference_defs[idx].url.as_str())
944 }
945
946 pub fn get_reference_def(&self, ref_id: &str) -> Option<&ReferenceDef> {
948 let normalized_id = ref_id.to_lowercase();
949 self.reference_defs_map
950 .get(&normalized_id)
951 .map(|&idx| &self.reference_defs[idx])
952 }
953
954 pub fn has_reference_def(&self, ref_id: &str) -> bool {
956 let normalized_id = ref_id.to_lowercase();
957 self.reference_defs_map.contains_key(&normalized_id)
958 }
959
960 pub fn is_in_list_block(&self, line_num: usize) -> bool {
962 self.list_blocks
963 .iter()
964 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
965 }
966
967 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
969 self.list_blocks
970 .iter()
971 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
972 }
973
974 pub fn is_in_code_block(&self, line_num: usize) -> bool {
978 if line_num == 0 || line_num > self.lines.len() {
979 return false;
980 }
981 self.lines[line_num - 1].in_code_block
982 }
983
984 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
986 if line_num == 0 || line_num > self.lines.len() {
987 return false;
988 }
989 self.lines[line_num - 1].in_front_matter
990 }
991
992 pub fn is_in_html_block(&self, line_num: usize) -> bool {
994 if line_num == 0 || line_num > self.lines.len() {
995 return false;
996 }
997 self.lines[line_num - 1].in_html_block
998 }
999
1000 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
1002 if line_num == 0 || line_num > self.lines.len() {
1003 return false;
1004 }
1005
1006 let col_0indexed = if col > 0 { col - 1 } else { 0 };
1010 let code_spans = self.code_spans();
1011 code_spans.iter().any(|span| {
1012 if line_num < span.line || line_num > span.end_line {
1014 return false;
1015 }
1016
1017 if span.line == span.end_line {
1018 col_0indexed >= span.start_col && col_0indexed < span.end_col
1020 } else if line_num == span.line {
1021 col_0indexed >= span.start_col
1023 } else if line_num == span.end_line {
1024 col_0indexed < span.end_col
1026 } else {
1027 true
1029 }
1030 })
1031 }
1032
1033 #[inline]
1035 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
1036 let code_spans = self.code_spans();
1037 code_spans
1038 .iter()
1039 .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
1040 }
1041
1042 #[inline]
1045 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
1046 self.reference_defs
1047 .iter()
1048 .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
1049 }
1050
1051 #[inline]
1055 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
1056 self.html_comment_ranges
1057 .iter()
1058 .any(|range| byte_pos >= range.start && byte_pos < range.end)
1059 }
1060
1061 #[inline]
1064 pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1065 self.html_tags()
1066 .iter()
1067 .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
1068 }
1069
1070 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1072 self.jinja_ranges
1073 .iter()
1074 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1075 }
1076
1077 pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1079 self.reference_defs.iter().any(|def| {
1080 if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1081 byte_pos >= start && byte_pos < end
1082 } else {
1083 false
1084 }
1085 })
1086 }
1087
1088 pub fn has_char(&self, ch: char) -> bool {
1090 match ch {
1091 '#' => self.char_frequency.hash_count > 0,
1092 '*' => self.char_frequency.asterisk_count > 0,
1093 '_' => self.char_frequency.underscore_count > 0,
1094 '-' => self.char_frequency.hyphen_count > 0,
1095 '+' => self.char_frequency.plus_count > 0,
1096 '>' => self.char_frequency.gt_count > 0,
1097 '|' => self.char_frequency.pipe_count > 0,
1098 '[' => self.char_frequency.bracket_count > 0,
1099 '`' => self.char_frequency.backtick_count > 0,
1100 '<' => self.char_frequency.lt_count > 0,
1101 '!' => self.char_frequency.exclamation_count > 0,
1102 '\n' => self.char_frequency.newline_count > 0,
1103 _ => self.content.contains(ch), }
1105 }
1106
1107 pub fn char_count(&self, ch: char) -> usize {
1109 match ch {
1110 '#' => self.char_frequency.hash_count,
1111 '*' => self.char_frequency.asterisk_count,
1112 '_' => self.char_frequency.underscore_count,
1113 '-' => self.char_frequency.hyphen_count,
1114 '+' => self.char_frequency.plus_count,
1115 '>' => self.char_frequency.gt_count,
1116 '|' => self.char_frequency.pipe_count,
1117 '[' => self.char_frequency.bracket_count,
1118 '`' => self.char_frequency.backtick_count,
1119 '<' => self.char_frequency.lt_count,
1120 '!' => self.char_frequency.exclamation_count,
1121 '\n' => self.char_frequency.newline_count,
1122 _ => self.content.matches(ch).count(), }
1124 }
1125
1126 pub fn likely_has_headings(&self) -> bool {
1128 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
1130
1131 pub fn likely_has_lists(&self) -> bool {
1133 self.char_frequency.asterisk_count > 0
1134 || self.char_frequency.hyphen_count > 0
1135 || self.char_frequency.plus_count > 0
1136 }
1137
1138 pub fn likely_has_emphasis(&self) -> bool {
1140 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1141 }
1142
1143 pub fn likely_has_tables(&self) -> bool {
1145 self.char_frequency.pipe_count > 2
1146 }
1147
1148 pub fn likely_has_blockquotes(&self) -> bool {
1150 self.char_frequency.gt_count > 0
1151 }
1152
1153 pub fn likely_has_code(&self) -> bool {
1155 self.char_frequency.backtick_count > 0
1156 }
1157
1158 pub fn likely_has_links_or_images(&self) -> bool {
1160 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1161 }
1162
1163 pub fn likely_has_html(&self) -> bool {
1165 self.char_frequency.lt_count > 0
1166 }
1167
1168 pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1173 if let Some(line_info) = self.lines.get(line_idx)
1174 && let Some(ref bq) = line_info.blockquote
1175 {
1176 bq.prefix.trim_end().to_string()
1177 } else {
1178 String::new()
1179 }
1180 }
1181
1182 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1184 self.html_tags()
1185 .iter()
1186 .filter(|tag| tag.line == line_num)
1187 .cloned()
1188 .collect()
1189 }
1190
1191 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1193 self.emphasis_spans()
1194 .iter()
1195 .filter(|span| span.line == line_num)
1196 .cloned()
1197 .collect()
1198 }
1199
1200 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1202 self.table_rows()
1203 .iter()
1204 .filter(|row| row.line == line_num)
1205 .cloned()
1206 .collect()
1207 }
1208
1209 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1211 self.bare_urls()
1212 .iter()
1213 .filter(|url| url.line == line_num)
1214 .cloned()
1215 .collect()
1216 }
1217
1218 #[inline]
1224 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1225 let idx = match lines.binary_search_by(|line| {
1227 if byte_offset < line.byte_offset {
1228 std::cmp::Ordering::Greater
1229 } else if byte_offset > line.byte_offset + line.byte_len {
1230 std::cmp::Ordering::Less
1231 } else {
1232 std::cmp::Ordering::Equal
1233 }
1234 }) {
1235 Ok(idx) => idx,
1236 Err(idx) => idx.saturating_sub(1),
1237 };
1238
1239 let line = &lines[idx];
1240 let line_num = idx + 1;
1241 let col = byte_offset.saturating_sub(line.byte_offset);
1242
1243 (idx, line_num, col)
1244 }
1245
1246 #[inline]
1248 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1249 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1251
1252 if idx > 0 {
1254 let span = &code_spans[idx - 1];
1255 if offset >= span.byte_offset && offset < span.byte_end {
1256 return true;
1257 }
1258 }
1259
1260 false
1261 }
1262
1263 fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1267 use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1268
1269 let mut link_ranges = Vec::new();
1270 let mut options = Options::empty();
1271 options.insert(Options::ENABLE_WIKILINKS);
1272 options.insert(Options::ENABLE_FOOTNOTES);
1273
1274 let parser = Parser::new_ext(content, options).into_offset_iter();
1275 let mut link_stack: Vec<usize> = Vec::new();
1276
1277 for (event, range) in parser {
1278 match event {
1279 Event::Start(Tag::Link { .. }) => {
1280 link_stack.push(range.start);
1281 }
1282 Event::End(TagEnd::Link) => {
1283 if let Some(start_pos) = link_stack.pop() {
1284 link_ranges.push((start_pos, range.end));
1285 }
1286 }
1287 _ => {}
1288 }
1289 }
1290
1291 link_ranges
1292 }
1293
1294 fn parse_links(
1296 content: &'a str,
1297 lines: &[LineInfo],
1298 code_blocks: &[(usize, usize)],
1299 code_spans: &[CodeSpan],
1300 flavor: MarkdownFlavor,
1301 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1302 ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1303 use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1304 use std::collections::HashSet;
1305
1306 let mut links = Vec::with_capacity(content.len() / 500);
1307 let mut broken_links = Vec::new();
1308 let mut footnote_refs = Vec::new();
1309
1310 let mut found_positions = HashSet::new();
1312
1313 let mut options = Options::empty();
1323 options.insert(Options::ENABLE_WIKILINKS);
1324 options.insert(Options::ENABLE_FOOTNOTES);
1325
1326 let parser = Parser::new_with_broken_link_callback(
1327 content,
1328 options,
1329 Some(|link: BrokenLink<'_>| {
1330 broken_links.push(BrokenLinkInfo {
1331 reference: link.reference.to_string(),
1332 span: link.span.clone(),
1333 });
1334 None
1335 }),
1336 )
1337 .into_offset_iter();
1338
1339 let mut link_stack: Vec<(
1340 usize,
1341 usize,
1342 pulldown_cmark::CowStr<'a>,
1343 LinkType,
1344 pulldown_cmark::CowStr<'a>,
1345 )> = Vec::new();
1346 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1349 match event {
1350 Event::Start(Tag::Link {
1351 link_type,
1352 dest_url,
1353 id,
1354 ..
1355 }) => {
1356 link_stack.push((range.start, range.end, dest_url, link_type, id));
1358 text_chunks.clear();
1359 }
1360 Event::Text(text) if !link_stack.is_empty() => {
1361 text_chunks.push((text.to_string(), range.start, range.end));
1363 }
1364 Event::Code(code) if !link_stack.is_empty() => {
1365 let code_text = format!("`{code}`");
1367 text_chunks.push((code_text, range.start, range.end));
1368 }
1369 Event::End(TagEnd::Link) => {
1370 if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1371 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1373 text_chunks.clear();
1374 continue;
1375 }
1376
1377 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1379
1380 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1382 text_chunks.clear();
1383 continue;
1384 }
1385
1386 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1387
1388 let is_reference = matches!(
1389 link_type,
1390 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1391 );
1392
1393 let link_text = if start_pos < content.len() {
1396 let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1397
1398 let mut close_pos = None;
1402 let mut depth = 0;
1403 let mut in_code_span = false;
1404
1405 for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1406 let mut backslash_count = 0;
1408 let mut j = i;
1409 while j > 0 && link_bytes[j - 1] == b'\\' {
1410 backslash_count += 1;
1411 j -= 1;
1412 }
1413 let is_escaped = backslash_count % 2 != 0;
1414
1415 if byte == b'`' && !is_escaped {
1417 in_code_span = !in_code_span;
1418 }
1419
1420 if !is_escaped && !in_code_span {
1422 if byte == b'[' {
1423 depth += 1;
1424 } else if byte == b']' {
1425 if depth == 0 {
1426 close_pos = Some(i);
1428 break;
1429 } else {
1430 depth -= 1;
1431 }
1432 }
1433 }
1434 }
1435
1436 if let Some(pos) = close_pos {
1437 Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1438 } else {
1439 Cow::Borrowed("")
1440 }
1441 } else {
1442 Cow::Borrowed("")
1443 };
1444
1445 let reference_id = if is_reference && !ref_id.is_empty() {
1447 Some(Cow::Owned(ref_id.to_lowercase()))
1448 } else if is_reference {
1449 Some(Cow::Owned(link_text.to_lowercase()))
1451 } else {
1452 None
1453 };
1454
1455 found_positions.insert(start_pos);
1457
1458 links.push(ParsedLink {
1459 line: line_num,
1460 start_col: col_start,
1461 end_col: col_end,
1462 byte_offset: start_pos,
1463 byte_end: range.end,
1464 text: link_text,
1465 url: Cow::Owned(url.to_string()),
1466 is_reference,
1467 reference_id,
1468 link_type,
1469 });
1470
1471 text_chunks.clear();
1472 }
1473 }
1474 Event::FootnoteReference(footnote_id) => {
1475 if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1478 continue;
1479 }
1480
1481 let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1482 footnote_refs.push(FootnoteRef {
1483 id: footnote_id.to_string(),
1484 line: line_num,
1485 byte_offset: range.start,
1486 byte_end: range.end,
1487 });
1488 }
1489 _ => {}
1490 }
1491 }
1492
1493 for cap in LINK_PATTERN.captures_iter(content) {
1497 let full_match = cap.get(0).unwrap();
1498 let match_start = full_match.start();
1499 let match_end = full_match.end();
1500
1501 if found_positions.contains(&match_start) {
1503 continue;
1504 }
1505
1506 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1508 continue;
1509 }
1510
1511 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1513 continue;
1514 }
1515
1516 if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1518 continue;
1519 }
1520
1521 if Self::is_offset_in_code_span(code_spans, match_start) {
1523 continue;
1524 }
1525
1526 if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1528 continue;
1529 }
1530
1531 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1533
1534 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1536 continue;
1537 }
1538
1539 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1540
1541 let text = cap.get(1).map_or("", |m| m.as_str());
1542
1543 if let Some(ref_id) = cap.get(6) {
1545 let ref_id_str = ref_id.as_str();
1546 let normalized_ref = if ref_id_str.is_empty() {
1547 Cow::Owned(text.to_lowercase()) } else {
1549 Cow::Owned(ref_id_str.to_lowercase())
1550 };
1551
1552 links.push(ParsedLink {
1554 line: line_num,
1555 start_col: col_start,
1556 end_col: col_end,
1557 byte_offset: match_start,
1558 byte_end: match_end,
1559 text: Cow::Borrowed(text),
1560 url: Cow::Borrowed(""), is_reference: true,
1562 reference_id: Some(normalized_ref),
1563 link_type: LinkType::Reference, });
1565 }
1566 }
1567
1568 (links, broken_links, footnote_refs)
1569 }
1570
1571 fn parse_images(
1573 content: &'a str,
1574 lines: &[LineInfo],
1575 code_blocks: &[(usize, usize)],
1576 code_spans: &[CodeSpan],
1577 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1578 ) -> Vec<ParsedImage<'a>> {
1579 use crate::utils::skip_context::is_in_html_comment_ranges;
1580 use std::collections::HashSet;
1581
1582 let mut images = Vec::with_capacity(content.len() / 1000);
1584 let mut found_positions = HashSet::new();
1585
1586 let parser = Parser::new(content).into_offset_iter();
1588 let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1589 Vec::new();
1590 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1593 match event {
1594 Event::Start(Tag::Image {
1595 link_type,
1596 dest_url,
1597 id,
1598 ..
1599 }) => {
1600 image_stack.push((range.start, dest_url, link_type, id));
1601 text_chunks.clear();
1602 }
1603 Event::Text(text) if !image_stack.is_empty() => {
1604 text_chunks.push((text.to_string(), range.start, range.end));
1605 }
1606 Event::Code(code) if !image_stack.is_empty() => {
1607 let code_text = format!("`{code}`");
1608 text_chunks.push((code_text, range.start, range.end));
1609 }
1610 Event::End(TagEnd::Image) => {
1611 if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1612 if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1614 continue;
1615 }
1616
1617 if Self::is_offset_in_code_span(code_spans, start_pos) {
1619 continue;
1620 }
1621
1622 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1624 continue;
1625 }
1626
1627 let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1629 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1630
1631 let is_reference = matches!(
1632 link_type,
1633 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1634 );
1635
1636 let alt_text = if start_pos < content.len() {
1639 let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1640
1641 let mut close_pos = None;
1644 let mut depth = 0;
1645
1646 if image_bytes.len() > 2 {
1647 for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1648 let mut backslash_count = 0;
1650 let mut j = i;
1651 while j > 0 && image_bytes[j - 1] == b'\\' {
1652 backslash_count += 1;
1653 j -= 1;
1654 }
1655 let is_escaped = backslash_count % 2 != 0;
1656
1657 if !is_escaped {
1658 if byte == b'[' {
1659 depth += 1;
1660 } else if byte == b']' {
1661 if depth == 0 {
1662 close_pos = Some(i);
1664 break;
1665 } else {
1666 depth -= 1;
1667 }
1668 }
1669 }
1670 }
1671 }
1672
1673 if let Some(pos) = close_pos {
1674 Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1675 } else {
1676 Cow::Borrowed("")
1677 }
1678 } else {
1679 Cow::Borrowed("")
1680 };
1681
1682 let reference_id = if is_reference && !ref_id.is_empty() {
1683 Some(Cow::Owned(ref_id.to_lowercase()))
1684 } else if is_reference {
1685 Some(Cow::Owned(alt_text.to_lowercase())) } else {
1687 None
1688 };
1689
1690 found_positions.insert(start_pos);
1691 images.push(ParsedImage {
1692 line: line_num,
1693 start_col: col_start,
1694 end_col: col_end,
1695 byte_offset: start_pos,
1696 byte_end: range.end,
1697 alt_text,
1698 url: Cow::Owned(url.to_string()),
1699 is_reference,
1700 reference_id,
1701 link_type,
1702 });
1703 }
1704 }
1705 _ => {}
1706 }
1707 }
1708
1709 for cap in IMAGE_PATTERN.captures_iter(content) {
1711 let full_match = cap.get(0).unwrap();
1712 let match_start = full_match.start();
1713 let match_end = full_match.end();
1714
1715 if found_positions.contains(&match_start) {
1717 continue;
1718 }
1719
1720 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1722 continue;
1723 }
1724
1725 if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1727 || Self::is_offset_in_code_span(code_spans, match_start)
1728 || is_in_html_comment_ranges(html_comment_ranges, match_start)
1729 {
1730 continue;
1731 }
1732
1733 if let Some(ref_id) = cap.get(6) {
1735 let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1736 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1737 let alt_text = cap.get(1).map_or("", |m| m.as_str());
1738 let ref_id_str = ref_id.as_str();
1739 let normalized_ref = if ref_id_str.is_empty() {
1740 Cow::Owned(alt_text.to_lowercase())
1741 } else {
1742 Cow::Owned(ref_id_str.to_lowercase())
1743 };
1744
1745 images.push(ParsedImage {
1746 line: line_num,
1747 start_col: col_start,
1748 end_col: col_end,
1749 byte_offset: match_start,
1750 byte_end: match_end,
1751 alt_text: Cow::Borrowed(alt_text),
1752 url: Cow::Borrowed(""),
1753 is_reference: true,
1754 reference_id: Some(normalized_ref),
1755 link_type: LinkType::Reference, });
1757 }
1758 }
1759
1760 images
1761 }
1762
1763 fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1765 let mut refs = Vec::with_capacity(lines.len() / 20); for (line_idx, line_info) in lines.iter().enumerate() {
1769 if line_info.in_code_block {
1771 continue;
1772 }
1773
1774 let line = line_info.content(content);
1775 let line_num = line_idx + 1;
1776
1777 if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1778 let id_raw = cap.get(1).unwrap().as_str();
1779
1780 if id_raw.starts_with('^') {
1783 continue;
1784 }
1785
1786 let id = id_raw.to_lowercase();
1787 let url = cap.get(2).unwrap().as_str().to_string();
1788 let title_match = cap.get(3).or_else(|| cap.get(4));
1789 let title = title_match.map(|m| m.as_str().to_string());
1790
1791 let match_obj = cap.get(0).unwrap();
1794 let byte_offset = line_info.byte_offset + match_obj.start();
1795 let byte_end = line_info.byte_offset + match_obj.end();
1796
1797 let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1799 let start = line_info.byte_offset + m.start().saturating_sub(1);
1801 let end = line_info.byte_offset + m.end() + 1; (Some(start), Some(end))
1803 } else {
1804 (None, None)
1805 };
1806
1807 refs.push(ReferenceDef {
1808 line: line_num,
1809 id,
1810 url,
1811 title,
1812 byte_offset,
1813 byte_end,
1814 title_byte_start,
1815 title_byte_end,
1816 });
1817 }
1818 }
1819
1820 refs
1821 }
1822
/// Splits a line into its blockquote prefix and the text after it.
///
/// The prefix covers every `>` marker (nested markers included), the
/// whitespace in front of each marker, and at most one space or tab after
/// each marker. Returns `None` when the line does not start with `>` once
/// leading whitespace is ignored.
#[inline]
fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
    let mut rest = line;
    let mut prefix_len = 0;

    while let Some(after_marker) = rest.trim_start().strip_prefix('>') {
        // Everything consumed up to and including this `>`.
        prefix_len += rest.len() - after_marker.len();

        // A single space or tab directly after the marker belongs to the prefix.
        rest = match after_marker.strip_prefix([' ', '\t']) {
            Some(tail) => {
                prefix_len += 1;
                tail
            }
            None => after_marker,
        };
    }

    // No marker was consumed, so this is not a blockquote line.
    (prefix_len > 0).then(|| (&line[..prefix_len], rest))
}
1863
1864 fn detect_list_items_and_emphasis_with_pulldown(
1888 content: &str,
1889 line_offsets: &[usize],
1890 flavor: MarkdownFlavor,
1891 front_matter_end: usize,
1892 code_blocks: &[(usize, usize)],
1893 ) -> (ListItemMap, Vec<EmphasisSpan>) {
1894 use std::collections::HashMap;
1895
1896 let mut list_items = HashMap::new();
1897 let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
1898
1899 let mut options = Options::empty();
1900 options.insert(Options::ENABLE_TABLES);
1901 options.insert(Options::ENABLE_FOOTNOTES);
1902 options.insert(Options::ENABLE_STRIKETHROUGH);
1903 options.insert(Options::ENABLE_TASKLISTS);
1904 options.insert(Options::ENABLE_GFM);
1906
1907 let _ = flavor;
1909
1910 let parser = Parser::new_ext(content, options).into_offset_iter();
1911 let mut list_depth: usize = 0;
1912 let mut list_stack: Vec<bool> = Vec::new();
1913
1914 for (event, range) in parser {
1915 match event {
1916 Event::Start(Tag::Emphasis) | Event::Start(Tag::Strong) => {
1918 let marker_count = if matches!(event, Event::Start(Tag::Strong)) {
1919 2
1920 } else {
1921 1
1922 };
1923 let match_start = range.start;
1924 let match_end = range.end;
1925
1926 if !CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
1928 let marker = content[match_start..].chars().next().unwrap_or('*');
1930 if marker == '*' || marker == '_' {
1931 let content_start = match_start + marker_count;
1933 let content_end = if match_end >= marker_count {
1934 match_end - marker_count
1935 } else {
1936 match_end
1937 };
1938 let content_part = if content_start < content_end && content_end <= content.len() {
1939 &content[content_start..content_end]
1940 } else {
1941 ""
1942 };
1943
1944 let line_idx = match line_offsets.binary_search(&match_start) {
1946 Ok(idx) => idx,
1947 Err(idx) => idx.saturating_sub(1),
1948 };
1949 let line_num = line_idx + 1;
1950 let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
1951 let col_start = match_start - line_start;
1952 let col_end = match_end - line_start;
1953
1954 emphasis_spans.push(EmphasisSpan {
1955 line: line_num,
1956 start_col: col_start,
1957 end_col: col_end,
1958 byte_offset: match_start,
1959 byte_end: match_end,
1960 marker,
1961 marker_count,
1962 content: content_part.to_string(),
1963 });
1964 }
1965 }
1966 }
1967 Event::Start(Tag::List(start_number)) => {
1968 list_depth += 1;
1969 list_stack.push(start_number.is_some());
1970 }
1971 Event::End(TagEnd::List(_)) => {
1972 list_depth = list_depth.saturating_sub(1);
1973 list_stack.pop();
1974 }
1975 Event::Start(Tag::Item) if list_depth > 0 => {
1976 let current_list_is_ordered = list_stack.last().copied().unwrap_or(false);
1978 let item_start = range.start;
1980
1981 let mut line_idx = match line_offsets.binary_search(&item_start) {
1983 Ok(idx) => idx,
1984 Err(idx) => idx.saturating_sub(1),
1985 };
1986
1987 if item_start < content.len() && content.as_bytes()[item_start] == b'\n' {
1991 line_idx += 1;
1992 }
1993
1994 if front_matter_end > 0 && line_idx < front_matter_end {
1996 continue;
1997 }
1998
1999 if line_idx < line_offsets.len() {
2000 let line_start_byte = line_offsets[line_idx];
2001 let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
2002 let line = &content[line_start_byte..line_end.min(content.len())];
2003
2004 let line = line
2006 .strip_suffix('\n')
2007 .or_else(|| line.strip_suffix("\r\n"))
2008 .unwrap_or(line);
2009
2010 let blockquote_parse = Self::parse_blockquote_prefix(line);
2012 let (blockquote_prefix_len, line_to_parse) = if let Some((prefix, content)) = blockquote_parse {
2013 (prefix.len(), content)
2014 } else {
2015 (0, line)
2016 };
2017
2018 if current_list_is_ordered {
2020 if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
2021 Self::parse_ordered_list(line_to_parse)
2022 {
2023 let marker = format!("{number_str}{delimiter}");
2024 let marker_column = blockquote_prefix_len + leading_spaces.len();
2025 let content_column = marker_column + marker.len() + spacing.len();
2026 let number = number_str.parse().ok();
2027
2028 list_items.entry(line_start_byte).or_insert((
2029 true,
2030 marker,
2031 marker_column,
2032 content_column,
2033 number,
2034 ));
2035 }
2036 } else if let Some((leading_spaces, marker, spacing, _content)) =
2037 Self::parse_unordered_list(line_to_parse)
2038 {
2039 let marker_column = blockquote_prefix_len + leading_spaces.len();
2040 let content_column = marker_column + 1 + spacing.len();
2041
2042 list_items.entry(line_start_byte).or_insert((
2043 false,
2044 marker.to_string(),
2045 marker_column,
2046 content_column,
2047 None,
2048 ));
2049 }
2050 }
2051 }
2052 _ => {}
2053 }
2054 }
2055
2056 (list_items, emphasis_spans)
2057 }
2058
/// Parses a bullet list line into `(indent, marker, spacing, content)`.
///
/// `indent` and `spacing` consist of spaces and tabs only; `marker` is one of
/// `-`, `*` or `+`. Returns `None` when the first character after the indent
/// is not a bullet marker, or when the line is empty or whitespace-only.
#[inline]
fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
    const PAD: [char; 2] = [' ', '\t'];

    let after_indent = line.trim_start_matches(PAD);
    let (indent, _) = line.split_at(line.len() - after_indent.len());

    let marker = after_indent
        .chars()
        .next()
        .filter(|c| matches!(c, '-' | '*' | '+'))?;

    // The marker is ASCII, so skipping one byte is safe.
    let after_marker = &after_indent[1..];
    let content = after_marker.trim_start_matches(PAD);
    let (spacing, _) = after_marker.split_at(after_marker.len() - content.len());

    Some((indent, marker, spacing, content))
}
2091
/// Parses a numbered list line into
/// `(indent, number, delimiter, spacing, content)`.
///
/// `number` is a non-empty run of ASCII digits and `delimiter` is `.` or `)`.
/// `indent` and `spacing` consist of spaces and tabs only. Returns `None`
/// when the line does not have that shape.
#[inline]
fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
    const PAD: [char; 2] = [' ', '\t'];

    let after_indent = line.trim_start_matches(PAD);
    let (indent, _) = line.split_at(line.len() - after_indent.len());

    let after_digits = after_indent.trim_start_matches(|c: char| c.is_ascii_digit());
    let (number, _) = after_indent.split_at(after_indent.len() - after_digits.len());
    if number.is_empty() {
        return None;
    }

    let delimiter = after_digits
        .chars()
        .next()
        .filter(|c| matches!(c, '.' | ')'))?;

    // The delimiter is ASCII, so skipping one byte is safe.
    let after_delimiter = &after_digits[1..];
    let content = after_delimiter.trim_start_matches(PAD);
    let (spacing, _) = after_delimiter.split_at(after_delimiter.len() - content.len());

    Some((indent, number, delimiter, spacing, content))
}
2139
/// Builds a per-line flag vector marking the lines covered by a code block.
///
/// `line_offsets` holds the byte offset at which each line starts and
/// `code_blocks` holds `(start, end)` byte ranges. A line is flagged when it
/// contains `start`, or when it starts after `start` and before `end`.
/// Offsets that fall inside a multi-byte character are widened to the nearest
/// character boundary (start moves backwards, end moves forwards).
fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
    let total = content.len();
    let mut flags = vec![false; line_offsets.len()];

    for &(start, end) in code_blocks {
        // Nearest character boundary at or before `start`.
        let block_start = (0..=start)
            .rev()
            .find(|&pos| content.is_char_boundary(pos))
            .unwrap_or(0);

        // Nearest character boundary at or after `end`, capped at the content length.
        let block_end = (end..=total)
            .find(|&pos| content.is_char_boundary(pos))
            .unwrap_or(total);

        // First flagged line: the last line starting at or before the block start.
        let first_line = line_offsets
            .partition_point(|&offset| offset <= block_start)
            .saturating_sub(1);
        // One past the last flagged line: lines starting before the block end.
        let line_limit = line_offsets.partition_point(|&offset| offset < block_end);

        // `get_mut` yields `None` for an inverted range, which leaves the flags untouched.
        if let Some(covered) = flags.get_mut(first_line..line_limit) {
            covered.fill(true);
        }
    }

    flags
}
2199
/// Builds a per-line flag vector marking the lines that belong to a `$$` math block.
///
/// A line whose trimmed text is exactly `$$` toggles the block and is itself
/// flagged; every line in between is flagged too. Lines marked in
/// `code_block_map` are never flagged and never toggle the block.
fn compute_math_block_line_map(content: &str, code_block_map: &[bool]) -> Vec<bool> {
    let mut math_open = false;

    content
        .lines()
        .enumerate()
        .map(|(idx, text)| {
            if code_block_map.get(idx).copied().unwrap_or(false) {
                return false;
            }

            let is_fence = text.trim() == "$$";
            // Fence lines count as part of the block, whether opening or closing.
            let flagged = is_fence || math_open;
            if is_fence {
                math_open = !math_open;
            }
            flagged
        })
        .collect()
}
2237
2238 fn compute_basic_line_info(
2241 content: &str,
2242 line_offsets: &[usize],
2243 code_blocks: &[(usize, usize)],
2244 flavor: MarkdownFlavor,
2245 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2246 autodoc_ranges: &[crate::utils::skip_context::ByteRange],
2247 ) -> (Vec<LineInfo>, Vec<EmphasisSpan>) {
2248 let content_lines: Vec<&str> = content.lines().collect();
2249 let mut lines = Vec::with_capacity(content_lines.len());
2250
2251 let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
2253
2254 let math_block_map = Self::compute_math_block_line_map(content, &code_block_map);
2256
2257 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2260
2261 let (list_item_map, emphasis_spans) = Self::detect_list_items_and_emphasis_with_pulldown(
2264 content,
2265 line_offsets,
2266 flavor,
2267 front_matter_end,
2268 code_blocks,
2269 );
2270
2271 for (i, line) in content_lines.iter().enumerate() {
2272 let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
2273 let indent = line.len() - line.trim_start().len();
2274 let visual_indent = ElementCache::calculate_indentation_width_default(line);
2276
2277 let blockquote_parse = Self::parse_blockquote_prefix(line);
2279
2280 let is_blank = if let Some((_, content)) = blockquote_parse {
2282 content.trim().is_empty()
2284 } else {
2285 line.trim().is_empty()
2286 };
2287
2288 let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
2290
2291 let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
2293 && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
2294 let line_end_offset = byte_offset + line.len();
2297 let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
2298 html_comment_ranges,
2299 byte_offset,
2300 line_end_offset,
2301 );
2302 let list_item =
2305 list_item_map
2306 .get(&byte_offset)
2307 .map(
2308 |(is_ordered, marker, marker_column, content_column, number)| ListItemInfo {
2309 marker: marker.clone(),
2310 is_ordered: *is_ordered,
2311 number: *number,
2312 marker_column: *marker_column,
2313 content_column: *content_column,
2314 },
2315 );
2316
2317 let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2320 let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2321
2322 let in_math_block = math_block_map.get(i).copied().unwrap_or(false);
2324
2325 lines.push(LineInfo {
2326 byte_offset,
2327 byte_len: line.len(),
2328 indent,
2329 visual_indent,
2330 is_blank,
2331 in_code_block,
2332 in_front_matter,
2333 in_html_block: false, in_html_comment,
2335 list_item,
2336 heading: None, blockquote: None, in_mkdocstrings,
2339 in_esm_block: false, in_code_span_continuation: false, is_horizontal_rule: is_hr,
2342 in_math_block,
2343 });
2344 }
2345
2346 (lines, emphasis_spans)
2347 }
2348
2349 fn detect_headings_and_blockquotes(
2351 content: &str,
2352 lines: &mut [LineInfo],
2353 flavor: MarkdownFlavor,
2354 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2355 link_byte_ranges: &[(usize, usize)],
2356 ) {
2357 static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
2359 LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
2360 static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
2361 LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
2362
2363 let content_lines: Vec<&str> = content.lines().collect();
2364
2365 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2367
2368 for i in 0..lines.len() {
2370 let line = content_lines[i];
2371
2372 if !(front_matter_end > 0 && i < front_matter_end)
2377 && let Some(bq) = parse_blockquote_detailed(line)
2378 {
2379 let nesting_level = bq.markers.len();
2380 let marker_column = bq.indent.len();
2381 let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
2382 let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
2383 let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
2384 let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
2385
2386 lines[i].blockquote = Some(BlockquoteInfo {
2387 nesting_level,
2388 indent: bq.indent.to_string(),
2389 marker_column,
2390 prefix,
2391 content: bq.content.to_string(),
2392 has_no_space_after_marker: has_no_space,
2393 has_multiple_spaces_after_marker: has_multiple_spaces,
2394 needs_md028_fix,
2395 });
2396
2397 if !lines[i].in_code_block && is_horizontal_rule_content(bq.content.trim()) {
2400 lines[i].is_horizontal_rule = true;
2401 }
2402 }
2403
2404 if lines[i].in_code_block {
2406 continue;
2407 }
2408
2409 if front_matter_end > 0 && i < front_matter_end {
2411 continue;
2412 }
2413
2414 if lines[i].in_html_block {
2416 continue;
2417 }
2418
2419 if lines[i].is_blank {
2421 continue;
2422 }
2423
2424 let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
2427 crate::utils::mkdocs_snippets::is_snippet_section_start(line)
2428 || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
2429 } else {
2430 false
2431 };
2432
2433 if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
2434 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
2436 continue;
2437 }
2438 let line_offset = lines[i].byte_offset;
2441 if link_byte_ranges
2442 .iter()
2443 .any(|&(start, end)| line_offset > start && line_offset < end)
2444 {
2445 continue;
2446 }
2447 let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
2448 let hashes = caps.get(2).map_or("", |m| m.as_str());
2449 let spaces_after = caps.get(3).map_or("", |m| m.as_str());
2450 let rest = caps.get(4).map_or("", |m| m.as_str());
2451
2452 let level = hashes.len() as u8;
2453 let marker_column = leading_spaces.len();
2454
2455 let (text, has_closing, closing_seq) = {
2457 let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
2459 if rest[id_start..].trim_end().ends_with('}') {
2461 (&rest[..id_start], &rest[id_start..])
2463 } else {
2464 (rest, "")
2465 }
2466 } else {
2467 (rest, "")
2468 };
2469
2470 let trimmed_rest = rest_without_id.trim_end();
2472 if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
2473 let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();
2476
2477 let last_hash_char_idx = char_positions
2479 .iter()
2480 .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);
2481
2482 if let Some(mut char_idx) = last_hash_char_idx {
2483 while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
2485 char_idx -= 1;
2486 }
2487
2488 let start_of_hashes = char_positions[char_idx].0;
2490
2491 let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();
2493
2494 let potential_closing = &trimmed_rest[start_of_hashes..];
2496 let is_all_hashes = potential_closing.chars().all(|c| c == '#');
2497
2498 if is_all_hashes && has_space_before {
2499 let closing_hashes = potential_closing.to_string();
2501 let text_part = if !custom_id_part.is_empty() {
2504 format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
2507 } else {
2508 trimmed_rest[..start_of_hashes].trim_end().to_string()
2509 };
2510 (text_part, true, closing_hashes)
2511 } else {
2512 (rest.to_string(), false, String::new())
2514 }
2515 } else {
2516 (rest.to_string(), false, String::new())
2518 }
2519 } else {
2520 (rest.to_string(), false, String::new())
2522 }
2523 };
2524
2525 let content_column = marker_column + hashes.len() + spaces_after.len();
2526
2527 let raw_text = text.trim().to_string();
2529 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2530
2531 if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
2533 let next_line = content_lines[i + 1];
2534 if !lines[i + 1].in_code_block
2535 && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
2536 && let Some(next_line_id) =
2537 crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
2538 {
2539 custom_id = Some(next_line_id);
2540 }
2541 }
2542
2543 let is_valid = !spaces_after.is_empty()
2553 || rest.is_empty()
2554 || level > 1
2555 || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());
2556
2557 lines[i].heading = Some(HeadingInfo {
2558 level,
2559 style: HeadingStyle::ATX,
2560 marker: hashes.to_string(),
2561 marker_column,
2562 content_column,
2563 text: clean_text,
2564 custom_id,
2565 raw_text,
2566 has_closing_sequence: has_closing,
2567 closing_sequence: closing_seq,
2568 is_valid,
2569 });
2570 }
2571 else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2573 let next_line = content_lines[i + 1];
2574 if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2575 if front_matter_end > 0 && i < front_matter_end {
2577 continue;
2578 }
2579
2580 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2582 {
2583 continue;
2584 }
2585
2586 let content_line = line.trim();
2589
2590 if content_line.starts_with('-') || content_line.starts_with('*') || content_line.starts_with('+') {
2592 continue;
2593 }
2594
2595 if content_line.starts_with('_') {
2597 let non_ws: String = content_line.chars().filter(|c| !c.is_whitespace()).collect();
2598 if non_ws.len() >= 3 && non_ws.chars().all(|c| c == '_') {
2599 continue;
2600 }
2601 }
2602
2603 if let Some(first_char) = content_line.chars().next()
2605 && first_char.is_ascii_digit()
2606 {
2607 let num_end = content_line.chars().take_while(|c| c.is_ascii_digit()).count();
2608 if num_end < content_line.len() {
2609 let next = content_line.chars().nth(num_end);
2610 if next == Some('.') || next == Some(')') {
2611 continue;
2612 }
2613 }
2614 }
2615
2616 if ATX_HEADING_REGEX.is_match(line) {
2618 continue;
2619 }
2620
2621 if content_line.starts_with('>') {
2623 continue;
2624 }
2625
2626 let trimmed_start = line.trim_start();
2628 if trimmed_start.len() >= 3 {
2629 let first_three: String = trimmed_start.chars().take(3).collect();
2630 if first_three == "```" || first_three == "~~~" {
2631 continue;
2632 }
2633 }
2634
2635 if content_line.starts_with('<') {
2637 continue;
2638 }
2639
2640 let underline = next_line.trim();
2641
2642 let level = if underline.starts_with('=') { 1 } else { 2 };
2643 let style = if level == 1 {
2644 HeadingStyle::Setext1
2645 } else {
2646 HeadingStyle::Setext2
2647 };
2648
2649 let raw_text = line.trim().to_string();
2651 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2652
2653 if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2655 let attr_line = content_lines[i + 2];
2656 if !lines[i + 2].in_code_block
2657 && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2658 && let Some(attr_line_id) =
2659 crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2660 {
2661 custom_id = Some(attr_line_id);
2662 }
2663 }
2664
2665 lines[i].heading = Some(HeadingInfo {
2666 level,
2667 style,
2668 marker: underline.to_string(),
2669 marker_column: next_line.len() - next_line.trim_start().len(),
2670 content_column: lines[i].indent,
2671 text: clean_text,
2672 custom_id,
2673 raw_text,
2674 has_closing_sequence: false,
2675 closing_sequence: String::new(),
2676 is_valid: true, });
2678 }
2679 }
2680 }
2681 }
2682
2683 fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2685 const BLOCK_ELEMENTS: &[&str] = &[
2688 "address",
2689 "article",
2690 "aside",
2691 "audio",
2692 "blockquote",
2693 "canvas",
2694 "details",
2695 "dialog",
2696 "dd",
2697 "div",
2698 "dl",
2699 "dt",
2700 "embed",
2701 "fieldset",
2702 "figcaption",
2703 "figure",
2704 "footer",
2705 "form",
2706 "h1",
2707 "h2",
2708 "h3",
2709 "h4",
2710 "h5",
2711 "h6",
2712 "header",
2713 "hr",
2714 "iframe",
2715 "li",
2716 "main",
2717 "menu",
2718 "nav",
2719 "noscript",
2720 "object",
2721 "ol",
2722 "p",
2723 "picture",
2724 "pre",
2725 "script",
2726 "search",
2727 "section",
2728 "source",
2729 "style",
2730 "summary",
2731 "svg",
2732 "table",
2733 "tbody",
2734 "td",
2735 "template",
2736 "textarea",
2737 "tfoot",
2738 "th",
2739 "thead",
2740 "tr",
2741 "track",
2742 "ul",
2743 "video",
2744 ];
2745
2746 let mut i = 0;
2747 while i < lines.len() {
2748 if lines[i].in_code_block || lines[i].in_front_matter {
2750 i += 1;
2751 continue;
2752 }
2753
2754 let trimmed = lines[i].content(content).trim_start();
2755
2756 if trimmed.starts_with('<') && trimmed.len() > 1 {
2758 let after_bracket = &trimmed[1..];
2760 let is_closing = after_bracket.starts_with('/');
2761 let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2762
2763 let tag_name = tag_start
2765 .chars()
2766 .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2767 .collect::<String>()
2768 .to_lowercase();
2769
2770 if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2772 lines[i].in_html_block = true;
2774
2775 if !is_closing {
2778 let closing_tag = format!("</{tag_name}>");
2779 let allow_blank_lines = tag_name == "style" || tag_name == "script";
2781 let mut j = i + 1;
2782 let mut found_closing_tag = false;
2783 while j < lines.len() && j < i + 100 {
2784 if !allow_blank_lines && lines[j].is_blank {
2787 break;
2788 }
2789
2790 lines[j].in_html_block = true;
2791
2792 if lines[j].content(content).contains(&closing_tag) {
2794 found_closing_tag = true;
2795 }
2796
2797 if found_closing_tag {
2800 j += 1;
2801 while j < lines.len() && j < i + 100 {
2803 if lines[j].is_blank {
2804 break;
2805 }
2806 lines[j].in_html_block = true;
2807 j += 1;
2808 }
2809 break;
2810 }
2811 j += 1;
2812 }
2813 }
2814 }
2815 }
2816
2817 i += 1;
2818 }
2819 }
2820
2821 fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2824 if !flavor.supports_esm_blocks() {
2826 return;
2827 }
2828
2829 let mut in_multiline_comment = false;
2830
2831 for line in lines.iter_mut() {
2832 if line.is_blank || line.in_html_comment {
2834 continue;
2835 }
2836
2837 let trimmed = line.content(content).trim_start();
2838
2839 if in_multiline_comment {
2841 if trimmed.contains("*/") {
2842 in_multiline_comment = false;
2843 }
2844 continue;
2845 }
2846
2847 if trimmed.starts_with("//") {
2849 continue;
2850 }
2851
2852 if trimmed.starts_with("/*") {
2854 if !trimmed.contains("*/") {
2855 in_multiline_comment = true;
2856 }
2857 continue;
2858 }
2859
2860 if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
2862 line.in_esm_block = true;
2863 } else {
2864 break;
2866 }
2867 }
2868 }
2869
2870 fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
2872 let mut code_spans = Vec::new();
2873
2874 if !content.contains('`') {
2876 return code_spans;
2877 }
2878
2879 let parser = Parser::new(content).into_offset_iter();
2881
2882 for (event, range) in parser {
2883 if let Event::Code(_) = event {
2884 let start_pos = range.start;
2885 let end_pos = range.end;
2886
2887 let full_span = &content[start_pos..end_pos];
2889 let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
2890
2891 let content_start = start_pos + backtick_count;
2893 let content_end = end_pos - backtick_count;
2894 let span_content = if content_start < content_end {
2895 content[content_start..content_end].to_string()
2896 } else {
2897 String::new()
2898 };
2899
2900 let line_idx = lines
2903 .partition_point(|line| line.byte_offset <= start_pos)
2904 .saturating_sub(1);
2905 let line_num = line_idx + 1;
2906 let byte_col_start = start_pos - lines[line_idx].byte_offset;
2907
2908 let end_line_idx = lines
2910 .partition_point(|line| line.byte_offset <= end_pos)
2911 .saturating_sub(1);
2912 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
2913
2914 let line_content = lines[line_idx].content(content);
2917 let col_start = if byte_col_start <= line_content.len() {
2918 line_content[..byte_col_start].chars().count()
2919 } else {
2920 line_content.chars().count()
2921 };
2922
2923 let end_line_content = lines[end_line_idx].content(content);
2924 let col_end = if byte_col_end <= end_line_content.len() {
2925 end_line_content[..byte_col_end].chars().count()
2926 } else {
2927 end_line_content.chars().count()
2928 };
2929
2930 code_spans.push(CodeSpan {
2931 line: line_num,
2932 end_line: end_line_idx + 1,
2933 start_col: col_start,
2934 end_col: col_end,
2935 byte_offset: start_pos,
2936 byte_end: end_pos,
2937 backtick_count,
2938 content: span_content,
2939 });
2940 }
2941 }
2942
2943 code_spans.sort_by_key(|span| span.byte_offset);
2945
2946 code_spans
2947 }
2948
2949 fn parse_math_spans(content: &str, lines: &[LineInfo]) -> Vec<MathSpan> {
2951 let mut math_spans = Vec::new();
2952
2953 if !content.contains('$') {
2955 return math_spans;
2956 }
2957
2958 let mut options = Options::empty();
2960 options.insert(Options::ENABLE_MATH);
2961 let parser = Parser::new_ext(content, options).into_offset_iter();
2962
2963 for (event, range) in parser {
2964 let (is_display, math_content) = match &event {
2965 Event::InlineMath(text) => (false, text.as_ref()),
2966 Event::DisplayMath(text) => (true, text.as_ref()),
2967 _ => continue,
2968 };
2969
2970 let start_pos = range.start;
2971 let end_pos = range.end;
2972
2973 let line_idx = lines
2975 .partition_point(|line| line.byte_offset <= start_pos)
2976 .saturating_sub(1);
2977 let line_num = line_idx + 1;
2978 let byte_col_start = start_pos - lines[line_idx].byte_offset;
2979
2980 let end_line_idx = lines
2982 .partition_point(|line| line.byte_offset <= end_pos)
2983 .saturating_sub(1);
2984 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
2985
2986 let line_content = lines[line_idx].content(content);
2988 let col_start = if byte_col_start <= line_content.len() {
2989 line_content[..byte_col_start].chars().count()
2990 } else {
2991 line_content.chars().count()
2992 };
2993
2994 let end_line_content = lines[end_line_idx].content(content);
2995 let col_end = if byte_col_end <= end_line_content.len() {
2996 end_line_content[..byte_col_end].chars().count()
2997 } else {
2998 end_line_content.chars().count()
2999 };
3000
3001 math_spans.push(MathSpan {
3002 line: line_num,
3003 end_line: end_line_idx + 1,
3004 start_col: col_start,
3005 end_col: col_end,
3006 byte_offset: start_pos,
3007 byte_end: end_pos,
3008 is_display,
3009 content: math_content.to_string(),
3010 });
3011 }
3012
3013 math_spans.sort_by_key(|span| span.byte_offset);
3015
3016 math_spans
3017 }
3018
3019 fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
3030 const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
3032
3033 #[inline]
3036 fn reset_tracking_state(
3037 list_item: &ListItemInfo,
3038 has_list_breaking_content: &mut bool,
3039 min_continuation: &mut usize,
3040 ) {
3041 *has_list_breaking_content = false;
3042 let marker_width = if list_item.is_ordered {
3043 list_item.marker.len() + 1 } else {
3045 list_item.marker.len()
3046 };
3047 *min_continuation = if list_item.is_ordered {
3048 marker_width
3049 } else {
3050 UNORDERED_LIST_MIN_CONTINUATION_INDENT
3051 };
3052 }
3053
3054 let mut list_blocks = Vec::with_capacity(lines.len() / 10); let mut current_block: Option<ListBlock> = None;
3057 let mut last_list_item_line = 0;
3058 let mut current_indent_level = 0;
3059 let mut last_marker_width = 0;
3060
3061 let mut has_list_breaking_content_since_last_item = false;
3063 let mut min_continuation_for_tracking = 0;
3064
3065 for (line_idx, line_info) in lines.iter().enumerate() {
3066 let line_num = line_idx + 1;
3067
3068 if line_info.in_code_block {
3070 if let Some(ref mut block) = current_block {
3071 let min_continuation_indent =
3073 CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
3074
3075 let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
3077
3078 match context {
3079 CodeBlockContext::Indented => {
3080 block.end_line = line_num;
3082 continue;
3083 }
3084 CodeBlockContext::Standalone => {
3085 let completed_block = current_block.take().unwrap();
3087 list_blocks.push(completed_block);
3088 continue;
3089 }
3090 CodeBlockContext::Adjacent => {
3091 block.end_line = line_num;
3093 continue;
3094 }
3095 }
3096 } else {
3097 continue;
3099 }
3100 }
3101
3102 let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
3104 caps.get(0).unwrap().as_str().to_string()
3105 } else {
3106 String::new()
3107 };
3108
3109 if let Some(ref block) = current_block
3112 && line_info.list_item.is_none()
3113 && !line_info.is_blank
3114 && !line_info.in_code_span_continuation
3115 {
3116 let line_content = line_info.content(content).trim();
3117
3118 let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
3123
3124 let blockquote_prefix_changes = blockquote_prefix.trim() != block.blockquote_prefix.trim();
3127
3128 let breaks_list = line_info.heading.is_some()
3129 || line_content.starts_with("---")
3130 || line_content.starts_with("***")
3131 || line_content.starts_with("___")
3132 || crate::utils::skip_context::is_table_line(line_content)
3133 || blockquote_prefix_changes
3134 || (line_info.indent > 0
3135 && line_info.indent < min_continuation_for_tracking
3136 && !is_lazy_continuation);
3137
3138 if breaks_list {
3139 has_list_breaking_content_since_last_item = true;
3140 }
3141 }
3142
3143 if line_info.in_code_span_continuation
3146 && line_info.list_item.is_none()
3147 && let Some(ref mut block) = current_block
3148 {
3149 block.end_line = line_num;
3150 }
3151
3152 let effective_continuation_indent = if let Some(ref block) = current_block {
3158 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3159 let line_content = line_info.content(content);
3160 let line_bq_level = line_content
3161 .chars()
3162 .take_while(|c| *c == '>' || c.is_whitespace())
3163 .filter(|&c| c == '>')
3164 .count();
3165 if line_bq_level > 0 && line_bq_level == block_bq_level {
3166 let mut pos = 0;
3168 let mut found_markers = 0;
3169 for c in line_content.chars() {
3170 pos += c.len_utf8();
3171 if c == '>' {
3172 found_markers += 1;
3173 if found_markers == line_bq_level {
3174 if line_content.get(pos..pos + 1) == Some(" ") {
3175 pos += 1;
3176 }
3177 break;
3178 }
3179 }
3180 }
3181 let after_bq = &line_content[pos..];
3182 after_bq.len() - after_bq.trim_start().len()
3183 } else {
3184 line_info.indent
3185 }
3186 } else {
3187 line_info.indent
3188 };
3189 let adjusted_min_continuation_for_tracking = if let Some(ref block) = current_block {
3190 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3191 if block_bq_level > 0 {
3192 if block.is_ordered { last_marker_width } else { 2 }
3193 } else {
3194 min_continuation_for_tracking
3195 }
3196 } else {
3197 min_continuation_for_tracking
3198 };
3199 let is_valid_continuation = effective_continuation_indent >= adjusted_min_continuation_for_tracking
3200 || (line_info.indent == 0 && !line_info.is_blank); if std::env::var("RUMDL_DEBUG_LIST").is_ok() && line_info.list_item.is_none() && !line_info.is_blank {
3203 eprintln!(
3204 "[DEBUG] Line {}: checking continuation - indent={}, min_cont={}, is_valid={}, in_code_span={}, in_code_block={}, has_block={}",
3205 line_num,
3206 effective_continuation_indent,
3207 adjusted_min_continuation_for_tracking,
3208 is_valid_continuation,
3209 line_info.in_code_span_continuation,
3210 line_info.in_code_block,
3211 current_block.is_some()
3212 );
3213 }
3214
3215 if !line_info.in_code_span_continuation
3216 && line_info.list_item.is_none()
3217 && !line_info.is_blank
3218 && !line_info.in_code_block
3219 && is_valid_continuation
3220 && let Some(ref mut block) = current_block
3221 {
3222 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3223 eprintln!(
3224 "[DEBUG] Line {}: extending block.end_line from {} to {}",
3225 line_num, block.end_line, line_num
3226 );
3227 }
3228 block.end_line = line_num;
3229 }
3230
3231 if let Some(list_item) = &line_info.list_item {
3233 let item_indent = list_item.marker_column;
3235 let nesting = item_indent / 2; if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3238 eprintln!(
3239 "[DEBUG] Line {}: list item found, marker={:?}, indent={}",
3240 line_num, list_item.marker, item_indent
3241 );
3242 }
3243
3244 if let Some(ref mut block) = current_block {
3245 let is_nested = nesting > block.nesting_level;
3249 let same_type =
3250 (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
3251 let same_context = block.blockquote_prefix == blockquote_prefix;
3252 let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
3254
3255 let marker_compatible =
3257 block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
3258
3259 let has_non_list_content = has_list_breaking_content_since_last_item;
3262
3263 let mut continues_list = if is_nested {
3267 same_context && reasonable_distance && !has_non_list_content
3269 } else {
3270 same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
3272 };
3273
3274 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3275 eprintln!(
3276 "[DEBUG] Line {}: continues_list={}, is_nested={}, same_type={}, same_context={}, reasonable_distance={}, marker_compatible={}, has_non_list_content={}, last_item={}, block.end_line={}",
3277 line_num,
3278 continues_list,
3279 is_nested,
3280 same_type,
3281 same_context,
3282 reasonable_distance,
3283 marker_compatible,
3284 has_non_list_content,
3285 last_list_item_line,
3286 block.end_line
3287 );
3288 }
3289
3290 if !continues_list
3294 && (is_nested || same_type)
3295 && reasonable_distance
3296 && line_num > 0
3297 && block.end_line == line_num - 1
3298 {
3299 if block.item_lines.contains(&(line_num - 1)) {
3302 continues_list = true;
3304 } else {
3305 continues_list = true;
3309 }
3310 }
3311
3312 if continues_list {
3313 block.end_line = line_num;
3315 block.item_lines.push(line_num);
3316
3317 block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
3319 list_item.marker.len() + 1
3320 } else {
3321 list_item.marker.len()
3322 });
3323
3324 if !block.is_ordered
3326 && block.marker.is_some()
3327 && block.marker.as_ref() != Some(&list_item.marker)
3328 {
3329 block.marker = None;
3331 }
3332
3333 reset_tracking_state(
3335 list_item,
3336 &mut has_list_breaking_content_since_last_item,
3337 &mut min_continuation_for_tracking,
3338 );
3339 } else {
3340 if !same_type
3345 && !is_nested
3346 && let Some(&last_item) = block.item_lines.last()
3347 {
3348 block.end_line = last_item;
3349 }
3350
3351 list_blocks.push(block.clone());
3352
3353 *block = ListBlock {
3354 start_line: line_num,
3355 end_line: line_num,
3356 is_ordered: list_item.is_ordered,
3357 marker: if list_item.is_ordered {
3358 None
3359 } else {
3360 Some(list_item.marker.clone())
3361 },
3362 blockquote_prefix: blockquote_prefix.clone(),
3363 item_lines: vec![line_num],
3364 nesting_level: nesting,
3365 max_marker_width: if list_item.is_ordered {
3366 list_item.marker.len() + 1
3367 } else {
3368 list_item.marker.len()
3369 },
3370 };
3371
3372 reset_tracking_state(
3374 list_item,
3375 &mut has_list_breaking_content_since_last_item,
3376 &mut min_continuation_for_tracking,
3377 );
3378 }
3379 } else {
3380 current_block = Some(ListBlock {
3382 start_line: line_num,
3383 end_line: line_num,
3384 is_ordered: list_item.is_ordered,
3385 marker: if list_item.is_ordered {
3386 None
3387 } else {
3388 Some(list_item.marker.clone())
3389 },
3390 blockquote_prefix,
3391 item_lines: vec![line_num],
3392 nesting_level: nesting,
3393 max_marker_width: list_item.marker.len(),
3394 });
3395
3396 reset_tracking_state(
3398 list_item,
3399 &mut has_list_breaking_content_since_last_item,
3400 &mut min_continuation_for_tracking,
3401 );
3402 }
3403
3404 last_list_item_line = line_num;
3405 current_indent_level = item_indent;
3406 last_marker_width = if list_item.is_ordered {
3407 list_item.marker.len() + 1 } else {
3409 list_item.marker.len()
3410 };
3411 } else if let Some(ref mut block) = current_block {
3412 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3414 eprintln!(
3415 "[DEBUG] Line {}: non-list-item, is_blank={}, block exists",
3416 line_num, line_info.is_blank
3417 );
3418 }
3419
3420 let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
3428 lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
3429 } else {
3430 false
3431 };
3432
3433 let min_continuation_indent = if block.is_ordered {
3437 current_indent_level + last_marker_width
3438 } else {
3439 current_indent_level + 2 };
3441
3442 if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
3443 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3445 eprintln!(
3446 "[DEBUG] Line {}: indented continuation (indent={}, min={})",
3447 line_num, line_info.indent, min_continuation_indent
3448 );
3449 }
3450 block.end_line = line_num;
3451 } else if line_info.is_blank {
3452 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3455 eprintln!("[DEBUG] Line {line_num}: entering blank line handling");
3456 }
3457 let mut check_idx = line_idx + 1;
3458 let mut found_continuation = false;
3459
3460 while check_idx < lines.len() && lines[check_idx].is_blank {
3462 check_idx += 1;
3463 }
3464
3465 if check_idx < lines.len() {
3466 let next_line = &lines[check_idx];
3467 let next_content = next_line.content(content);
3469 let block_bq_level_for_indent = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3472 let next_bq_level_for_indent = next_content
3473 .chars()
3474 .take_while(|c| *c == '>' || c.is_whitespace())
3475 .filter(|&c| c == '>')
3476 .count();
3477 let effective_indent =
3478 if next_bq_level_for_indent > 0 && next_bq_level_for_indent == block_bq_level_for_indent {
3479 let mut pos = 0;
3482 let mut found_markers = 0;
3483 for c in next_content.chars() {
3484 pos += c.len_utf8();
3485 if c == '>' {
3486 found_markers += 1;
3487 if found_markers == next_bq_level_for_indent {
3488 if next_content.get(pos..pos + 1) == Some(" ") {
3490 pos += 1;
3491 }
3492 break;
3493 }
3494 }
3495 }
3496 let after_blockquote_marker = &next_content[pos..];
3497 after_blockquote_marker.len() - after_blockquote_marker.trim_start().len()
3498 } else {
3499 next_line.indent
3500 };
3501 let adjusted_min_continuation = if block_bq_level_for_indent > 0 {
3504 if block.is_ordered { last_marker_width } else { 2 }
3507 } else {
3508 min_continuation_indent
3509 };
3510 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3512 eprintln!(
3513 "[DEBUG] Blank line {} checking next line {}: effective_indent={}, adjusted_min={}, next_is_list={}, in_code_block={}",
3514 line_num,
3515 check_idx + 1,
3516 effective_indent,
3517 adjusted_min_continuation,
3518 next_line.list_item.is_some(),
3519 next_line.in_code_block
3520 );
3521 }
3522 if !next_line.in_code_block && effective_indent >= adjusted_min_continuation {
3523 found_continuation = true;
3524 }
3525 else if !next_line.in_code_block
3527 && next_line.list_item.is_some()
3528 && let Some(item) = &next_line.list_item
3529 {
3530 let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
3531 .find(next_line.content(content))
3532 .map_or(String::new(), |m| m.as_str().to_string());
3533 if item.marker_column == current_indent_level
3534 && item.is_ordered == block.is_ordered
3535 && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
3536 {
3537 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3541 let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
3542 if let Some(between_line) = lines.get(idx) {
3543 let between_content = between_line.content(content);
3544 let trimmed = between_content.trim();
3545 if trimmed.is_empty() {
3547 return false;
3548 }
3549 let line_indent = between_content.len() - between_content.trim_start().len();
3551
3552 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
3554 .find(between_content)
3555 .map_or(String::new(), |m| m.as_str().to_string());
3556 let between_bq_level = between_bq_prefix.chars().filter(|&c| c == '>').count();
3557 let blockquote_level_changed =
3558 trimmed.starts_with(">") && between_bq_level != block_bq_level;
3559
3560 if trimmed.starts_with("```")
3562 || trimmed.starts_with("~~~")
3563 || trimmed.starts_with("---")
3564 || trimmed.starts_with("***")
3565 || trimmed.starts_with("___")
3566 || blockquote_level_changed
3567 || crate::utils::skip_context::is_table_line(trimmed)
3568 || between_line.heading.is_some()
3569 {
3570 return true; }
3572
3573 line_indent >= min_continuation_indent
3575 } else {
3576 false
3577 }
3578 });
3579
3580 if block.is_ordered {
3581 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
3584 if let Some(between_line) = lines.get(idx) {
3585 let between_content = between_line.content(content);
3586 let trimmed = between_content.trim();
3587 if trimmed.is_empty() {
3588 return false;
3589 }
3590 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
3592 .find(between_content)
3593 .map_or(String::new(), |m| m.as_str().to_string());
3594 let between_bq_level =
3595 between_bq_prefix.chars().filter(|&c| c == '>').count();
3596 let blockquote_level_changed =
3597 trimmed.starts_with(">") && between_bq_level != block_bq_level;
3598 trimmed.starts_with("```")
3600 || trimmed.starts_with("~~~")
3601 || trimmed.starts_with("---")
3602 || trimmed.starts_with("***")
3603 || trimmed.starts_with("___")
3604 || blockquote_level_changed
3605 || crate::utils::skip_context::is_table_line(trimmed)
3606 || between_line.heading.is_some()
3607 } else {
3608 false
3609 }
3610 });
3611 found_continuation = !has_structural_separators;
3612 } else {
3613 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
3615 if let Some(between_line) = lines.get(idx) {
3616 let between_content = between_line.content(content);
3617 let trimmed = between_content.trim();
3618 if trimmed.is_empty() {
3619 return false;
3620 }
3621 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
3623 .find(between_content)
3624 .map_or(String::new(), |m| m.as_str().to_string());
3625 let between_bq_level =
3626 between_bq_prefix.chars().filter(|&c| c == '>').count();
3627 let blockquote_level_changed =
3628 trimmed.starts_with(">") && between_bq_level != block_bq_level;
3629 trimmed.starts_with("```")
3631 || trimmed.starts_with("~~~")
3632 || trimmed.starts_with("---")
3633 || trimmed.starts_with("***")
3634 || trimmed.starts_with("___")
3635 || blockquote_level_changed
3636 || crate::utils::skip_context::is_table_line(trimmed)
3637 || between_line.heading.is_some()
3638 } else {
3639 false
3640 }
3641 });
3642 found_continuation = !has_structural_separators;
3643 }
3644 }
3645 }
3646 }
3647
3648 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3649 eprintln!("[DEBUG] Blank line {line_num} final: found_continuation={found_continuation}");
3650 }
3651 if found_continuation {
3652 block.end_line = line_num;
3654 } else {
3655 list_blocks.push(block.clone());
3657 current_block = None;
3658 }
3659 } else {
3660 let min_required_indent = if block.is_ordered {
3663 current_indent_level + last_marker_width
3664 } else {
3665 current_indent_level + 2
3666 };
3667
3668 let line_content = line_info.content(content).trim();
3673
3674 let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
3676
3677 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3680 let current_bq_level = blockquote_prefix.chars().filter(|&c| c == '>').count();
3681 let blockquote_level_changed = line_content.starts_with(">") && current_bq_level != block_bq_level;
3682
3683 let is_structural_separator = line_info.heading.is_some()
3684 || line_content.starts_with("```")
3685 || line_content.starts_with("~~~")
3686 || line_content.starts_with("---")
3687 || line_content.starts_with("***")
3688 || line_content.starts_with("___")
3689 || blockquote_level_changed
3690 || looks_like_table;
3691
3692 let is_lazy_continuation = !is_structural_separator
3696 && !line_info.is_blank
3697 && (line_info.indent == 0
3698 || line_info.indent >= min_required_indent
3699 || line_info.in_code_span_continuation);
3700
3701 if is_lazy_continuation {
3702 block.end_line = line_num;
3705 } else {
3706 list_blocks.push(block.clone());
3708 current_block = None;
3709 }
3710 }
3711 }
3712 }
3713
3714 if let Some(block) = current_block {
3716 list_blocks.push(block);
3717 }
3718
3719 merge_adjacent_list_blocks(content, &mut list_blocks, lines);
3721
3722 list_blocks
3723 }
3724
3725 fn compute_char_frequency(content: &str) -> CharFrequency {
3727 let mut frequency = CharFrequency::default();
3728
3729 for ch in content.chars() {
3730 match ch {
3731 '#' => frequency.hash_count += 1,
3732 '*' => frequency.asterisk_count += 1,
3733 '_' => frequency.underscore_count += 1,
3734 '-' => frequency.hyphen_count += 1,
3735 '+' => frequency.plus_count += 1,
3736 '>' => frequency.gt_count += 1,
3737 '|' => frequency.pipe_count += 1,
3738 '[' => frequency.bracket_count += 1,
3739 '`' => frequency.backtick_count += 1,
3740 '<' => frequency.lt_count += 1,
3741 '!' => frequency.exclamation_count += 1,
3742 '\n' => frequency.newline_count += 1,
3743 _ => {}
3744 }
3745 }
3746
3747 frequency
3748 }
3749
3750 fn parse_html_tags(
3752 content: &str,
3753 lines: &[LineInfo],
3754 code_blocks: &[(usize, usize)],
3755 flavor: MarkdownFlavor,
3756 ) -> Vec<HtmlTag> {
3757 static HTML_TAG_REGEX: LazyLock<regex::Regex> =
3758 LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
3759
3760 let mut html_tags = Vec::with_capacity(content.matches('<').count());
3761
3762 for cap in HTML_TAG_REGEX.captures_iter(content) {
3763 let full_match = cap.get(0).unwrap();
3764 let match_start = full_match.start();
3765 let match_end = full_match.end();
3766
3767 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3769 continue;
3770 }
3771
3772 let is_closing = !cap.get(1).unwrap().as_str().is_empty();
3773 let tag_name_original = cap.get(2).unwrap().as_str();
3774 let tag_name = tag_name_original.to_lowercase();
3775 let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
3776
3777 if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
3780 continue;
3781 }
3782
3783 let mut line_num = 1;
3785 let mut col_start = match_start;
3786 let mut col_end = match_end;
3787 for (idx, line_info) in lines.iter().enumerate() {
3788 if match_start >= line_info.byte_offset {
3789 line_num = idx + 1;
3790 col_start = match_start - line_info.byte_offset;
3791 col_end = match_end - line_info.byte_offset;
3792 } else {
3793 break;
3794 }
3795 }
3796
3797 html_tags.push(HtmlTag {
3798 line: line_num,
3799 start_col: col_start,
3800 end_col: col_end,
3801 byte_offset: match_start,
3802 byte_end: match_end,
3803 tag_name,
3804 is_closing,
3805 is_self_closing,
3806 raw_content: full_match.as_str().to_string(),
3807 });
3808 }
3809
3810 html_tags
3811 }
3812
3813 fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
3815 let mut table_rows = Vec::with_capacity(lines.len() / 20);
3816
3817 for (line_idx, line_info) in lines.iter().enumerate() {
3818 if line_info.in_code_block || line_info.is_blank {
3820 continue;
3821 }
3822
3823 let line = line_info.content(content);
3824 let line_num = line_idx + 1;
3825
3826 if !line.contains('|') {
3828 continue;
3829 }
3830
3831 let parts: Vec<&str> = line.split('|').collect();
3833 let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
3834
3835 let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
3837 let mut column_alignments = Vec::new();
3838
3839 if is_separator {
3840 for part in &parts[1..parts.len() - 1] {
3841 let trimmed = part.trim();
3843 let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
3844 "center".to_string()
3845 } else if trimmed.ends_with(':') {
3846 "right".to_string()
3847 } else if trimmed.starts_with(':') {
3848 "left".to_string()
3849 } else {
3850 "none".to_string()
3851 };
3852 column_alignments.push(alignment);
3853 }
3854 }
3855
3856 table_rows.push(TableRow {
3857 line: line_num,
3858 is_separator,
3859 column_count,
3860 column_alignments,
3861 });
3862 }
3863
3864 table_rows
3865 }
3866
3867 fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
3869 let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
3870
3871 for cap in URL_SIMPLE_REGEX.captures_iter(content) {
3873 let full_match = cap.get(0).unwrap();
3874 let match_start = full_match.start();
3875 let match_end = full_match.end();
3876
3877 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3879 continue;
3880 }
3881
3882 let preceding_char = if match_start > 0 {
3884 content.chars().nth(match_start - 1)
3885 } else {
3886 None
3887 };
3888 let following_char = content.chars().nth(match_end);
3889
3890 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3891 continue;
3892 }
3893 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3894 continue;
3895 }
3896
3897 let url = full_match.as_str();
3898 let url_type = if url.starts_with("https://") {
3899 "https"
3900 } else if url.starts_with("http://") {
3901 "http"
3902 } else if url.starts_with("ftp://") {
3903 "ftp"
3904 } else {
3905 "other"
3906 };
3907
3908 let mut line_num = 1;
3910 let mut col_start = match_start;
3911 let mut col_end = match_end;
3912 for (idx, line_info) in lines.iter().enumerate() {
3913 if match_start >= line_info.byte_offset {
3914 line_num = idx + 1;
3915 col_start = match_start - line_info.byte_offset;
3916 col_end = match_end - line_info.byte_offset;
3917 } else {
3918 break;
3919 }
3920 }
3921
3922 bare_urls.push(BareUrl {
3923 line: line_num,
3924 start_col: col_start,
3925 end_col: col_end,
3926 byte_offset: match_start,
3927 byte_end: match_end,
3928 url: url.to_string(),
3929 url_type: url_type.to_string(),
3930 });
3931 }
3932
3933 for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
3935 let full_match = cap.get(0).unwrap();
3936 let match_start = full_match.start();
3937 let match_end = full_match.end();
3938
3939 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3941 continue;
3942 }
3943
3944 let preceding_char = if match_start > 0 {
3946 content.chars().nth(match_start - 1)
3947 } else {
3948 None
3949 };
3950 let following_char = content.chars().nth(match_end);
3951
3952 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3953 continue;
3954 }
3955 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3956 continue;
3957 }
3958
3959 let email = full_match.as_str();
3960
3961 let mut line_num = 1;
3963 let mut col_start = match_start;
3964 let mut col_end = match_end;
3965 for (idx, line_info) in lines.iter().enumerate() {
3966 if match_start >= line_info.byte_offset {
3967 line_num = idx + 1;
3968 col_start = match_start - line_info.byte_offset;
3969 col_end = match_end - line_info.byte_offset;
3970 } else {
3971 break;
3972 }
3973 }
3974
3975 bare_urls.push(BareUrl {
3976 line: line_num,
3977 start_col: col_start,
3978 end_col: col_end,
3979 byte_offset: match_start,
3980 byte_end: match_end,
3981 url: email.to_string(),
3982 url_type: "email".to_string(),
3983 });
3984 }
3985
3986 bare_urls
3987 }
3988
/// Build a `ValidHeadingsIter` over this context's lines.
///
/// The iterator is constructed from `self.lines` and borrows from `self`.
/// Judging by its name it presumably yields only headings marked valid; see
/// `ValidHeadingsIter` for the exact item type and filtering.
#[must_use]
pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
    ValidHeadingsIter::new(&self.lines)
}
4012
4013 #[must_use]
4017 pub fn has_valid_headings(&self) -> bool {
4018 self.lines
4019 .iter()
4020 .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
4021 }
4022}
4023
4024fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
4026 if list_blocks.len() < 2 {
4027 return;
4028 }
4029
4030 let mut merger = ListBlockMerger::new(content, lines);
4031 *list_blocks = merger.merge(list_blocks);
4032}
4033
4034struct ListBlockMerger<'a> {
4036 content: &'a str,
4037 lines: &'a [LineInfo],
4038}
4039
4040impl<'a> ListBlockMerger<'a> {
4041 fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
4042 Self { content, lines }
4043 }
4044
4045 fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
4046 let mut merged = Vec::with_capacity(list_blocks.len());
4047 let mut current = list_blocks[0].clone();
4048
4049 for next in list_blocks.iter().skip(1) {
4050 if self.should_merge_blocks(¤t, next) {
4051 current = self.merge_two_blocks(current, next);
4052 } else {
4053 merged.push(current);
4054 current = next.clone();
4055 }
4056 }
4057
4058 merged.push(current);
4059 merged
4060 }
4061
4062 fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
4064 if !self.blocks_are_compatible(current, next) {
4066 return false;
4067 }
4068
4069 let spacing = self.analyze_spacing_between(current, next);
4071 match spacing {
4072 BlockSpacing::Consecutive => true,
4073 BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
4074 BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
4075 self.can_merge_with_content_between(current, next)
4076 }
4077 }
4078 }
4079
4080 fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
4082 current.is_ordered == next.is_ordered
4083 && current.blockquote_prefix == next.blockquote_prefix
4084 && current.nesting_level == next.nesting_level
4085 }
4086
4087 fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
4089 let gap = next.start_line - current.end_line;
4090
4091 match gap {
4092 1 => BlockSpacing::Consecutive,
4093 2 => BlockSpacing::SingleBlank,
4094 _ if gap > 2 => {
4095 if self.has_only_blank_lines_between(current, next) {
4096 BlockSpacing::MultipleBlanks
4097 } else {
4098 BlockSpacing::ContentBetween
4099 }
4100 }
4101 _ => BlockSpacing::Consecutive, }
4103 }
4104
4105 fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4107 if has_meaningful_content_between(self.content, current, next, self.lines) {
4110 return false; }
4112
4113 !current.is_ordered && current.marker == next.marker
4115 }
4116
4117 fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4119 if has_meaningful_content_between(self.content, current, next, self.lines) {
4121 return false; }
4123
4124 current.is_ordered && next.is_ordered
4126 }
4127
4128 fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4130 for line_num in (current.end_line + 1)..next.start_line {
4131 if let Some(line_info) = self.lines.get(line_num - 1)
4132 && !line_info.content(self.content).trim().is_empty()
4133 {
4134 return false;
4135 }
4136 }
4137 true
4138 }
4139
4140 fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
4142 current.end_line = next.end_line;
4143 current.item_lines.extend_from_slice(&next.item_lines);
4144
4145 current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
4147
4148 if !current.is_ordered && self.markers_differ(¤t, next) {
4150 current.marker = None; }
4152
4153 current
4154 }
4155
4156 fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
4158 current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
4159 }
4160}
4161
/// How two list blocks are separated, as classified by
/// `analyze_spacing_between` from the difference between the next block's
/// `start_line` and the current block's `end_line`.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// Line-number difference of 1 (also used as the fallback for smaller differences).
    Consecutive,
    /// Line-number difference of exactly 2, i.e. one line in between.
    SingleBlank,
    /// Difference greater than 2 with only blank lines in between.
    MultipleBlanks,
    /// Difference greater than 2 with at least one non-blank line in between.
    ContentBetween,
}
4170
4171fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
4173 for line_num in (current.end_line + 1)..next.start_line {
4175 if let Some(line_info) = lines.get(line_num - 1) {
4176 let trimmed = line_info.content(content).trim();
4178
4179 if trimmed.is_empty() {
4181 continue;
4182 }
4183
4184 if line_info.heading.is_some() {
4188 return true; }
4190
4191 if is_horizontal_rule(trimmed) {
4193 return true; }
4195
4196 if crate::utils::skip_context::is_table_line(trimmed) {
4198 return true; }
4200
4201 if trimmed.starts_with('>') {
4203 return true; }
4205
4206 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
4208 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4209
4210 let min_continuation_indent = if current.is_ordered {
4212 current.nesting_level + current.max_marker_width + 1 } else {
4214 current.nesting_level + 2
4215 };
4216
4217 if line_indent < min_continuation_indent {
4218 return true; }
4221 }
4222
4223 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4225
4226 let min_indent = if current.is_ordered {
4228 current.nesting_level + current.max_marker_width
4229 } else {
4230 current.nesting_level + 2
4231 };
4232
4233 if line_indent < min_indent {
4235 return true; }
4237
4238 }
4241 }
4242
4243 false
4245}
4246
4247pub fn is_horizontal_rule_line(line: &str) -> bool {
4254 let leading_spaces = line.len() - line.trim_start_matches(' ').len();
4256 if leading_spaces > 3 || line.starts_with('\t') {
4257 return false;
4258 }
4259
4260 is_horizontal_rule_content(line.trim())
4261}
4262
/// Checks whether already-trimmed text is a horizontal rule.
///
/// The text must start with `-`, `*` or `_`, contain that same character at
/// least three times, and contain nothing else except spaces and tabs.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    // Three marker characters need at least three bytes; cheap early exit.
    if trimmed.len() < 3 {
        return false;
    }

    // Walk the characters directly rather than collecting them into a Vec first.
    let mut chars = trimmed.chars();
    let marker = match chars.next() {
        Some(ch) if matches!(ch, '-' | '*' | '_') => ch,
        _ => return false,
    };

    let mut count = 1usize;
    for ch in chars {
        if ch == marker {
            count += 1;
        } else if ch != ' ' && ch != '\t' {
            // Any other character (including a different marker) disqualifies the line.
            return false;
        }
    }

    count >= 3
}
4287
/// Alias for `is_horizontal_rule_content`: expects already-trimmed text and
/// simply forwards to it.
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}
4292
4293#[cfg(test)]
4295mod tests {
4296 use super::*;
4297
    /// Empty input: no line records, a single line offset of 0, and offset 0
    /// maps to position (1, 1).
    #[test]
    fn test_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }
4306
    /// A single line without a trailing newline: one line offset, and byte
    /// offsets map to 1-based (line, column) pairs.
    #[test]
    fn test_single_line() {
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
    }
4315
    /// Line offsets and offset-to-position mapping over four lines, one of
    /// which is blank.
    #[test]
    fn test_multi_line() {
        let content = "# Title\n\nSecond line\nThird line";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // start of "# Title"
        assert_eq!(ctx.offset_to_line_col(8), (2, 1)); // the blank line
        assert_eq!(ctx.offset_to_line_col(9), (3, 1)); // start of "Second line"
        assert_eq!(ctx.offset_to_line_col(15), (3, 7)); // inside "Second line"
        assert_eq!(ctx.offset_to_line_col(21), (4, 1)); // start of "Third line"
    }
4328
4329 #[test]
4330 fn test_line_info() {
4331 let content = "# Title\n indented\n\ncode:\n```rust\nfn main() {}\n```";
4332 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4333
4334 assert_eq!(ctx.lines.len(), 7);
4336
4337 let line1 = &ctx.lines[0];
4339 assert_eq!(line1.content(ctx.content), "# Title");
4340 assert_eq!(line1.byte_offset, 0);
4341 assert_eq!(line1.indent, 0);
4342 assert!(!line1.is_blank);
4343 assert!(!line1.in_code_block);
4344 assert!(line1.list_item.is_none());
4345
4346 let line2 = &ctx.lines[1];
4348 assert_eq!(line2.content(ctx.content), " indented");
4349 assert_eq!(line2.byte_offset, 8);
4350 assert_eq!(line2.indent, 4);
4351 assert!(!line2.is_blank);
4352
4353 let line3 = &ctx.lines[2];
4355 assert_eq!(line3.content(ctx.content), "");
4356 assert!(line3.is_blank);
4357
4358 assert_eq!(ctx.line_to_byte_offset(1), Some(0));
4360 assert_eq!(ctx.line_to_byte_offset(2), Some(8));
4361 assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
4362 assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
4363 }
4364
4365 #[test]
4366 fn test_list_item_detection() {
4367 let content = "- Unordered item\n * Nested item\n1. Ordered item\n 2) Nested ordered\n\nNot a list";
4368 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4369
4370 let line1 = &ctx.lines[0];
4372 assert!(line1.list_item.is_some());
4373 let list1 = line1.list_item.as_ref().unwrap();
4374 assert_eq!(list1.marker, "-");
4375 assert!(!list1.is_ordered);
4376 assert_eq!(list1.marker_column, 0);
4377 assert_eq!(list1.content_column, 2);
4378
4379 let line2 = &ctx.lines[1];
4381 assert!(line2.list_item.is_some());
4382 let list2 = line2.list_item.as_ref().unwrap();
4383 assert_eq!(list2.marker, "*");
4384 assert_eq!(list2.marker_column, 2);
4385
4386 let line3 = &ctx.lines[2];
4388 assert!(line3.list_item.is_some());
4389 let list3 = line3.list_item.as_ref().unwrap();
4390 assert_eq!(list3.marker, "1.");
4391 assert!(list3.is_ordered);
4392 assert_eq!(list3.number, Some(1));
4393
4394 let line6 = &ctx.lines[5];
4396 assert!(line6.list_item.is_none());
4397 }
4398
    /// Offset-to-position mapping at newline bytes and one past the end of
    /// the content.
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        let content = "a\nb\nc";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // 'a'
        assert_eq!(ctx.offset_to_line_col(1), (1, 2)); // newline after 'a'
        assert_eq!(ctx.offset_to_line_col(2), (2, 1)); // 'b'
        assert_eq!(ctx.offset_to_line_col(3), (2, 2)); // newline after 'b'
        assert_eq!(ctx.offset_to_line_col(4), (3, 1)); // 'c'
        assert_eq!(ctx.offset_to_line_col(5), (3, 2)); // one past the last byte
    }
4411
    /// With the MDX flavor, the leading `import`/`export` lines are flagged
    /// `in_esm_block`; the blank line, heading and text after them are not.
    #[test]
    fn test_mdx_esm_blocks() {
        let content = r##"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall

In {year}, the snowfall was above average.
It was followed by a warm spring which caused
flood conditions in many of the nearby rivers.

<Chart color="#fcb32c" year={year} />
"##;

        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);

        assert_eq!(ctx.lines.len(), 10);
        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
        assert!(
            !ctx.lines[3].in_esm_block,
            "Line 4 (heading) should NOT be in_esm_block"
        );
        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
    }
4440
    /// The same `import`/`export` lines are not flagged `in_esm_block` when
    /// the Standard flavor is used.
    #[test]
    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
        let content = r#"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(
            !ctx.lines[0].in_esm_block,
            "Line 1 should NOT be in_esm_block in Standard flavor"
        );
        assert!(
            !ctx.lines[1].in_esm_block,
            "Line 2 should NOT be in_esm_block in Standard flavor"
        );
    }
4461
4462 #[test]
4463 fn test_blockquote_with_indented_content() {
4464 let content = r#"# Heading
4468
4469> -S socket-path
4470> More text
4471"#;
4472 let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4473
4474 assert!(
4476 ctx.lines.get(2).is_some_and(|l| l.blockquote.is_some()),
4477 "Line 3 should be a blockquote"
4478 );
4479 assert!(
4481 ctx.lines.get(3).is_some_and(|l| l.blockquote.is_some()),
4482 "Line 4 should be a blockquote"
4483 );
4484
4485 let bq3 = ctx.lines.get(2).unwrap().blockquote.as_ref().unwrap();
4488 assert_eq!(bq3.content, "-S socket-path");
4489 assert_eq!(bq3.nesting_level, 1);
4490 assert!(bq3.has_multiple_spaces_after_marker);
4492
4493 let bq4 = ctx.lines.get(3).unwrap().blockquote.as_ref().unwrap();
4494 assert_eq!(bq4.content, "More text");
4495 assert_eq!(bq4.nesting_level, 1);
4496 }
4497
    /// Footnote definitions (`[^id]: ...`) are not collected as reference
    /// definitions; only the regular `[regular]: ...` definition is.
    #[test]
    fn test_footnote_definitions_not_parsed_as_reference_defs() {
        let content = r#"# Title

A footnote[^1].

[^1]: This is the footnote content.

[^note]: Another footnote with [link](https://example.com).

[regular]: ./path.md "A real reference definition"
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(
            ctx.reference_defs.len(),
            1,
            "Footnotes should not be parsed as reference definitions"
        );

        // The one surviving definition keeps its id, URL and title.
        assert_eq!(ctx.reference_defs[0].id, "regular");
        assert_eq!(ctx.reference_defs[0].url, "./path.md");
        assert_eq!(
            ctx.reference_defs[0].title,
            Some("A real reference definition".to_string())
        );
    }
4528
    /// A footnote whose body starts with an inline link produces no
    /// reference definition.
    #[test]
    fn test_footnote_with_inline_link_not_misidentified() {
        let content = r#"# Title

A footnote[^1].

[^1]: [link](https://www.google.com).
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(
            ctx.reference_defs.is_empty(),
            "Footnote with inline link should not create a reference definition"
        );
    }
4547
    /// Several footnote id shapes (numeric, named, single character, dashed,
    /// mixed) are all excluded, while the two regular definitions are kept.
    #[test]
    fn test_various_footnote_formats_excluded() {
        let content = r#"[^1]: Numeric footnote
[^note]: Named footnote
[^a]: Single char footnote
[^long-footnote-name]: Long named footnote
[^123abc]: Mixed alphanumeric

[ref1]: ./file1.md
[ref2]: ./file2.md
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(
            ctx.reference_defs.len(),
            2,
            "Only regular reference definitions should be parsed"
        );

        let ids: Vec<&str> = ctx.reference_defs.iter().map(|r| r.id.as_str()).collect();
        assert!(ids.contains(&"ref1"));
        assert!(ids.contains(&"ref2"));
        // No collected id may look like a footnote label.
        assert!(!ids.iter().any(|id| id.starts_with('^')));
    }
4574
    /// `has_char` returns `true` for each of these Markdown-significant
    /// characters when the content contains it.
    #[test]
    fn test_has_char_tracked_characters() {
        let content = "# Heading\n* list item\n_emphasis_ and -hyphen-\n+ plus\n> quote\n| table |\n[link]\n`code`\n<html>\n!image";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(ctx.has_char('#'), "Should detect hash");
        assert!(ctx.has_char('*'), "Should detect asterisk");
        assert!(ctx.has_char('_'), "Should detect underscore");
        assert!(ctx.has_char('-'), "Should detect hyphen");
        assert!(ctx.has_char('+'), "Should detect plus");
        assert!(ctx.has_char('>'), "Should detect gt");
        assert!(ctx.has_char('|'), "Should detect pipe");
        assert!(ctx.has_char('['), "Should detect bracket");
        assert!(ctx.has_char('`'), "Should detect backtick");
        assert!(ctx.has_char('<'), "Should detect lt");
        assert!(ctx.has_char('!'), "Should detect exclamation");
        assert!(ctx.has_char('\n'), "Should detect newline");
    }
4599
    /// `has_char` returns `false` for each of the same characters when the
    /// content is a single line of plain words.
    #[test]
    fn test_has_char_absent_characters() {
        let content = "Simple text without special chars";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(!ctx.has_char('#'), "Should not detect hash");
        assert!(!ctx.has_char('*'), "Should not detect asterisk");
        assert!(!ctx.has_char('_'), "Should not detect underscore");
        assert!(!ctx.has_char('-'), "Should not detect hyphen");
        assert!(!ctx.has_char('+'), "Should not detect plus");
        assert!(!ctx.has_char('>'), "Should not detect gt");
        assert!(!ctx.has_char('|'), "Should not detect pipe");
        assert!(!ctx.has_char('['), "Should not detect bracket");
        assert!(!ctx.has_char('`'), "Should not detect backtick");
        assert!(!ctx.has_char('<'), "Should not detect lt");
        assert!(!ctx.has_char('!'), "Should not detect exclamation");
        assert!(!ctx.has_char('\n'), "Should not detect newline in single line");
    }
4620
    /// `has_char` also answers correctly for `@`, `$`, `%` and `^`, which the
    /// assertion messages describe as going through a fallback path.
    #[test]
    fn test_has_char_fallback_for_untracked() {
        let content = "Text with @mention and $dollar and %percent";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(ctx.has_char('@'), "Should detect @ via fallback");
        assert!(ctx.has_char('$'), "Should detect $ via fallback");
        assert!(ctx.has_char('%'), "Should detect % via fallback");
        assert!(!ctx.has_char('^'), "Should not detect absent ^ via fallback");
    }
4632
    /// `char_count` returns the exact number of occurrences for each
    /// Markdown-significant character in a multi-line fixture.
    #[test]
    fn test_char_count_tracked_characters() {
        let content = "## Heading ##\n***bold***\n__emphasis__\n---\n+++\n>> nested\n|| table ||\n[[link]]\n``code``\n<<html>>\n!!";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.char_count('#'), 4, "Should count 4 hashes");
        assert_eq!(ctx.char_count('*'), 6, "Should count 6 asterisks");
        assert_eq!(ctx.char_count('_'), 4, "Should count 4 underscores");
        assert_eq!(ctx.char_count('-'), 3, "Should count 3 hyphens");
        assert_eq!(ctx.char_count('+'), 3, "Should count 3 pluses");
        assert_eq!(ctx.char_count('>'), 4, "Should count 4 gt (2 nested + 2 in <<html>>)");
        assert_eq!(ctx.char_count('|'), 4, "Should count 4 pipes");
        assert_eq!(ctx.char_count('['), 2, "Should count 2 brackets");
        assert_eq!(ctx.char_count('`'), 4, "Should count 4 backticks");
        assert_eq!(ctx.char_count('<'), 2, "Should count 2 lt");
        assert_eq!(ctx.char_count('!'), 2, "Should count 2 exclamations");
        assert_eq!(ctx.char_count('\n'), 10, "Should count 10 newlines");
    }
4652
    /// `char_count` is 0 for characters that do not occur in the content.
    #[test]
    fn test_char_count_zero_for_absent() {
        let content = "Plain text";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.char_count('#'), 0);
        assert_eq!(ctx.char_count('*'), 0);
        assert_eq!(ctx.char_count('_'), 0);
        assert_eq!(ctx.char_count('\n'), 0);
    }
4663
    /// `char_count` returns exact counts for `@`, `$`, `%` and `^`, which the
    /// assertion messages describe as going through a fallback path.
    #[test]
    fn test_char_count_fallback_for_untracked() {
        let content = "@@@ $$ %%%";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.char_count('@'), 3, "Should count 3 @ via fallback");
        assert_eq!(ctx.char_count('$'), 2, "Should count 2 $ via fallback");
        assert_eq!(ctx.char_count('%'), 3, "Should count 3 % via fallback");
        assert_eq!(ctx.char_count('^'), 0, "Should count 0 for absent char");
    }
4674
    /// On empty content every `char_count` is 0 and every `has_char` is `false`.
    #[test]
    fn test_char_count_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);

        assert_eq!(ctx.char_count('#'), 0);
        assert_eq!(ctx.char_count('*'), 0);
        assert_eq!(ctx.char_count('@'), 0);
        assert!(!ctx.has_char('#'));
        assert!(!ctx.has_char('@'));
    }
4685
    /// `is_in_html_tag` is `true` from `<` through `>` of both the opening and
    /// closing tag, and `false` for the text between them.
    #[test]
    fn test_is_in_html_tag_simple() {
        let content = "<div>content</div>";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Opening tag: bytes 0..=4.
        assert!(ctx.is_in_html_tag(0), "Position 0 (<) should be in tag");
        assert!(ctx.is_in_html_tag(1), "Position 1 (d) should be in tag");
        assert!(ctx.is_in_html_tag(4), "Position 4 (>) should be in tag");

        // Text between the tags.
        assert!(!ctx.is_in_html_tag(5), "Position 5 (c) should not be in tag");
        assert!(!ctx.is_in_html_tag(10), "Position 10 (t) should not be in tag");

        // Closing tag: bytes 12..=17.
        assert!(ctx.is_in_html_tag(12), "Position 12 (<) should be in tag");
        assert!(ctx.is_in_html_tag(17), "Position 17 (>) should be in tag");
    }
4708
    /// A self-closing `<br/>` embedded in text: only the bytes of the tag
    /// itself are reported as inside an HTML tag.
    #[test]
    fn test_is_in_html_tag_self_closing() {
        let content = "Text <br/> more text";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Text before the tag.
        assert!(!ctx.is_in_html_tag(0), "Position 0 should not be in tag");
        assert!(!ctx.is_in_html_tag(4), "Position 4 (space) should not be in tag");

        // The tag occupies bytes 5..=9.
        assert!(ctx.is_in_html_tag(5), "Position 5 (<) should be in tag");
        assert!(ctx.is_in_html_tag(8), "Position 8 (/) should be in tag");
        assert!(ctx.is_in_html_tag(9), "Position 9 (>) should be in tag");

        // Text after the tag.
        assert!(!ctx.is_in_html_tag(10), "Position 10 (space) should not be in tag");
    }
4726
    /// Attribute names and quoted values count as inside the opening tag;
    /// the link text does not.
    #[test]
    fn test_is_in_html_tag_with_attributes() {
        let content = r#"<a href="url" class="link">text</a>"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Opening tag spans bytes 0..=26.
        assert!(ctx.is_in_html_tag(0), "Start of tag");
        assert!(ctx.is_in_html_tag(10), "Inside href attribute");
        assert!(ctx.is_in_html_tag(20), "Inside class attribute");
        assert!(ctx.is_in_html_tag(26), "End of opening tag");

        // "text" spans bytes 27..=30.
        assert!(!ctx.is_in_html_tag(27), "Start of content");
        assert!(!ctx.is_in_html_tag(30), "End of content");

        assert!(ctx.is_in_html_tag(31), "Start of closing tag");
    }
4745
    /// An opening tag split across several lines is still treated as one tag;
    /// the content line after its `>` is outside it.
    #[test]
    fn test_is_in_html_tag_multiline() {
        let content = "<div\n class=\"test\"\n>\ncontent\n</div>";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(ctx.is_in_html_tag(0), "Start of multiline tag");
        assert!(ctx.is_in_html_tag(5), "After first newline in tag");
        assert!(ctx.is_in_html_tag(15), "Inside attribute");

        // `+ 2` skips the `>` and the newline that follows it.
        let closing_bracket_pos = content.find(">\n").unwrap();
        assert!(!ctx.is_in_html_tag(closing_bracket_pos + 2), "Content after tag");
    }
4760
    /// With no HTML in the content, no byte position is inside a tag.
    #[test]
    fn test_is_in_html_tag_no_tags() {
        let content = "Plain text without any HTML";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        for i in 0..content.len() {
            assert!(!ctx.is_in_html_tag(i), "Position {i} should not be in tag");
        }
    }
4771
    /// A `{{ ... }}` expression: every byte from the first `{` to the last `}`
    /// is inside the Jinja range, the surrounding text is not.
    #[test]
    fn test_is_in_jinja_range_expression() {
        let content = "Hello {{ name }}!";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Text before the expression.
        assert!(!ctx.is_in_jinja_range(0), "H should not be in Jinja");
        assert!(!ctx.is_in_jinja_range(5), "Space before Jinja should not be in Jinja");

        // The expression occupies bytes 6..=15.
        assert!(ctx.is_in_jinja_range(6), "First brace should be in Jinja");
        assert!(ctx.is_in_jinja_range(7), "Second brace should be in Jinja");
        assert!(ctx.is_in_jinja_range(10), "name should be in Jinja");
        assert!(ctx.is_in_jinja_range(14), "Closing brace should be in Jinja");
        assert!(ctx.is_in_jinja_range(15), "Second closing brace should be in Jinja");

        // Text after the expression.
        assert!(!ctx.is_in_jinja_range(16), "! should not be in Jinja");
    }
4795
    /// `{% ... %}` statements: both the opening `if` and the closing `endif`
    /// are Jinja ranges; the text between them is not.
    #[test]
    fn test_is_in_jinja_range_statement() {
        let content = "{% if condition %}content{% endif %}";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Opening statement: bytes 0..=17.
        assert!(ctx.is_in_jinja_range(0), "Start of Jinja statement");
        assert!(ctx.is_in_jinja_range(5), "condition should be in Jinja");
        assert!(ctx.is_in_jinja_range(17), "End of opening statement");

        assert!(!ctx.is_in_jinja_range(18), "content should not be in Jinja");

        // Closing statement starts at byte 25.
        assert!(ctx.is_in_jinja_range(25), "Start of endif");
        assert!(ctx.is_in_jinja_range(32), "endif should be in Jinja");
    }
4813
    /// Two separate expressions on one line are each recognised, and the
    /// text between them is outside both.
    #[test]
    fn test_is_in_jinja_range_multiple() {
        let content = "{{ a }} and {{ b }}";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // First expression: bytes 0..=6.
        assert!(ctx.is_in_jinja_range(0));
        assert!(ctx.is_in_jinja_range(3));
        assert!(ctx.is_in_jinja_range(6));

        // " and " between the expressions.
        assert!(!ctx.is_in_jinja_range(8));
        assert!(!ctx.is_in_jinja_range(11));

        // Second expression: bytes 12..=18.
        assert!(ctx.is_in_jinja_range(12));
        assert!(ctx.is_in_jinja_range(15));
        assert!(ctx.is_in_jinja_range(18));
    }
4833
    /// Content without any Jinja delimiters: no byte position is inside a
    /// Jinja range.
    #[test]
    fn test_is_in_jinja_range_no_jinja() {
        let content = "Plain text with single braces but not Jinja";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        for i in 0..content.len() {
            assert!(!ctx.is_in_jinja_range(i), "Position {i} should not be in Jinja");
        }
    }
4844
    /// For a reference definition with a double-quoted title,
    /// `is_in_link_title` covers `title_byte_start..title_byte_end`
    /// (end exclusive) and excludes the URL.
    #[test]
    fn test_is_in_link_title_with_title() {
        let content = r#"[ref]: https://example.com "Title text"

Some content."#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.reference_defs.len(), 1);
        let def = &ctx.reference_defs[0];
        assert!(def.title_byte_start.is_some());
        assert!(def.title_byte_end.is_some());

        let title_start = def.title_byte_start.unwrap();
        let title_end = def.title_byte_end.unwrap();

        // Byte 10 lies inside the URL.
        assert!(!ctx.is_in_link_title(10), "URL should not be in title");

        assert!(ctx.is_in_link_title(title_start), "Title start should be in title");
        assert!(
            ctx.is_in_link_title(title_start + 5),
            "Middle of title should be in title"
        );
        assert!(ctx.is_in_link_title(title_end - 1), "End of title should be in title");

        // The end offset itself is already outside the title.
        assert!(
            !ctx.is_in_link_title(title_end),
            "After title end should not be in title"
        );
    }
4882
    /// A reference definition without a title has no title byte range, and
    /// no position in the document is reported as inside a link title.
    #[test]
    fn test_is_in_link_title_without_title() {
        let content = "[ref]: https://example.com\n\nSome content.";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.reference_defs.len(), 1);
        let def = &ctx.reference_defs[0];
        assert!(def.title_byte_start.is_none());
        assert!(def.title_byte_end.is_none());

        for i in 0..content.len() {
            assert!(!ctx.is_in_link_title(i), "Position {i} should not be in title");
        }
    }
4899
    /// Three definitions, two of them titled: title ranges are tracked per
    /// definition and queried independently.
    #[test]
    fn test_is_in_link_title_multiple_refs() {
        let content = r#"[ref1]: /url1 "Title One"
[ref2]: /url2
[ref3]: /url3 "Title Three"
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.reference_defs.len(), 3);

        // ref1 has a title.
        let ref1 = ctx.reference_defs.iter().find(|r| r.id == "ref1").unwrap();
        assert!(ref1.title_byte_start.is_some());

        // ref2 has none.
        let ref2 = ctx.reference_defs.iter().find(|r| r.id == "ref2").unwrap();
        assert!(ref2.title_byte_start.is_none());

        // ref3 has a title.
        let ref3 = ctx.reference_defs.iter().find(|r| r.id == "ref3").unwrap();
        assert!(ref3.title_byte_start.is_some());

        // A position just inside ref1's title is reported; one several bytes past its end is not.
        if let (Some(start), Some(end)) = (ref1.title_byte_start, ref1.title_byte_end) {
            assert!(ctx.is_in_link_title(start + 1));
            assert!(!ctx.is_in_link_title(end + 5));
        }

        if let (Some(start), Some(_end)) = (ref3.title_byte_start, ref3.title_byte_end) {
            assert!(ctx.is_in_link_title(start + 1));
        }
    }
4934
    /// A single-quoted title: when a title range is recorded, it behaves like
    /// the double-quoted case (start inclusive, end exclusive). The range
    /// checks are skipped if no title range was recorded.
    #[test]
    fn test_is_in_link_title_single_quotes() {
        let content = "[ref]: /url 'Single quoted title'\n";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.reference_defs.len(), 1);
        let def = &ctx.reference_defs[0];

        if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
            assert!(ctx.is_in_link_title(start));
            assert!(ctx.is_in_link_title(start + 5));
            assert!(!ctx.is_in_link_title(end));
        }
    }
4949
    /// A parenthesised title. The test accepts either outcome: if the
    /// definition and its title range are recorded, the range must behave as
    /// usual; otherwise no position may be reported as inside a title.
    #[test]
    fn test_is_in_link_title_parentheses() {
        let content = "[ref]: /url (Parenthesized title)\n";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        if ctx.reference_defs.is_empty() {
            // Definition not recognised at all: nothing is a title.
            for i in 0..content.len() {
                assert!(!ctx.is_in_link_title(i));
            }
        } else {
            let def = &ctx.reference_defs[0];
            if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
                assert!(ctx.is_in_link_title(start));
                assert!(ctx.is_in_link_title(start + 5));
                assert!(!ctx.is_in_link_title(end));
            } else {
                // Definition recognised but without a title range.
                for i in 0..content.len() {
                    assert!(!ctx.is_in_link_title(i));
                }
            }
        }
    }
4978
    /// Without any reference definitions, no position is inside a link title.
    #[test]
    fn test_is_in_link_title_no_refs() {
        let content = "Just plain text without any reference definitions.";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(ctx.reference_defs.is_empty());

        for i in 0..content.len() {
            assert!(!ctx.is_in_link_title(i));
        }
    }
4990
    /// A single `$...$` span is reported as inline math, with its content
    /// (which looks like a Markdown link) extracted without the delimiters.
    #[test]
    fn test_math_spans_inline() {
        let content = "Text with inline math $[f](x)$ in it.";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let math_spans = ctx.math_spans();
        assert_eq!(math_spans.len(), 1, "Should detect one inline math span");

        let span = &math_spans[0];
        assert!(!span.is_display, "Should be inline math, not display");
        assert_eq!(span.content, "[f](x)", "Content should be extracted correctly");
    }
5007
    /// A `$$...$$` span on one line is reported as display math and keeps the
    /// link-like text in its content.
    #[test]
    fn test_math_spans_display_single_line() {
        let content = "$$X(\\zeta) = \\mathcal Z [x](\\zeta)$$";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let math_spans = ctx.math_spans();
        assert_eq!(math_spans.len(), 1, "Should detect one display math span");

        let span = &math_spans[0];
        assert!(span.is_display, "Should be display math");
        assert!(
            span.content.contains("[x](\\zeta)"),
            "Content should contain the link-like pattern"
        );
    }
5023
    /// A `$$` block whose delimiters sit on their own lines is reported as a
    /// single display math span.
    #[test]
    fn test_math_spans_display_multiline() {
        let content = "Before\n\n$$\n[x](\\zeta) = \\sum_k x(k)\n$$\n\nAfter";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let math_spans = ctx.math_spans();
        assert_eq!(math_spans.len(), 1, "Should detect one display math span");

        let span = &math_spans[0];
        assert!(span.is_display, "Should be display math");
    }
5035
    /// `is_in_math_span` is `true` for positions inside the `$...$` span and
    /// `false` before it and after it.
    #[test]
    fn test_is_in_math_span() {
        let content = "Text $[f](x)$ more text";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // `math_end` is the offset just past the closing `$`.
        let math_start = content.find('$').unwrap();
        let math_end = content.rfind('$').unwrap() + 1;

        assert!(
            ctx.is_in_math_span(math_start + 1),
            "Position inside math span should return true"
        );
        assert!(
            ctx.is_in_math_span(math_start + 3),
            "Position inside math span should return true"
        );

        assert!(!ctx.is_in_math_span(0), "Position before math span should return false");
        assert!(
            !ctx.is_in_math_span(math_end + 1),
            "Position after math span should return false"
        );
    }
5061
    /// Math spans and code spans on the same line are collected separately,
    /// each with its own content.
    #[test]
    fn test_math_spans_mixed_with_code() {
        let content = "Math $[f](x)$ and code `[g](y)` mixed";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let math_spans = ctx.math_spans();
        let code_spans = ctx.code_spans();

        assert_eq!(math_spans.len(), 1, "Should have one math span");
        assert_eq!(code_spans.len(), 1, "Should have one code span");

        // Contents exclude the `$` and backtick delimiters.
        assert_eq!(math_spans[0].content, "[f](x)");
        assert_eq!(code_spans[0].content, "[g](y)");
    }
5078
    /// Plain text yields no math spans.
    #[test]
    fn test_math_spans_no_math() {
        let content = "Regular text without any math at all.";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let math_spans = ctx.math_spans();
        assert!(math_spans.is_empty(), "Should have no math spans");
    }
5087
    /// Two inline spans and one display span on the same line are all found
    /// and classified via `is_display`.
    #[test]
    fn test_math_spans_multiple() {
        let content = "First $a$ and second $b$ and display $$c$$";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let math_spans = ctx.math_spans();
        assert_eq!(math_spans.len(), 3, "Should detect three math spans");

        let inline_count = math_spans.iter().filter(|s| !s.is_display).count();
        let display_count = math_spans.iter().filter(|s| s.is_display).count();

        assert_eq!(inline_count, 2, "Should have two inline math spans");
        assert_eq!(display_count, 1, "Should have one display math span");
    }
5103
    /// Boundary behaviour of `is_in_math_span`: `byte_offset` is inclusive and
    /// `byte_end` is exclusive.
    #[test]
    fn test_is_in_math_span_boundary_positions() {
        let content = "$[f](x)$";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let math_spans = ctx.math_spans();
        assert_eq!(math_spans.len(), 1, "Should have one math span");

        let span = &math_spans[0];

        // First byte of the span.
        assert!(
            ctx.is_in_math_span(span.byte_offset),
            "Start position should be in span"
        );

        assert!(
            ctx.is_in_math_span(span.byte_offset + 1),
            "Position after start should be in span"
        );

        // Last byte of the span.
        assert!(
            ctx.is_in_math_span(span.byte_end - 1),
            "Position at end-1 should be in span"
        );

        // One past the last byte.
        assert!(
            !ctx.is_in_math_span(span.byte_end),
            "Position at byte_end should NOT be in span (exclusive)"
        );
    }
5140
    /// A math span that opens the document starts at byte offset 0.
    #[test]
    fn test_math_spans_at_document_start() {
        let content = "$x$ text";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let math_spans = ctx.math_spans();
        assert_eq!(math_spans.len(), 1);
        assert_eq!(math_spans[0].byte_offset, 0, "Math should start at byte 0");
    }
5150
    /// A math span that closes the document ends at `content.len()`.
    #[test]
    fn test_math_spans_at_document_end() {
        let content = "text $x$";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let math_spans = ctx.math_spans();
        assert_eq!(math_spans.len(), 1);
        assert_eq!(math_spans[0].byte_end, content.len(), "Math should end at document end");
    }
5160
    /// Back-to-back `$a$$b$`: however the spans are split, at least one span
    /// is found and every byte of the content is inside some math span.
    #[test]
    fn test_math_spans_consecutive() {
        let content = "$a$$b$";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let math_spans = ctx.math_spans();
        assert!(!math_spans.is_empty(), "Should detect at least one math span");

        for i in 0..content.len() {
            assert!(ctx.is_in_math_span(i), "Position {i} should be in a math span");
        }
    }
5175
    /// A lone `$` used as a currency sign must not yield a math span that
    /// contains the amount.
    #[test]
    fn test_math_spans_currency_not_math() {
        let content = "Price is $100";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let math_spans = ctx.math_spans();
        // Passes when there are no spans, or when none of them contains "100".
        assert!(
            math_spans.is_empty() || !math_spans.iter().any(|s| s.content.contains("100")),
            "Unbalanced $ should not create math span containing 100"
        );
    }
5190
    /// `get_reference_url` finds definitions regardless of the letter case of
    /// either the definition or the query, and returns `None` for unknown ids.
    #[test]
    fn test_reference_lookup_o1_basic() {
        let content = r#"[ref1]: /url1
[REF2]: /url2 "Title"
[Ref3]: /url3

Use [link][ref1] and [link][REF2]."#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.reference_defs.len(), 3);

        assert_eq!(ctx.get_reference_url("ref1"), Some("/url1"));
        assert_eq!(ctx.get_reference_url("REF1"), Some("/url1")); // upper-case query
        assert_eq!(ctx.get_reference_url("Ref1"), Some("/url1")); // mixed-case query
        assert_eq!(ctx.get_reference_url("ref2"), Some("/url2"));
        assert_eq!(ctx.get_reference_url("REF2"), Some("/url2"));
        assert_eq!(ctx.get_reference_url("ref3"), Some("/url3"));
        assert_eq!(ctx.get_reference_url("nonexistent"), None);
    }
5216
/// `get_reference_def` returns the definition (URL and title) for a known
/// id in either letter case, and `None` for an unknown id.
#[test]
fn test_reference_lookup_o1_get_reference_def() {
    let content = r#"[myref]: https://example.com "My Title"
"#;
    let lint_ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // Lookup using the spelling from the definition: URL and title are both exposed.
    let exact = lint_ctx.get_reference_def("myref").expect("Should find myref");
    assert_eq!(exact.url, "https://example.com");
    assert_eq!(exact.title.as_deref(), Some("My Title"));

    // Lookup using an upper-case spelling yields the same URL.
    let upper = lint_ctx.get_reference_def("MYREF").expect("Should find MYREF");
    assert_eq!(upper.url, "https://example.com");

    // An id that was never defined yields nothing.
    let unknown = lint_ctx.get_reference_def("nonexistent");
    assert!(unknown.is_none());
}
5235
/// `has_reference_def` answers `true` for defined ids regardless of letter
/// case and `false` for an id that has no definition.
#[test]
fn test_reference_lookup_o1_has_reference_def() {
    let content = r#"[foo]: /foo
[BAR]: /bar
"#;
    let lint_ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // Defined as `[foo]`.
    assert!(lint_ctx.has_reference_def("foo"));
    assert!(lint_ctx.has_reference_def("FOO"));

    // Defined as `[BAR]`.
    assert!(lint_ctx.has_reference_def("bar"));
    assert!(lint_ctx.has_reference_def("Bar"));

    // Never defined.
    let baz_defined = lint_ctx.has_reference_def("baz");
    assert!(!baz_defined);
}
5250
/// With no reference definitions in the input, the definition list is
/// empty and all three lookup methods report "not found".
#[test]
fn test_reference_lookup_o1_empty_content() {
    let content = "No references here.";
    let lint_ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // Arbitrary id used to probe each lookup method.
    let probe = "anything";

    assert!(lint_ctx.reference_defs.is_empty());
    assert_eq!(lint_ctx.get_reference_url(probe), None);
    assert!(lint_ctx.get_reference_def(probe).is_none());
    assert!(!lint_ctx.has_reference_def(probe));
}
5261
/// Reference ids containing `-`, `_` or `.` resolve to their own URLs.
#[test]
fn test_reference_lookup_o1_special_characters_in_id() {
    let content = r#"[ref-with-dash]: /url1
[ref_with_underscore]: /url2
[ref.with.dots]: /url3
"#;
    let lint_ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // (id, expected URL) pairs; each URL is unique, so a failing
    // assertion's expected value identifies the offending id.
    let cases = [
        ("ref-with-dash", "/url1"),
        ("ref_with_underscore", "/url2"),
        ("ref.with.dots", "/url3"),
    ];
    for (id, url) in cases {
        assert_eq!(lint_ctx.get_reference_url(id), Some(url));
    }
}
5274
/// Non-ASCII reference ids resolve to their URLs, and an upper-cased
/// spelling of an accented id resolves to the same definition.
#[test]
fn test_reference_lookup_o1_unicode_id() {
    let content = r#"[日本語]: /japanese
[émoji]: /emoji
"#;
    let lint_ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // Ids spelled exactly as in their definitions.
    for (id, url) in [("日本語", "/japanese"), ("émoji", "/emoji")] {
        assert_eq!(lint_ctx.get_reference_url(id), Some(url));
    }

    // Upper-case spelling, including the accented first letter.
    assert_eq!(lint_ctx.get_reference_url("ÉMOJI"), Some("/emoji"));
}
5286}