1use crate::config::MarkdownFlavor;
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
4use crate::utils::element_cache::ElementCache;
5use crate::utils::regex_cache::URL_SIMPLE_REGEX;
6use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
7use regex::Regex;
8use std::borrow::Cow;
9use std::path::PathBuf;
10use std::sync::LazyLock;
11
/// Evaluates `$code` and returns its value, timing the evaluation.
///
/// When `$profile` is true, the elapsed time is written to stderr as
/// `[PROFILE] <name>: <duration>`. `$profile` is evaluated after `$code`.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        // The clock starts before the section runs so the whole expression is measured.
        let timer = std::time::Instant::now();
        let output = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, timer.elapsed());
        }
        output
    }};
}
24
// wasm32 variant: no timing is performed. The macro accepts the same three
// arguments but only evaluates `$code` and yields its value.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
29
/// Regex for bracketed links: inline `[text](url "title")` and reference `[text][ref]`.
///
/// Capture groups: 1 = link text, 2 = URL written in angle brackets, 3 = bare URL,
/// 4 / 5 = double- / single-quoted title, 6 = reference ID.
/// `parse_links` uses it as a fallback pass and only records matches where group 6 is present.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\] # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
43
/// Regex for images: inline `![alt](url "title")` and reference `![alt][ref]`.
///
/// Same shape as the link pattern with a leading `!`. Capture groups: 1 = alt text,
/// 2 = URL written in angle brackets, 3 = bare URL, 4 / 5 = double- / single-quoted title,
/// 6 = reference ID.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\] # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
57
/// Regex for a reference definition line such as `[id]: url "title"`.
///
/// Multi-line mode; allows up to three leading spaces. Capture groups: 1 = ID,
/// 2 = destination (a run of non-whitespace), 3 / 4 = double- / single-quoted title.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
61
/// Regex matching an email-like token: local part, `@`, domain, and a final
/// label of at least two ASCII letters. It is unanchored, so it finds such tokens inside longer text.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
67
/// Regex capturing a blockquote prefix at the start of a string: optional whitespace,
/// one or more `>`, then optional whitespace (all in group 1).
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
70
/// Per-line metadata for a document; `LintContext::lines` holds one entry per line.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset in the source where this line's slice begins (see `content`).
    pub byte_offset: usize,
    /// Length in bytes of this line's slice (see `content`).
    pub byte_len: usize,
    /// Leading indentation of the line (unit not visible here — TODO confirm bytes vs. columns).
    pub indent: usize,
    /// Presumably the indentation with tabs expanded to columns — TODO confirm.
    pub visual_indent: usize,
    /// Whether the line is treated as blank.
    pub is_blank: bool,
    /// Whether the line lies inside a code block.
    pub in_code_block: bool,
    /// Whether the line lies inside front matter.
    pub in_front_matter: bool,
    /// Whether the line lies inside an HTML block.
    pub in_html_block: bool,
    /// Whether the line lies inside an HTML comment.
    pub in_html_comment: bool,
    /// List item details when the line starts a list item.
    pub list_item: Option<ListItemInfo>,
    /// Heading details when the line is a heading.
    pub heading: Option<HeadingInfo>,
    /// Blockquote details when the line is part of a blockquote.
    pub blockquote: Option<BlockquoteInfo>,
    /// Whether the line lies inside a mkdocstrings block.
    pub in_mkdocstrings: bool,
    /// Whether the line lies inside an ESM block (presumably MDX import/export — TODO confirm).
    pub in_esm_block: bool,
    /// Set by `LintContext::new` for every line after the first line of a multi-line code span.
    pub in_code_span_continuation: bool,
    /// Whether the line is a horizontal rule.
    pub is_horizontal_rule: bool,
    /// Whether the line lies inside a math block.
    pub in_math_block: bool,
}
112
113impl LineInfo {
114 pub fn content<'a>(&self, source: &'a str) -> &'a str {
116 &source[self.byte_offset..self.byte_offset + self.byte_len]
117 }
118}
119
/// Details of a list item found on a line.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The list marker text (exact form not visible here — TODO confirm, e.g. `-` or `1.`).
    pub marker: String,
    /// Whether the item belongs to an ordered list.
    pub is_ordered: bool,
    /// Presumably the item's number for ordered lists — TODO confirm.
    pub number: Option<usize>,
    /// Column of the marker; `compute_mixed_list_nesting` treats a value of 1 like 0.
    pub marker_column: usize,
    /// Presumably the column where the item's content begins — TODO confirm.
    pub content_column: usize,
}
134
/// Syntactic style in which a heading was written.
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    /// Presumably a `#`-prefixed heading — TODO confirm.
    ATX,
    /// Presumably a Setext heading underlined with `=` (level 1) — TODO confirm.
    Setext1,
    /// Presumably a Setext heading underlined with `-` (level 2) — TODO confirm.
    Setext2,
}
145
/// A link found in the document by `parse_links`.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// 1-based number of the line containing the link's start.
    pub line: usize,
    /// 0-based byte offset of the link's start within its line.
    pub start_col: usize,
    /// 0-based byte offset of the link's end within the line that contains the end.
    pub end_col: usize,
    /// Byte offset of the link's start in the document.
    pub byte_offset: usize,
    /// Byte offset of the link's end in the document.
    pub byte_end: usize,
    /// Link text: the source between the opening `[` and its matching `]`.
    pub text: Cow<'a, str>,
    /// Destination URL; empty for reference links found by the regex fallback.
    pub url: Cow<'a, str>,
    /// True for reference, collapsed, and shortcut links.
    pub is_reference: bool,
    /// Lowercased reference ID (the lowercased text when no explicit ID exists); `None` for non-reference links.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link kind as reported by pulldown-cmark (`LinkType::Reference` for regex-fallback links).
    pub link_type: LinkType,
}
170
/// A broken link reported through pulldown-cmark's broken-link callback in `parse_links`.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text copied from the callback's `BrokenLink::reference`.
    pub reference: String,
    /// The span copied from the callback's `BrokenLink::span`.
    pub span: std::ops::Range<usize>,
}
179
/// A footnote reference recorded by `parse_links` from a `FootnoteReference` event.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// Footnote identifier as reported by the parser.
    pub id: String,
    /// 1-based number of the line containing the start of the reference.
    pub line: usize,
    /// Byte offset of the start of the event's range.
    pub byte_offset: usize,
    /// Byte offset of the end of the event's range.
    pub byte_end: usize,
}
192
/// An image found in the document.
///
/// NOTE(review): the fields mirror `ParsedLink`; the conventions noted below are
/// assumed to match that type — confirm against `parse_images`.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// Line of the image (presumably 1-based — TODO confirm).
    pub line: usize,
    /// Start column within the line (presumably a 0-based byte offset — TODO confirm).
    pub start_col: usize,
    /// End column within the line (presumably a 0-based byte offset — TODO confirm).
    pub end_col: usize,
    /// Byte offset of the image's start in the document.
    pub byte_offset: usize,
    /// Byte offset of the image's end in the document.
    pub byte_end: usize,
    /// Alternative text of the image.
    pub alt_text: Cow<'a, str>,
    /// Image URL.
    pub url: Cow<'a, str>,
    /// Whether the image uses reference syntax.
    pub is_reference: bool,
    /// Reference ID for reference-style images.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link kind as reported by pulldown-cmark.
    pub link_type: LinkType,
}
217
/// A reference definition found in the document.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line of the definition (presumably 1-based — TODO confirm).
    pub line: usize,
    /// Definition ID; `get_reference_url` compares it with a lowercased lookup key,
    /// so it is presumably stored lowercased — TODO confirm.
    pub id: String,
    /// Destination URL returned by `get_reference_url`.
    pub url: String,
    /// Optional title.
    pub title: Option<String>,
    /// Start byte of the definition (inclusive, per `is_in_reference_def`).
    pub byte_offset: usize,
    /// End byte of the definition (exclusive, per `is_in_reference_def`).
    pub byte_end: usize,
    /// Start byte of the title (inclusive, per `is_in_link_title`), when a title exists.
    pub title_byte_start: Option<usize>,
    /// End byte of the title (exclusive, per `is_in_link_title`), when a title exists.
    pub title_byte_end: Option<usize>,
}
238
/// An inline code span, which may extend over several lines.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// 1-based line on which the span starts.
    pub line: usize,
    /// 1-based line on which the span ends (equal to `line` for single-line spans).
    pub end_line: usize,
    /// Start column on `line`; `is_in_code_span` compares it with a 0-based column (inclusive).
    pub start_col: usize,
    /// End column on `end_line`; `is_in_code_span` compares it with a 0-based column (exclusive).
    pub end_col: usize,
    /// Start byte of the span (inclusive).
    pub byte_offset: usize,
    /// End byte of the span (exclusive).
    pub byte_end: usize,
    /// Presumably the number of backticks in the delimiter — TODO confirm.
    pub backtick_count: usize,
    /// Text of the span (presumably without the delimiters — TODO confirm).
    pub content: String,
}
259
/// A math span found in the document.
#[derive(Debug, Clone)]
pub struct MathSpan {
    /// Line on which the span starts (presumably 1-based — TODO confirm).
    pub line: usize,
    /// Line on which the span ends.
    pub end_line: usize,
    /// Start column on `line`.
    pub start_col: usize,
    /// End column on `end_line`.
    pub end_col: usize,
    /// Start byte of the span (inclusive, per `is_in_math_span`).
    pub byte_offset: usize,
    /// End byte of the span (exclusive, per `is_in_math_span`).
    pub byte_end: usize,
    /// Presumably true for display math and false for inline math — TODO confirm.
    pub is_display: bool,
    /// Text of the span (delimiter handling not visible here — TODO confirm).
    pub content: String,
}
280
/// Details of a heading found on a line.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level.
    pub level: u8,
    /// Syntax style of the heading.
    pub style: HeadingStyle,
    /// The heading marker text.
    pub marker: String,
    /// Column of the marker.
    pub marker_column: usize,
    /// Column at which the heading text begins.
    pub content_column: usize,
    /// Heading text (presumably cleaned of markers and any custom ID — TODO confirm).
    pub text: String,
    /// Custom ID attached to the heading, if any.
    pub custom_id: Option<String>,
    /// Heading text as written (presumably before cleaning — TODO confirm).
    pub raw_text: String,
    /// Whether the heading has a closing sequence.
    pub has_closing_sequence: bool,
    /// The closing sequence text.
    pub closing_sequence: String,
    /// Whether the heading counts as valid; `ValidHeadingsIter` yields only headings with this set.
    pub is_valid: bool,
}
308
/// A valid heading paired with its line, as yielded by `ValidHeadingsIter`.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// 1-based line number of the heading.
    pub line_num: usize,
    /// The heading's details.
    pub heading: &'a HeadingInfo,
    /// The line record that carries the heading.
    pub line_info: &'a LineInfo,
}
322
/// Iterator over the lines of a document that carry a heading with `is_valid` set.
pub struct ValidHeadingsIter<'a> {
    /// All line records being scanned.
    lines: &'a [LineInfo],
    /// 0-based index of the next line to examine.
    current_index: usize,
}
331
332impl<'a> ValidHeadingsIter<'a> {
333 fn new(lines: &'a [LineInfo]) -> Self {
334 Self {
335 lines,
336 current_index: 0,
337 }
338 }
339}
340
341impl<'a> Iterator for ValidHeadingsIter<'a> {
342 type Item = ValidHeading<'a>;
343
344 fn next(&mut self) -> Option<Self::Item> {
345 while self.current_index < self.lines.len() {
346 let idx = self.current_index;
347 self.current_index += 1;
348
349 let line_info = &self.lines[idx];
350 if let Some(heading) = &line_info.heading
351 && heading.is_valid
352 {
353 return Some(ValidHeading {
354 line_num: idx + 1, heading,
356 line_info,
357 });
358 }
359 }
360 None
361 }
362}
363
/// Details of a blockquote line.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Blockquote nesting depth (presumably the number of `>` markers — TODO confirm).
    pub nesting_level: usize,
    /// Indentation text before the markers.
    pub indent: String,
    /// Column of the marker.
    pub marker_column: usize,
    /// The blockquote prefix; `blockquote_prefix_for_blank_line` returns it with trailing whitespace removed.
    pub prefix: String,
    /// Line content after the prefix.
    pub content: String,
    /// Whether no space follows the marker.
    pub has_no_space_after_marker: bool,
    /// Whether more than one space follows the marker.
    pub has_multiple_spaces_after_marker: bool,
    /// Flag consumed by rule MD028 (exact meaning not visible here — TODO confirm).
    pub needs_md028_fix: bool,
}
384
/// A contiguous list in the document.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// First line of the block (inclusive, per `is_in_list_block`).
    pub start_line: usize,
    /// Last line of the block (inclusive, per `is_in_list_block`).
    pub end_line: usize,
    /// Whether the list is ordered.
    pub is_ordered: bool,
    /// List marker for the block, when there is one (semantics of `None` not visible here — TODO confirm).
    pub marker: Option<String>,
    /// Blockquote prefix shared by the block's lines.
    pub blockquote_prefix: String,
    /// Line numbers of the list items in the block.
    pub item_lines: Vec<usize>,
    /// Nesting depth of the block.
    pub nesting_level: usize,
    /// Width of the widest marker in the block.
    pub max_marker_width: usize,
}
405
406use std::sync::{Arc, OnceLock};
407
/// Map from a `usize` key to a five-element tuple describing a list item.
// NOTE(review): the key is presumably a line number, and the tuple presumably mirrors
// `ListItemInfo` (is_ordered, marker, marker column, content column, number) — confirm at the use site.
type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;
410
/// Precomputed counts of selected characters, queried through `has_char` and `char_count`.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count reported for `#`.
    pub hash_count: usize,
    /// Count reported for `*`.
    pub asterisk_count: usize,
    /// Count reported for `_`.
    pub underscore_count: usize,
    /// Count reported for `-`.
    pub hyphen_count: usize,
    /// Count reported for `+`.
    pub plus_count: usize,
    /// Count reported for `>`.
    pub gt_count: usize,
    /// Count reported for `|`.
    pub pipe_count: usize,
    /// Count reported for `[`.
    pub bracket_count: usize,
    /// Count reported for a backtick.
    pub backtick_count: usize,
    /// Count reported for `<`.
    pub lt_count: usize,
    /// Count reported for `!`.
    pub exclamation_count: usize,
    /// Count reported for the newline character.
    pub newline_count: usize,
}
439
/// An HTML tag found in the document.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line of the tag; `html_tags_on_line` matches on this value.
    pub line: usize,
    /// Start column within the line.
    pub start_col: usize,
    /// End column within the line.
    pub end_col: usize,
    /// Start byte of the tag (inclusive, per `is_in_html_tag`).
    pub byte_offset: usize,
    /// End byte of the tag (exclusive, per `is_in_html_tag`).
    pub byte_end: usize,
    /// Name of the tag.
    pub tag_name: String,
    /// Whether this is a closing tag.
    pub is_closing: bool,
    /// Whether the tag is self-closing.
    pub is_self_closing: bool,
    /// The tag text as written in the source.
    pub raw_content: String,
}
462
/// An emphasis span found in the document.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line of the span; `emphasis_spans_on_line` matches on this value.
    pub line: usize,
    /// Start column within the line.
    pub start_col: usize,
    /// End column within the line.
    pub end_col: usize,
    /// Byte offset of the span's start.
    pub byte_offset: usize,
    /// Byte offset of the span's end.
    pub byte_end: usize,
    /// The emphasis marker character (presumably `*` or `_` — TODO confirm).
    pub marker: char,
    /// Number of marker characters in the delimiter.
    pub marker_count: usize,
    /// The emphasized text.
    pub content: String,
}
483
/// A table row found in the document.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line of the row; `table_rows_on_line` matches on this value.
    pub line: usize,
    /// Whether the row is the header separator row.
    pub is_separator: bool,
    /// Number of columns in the row.
    pub column_count: usize,
    /// Column alignments (string values not visible here — TODO confirm).
    pub column_alignments: Vec<String>,
}
496
/// A bare URL found in the document.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line of the URL; `bare_urls_on_line` matches on this value.
    pub line: usize,
    /// Start column within the line.
    pub start_col: usize,
    /// End column within the line.
    pub end_col: usize,
    /// Byte offset of the URL's start.
    pub byte_offset: usize,
    /// Byte offset of the URL's end.
    pub byte_end: usize,
    /// The URL text.
    pub url: String,
    /// Kind of URL as a string (possible values not visible here — TODO confirm).
    pub url_type: String,
}
515
/// Parsed view of one Markdown document, built by `LintContext::new`.
///
/// Public fields are computed during construction; the `*_cache` fields hold
/// results exposed through accessor methods.
pub struct LintContext<'a> {
    /// The document text.
    pub content: &'a str,
    /// Byte offsets at which lines start: `0`, then the offset after every `\n`.
    pub line_offsets: Vec<usize>,
    /// Code block byte ranges from `CodeBlockUtils::detect_code_blocks`.
    pub code_blocks: Vec<(usize, usize)>,
    /// Per-line metadata; index `i` describes line `i + 1`.
    pub lines: Vec<LineInfo>,
    /// Links found by `parse_links`.
    pub links: Vec<ParsedLink<'a>>,
    /// Images found by `parse_images`.
    pub images: Vec<ParsedImage<'a>>,
    /// Broken links reported by the Markdown parser while collecting links.
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Footnote references found while collecting links.
    pub footnote_refs: Vec<FootnoteRef>,
    /// Reference definitions found by `parse_reference_defs`.
    pub reference_defs: Vec<ReferenceDef>,
    /// Code spans; filled during construction.
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>,
    /// Math spans; computed on first call to `math_spans`.
    math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>,
    /// List blocks found by `parse_list_blocks`.
    pub list_blocks: Vec<ListBlock>,
    /// Counts of selected characters in the document.
    pub char_frequency: CharFrequency,
    /// HTML tags; computed on first call to `html_tags`.
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>,
    /// Emphasis spans; filled during construction.
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>,
    /// Table rows; computed on first call to `table_rows`.
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>,
    /// Bare URLs; computed on first call to `bare_urls`.
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>,
    /// Result of `compute_mixed_list_nesting`; computed on first call to `has_mixed_list_nesting`.
    has_mixed_list_nesting_cache: OnceLock<bool>,
    /// Byte ranges of HTML comments.
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Table blocks found by `TableUtils::find_table_blocks_with_code_info`.
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Line index built over `content`.
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    /// Byte ranges from `jinja_utils::find_jinja_ranges`; treated as half-open by `is_in_jinja_range`.
    jinja_ranges: Vec<(usize, usize)>,
    /// Markdown flavor the document is parsed as.
    pub flavor: MarkdownFlavor,
    /// Path of the source file, when known.
    pub source_file: Option<PathBuf>,
}
542
/// Borrowed pieces of a blockquote line, as split by `parse_blockquote_detailed`.
struct BlockquoteComponents<'a> {
    /// Leading spaces and tabs before the first `>`.
    indent: &'a str,
    /// The run of consecutive `>` characters.
    markers: &'a str,
    /// Spaces and tabs directly after the markers.
    spaces_after: &'a str,
    /// Everything after that whitespace.
    content: &'a str,
}

/// Splits a blockquote line into indent, `>` markers, following whitespace, and content.
///
/// Returns `None` when the first character after any leading spaces or tabs is not `>`.
/// Only the first uninterrupted run of `>` counts as markers, so in `> > x`
/// the second `>` is part of the content.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    let is_space_or_tab = |c: char| c == ' ' || c == '\t';

    let after_indent = line.trim_start_matches(is_space_or_tab);
    let after_markers = after_indent.trim_start_matches('>');
    if after_markers.len() == after_indent.len() {
        // Nothing was stripped, so the line has no `>` marker here.
        return None;
    }
    let content = after_markers.trim_start_matches(is_space_or_tab);

    // Turn the remaining-suffix lengths back into offsets within `line`.
    let indent_end = line.len() - after_indent.len();
    let markers_end = line.len() - after_markers.len();
    let spaces_end = line.len() - content.len();

    Some(BlockquoteComponents {
        indent: &line[..indent_end],
        markers: &line[indent_end..markers_end],
        spaces_after: &line[markers_end..spaces_end],
        content,
    })
}
587
588impl<'a> LintContext<'a> {
    /// Builds the lint context for `content`.
    ///
    /// Runs each analysis pass in dependency order (later passes read results of
    /// earlier ones) and stores the results. Code spans and emphasis spans are
    /// placed into their caches here; math spans, HTML tags, table rows, bare
    /// URLs, and the mixed-list-nesting flag start empty and are computed on first use.
    ///
    /// On non-wasm targets, setting the `RUMDL_PROFILE_QUADRATIC` environment
    /// variable makes every pass print its elapsed time to stderr.
    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
        // Profiling is enabled merely by the variable being set; its value is ignored.
        #[cfg(not(target_arch = "wasm32"))]
        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
        #[cfg(target_arch = "wasm32")]
        let profile = false;

        // Line start offsets: 0 plus the byte after every '\n'.
        let line_offsets = profile_section!("Line offsets", profile, {
            let mut offsets = vec![0];
            for (i, c) in content.char_indices() {
                if c == '\n' {
                    offsets.push(i + 1);
                }
            }
            offsets
        });

        let code_blocks = profile_section!("Code blocks", profile, CodeBlockUtils::detect_code_blocks(content));

        let html_comment_ranges = profile_section!(
            "HTML comment ranges",
            profile,
            crate::utils::skip_context::compute_html_comment_ranges(content)
        );

        // Autodoc ranges are only detected for the MkDocs flavor.
        let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
            if flavor == MarkdownFlavor::MkDocs {
                crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
            } else {
                Vec::new()
            }
        });

        // This pass produces both the per-line records and the emphasis spans.
        let (mut lines, emphasis_spans) = profile_section!(
            "Basic line info",
            profile,
            Self::compute_basic_line_info(
                content,
                &line_offsets,
                &code_blocks,
                flavor,
                &html_comment_ranges,
                &autodoc_ranges,
            )
        );

        // The following passes update `lines` in place.
        profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));

        profile_section!(
            "ESM blocks",
            profile,
            Self::detect_esm_blocks(content, &mut lines, flavor)
        );

        let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));

        profile_section!(
            "Headings & blockquotes",
            profile,
            Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
        );

        let code_spans = profile_section!("Code spans", profile, Self::parse_code_spans(content, &lines));

        // Mark every line after the first line of a multi-line code span.
        // Span line numbers are 1-based, hence the `- 1` when indexing `lines`.
        for span in &code_spans {
            if span.end_line > span.line {
                for line_num in (span.line + 1)..=span.end_line {
                    if let Some(line_info) = lines.get_mut(line_num - 1) {
                        line_info.in_code_span_continuation = true;
                    }
                }
            }
        }

        let (links, broken_links, footnote_refs) = profile_section!(
            "Links",
            profile,
            Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
        );

        let images = profile_section!(
            "Images",
            profile,
            Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
        );

        let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));

        let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));

        let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));

        let table_blocks = profile_section!(
            "Table blocks",
            profile,
            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
                content,
                &code_blocks,
                &code_spans,
                &html_comment_ranges,
            )
        );

        let line_index = profile_section!(
            "Line index",
            profile,
            crate::utils::range_utils::LineIndex::new(content)
        );

        let jinja_ranges = profile_section!(
            "Jinja ranges",
            profile,
            crate::utils::jinja_utils::find_jinja_ranges(content)
        );

        Self {
            content,
            line_offsets,
            code_blocks,
            lines,
            links,
            images,
            broken_links,
            footnote_refs,
            reference_defs,
            // Already computed above, so the cache starts filled.
            code_spans_cache: OnceLock::from(Arc::new(code_spans)),
            math_spans_cache: OnceLock::new(),
            list_blocks,
            char_frequency,
            html_tags_cache: OnceLock::new(),
            // `emphasis_spans()` relies on this cache being filled here.
            emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
            table_rows_cache: OnceLock::new(),
            bare_urls_cache: OnceLock::new(),
            has_mixed_list_nesting_cache: OnceLock::new(),
            html_comment_ranges,
            table_blocks,
            line_index,
            jinja_ranges,
            flavor,
            source_file,
        }
    }
748
749 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
751 Arc::clone(
752 self.code_spans_cache
753 .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
754 )
755 }
756
757 pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
759 Arc::clone(
760 self.math_spans_cache
761 .get_or_init(|| Arc::new(Self::parse_math_spans(self.content, &self.lines))),
762 )
763 }
764
765 pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
767 let math_spans = self.math_spans();
768 math_spans
769 .iter()
770 .any(|span| byte_pos >= span.byte_offset && byte_pos < span.byte_end)
771 }
772
773 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
775 &self.html_comment_ranges
776 }
777
778 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
780 Arc::clone(self.html_tags_cache.get_or_init(|| {
781 Arc::new(Self::parse_html_tags(
782 self.content,
783 &self.lines,
784 &self.code_blocks,
785 self.flavor,
786 ))
787 }))
788 }
789
790 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
792 Arc::clone(
793 self.emphasis_spans_cache
794 .get()
795 .expect("emphasis_spans_cache initialized during construction"),
796 )
797 }
798
799 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
801 Arc::clone(
802 self.table_rows_cache
803 .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
804 )
805 }
806
807 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
809 Arc::clone(
810 self.bare_urls_cache
811 .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
812 )
813 }
814
815 pub fn has_mixed_list_nesting(&self) -> bool {
819 *self
820 .has_mixed_list_nesting_cache
821 .get_or_init(|| self.compute_mixed_list_nesting())
822 }
823
824 fn compute_mixed_list_nesting(&self) -> bool {
826 let mut stack: Vec<(usize, bool)> = Vec::new();
831 let mut last_was_blank = false;
832
833 for line_info in &self.lines {
834 if line_info.in_code_block
836 || line_info.in_front_matter
837 || line_info.in_mkdocstrings
838 || line_info.in_html_comment
839 || line_info.in_esm_block
840 {
841 continue;
842 }
843
844 if line_info.is_blank {
846 last_was_blank = true;
847 continue;
848 }
849
850 if let Some(list_item) = &line_info.list_item {
851 let current_pos = if list_item.marker_column == 1 {
853 0
854 } else {
855 list_item.marker_column
856 };
857
858 if last_was_blank && current_pos == 0 {
860 stack.clear();
861 }
862 last_was_blank = false;
863
864 while let Some(&(pos, _)) = stack.last() {
866 if pos >= current_pos {
867 stack.pop();
868 } else {
869 break;
870 }
871 }
872
873 if let Some(&(_, parent_is_ordered)) = stack.last()
875 && parent_is_ordered != list_item.is_ordered
876 {
877 return true; }
879
880 stack.push((current_pos, list_item.is_ordered));
881 } else {
882 last_was_blank = false;
884 }
885 }
886
887 false
888 }
889
890 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
892 match self.line_offsets.binary_search(&offset) {
893 Ok(line) => (line + 1, 1),
894 Err(line) => {
895 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
896 (line, offset - line_start + 1)
897 }
898 }
899 }
900
901 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
903 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
905 return true;
906 }
907
908 self.code_spans()
910 .iter()
911 .any(|span| pos >= span.byte_offset && pos < span.byte_end)
912 }
913
914 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
916 if line_num > 0 {
917 self.lines.get(line_num - 1)
918 } else {
919 None
920 }
921 }
922
923 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
925 self.line_info(line_num).map(|info| info.byte_offset)
926 }
927
928 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
930 let normalized_id = ref_id.to_lowercase();
931 self.reference_defs
932 .iter()
933 .find(|def| def.id == normalized_id)
934 .map(|def| def.url.as_str())
935 }
936
937 pub fn is_in_list_block(&self, line_num: usize) -> bool {
939 self.list_blocks
940 .iter()
941 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
942 }
943
944 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
946 self.list_blocks
947 .iter()
948 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
949 }
950
951 pub fn is_in_code_block(&self, line_num: usize) -> bool {
955 if line_num == 0 || line_num > self.lines.len() {
956 return false;
957 }
958 self.lines[line_num - 1].in_code_block
959 }
960
961 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
963 if line_num == 0 || line_num > self.lines.len() {
964 return false;
965 }
966 self.lines[line_num - 1].in_front_matter
967 }
968
969 pub fn is_in_html_block(&self, line_num: usize) -> bool {
971 if line_num == 0 || line_num > self.lines.len() {
972 return false;
973 }
974 self.lines[line_num - 1].in_html_block
975 }
976
977 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
979 if line_num == 0 || line_num > self.lines.len() {
980 return false;
981 }
982
983 let col_0indexed = if col > 0 { col - 1 } else { 0 };
987 let code_spans = self.code_spans();
988 code_spans.iter().any(|span| {
989 if line_num < span.line || line_num > span.end_line {
991 return false;
992 }
993
994 if span.line == span.end_line {
995 col_0indexed >= span.start_col && col_0indexed < span.end_col
997 } else if line_num == span.line {
998 col_0indexed >= span.start_col
1000 } else if line_num == span.end_line {
1001 col_0indexed < span.end_col
1003 } else {
1004 true
1006 }
1007 })
1008 }
1009
1010 #[inline]
1012 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
1013 let code_spans = self.code_spans();
1014 code_spans
1015 .iter()
1016 .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
1017 }
1018
1019 #[inline]
1022 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
1023 self.reference_defs
1024 .iter()
1025 .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
1026 }
1027
1028 #[inline]
1032 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
1033 self.html_comment_ranges
1034 .iter()
1035 .any(|range| byte_pos >= range.start && byte_pos < range.end)
1036 }
1037
1038 #[inline]
1041 pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1042 self.html_tags()
1043 .iter()
1044 .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
1045 }
1046
1047 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1049 self.jinja_ranges
1050 .iter()
1051 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1052 }
1053
1054 pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1056 self.reference_defs.iter().any(|def| {
1057 if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1058 byte_pos >= start && byte_pos < end
1059 } else {
1060 false
1061 }
1062 })
1063 }
1064
1065 pub fn has_char(&self, ch: char) -> bool {
1067 match ch {
1068 '#' => self.char_frequency.hash_count > 0,
1069 '*' => self.char_frequency.asterisk_count > 0,
1070 '_' => self.char_frequency.underscore_count > 0,
1071 '-' => self.char_frequency.hyphen_count > 0,
1072 '+' => self.char_frequency.plus_count > 0,
1073 '>' => self.char_frequency.gt_count > 0,
1074 '|' => self.char_frequency.pipe_count > 0,
1075 '[' => self.char_frequency.bracket_count > 0,
1076 '`' => self.char_frequency.backtick_count > 0,
1077 '<' => self.char_frequency.lt_count > 0,
1078 '!' => self.char_frequency.exclamation_count > 0,
1079 '\n' => self.char_frequency.newline_count > 0,
1080 _ => self.content.contains(ch), }
1082 }
1083
1084 pub fn char_count(&self, ch: char) -> usize {
1086 match ch {
1087 '#' => self.char_frequency.hash_count,
1088 '*' => self.char_frequency.asterisk_count,
1089 '_' => self.char_frequency.underscore_count,
1090 '-' => self.char_frequency.hyphen_count,
1091 '+' => self.char_frequency.plus_count,
1092 '>' => self.char_frequency.gt_count,
1093 '|' => self.char_frequency.pipe_count,
1094 '[' => self.char_frequency.bracket_count,
1095 '`' => self.char_frequency.backtick_count,
1096 '<' => self.char_frequency.lt_count,
1097 '!' => self.char_frequency.exclamation_count,
1098 '\n' => self.char_frequency.newline_count,
1099 _ => self.content.matches(ch).count(), }
1101 }
1102
1103 pub fn likely_has_headings(&self) -> bool {
1105 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
1107
1108 pub fn likely_has_lists(&self) -> bool {
1110 self.char_frequency.asterisk_count > 0
1111 || self.char_frequency.hyphen_count > 0
1112 || self.char_frequency.plus_count > 0
1113 }
1114
1115 pub fn likely_has_emphasis(&self) -> bool {
1117 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1118 }
1119
1120 pub fn likely_has_tables(&self) -> bool {
1122 self.char_frequency.pipe_count > 2
1123 }
1124
1125 pub fn likely_has_blockquotes(&self) -> bool {
1127 self.char_frequency.gt_count > 0
1128 }
1129
1130 pub fn likely_has_code(&self) -> bool {
1132 self.char_frequency.backtick_count > 0
1133 }
1134
1135 pub fn likely_has_links_or_images(&self) -> bool {
1137 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1138 }
1139
1140 pub fn likely_has_html(&self) -> bool {
1142 self.char_frequency.lt_count > 0
1143 }
1144
1145 pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1150 if let Some(line_info) = self.lines.get(line_idx)
1151 && let Some(ref bq) = line_info.blockquote
1152 {
1153 bq.prefix.trim_end().to_string()
1154 } else {
1155 String::new()
1156 }
1157 }
1158
1159 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1161 self.html_tags()
1162 .iter()
1163 .filter(|tag| tag.line == line_num)
1164 .cloned()
1165 .collect()
1166 }
1167
1168 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1170 self.emphasis_spans()
1171 .iter()
1172 .filter(|span| span.line == line_num)
1173 .cloned()
1174 .collect()
1175 }
1176
1177 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1179 self.table_rows()
1180 .iter()
1181 .filter(|row| row.line == line_num)
1182 .cloned()
1183 .collect()
1184 }
1185
1186 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1188 self.bare_urls()
1189 .iter()
1190 .filter(|url| url.line == line_num)
1191 .cloned()
1192 .collect()
1193 }
1194
1195 #[inline]
1201 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1202 let idx = match lines.binary_search_by(|line| {
1204 if byte_offset < line.byte_offset {
1205 std::cmp::Ordering::Greater
1206 } else if byte_offset > line.byte_offset + line.byte_len {
1207 std::cmp::Ordering::Less
1208 } else {
1209 std::cmp::Ordering::Equal
1210 }
1211 }) {
1212 Ok(idx) => idx,
1213 Err(idx) => idx.saturating_sub(1),
1214 };
1215
1216 let line = &lines[idx];
1217 let line_num = idx + 1;
1218 let col = byte_offset.saturating_sub(line.byte_offset);
1219
1220 (idx, line_num, col)
1221 }
1222
1223 #[inline]
1225 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1226 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1228
1229 if idx > 0 {
1231 let span = &code_spans[idx - 1];
1232 if offset >= span.byte_offset && offset < span.byte_end {
1233 return true;
1234 }
1235 }
1236
1237 false
1238 }
1239
1240 fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1244 use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1245
1246 let mut link_ranges = Vec::new();
1247 let mut options = Options::empty();
1248 options.insert(Options::ENABLE_WIKILINKS);
1249 options.insert(Options::ENABLE_FOOTNOTES);
1250
1251 let parser = Parser::new_ext(content, options).into_offset_iter();
1252 let mut link_stack: Vec<usize> = Vec::new();
1253
1254 for (event, range) in parser {
1255 match event {
1256 Event::Start(Tag::Link { .. }) => {
1257 link_stack.push(range.start);
1258 }
1259 Event::End(TagEnd::Link) => {
1260 if let Some(start_pos) = link_stack.pop() {
1261 link_ranges.push((start_pos, range.end));
1262 }
1263 }
1264 _ => {}
1265 }
1266 }
1267
1268 link_ranges
1269 }
1270
1271 fn parse_links(
1273 content: &'a str,
1274 lines: &[LineInfo],
1275 code_blocks: &[(usize, usize)],
1276 code_spans: &[CodeSpan],
1277 flavor: MarkdownFlavor,
1278 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1279 ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1280 use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1281 use std::collections::HashSet;
1282
1283 let mut links = Vec::with_capacity(content.len() / 500);
1284 let mut broken_links = Vec::new();
1285 let mut footnote_refs = Vec::new();
1286
1287 let mut found_positions = HashSet::new();
1289
1290 let mut options = Options::empty();
1300 options.insert(Options::ENABLE_WIKILINKS);
1301 options.insert(Options::ENABLE_FOOTNOTES);
1302
1303 let parser = Parser::new_with_broken_link_callback(
1304 content,
1305 options,
1306 Some(|link: BrokenLink<'_>| {
1307 broken_links.push(BrokenLinkInfo {
1308 reference: link.reference.to_string(),
1309 span: link.span.clone(),
1310 });
1311 None
1312 }),
1313 )
1314 .into_offset_iter();
1315
1316 let mut link_stack: Vec<(
1317 usize,
1318 usize,
1319 pulldown_cmark::CowStr<'a>,
1320 LinkType,
1321 pulldown_cmark::CowStr<'a>,
1322 )> = Vec::new();
1323 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1326 match event {
1327 Event::Start(Tag::Link {
1328 link_type,
1329 dest_url,
1330 id,
1331 ..
1332 }) => {
1333 link_stack.push((range.start, range.end, dest_url, link_type, id));
1335 text_chunks.clear();
1336 }
1337 Event::Text(text) if !link_stack.is_empty() => {
1338 text_chunks.push((text.to_string(), range.start, range.end));
1340 }
1341 Event::Code(code) if !link_stack.is_empty() => {
1342 let code_text = format!("`{code}`");
1344 text_chunks.push((code_text, range.start, range.end));
1345 }
1346 Event::End(TagEnd::Link) => {
1347 if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1348 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1350 text_chunks.clear();
1351 continue;
1352 }
1353
1354 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1356
1357 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1359 text_chunks.clear();
1360 continue;
1361 }
1362
1363 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1364
1365 let is_reference = matches!(
1366 link_type,
1367 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1368 );
1369
1370 let link_text = if start_pos < content.len() {
1373 let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1374
1375 let mut close_pos = None;
1379 let mut depth = 0;
1380 let mut in_code_span = false;
1381
1382 for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1383 let mut backslash_count = 0;
1385 let mut j = i;
1386 while j > 0 && link_bytes[j - 1] == b'\\' {
1387 backslash_count += 1;
1388 j -= 1;
1389 }
1390 let is_escaped = backslash_count % 2 != 0;
1391
1392 if byte == b'`' && !is_escaped {
1394 in_code_span = !in_code_span;
1395 }
1396
1397 if !is_escaped && !in_code_span {
1399 if byte == b'[' {
1400 depth += 1;
1401 } else if byte == b']' {
1402 if depth == 0 {
1403 close_pos = Some(i);
1405 break;
1406 } else {
1407 depth -= 1;
1408 }
1409 }
1410 }
1411 }
1412
1413 if let Some(pos) = close_pos {
1414 Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1415 } else {
1416 Cow::Borrowed("")
1417 }
1418 } else {
1419 Cow::Borrowed("")
1420 };
1421
1422 let reference_id = if is_reference && !ref_id.is_empty() {
1424 Some(Cow::Owned(ref_id.to_lowercase()))
1425 } else if is_reference {
1426 Some(Cow::Owned(link_text.to_lowercase()))
1428 } else {
1429 None
1430 };
1431
1432 found_positions.insert(start_pos);
1434
1435 links.push(ParsedLink {
1436 line: line_num,
1437 start_col: col_start,
1438 end_col: col_end,
1439 byte_offset: start_pos,
1440 byte_end: range.end,
1441 text: link_text,
1442 url: Cow::Owned(url.to_string()),
1443 is_reference,
1444 reference_id,
1445 link_type,
1446 });
1447
1448 text_chunks.clear();
1449 }
1450 }
1451 Event::FootnoteReference(footnote_id) => {
1452 if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1455 continue;
1456 }
1457
1458 let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1459 footnote_refs.push(FootnoteRef {
1460 id: footnote_id.to_string(),
1461 line: line_num,
1462 byte_offset: range.start,
1463 byte_end: range.end,
1464 });
1465 }
1466 _ => {}
1467 }
1468 }
1469
1470 for cap in LINK_PATTERN.captures_iter(content) {
1474 let full_match = cap.get(0).unwrap();
1475 let match_start = full_match.start();
1476 let match_end = full_match.end();
1477
1478 if found_positions.contains(&match_start) {
1480 continue;
1481 }
1482
1483 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1485 continue;
1486 }
1487
1488 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1490 continue;
1491 }
1492
1493 if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1495 continue;
1496 }
1497
1498 if Self::is_offset_in_code_span(code_spans, match_start) {
1500 continue;
1501 }
1502
1503 if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1505 continue;
1506 }
1507
1508 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1510
1511 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1513 continue;
1514 }
1515
1516 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1517
1518 let text = cap.get(1).map_or("", |m| m.as_str());
1519
1520 if let Some(ref_id) = cap.get(6) {
1522 let ref_id_str = ref_id.as_str();
1523 let normalized_ref = if ref_id_str.is_empty() {
1524 Cow::Owned(text.to_lowercase()) } else {
1526 Cow::Owned(ref_id_str.to_lowercase())
1527 };
1528
1529 links.push(ParsedLink {
1531 line: line_num,
1532 start_col: col_start,
1533 end_col: col_end,
1534 byte_offset: match_start,
1535 byte_end: match_end,
1536 text: Cow::Borrowed(text),
1537 url: Cow::Borrowed(""), is_reference: true,
1539 reference_id: Some(normalized_ref),
1540 link_type: LinkType::Reference, });
1542 }
1543 }
1544
1545 (links, broken_links, footnote_refs)
1546 }
1547
1548 fn parse_images(
1550 content: &'a str,
1551 lines: &[LineInfo],
1552 code_blocks: &[(usize, usize)],
1553 code_spans: &[CodeSpan],
1554 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1555 ) -> Vec<ParsedImage<'a>> {
1556 use crate::utils::skip_context::is_in_html_comment_ranges;
1557 use std::collections::HashSet;
1558
1559 let mut images = Vec::with_capacity(content.len() / 1000);
1561 let mut found_positions = HashSet::new();
1562
1563 let parser = Parser::new(content).into_offset_iter();
1565 let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1566 Vec::new();
1567 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1570 match event {
1571 Event::Start(Tag::Image {
1572 link_type,
1573 dest_url,
1574 id,
1575 ..
1576 }) => {
1577 image_stack.push((range.start, dest_url, link_type, id));
1578 text_chunks.clear();
1579 }
1580 Event::Text(text) if !image_stack.is_empty() => {
1581 text_chunks.push((text.to_string(), range.start, range.end));
1582 }
1583 Event::Code(code) if !image_stack.is_empty() => {
1584 let code_text = format!("`{code}`");
1585 text_chunks.push((code_text, range.start, range.end));
1586 }
1587 Event::End(TagEnd::Image) => {
1588 if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1589 if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1591 continue;
1592 }
1593
1594 if Self::is_offset_in_code_span(code_spans, start_pos) {
1596 continue;
1597 }
1598
1599 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1601 continue;
1602 }
1603
1604 let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1606 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1607
1608 let is_reference = matches!(
1609 link_type,
1610 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1611 );
1612
1613 let alt_text = if start_pos < content.len() {
1616 let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1617
1618 let mut close_pos = None;
1621 let mut depth = 0;
1622
1623 if image_bytes.len() > 2 {
1624 for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1625 let mut backslash_count = 0;
1627 let mut j = i;
1628 while j > 0 && image_bytes[j - 1] == b'\\' {
1629 backslash_count += 1;
1630 j -= 1;
1631 }
1632 let is_escaped = backslash_count % 2 != 0;
1633
1634 if !is_escaped {
1635 if byte == b'[' {
1636 depth += 1;
1637 } else if byte == b']' {
1638 if depth == 0 {
1639 close_pos = Some(i);
1641 break;
1642 } else {
1643 depth -= 1;
1644 }
1645 }
1646 }
1647 }
1648 }
1649
1650 if let Some(pos) = close_pos {
1651 Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1652 } else {
1653 Cow::Borrowed("")
1654 }
1655 } else {
1656 Cow::Borrowed("")
1657 };
1658
1659 let reference_id = if is_reference && !ref_id.is_empty() {
1660 Some(Cow::Owned(ref_id.to_lowercase()))
1661 } else if is_reference {
1662 Some(Cow::Owned(alt_text.to_lowercase())) } else {
1664 None
1665 };
1666
1667 found_positions.insert(start_pos);
1668 images.push(ParsedImage {
1669 line: line_num,
1670 start_col: col_start,
1671 end_col: col_end,
1672 byte_offset: start_pos,
1673 byte_end: range.end,
1674 alt_text,
1675 url: Cow::Owned(url.to_string()),
1676 is_reference,
1677 reference_id,
1678 link_type,
1679 });
1680 }
1681 }
1682 _ => {}
1683 }
1684 }
1685
1686 for cap in IMAGE_PATTERN.captures_iter(content) {
1688 let full_match = cap.get(0).unwrap();
1689 let match_start = full_match.start();
1690 let match_end = full_match.end();
1691
1692 if found_positions.contains(&match_start) {
1694 continue;
1695 }
1696
1697 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1699 continue;
1700 }
1701
1702 if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1704 || Self::is_offset_in_code_span(code_spans, match_start)
1705 || is_in_html_comment_ranges(html_comment_ranges, match_start)
1706 {
1707 continue;
1708 }
1709
1710 if let Some(ref_id) = cap.get(6) {
1712 let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1713 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1714 let alt_text = cap.get(1).map_or("", |m| m.as_str());
1715 let ref_id_str = ref_id.as_str();
1716 let normalized_ref = if ref_id_str.is_empty() {
1717 Cow::Owned(alt_text.to_lowercase())
1718 } else {
1719 Cow::Owned(ref_id_str.to_lowercase())
1720 };
1721
1722 images.push(ParsedImage {
1723 line: line_num,
1724 start_col: col_start,
1725 end_col: col_end,
1726 byte_offset: match_start,
1727 byte_end: match_end,
1728 alt_text: Cow::Borrowed(alt_text),
1729 url: Cow::Borrowed(""),
1730 is_reference: true,
1731 reference_id: Some(normalized_ref),
1732 link_type: LinkType::Reference, });
1734 }
1735 }
1736
1737 images
1738 }
1739
1740 fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1742 let mut refs = Vec::with_capacity(lines.len() / 20); for (line_idx, line_info) in lines.iter().enumerate() {
1746 if line_info.in_code_block {
1748 continue;
1749 }
1750
1751 let line = line_info.content(content);
1752 let line_num = line_idx + 1;
1753
1754 if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1755 let id_raw = cap.get(1).unwrap().as_str();
1756
1757 if id_raw.starts_with('^') {
1760 continue;
1761 }
1762
1763 let id = id_raw.to_lowercase();
1764 let url = cap.get(2).unwrap().as_str().to_string();
1765 let title_match = cap.get(3).or_else(|| cap.get(4));
1766 let title = title_match.map(|m| m.as_str().to_string());
1767
1768 let match_obj = cap.get(0).unwrap();
1771 let byte_offset = line_info.byte_offset + match_obj.start();
1772 let byte_end = line_info.byte_offset + match_obj.end();
1773
1774 let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1776 let start = line_info.byte_offset + m.start().saturating_sub(1);
1778 let end = line_info.byte_offset + m.end() + 1; (Some(start), Some(end))
1780 } else {
1781 (None, None)
1782 };
1783
1784 refs.push(ReferenceDef {
1785 line: line_num,
1786 id,
1787 url,
1788 title,
1789 byte_offset,
1790 byte_end,
1791 title_byte_start,
1792 title_byte_end,
1793 });
1794 }
1795 }
1796
1797 refs
1798 }
1799
/// Splits a line into its blockquote prefix and the text that follows it.
///
/// Each `>` marker may be preceded by whitespace and followed by at most one space
/// or tab that is counted as part of the prefix. Returns `None` when the line, after
/// leading whitespace, does not start with `>`.
#[inline]
fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
    let mut rest = line;
    let mut saw_marker = false;

    while let Some(after_marker) = rest.trim_start().strip_prefix('>') {
        saw_marker = true;
        // A single space or tab directly after the marker belongs to the prefix.
        rest = after_marker.strip_prefix([' ', '\t']).unwrap_or(after_marker);
    }

    if !saw_marker {
        return None;
    }

    // `rest` is always a suffix of `line`, so the prefix is whatever was consumed.
    let prefix_len = line.len() - rest.len();
    Some((&line[..prefix_len], rest))
}
1840
1841 fn detect_list_items_and_emphasis_with_pulldown(
1865 content: &str,
1866 line_offsets: &[usize],
1867 flavor: MarkdownFlavor,
1868 front_matter_end: usize,
1869 code_blocks: &[(usize, usize)],
1870 ) -> (ListItemMap, Vec<EmphasisSpan>) {
1871 use std::collections::HashMap;
1872
1873 let mut list_items = HashMap::new();
1874 let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
1875
1876 let mut options = Options::empty();
1877 options.insert(Options::ENABLE_TABLES);
1878 options.insert(Options::ENABLE_FOOTNOTES);
1879 options.insert(Options::ENABLE_STRIKETHROUGH);
1880 options.insert(Options::ENABLE_TASKLISTS);
1881 options.insert(Options::ENABLE_GFM);
1883
1884 let _ = flavor;
1886
1887 let parser = Parser::new_ext(content, options).into_offset_iter();
1888 let mut list_depth: usize = 0;
1889 let mut list_stack: Vec<bool> = Vec::new();
1890
1891 for (event, range) in parser {
1892 match event {
1893 Event::Start(Tag::Emphasis) | Event::Start(Tag::Strong) => {
1895 let marker_count = if matches!(event, Event::Start(Tag::Strong)) {
1896 2
1897 } else {
1898 1
1899 };
1900 let match_start = range.start;
1901 let match_end = range.end;
1902
1903 if !CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
1905 let marker = content[match_start..].chars().next().unwrap_or('*');
1907 if marker == '*' || marker == '_' {
1908 let content_start = match_start + marker_count;
1910 let content_end = if match_end >= marker_count {
1911 match_end - marker_count
1912 } else {
1913 match_end
1914 };
1915 let content_part = if content_start < content_end && content_end <= content.len() {
1916 &content[content_start..content_end]
1917 } else {
1918 ""
1919 };
1920
1921 let line_idx = match line_offsets.binary_search(&match_start) {
1923 Ok(idx) => idx,
1924 Err(idx) => idx.saturating_sub(1),
1925 };
1926 let line_num = line_idx + 1;
1927 let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
1928 let col_start = match_start - line_start;
1929 let col_end = match_end - line_start;
1930
1931 emphasis_spans.push(EmphasisSpan {
1932 line: line_num,
1933 start_col: col_start,
1934 end_col: col_end,
1935 byte_offset: match_start,
1936 byte_end: match_end,
1937 marker,
1938 marker_count,
1939 content: content_part.to_string(),
1940 });
1941 }
1942 }
1943 }
1944 Event::Start(Tag::List(start_number)) => {
1945 list_depth += 1;
1946 list_stack.push(start_number.is_some());
1947 }
1948 Event::End(TagEnd::List(_)) => {
1949 list_depth = list_depth.saturating_sub(1);
1950 list_stack.pop();
1951 }
1952 Event::Start(Tag::Item) if list_depth > 0 => {
1953 let current_list_is_ordered = list_stack.last().copied().unwrap_or(false);
1955 let item_start = range.start;
1957
1958 let mut line_idx = match line_offsets.binary_search(&item_start) {
1960 Ok(idx) => idx,
1961 Err(idx) => idx.saturating_sub(1),
1962 };
1963
1964 if item_start < content.len() && content.as_bytes()[item_start] == b'\n' {
1968 line_idx += 1;
1969 }
1970
1971 if front_matter_end > 0 && line_idx < front_matter_end {
1973 continue;
1974 }
1975
1976 if line_idx < line_offsets.len() {
1977 let line_start_byte = line_offsets[line_idx];
1978 let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
1979 let line = &content[line_start_byte..line_end.min(content.len())];
1980
1981 let line = line
1983 .strip_suffix('\n')
1984 .or_else(|| line.strip_suffix("\r\n"))
1985 .unwrap_or(line);
1986
1987 let blockquote_parse = Self::parse_blockquote_prefix(line);
1989 let (blockquote_prefix_len, line_to_parse) = if let Some((prefix, content)) = blockquote_parse {
1990 (prefix.len(), content)
1991 } else {
1992 (0, line)
1993 };
1994
1995 if current_list_is_ordered {
1997 if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
1998 Self::parse_ordered_list(line_to_parse)
1999 {
2000 let marker = format!("{number_str}{delimiter}");
2001 let marker_column = blockquote_prefix_len + leading_spaces.len();
2002 let content_column = marker_column + marker.len() + spacing.len();
2003 let number = number_str.parse().ok();
2004
2005 list_items.entry(line_start_byte).or_insert((
2006 true,
2007 marker,
2008 marker_column,
2009 content_column,
2010 number,
2011 ));
2012 }
2013 } else if let Some((leading_spaces, marker, spacing, _content)) =
2014 Self::parse_unordered_list(line_to_parse)
2015 {
2016 let marker_column = blockquote_prefix_len + leading_spaces.len();
2017 let content_column = marker_column + 1 + spacing.len();
2018
2019 list_items.entry(line_start_byte).or_insert((
2020 false,
2021 marker.to_string(),
2022 marker_column,
2023 content_column,
2024 None,
2025 ));
2026 }
2027 }
2028 }
2029 _ => {}
2030 }
2031 }
2032
2033 (list_items, emphasis_spans)
2034 }
2035
/// Splits a line into the parts of an unordered list item.
///
/// Returns `(indent, marker, spacing, content)` where `indent` and `spacing` are the
/// runs of spaces/tabs before and after the marker, and `marker` is one of `-`, `*`
/// or `+`. Returns `None` when the first non-blank character is not such a marker.
#[inline]
fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
    const BLANKS: [char; 2] = [' ', '\t'];

    let after_indent = line.trim_start_matches(BLANKS);
    let indent = &line[..line.len() - after_indent.len()];

    let marker = after_indent
        .chars()
        .next()
        .filter(|c| matches!(c, '-' | '*' | '+'))?;

    // The marker is a single ASCII byte, so slicing one byte past it is safe.
    let after_marker = &after_indent[1..];
    let content = after_marker.trim_start_matches(BLANKS);
    let spacing = &after_marker[..after_marker.len() - content.len()];

    Some((indent, marker, spacing, content))
}
2068
/// Splits a line into the parts of an ordered list item.
///
/// Returns `(indent, number, delimiter, spacing, content)` where `number` is a
/// non-empty run of ASCII digits and `delimiter` is `.` or `)`. Returns `None` when
/// the line does not have that shape.
#[inline]
fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
    const BLANKS: [char; 2] = [' ', '\t'];

    let after_indent = line.trim_start_matches(BLANKS);
    let indent = &line[..line.len() - after_indent.len()];

    let after_number = after_indent.trim_start_matches(|c: char| c.is_ascii_digit());
    let number = &after_indent[..after_indent.len() - after_number.len()];
    if number.is_empty() {
        return None;
    }

    let delimiter = after_number
        .chars()
        .next()
        .filter(|c| matches!(c, '.' | ')'))?;

    // The delimiter is a single ASCII byte, so slicing one byte past it is safe.
    let after_delimiter = &after_number[1..];
    let content = after_delimiter.trim_start_matches(BLANKS);
    let spacing = &after_delimiter[..after_delimiter.len() - content.len()];

    Some((indent, number, delimiter, spacing, content))
}
2116
/// Produces one flag per entry of `line_offsets`, set when that line overlaps any of
/// the byte ranges in `code_blocks`.
///
/// Range ends that fall inside a multi-byte character are first snapped outward to
/// the nearest character boundary (start moves back, end moves forward).
fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
    let total_bytes = content.len();
    let mut flags = vec![false; line_offsets.len()];

    for &(block_start, block_end) in code_blocks {
        // Walk the start backwards until it sits on a character boundary.
        let mut lo = block_start;
        while lo > 0 && !content.is_char_boundary(lo) {
            lo -= 1;
        }

        // Clamp the end to the content, then walk it forwards to a boundary.
        let mut hi = block_end.min(total_bytes);
        while hi < total_bytes && !content.is_char_boundary(hi) {
            hi += 1;
        }

        // First flagged line: the last line starting at or before `lo`.
        let first = line_offsets.partition_point(|&offset| offset <= lo).saturating_sub(1);
        // One past the last flagged line: lines starting before `hi`.
        let past_last = line_offsets.partition_point(|&offset| offset < hi);

        // `get_mut` yields `None` for an empty or inverted range, leaving flags untouched.
        if let Some(covered) = flags.get_mut(first..past_last) {
            covered.fill(true);
        }
    }

    flags
}
2176
/// Produces one flag per line of `content`, set for lines that belong to a `$$`
/// delimited block (delimiter lines included).
///
/// Lines marked in `code_block_map` are never flagged and never toggle the block state.
fn compute_math_block_line_map(content: &str, code_block_map: &[bool]) -> Vec<bool> {
    let mut block_open = false;

    content
        .lines()
        .enumerate()
        .map(|(idx, text)| {
            if code_block_map.get(idx).copied().unwrap_or(false) {
                return false;
            }

            if text.trim() == "$$" {
                // A delimiter line is part of the block whether it opens or closes it.
                block_open = !block_open;
                true
            } else {
                block_open
            }
        })
        .collect()
}
2214
2215 fn compute_basic_line_info(
2218 content: &str,
2219 line_offsets: &[usize],
2220 code_blocks: &[(usize, usize)],
2221 flavor: MarkdownFlavor,
2222 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2223 autodoc_ranges: &[crate::utils::skip_context::ByteRange],
2224 ) -> (Vec<LineInfo>, Vec<EmphasisSpan>) {
2225 let content_lines: Vec<&str> = content.lines().collect();
2226 let mut lines = Vec::with_capacity(content_lines.len());
2227
2228 let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
2230
2231 let math_block_map = Self::compute_math_block_line_map(content, &code_block_map);
2233
2234 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2237
2238 let (list_item_map, emphasis_spans) = Self::detect_list_items_and_emphasis_with_pulldown(
2241 content,
2242 line_offsets,
2243 flavor,
2244 front_matter_end,
2245 code_blocks,
2246 );
2247
2248 for (i, line) in content_lines.iter().enumerate() {
2249 let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
2250 let indent = line.len() - line.trim_start().len();
2251 let visual_indent = ElementCache::calculate_indentation_width_default(line);
2253
2254 let blockquote_parse = Self::parse_blockquote_prefix(line);
2256
2257 let is_blank = if let Some((_, content)) = blockquote_parse {
2259 content.trim().is_empty()
2261 } else {
2262 line.trim().is_empty()
2263 };
2264
2265 let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
2267
2268 let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
2270 && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
2271 let line_end_offset = byte_offset + line.len();
2274 let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
2275 html_comment_ranges,
2276 byte_offset,
2277 line_end_offset,
2278 );
2279 let list_item =
2282 list_item_map
2283 .get(&byte_offset)
2284 .map(
2285 |(is_ordered, marker, marker_column, content_column, number)| ListItemInfo {
2286 marker: marker.clone(),
2287 is_ordered: *is_ordered,
2288 number: *number,
2289 marker_column: *marker_column,
2290 content_column: *content_column,
2291 },
2292 );
2293
2294 let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2297 let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2298
2299 let in_math_block = math_block_map.get(i).copied().unwrap_or(false);
2301
2302 lines.push(LineInfo {
2303 byte_offset,
2304 byte_len: line.len(),
2305 indent,
2306 visual_indent,
2307 is_blank,
2308 in_code_block,
2309 in_front_matter,
2310 in_html_block: false, in_html_comment,
2312 list_item,
2313 heading: None, blockquote: None, in_mkdocstrings,
2316 in_esm_block: false, in_code_span_continuation: false, is_horizontal_rule: is_hr,
2319 in_math_block,
2320 });
2321 }
2322
2323 (lines, emphasis_spans)
2324 }
2325
2326 fn detect_headings_and_blockquotes(
2328 content: &str,
2329 lines: &mut [LineInfo],
2330 flavor: MarkdownFlavor,
2331 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2332 link_byte_ranges: &[(usize, usize)],
2333 ) {
2334 static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
2336 LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
2337 static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
2338 LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
2339
2340 let content_lines: Vec<&str> = content.lines().collect();
2341
2342 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2344
2345 for i in 0..lines.len() {
2347 let line = content_lines[i];
2348
2349 if !(front_matter_end > 0 && i < front_matter_end)
2354 && let Some(bq) = parse_blockquote_detailed(line)
2355 {
2356 let nesting_level = bq.markers.len();
2357 let marker_column = bq.indent.len();
2358 let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
2359 let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
2360 let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
2361 let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
2362
2363 lines[i].blockquote = Some(BlockquoteInfo {
2364 nesting_level,
2365 indent: bq.indent.to_string(),
2366 marker_column,
2367 prefix,
2368 content: bq.content.to_string(),
2369 has_no_space_after_marker: has_no_space,
2370 has_multiple_spaces_after_marker: has_multiple_spaces,
2371 needs_md028_fix,
2372 });
2373
2374 if !lines[i].in_code_block && is_horizontal_rule_content(bq.content.trim()) {
2377 lines[i].is_horizontal_rule = true;
2378 }
2379 }
2380
2381 if lines[i].in_code_block {
2383 continue;
2384 }
2385
2386 if front_matter_end > 0 && i < front_matter_end {
2388 continue;
2389 }
2390
2391 if lines[i].in_html_block {
2393 continue;
2394 }
2395
2396 if lines[i].is_blank {
2398 continue;
2399 }
2400
2401 let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
2404 crate::utils::mkdocs_snippets::is_snippet_section_start(line)
2405 || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
2406 } else {
2407 false
2408 };
2409
2410 if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
2411 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
2413 continue;
2414 }
2415 let line_offset = lines[i].byte_offset;
2418 if link_byte_ranges
2419 .iter()
2420 .any(|&(start, end)| line_offset > start && line_offset < end)
2421 {
2422 continue;
2423 }
2424 let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
2425 let hashes = caps.get(2).map_or("", |m| m.as_str());
2426 let spaces_after = caps.get(3).map_or("", |m| m.as_str());
2427 let rest = caps.get(4).map_or("", |m| m.as_str());
2428
2429 let level = hashes.len() as u8;
2430 let marker_column = leading_spaces.len();
2431
2432 let (text, has_closing, closing_seq) = {
2434 let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
2436 if rest[id_start..].trim_end().ends_with('}') {
2438 (&rest[..id_start], &rest[id_start..])
2440 } else {
2441 (rest, "")
2442 }
2443 } else {
2444 (rest, "")
2445 };
2446
2447 let trimmed_rest = rest_without_id.trim_end();
2449 if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
2450 let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();
2453
2454 let last_hash_char_idx = char_positions
2456 .iter()
2457 .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);
2458
2459 if let Some(mut char_idx) = last_hash_char_idx {
2460 while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
2462 char_idx -= 1;
2463 }
2464
2465 let start_of_hashes = char_positions[char_idx].0;
2467
2468 let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();
2470
2471 let potential_closing = &trimmed_rest[start_of_hashes..];
2473 let is_all_hashes = potential_closing.chars().all(|c| c == '#');
2474
2475 if is_all_hashes && has_space_before {
2476 let closing_hashes = potential_closing.to_string();
2478 let text_part = if !custom_id_part.is_empty() {
2481 format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
2484 } else {
2485 trimmed_rest[..start_of_hashes].trim_end().to_string()
2486 };
2487 (text_part, true, closing_hashes)
2488 } else {
2489 (rest.to_string(), false, String::new())
2491 }
2492 } else {
2493 (rest.to_string(), false, String::new())
2495 }
2496 } else {
2497 (rest.to_string(), false, String::new())
2499 }
2500 };
2501
2502 let content_column = marker_column + hashes.len() + spaces_after.len();
2503
2504 let raw_text = text.trim().to_string();
2506 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2507
2508 if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
2510 let next_line = content_lines[i + 1];
2511 if !lines[i + 1].in_code_block
2512 && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
2513 && let Some(next_line_id) =
2514 crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
2515 {
2516 custom_id = Some(next_line_id);
2517 }
2518 }
2519
2520 let is_valid = !spaces_after.is_empty()
2530 || rest.is_empty()
2531 || level > 1
2532 || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());
2533
2534 lines[i].heading = Some(HeadingInfo {
2535 level,
2536 style: HeadingStyle::ATX,
2537 marker: hashes.to_string(),
2538 marker_column,
2539 content_column,
2540 text: clean_text,
2541 custom_id,
2542 raw_text,
2543 has_closing_sequence: has_closing,
2544 closing_sequence: closing_seq,
2545 is_valid,
2546 });
2547 }
2548 else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2550 let next_line = content_lines[i + 1];
2551 if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2552 if front_matter_end > 0 && i < front_matter_end {
2554 continue;
2555 }
2556
2557 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2559 {
2560 continue;
2561 }
2562
2563 let content_line = line.trim();
2566
2567 if content_line.starts_with('-') || content_line.starts_with('*') || content_line.starts_with('+') {
2569 continue;
2570 }
2571
2572 if content_line.starts_with('_') {
2574 let non_ws: String = content_line.chars().filter(|c| !c.is_whitespace()).collect();
2575 if non_ws.len() >= 3 && non_ws.chars().all(|c| c == '_') {
2576 continue;
2577 }
2578 }
2579
2580 if let Some(first_char) = content_line.chars().next()
2582 && first_char.is_ascii_digit()
2583 {
2584 let num_end = content_line.chars().take_while(|c| c.is_ascii_digit()).count();
2585 if num_end < content_line.len() {
2586 let next = content_line.chars().nth(num_end);
2587 if next == Some('.') || next == Some(')') {
2588 continue;
2589 }
2590 }
2591 }
2592
2593 if ATX_HEADING_REGEX.is_match(line) {
2595 continue;
2596 }
2597
2598 if content_line.starts_with('>') {
2600 continue;
2601 }
2602
2603 let trimmed_start = line.trim_start();
2605 if trimmed_start.len() >= 3 {
2606 let first_three: String = trimmed_start.chars().take(3).collect();
2607 if first_three == "```" || first_three == "~~~" {
2608 continue;
2609 }
2610 }
2611
2612 if content_line.starts_with('<') {
2614 continue;
2615 }
2616
2617 let underline = next_line.trim();
2618
2619 let level = if underline.starts_with('=') { 1 } else { 2 };
2620 let style = if level == 1 {
2621 HeadingStyle::Setext1
2622 } else {
2623 HeadingStyle::Setext2
2624 };
2625
2626 let raw_text = line.trim().to_string();
2628 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2629
2630 if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2632 let attr_line = content_lines[i + 2];
2633 if !lines[i + 2].in_code_block
2634 && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2635 && let Some(attr_line_id) =
2636 crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2637 {
2638 custom_id = Some(attr_line_id);
2639 }
2640 }
2641
2642 lines[i].heading = Some(HeadingInfo {
2643 level,
2644 style,
2645 marker: underline.to_string(),
2646 marker_column: next_line.len() - next_line.trim_start().len(),
2647 content_column: lines[i].indent,
2648 text: clean_text,
2649 custom_id,
2650 raw_text,
2651 has_closing_sequence: false,
2652 closing_sequence: String::new(),
2653 is_valid: true, });
2655 }
2656 }
2657 }
2658 }
2659
2660 fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2662 const BLOCK_ELEMENTS: &[&str] = &[
2665 "address",
2666 "article",
2667 "aside",
2668 "audio",
2669 "blockquote",
2670 "canvas",
2671 "details",
2672 "dialog",
2673 "dd",
2674 "div",
2675 "dl",
2676 "dt",
2677 "embed",
2678 "fieldset",
2679 "figcaption",
2680 "figure",
2681 "footer",
2682 "form",
2683 "h1",
2684 "h2",
2685 "h3",
2686 "h4",
2687 "h5",
2688 "h6",
2689 "header",
2690 "hr",
2691 "iframe",
2692 "li",
2693 "main",
2694 "menu",
2695 "nav",
2696 "noscript",
2697 "object",
2698 "ol",
2699 "p",
2700 "picture",
2701 "pre",
2702 "script",
2703 "search",
2704 "section",
2705 "source",
2706 "style",
2707 "summary",
2708 "svg",
2709 "table",
2710 "tbody",
2711 "td",
2712 "template",
2713 "textarea",
2714 "tfoot",
2715 "th",
2716 "thead",
2717 "tr",
2718 "track",
2719 "ul",
2720 "video",
2721 ];
2722
2723 let mut i = 0;
2724 while i < lines.len() {
2725 if lines[i].in_code_block || lines[i].in_front_matter {
2727 i += 1;
2728 continue;
2729 }
2730
2731 let trimmed = lines[i].content(content).trim_start();
2732
2733 if trimmed.starts_with('<') && trimmed.len() > 1 {
2735 let after_bracket = &trimmed[1..];
2737 let is_closing = after_bracket.starts_with('/');
2738 let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2739
2740 let tag_name = tag_start
2742 .chars()
2743 .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2744 .collect::<String>()
2745 .to_lowercase();
2746
2747 if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2749 lines[i].in_html_block = true;
2751
2752 if !is_closing {
2755 let closing_tag = format!("</{tag_name}>");
2756 let allow_blank_lines = tag_name == "style" || tag_name == "script";
2758 let mut j = i + 1;
2759 let mut found_closing_tag = false;
2760 while j < lines.len() && j < i + 100 {
2761 if !allow_blank_lines && lines[j].is_blank {
2764 break;
2765 }
2766
2767 lines[j].in_html_block = true;
2768
2769 if lines[j].content(content).contains(&closing_tag) {
2771 found_closing_tag = true;
2772 }
2773
2774 if found_closing_tag {
2777 j += 1;
2778 while j < lines.len() && j < i + 100 {
2780 if lines[j].is_blank {
2781 break;
2782 }
2783 lines[j].in_html_block = true;
2784 j += 1;
2785 }
2786 break;
2787 }
2788 j += 1;
2789 }
2790 }
2791 }
2792 }
2793
2794 i += 1;
2795 }
2796 }
2797
2798 fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2801 if !flavor.supports_esm_blocks() {
2803 return;
2804 }
2805
2806 let mut in_multiline_comment = false;
2807
2808 for line in lines.iter_mut() {
2809 if line.is_blank || line.in_html_comment {
2811 continue;
2812 }
2813
2814 let trimmed = line.content(content).trim_start();
2815
2816 if in_multiline_comment {
2818 if trimmed.contains("*/") {
2819 in_multiline_comment = false;
2820 }
2821 continue;
2822 }
2823
2824 if trimmed.starts_with("//") {
2826 continue;
2827 }
2828
2829 if trimmed.starts_with("/*") {
2831 if !trimmed.contains("*/") {
2832 in_multiline_comment = true;
2833 }
2834 continue;
2835 }
2836
2837 if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
2839 line.in_esm_block = true;
2840 } else {
2841 break;
2843 }
2844 }
2845 }
2846
2847 fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
2849 let mut code_spans = Vec::new();
2850
2851 if !content.contains('`') {
2853 return code_spans;
2854 }
2855
2856 let parser = Parser::new(content).into_offset_iter();
2858
2859 for (event, range) in parser {
2860 if let Event::Code(_) = event {
2861 let start_pos = range.start;
2862 let end_pos = range.end;
2863
2864 let full_span = &content[start_pos..end_pos];
2866 let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
2867
2868 let content_start = start_pos + backtick_count;
2870 let content_end = end_pos - backtick_count;
2871 let span_content = if content_start < content_end {
2872 content[content_start..content_end].to_string()
2873 } else {
2874 String::new()
2875 };
2876
2877 let line_idx = lines
2880 .partition_point(|line| line.byte_offset <= start_pos)
2881 .saturating_sub(1);
2882 let line_num = line_idx + 1;
2883 let byte_col_start = start_pos - lines[line_idx].byte_offset;
2884
2885 let end_line_idx = lines
2887 .partition_point(|line| line.byte_offset <= end_pos)
2888 .saturating_sub(1);
2889 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
2890
2891 let line_content = lines[line_idx].content(content);
2894 let col_start = if byte_col_start <= line_content.len() {
2895 line_content[..byte_col_start].chars().count()
2896 } else {
2897 line_content.chars().count()
2898 };
2899
2900 let end_line_content = lines[end_line_idx].content(content);
2901 let col_end = if byte_col_end <= end_line_content.len() {
2902 end_line_content[..byte_col_end].chars().count()
2903 } else {
2904 end_line_content.chars().count()
2905 };
2906
2907 code_spans.push(CodeSpan {
2908 line: line_num,
2909 end_line: end_line_idx + 1,
2910 start_col: col_start,
2911 end_col: col_end,
2912 byte_offset: start_pos,
2913 byte_end: end_pos,
2914 backtick_count,
2915 content: span_content,
2916 });
2917 }
2918 }
2919
2920 code_spans.sort_by_key(|span| span.byte_offset);
2922
2923 code_spans
2924 }
2925
2926 fn parse_math_spans(content: &str, lines: &[LineInfo]) -> Vec<MathSpan> {
2928 let mut math_spans = Vec::new();
2929
2930 if !content.contains('$') {
2932 return math_spans;
2933 }
2934
2935 let mut options = Options::empty();
2937 options.insert(Options::ENABLE_MATH);
2938 let parser = Parser::new_ext(content, options).into_offset_iter();
2939
2940 for (event, range) in parser {
2941 let (is_display, math_content) = match &event {
2942 Event::InlineMath(text) => (false, text.as_ref()),
2943 Event::DisplayMath(text) => (true, text.as_ref()),
2944 _ => continue,
2945 };
2946
2947 let start_pos = range.start;
2948 let end_pos = range.end;
2949
2950 let line_idx = lines
2952 .partition_point(|line| line.byte_offset <= start_pos)
2953 .saturating_sub(1);
2954 let line_num = line_idx + 1;
2955 let byte_col_start = start_pos - lines[line_idx].byte_offset;
2956
2957 let end_line_idx = lines
2959 .partition_point(|line| line.byte_offset <= end_pos)
2960 .saturating_sub(1);
2961 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
2962
2963 let line_content = lines[line_idx].content(content);
2965 let col_start = if byte_col_start <= line_content.len() {
2966 line_content[..byte_col_start].chars().count()
2967 } else {
2968 line_content.chars().count()
2969 };
2970
2971 let end_line_content = lines[end_line_idx].content(content);
2972 let col_end = if byte_col_end <= end_line_content.len() {
2973 end_line_content[..byte_col_end].chars().count()
2974 } else {
2975 end_line_content.chars().count()
2976 };
2977
2978 math_spans.push(MathSpan {
2979 line: line_num,
2980 end_line: end_line_idx + 1,
2981 start_col: col_start,
2982 end_col: col_end,
2983 byte_offset: start_pos,
2984 byte_end: end_pos,
2985 is_display,
2986 content: math_content.to_string(),
2987 });
2988 }
2989
2990 math_spans.sort_by_key(|span| span.byte_offset);
2992
2993 math_spans
2994 }
2995
2996 fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
3007 const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
3009
3010 #[inline]
3013 fn reset_tracking_state(
3014 list_item: &ListItemInfo,
3015 has_list_breaking_content: &mut bool,
3016 min_continuation: &mut usize,
3017 ) {
3018 *has_list_breaking_content = false;
3019 let marker_width = if list_item.is_ordered {
3020 list_item.marker.len() + 1 } else {
3022 list_item.marker.len()
3023 };
3024 *min_continuation = if list_item.is_ordered {
3025 marker_width
3026 } else {
3027 UNORDERED_LIST_MIN_CONTINUATION_INDENT
3028 };
3029 }
3030
3031 let mut list_blocks = Vec::with_capacity(lines.len() / 10); let mut current_block: Option<ListBlock> = None;
3034 let mut last_list_item_line = 0;
3035 let mut current_indent_level = 0;
3036 let mut last_marker_width = 0;
3037
3038 let mut has_list_breaking_content_since_last_item = false;
3040 let mut min_continuation_for_tracking = 0;
3041
3042 for (line_idx, line_info) in lines.iter().enumerate() {
3043 let line_num = line_idx + 1;
3044
3045 if line_info.in_code_block {
3047 if let Some(ref mut block) = current_block {
3048 let min_continuation_indent =
3050 CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
3051
3052 let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
3054
3055 match context {
3056 CodeBlockContext::Indented => {
3057 block.end_line = line_num;
3059 continue;
3060 }
3061 CodeBlockContext::Standalone => {
3062 let completed_block = current_block.take().unwrap();
3064 list_blocks.push(completed_block);
3065 continue;
3066 }
3067 CodeBlockContext::Adjacent => {
3068 block.end_line = line_num;
3070 continue;
3071 }
3072 }
3073 } else {
3074 continue;
3076 }
3077 }
3078
3079 let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
3081 caps.get(0).unwrap().as_str().to_string()
3082 } else {
3083 String::new()
3084 };
3085
3086 if let Some(ref block) = current_block
3089 && line_info.list_item.is_none()
3090 && !line_info.is_blank
3091 && !line_info.in_code_span_continuation
3092 {
3093 let line_content = line_info.content(content).trim();
3094
3095 let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
3100
3101 let blockquote_prefix_changes = blockquote_prefix.trim() != block.blockquote_prefix.trim();
3104
3105 let breaks_list = line_info.heading.is_some()
3106 || line_content.starts_with("---")
3107 || line_content.starts_with("***")
3108 || line_content.starts_with("___")
3109 || crate::utils::skip_context::is_table_line(line_content)
3110 || blockquote_prefix_changes
3111 || (line_info.indent > 0
3112 && line_info.indent < min_continuation_for_tracking
3113 && !is_lazy_continuation);
3114
3115 if breaks_list {
3116 has_list_breaking_content_since_last_item = true;
3117 }
3118 }
3119
3120 if line_info.in_code_span_continuation
3123 && line_info.list_item.is_none()
3124 && let Some(ref mut block) = current_block
3125 {
3126 block.end_line = line_num;
3127 }
3128
3129 let effective_continuation_indent = if let Some(ref block) = current_block {
3135 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3136 let line_content = line_info.content(content);
3137 let line_bq_level = line_content
3138 .chars()
3139 .take_while(|c| *c == '>' || c.is_whitespace())
3140 .filter(|&c| c == '>')
3141 .count();
3142 if line_bq_level > 0 && line_bq_level == block_bq_level {
3143 let mut pos = 0;
3145 let mut found_markers = 0;
3146 for c in line_content.chars() {
3147 pos += c.len_utf8();
3148 if c == '>' {
3149 found_markers += 1;
3150 if found_markers == line_bq_level {
3151 if line_content.get(pos..pos + 1) == Some(" ") {
3152 pos += 1;
3153 }
3154 break;
3155 }
3156 }
3157 }
3158 let after_bq = &line_content[pos..];
3159 after_bq.len() - after_bq.trim_start().len()
3160 } else {
3161 line_info.indent
3162 }
3163 } else {
3164 line_info.indent
3165 };
3166 let adjusted_min_continuation_for_tracking = if let Some(ref block) = current_block {
3167 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3168 if block_bq_level > 0 {
3169 if block.is_ordered { last_marker_width } else { 2 }
3170 } else {
3171 min_continuation_for_tracking
3172 }
3173 } else {
3174 min_continuation_for_tracking
3175 };
3176 let is_valid_continuation = effective_continuation_indent >= adjusted_min_continuation_for_tracking
3177 || (line_info.indent == 0 && !line_info.is_blank); if std::env::var("RUMDL_DEBUG_LIST").is_ok() && line_info.list_item.is_none() && !line_info.is_blank {
3180 eprintln!(
3181 "[DEBUG] Line {}: checking continuation - indent={}, min_cont={}, is_valid={}, in_code_span={}, in_code_block={}, has_block={}",
3182 line_num,
3183 effective_continuation_indent,
3184 adjusted_min_continuation_for_tracking,
3185 is_valid_continuation,
3186 line_info.in_code_span_continuation,
3187 line_info.in_code_block,
3188 current_block.is_some()
3189 );
3190 }
3191
3192 if !line_info.in_code_span_continuation
3193 && line_info.list_item.is_none()
3194 && !line_info.is_blank
3195 && !line_info.in_code_block
3196 && is_valid_continuation
3197 && let Some(ref mut block) = current_block
3198 {
3199 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3200 eprintln!(
3201 "[DEBUG] Line {}: extending block.end_line from {} to {}",
3202 line_num, block.end_line, line_num
3203 );
3204 }
3205 block.end_line = line_num;
3206 }
3207
3208 if let Some(list_item) = &line_info.list_item {
3210 let item_indent = list_item.marker_column;
3212 let nesting = item_indent / 2; if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3215 eprintln!(
3216 "[DEBUG] Line {}: list item found, marker={:?}, indent={}",
3217 line_num, list_item.marker, item_indent
3218 );
3219 }
3220
3221 if let Some(ref mut block) = current_block {
3222 let is_nested = nesting > block.nesting_level;
3226 let same_type =
3227 (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
3228 let same_context = block.blockquote_prefix == blockquote_prefix;
3229 let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
3231
3232 let marker_compatible =
3234 block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
3235
3236 let has_non_list_content = has_list_breaking_content_since_last_item;
3239
3240 let mut continues_list = if is_nested {
3244 same_context && reasonable_distance && !has_non_list_content
3246 } else {
3247 same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
3249 };
3250
3251 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3252 eprintln!(
3253 "[DEBUG] Line {}: continues_list={}, is_nested={}, same_type={}, same_context={}, reasonable_distance={}, marker_compatible={}, has_non_list_content={}, last_item={}, block.end_line={}",
3254 line_num,
3255 continues_list,
3256 is_nested,
3257 same_type,
3258 same_context,
3259 reasonable_distance,
3260 marker_compatible,
3261 has_non_list_content,
3262 last_list_item_line,
3263 block.end_line
3264 );
3265 }
3266
3267 if !continues_list && reasonable_distance && line_num > 0 && block.end_line == line_num - 1 {
3270 if block.item_lines.contains(&(line_num - 1)) {
3273 continues_list = true;
3275 } else {
3276 continues_list = true;
3280 }
3281 }
3282
3283 if continues_list {
3284 block.end_line = line_num;
3286 block.item_lines.push(line_num);
3287
3288 block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
3290 list_item.marker.len() + 1
3291 } else {
3292 list_item.marker.len()
3293 });
3294
3295 if !block.is_ordered
3297 && block.marker.is_some()
3298 && block.marker.as_ref() != Some(&list_item.marker)
3299 {
3300 block.marker = None;
3302 }
3303
3304 reset_tracking_state(
3306 list_item,
3307 &mut has_list_breaking_content_since_last_item,
3308 &mut min_continuation_for_tracking,
3309 );
3310 } else {
3311 list_blocks.push(block.clone());
3314
3315 *block = ListBlock {
3316 start_line: line_num,
3317 end_line: line_num,
3318 is_ordered: list_item.is_ordered,
3319 marker: if list_item.is_ordered {
3320 None
3321 } else {
3322 Some(list_item.marker.clone())
3323 },
3324 blockquote_prefix: blockquote_prefix.clone(),
3325 item_lines: vec![line_num],
3326 nesting_level: nesting,
3327 max_marker_width: if list_item.is_ordered {
3328 list_item.marker.len() + 1
3329 } else {
3330 list_item.marker.len()
3331 },
3332 };
3333
3334 reset_tracking_state(
3336 list_item,
3337 &mut has_list_breaking_content_since_last_item,
3338 &mut min_continuation_for_tracking,
3339 );
3340 }
3341 } else {
3342 current_block = Some(ListBlock {
3344 start_line: line_num,
3345 end_line: line_num,
3346 is_ordered: list_item.is_ordered,
3347 marker: if list_item.is_ordered {
3348 None
3349 } else {
3350 Some(list_item.marker.clone())
3351 },
3352 blockquote_prefix,
3353 item_lines: vec![line_num],
3354 nesting_level: nesting,
3355 max_marker_width: list_item.marker.len(),
3356 });
3357
3358 reset_tracking_state(
3360 list_item,
3361 &mut has_list_breaking_content_since_last_item,
3362 &mut min_continuation_for_tracking,
3363 );
3364 }
3365
3366 last_list_item_line = line_num;
3367 current_indent_level = item_indent;
3368 last_marker_width = if list_item.is_ordered {
3369 list_item.marker.len() + 1 } else {
3371 list_item.marker.len()
3372 };
3373 } else if let Some(ref mut block) = current_block {
3374 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3376 eprintln!(
3377 "[DEBUG] Line {}: non-list-item, is_blank={}, block exists",
3378 line_num, line_info.is_blank
3379 );
3380 }
3381
3382 let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
3390 lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
3391 } else {
3392 false
3393 };
3394
3395 let min_continuation_indent = if block.is_ordered {
3399 current_indent_level + last_marker_width
3400 } else {
3401 current_indent_level + 2 };
3403
3404 if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
3405 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3407 eprintln!(
3408 "[DEBUG] Line {}: indented continuation (indent={}, min={})",
3409 line_num, line_info.indent, min_continuation_indent
3410 );
3411 }
3412 block.end_line = line_num;
3413 } else if line_info.is_blank {
3414 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3417 eprintln!("[DEBUG] Line {line_num}: entering blank line handling");
3418 }
3419 let mut check_idx = line_idx + 1;
3420 let mut found_continuation = false;
3421
3422 while check_idx < lines.len() && lines[check_idx].is_blank {
3424 check_idx += 1;
3425 }
3426
3427 if check_idx < lines.len() {
3428 let next_line = &lines[check_idx];
3429 let next_content = next_line.content(content);
3431 let block_bq_level_for_indent = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3434 let next_bq_level_for_indent = next_content
3435 .chars()
3436 .take_while(|c| *c == '>' || c.is_whitespace())
3437 .filter(|&c| c == '>')
3438 .count();
3439 let effective_indent =
3440 if next_bq_level_for_indent > 0 && next_bq_level_for_indent == block_bq_level_for_indent {
3441 let mut pos = 0;
3444 let mut found_markers = 0;
3445 for c in next_content.chars() {
3446 pos += c.len_utf8();
3447 if c == '>' {
3448 found_markers += 1;
3449 if found_markers == next_bq_level_for_indent {
3450 if next_content.get(pos..pos + 1) == Some(" ") {
3452 pos += 1;
3453 }
3454 break;
3455 }
3456 }
3457 }
3458 let after_blockquote_marker = &next_content[pos..];
3459 after_blockquote_marker.len() - after_blockquote_marker.trim_start().len()
3460 } else {
3461 next_line.indent
3462 };
3463 let adjusted_min_continuation = if block_bq_level_for_indent > 0 {
3466 if block.is_ordered { last_marker_width } else { 2 }
3469 } else {
3470 min_continuation_indent
3471 };
3472 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3474 eprintln!(
3475 "[DEBUG] Blank line {} checking next line {}: effective_indent={}, adjusted_min={}, next_is_list={}, in_code_block={}",
3476 line_num,
3477 check_idx + 1,
3478 effective_indent,
3479 adjusted_min_continuation,
3480 next_line.list_item.is_some(),
3481 next_line.in_code_block
3482 );
3483 }
3484 if !next_line.in_code_block && effective_indent >= adjusted_min_continuation {
3485 found_continuation = true;
3486 }
3487 else if !next_line.in_code_block
3489 && next_line.list_item.is_some()
3490 && let Some(item) = &next_line.list_item
3491 {
3492 let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
3493 .find(next_line.content(content))
3494 .map_or(String::new(), |m| m.as_str().to_string());
3495 if item.marker_column == current_indent_level
3496 && item.is_ordered == block.is_ordered
3497 && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
3498 {
3499 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3503 let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
3504 if let Some(between_line) = lines.get(idx) {
3505 let between_content = between_line.content(content);
3506 let trimmed = between_content.trim();
3507 if trimmed.is_empty() {
3509 return false;
3510 }
3511 let line_indent = between_content.len() - between_content.trim_start().len();
3513
3514 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
3516 .find(between_content)
3517 .map_or(String::new(), |m| m.as_str().to_string());
3518 let between_bq_level = between_bq_prefix.chars().filter(|&c| c == '>').count();
3519 let blockquote_level_changed =
3520 trimmed.starts_with(">") && between_bq_level != block_bq_level;
3521
3522 if trimmed.starts_with("```")
3524 || trimmed.starts_with("~~~")
3525 || trimmed.starts_with("---")
3526 || trimmed.starts_with("***")
3527 || trimmed.starts_with("___")
3528 || blockquote_level_changed
3529 || crate::utils::skip_context::is_table_line(trimmed)
3530 || between_line.heading.is_some()
3531 {
3532 return true; }
3534
3535 line_indent >= min_continuation_indent
3537 } else {
3538 false
3539 }
3540 });
3541
3542 if block.is_ordered {
3543 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
3546 if let Some(between_line) = lines.get(idx) {
3547 let between_content = between_line.content(content);
3548 let trimmed = between_content.trim();
3549 if trimmed.is_empty() {
3550 return false;
3551 }
3552 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
3554 .find(between_content)
3555 .map_or(String::new(), |m| m.as_str().to_string());
3556 let between_bq_level =
3557 between_bq_prefix.chars().filter(|&c| c == '>').count();
3558 let blockquote_level_changed =
3559 trimmed.starts_with(">") && between_bq_level != block_bq_level;
3560 trimmed.starts_with("```")
3562 || trimmed.starts_with("~~~")
3563 || trimmed.starts_with("---")
3564 || trimmed.starts_with("***")
3565 || trimmed.starts_with("___")
3566 || blockquote_level_changed
3567 || crate::utils::skip_context::is_table_line(trimmed)
3568 || between_line.heading.is_some()
3569 } else {
3570 false
3571 }
3572 });
3573 found_continuation = !has_structural_separators;
3574 } else {
3575 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
3577 if let Some(between_line) = lines.get(idx) {
3578 let between_content = between_line.content(content);
3579 let trimmed = between_content.trim();
3580 if trimmed.is_empty() {
3581 return false;
3582 }
3583 let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
3585 .find(between_content)
3586 .map_or(String::new(), |m| m.as_str().to_string());
3587 let between_bq_level =
3588 between_bq_prefix.chars().filter(|&c| c == '>').count();
3589 let blockquote_level_changed =
3590 trimmed.starts_with(">") && between_bq_level != block_bq_level;
3591 trimmed.starts_with("```")
3593 || trimmed.starts_with("~~~")
3594 || trimmed.starts_with("---")
3595 || trimmed.starts_with("***")
3596 || trimmed.starts_with("___")
3597 || blockquote_level_changed
3598 || crate::utils::skip_context::is_table_line(trimmed)
3599 || between_line.heading.is_some()
3600 } else {
3601 false
3602 }
3603 });
3604 found_continuation = !has_structural_separators;
3605 }
3606 }
3607 }
3608 }
3609
3610 if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3611 eprintln!("[DEBUG] Blank line {line_num} final: found_continuation={found_continuation}");
3612 }
3613 if found_continuation {
3614 block.end_line = line_num;
3616 } else {
3617 list_blocks.push(block.clone());
3619 current_block = None;
3620 }
3621 } else {
3622 let min_required_indent = if block.is_ordered {
3625 current_indent_level + last_marker_width
3626 } else {
3627 current_indent_level + 2
3628 };
3629
3630 let line_content = line_info.content(content).trim();
3635
3636 let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
3638
3639 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3642 let current_bq_level = blockquote_prefix.chars().filter(|&c| c == '>').count();
3643 let blockquote_level_changed = line_content.starts_with(">") && current_bq_level != block_bq_level;
3644
3645 let is_structural_separator = line_info.heading.is_some()
3646 || line_content.starts_with("```")
3647 || line_content.starts_with("~~~")
3648 || line_content.starts_with("---")
3649 || line_content.starts_with("***")
3650 || line_content.starts_with("___")
3651 || blockquote_level_changed
3652 || looks_like_table;
3653
3654 let is_lazy_continuation = !is_structural_separator
3657 && !line_info.is_blank
3658 && (line_info.indent == 0 || line_info.indent >= min_required_indent);
3659
3660 if is_lazy_continuation {
3661 let line_content_raw = line_info.content(content);
3665 let block_bq_level_lazy = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3666 let line_bq_level_lazy = line_content_raw
3667 .chars()
3668 .take_while(|c| *c == '>' || c.is_whitespace())
3669 .filter(|&c| c == '>')
3670 .count();
3671 let has_proper_blockquote_indent =
3672 if line_bq_level_lazy > 0 && line_bq_level_lazy == block_bq_level_lazy {
3673 let mut pos = 0;
3675 let mut found_markers = 0;
3676 for c in line_content_raw.chars() {
3677 pos += c.len_utf8();
3678 if c == '>' {
3679 found_markers += 1;
3680 if found_markers == line_bq_level_lazy {
3681 if line_content_raw.get(pos..pos + 1) == Some(" ") {
3682 pos += 1;
3683 }
3684 break;
3685 }
3686 }
3687 }
3688 let after_bq = &line_content_raw[pos..];
3689 let effective_indent_lazy = after_bq.len() - after_bq.trim_start().len();
3690 let min_required_for_bq = if block.is_ordered { last_marker_width } else { 2 };
3691 effective_indent_lazy >= min_required_for_bq
3692 } else {
3693 false
3694 };
3695
3696 if has_proper_blockquote_indent {
3698 block.end_line = line_num;
3699 } else {
3700 let content_to_check = if !blockquote_prefix.is_empty() {
3701 line_info
3703 .content(content)
3704 .strip_prefix(&blockquote_prefix)
3705 .unwrap_or(line_info.content(content))
3706 .trim()
3707 } else {
3708 line_info.content(content).trim()
3709 };
3710
3711 let starts_with_uppercase =
3712 content_to_check.chars().next().is_some_and(|c| c.is_uppercase());
3713
3714 if starts_with_uppercase && last_list_item_line > 0 {
3717 list_blocks.push(block.clone());
3719 current_block = None;
3720 } else {
3721 block.end_line = line_num;
3723 }
3724 }
3725 } else {
3726 list_blocks.push(block.clone());
3728 current_block = None;
3729 }
3730 }
3731 }
3732 }
3733
3734 if let Some(block) = current_block {
3736 list_blocks.push(block);
3737 }
3738
3739 merge_adjacent_list_blocks(content, &mut list_blocks, lines);
3741
3742 list_blocks
3743 }
3744
3745 fn compute_char_frequency(content: &str) -> CharFrequency {
3747 let mut frequency = CharFrequency::default();
3748
3749 for ch in content.chars() {
3750 match ch {
3751 '#' => frequency.hash_count += 1,
3752 '*' => frequency.asterisk_count += 1,
3753 '_' => frequency.underscore_count += 1,
3754 '-' => frequency.hyphen_count += 1,
3755 '+' => frequency.plus_count += 1,
3756 '>' => frequency.gt_count += 1,
3757 '|' => frequency.pipe_count += 1,
3758 '[' => frequency.bracket_count += 1,
3759 '`' => frequency.backtick_count += 1,
3760 '<' => frequency.lt_count += 1,
3761 '!' => frequency.exclamation_count += 1,
3762 '\n' => frequency.newline_count += 1,
3763 _ => {}
3764 }
3765 }
3766
3767 frequency
3768 }
3769
3770 fn parse_html_tags(
3772 content: &str,
3773 lines: &[LineInfo],
3774 code_blocks: &[(usize, usize)],
3775 flavor: MarkdownFlavor,
3776 ) -> Vec<HtmlTag> {
3777 static HTML_TAG_REGEX: LazyLock<regex::Regex> =
3778 LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
3779
3780 let mut html_tags = Vec::with_capacity(content.matches('<').count());
3781
3782 for cap in HTML_TAG_REGEX.captures_iter(content) {
3783 let full_match = cap.get(0).unwrap();
3784 let match_start = full_match.start();
3785 let match_end = full_match.end();
3786
3787 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3789 continue;
3790 }
3791
3792 let is_closing = !cap.get(1).unwrap().as_str().is_empty();
3793 let tag_name_original = cap.get(2).unwrap().as_str();
3794 let tag_name = tag_name_original.to_lowercase();
3795 let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
3796
3797 if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
3800 continue;
3801 }
3802
3803 let mut line_num = 1;
3805 let mut col_start = match_start;
3806 let mut col_end = match_end;
3807 for (idx, line_info) in lines.iter().enumerate() {
3808 if match_start >= line_info.byte_offset {
3809 line_num = idx + 1;
3810 col_start = match_start - line_info.byte_offset;
3811 col_end = match_end - line_info.byte_offset;
3812 } else {
3813 break;
3814 }
3815 }
3816
3817 html_tags.push(HtmlTag {
3818 line: line_num,
3819 start_col: col_start,
3820 end_col: col_end,
3821 byte_offset: match_start,
3822 byte_end: match_end,
3823 tag_name,
3824 is_closing,
3825 is_self_closing,
3826 raw_content: full_match.as_str().to_string(),
3827 });
3828 }
3829
3830 html_tags
3831 }
3832
3833 fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
3835 let mut table_rows = Vec::with_capacity(lines.len() / 20);
3836
3837 for (line_idx, line_info) in lines.iter().enumerate() {
3838 if line_info.in_code_block || line_info.is_blank {
3840 continue;
3841 }
3842
3843 let line = line_info.content(content);
3844 let line_num = line_idx + 1;
3845
3846 if !line.contains('|') {
3848 continue;
3849 }
3850
3851 let parts: Vec<&str> = line.split('|').collect();
3853 let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
3854
3855 let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
3857 let mut column_alignments = Vec::new();
3858
3859 if is_separator {
3860 for part in &parts[1..parts.len() - 1] {
3861 let trimmed = part.trim();
3863 let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
3864 "center".to_string()
3865 } else if trimmed.ends_with(':') {
3866 "right".to_string()
3867 } else if trimmed.starts_with(':') {
3868 "left".to_string()
3869 } else {
3870 "none".to_string()
3871 };
3872 column_alignments.push(alignment);
3873 }
3874 }
3875
3876 table_rows.push(TableRow {
3877 line: line_num,
3878 is_separator,
3879 column_count,
3880 column_alignments,
3881 });
3882 }
3883
3884 table_rows
3885 }
3886
3887 fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
3889 let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
3890
3891 for cap in URL_SIMPLE_REGEX.captures_iter(content) {
3893 let full_match = cap.get(0).unwrap();
3894 let match_start = full_match.start();
3895 let match_end = full_match.end();
3896
3897 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3899 continue;
3900 }
3901
3902 let preceding_char = if match_start > 0 {
3904 content.chars().nth(match_start - 1)
3905 } else {
3906 None
3907 };
3908 let following_char = content.chars().nth(match_end);
3909
3910 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3911 continue;
3912 }
3913 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3914 continue;
3915 }
3916
3917 let url = full_match.as_str();
3918 let url_type = if url.starts_with("https://") {
3919 "https"
3920 } else if url.starts_with("http://") {
3921 "http"
3922 } else if url.starts_with("ftp://") {
3923 "ftp"
3924 } else {
3925 "other"
3926 };
3927
3928 let mut line_num = 1;
3930 let mut col_start = match_start;
3931 let mut col_end = match_end;
3932 for (idx, line_info) in lines.iter().enumerate() {
3933 if match_start >= line_info.byte_offset {
3934 line_num = idx + 1;
3935 col_start = match_start - line_info.byte_offset;
3936 col_end = match_end - line_info.byte_offset;
3937 } else {
3938 break;
3939 }
3940 }
3941
3942 bare_urls.push(BareUrl {
3943 line: line_num,
3944 start_col: col_start,
3945 end_col: col_end,
3946 byte_offset: match_start,
3947 byte_end: match_end,
3948 url: url.to_string(),
3949 url_type: url_type.to_string(),
3950 });
3951 }
3952
3953 for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
3955 let full_match = cap.get(0).unwrap();
3956 let match_start = full_match.start();
3957 let match_end = full_match.end();
3958
3959 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3961 continue;
3962 }
3963
3964 let preceding_char = if match_start > 0 {
3966 content.chars().nth(match_start - 1)
3967 } else {
3968 None
3969 };
3970 let following_char = content.chars().nth(match_end);
3971
3972 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3973 continue;
3974 }
3975 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3976 continue;
3977 }
3978
3979 let email = full_match.as_str();
3980
3981 let mut line_num = 1;
3983 let mut col_start = match_start;
3984 let mut col_end = match_end;
3985 for (idx, line_info) in lines.iter().enumerate() {
3986 if match_start >= line_info.byte_offset {
3987 line_num = idx + 1;
3988 col_start = match_start - line_info.byte_offset;
3989 col_end = match_end - line_info.byte_offset;
3990 } else {
3991 break;
3992 }
3993 }
3994
3995 bare_urls.push(BareUrl {
3996 line: line_num,
3997 start_col: col_start,
3998 end_col: col_end,
3999 byte_offset: match_start,
4000 byte_end: match_end,
4001 url: email.to_string(),
4002 url_type: "email".to_string(),
4003 });
4004 }
4005
4006 bare_urls
4007 }
4008
4009 #[must_use]
4029 pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
4030 ValidHeadingsIter::new(&self.lines)
4031 }
4032
4033 #[must_use]
4037 pub fn has_valid_headings(&self) -> bool {
4038 self.lines
4039 .iter()
4040 .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
4041 }
4042}
4043
4044fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
4046 if list_blocks.len() < 2 {
4047 return;
4048 }
4049
4050 let mut merger = ListBlockMerger::new(content, lines);
4051 *list_blocks = merger.merge(list_blocks);
4052}
4053
4054struct ListBlockMerger<'a> {
4056 content: &'a str,
4057 lines: &'a [LineInfo],
4058}
4059
4060impl<'a> ListBlockMerger<'a> {
4061 fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
4062 Self { content, lines }
4063 }
4064
4065 fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
4066 let mut merged = Vec::with_capacity(list_blocks.len());
4067 let mut current = list_blocks[0].clone();
4068
4069 for next in list_blocks.iter().skip(1) {
4070 if self.should_merge_blocks(¤t, next) {
4071 current = self.merge_two_blocks(current, next);
4072 } else {
4073 merged.push(current);
4074 current = next.clone();
4075 }
4076 }
4077
4078 merged.push(current);
4079 merged
4080 }
4081
4082 fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
4084 if !self.blocks_are_compatible(current, next) {
4086 return false;
4087 }
4088
4089 let spacing = self.analyze_spacing_between(current, next);
4091 match spacing {
4092 BlockSpacing::Consecutive => true,
4093 BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
4094 BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
4095 self.can_merge_with_content_between(current, next)
4096 }
4097 }
4098 }
4099
4100 fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
4102 current.is_ordered == next.is_ordered
4103 && current.blockquote_prefix == next.blockquote_prefix
4104 && current.nesting_level == next.nesting_level
4105 }
4106
4107 fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
4109 let gap = next.start_line - current.end_line;
4110
4111 match gap {
4112 1 => BlockSpacing::Consecutive,
4113 2 => BlockSpacing::SingleBlank,
4114 _ if gap > 2 => {
4115 if self.has_only_blank_lines_between(current, next) {
4116 BlockSpacing::MultipleBlanks
4117 } else {
4118 BlockSpacing::ContentBetween
4119 }
4120 }
4121 _ => BlockSpacing::Consecutive, }
4123 }
4124
4125 fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4127 if has_meaningful_content_between(self.content, current, next, self.lines) {
4130 return false; }
4132
4133 !current.is_ordered && current.marker == next.marker
4135 }
4136
4137 fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4139 if has_meaningful_content_between(self.content, current, next, self.lines) {
4141 return false; }
4143
4144 current.is_ordered && next.is_ordered
4146 }
4147
4148 fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4150 for line_num in (current.end_line + 1)..next.start_line {
4151 if let Some(line_info) = self.lines.get(line_num - 1)
4152 && !line_info.content(self.content).trim().is_empty()
4153 {
4154 return false;
4155 }
4156 }
4157 true
4158 }
4159
4160 fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
4162 current.end_line = next.end_line;
4163 current.item_lines.extend_from_slice(&next.item_lines);
4164
4165 current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
4167
4168 if !current.is_ordered && self.markers_differ(¤t, next) {
4170 current.marker = None; }
4172
4173 current
4174 }
4175
4176 fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
4178 current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
4179 }
4180}
4181
/// How two list blocks are separated, as classified by
/// `ListBlockMerger::analyze_spacing_between` from their line numbers.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// The next block starts on the line right after the current one ends
    /// (also the fallback when the computed gap is zero).
    Consecutive,
    /// Exactly one line lies between the blocks; the classifier does not inspect it.
    SingleBlank,
    /// More than one line lies between the blocks and every known one is blank.
    MultipleBlanks,
    /// More than one line lies between the blocks and at least one is not blank.
    ContentBetween,
}
4190
4191fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
4193 for line_num in (current.end_line + 1)..next.start_line {
4195 if let Some(line_info) = lines.get(line_num - 1) {
4196 let trimmed = line_info.content(content).trim();
4198
4199 if trimmed.is_empty() {
4201 continue;
4202 }
4203
4204 if line_info.heading.is_some() {
4208 return true; }
4210
4211 if is_horizontal_rule(trimmed) {
4213 return true; }
4215
4216 if crate::utils::skip_context::is_table_line(trimmed) {
4218 return true; }
4220
4221 if trimmed.starts_with('>') {
4223 return true; }
4225
4226 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
4228 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4229
4230 let min_continuation_indent = if current.is_ordered {
4232 current.nesting_level + current.max_marker_width + 1 } else {
4234 current.nesting_level + 2
4235 };
4236
4237 if line_indent < min_continuation_indent {
4238 return true; }
4241 }
4242
4243 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4245
4246 let min_indent = if current.is_ordered {
4248 current.nesting_level + current.max_marker_width
4249 } else {
4250 current.nesting_level + 2
4251 };
4252
4253 if line_indent < min_indent {
4255 return true; }
4257
4258 }
4261 }
4262
4263 false
4265}
4266
/// Returns `true` when `line` is a thematic break (`---`, `***`, `___`), taking its
/// indentation into account.
///
/// The marker may be preceded by at most three spaces. A tab anywhere in the leading
/// whitespace is rejected: after zero to three spaces a tab advances to column four,
/// which makes the line indented code rather than a rule.
pub fn is_horizontal_rule_line(line: &str) -> bool {
    let after_spaces = line.trim_start_matches(' ');
    let leading_spaces = line.len() - after_spaces.len();
    if leading_spaces > 3 || after_spaces.starts_with('\t') {
        return false;
    }

    is_horizontal_rule_content(line.trim())
}

/// Returns `true` when already-trimmed text consists of three or more of the same
/// marker (`-`, `*` or `_`), optionally separated by spaces or tabs, and nothing else.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    // Fewer than three bytes cannot hold three markers.
    if trimmed.len() < 3 {
        return false;
    }

    let Some(marker) = trimmed.chars().next() else {
        return false;
    };
    if !matches!(marker, '-' | '*' | '_') {
        return false;
    }

    let mut count = 0usize;
    for ch in trimmed.chars() {
        if ch == marker {
            count += 1;
        } else if ch != ' ' && ch != '\t' {
            // Any other character (including a different marker) disqualifies the line.
            return false;
        }
    }
    count >= 3
}

/// Alias for [`is_horizontal_rule_content`]; expects text that is already trimmed.
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}
4312
4313#[cfg(test)]
4315mod tests {
4316 use super::*;
4317
    /// An empty document has a single line offset and no line entries.
    #[test]
    fn test_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }
4326
    /// A one-line document maps byte offsets to 1-based columns on line 1.
    #[test]
    fn test_single_line() {
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
    }
4335
    /// Line offsets and offset-to-position mapping across several lines, including an empty one.
    #[test]
    fn test_multi_line() {
        let content = "# Title\n\nSecond line\nThird line";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // first byte of line 1
        assert_eq!(ctx.offset_to_line_col(8), (2, 1)); // the empty second line
        assert_eq!(ctx.offset_to_line_col(9), (3, 1)); // start of "Second line"
        assert_eq!(ctx.offset_to_line_col(15), (3, 7)); // inside "Second line"
        assert_eq!(ctx.offset_to_line_col(21), (4, 1)); // start of "Third line"
    }
4348
    /// Per-line metadata: content, byte offset, indent and blank/code-block/list flags,
    /// plus the `line_to_byte_offset` and `line_info` accessors (1-based line numbers).
    #[test]
    fn test_line_info() {
        let content = "# Title\n indented\n\ncode:\n```rust\nfn main() {}\n```";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // One entry per line of the input.
        assert_eq!(ctx.lines.len(), 7);

        // Line 1: the heading.
        let line1 = &ctx.lines[0];
        assert_eq!(line1.content(ctx.content), "# Title");
        assert_eq!(line1.byte_offset, 0);
        assert_eq!(line1.indent, 0);
        assert!(!line1.is_blank);
        assert!(!line1.in_code_block);
        assert!(line1.list_item.is_none());

        // Line 2: indented text.
        let line2 = &ctx.lines[1];
        assert_eq!(line2.content(ctx.content), " indented");
        assert_eq!(line2.byte_offset, 8);
        assert_eq!(line2.indent, 4);
        assert!(!line2.is_blank);

        // Line 3: blank.
        let line3 = &ctx.lines[2];
        assert_eq!(line3.content(ctx.content), "");
        assert!(line3.is_blank);

        // Accessor helpers agree with the raw line data.
        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
    }
4384
    /// Unordered and ordered list markers are detected per line; plain text is not a list item.
    #[test]
    fn test_list_item_detection() {
        let content = "- Unordered item\n * Nested item\n1. Ordered item\n 2) Nested ordered\n\nNot a list";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Line 1: "-" marker at column 0, content at column 2.
        let line1 = &ctx.lines[0];
        assert!(line1.list_item.is_some());
        let list1 = line1.list_item.as_ref().unwrap();
        assert_eq!(list1.marker, "-");
        assert!(!list1.is_ordered);
        assert_eq!(list1.marker_column, 0);
        assert_eq!(list1.content_column, 2);

        // Line 2: nested "*" item.
        let line2 = &ctx.lines[1];
        assert!(line2.list_item.is_some());
        let list2 = line2.list_item.as_ref().unwrap();
        assert_eq!(list2.marker, "*");
        assert_eq!(list2.marker_column, 2);

        // Line 3: ordered item numbered 1.
        let line3 = &ctx.lines[2];
        assert!(line3.list_item.is_some());
        let list3 = line3.list_item.as_ref().unwrap();
        assert_eq!(list3.marker, "1.");
        assert!(list3.is_ordered);
        assert_eq!(list3.number, Some(1));

        // Line 6: ordinary paragraph text.
        let line6 = &ctx.lines[5];
        assert!(line6.list_item.is_none());
    }
4418
    /// Offsets that land on newline bytes and at end of input still map to a position.
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        let content = "a\nb\nc";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // "a"
        assert_eq!(ctx.offset_to_line_col(1), (1, 2)); // newline ending line 1
        assert_eq!(ctx.offset_to_line_col(2), (2, 1)); // "b"
        assert_eq!(ctx.offset_to_line_col(3), (2, 2)); // newline ending line 2
        assert_eq!(ctx.offset_to_line_col(4), (3, 1)); // "c"
        assert_eq!(ctx.offset_to_line_col(5), (3, 2)); // one past the last byte
    }
4431
    /// In the MDX flavor, leading `import`/`export` lines are flagged `in_esm_block`;
    /// the blank line, heading and prose after them are not.
    #[test]
    fn test_mdx_esm_blocks() {
        let content = r##"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall

In {year}, the snowfall was above average.
It was followed by a warm spring which caused
flood conditions in many of the nearby rivers.

<Chart color="#fcb32c" year={year} />
"##;

        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);

        assert_eq!(ctx.lines.len(), 10);
        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
        assert!(
            !ctx.lines[3].in_esm_block,
            "Line 4 (heading) should NOT be in_esm_block"
        );
        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
    }
4460
    /// The same `import`/`export` lines are not flagged as ESM when the flavor is Standard.
    #[test]
    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
        let content = r#"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(
            !ctx.lines[0].in_esm_block,
            "Line 1 should NOT be in_esm_block in Standard flavor"
        );
        assert!(
            !ctx.lines[1].in_esm_block,
            "Line 2 should NOT be in_esm_block in Standard flavor"
        );
    }
4481
    /// Lines starting with `>` are recorded as blockquotes, with their content,
    /// nesting level and the multiple-spaces-after-marker flag.
    #[test]
    fn test_blockquote_with_indented_content() {
        let content = r#"# Heading

> -S socket-path
> More text
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Both quoted lines must carry blockquote info.
        assert!(
            ctx.lines.get(2).is_some_and(|l| l.blockquote.is_some()),
            "Line 3 should be a blockquote"
        );
        assert!(
            ctx.lines.get(3).is_some_and(|l| l.blockquote.is_some()),
            "Line 4 should be a blockquote"
        );

        // Details of the first quoted line.
        let bq3 = ctx.lines.get(2).unwrap().blockquote.as_ref().unwrap();
        assert_eq!(bq3.content, "-S socket-path");
        assert_eq!(bq3.nesting_level, 1);
        assert!(bq3.has_multiple_spaces_after_marker);

        // Details of the second quoted line.
        let bq4 = ctx.lines.get(3).unwrap().blockquote.as_ref().unwrap();
        assert_eq!(bq4.content, "More text");
        assert_eq!(bq4.nesting_level, 1);
    }
4517
    /// Footnote definitions (`[^id]: ...`) are excluded from `reference_defs`;
    /// only the ordinary reference definition is collected.
    #[test]
    fn test_footnote_definitions_not_parsed_as_reference_defs() {
        let content = r#"# Title

A footnote[^1].

[^1]: This is the footnote content.

[^note]: Another footnote with [link](https://example.com).

[regular]: ./path.md "A real reference definition"
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(
            ctx.reference_defs.len(),
            1,
            "Footnotes should not be parsed as reference definitions"
        );

        // The surviving definition is the regular one, with its URL and title.
        assert_eq!(ctx.reference_defs[0].id, "regular");
        assert_eq!(ctx.reference_defs[0].url, "./path.md");
        assert_eq!(
            ctx.reference_defs[0].title,
            Some("A real reference definition".to_string())
        );
    }
4548
    /// A footnote whose body starts with an inline link yields no reference definition.
    #[test]
    fn test_footnote_with_inline_link_not_misidentified() {
        let content = r#"# Title

A footnote[^1].

[^1]: [link](https://www.google.com).
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(
            ctx.reference_defs.is_empty(),
            "Footnote with inline link should not create a reference definition"
        );
    }
4567
    /// Several footnote label shapes are all skipped, while the two regular
    /// reference definitions are kept.
    #[test]
    fn test_various_footnote_formats_excluded() {
        let content = r#"[^1]: Numeric footnote
[^note]: Named footnote
[^a]: Single char footnote
[^long-footnote-name]: Long named footnote
[^123abc]: Mixed alphanumeric

[ref1]: ./file1.md
[ref2]: ./file2.md
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(
            ctx.reference_defs.len(),
            2,
            "Only regular reference definitions should be parsed"
        );

        // No collected id may start with the footnote caret.
        let ids: Vec<&str> = ctx.reference_defs.iter().map(|r| r.id.as_str()).collect();
        assert!(ids.contains(&"ref1"));
        assert!(ids.contains(&"ref2"));
        assert!(!ids.iter().any(|id| id.starts_with('^')));
    }
4594
    /// `has_char` reports every Markdown-significant character present in the sample.
    #[test]
    fn test_has_char_tracked_characters() {
        let content = "# Heading\n* list item\n_emphasis_ and -hyphen-\n+ plus\n> quote\n| table |\n[link]\n`code`\n<html>\n!image";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(ctx.has_char('#'), "Should detect hash");
        assert!(ctx.has_char('*'), "Should detect asterisk");
        assert!(ctx.has_char('_'), "Should detect underscore");
        assert!(ctx.has_char('-'), "Should detect hyphen");
        assert!(ctx.has_char('+'), "Should detect plus");
        assert!(ctx.has_char('>'), "Should detect gt");
        assert!(ctx.has_char('|'), "Should detect pipe");
        assert!(ctx.has_char('['), "Should detect bracket");
        assert!(ctx.has_char('`'), "Should detect backtick");
        assert!(ctx.has_char('<'), "Should detect lt");
        assert!(ctx.has_char('!'), "Should detect exclamation");
        assert!(ctx.has_char('\n'), "Should detect newline");
    }
4619
    /// `has_char` returns false for each of those characters when the text has none of them.
    #[test]
    fn test_has_char_absent_characters() {
        let content = "Simple text without special chars";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(!ctx.has_char('#'), "Should not detect hash");
        assert!(!ctx.has_char('*'), "Should not detect asterisk");
        assert!(!ctx.has_char('_'), "Should not detect underscore");
        assert!(!ctx.has_char('-'), "Should not detect hyphen");
        assert!(!ctx.has_char('+'), "Should not detect plus");
        assert!(!ctx.has_char('>'), "Should not detect gt");
        assert!(!ctx.has_char('|'), "Should not detect pipe");
        assert!(!ctx.has_char('['), "Should not detect bracket");
        assert!(!ctx.has_char('`'), "Should not detect backtick");
        assert!(!ctx.has_char('<'), "Should not detect lt");
        assert!(!ctx.has_char('!'), "Should not detect exclamation");
        assert!(!ctx.has_char('\n'), "Should not detect newline in single line");
    }
4640
    /// `has_char` also answers correctly for characters such as `@`, `$`, `%` and `^`.
    #[test]
    fn test_has_char_fallback_for_untracked() {
        let content = "Text with @mention and $dollar and %percent";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(ctx.has_char('@'), "Should detect @ via fallback");
        assert!(ctx.has_char('$'), "Should detect $ via fallback");
        assert!(ctx.has_char('%'), "Should detect % via fallback");
        assert!(!ctx.has_char('^'), "Should not detect absent ^ via fallback");
    }
4652
    /// `char_count` returns the exact number of occurrences of each Markdown-significant character.
    #[test]
    fn test_char_count_tracked_characters() {
        let content = "## Heading ##\n***bold***\n__emphasis__\n---\n+++\n>> nested\n|| table ||\n[[link]]\n``code``\n<<html>>\n!!";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.char_count('#'), 4, "Should count 4 hashes");
        assert_eq!(ctx.char_count('*'), 6, "Should count 6 asterisks");
        assert_eq!(ctx.char_count('_'), 4, "Should count 4 underscores");
        assert_eq!(ctx.char_count('-'), 3, "Should count 3 hyphens");
        assert_eq!(ctx.char_count('+'), 3, "Should count 3 pluses");
        assert_eq!(ctx.char_count('>'), 4, "Should count 4 gt (2 nested + 2 in <<html>>)");
        assert_eq!(ctx.char_count('|'), 4, "Should count 4 pipes");
        assert_eq!(ctx.char_count('['), 2, "Should count 2 brackets");
        assert_eq!(ctx.char_count('`'), 4, "Should count 4 backticks");
        assert_eq!(ctx.char_count('<'), 2, "Should count 2 lt");
        assert_eq!(ctx.char_count('!'), 2, "Should count 2 exclamations");
        assert_eq!(ctx.char_count('\n'), 10, "Should count 10 newlines");
    }
4672
    /// `char_count` is zero for characters that do not occur in the text.
    #[test]
    fn test_char_count_zero_for_absent() {
        let content = "Plain text";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.char_count('#'), 0);
        assert_eq!(ctx.char_count('*'), 0);
        assert_eq!(ctx.char_count('_'), 0);
        assert_eq!(ctx.char_count('\n'), 0);
    }
4683
    /// `char_count` also counts characters such as `@`, `$` and `%`, and returns 0 for an absent `^`.
    #[test]
    fn test_char_count_fallback_for_untracked() {
        let content = "@@@ $$ %%%";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.char_count('@'), 3, "Should count 3 @ via fallback");
        assert_eq!(ctx.char_count('$'), 2, "Should count 2 $ via fallback");
        assert_eq!(ctx.char_count('%'), 3, "Should count 3 % via fallback");
        assert_eq!(ctx.char_count('^'), 0, "Should count 0 for absent char");
    }
4694
    /// On empty input every count is zero and `has_char` is always false.
    #[test]
    fn test_char_count_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);

        assert_eq!(ctx.char_count('#'), 0);
        assert_eq!(ctx.char_count('*'), 0);
        assert_eq!(ctx.char_count('@'), 0);
        assert!(!ctx.has_char('#'));
        assert!(!ctx.has_char('@'));
    }
4705
    /// Byte positions inside `<div>` and `</div>` are in a tag; the text between them is not.
    #[test]
    fn test_is_in_html_tag_simple() {
        let content = "<div>content</div>";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Opening tag occupies bytes 0..=4.
        assert!(ctx.is_in_html_tag(0), "Position 0 (<) should be in tag");
        assert!(ctx.is_in_html_tag(1), "Position 1 (d) should be in tag");
        assert!(ctx.is_in_html_tag(4), "Position 4 (>) should be in tag");

        // Text between the tags.
        assert!(!ctx.is_in_html_tag(5), "Position 5 (c) should not be in tag");
        assert!(!ctx.is_in_html_tag(10), "Position 10 (t) should not be in tag");

        // Closing tag occupies bytes 12..=17.
        assert!(ctx.is_in_html_tag(12), "Position 12 (<) should be in tag");
        assert!(ctx.is_in_html_tag(17), "Position 17 (>) should be in tag");
    }
4728
    /// A self-closing `<br/>` is covered from `<` through `>`; surrounding text is not.
    #[test]
    fn test_is_in_html_tag_self_closing() {
        let content = "Text <br/> more text";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Before the tag.
        assert!(!ctx.is_in_html_tag(0), "Position 0 should not be in tag");
        assert!(!ctx.is_in_html_tag(4), "Position 4 (space) should not be in tag");

        // Inside the tag.
        assert!(ctx.is_in_html_tag(5), "Position 5 (<) should be in tag");
        assert!(ctx.is_in_html_tag(8), "Position 8 (/) should be in tag");
        assert!(ctx.is_in_html_tag(9), "Position 9 (>) should be in tag");

        // After the tag.
        assert!(!ctx.is_in_html_tag(10), "Position 10 (space) should not be in tag");
    }
4746
    /// Attribute text belongs to the opening tag; the link text between the tags does not.
    #[test]
    fn test_is_in_html_tag_with_attributes() {
        let content = r#"<a href="url" class="link">text</a>"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Opening tag, including its attributes.
        assert!(ctx.is_in_html_tag(0), "Start of tag");
        assert!(ctx.is_in_html_tag(10), "Inside href attribute");
        assert!(ctx.is_in_html_tag(20), "Inside class attribute");
        assert!(ctx.is_in_html_tag(26), "End of opening tag");

        // Element content.
        assert!(!ctx.is_in_html_tag(27), "Start of content");
        assert!(!ctx.is_in_html_tag(30), "End of content");

        // Closing tag.
        assert!(ctx.is_in_html_tag(31), "Start of closing tag");
    }
4765
    /// A tag whose attributes span several lines is tracked across the newlines.
    #[test]
    fn test_is_in_html_tag_multiline() {
        let content = "<div\n class=\"test\"\n>\ncontent\n</div>";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(ctx.is_in_html_tag(0), "Start of multiline tag");
        assert!(ctx.is_in_html_tag(5), "After first newline in tag");
        assert!(ctx.is_in_html_tag(15), "Inside attribute");

        // Two bytes past the closing `>` is the first byte of "content".
        let closing_bracket_pos = content.find(">\n").unwrap();
        assert!(!ctx.is_in_html_tag(closing_bracket_pos + 2), "Content after tag");
    }
4780
    /// With no HTML in the document, no byte position is reported as inside a tag.
    #[test]
    fn test_is_in_html_tag_no_tags() {
        let content = "Plain text without any HTML";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        for i in 0..content.len() {
            assert!(!ctx.is_in_html_tag(i), "Position {i} should not be in tag");
        }
    }
4791
    /// A `{{ ... }}` expression is covered from its first brace to its last; text around it is not.
    #[test]
    fn test_is_in_jinja_range_expression() {
        let content = "Hello {{ name }}!";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Before the expression.
        assert!(!ctx.is_in_jinja_range(0), "H should not be in Jinja");
        assert!(!ctx.is_in_jinja_range(5), "Space before Jinja should not be in Jinja");

        // Inside the expression (bytes 6..=15).
        assert!(ctx.is_in_jinja_range(6), "First brace should be in Jinja");
        assert!(ctx.is_in_jinja_range(7), "Second brace should be in Jinja");
        assert!(ctx.is_in_jinja_range(10), "name should be in Jinja");
        assert!(ctx.is_in_jinja_range(14), "Closing brace should be in Jinja");
        assert!(ctx.is_in_jinja_range(15), "Second closing brace should be in Jinja");

        // After the expression.
        assert!(!ctx.is_in_jinja_range(16), "! should not be in Jinja");
    }
4815
    /// `{% ... %}` statements are covered; the content between two statements is not.
    #[test]
    fn test_is_in_jinja_range_statement() {
        let content = "{% if condition %}content{% endif %}";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Opening statement.
        assert!(ctx.is_in_jinja_range(0), "Start of Jinja statement");
        assert!(ctx.is_in_jinja_range(5), "condition should be in Jinja");
        assert!(ctx.is_in_jinja_range(17), "End of opening statement");

        // Text between the statements.
        assert!(!ctx.is_in_jinja_range(18), "content should not be in Jinja");

        // Closing statement.
        assert!(ctx.is_in_jinja_range(25), "Start of endif");
        assert!(ctx.is_in_jinja_range(32), "endif should be in Jinja");
    }
4833
    /// Two separate expressions on one line are each covered; the text between them is not.
    #[test]
    fn test_is_in_jinja_range_multiple() {
        let content = "{{ a }} and {{ b }}";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // First expression.
        assert!(ctx.is_in_jinja_range(0));
        assert!(ctx.is_in_jinja_range(3));
        assert!(ctx.is_in_jinja_range(6));

        // Text between the expressions.
        assert!(!ctx.is_in_jinja_range(8));
        assert!(!ctx.is_in_jinja_range(11));

        // Second expression.
        assert!(ctx.is_in_jinja_range(12));
        assert!(ctx.is_in_jinja_range(15));
        assert!(ctx.is_in_jinja_range(18));
    }
4853
    /// Text without Jinja delimiters has no byte position inside a Jinja range.
    #[test]
    fn test_is_in_jinja_range_no_jinja() {
        let content = "Plain text with single braces but not Jinja";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        for i in 0..content.len() {
            assert!(!ctx.is_in_jinja_range(i), "Position {i} should not be in Jinja");
        }
    }
4864
    /// A double-quoted reference-definition title is covered from `title_byte_start`
    /// up to, but not including, `title_byte_end`.
    #[test]
    fn test_is_in_link_title_with_title() {
        let content = r#"[ref]: https://example.com "Title text"

Some content."#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // The definition is parsed and carries a title byte range.
        assert_eq!(ctx.reference_defs.len(), 1);
        let def = &ctx.reference_defs[0];
        assert!(def.title_byte_start.is_some());
        assert!(def.title_byte_end.is_some());

        let title_start = def.title_byte_start.unwrap();
        let title_end = def.title_byte_end.unwrap();

        // A position inside the URL is not part of the title.
        assert!(!ctx.is_in_link_title(10), "URL should not be in title");

        // Start, middle and last byte of the title range.
        assert!(ctx.is_in_link_title(title_start), "Title start should be in title");
        assert!(
            ctx.is_in_link_title(title_start + 5),
            "Middle of title should be in title"
        );
        assert!(ctx.is_in_link_title(title_end - 1), "End of title should be in title");

        // The end bound is exclusive.
        assert!(
            !ctx.is_in_link_title(title_end),
            "After title end should not be in title"
        );
    }
4902
    /// A reference definition without a title has no title range, so no position is in a title.
    #[test]
    fn test_is_in_link_title_without_title() {
        let content = "[ref]: https://example.com\n\nSome content.";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.reference_defs.len(), 1);
        let def = &ctx.reference_defs[0];
        assert!(def.title_byte_start.is_none());
        assert!(def.title_byte_end.is_none());

        for i in 0..content.len() {
            assert!(!ctx.is_in_link_title(i), "Position {i} should not be in title");
        }
    }
4919
    /// With several definitions, only those that have a title contribute a title range.
    #[test]
    fn test_is_in_link_title_multiple_refs() {
        let content = r#"[ref1]: /url1 "Title One"
[ref2]: /url2
[ref3]: /url3 "Title Three"
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.reference_defs.len(), 3);

        // ref1 has a title.
        let ref1 = ctx.reference_defs.iter().find(|r| r.id == "ref1").unwrap();
        assert!(ref1.title_byte_start.is_some());

        // ref2 has none.
        let ref2 = ctx.reference_defs.iter().find(|r| r.id == "ref2").unwrap();
        assert!(ref2.title_byte_start.is_none());

        // ref3 has a title.
        let ref3 = ctx.reference_defs.iter().find(|r| r.id == "ref3").unwrap();
        assert!(ref3.title_byte_start.is_some());

        // Positions inside ref1's title match; a position past its end does not.
        if let (Some(start), Some(end)) = (ref1.title_byte_start, ref1.title_byte_end) {
            assert!(ctx.is_in_link_title(start + 1));
            assert!(!ctx.is_in_link_title(end + 5));
        }

        // A position inside ref3's title matches as well.
        if let (Some(start), Some(_end)) = (ref3.title_byte_start, ref3.title_byte_end) {
            assert!(ctx.is_in_link_title(start + 1));
        }
    }
4954
    /// Single-quoted titles are checked the same way; the range assertions run only
    /// when the parser reports a title range.
    #[test]
    fn test_is_in_link_title_single_quotes() {
        let content = "[ref]: /url 'Single quoted title'\n";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.reference_defs.len(), 1);
        let def = &ctx.reference_defs[0];

        if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
            assert!(ctx.is_in_link_title(start));
            assert!(ctx.is_in_link_title(start + 5));
            assert!(!ctx.is_in_link_title(end));
        }
    }
4969
    /// Parenthesized titles: the test accepts either outcome. If a title range is
    /// reported it must behave like any other; otherwise no position may be in a title.
    #[test]
    fn test_is_in_link_title_parentheses() {
        let content = "[ref]: /url (Parenthesized title)\n";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        if ctx.reference_defs.is_empty() {
            // Definition not recognised at all: nothing is a title.
            for i in 0..content.len() {
                assert!(!ctx.is_in_link_title(i));
            }
        } else {
            let def = &ctx.reference_defs[0];
            if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
                // Title recognised: range is start-inclusive, end-exclusive.
                assert!(ctx.is_in_link_title(start));
                assert!(ctx.is_in_link_title(start + 5));
                assert!(!ctx.is_in_link_title(end));
            } else {
                // Definition recognised without a title: nothing is a title.
                for i in 0..content.len() {
                    assert!(!ctx.is_in_link_title(i));
                }
            }
        }
    }
4998
    /// Without any reference definitions, no position is inside a link title.
    #[test]
    fn test_is_in_link_title_no_refs() {
        let content = "Just plain text without any reference definitions.";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(ctx.reference_defs.is_empty());

        for i in 0..content.len() {
            assert!(!ctx.is_in_link_title(i));
        }
    }
5010
    /// A `$...$` span is detected as inline math and its content excludes the delimiters.
    #[test]
    fn test_math_spans_inline() {
        let content = "Text with inline math $[f](x)$ in it.";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let math_spans = ctx.math_spans();
        assert_eq!(math_spans.len(), 1, "Should detect one inline math span");

        let span = &math_spans[0];
        assert!(!span.is_display, "Should be inline math, not display");
        assert_eq!(span.content, "[f](x)", "Content should be extracted correctly");
    }
5027
    /// A single-line `$$...$$` span is detected as display math, link-like text included.
    #[test]
    fn test_math_spans_display_single_line() {
        let content = "$$X(\\zeta) = \\mathcal Z [x](\\zeta)$$";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let math_spans = ctx.math_spans();
        assert_eq!(math_spans.len(), 1, "Should detect one display math span");

        let span = &math_spans[0];
        assert!(span.is_display, "Should be display math");
        assert!(
            span.content.contains("[x](\\zeta)"),
            "Content should contain the link-like pattern"
        );
    }
5043
    /// A `$$` block spread over several lines is detected as one display math span.
    #[test]
    fn test_math_spans_display_multiline() {
        let content = "Before\n\n$$\n[x](\\zeta) = \\sum_k x(k)\n$$\n\nAfter";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let math_spans = ctx.math_spans();
        assert_eq!(math_spans.len(), 1, "Should detect one display math span");

        let span = &math_spans[0];
        assert!(span.is_display, "Should be display math");
    }
5055
    /// `is_in_math_span` is true for positions inside the span and false before and after it.
    #[test]
    fn test_is_in_math_span() {
        let content = "Text $[f](x)$ more text";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Byte range of the span, from the opening `$` to one past the closing `$`.
        let math_start = content.find('$').unwrap();
        let math_end = content.rfind('$').unwrap() + 1;

        assert!(
            ctx.is_in_math_span(math_start + 1),
            "Position inside math span should return true"
        );
        assert!(
            ctx.is_in_math_span(math_start + 3),
            "Position inside math span should return true"
        );

        assert!(!ctx.is_in_math_span(0), "Position before math span should return false");
        assert!(
            !ctx.is_in_math_span(math_end + 1),
            "Position after math span should return false"
        );
    }
5081
    /// Math spans and code spans on the same line are detected independently.
    #[test]
    fn test_math_spans_mixed_with_code() {
        let content = "Math $[f](x)$ and code `[g](y)` mixed";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let math_spans = ctx.math_spans();
        let code_spans = ctx.code_spans();

        assert_eq!(math_spans.len(), 1, "Should have one math span");
        assert_eq!(code_spans.len(), 1, "Should have one code span");

        // Each span holds only its own delimited content.
        assert_eq!(math_spans[0].content, "[f](x)");
        assert_eq!(code_spans[0].content, "[g](y)");
    }
5098
    /// Text without `$` delimiters produces no math spans.
    #[test]
    fn test_math_spans_no_math() {
        let content = "Regular text without any math at all.";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let math_spans = ctx.math_spans();
        assert!(math_spans.is_empty(), "Should have no math spans");
    }
5107
    /// Two inline spans and one display span on the same line are all detected and classified.
    #[test]
    fn test_math_spans_multiple() {
        let content = "First $a$ and second $b$ and display $$c$$";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let math_spans = ctx.math_spans();
        assert_eq!(math_spans.len(), 3, "Should detect three math spans");

        let inline_count = math_spans.iter().filter(|s| !s.is_display).count();
        let display_count = math_spans.iter().filter(|s| s.is_display).count();

        assert_eq!(inline_count, 2, "Should have two inline math spans");
        assert_eq!(display_count, 1, "Should have one display math span");
    }
5123
    /// The math span range is start-inclusive and end-exclusive.
    #[test]
    fn test_is_in_math_span_boundary_positions() {
        let content = "$[f](x)$";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let math_spans = ctx.math_spans();
        assert_eq!(math_spans.len(), 1, "Should have one math span");

        let span = &math_spans[0];

        // First byte of the span.
        assert!(
            ctx.is_in_math_span(span.byte_offset),
            "Start position should be in span"
        );

        // Just after the first byte.
        assert!(
            ctx.is_in_math_span(span.byte_offset + 1),
            "Position after start should be in span"
        );

        // Last byte of the span.
        assert!(
            ctx.is_in_math_span(span.byte_end - 1),
            "Position at end-1 should be in span"
        );

        // One past the last byte.
        assert!(
            !ctx.is_in_math_span(span.byte_end),
            "Position at byte_end should NOT be in span (exclusive)"
        );
    }
5160
    /// A math span that opens the document starts at byte offset 0.
    #[test]
    fn test_math_spans_at_document_start() {
        let content = "$x$ text";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let math_spans = ctx.math_spans();
        assert_eq!(math_spans.len(), 1);
        assert_eq!(math_spans[0].byte_offset, 0, "Math should start at byte 0");
    }
5170
    /// A math span that closes the document ends at `content.len()`.
    #[test]
    fn test_math_spans_at_document_end() {
        let content = "text $x$";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let math_spans = ctx.math_spans();
        assert_eq!(math_spans.len(), 1);
        assert_eq!(math_spans[0].byte_end, content.len(), "Math should end at document end");
    }
5180
    /// Back-to-back math with no text in between: however it is split into spans,
    /// every byte position must be covered by one of them.
    #[test]
    fn test_math_spans_consecutive() {
        let content = "$a$$b$";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let math_spans = ctx.math_spans();
        assert!(!math_spans.is_empty(), "Should detect at least one math span");

        for i in 0..content.len() {
            assert!(ctx.is_in_math_span(i), "Position {i} should be in a math span");
        }
    }
5195
    /// A lone `$` used as a currency sign must not produce a math span containing the amount.
    #[test]
    fn test_math_spans_currency_not_math() {
        let content = "Price is $100";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let math_spans = ctx.math_spans();
        assert!(
            math_spans.is_empty() || !math_spans.iter().any(|s| s.content.contains("100")),
            "Unbalanced $ should not create math span containing 100"
        );
    }
5210}