1use crate::config::MarkdownFlavor;
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
4use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
5use regex::Regex;
6use std::borrow::Cow;
7use std::path::PathBuf;
8use std::sync::LazyLock;
9
/// Evaluates `$code`, returning its value, and reports the elapsed wall-clock
/// time on stderr when `$profile` is true.
///
/// The timer is started before `$code` runs and read only when profiling is
/// enabled. `$name` is the label printed after the `[PROFILE]` tag.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let timer = std::time::Instant::now();
        let value = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, timer.elapsed());
        }
        value
    }};
}
22
/// wasm32 variant of `profile_section!`: expands to `$code` alone.
///
/// `$name` and `$profile` are accepted so call sites are identical on every
/// target, but they are not evaluated and no timing is performed.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
27
/// Lazily compiled regex for bracketed link syntax.
///
/// Matches `[text]` followed by either a parenthesised destination
/// (`(<url>)` or `(url)`, optionally followed by a quoted title) or a second
/// bracket pair holding a reference id. Capture groups: 1 = text,
/// 2 = angle-bracketed URL, 3 = bare URL, 4/5 = double/single-quoted title,
/// 6 = reference id. The text group does not allow unescaped brackets.
///
/// Compilation failure panics on first use (`unwrap`).
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
 \[((?:[^\[\]\\]|\\.)*)\] # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
 (?:
 \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
 |
 \[([^\]]*)\] # Reference ID in group 6
 )"#
    ).unwrap()
});
41
/// Lazily compiled regex for image syntax.
///
/// Same shape as the link pattern but requires a leading `!`. Capture groups:
/// 1 = alt text, 2 = angle-bracketed URL, 3 = bare URL, 4/5 = double/single
/// quoted title, 6 = reference id.
///
/// Compilation failure panics on first use (`unwrap`).
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
 !\[((?:[^\[\]\\]|\\.)*)\] # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
 (?:
 \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
 |
 \[([^\]]*)\] # Reference ID in group 6
 )"#
    ).unwrap()
});
55
/// Lazily compiled, multi-line regex for reference definitions of the form
/// `[id]: url "title"`.
///
/// Allows up to three leading spaces. Capture groups: 1 = id, 2 = URL (a run
/// of non-whitespace), 3/4 = optional double/single-quoted title.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
59
/// Lazily compiled regex for `http`, `https` and `ftp` URLs written directly in
/// text.
///
/// Capture group 1 is the scheme. The remainder of the match excludes
/// whitespace, angle/square brackets, parentheses, backslashes, quotes and
/// backticks, and may include an optional port, path, query and fragment.
static BARE_URL_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(https?|ftp)://[^\s<>\[\]()\\'"`]+(?:\.[^\s<>\[\]()\\'"`]+)*(?::\d+)?(?:/[^\s<>\[\]()\\'"`]*)?(?:\?[^\s<>\[\]()\\'"`]*)?(?:#[^\s<>\[\]()\\'"`]*)?"#
    ).unwrap()
});
66
/// Lazily compiled regex for e-mail-like tokens: a local part, `@`, a domain,
/// and a final alphabetic label of at least two characters.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
70
/// Lazily compiled regex capturing a leading blockquote prefix: optional
/// whitespace, one or more `>` characters, then optional whitespace.
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
73
/// Per-line facts about the document, stored once per source line.
///
/// The line's text is not stored; it is recovered from the source with
/// [`LineInfo::content`] using `byte_offset` and `byte_len`.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset in the source at which this line's content starts.
    pub byte_offset: usize,
    /// Length in bytes of the slice returned by `content`.
    pub byte_len: usize,
    // NOTE(review): presumably the width of leading whitespace; the code that
    // fills this in is not visible in this part of the file.
    pub indent: usize,
    /// Whether the line is blank; blank lines reset list tracking in the
    /// mixed-nesting check.
    pub is_blank: bool,
    /// Flag reported by `LintContext::is_in_code_block`.
    pub in_code_block: bool,
    /// Flag reported by `LintContext::is_in_front_matter`.
    pub in_front_matter: bool,
    /// Flag reported by `LintContext::is_in_html_block`.
    pub in_html_block: bool,
    /// Lines with this flag are skipped by the mixed-nesting check.
    pub in_html_comment: bool,
    /// List item details when a list marker was detected on this line.
    pub list_item: Option<ListItemInfo>,
    /// Heading details when a heading was detected on this line.
    pub heading: Option<HeadingInfo>,
    /// Blockquote details when a blockquote was detected on this line.
    pub blockquote: Option<BlockquoteInfo>,
    /// Lines with this flag are skipped by the mixed-nesting check.
    pub in_mkdocstrings: bool,
    /// Lines with this flag are skipped by the mixed-nesting check.
    pub in_esm_block: bool,
    /// Set by `LintContext::new` on every line after the first line of a
    /// code span that ends on a later line, up to and including its last line.
    pub in_code_span_continuation: bool,
}
106
107impl LineInfo {
108 pub fn content<'a>(&self, source: &'a str) -> &'a str {
110 &source[self.byte_offset..self.byte_offset + self.byte_len]
111 }
112}
113
/// Details of a list item marker found on a line.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    // NOTE(review): presumably the marker text as written (e.g. `-` or `1.`);
    // the code that fills this in is not visible here.
    pub marker: String,
    /// Whether the item belongs to an ordered list; compared between parent
    /// and child items by the mixed-nesting check.
    pub is_ordered: bool,
    // NOTE(review): presumably the parsed number of an ordered item — confirm.
    pub number: Option<usize>,
    /// Column of the marker; the mixed-nesting check treats a value of 1 the
    /// same as position 0.
    pub marker_column: usize,
    // NOTE(review): presumably the column at which item content starts — confirm.
    pub content_column: usize,
}
128
/// Syntactic form of a heading.
// NOTE(review): variant meanings are inferred from the names (ATX = `#`
// prefixed; Setext1/Setext2 = underlined, level 1/2) — confirm against the
// heading detection code, which is not visible here.
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    ATX,
    Setext1,
    Setext2,
}
139
/// A link found in the document, as produced by `LintContext::parse_links`.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// 1-based number of the line containing the link's first byte.
    pub line: usize,
    /// Byte distance from the start of that line to the link's first byte.
    pub start_col: usize,
    /// Byte distance from the start of the line containing `byte_end` to
    /// `byte_end`.
    pub end_col: usize,
    /// Byte offset of the link's first byte in the document.
    pub byte_offset: usize,
    /// Byte offset just past the link in the document.
    pub byte_end: usize,
    /// Text between the opening `[` and its matching `]`; empty when no
    /// closing bracket is found.
    pub text: Cow<'a, str>,
    /// Destination URL reported by the parser; empty for links added by the
    /// regex pass.
    pub url: Cow<'a, str>,
    /// True for reference, collapsed and shortcut links.
    pub is_reference: bool,
    /// Lower-cased reference id for reference-style links, otherwise `None`.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type reported by the parser (`Reference` for regex-pass links).
    pub link_type: LinkType,
}
164
/// A broken link reported through the parser's broken-link callback.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text the parser reported for the broken link.
    pub reference: String,
    /// The span the parser reported for the broken link.
    pub span: std::ops::Range<usize>,
}
173
/// A footnote reference event recorded while parsing links.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// Footnote identifier reported by the parser.
    pub id: String,
    /// 1-based number of the line containing the start of the reference.
    pub line: usize,
    /// Start of the event's byte range.
    pub byte_offset: usize,
    /// End of the event's byte range.
    pub byte_end: usize,
}
186
/// An image found in the document.
// NOTE(review): the code that constructs this type is not fully visible here;
// the fields presumably mirror those of `ParsedLink` (1-based `line`,
// byte-based columns relative to the line start) — confirm.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    pub line: usize,
    pub start_col: usize,
    pub end_col: usize,
    pub byte_offset: usize,
    pub byte_end: usize,
    pub alt_text: Cow<'a, str>,
    pub url: Cow<'a, str>,
    pub is_reference: bool,
    pub reference_id: Option<Cow<'a, str>>,
    pub link_type: LinkType,
}
211
/// A reference definition found in the document.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    pub line: usize,
    /// Identifier; `get_reference_url` compares it against a lower-cased
    /// lookup key.
    pub id: String,
    /// URL returned by `get_reference_url`.
    pub url: String,
    pub title: Option<String>,
    /// Start of the definition's byte range (inclusive) as tested by
    /// `is_in_reference_def`.
    pub byte_offset: usize,
    /// End of the definition's byte range (exclusive).
    pub byte_end: usize,
    /// Start of the title's byte range (inclusive) as tested by
    /// `is_in_link_title`; both title bounds must be present to match.
    pub title_byte_start: Option<usize>,
    /// End of the title's byte range (exclusive).
    pub title_byte_end: Option<usize>,
}
232
/// An inline code span, which may extend over several lines.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// 1-based line on which the span starts.
    pub line: usize,
    /// 1-based line on which the span ends.
    pub end_line: usize,
    /// Start column, compared against a 0-based column in `is_in_code_span`
    /// (inclusive).
    pub start_col: usize,
    /// End column on `end_line`, compared against a 0-based column (exclusive).
    pub end_col: usize,
    /// Start of the span's byte range (inclusive).
    pub byte_offset: usize,
    /// End of the span's byte range (exclusive).
    pub byte_end: usize,
    // NOTE(review): presumably the number of backticks in the delimiter — confirm.
    pub backtick_count: usize,
    pub content: String,
}
253
/// Details of a heading detected on a line.
// NOTE(review): apart from `is_valid`, the field meanings below are not
// established by the code visible here; consult the heading detection pass.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    pub level: u8,
    pub style: HeadingStyle,
    pub marker: String,
    pub marker_column: usize,
    pub content_column: usize,
    pub text: String,
    pub custom_id: Option<String>,
    pub raw_text: String,
    pub has_closing_sequence: bool,
    pub closing_sequence: String,
    /// Only headings with this flag set are yielded by `ValidHeadingsIter`.
    pub is_valid: bool,
}
281
/// Item yielded by `ValidHeadingsIter`: a heading whose `is_valid` flag is set,
/// together with the line it was found on.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// 1-based line number (slice index + 1).
    pub line_num: usize,
    /// The heading stored on that line.
    pub heading: &'a HeadingInfo,
    /// The line record the heading was taken from.
    pub line_info: &'a LineInfo,
}
295
/// Iterator over the lines of a document that carry a valid heading.
pub struct ValidHeadingsIter<'a> {
    /// All line records being scanned.
    lines: &'a [LineInfo],
    /// Index of the next line to examine.
    current_index: usize,
}
304
305impl<'a> ValidHeadingsIter<'a> {
306 fn new(lines: &'a [LineInfo]) -> Self {
307 Self {
308 lines,
309 current_index: 0,
310 }
311 }
312}
313
314impl<'a> Iterator for ValidHeadingsIter<'a> {
315 type Item = ValidHeading<'a>;
316
317 fn next(&mut self) -> Option<Self::Item> {
318 while self.current_index < self.lines.len() {
319 let idx = self.current_index;
320 self.current_index += 1;
321
322 let line_info = &self.lines[idx];
323 if let Some(heading) = &line_info.heading
324 && heading.is_valid
325 {
326 return Some(ValidHeading {
327 line_num: idx + 1, heading,
329 line_info,
330 });
331 }
332 }
333 None
334 }
335}
336
/// Details of a blockquote marker detected on a line.
// NOTE(review): the code that fills in these fields is not visible here; the
// meanings are suggested by the names only (e.g. `needs_md028_fix` presumably
// relates to rule MD028) — confirm against the blockquote detection pass.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    pub nesting_level: usize,
    pub indent: String,
    pub marker_column: usize,
    pub prefix: String,
    pub content: String,
    pub has_no_space_after_marker: bool,
    pub has_multiple_spaces_after_marker: bool,
    pub needs_md028_fix: bool,
}
357
/// A contiguous list in the document.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// First line of the block; `is_in_list_block` treats the range
    /// `start_line..=end_line` as inclusive.
    pub start_line: usize,
    /// Last line of the block (inclusive).
    pub end_line: usize,
    pub is_ordered: bool,
    pub marker: Option<String>,
    pub blockquote_prefix: String,
    // NOTE(review): presumably the line numbers of the items in this block — confirm.
    pub item_lines: Vec<usize>,
    pub nesting_level: usize,
    pub max_marker_width: usize,
}
378
379use std::sync::{Arc, OnceLock};
380
/// Precomputed counts of selected characters in the document.
///
/// `LintContext::has_char` and `LintContext::char_count` answer from these
/// fields instead of rescanning the content.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count reported for `#`.
    pub hash_count: usize,
    /// Count reported for `*`.
    pub asterisk_count: usize,
    /// Count reported for `_`.
    pub underscore_count: usize,
    /// Count reported for `-`.
    pub hyphen_count: usize,
    /// Count reported for `+`.
    pub plus_count: usize,
    /// Count reported for `>`.
    pub gt_count: usize,
    /// Count reported for `|`.
    pub pipe_count: usize,
    /// Count reported for `[`.
    pub bracket_count: usize,
    /// Count reported for `` ` ``.
    pub backtick_count: usize,
    /// Count reported for `<`.
    pub lt_count: usize,
    /// Count reported for `!`.
    pub exclamation_count: usize,
    /// Count reported for the newline character.
    pub newline_count: usize,
}
409
/// An HTML tag found in the document.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number used by `html_tags_on_line` to select tags.
    pub line: usize,
    pub start_col: usize,
    pub end_col: usize,
    /// Start of the tag's byte range (inclusive) as tested by `is_in_html_tag`.
    pub byte_offset: usize,
    /// End of the tag's byte range (exclusive).
    pub byte_end: usize,
    pub tag_name: String,
    pub is_closing: bool,
    pub is_self_closing: bool,
    pub raw_content: String,
}
432
/// An emphasis span found in the document.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number used by `emphasis_spans_on_line` to select spans.
    pub line: usize,
    pub start_col: usize,
    pub end_col: usize,
    pub byte_offset: usize,
    pub byte_end: usize,
    // NOTE(review): presumably `*` or `_` — confirm against the emphasis parser.
    pub marker: char,
    pub marker_count: usize,
    pub content: String,
}
453
/// A table row found in the document.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number used by `table_rows_on_line` to select rows.
    pub line: usize,
    pub is_separator: bool,
    pub column_count: usize,
    pub column_alignments: Vec<String>,
}
466
/// A URL or address written directly in text.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number used by `bare_urls_on_line` to select entries.
    pub line: usize,
    pub start_col: usize,
    pub end_col: usize,
    pub byte_offset: usize,
    pub byte_end: usize,
    pub url: String,
    // NOTE(review): presumably a scheme/kind label such as "http" or "email" —
    // confirm against the bare URL parser.
    pub url_type: String,
}
485
/// Parsed view of one document, built once by `LintContext::new` and shared
/// by the lint rules.
///
/// Most data is computed eagerly in `new`; the `*_cache` fields hold results
/// that are filled on first access through the matching accessor method.
pub struct LintContext<'a> {
    /// The document text.
    pub content: &'a str,
    /// Byte offset of the start of each line (0, then one past every `\n`).
    pub line_offsets: Vec<usize>,
    /// Result of `CodeBlockUtils::detect_code_blocks` for the content.
    pub code_blocks: Vec<(usize, usize)>,
    /// One record per source line.
    pub lines: Vec<LineInfo>,
    /// Links collected by `parse_links`.
    pub links: Vec<ParsedLink<'a>>,
    /// Images collected by `parse_images`.
    pub images: Vec<ParsedImage<'a>>,
    /// Broken links reported by the parser callback during `parse_links`.
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Footnote references recorded during `parse_links`.
    pub footnote_refs: Vec<FootnoteRef>,
    /// Reference definitions collected by `parse_reference_defs`.
    pub reference_defs: Vec<ReferenceDef>,
    /// Code spans; already populated by `new`.
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>,
    /// List blocks collected by `parse_list_blocks`.
    pub list_blocks: Vec<ListBlock>,
    /// Character counts backing `has_char` / `char_count`.
    pub char_frequency: CharFrequency,
    /// Filled on first call to `html_tags`.
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>,
    /// Filled on first call to `emphasis_spans`.
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>,
    /// Filled on first call to `table_rows`.
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>,
    /// Filled on first call to `bare_urls`.
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>,
    /// Filled on first call to `has_mixed_list_nesting`.
    has_mixed_list_nesting_cache: OnceLock<bool>,
    /// Byte ranges of HTML comments, exposed through `html_comment_ranges`.
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Table blocks found by `TableUtils::find_table_blocks_with_code_info`.
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Line index built from the content.
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    /// Ranges tested by `is_in_jinja_range` (start inclusive, end exclusive).
    jinja_ranges: Vec<(usize, usize)>,
    /// Markdown flavor the context was built for.
    pub flavor: MarkdownFlavor,
    /// Source file path supplied by the caller, if any.
    pub source_file: Option<PathBuf>,
}
511
/// The four consecutive pieces of a blockquote line; concatenated in field
/// order they reproduce the original line.
struct BlockquoteComponents<'a> {
    /// Leading spaces/tabs before the first `>`.
    indent: &'a str,
    /// The run of `>` characters.
    markers: &'a str,
    /// Spaces/tabs immediately after the markers.
    spaces_after: &'a str,
    /// Everything after that whitespace.
    content: &'a str,
}

/// Splits `line` into indent, `>` markers, whitespace after the markers, and
/// the remaining content.
///
/// Returns `None` when the first character after any leading spaces/tabs is
/// not `>` (including empty and whitespace-only lines).
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    let is_space_or_tab = |ch: char| matches!(ch, ' ' | '\t');

    // Peel the leading whitespace off and recover it as a slice of `line`.
    let after_indent = line.trim_start_matches(is_space_or_tab);
    let (indent, _) = line.split_at(line.len() - after_indent.len());

    // At least one `>` is required for the line to be a blockquote.
    let after_markers = after_indent.trim_start_matches('>');
    let marker_len = after_indent.len() - after_markers.len();
    if marker_len == 0 {
        return None;
    }
    let markers = &after_indent[..marker_len];

    let content = after_markers.trim_start_matches(is_space_or_tab);
    let spaces_after = &after_markers[..after_markers.len() - content.len()];

    Some(BlockquoteComponents {
        indent,
        markers,
        spaces_after,
        content,
    })
}
556
557impl<'a> LintContext<'a> {
/// Builds the full lint context for `content`.
///
/// Runs every eager analysis pass in dependency order (line offsets, code
/// blocks, HTML comments, per-line info, HTML/ESM blocks, headings and
/// blockquotes, code spans, links, images, reference definitions, list
/// blocks, character counts, tables, line index, Jinja ranges) and stores
/// the results. Lazily computed data (HTML tags, emphasis spans, table rows,
/// bare URLs, mixed list nesting) starts out empty.
///
/// On non-wasm targets, setting the `RUMDL_PROFILE_QUADRATIC` environment
/// variable makes each pass print its duration to stderr.
pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
    // Profiling is driven by an environment variable and is never enabled on wasm.
    #[cfg(not(target_arch = "wasm32"))]
    let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
    #[cfg(target_arch = "wasm32")]
    let profile = false;

    // Byte offset of every line start: 0, then one past each newline.
    let line_offsets = profile_section!("Line offsets", profile, {
        let mut offsets = vec![0];
        for (i, c) in content.char_indices() {
            if c == '\n' {
                offsets.push(i + 1);
            }
        }
        offsets
    });

    let code_blocks = profile_section!("Code blocks", profile, CodeBlockUtils::detect_code_blocks(content));

    let html_comment_ranges = profile_section!(
        "HTML comment ranges",
        profile,
        crate::utils::skip_context::compute_html_comment_ranges(content)
    );

    // Autodoc block ranges are only looked for in the MkDocs flavor.
    let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
        if flavor == MarkdownFlavor::MkDocs {
            crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
        } else {
            Vec::new()
        }
    });

    let mut lines = profile_section!(
        "Basic line info",
        profile,
        Self::compute_basic_line_info(
            content,
            &line_offsets,
            &code_blocks,
            flavor,
            &html_comment_ranges,
            &autodoc_ranges,
        )
    );

    // The following passes annotate `lines` in place.
    profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));

    profile_section!(
        "ESM blocks",
        profile,
        Self::detect_esm_blocks(content, &mut lines, flavor)
    );

    // Link ranges are gathered first because heading/blockquote detection consumes them.
    let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));

    profile_section!(
        "Headings & blockquotes",
        profile,
        Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
    );

    let code_spans = profile_section!("Code spans", profile, Self::parse_code_spans(content, &lines));

    // Flag every line after the first of a multi-line code span (span lines are 1-based).
    for span in &code_spans {
        if span.end_line > span.line {
            for line_num in (span.line + 1)..=span.end_line {
                if let Some(line_info) = lines.get_mut(line_num - 1) {
                    line_info.in_code_span_continuation = true;
                }
            }
        }
    }

    let (links, broken_links, footnote_refs) = profile_section!(
        "Links",
        profile,
        Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
    );

    let images = profile_section!(
        "Images",
        profile,
        Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
    );

    let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));

    let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));

    let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));

    let table_blocks = profile_section!(
        "Table blocks",
        profile,
        crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
            content,
            &code_blocks,
            &code_spans,
            &html_comment_ranges,
        )
    );

    let line_index = profile_section!(
        "Line index",
        profile,
        crate::utils::range_utils::LineIndex::new(content)
    );

    let jinja_ranges = profile_section!(
        "Jinja ranges",
        profile,
        crate::utils::jinja_utils::find_jinja_ranges(content)
    );

    Self {
        content,
        line_offsets,
        code_blocks,
        lines,
        links,
        images,
        broken_links,
        footnote_refs,
        reference_defs,
        // Code spans were needed above, so the cache starts out filled.
        code_spans_cache: OnceLock::from(Arc::new(code_spans)),
        list_blocks,
        char_frequency,
        html_tags_cache: OnceLock::new(),
        emphasis_spans_cache: OnceLock::new(),
        table_rows_cache: OnceLock::new(),
        bare_urls_cache: OnceLock::new(),
        has_mixed_list_nesting_cache: OnceLock::new(),
        html_comment_ranges,
        table_blocks,
        line_index,
        jinja_ranges,
        flavor,
        source_file,
    }
}
715
716 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
718 Arc::clone(
719 self.code_spans_cache
720 .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
721 )
722 }
723
724 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
726 &self.html_comment_ranges
727 }
728
729 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
731 Arc::clone(self.html_tags_cache.get_or_init(|| {
732 Arc::new(Self::parse_html_tags(
733 self.content,
734 &self.lines,
735 &self.code_blocks,
736 self.flavor,
737 ))
738 }))
739 }
740
741 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
743 Arc::clone(
744 self.emphasis_spans_cache
745 .get_or_init(|| Arc::new(Self::parse_emphasis_spans(self.content, &self.lines, &self.code_blocks))),
746 )
747 }
748
749 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
751 Arc::clone(
752 self.table_rows_cache
753 .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
754 )
755 }
756
757 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
759 Arc::clone(
760 self.bare_urls_cache
761 .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
762 )
763 }
764
765 pub fn has_mixed_list_nesting(&self) -> bool {
769 *self
770 .has_mixed_list_nesting_cache
771 .get_or_init(|| self.compute_mixed_list_nesting())
772 }
773
774 fn compute_mixed_list_nesting(&self) -> bool {
776 let mut stack: Vec<(usize, bool)> = Vec::new();
781 let mut last_was_blank = false;
782
783 for line_info in &self.lines {
784 if line_info.in_code_block
786 || line_info.in_front_matter
787 || line_info.in_mkdocstrings
788 || line_info.in_html_comment
789 || line_info.in_esm_block
790 {
791 continue;
792 }
793
794 if line_info.is_blank {
796 last_was_blank = true;
797 continue;
798 }
799
800 if let Some(list_item) = &line_info.list_item {
801 let current_pos = if list_item.marker_column == 1 {
803 0
804 } else {
805 list_item.marker_column
806 };
807
808 if last_was_blank && current_pos == 0 {
810 stack.clear();
811 }
812 last_was_blank = false;
813
814 while let Some(&(pos, _)) = stack.last() {
816 if pos >= current_pos {
817 stack.pop();
818 } else {
819 break;
820 }
821 }
822
823 if let Some(&(_, parent_is_ordered)) = stack.last()
825 && parent_is_ordered != list_item.is_ordered
826 {
827 return true; }
829
830 stack.push((current_pos, list_item.is_ordered));
831 } else {
832 last_was_blank = false;
834 }
835 }
836
837 false
838 }
839
840 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
842 match self.line_offsets.binary_search(&offset) {
843 Ok(line) => (line + 1, 1),
844 Err(line) => {
845 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
846 (line, offset - line_start + 1)
847 }
848 }
849 }
850
851 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
853 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
855 return true;
856 }
857
858 self.code_spans()
860 .iter()
861 .any(|span| pos >= span.byte_offset && pos < span.byte_end)
862 }
863
864 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
866 if line_num > 0 {
867 self.lines.get(line_num - 1)
868 } else {
869 None
870 }
871 }
872
873 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
875 self.line_info(line_num).map(|info| info.byte_offset)
876 }
877
878 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
880 let normalized_id = ref_id.to_lowercase();
881 self.reference_defs
882 .iter()
883 .find(|def| def.id == normalized_id)
884 .map(|def| def.url.as_str())
885 }
886
887 pub fn is_in_list_block(&self, line_num: usize) -> bool {
889 self.list_blocks
890 .iter()
891 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
892 }
893
894 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
896 self.list_blocks
897 .iter()
898 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
899 }
900
901 pub fn is_in_code_block(&self, line_num: usize) -> bool {
905 if line_num == 0 || line_num > self.lines.len() {
906 return false;
907 }
908 self.lines[line_num - 1].in_code_block
909 }
910
911 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
913 if line_num == 0 || line_num > self.lines.len() {
914 return false;
915 }
916 self.lines[line_num - 1].in_front_matter
917 }
918
919 pub fn is_in_html_block(&self, line_num: usize) -> bool {
921 if line_num == 0 || line_num > self.lines.len() {
922 return false;
923 }
924 self.lines[line_num - 1].in_html_block
925 }
926
927 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
929 if line_num == 0 || line_num > self.lines.len() {
930 return false;
931 }
932
933 let col_0indexed = if col > 0 { col - 1 } else { 0 };
937 let code_spans = self.code_spans();
938 code_spans.iter().any(|span| {
939 if line_num < span.line || line_num > span.end_line {
941 return false;
942 }
943
944 if span.line == span.end_line {
945 col_0indexed >= span.start_col && col_0indexed < span.end_col
947 } else if line_num == span.line {
948 col_0indexed >= span.start_col
950 } else if line_num == span.end_line {
951 col_0indexed < span.end_col
953 } else {
954 true
956 }
957 })
958 }
959
960 #[inline]
962 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
963 let code_spans = self.code_spans();
964 code_spans
965 .iter()
966 .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
967 }
968
969 #[inline]
972 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
973 self.reference_defs
974 .iter()
975 .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
976 }
977
978 #[inline]
982 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
983 self.html_comment_ranges
984 .iter()
985 .any(|range| byte_pos >= range.start && byte_pos < range.end)
986 }
987
988 #[inline]
991 pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
992 self.html_tags()
993 .iter()
994 .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
995 }
996
997 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
999 self.jinja_ranges
1000 .iter()
1001 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1002 }
1003
1004 pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1006 self.reference_defs.iter().any(|def| {
1007 if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1008 byte_pos >= start && byte_pos < end
1009 } else {
1010 false
1011 }
1012 })
1013 }
1014
1015 pub fn has_char(&self, ch: char) -> bool {
1017 match ch {
1018 '#' => self.char_frequency.hash_count > 0,
1019 '*' => self.char_frequency.asterisk_count > 0,
1020 '_' => self.char_frequency.underscore_count > 0,
1021 '-' => self.char_frequency.hyphen_count > 0,
1022 '+' => self.char_frequency.plus_count > 0,
1023 '>' => self.char_frequency.gt_count > 0,
1024 '|' => self.char_frequency.pipe_count > 0,
1025 '[' => self.char_frequency.bracket_count > 0,
1026 '`' => self.char_frequency.backtick_count > 0,
1027 '<' => self.char_frequency.lt_count > 0,
1028 '!' => self.char_frequency.exclamation_count > 0,
1029 '\n' => self.char_frequency.newline_count > 0,
1030 _ => self.content.contains(ch), }
1032 }
1033
1034 pub fn char_count(&self, ch: char) -> usize {
1036 match ch {
1037 '#' => self.char_frequency.hash_count,
1038 '*' => self.char_frequency.asterisk_count,
1039 '_' => self.char_frequency.underscore_count,
1040 '-' => self.char_frequency.hyphen_count,
1041 '+' => self.char_frequency.plus_count,
1042 '>' => self.char_frequency.gt_count,
1043 '|' => self.char_frequency.pipe_count,
1044 '[' => self.char_frequency.bracket_count,
1045 '`' => self.char_frequency.backtick_count,
1046 '<' => self.char_frequency.lt_count,
1047 '!' => self.char_frequency.exclamation_count,
1048 '\n' => self.char_frequency.newline_count,
1049 _ => self.content.matches(ch).count(), }
1051 }
1052
1053 pub fn likely_has_headings(&self) -> bool {
1055 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
1057
1058 pub fn likely_has_lists(&self) -> bool {
1060 self.char_frequency.asterisk_count > 0
1061 || self.char_frequency.hyphen_count > 0
1062 || self.char_frequency.plus_count > 0
1063 }
1064
1065 pub fn likely_has_emphasis(&self) -> bool {
1067 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1068 }
1069
1070 pub fn likely_has_tables(&self) -> bool {
1072 self.char_frequency.pipe_count > 2
1073 }
1074
1075 pub fn likely_has_blockquotes(&self) -> bool {
1077 self.char_frequency.gt_count > 0
1078 }
1079
1080 pub fn likely_has_code(&self) -> bool {
1082 self.char_frequency.backtick_count > 0
1083 }
1084
1085 pub fn likely_has_links_or_images(&self) -> bool {
1087 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1088 }
1089
1090 pub fn likely_has_html(&self) -> bool {
1092 self.char_frequency.lt_count > 0
1093 }
1094
1095 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1097 self.html_tags()
1098 .iter()
1099 .filter(|tag| tag.line == line_num)
1100 .cloned()
1101 .collect()
1102 }
1103
1104 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1106 self.emphasis_spans()
1107 .iter()
1108 .filter(|span| span.line == line_num)
1109 .cloned()
1110 .collect()
1111 }
1112
1113 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1115 self.table_rows()
1116 .iter()
1117 .filter(|row| row.line == line_num)
1118 .cloned()
1119 .collect()
1120 }
1121
1122 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1124 self.bare_urls()
1125 .iter()
1126 .filter(|url| url.line == line_num)
1127 .cloned()
1128 .collect()
1129 }
1130
1131 #[inline]
1137 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1138 let idx = match lines.binary_search_by(|line| {
1140 if byte_offset < line.byte_offset {
1141 std::cmp::Ordering::Greater
1142 } else if byte_offset > line.byte_offset + line.byte_len {
1143 std::cmp::Ordering::Less
1144 } else {
1145 std::cmp::Ordering::Equal
1146 }
1147 }) {
1148 Ok(idx) => idx,
1149 Err(idx) => idx.saturating_sub(1),
1150 };
1151
1152 let line = &lines[idx];
1153 let line_num = idx + 1;
1154 let col = byte_offset.saturating_sub(line.byte_offset);
1155
1156 (idx, line_num, col)
1157 }
1158
1159 #[inline]
1161 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1162 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1164
1165 if idx > 0 {
1167 let span = &code_spans[idx - 1];
1168 if offset >= span.byte_offset && offset < span.byte_end {
1169 return true;
1170 }
1171 }
1172
1173 false
1174 }
1175
1176 fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1180 use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1181
1182 let mut link_ranges = Vec::new();
1183 let mut options = Options::empty();
1184 options.insert(Options::ENABLE_WIKILINKS);
1185 options.insert(Options::ENABLE_FOOTNOTES);
1186
1187 let parser = Parser::new_ext(content, options).into_offset_iter();
1188 let mut link_stack: Vec<usize> = Vec::new();
1189
1190 for (event, range) in parser {
1191 match event {
1192 Event::Start(Tag::Link { .. }) => {
1193 link_stack.push(range.start);
1194 }
1195 Event::End(TagEnd::Link) => {
1196 if let Some(start_pos) = link_stack.pop() {
1197 link_ranges.push((start_pos, range.end));
1198 }
1199 }
1200 _ => {}
1201 }
1202 }
1203
1204 link_ranges
1205 }
1206
1207 fn parse_links(
1209 content: &'a str,
1210 lines: &[LineInfo],
1211 code_blocks: &[(usize, usize)],
1212 code_spans: &[CodeSpan],
1213 flavor: MarkdownFlavor,
1214 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1215 ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1216 use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1217 use std::collections::HashSet;
1218
1219 let mut links = Vec::with_capacity(content.len() / 500);
1220 let mut broken_links = Vec::new();
1221 let mut footnote_refs = Vec::new();
1222
1223 let mut found_positions = HashSet::new();
1225
1226 let mut options = Options::empty();
1236 options.insert(Options::ENABLE_WIKILINKS);
1237 options.insert(Options::ENABLE_FOOTNOTES);
1238
1239 let parser = Parser::new_with_broken_link_callback(
1240 content,
1241 options,
1242 Some(|link: BrokenLink<'_>| {
1243 broken_links.push(BrokenLinkInfo {
1244 reference: link.reference.to_string(),
1245 span: link.span.clone(),
1246 });
1247 None
1248 }),
1249 )
1250 .into_offset_iter();
1251
1252 let mut link_stack: Vec<(
1253 usize,
1254 usize,
1255 pulldown_cmark::CowStr<'a>,
1256 LinkType,
1257 pulldown_cmark::CowStr<'a>,
1258 )> = Vec::new();
1259 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1262 match event {
1263 Event::Start(Tag::Link {
1264 link_type,
1265 dest_url,
1266 id,
1267 ..
1268 }) => {
1269 link_stack.push((range.start, range.end, dest_url, link_type, id));
1271 text_chunks.clear();
1272 }
1273 Event::Text(text) if !link_stack.is_empty() => {
1274 text_chunks.push((text.to_string(), range.start, range.end));
1276 }
1277 Event::Code(code) if !link_stack.is_empty() => {
1278 let code_text = format!("`{code}`");
1280 text_chunks.push((code_text, range.start, range.end));
1281 }
1282 Event::End(TagEnd::Link) => {
1283 if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1284 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1286 text_chunks.clear();
1287 continue;
1288 }
1289
1290 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1292
1293 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1295 text_chunks.clear();
1296 continue;
1297 }
1298
1299 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1300
1301 let is_reference = matches!(
1302 link_type,
1303 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1304 );
1305
1306 let link_text = if start_pos < content.len() {
1309 let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1310
1311 let mut close_pos = None;
1315 let mut depth = 0;
1316 let mut in_code_span = false;
1317
1318 for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1319 let mut backslash_count = 0;
1321 let mut j = i;
1322 while j > 0 && link_bytes[j - 1] == b'\\' {
1323 backslash_count += 1;
1324 j -= 1;
1325 }
1326 let is_escaped = backslash_count % 2 != 0;
1327
1328 if byte == b'`' && !is_escaped {
1330 in_code_span = !in_code_span;
1331 }
1332
1333 if !is_escaped && !in_code_span {
1335 if byte == b'[' {
1336 depth += 1;
1337 } else if byte == b']' {
1338 if depth == 0 {
1339 close_pos = Some(i);
1341 break;
1342 } else {
1343 depth -= 1;
1344 }
1345 }
1346 }
1347 }
1348
1349 if let Some(pos) = close_pos {
1350 Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1351 } else {
1352 Cow::Borrowed("")
1353 }
1354 } else {
1355 Cow::Borrowed("")
1356 };
1357
1358 let reference_id = if is_reference && !ref_id.is_empty() {
1360 Some(Cow::Owned(ref_id.to_lowercase()))
1361 } else if is_reference {
1362 Some(Cow::Owned(link_text.to_lowercase()))
1364 } else {
1365 None
1366 };
1367
1368 found_positions.insert(start_pos);
1370
1371 links.push(ParsedLink {
1372 line: line_num,
1373 start_col: col_start,
1374 end_col: col_end,
1375 byte_offset: start_pos,
1376 byte_end: range.end,
1377 text: link_text,
1378 url: Cow::Owned(url.to_string()),
1379 is_reference,
1380 reference_id,
1381 link_type,
1382 });
1383
1384 text_chunks.clear();
1385 }
1386 }
1387 Event::FootnoteReference(footnote_id) => {
1388 if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1391 continue;
1392 }
1393
1394 let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1395 footnote_refs.push(FootnoteRef {
1396 id: footnote_id.to_string(),
1397 line: line_num,
1398 byte_offset: range.start,
1399 byte_end: range.end,
1400 });
1401 }
1402 _ => {}
1403 }
1404 }
1405
1406 for cap in LINK_PATTERN.captures_iter(content) {
1410 let full_match = cap.get(0).unwrap();
1411 let match_start = full_match.start();
1412 let match_end = full_match.end();
1413
1414 if found_positions.contains(&match_start) {
1416 continue;
1417 }
1418
1419 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1421 continue;
1422 }
1423
1424 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1426 continue;
1427 }
1428
1429 if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1431 continue;
1432 }
1433
1434 if Self::is_offset_in_code_span(code_spans, match_start) {
1436 continue;
1437 }
1438
1439 if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1441 continue;
1442 }
1443
1444 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1446
1447 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1449 continue;
1450 }
1451
1452 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1453
1454 let text = cap.get(1).map_or("", |m| m.as_str());
1455
1456 if let Some(ref_id) = cap.get(6) {
1458 let ref_id_str = ref_id.as_str();
1459 let normalized_ref = if ref_id_str.is_empty() {
1460 Cow::Owned(text.to_lowercase()) } else {
1462 Cow::Owned(ref_id_str.to_lowercase())
1463 };
1464
1465 links.push(ParsedLink {
1467 line: line_num,
1468 start_col: col_start,
1469 end_col: col_end,
1470 byte_offset: match_start,
1471 byte_end: match_end,
1472 text: Cow::Borrowed(text),
1473 url: Cow::Borrowed(""), is_reference: true,
1475 reference_id: Some(normalized_ref),
1476 link_type: LinkType::Reference, });
1478 }
1479 }
1480
1481 (links, broken_links, footnote_refs)
1482 }
1483
1484 fn parse_images(
1486 content: &'a str,
1487 lines: &[LineInfo],
1488 code_blocks: &[(usize, usize)],
1489 code_spans: &[CodeSpan],
1490 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1491 ) -> Vec<ParsedImage<'a>> {
1492 use crate::utils::skip_context::is_in_html_comment_ranges;
1493 use std::collections::HashSet;
1494
1495 let mut images = Vec::with_capacity(content.len() / 1000);
1497 let mut found_positions = HashSet::new();
1498
1499 let parser = Parser::new(content).into_offset_iter();
1501 let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1502 Vec::new();
1503 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1506 match event {
1507 Event::Start(Tag::Image {
1508 link_type,
1509 dest_url,
1510 id,
1511 ..
1512 }) => {
1513 image_stack.push((range.start, dest_url, link_type, id));
1514 text_chunks.clear();
1515 }
1516 Event::Text(text) if !image_stack.is_empty() => {
1517 text_chunks.push((text.to_string(), range.start, range.end));
1518 }
1519 Event::Code(code) if !image_stack.is_empty() => {
1520 let code_text = format!("`{code}`");
1521 text_chunks.push((code_text, range.start, range.end));
1522 }
1523 Event::End(TagEnd::Image) => {
1524 if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1525 if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1527 continue;
1528 }
1529
1530 if Self::is_offset_in_code_span(code_spans, start_pos) {
1532 continue;
1533 }
1534
1535 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1537 continue;
1538 }
1539
1540 let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1542 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1543
1544 let is_reference = matches!(
1545 link_type,
1546 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1547 );
1548
1549 let alt_text = if start_pos < content.len() {
1552 let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1553
1554 let mut close_pos = None;
1557 let mut depth = 0;
1558
1559 if image_bytes.len() > 2 {
1560 for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1561 let mut backslash_count = 0;
1563 let mut j = i;
1564 while j > 0 && image_bytes[j - 1] == b'\\' {
1565 backslash_count += 1;
1566 j -= 1;
1567 }
1568 let is_escaped = backslash_count % 2 != 0;
1569
1570 if !is_escaped {
1571 if byte == b'[' {
1572 depth += 1;
1573 } else if byte == b']' {
1574 if depth == 0 {
1575 close_pos = Some(i);
1577 break;
1578 } else {
1579 depth -= 1;
1580 }
1581 }
1582 }
1583 }
1584 }
1585
1586 if let Some(pos) = close_pos {
1587 Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1588 } else {
1589 Cow::Borrowed("")
1590 }
1591 } else {
1592 Cow::Borrowed("")
1593 };
1594
1595 let reference_id = if is_reference && !ref_id.is_empty() {
1596 Some(Cow::Owned(ref_id.to_lowercase()))
1597 } else if is_reference {
1598 Some(Cow::Owned(alt_text.to_lowercase())) } else {
1600 None
1601 };
1602
1603 found_positions.insert(start_pos);
1604 images.push(ParsedImage {
1605 line: line_num,
1606 start_col: col_start,
1607 end_col: col_end,
1608 byte_offset: start_pos,
1609 byte_end: range.end,
1610 alt_text,
1611 url: Cow::Owned(url.to_string()),
1612 is_reference,
1613 reference_id,
1614 link_type,
1615 });
1616 }
1617 }
1618 _ => {}
1619 }
1620 }
1621
1622 for cap in IMAGE_PATTERN.captures_iter(content) {
1624 let full_match = cap.get(0).unwrap();
1625 let match_start = full_match.start();
1626 let match_end = full_match.end();
1627
1628 if found_positions.contains(&match_start) {
1630 continue;
1631 }
1632
1633 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1635 continue;
1636 }
1637
1638 if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1640 || Self::is_offset_in_code_span(code_spans, match_start)
1641 || is_in_html_comment_ranges(html_comment_ranges, match_start)
1642 {
1643 continue;
1644 }
1645
1646 if let Some(ref_id) = cap.get(6) {
1648 let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1649 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1650 let alt_text = cap.get(1).map_or("", |m| m.as_str());
1651 let ref_id_str = ref_id.as_str();
1652 let normalized_ref = if ref_id_str.is_empty() {
1653 Cow::Owned(alt_text.to_lowercase())
1654 } else {
1655 Cow::Owned(ref_id_str.to_lowercase())
1656 };
1657
1658 images.push(ParsedImage {
1659 line: line_num,
1660 start_col: col_start,
1661 end_col: col_end,
1662 byte_offset: match_start,
1663 byte_end: match_end,
1664 alt_text: Cow::Borrowed(alt_text),
1665 url: Cow::Borrowed(""),
1666 is_reference: true,
1667 reference_id: Some(normalized_ref),
1668 link_type: LinkType::Reference, });
1670 }
1671 }
1672
1673 images
1674 }
1675
1676 fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1678 let mut refs = Vec::with_capacity(lines.len() / 20); for (line_idx, line_info) in lines.iter().enumerate() {
1682 if line_info.in_code_block {
1684 continue;
1685 }
1686
1687 let line = line_info.content(content);
1688 let line_num = line_idx + 1;
1689
1690 if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1691 let id = cap.get(1).unwrap().as_str().to_lowercase();
1692 let url = cap.get(2).unwrap().as_str().to_string();
1693 let title_match = cap.get(3).or_else(|| cap.get(4));
1694 let title = title_match.map(|m| m.as_str().to_string());
1695
1696 let match_obj = cap.get(0).unwrap();
1699 let byte_offset = line_info.byte_offset + match_obj.start();
1700 let byte_end = line_info.byte_offset + match_obj.end();
1701
1702 let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1704 let start = line_info.byte_offset + m.start().saturating_sub(1);
1706 let end = line_info.byte_offset + m.end() + 1; (Some(start), Some(end))
1708 } else {
1709 (None, None)
1710 };
1711
1712 refs.push(ReferenceDef {
1713 line: line_num,
1714 id,
1715 url,
1716 title,
1717 byte_offset,
1718 byte_end,
1719 title_byte_start,
1720 title_byte_end,
1721 });
1722 }
1723 }
1724
1725 refs
1726 }
1727
/// Splits a line into its blockquote prefix and the remaining content.
///
/// The prefix covers every `>` marker (each optionally preceded by whitespace)
/// plus at most one space or tab directly after each marker. Returns `None`
/// when the first non-whitespace character is not `>`.
///
/// The two returned slices are adjacent: concatenated they reproduce `line`.
#[inline]
fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
    // Consume the first marker, or bail out if there is none.
    let mut rest = line.trim_start().strip_prefix('>')?;

    loop {
        // A single space or tab right after a marker belongs to the prefix.
        rest = rest.strip_prefix(|c: char| c == ' ' || c == '\t').unwrap_or(rest);

        // Keep going only while another marker follows (whitespace allowed before it).
        match rest.trim_start().strip_prefix('>') {
            Some(after_marker) => rest = after_marker,
            None => break,
        }
    }

    // `rest` is always a suffix of `line`, so the prefix is whatever precedes it.
    let prefix_len = line.len() - rest.len();
    Some((&line[..prefix_len], rest))
}
1768
/// Parses an unordered list marker at the start of `line`.
///
/// Returns `(leading whitespace, marker, spacing after marker, content)` when
/// the first character after any leading spaces/tabs is `-`, `*` or `+`.
/// The spacing may be empty; callers decide whether that is acceptable.
#[inline]
fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
    fn is_pad(c: char) -> bool {
        c == ' ' || c == '\t'
    }

    let after_indent = line.trim_start_matches(is_pad);
    let indent = &line[..line.len() - after_indent.len()];

    let mut chars = after_indent.chars();
    let marker = chars.next().filter(|c| matches!(c, '-' | '*' | '+'))?;

    let after_marker = chars.as_str();
    let content = after_marker.trim_start_matches(is_pad);
    let spacing = &after_marker[..after_marker.len() - content.len()];

    Some((indent, marker, spacing, content))
}
1801
/// Parses an ordered list marker at the start of `line`.
///
/// Returns `(leading whitespace, digits, delimiter, spacing after delimiter,
/// content)` when, after any leading spaces/tabs, the line has one or more
/// ASCII digits followed by `.` or `)`. The spacing may be empty; callers
/// decide whether that is acceptable.
#[inline]
fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
    fn is_pad(c: char) -> bool {
        c == ' ' || c == '\t'
    }

    let after_indent = line.trim_start_matches(is_pad);
    let indent = &line[..line.len() - after_indent.len()];

    let after_digits = after_indent.trim_start_matches(|c: char| c.is_ascii_digit());
    let number = &after_indent[..after_indent.len() - after_digits.len()];
    if number.is_empty() {
        return None;
    }

    let mut chars = after_digits.chars();
    let delimiter = chars.next().filter(|c| matches!(c, '.' | ')'))?;

    let after_delimiter = chars.as_str();
    let content = after_delimiter.trim_start_matches(is_pad);
    let spacing = &after_delimiter[..after_delimiter.len() - content.len()];

    Some((indent, number, delimiter, spacing, content))
}
1849
/// Builds a per-line flag vector marking the lines touched by any code block.
///
/// `line_offsets[i]` is the byte offset at which line `i` starts; each entry
/// of `code_blocks` is a `(start, end)` byte range. Range ends that do not
/// fall on a UTF-8 character boundary are widened outwards (start moves
/// backwards, end moves forwards) and the end is capped at `content.len()`.
/// The result has one entry per element of `line_offsets`.
fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
    let mut flags = vec![false; line_offsets.len()];
    let content_len = content.len();

    for &(start, end) in code_blocks {
        // Nearest char boundary at or before `start` (offset 0 is always one).
        let safe_start = (0..=start)
            .rev()
            .find(|&pos| content.is_char_boundary(pos))
            .unwrap_or(0);

        // Nearest char boundary at or after `end`, never past the end of the content.
        let safe_end = (end..=content_len)
            .find(|&pos| content.is_char_boundary(pos))
            .unwrap_or(content_len);

        // The line containing `safe_start` is the last one starting at or before it.
        let first_line = line_offsets
            .partition_point(|&offset| offset <= safe_start)
            .saturating_sub(1);
        // Exclusive upper bound: the first line starting at or after `safe_end`.
        let last_line = line_offsets.partition_point(|&offset| offset < safe_end);

        // `get_mut` yields `None` for an inverted range, which leaves the flags untouched.
        if let Some(covered) = flags.get_mut(first_line..last_line) {
            covered.fill(true);
        }
    }

    flags
}
1909
1910 fn compute_basic_line_info(
1912 content: &str,
1913 line_offsets: &[usize],
1914 code_blocks: &[(usize, usize)],
1915 flavor: MarkdownFlavor,
1916 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1917 autodoc_ranges: &[crate::utils::skip_context::ByteRange],
1918 ) -> Vec<LineInfo> {
1919 let content_lines: Vec<&str> = content.lines().collect();
1920 let mut lines = Vec::with_capacity(content_lines.len());
1921
1922 let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
1924
1925 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
1928
1929 for (i, line) in content_lines.iter().enumerate() {
1930 let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
1931 let indent = line.len() - line.trim_start().len();
1932
1933 let blockquote_parse = Self::parse_blockquote_prefix(line);
1935
1936 let is_blank = if let Some((_, content)) = blockquote_parse {
1938 content.trim().is_empty()
1940 } else {
1941 line.trim().is_empty()
1942 };
1943
1944 let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
1946
1947 let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
1949 && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
1950 let line_end_offset = byte_offset + line.len();
1953 let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
1954 html_comment_ranges,
1955 byte_offset,
1956 line_end_offset,
1957 );
1958 let list_item = if !(in_code_block
1959 || is_blank
1960 || in_mkdocstrings
1961 || in_html_comment
1962 || (front_matter_end > 0 && i < front_matter_end))
1963 {
1964 let (line_for_list_check, blockquote_prefix_len) = if let Some((prefix, content)) = blockquote_parse {
1966 (content, prefix.len())
1967 } else {
1968 (&**line, 0)
1969 };
1970
1971 if let Some((leading_spaces, marker, spacing, _content)) =
1972 Self::parse_unordered_list(line_for_list_check)
1973 {
1974 let marker_column = blockquote_prefix_len + leading_spaces.len();
1975 let content_column = marker_column + 1 + spacing.len();
1976
1977 if spacing.is_empty() {
1984 None
1985 } else {
1986 Some(ListItemInfo {
1987 marker: marker.to_string(),
1988 is_ordered: false,
1989 number: None,
1990 marker_column,
1991 content_column,
1992 })
1993 }
1994 } else if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
1995 Self::parse_ordered_list(line_for_list_check)
1996 {
1997 let marker = format!("{number_str}{delimiter}");
1998 let marker_column = blockquote_prefix_len + leading_spaces.len();
1999 let content_column = marker_column + marker.len() + spacing.len();
2000
2001 if spacing.is_empty() {
2004 None
2005 } else {
2006 Some(ListItemInfo {
2007 marker,
2008 is_ordered: true,
2009 number: number_str.parse().ok(),
2010 marker_column,
2011 content_column,
2012 })
2013 }
2014 } else {
2015 None
2016 }
2017 } else {
2018 None
2019 };
2020
2021 lines.push(LineInfo {
2022 byte_offset,
2023 byte_len: line.len(),
2024 indent,
2025 is_blank,
2026 in_code_block,
2027 in_front_matter: front_matter_end > 0 && i < front_matter_end,
2028 in_html_block: false, in_html_comment,
2030 list_item,
2031 heading: None, blockquote: None, in_mkdocstrings,
2034 in_esm_block: false, in_code_span_continuation: false, });
2037 }
2038
2039 lines
2040 }
2041
2042 fn detect_headings_and_blockquotes(
2044 content: &str,
2045 lines: &mut [LineInfo],
2046 flavor: MarkdownFlavor,
2047 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2048 link_byte_ranges: &[(usize, usize)],
2049 ) {
2050 static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
2052 LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
2053 static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
2054 LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
2055
2056 let content_lines: Vec<&str> = content.lines().collect();
2057
2058 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2060
2061 for i in 0..lines.len() {
2063 if lines[i].in_code_block {
2064 continue;
2065 }
2066
2067 if front_matter_end > 0 && i < front_matter_end {
2069 continue;
2070 }
2071
2072 if lines[i].in_html_block {
2074 continue;
2075 }
2076
2077 let line = content_lines[i];
2078
2079 if let Some(bq) = parse_blockquote_detailed(line) {
2081 let nesting_level = bq.markers.len(); let marker_column = bq.indent.len();
2083
2084 let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
2086
2087 let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
2089 let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
2092
2093 let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
2097
2098 lines[i].blockquote = Some(BlockquoteInfo {
2099 nesting_level,
2100 indent: bq.indent.to_string(),
2101 marker_column,
2102 prefix,
2103 content: bq.content.to_string(),
2104 has_no_space_after_marker: has_no_space,
2105 has_multiple_spaces_after_marker: has_multiple_spaces,
2106 needs_md028_fix,
2107 });
2108 }
2109
2110 if lines[i].is_blank {
2112 continue;
2113 }
2114
2115 let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
2118 crate::utils::mkdocs_snippets::is_snippet_section_start(line)
2119 || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
2120 } else {
2121 false
2122 };
2123
2124 if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
2125 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
2127 continue;
2128 }
2129 let line_offset = lines[i].byte_offset;
2132 if link_byte_ranges
2133 .iter()
2134 .any(|&(start, end)| line_offset > start && line_offset < end)
2135 {
2136 continue;
2137 }
2138 let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
2139 let hashes = caps.get(2).map_or("", |m| m.as_str());
2140 let spaces_after = caps.get(3).map_or("", |m| m.as_str());
2141 let rest = caps.get(4).map_or("", |m| m.as_str());
2142
2143 let level = hashes.len() as u8;
2144 let marker_column = leading_spaces.len();
2145
2146 let (text, has_closing, closing_seq) = {
2148 let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
2150 if rest[id_start..].trim_end().ends_with('}') {
2152 (&rest[..id_start], &rest[id_start..])
2154 } else {
2155 (rest, "")
2156 }
2157 } else {
2158 (rest, "")
2159 };
2160
2161 let trimmed_rest = rest_without_id.trim_end();
2163 if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
2164 let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();
2167
2168 let last_hash_char_idx = char_positions
2170 .iter()
2171 .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);
2172
2173 if let Some(mut char_idx) = last_hash_char_idx {
2174 while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
2176 char_idx -= 1;
2177 }
2178
2179 let start_of_hashes = char_positions[char_idx].0;
2181
2182 let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();
2184
2185 let potential_closing = &trimmed_rest[start_of_hashes..];
2187 let is_all_hashes = potential_closing.chars().all(|c| c == '#');
2188
2189 if is_all_hashes && has_space_before {
2190 let closing_hashes = potential_closing.to_string();
2192 let text_part = if !custom_id_part.is_empty() {
2195 format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
2198 } else {
2199 trimmed_rest[..start_of_hashes].trim_end().to_string()
2200 };
2201 (text_part, true, closing_hashes)
2202 } else {
2203 (rest.to_string(), false, String::new())
2205 }
2206 } else {
2207 (rest.to_string(), false, String::new())
2209 }
2210 } else {
2211 (rest.to_string(), false, String::new())
2213 }
2214 };
2215
2216 let content_column = marker_column + hashes.len() + spaces_after.len();
2217
2218 let raw_text = text.trim().to_string();
2220 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2221
2222 if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
2224 let next_line = content_lines[i + 1];
2225 if !lines[i + 1].in_code_block
2226 && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
2227 && let Some(next_line_id) =
2228 crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
2229 {
2230 custom_id = Some(next_line_id);
2231 }
2232 }
2233
2234 let is_valid = !spaces_after.is_empty()
2244 || rest.is_empty()
2245 || level > 1
2246 || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());
2247
2248 lines[i].heading = Some(HeadingInfo {
2249 level,
2250 style: HeadingStyle::ATX,
2251 marker: hashes.to_string(),
2252 marker_column,
2253 content_column,
2254 text: clean_text,
2255 custom_id,
2256 raw_text,
2257 has_closing_sequence: has_closing,
2258 closing_sequence: closing_seq,
2259 is_valid,
2260 });
2261 }
2262 else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2264 let next_line = content_lines[i + 1];
2265 if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2266 if front_matter_end > 0 && i < front_matter_end {
2268 continue;
2269 }
2270
2271 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2273 {
2274 continue;
2275 }
2276
2277 let underline = next_line.trim();
2278
2279 let level = if underline.starts_with('=') { 1 } else { 2 };
2280 let style = if level == 1 {
2281 HeadingStyle::Setext1
2282 } else {
2283 HeadingStyle::Setext2
2284 };
2285
2286 let raw_text = line.trim().to_string();
2288 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2289
2290 if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2292 let attr_line = content_lines[i + 2];
2293 if !lines[i + 2].in_code_block
2294 && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2295 && let Some(attr_line_id) =
2296 crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2297 {
2298 custom_id = Some(attr_line_id);
2299 }
2300 }
2301
2302 lines[i].heading = Some(HeadingInfo {
2303 level,
2304 style,
2305 marker: underline.to_string(),
2306 marker_column: next_line.len() - next_line.trim_start().len(),
2307 content_column: lines[i].indent,
2308 text: clean_text,
2309 custom_id,
2310 raw_text,
2311 has_closing_sequence: false,
2312 closing_sequence: String::new(),
2313 is_valid: true, });
2315 }
2316 }
2317 }
2318 }
2319
2320 fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2322 const BLOCK_ELEMENTS: &[&str] = &[
2325 "address",
2326 "article",
2327 "aside",
2328 "audio",
2329 "blockquote",
2330 "canvas",
2331 "details",
2332 "dialog",
2333 "dd",
2334 "div",
2335 "dl",
2336 "dt",
2337 "embed",
2338 "fieldset",
2339 "figcaption",
2340 "figure",
2341 "footer",
2342 "form",
2343 "h1",
2344 "h2",
2345 "h3",
2346 "h4",
2347 "h5",
2348 "h6",
2349 "header",
2350 "hr",
2351 "iframe",
2352 "li",
2353 "main",
2354 "menu",
2355 "nav",
2356 "noscript",
2357 "object",
2358 "ol",
2359 "p",
2360 "picture",
2361 "pre",
2362 "script",
2363 "search",
2364 "section",
2365 "source",
2366 "style",
2367 "summary",
2368 "svg",
2369 "table",
2370 "tbody",
2371 "td",
2372 "template",
2373 "textarea",
2374 "tfoot",
2375 "th",
2376 "thead",
2377 "tr",
2378 "track",
2379 "ul",
2380 "video",
2381 ];
2382
2383 let mut i = 0;
2384 while i < lines.len() {
2385 if lines[i].in_code_block || lines[i].in_front_matter {
2387 i += 1;
2388 continue;
2389 }
2390
2391 let trimmed = lines[i].content(content).trim_start();
2392
2393 if trimmed.starts_with('<') && trimmed.len() > 1 {
2395 let after_bracket = &trimmed[1..];
2397 let is_closing = after_bracket.starts_with('/');
2398 let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2399
2400 let tag_name = tag_start
2402 .chars()
2403 .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2404 .collect::<String>()
2405 .to_lowercase();
2406
2407 if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2409 lines[i].in_html_block = true;
2411
2412 if !is_closing {
2415 let closing_tag = format!("</{tag_name}>");
2416 let allow_blank_lines = tag_name == "style" || tag_name == "script";
2418 let mut j = i + 1;
2419 while j < lines.len() && j < i + 100 {
2420 if !allow_blank_lines && lines[j].is_blank {
2423 break;
2424 }
2425
2426 lines[j].in_html_block = true;
2427
2428 if lines[j].content(content).contains(&closing_tag) {
2430 break;
2431 }
2432 j += 1;
2433 }
2434 }
2435 }
2436 }
2437
2438 i += 1;
2439 }
2440 }
2441
2442 fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2445 if !flavor.supports_esm_blocks() {
2447 return;
2448 }
2449
2450 let mut in_multiline_comment = false;
2451
2452 for line in lines.iter_mut() {
2453 if line.is_blank || line.in_html_comment {
2455 continue;
2456 }
2457
2458 let trimmed = line.content(content).trim_start();
2459
2460 if in_multiline_comment {
2462 if trimmed.contains("*/") {
2463 in_multiline_comment = false;
2464 }
2465 continue;
2466 }
2467
2468 if trimmed.starts_with("//") {
2470 continue;
2471 }
2472
2473 if trimmed.starts_with("/*") {
2475 if !trimmed.contains("*/") {
2476 in_multiline_comment = true;
2477 }
2478 continue;
2479 }
2480
2481 if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
2483 line.in_esm_block = true;
2484 } else {
2485 break;
2487 }
2488 }
2489 }
2490
2491 fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
2493 let mut code_spans = Vec::new();
2494
2495 if !content.contains('`') {
2497 return code_spans;
2498 }
2499
2500 let parser = Parser::new(content).into_offset_iter();
2502
2503 for (event, range) in parser {
2504 if let Event::Code(_) = event {
2505 let start_pos = range.start;
2506 let end_pos = range.end;
2507
2508 let full_span = &content[start_pos..end_pos];
2510 let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
2511
2512 let content_start = start_pos + backtick_count;
2514 let content_end = end_pos - backtick_count;
2515 let span_content = if content_start < content_end {
2516 content[content_start..content_end].to_string()
2517 } else {
2518 String::new()
2519 };
2520
2521 let line_idx = lines
2524 .partition_point(|line| line.byte_offset <= start_pos)
2525 .saturating_sub(1);
2526 let line_num = line_idx + 1;
2527 let byte_col_start = start_pos - lines[line_idx].byte_offset;
2528
2529 let end_line_idx = lines
2531 .partition_point(|line| line.byte_offset <= end_pos)
2532 .saturating_sub(1);
2533 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
2534
2535 let line_content = lines[line_idx].content(content);
2538 let col_start = if byte_col_start <= line_content.len() {
2539 line_content[..byte_col_start].chars().count()
2540 } else {
2541 line_content.chars().count()
2542 };
2543
2544 let end_line_content = lines[end_line_idx].content(content);
2545 let col_end = if byte_col_end <= end_line_content.len() {
2546 end_line_content[..byte_col_end].chars().count()
2547 } else {
2548 end_line_content.chars().count()
2549 };
2550
2551 code_spans.push(CodeSpan {
2552 line: line_num,
2553 end_line: end_line_idx + 1,
2554 start_col: col_start,
2555 end_col: col_end,
2556 byte_offset: start_pos,
2557 byte_end: end_pos,
2558 backtick_count,
2559 content: span_content,
2560 });
2561 }
2562 }
2563
2564 code_spans.sort_by_key(|span| span.byte_offset);
2566
2567 code_spans
2568 }
2569
2570 fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
2581 const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
2583
2584 #[inline]
2587 fn reset_tracking_state(
2588 list_item: &ListItemInfo,
2589 has_list_breaking_content: &mut bool,
2590 min_continuation: &mut usize,
2591 ) {
2592 *has_list_breaking_content = false;
2593 let marker_width = if list_item.is_ordered {
2594 list_item.marker.len() + 1 } else {
2596 list_item.marker.len()
2597 };
2598 *min_continuation = if list_item.is_ordered {
2599 marker_width
2600 } else {
2601 UNORDERED_LIST_MIN_CONTINUATION_INDENT
2602 };
2603 }
2604
2605 let mut list_blocks = Vec::with_capacity(lines.len() / 10); let mut current_block: Option<ListBlock> = None;
2608 let mut last_list_item_line = 0;
2609 let mut current_indent_level = 0;
2610 let mut last_marker_width = 0;
2611
2612 let mut has_list_breaking_content_since_last_item = false;
2614 let mut min_continuation_for_tracking = 0;
2615
2616 for (line_idx, line_info) in lines.iter().enumerate() {
2617 let line_num = line_idx + 1;
2618
2619 if line_info.in_code_block {
2621 if let Some(ref mut block) = current_block {
2622 let min_continuation_indent =
2624 CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
2625
2626 let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
2628
2629 match context {
2630 CodeBlockContext::Indented => {
2631 block.end_line = line_num;
2633 continue;
2634 }
2635 CodeBlockContext::Standalone => {
2636 let completed_block = current_block.take().unwrap();
2638 list_blocks.push(completed_block);
2639 continue;
2640 }
2641 CodeBlockContext::Adjacent => {
2642 block.end_line = line_num;
2644 continue;
2645 }
2646 }
2647 } else {
2648 continue;
2650 }
2651 }
2652
2653 let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
2655 caps.get(0).unwrap().as_str().to_string()
2656 } else {
2657 String::new()
2658 };
2659
2660 if current_block.is_some()
2663 && line_info.list_item.is_none()
2664 && !line_info.is_blank
2665 && !line_info.in_code_span_continuation
2666 {
2667 let line_content = line_info.content(content).trim();
2668
2669 let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
2674 let breaks_list = line_info.heading.is_some()
2675 || line_content.starts_with("---")
2676 || line_content.starts_with("***")
2677 || line_content.starts_with("___")
2678 || crate::utils::skip_context::is_table_line(line_content)
2679 || line_content.starts_with(">")
2680 || (line_info.indent > 0
2681 && line_info.indent < min_continuation_for_tracking
2682 && !is_lazy_continuation);
2683
2684 if breaks_list {
2685 has_list_breaking_content_since_last_item = true;
2686 }
2687 }
2688
2689 if line_info.in_code_span_continuation
2692 && line_info.list_item.is_none()
2693 && let Some(ref mut block) = current_block
2694 {
2695 block.end_line = line_num;
2696 }
2697
2698 let is_valid_continuation =
2703 line_info.indent >= min_continuation_for_tracking || (line_info.indent == 0 && !line_info.is_blank); if !line_info.in_code_span_continuation
2705 && line_info.list_item.is_none()
2706 && !line_info.is_blank
2707 && !line_info.in_code_block
2708 && is_valid_continuation
2709 && let Some(ref mut block) = current_block
2710 {
2711 block.end_line = line_num;
2712 }
2713
2714 if let Some(list_item) = &line_info.list_item {
2716 let item_indent = list_item.marker_column;
2718 let nesting = item_indent / 2; if let Some(ref mut block) = current_block {
2721 let is_nested = nesting > block.nesting_level;
2725 let same_type =
2726 (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
2727 let same_context = block.blockquote_prefix == blockquote_prefix;
2728 let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
2730
2731 let marker_compatible =
2733 block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
2734
2735 let has_non_list_content = has_list_breaking_content_since_last_item;
2738
2739 let mut continues_list = if is_nested {
2743 same_context && reasonable_distance && !has_non_list_content
2745 } else {
2746 same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
2748 };
2749
2750 if !continues_list && reasonable_distance && line_num > 0 && block.end_line == line_num - 1 {
2753 if block.item_lines.contains(&(line_num - 1)) {
2756 continues_list = true;
2758 } else {
2759 continues_list = true;
2763 }
2764 }
2765
2766 if continues_list {
2767 block.end_line = line_num;
2769 block.item_lines.push(line_num);
2770
2771 block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
2773 list_item.marker.len() + 1
2774 } else {
2775 list_item.marker.len()
2776 });
2777
2778 if !block.is_ordered
2780 && block.marker.is_some()
2781 && block.marker.as_ref() != Some(&list_item.marker)
2782 {
2783 block.marker = None;
2785 }
2786
2787 reset_tracking_state(
2789 list_item,
2790 &mut has_list_breaking_content_since_last_item,
2791 &mut min_continuation_for_tracking,
2792 );
2793 } else {
2794 list_blocks.push(block.clone());
2797
2798 *block = ListBlock {
2799 start_line: line_num,
2800 end_line: line_num,
2801 is_ordered: list_item.is_ordered,
2802 marker: if list_item.is_ordered {
2803 None
2804 } else {
2805 Some(list_item.marker.clone())
2806 },
2807 blockquote_prefix: blockquote_prefix.clone(),
2808 item_lines: vec![line_num],
2809 nesting_level: nesting,
2810 max_marker_width: if list_item.is_ordered {
2811 list_item.marker.len() + 1
2812 } else {
2813 list_item.marker.len()
2814 },
2815 };
2816
2817 reset_tracking_state(
2819 list_item,
2820 &mut has_list_breaking_content_since_last_item,
2821 &mut min_continuation_for_tracking,
2822 );
2823 }
2824 } else {
2825 current_block = Some(ListBlock {
2827 start_line: line_num,
2828 end_line: line_num,
2829 is_ordered: list_item.is_ordered,
2830 marker: if list_item.is_ordered {
2831 None
2832 } else {
2833 Some(list_item.marker.clone())
2834 },
2835 blockquote_prefix,
2836 item_lines: vec![line_num],
2837 nesting_level: nesting,
2838 max_marker_width: list_item.marker.len(),
2839 });
2840
2841 reset_tracking_state(
2843 list_item,
2844 &mut has_list_breaking_content_since_last_item,
2845 &mut min_continuation_for_tracking,
2846 );
2847 }
2848
2849 last_list_item_line = line_num;
2850 current_indent_level = item_indent;
2851 last_marker_width = if list_item.is_ordered {
2852 list_item.marker.len() + 1 } else {
2854 list_item.marker.len()
2855 };
2856 } else if let Some(ref mut block) = current_block {
2857 let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
2867 lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
2868 } else {
2869 false
2870 };
2871
2872 let min_continuation_indent = if block.is_ordered {
2876 current_indent_level + last_marker_width
2877 } else {
2878 current_indent_level + 2 };
2880
2881 if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
2882 block.end_line = line_num;
2884 } else if line_info.is_blank {
2885 let mut check_idx = line_idx + 1;
2888 let mut found_continuation = false;
2889
2890 while check_idx < lines.len() && lines[check_idx].is_blank {
2892 check_idx += 1;
2893 }
2894
2895 if check_idx < lines.len() {
2896 let next_line = &lines[check_idx];
2897 if !next_line.in_code_block && next_line.indent >= min_continuation_indent {
2899 found_continuation = true;
2900 }
2901 else if !next_line.in_code_block
2903 && next_line.list_item.is_some()
2904 && let Some(item) = &next_line.list_item
2905 {
2906 let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
2907 .find(next_line.content(content))
2908 .map_or(String::new(), |m| m.as_str().to_string());
2909 if item.marker_column == current_indent_level
2910 && item.is_ordered == block.is_ordered
2911 && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
2912 {
2913 let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
2916 if let Some(between_line) = lines.get(idx) {
2917 let between_content = between_line.content(content);
2918 let trimmed = between_content.trim();
2919 if trimmed.is_empty() {
2921 return false;
2922 }
2923 let line_indent = between_content.len() - between_content.trim_start().len();
2925
2926 if trimmed.starts_with("```")
2928 || trimmed.starts_with("~~~")
2929 || trimmed.starts_with("---")
2930 || trimmed.starts_with("***")
2931 || trimmed.starts_with("___")
2932 || trimmed.starts_with(">")
2933 || crate::utils::skip_context::is_table_line(trimmed)
2934 || between_line.heading.is_some()
2935 {
2936 return true; }
2938
2939 line_indent >= min_continuation_indent
2941 } else {
2942 false
2943 }
2944 });
2945
2946 if block.is_ordered {
2947 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2950 if let Some(between_line) = lines.get(idx) {
2951 let trimmed = between_line.content(content).trim();
2952 if trimmed.is_empty() {
2953 return false;
2954 }
2955 trimmed.starts_with("```")
2957 || trimmed.starts_with("~~~")
2958 || trimmed.starts_with("---")
2959 || trimmed.starts_with("***")
2960 || trimmed.starts_with("___")
2961 || trimmed.starts_with(">")
2962 || crate::utils::skip_context::is_table_line(trimmed)
2963 || between_line.heading.is_some()
2964 } else {
2965 false
2966 }
2967 });
2968 found_continuation = !has_structural_separators;
2969 } else {
2970 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2972 if let Some(between_line) = lines.get(idx) {
2973 let trimmed = between_line.content(content).trim();
2974 if trimmed.is_empty() {
2975 return false;
2976 }
2977 trimmed.starts_with("```")
2979 || trimmed.starts_with("~~~")
2980 || trimmed.starts_with("---")
2981 || trimmed.starts_with("***")
2982 || trimmed.starts_with("___")
2983 || trimmed.starts_with(">")
2984 || crate::utils::skip_context::is_table_line(trimmed)
2985 || between_line.heading.is_some()
2986 } else {
2987 false
2988 }
2989 });
2990 found_continuation = !has_structural_separators;
2991 }
2992 }
2993 }
2994 }
2995
2996 if found_continuation {
2997 block.end_line = line_num;
2999 } else {
3000 list_blocks.push(block.clone());
3002 current_block = None;
3003 }
3004 } else {
3005 let min_required_indent = if block.is_ordered {
3008 current_indent_level + last_marker_width
3009 } else {
3010 current_indent_level + 2
3011 };
3012
3013 let line_content = line_info.content(content).trim();
3018
3019 let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
3021
3022 let is_structural_separator = line_info.heading.is_some()
3023 || line_content.starts_with("```")
3024 || line_content.starts_with("~~~")
3025 || line_content.starts_with("---")
3026 || line_content.starts_with("***")
3027 || line_content.starts_with("___")
3028 || line_content.starts_with(">")
3029 || looks_like_table;
3030
3031 let is_lazy_continuation = !is_structural_separator
3034 && !line_info.is_blank
3035 && (line_info.indent == 0 || line_info.indent >= min_required_indent);
3036
3037 if is_lazy_continuation {
3038 let content_to_check = if !blockquote_prefix.is_empty() {
3041 line_info
3043 .content(content)
3044 .strip_prefix(&blockquote_prefix)
3045 .unwrap_or(line_info.content(content))
3046 .trim()
3047 } else {
3048 line_info.content(content).trim()
3049 };
3050
3051 let starts_with_uppercase = content_to_check.chars().next().is_some_and(|c| c.is_uppercase());
3052
3053 if starts_with_uppercase && last_list_item_line > 0 {
3056 list_blocks.push(block.clone());
3058 current_block = None;
3059 } else {
3060 block.end_line = line_num;
3062 }
3063 } else {
3064 list_blocks.push(block.clone());
3066 current_block = None;
3067 }
3068 }
3069 }
3070 }
3071
3072 if let Some(block) = current_block {
3074 list_blocks.push(block);
3075 }
3076
3077 merge_adjacent_list_blocks(content, &mut list_blocks, lines);
3079
3080 list_blocks
3081 }
3082
3083 fn compute_char_frequency(content: &str) -> CharFrequency {
3085 let mut frequency = CharFrequency::default();
3086
3087 for ch in content.chars() {
3088 match ch {
3089 '#' => frequency.hash_count += 1,
3090 '*' => frequency.asterisk_count += 1,
3091 '_' => frequency.underscore_count += 1,
3092 '-' => frequency.hyphen_count += 1,
3093 '+' => frequency.plus_count += 1,
3094 '>' => frequency.gt_count += 1,
3095 '|' => frequency.pipe_count += 1,
3096 '[' => frequency.bracket_count += 1,
3097 '`' => frequency.backtick_count += 1,
3098 '<' => frequency.lt_count += 1,
3099 '!' => frequency.exclamation_count += 1,
3100 '\n' => frequency.newline_count += 1,
3101 _ => {}
3102 }
3103 }
3104
3105 frequency
3106 }
3107
3108 fn parse_html_tags(
3110 content: &str,
3111 lines: &[LineInfo],
3112 code_blocks: &[(usize, usize)],
3113 flavor: MarkdownFlavor,
3114 ) -> Vec<HtmlTag> {
3115 static HTML_TAG_REGEX: LazyLock<regex::Regex> =
3116 LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
3117
3118 let mut html_tags = Vec::with_capacity(content.matches('<').count());
3119
3120 for cap in HTML_TAG_REGEX.captures_iter(content) {
3121 let full_match = cap.get(0).unwrap();
3122 let match_start = full_match.start();
3123 let match_end = full_match.end();
3124
3125 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3127 continue;
3128 }
3129
3130 let is_closing = !cap.get(1).unwrap().as_str().is_empty();
3131 let tag_name_original = cap.get(2).unwrap().as_str();
3132 let tag_name = tag_name_original.to_lowercase();
3133 let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
3134
3135 if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
3138 continue;
3139 }
3140
3141 let mut line_num = 1;
3143 let mut col_start = match_start;
3144 let mut col_end = match_end;
3145 for (idx, line_info) in lines.iter().enumerate() {
3146 if match_start >= line_info.byte_offset {
3147 line_num = idx + 1;
3148 col_start = match_start - line_info.byte_offset;
3149 col_end = match_end - line_info.byte_offset;
3150 } else {
3151 break;
3152 }
3153 }
3154
3155 html_tags.push(HtmlTag {
3156 line: line_num,
3157 start_col: col_start,
3158 end_col: col_end,
3159 byte_offset: match_start,
3160 byte_end: match_end,
3161 tag_name,
3162 is_closing,
3163 is_self_closing,
3164 raw_content: full_match.as_str().to_string(),
3165 });
3166 }
3167
3168 html_tags
3169 }
3170
3171 fn parse_emphasis_spans(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<EmphasisSpan> {
3173 static EMPHASIS_REGEX: LazyLock<regex::Regex> =
3174 LazyLock::new(|| regex::Regex::new(r"(\*{1,3}|_{1,3})([^*_\s][^*_]*?)(\*{1,3}|_{1,3})").unwrap());
3175
3176 let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
3177
3178 for cap in EMPHASIS_REGEX.captures_iter(content) {
3179 let full_match = cap.get(0).unwrap();
3180 let match_start = full_match.start();
3181 let match_end = full_match.end();
3182
3183 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3185 continue;
3186 }
3187
3188 let opening_markers = cap.get(1).unwrap().as_str();
3189 let content_part = cap.get(2).unwrap().as_str();
3190 let closing_markers = cap.get(3).unwrap().as_str();
3191
3192 if opening_markers.chars().next() != closing_markers.chars().next()
3194 || opening_markers.len() != closing_markers.len()
3195 {
3196 continue;
3197 }
3198
3199 let marker = opening_markers.chars().next().unwrap();
3200 let marker_count = opening_markers.len();
3201
3202 let mut line_num = 1;
3204 let mut col_start = match_start;
3205 let mut col_end = match_end;
3206 for (idx, line_info) in lines.iter().enumerate() {
3207 if match_start >= line_info.byte_offset {
3208 line_num = idx + 1;
3209 col_start = match_start - line_info.byte_offset;
3210 col_end = match_end - line_info.byte_offset;
3211 } else {
3212 break;
3213 }
3214 }
3215
3216 emphasis_spans.push(EmphasisSpan {
3217 line: line_num,
3218 start_col: col_start,
3219 end_col: col_end,
3220 byte_offset: match_start,
3221 byte_end: match_end,
3222 marker,
3223 marker_count,
3224 content: content_part.to_string(),
3225 });
3226 }
3227
3228 emphasis_spans
3229 }
3230
3231 fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
3233 let mut table_rows = Vec::with_capacity(lines.len() / 20);
3234
3235 for (line_idx, line_info) in lines.iter().enumerate() {
3236 if line_info.in_code_block || line_info.is_blank {
3238 continue;
3239 }
3240
3241 let line = line_info.content(content);
3242 let line_num = line_idx + 1;
3243
3244 if !line.contains('|') {
3246 continue;
3247 }
3248
3249 let parts: Vec<&str> = line.split('|').collect();
3251 let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
3252
3253 let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
3255 let mut column_alignments = Vec::new();
3256
3257 if is_separator {
3258 for part in &parts[1..parts.len() - 1] {
3259 let trimmed = part.trim();
3261 let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
3262 "center".to_string()
3263 } else if trimmed.ends_with(':') {
3264 "right".to_string()
3265 } else if trimmed.starts_with(':') {
3266 "left".to_string()
3267 } else {
3268 "none".to_string()
3269 };
3270 column_alignments.push(alignment);
3271 }
3272 }
3273
3274 table_rows.push(TableRow {
3275 line: line_num,
3276 is_separator,
3277 column_count,
3278 column_alignments,
3279 });
3280 }
3281
3282 table_rows
3283 }
3284
3285 fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
3287 let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
3288
3289 for cap in BARE_URL_PATTERN.captures_iter(content) {
3291 let full_match = cap.get(0).unwrap();
3292 let match_start = full_match.start();
3293 let match_end = full_match.end();
3294
3295 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3297 continue;
3298 }
3299
3300 let preceding_char = if match_start > 0 {
3302 content.chars().nth(match_start - 1)
3303 } else {
3304 None
3305 };
3306 let following_char = content.chars().nth(match_end);
3307
3308 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3309 continue;
3310 }
3311 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3312 continue;
3313 }
3314
3315 let url = full_match.as_str();
3316 let url_type = if url.starts_with("https://") {
3317 "https"
3318 } else if url.starts_with("http://") {
3319 "http"
3320 } else if url.starts_with("ftp://") {
3321 "ftp"
3322 } else {
3323 "other"
3324 };
3325
3326 let mut line_num = 1;
3328 let mut col_start = match_start;
3329 let mut col_end = match_end;
3330 for (idx, line_info) in lines.iter().enumerate() {
3331 if match_start >= line_info.byte_offset {
3332 line_num = idx + 1;
3333 col_start = match_start - line_info.byte_offset;
3334 col_end = match_end - line_info.byte_offset;
3335 } else {
3336 break;
3337 }
3338 }
3339
3340 bare_urls.push(BareUrl {
3341 line: line_num,
3342 start_col: col_start,
3343 end_col: col_end,
3344 byte_offset: match_start,
3345 byte_end: match_end,
3346 url: url.to_string(),
3347 url_type: url_type.to_string(),
3348 });
3349 }
3350
3351 for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
3353 let full_match = cap.get(0).unwrap();
3354 let match_start = full_match.start();
3355 let match_end = full_match.end();
3356
3357 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3359 continue;
3360 }
3361
3362 let preceding_char = if match_start > 0 {
3364 content.chars().nth(match_start - 1)
3365 } else {
3366 None
3367 };
3368 let following_char = content.chars().nth(match_end);
3369
3370 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3371 continue;
3372 }
3373 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3374 continue;
3375 }
3376
3377 let email = full_match.as_str();
3378
3379 let mut line_num = 1;
3381 let mut col_start = match_start;
3382 let mut col_end = match_end;
3383 for (idx, line_info) in lines.iter().enumerate() {
3384 if match_start >= line_info.byte_offset {
3385 line_num = idx + 1;
3386 col_start = match_start - line_info.byte_offset;
3387 col_end = match_end - line_info.byte_offset;
3388 } else {
3389 break;
3390 }
3391 }
3392
3393 bare_urls.push(BareUrl {
3394 line: line_num,
3395 start_col: col_start,
3396 end_col: col_end,
3397 byte_offset: match_start,
3398 byte_end: match_end,
3399 url: email.to_string(),
3400 url_type: "email".to_string(),
3401 });
3402 }
3403
3404 bare_urls
3405 }
3406
    /// Builds a [`ValidHeadingsIter`] over this context's `lines`.
    ///
    /// NOTE(review): presumably the iterator yields only headings whose `is_valid`
    /// flag is set — confirm against `ValidHeadingsIter`.
    #[must_use]
    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
        ValidHeadingsIter::new(&self.lines)
    }
3430
3431 #[must_use]
3435 pub fn has_valid_headings(&self) -> bool {
3436 self.lines
3437 .iter()
3438 .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
3439 }
3440}
3441
3442fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
3444 if list_blocks.len() < 2 {
3445 return;
3446 }
3447
3448 let mut merger = ListBlockMerger::new(content, lines);
3449 *list_blocks = merger.merge(list_blocks);
3450}
3451
3452struct ListBlockMerger<'a> {
3454 content: &'a str,
3455 lines: &'a [LineInfo],
3456}
3457
3458impl<'a> ListBlockMerger<'a> {
3459 fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
3460 Self { content, lines }
3461 }
3462
3463 fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
3464 let mut merged = Vec::with_capacity(list_blocks.len());
3465 let mut current = list_blocks[0].clone();
3466
3467 for next in list_blocks.iter().skip(1) {
3468 if self.should_merge_blocks(¤t, next) {
3469 current = self.merge_two_blocks(current, next);
3470 } else {
3471 merged.push(current);
3472 current = next.clone();
3473 }
3474 }
3475
3476 merged.push(current);
3477 merged
3478 }
3479
3480 fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
3482 if !self.blocks_are_compatible(current, next) {
3484 return false;
3485 }
3486
3487 let spacing = self.analyze_spacing_between(current, next);
3489 match spacing {
3490 BlockSpacing::Consecutive => true,
3491 BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
3492 BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
3493 self.can_merge_with_content_between(current, next)
3494 }
3495 }
3496 }
3497
3498 fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
3500 current.is_ordered == next.is_ordered
3501 && current.blockquote_prefix == next.blockquote_prefix
3502 && current.nesting_level == next.nesting_level
3503 }
3504
3505 fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
3507 let gap = next.start_line - current.end_line;
3508
3509 match gap {
3510 1 => BlockSpacing::Consecutive,
3511 2 => BlockSpacing::SingleBlank,
3512 _ if gap > 2 => {
3513 if self.has_only_blank_lines_between(current, next) {
3514 BlockSpacing::MultipleBlanks
3515 } else {
3516 BlockSpacing::ContentBetween
3517 }
3518 }
3519 _ => BlockSpacing::Consecutive, }
3521 }
3522
3523 fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3525 if has_meaningful_content_between(self.content, current, next, self.lines) {
3528 return false; }
3530
3531 !current.is_ordered && current.marker == next.marker
3533 }
3534
3535 fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3537 if has_meaningful_content_between(self.content, current, next, self.lines) {
3539 return false; }
3541
3542 current.is_ordered && next.is_ordered
3544 }
3545
3546 fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3548 for line_num in (current.end_line + 1)..next.start_line {
3549 if let Some(line_info) = self.lines.get(line_num - 1)
3550 && !line_info.content(self.content).trim().is_empty()
3551 {
3552 return false;
3553 }
3554 }
3555 true
3556 }
3557
3558 fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
3560 current.end_line = next.end_line;
3561 current.item_lines.extend_from_slice(&next.item_lines);
3562
3563 current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
3565
3566 if !current.is_ordered && self.markers_differ(¤t, next) {
3568 current.marker = None; }
3570
3571 current
3572 }
3573
3574 fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
3576 current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
3577 }
3578}
3579
/// How far apart two list blocks are, measured in lines.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// The second block starts on the line right after the first one ends.
    Consecutive,
    /// Exactly one line separates the two blocks.
    SingleBlank,
    /// Several lines separate the blocks and all of them are blank.
    MultipleBlanks,
    /// Several lines separate the blocks and at least one of them is not blank.
    ContentBetween,
}
3588
3589fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
3591 for line_num in (current.end_line + 1)..next.start_line {
3593 if let Some(line_info) = lines.get(line_num - 1) {
3594 let trimmed = line_info.content(content).trim();
3596
3597 if trimmed.is_empty() {
3599 continue;
3600 }
3601
3602 if line_info.heading.is_some() {
3606 return true; }
3608
3609 if is_horizontal_rule(trimmed) {
3611 return true; }
3613
3614 if crate::utils::skip_context::is_table_line(trimmed) {
3616 return true; }
3618
3619 if trimmed.starts_with('>') {
3621 return true; }
3623
3624 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
3626 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3627
3628 let min_continuation_indent = if current.is_ordered {
3630 current.nesting_level + current.max_marker_width + 1 } else {
3632 current.nesting_level + 2
3633 };
3634
3635 if line_indent < min_continuation_indent {
3636 return true; }
3639 }
3640
3641 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3643
3644 let min_indent = if current.is_ordered {
3646 current.nesting_level + current.max_marker_width
3647 } else {
3648 current.nesting_level + 2
3649 };
3650
3651 if line_indent < min_indent {
3653 return true; }
3655
3656 }
3659 }
3660
3661 false
3663}
3664
/// Returns `true` when `trimmed` looks like a Markdown horizontal rule.
///
/// The text must be at least three bytes long, start with `-`, `*` or `_`,
/// contain that same character at least three times, and otherwise consist only
/// of spaces and tabs.
fn is_horizontal_rule(trimmed: &str) -> bool {
    if trimmed.len() < 3 {
        return false;
    }

    let marker = match trimmed.chars().next() {
        Some(c @ ('-' | '*' | '_')) => c,
        _ => return false,
    };

    let mut marker_count = 0;
    for ch in trimmed.chars() {
        match ch {
            c if c == marker => marker_count += 1,
            ' ' | '\t' => {}
            // Any other character means this is ordinary text.
            _ => return false,
        }
    }

    marker_count >= 3
}
3688
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty document still has one line offset but no line records.
    #[test]
    fn test_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);

        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }

    /// A document without a newline is a single line starting at offset 0.
    #[test]
    fn test_single_line() {
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);

        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
    }

    /// Line offsets and offset-to-position conversion across several lines.
    #[test]
    fn test_multi_line() {
        let content = "# Title\n\nSecond line\nThird line";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);

        // (byte offset, expected 1-based (line, column))
        let expectations = [(0, (1, 1)), (8, (2, 1)), (9, (3, 1)), (15, (3, 7)), (21, (4, 1))];
        for (offset, expected) in expectations {
            assert_eq!(ctx.offset_to_line_col(offset), expected, "offset {offset}");
        }
    }

    /// Per-line metadata: content, byte offset, indentation, blank and code-block flags.
    #[test]
    fn test_line_info() {
        // The second line is indented by four spaces; the assertions below depend on it.
        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.lines.len(), 7);

        let title = &ctx.lines[0];
        assert_eq!(title.content(ctx.content), "# Title");
        assert_eq!(title.byte_offset, 0);
        assert_eq!(title.indent, 0);
        assert!(!title.is_blank);
        assert!(!title.in_code_block);
        assert!(title.list_item.is_none());

        let indented = &ctx.lines[1];
        assert_eq!(indented.content(ctx.content), "    indented");
        assert_eq!(indented.byte_offset, 8);
        assert_eq!(indented.indent, 4);
        assert!(!indented.is_blank);

        let blank = &ctx.lines[2];
        assert_eq!(blank.content(ctx.content), "");
        assert!(blank.is_blank);

        // The lookup helpers take 1-based line numbers.
        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
    }

    /// Unordered and ordered markers are recognised; plain text is not a list item.
    #[test]
    fn test_list_item_detection() {
        // The nested bullet sits at column 2, as asserted below.
        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let line1 = &ctx.lines[0];
        assert!(line1.list_item.is_some());
        let list1 = line1.list_item.as_ref().unwrap();
        assert_eq!(list1.marker, "-");
        assert!(!list1.is_ordered);
        assert_eq!(list1.marker_column, 0);
        assert_eq!(list1.content_column, 2);

        let line2 = &ctx.lines[1];
        assert!(line2.list_item.is_some());
        let list2 = line2.list_item.as_ref().unwrap();
        assert_eq!(list2.marker, "*");
        assert_eq!(list2.marker_column, 2);

        let line3 = &ctx.lines[2];
        assert!(line3.list_item.is_some());
        let list3 = line3.list_item.as_ref().unwrap();
        assert_eq!(list3.marker, "1.");
        assert!(list3.is_ordered);
        assert_eq!(list3.number, Some(1));

        let line6 = &ctx.lines[5];
        assert!(line6.list_item.is_none());
    }

    /// Offsets on newline characters and at the very end of the content.
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        let content = "a\nb\nc";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // (byte offset, expected 1-based (line, column))
        let expectations = [
            (0, (1, 1)),
            (1, (1, 2)),
            (2, (2, 1)),
            (3, (2, 2)),
            (4, (3, 1)),
            (5, (3, 2)),
        ];
        for (offset, expected) in expectations {
            assert_eq!(ctx.offset_to_line_col(offset), expected, "offset {offset}");
        }
    }

    /// In MDX, leading `import`/`export` lines are flagged as ESM; later lines are not.
    #[test]
    fn test_mdx_esm_blocks() {
        let content = r##"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall

In {year}, the snowfall was above average.
It was followed by a warm spring which caused
flood conditions in many of the nearby rivers.

<Chart color="#fcb32c" year={year} />
"##;

        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);

        assert_eq!(ctx.lines.len(), 10);
        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
        assert!(
            !ctx.lines[3].in_esm_block,
            "Line 4 (heading) should NOT be in_esm_block"
        );
        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
    }

    /// The same `import`/`export` lines are not flagged in the Standard flavor.
    #[test]
    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
        let content = r#"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(
            !ctx.lines[0].in_esm_block,
            "Line 1 should NOT be in_esm_block in Standard flavor"
        );
        assert!(
            !ctx.lines[1].in_esm_block,
            "Line 2 should NOT be in_esm_block in Standard flavor"
        );
    }
}