1use crate::config::MarkdownFlavor;
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
4use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
5use regex::Regex;
6use std::borrow::Cow;
7use std::path::PathBuf;
8use std::sync::LazyLock;
9
/// Evaluates `$code` and returns its value; when `$profile` is true, prints the
/// elapsed wall-clock time of the evaluation to stderr, labelled with `$name`.
///
/// The timer is started unconditionally, before `$code` runs.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let timer = std::time::Instant::now();
        let value = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, timer.elapsed());
        }
        value
    }};
}
22
/// wasm32 variant of `profile_section!`: expands to `$code` alone.
/// `$name` and `$profile` are accepted for call-site compatibility but are
/// not evaluated, and nothing is timed or printed.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
27
/// Lazily compiled regex for Markdown links: `[text]` followed by either an
/// inline destination `(url "title")` or a reference `[id]`.
///
/// Capture groups: 1 = link text, 2 = URL in angle brackets, 3 = bare URL,
/// 4/5 = double-/single-quoted title, 6 = reference id.
/// Compiled in verbose (`x`) mode with `.` matching newlines (`s`).
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\] # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
        \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
        |
        \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
41
/// Lazily compiled regex for Markdown images: `![alt]` followed by either an
/// inline destination `(url "title")` or a reference `[id]`.
///
/// Capture groups: 1 = alt text, 2 = URL in angle brackets, 3 = bare URL,
/// 4/5 = double-/single-quoted title, 6 = reference id.
/// Compiled in verbose (`x`) mode with `.` matching newlines (`s`).
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\] # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
        \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
        |
        \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
55
/// Lazily compiled multi-line regex for reference definitions of the form
/// `[id]: url "title"`, allowing up to three leading spaces.
///
/// Capture groups: 1 = id, 2 = URL (run of non-whitespace),
/// 3/4 = optional double-/single-quoted title.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
59
/// Lazily compiled regex for `http`, `https` and `ftp` URLs written directly
/// in text. Group 1 is the scheme; optional port, path, query and fragment
/// parts are matched but not captured. Whitespace, angle/square brackets,
/// parentheses, backslashes, quotes and backticks terminate the match.
static BARE_URL_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(https?|ftp)://[^\s<>\[\]()\\'"`]+(?:\.[^\s<>\[\]()\\'"`]+)*(?::\d+)?(?:/[^\s<>\[\]()\\'"`]*)?(?:\?[^\s<>\[\]()\\'"`]*)?(?:#[^\s<>\[\]()\\'"`]*)?"#
    ).unwrap()
});
66
/// Lazily compiled regex for e-mail-like tokens: `local@domain.tld`, where the
/// final label is at least two ASCII letters.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
70
/// Lazily compiled regex capturing (group 1) a line's blockquote prefix:
/// optional leading whitespace, one or more `>`, and any whitespace after them.
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
73
/// Pre-computed facts about a single line of the document.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset in the source at which this line's content starts.
    pub byte_offset: usize,
    /// Length in bytes of this line's content (see [`LineInfo::content`]).
    pub byte_len: usize,
    /// Indentation of the line (unit not visible here — TODO confirm columns vs bytes).
    pub indent: usize,
    /// Whether the line is blank.
    pub is_blank: bool,
    /// Whether the line is flagged as lying inside a code block.
    pub in_code_block: bool,
    /// Whether the line is flagged as lying inside front matter.
    pub in_front_matter: bool,
    /// Whether the line is flagged as lying inside an HTML block.
    pub in_html_block: bool,
    /// Whether the line is flagged as lying inside an HTML comment.
    pub in_html_comment: bool,
    /// List-item details when the line starts a list item.
    pub list_item: Option<ListItemInfo>,
    /// Heading details when the line is a heading.
    pub heading: Option<HeadingInfo>,
    /// Blockquote details when the line is part of a blockquote.
    pub blockquote: Option<BlockquoteInfo>,
    /// Whether the line is flagged as lying inside a mkdocstrings block.
    pub in_mkdocstrings: bool,
    /// Whether the line is flagged as lying inside an ESM block.
    pub in_esm_block: bool,
    /// Set for every line after the first one of a multi-line code span,
    /// up to and including the span's last line.
    pub in_code_span_continuation: bool,
}
106
107impl LineInfo {
108 pub fn content<'a>(&self, source: &'a str) -> &'a str {
110 &source[self.byte_offset..self.byte_offset + self.byte_len]
111 }
112}
113
/// Details of a list item found on a line.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The list marker text as written.
    pub marker: String,
    /// `true` for ordered (numbered) items, `false` for unordered ones.
    pub is_ordered: bool,
    /// The item's number, when it has one.
    pub number: Option<usize>,
    /// Column of the marker (indexing base not visible here — TODO confirm).
    pub marker_column: usize,
    /// Column at which the item's content starts (same base as `marker_column`).
    pub content_column: usize,
}
128
/// Syntactic style a heading was written in.
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    /// ATX-style heading.
    ATX,
    /// Setext-style heading, first variant (presumably the level-1 underline — verify).
    Setext1,
    /// Setext-style heading, second variant (presumably the level-2 underline — verify).
    Setext2,
}
139
/// A link found in the document.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// 1-based number of the line on which the link starts.
    pub line: usize,
    /// Byte offset of the link start relative to the start of its line.
    pub start_col: usize,
    /// Byte offset of the link end relative to the start of the line it ends on.
    pub end_col: usize,
    /// Byte offset of the link start in the document.
    pub byte_offset: usize,
    /// Byte offset just past the link end in the document.
    pub byte_end: usize,
    /// Text between the link's opening `[` and its matching `]`.
    pub text: Cow<'a, str>,
    /// Destination URL; empty for reference links recovered by the regex fallback.
    pub url: Cow<'a, str>,
    /// Whether this is a reference-style link (full, collapsed or shortcut).
    pub is_reference: bool,
    /// Lower-cased reference id for reference-style links, otherwise `None`.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link classification as reported by the Markdown parser.
    pub link_type: LinkType,
}
164
/// A reference link the Markdown parser could not resolve, as reported through
/// its broken-link callback.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The unresolved reference text.
    pub reference: String,
    /// Byte range of the broken link in the document.
    pub span: std::ops::Range<usize>,
}
173
/// A footnote reference found in the document.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// Footnote identifier as reported by the Markdown parser.
    pub id: String,
    /// 1-based number of the line containing the start of the reference.
    pub line: usize,
    /// Byte offset of the start of the reference in the document.
    pub byte_offset: usize,
    /// Byte offset just past the end of the reference in the document.
    pub byte_end: usize,
}
186
/// An image found in the document. Field layout mirrors [`ParsedLink`].
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// Line on which the image starts (presumably 1-based, as for links — verify).
    pub line: usize,
    /// Start column of the image within its line.
    pub start_col: usize,
    /// End column of the image within the line it ends on.
    pub end_col: usize,
    /// Byte offset of the image start in the document.
    pub byte_offset: usize,
    /// Byte offset of the image end in the document.
    pub byte_end: usize,
    /// Alternative text of the image.
    pub alt_text: Cow<'a, str>,
    /// Image destination URL.
    pub url: Cow<'a, str>,
    /// Whether the image uses reference-style syntax.
    pub is_reference: bool,
    /// Reference id for reference-style images, otherwise `None`.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link classification as reported by the Markdown parser.
    pub link_type: LinkType,
}
211
/// A link reference definition (`[id]: url "title"`).
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line of the definition.
    pub line: usize,
    /// Definition id. Lookups compare it against a lower-cased query, so it is
    /// presumably stored lower-cased — verify against the code that fills it.
    pub id: String,
    /// Destination URL of the definition.
    pub url: String,
    /// Optional title of the definition.
    pub title: Option<String>,
    /// Byte offset of the start of the definition (inclusive).
    pub byte_offset: usize,
    /// Byte offset of the end of the definition (exclusive in range checks).
    pub byte_end: usize,
    /// Byte offset of the start of the title, when a title is present (inclusive).
    pub title_byte_start: Option<usize>,
    /// Byte offset of the end of the title, when a title is present (exclusive in range checks).
    pub title_byte_end: Option<usize>,
}
232
/// An inline code span, possibly spanning several lines.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// 1-based number of the line on which the span starts.
    pub line: usize,
    /// 1-based number of the line on which the span ends.
    pub end_line: usize,
    /// Start column on `line` (compared against 0-based columns; inclusive).
    pub start_col: usize,
    /// End column on `end_line` (compared against 0-based columns; exclusive).
    pub end_col: usize,
    /// Byte offset of the start of the span (inclusive).
    pub byte_offset: usize,
    /// Byte offset of the end of the span (exclusive in range checks).
    pub byte_end: usize,
    /// Number of backticks in the delimiter.
    pub backtick_count: usize,
    /// Text content of the span.
    pub content: String,
}
253
/// Details of a heading found on a line.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level.
    pub level: u8,
    /// Syntactic style of the heading.
    pub style: HeadingStyle,
    /// Marker text of the heading as written.
    pub marker: String,
    /// Column of the marker (indexing base not visible here — TODO confirm).
    pub marker_column: usize,
    /// Column at which the heading content starts.
    pub content_column: usize,
    /// Heading text (presumably with any custom id and closing sequence removed — verify).
    pub text: String,
    /// Custom id attached to the heading, if any.
    pub custom_id: Option<String>,
    /// Heading text as written.
    pub raw_text: String,
    /// Whether the heading has a closing sequence.
    pub has_closing_sequence: bool,
    /// The closing sequence text.
    pub closing_sequence: String,
    /// Whether the heading counts as valid; only valid headings are yielded by
    /// [`ValidHeadingsIter`].
    pub is_valid: bool,
}
281
/// A valid heading together with its location, as yielded by [`ValidHeadingsIter`].
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// 1-based line number of the heading.
    pub line_num: usize,
    /// The heading details (its `is_valid` flag is `true`).
    pub heading: &'a HeadingInfo,
    /// The full line record the heading belongs to.
    pub line_info: &'a LineInfo,
}
295
/// Iterator over the lines of a document that carry a valid heading.
pub struct ValidHeadingsIter<'a> {
    /// All line records of the document.
    lines: &'a [LineInfo],
    /// Index of the next line to examine.
    current_index: usize,
}
304
305impl<'a> ValidHeadingsIter<'a> {
306 fn new(lines: &'a [LineInfo]) -> Self {
307 Self {
308 lines,
309 current_index: 0,
310 }
311 }
312}
313
314impl<'a> Iterator for ValidHeadingsIter<'a> {
315 type Item = ValidHeading<'a>;
316
317 fn next(&mut self) -> Option<Self::Item> {
318 while self.current_index < self.lines.len() {
319 let idx = self.current_index;
320 self.current_index += 1;
321
322 let line_info = &self.lines[idx];
323 if let Some(heading) = &line_info.heading
324 && heading.is_valid
325 {
326 return Some(ValidHeading {
327 line_num: idx + 1, heading,
329 line_info,
330 });
331 }
332 }
333 None
334 }
335}
336
/// Details of a blockquote line.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Blockquote nesting depth of the line.
    pub nesting_level: usize,
    /// Indentation text before the first marker.
    pub indent: String,
    /// Column of the marker (indexing base not visible here — TODO confirm).
    pub marker_column: usize,
    /// The blockquote prefix of the line.
    pub prefix: String,
    /// Line content after the blockquote prefix.
    pub content: String,
    /// Whether no space follows the marker.
    pub has_no_space_after_marker: bool,
    /// Whether more than one space follows the marker.
    pub has_multiple_spaces_after_marker: bool,
    /// Whether the line needs the MD028 fix (semantics defined by that rule — see its implementation).
    pub needs_md028_fix: bool,
}
357
/// A contiguous list in the document.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// First line of the block (inclusive in range checks).
    pub start_line: usize,
    /// Last line of the block (inclusive in range checks).
    pub end_line: usize,
    /// Whether the list is ordered.
    pub is_ordered: bool,
    /// Marker associated with the block, if any.
    pub marker: Option<String>,
    /// Blockquote prefix associated with the block.
    pub blockquote_prefix: String,
    /// Line numbers of the items in the block.
    pub item_lines: Vec<usize>,
    /// Nesting level of the block.
    pub nesting_level: usize,
    /// Widest marker found in the block.
    pub max_marker_width: usize,
}
378
379use std::sync::{Arc, OnceLock};
380
/// Occurrence counts of Markdown-significant characters in the document,
/// used for cheap "could this construct exist at all?" checks.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count reported for `#`.
    pub hash_count: usize,
    /// Count reported for `*`.
    pub asterisk_count: usize,
    /// Count reported for `_`.
    pub underscore_count: usize,
    /// Count reported for `-`.
    pub hyphen_count: usize,
    /// Count reported for `+`.
    pub plus_count: usize,
    /// Count reported for `>`.
    pub gt_count: usize,
    /// Count reported for `|`.
    pub pipe_count: usize,
    /// Count reported for `[` (whether `]` is included is not visible here — verify).
    pub bracket_count: usize,
    /// Count reported for `` ` ``.
    pub backtick_count: usize,
    /// Count reported for `<`.
    pub lt_count: usize,
    /// Count reported for `!`.
    pub exclamation_count: usize,
    /// Count reported for `\n`.
    pub newline_count: usize,
}
409
/// An HTML tag found in the document.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line on which the tag appears.
    pub line: usize,
    /// Start column of the tag.
    pub start_col: usize,
    /// End column of the tag.
    pub end_col: usize,
    /// Byte offset of the start of the tag (inclusive).
    pub byte_offset: usize,
    /// Byte offset of the end of the tag (exclusive in range checks).
    pub byte_end: usize,
    /// Name of the tag.
    pub tag_name: String,
    /// Whether this is a closing tag.
    pub is_closing: bool,
    /// Whether the tag is self-closing.
    pub is_self_closing: bool,
    /// The tag text as written.
    pub raw_content: String,
}
432
/// An emphasis span found in the document.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line on which the span appears.
    pub line: usize,
    /// Start column of the span.
    pub start_col: usize,
    /// End column of the span.
    pub end_col: usize,
    /// Byte offset of the start of the span.
    pub byte_offset: usize,
    /// Byte offset of the end of the span.
    pub byte_end: usize,
    /// Marker character used by the span.
    pub marker: char,
    /// Number of marker characters in the delimiter.
    pub marker_count: usize,
    /// Text content of the span.
    pub content: String,
}
453
/// A table row found in the document.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line on which the row appears.
    pub line: usize,
    /// Whether the row is a header separator row.
    pub is_separator: bool,
    /// Number of columns in the row.
    pub column_count: usize,
    /// Column alignments, one entry per column (string encoding not visible here — verify).
    pub column_alignments: Vec<String>,
}
466
/// A URL or e-mail address written directly in the text.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line on which the URL appears.
    pub line: usize,
    /// Start column of the URL.
    pub start_col: usize,
    /// End column of the URL.
    pub end_col: usize,
    /// Byte offset of the start of the URL.
    pub byte_offset: usize,
    /// Byte offset of the end of the URL.
    pub byte_end: usize,
    /// The matched text.
    pub url: String,
    /// Kind of match, as a string (set of values not visible here — verify).
    pub url_type: String,
}
485
/// Everything the lint rules need to know about one document, computed once
/// in [`LintContext::new`] (eager fields) or on first use (`*_cache` fields).
pub struct LintContext<'a> {
    /// The document text.
    pub content: &'a str,
    /// Byte offset of the start of every line: `0`, then one past each `\n`.
    pub line_offsets: Vec<usize>,
    /// Byte ranges of code blocks.
    pub code_blocks: Vec<(usize, usize)>,
    /// Per-line records, indexed by line number minus one.
    pub lines: Vec<LineInfo>,
    /// Links found in the document.
    pub links: Vec<ParsedLink<'a>>,
    /// Images found in the document.
    pub images: Vec<ParsedImage<'a>>,
    /// Reference links the Markdown parser could not resolve.
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Footnote references found in the document.
    pub footnote_refs: Vec<FootnoteRef>,
    /// Link reference definitions found in the document.
    pub reference_defs: Vec<ReferenceDef>,
    /// Inline code spans; pre-filled by `new`.
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>,
    /// Lists found in the document.
    pub list_blocks: Vec<ListBlock>,
    /// Counts of Markdown-significant characters.
    pub char_frequency: CharFrequency,
    /// HTML tags; filled on first call to `html_tags`.
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>,
    /// Emphasis spans; filled on first call to `emphasis_spans`.
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>,
    /// Table rows; filled on first call to `table_rows`.
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>,
    /// Bare URLs; filled on first call to `bare_urls`.
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>,
    /// Result of the mixed-list-nesting check; filled on first call to
    /// `has_mixed_list_nesting`.
    has_mixed_list_nesting_cache: OnceLock<bool>,
    /// Byte ranges of HTML comments.
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Tables found in the document.
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Line index built over `content`.
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    /// Byte ranges of Jinja constructs (half-open in range checks).
    jinja_ranges: Vec<(usize, usize)>,
    /// Markdown flavor the document is linted as.
    pub flavor: MarkdownFlavor,
    /// Path of the file the content came from, if known.
    pub source_file: Option<PathBuf>,
}
511
/// The four consecutive pieces of a blockquote line, each borrowed from the
/// original line; concatenated in field order they reproduce the line.
struct BlockquoteComponents<'a> {
    /// Leading spaces/tabs before the first `>`.
    indent: &'a str,
    /// The run of `>` characters.
    markers: &'a str,
    /// Spaces/tabs directly after the markers.
    spaces_after: &'a str,
    /// Everything after `spaces_after`.
    content: &'a str,
}

/// Splits `line` into indent, `>` markers, the whitespace after them, and the
/// remaining content.
///
/// Returns `None` when the first character after leading spaces/tabs is not
/// `>` (including empty and whitespace-only lines). Only a single run of `>`
/// is treated as markers; a later `>` after whitespace ends up in `content`.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    const BLANKS: [char; 2] = [' ', '\t'];

    // Peel the pieces off the front one at a time; every cut lands right after
    // an ASCII character, so all slices stay on UTF-8 boundaries.
    let after_indent = line.trim_start_matches(&BLANKS[..]);
    let after_markers = after_indent.trim_start_matches('>');
    if after_markers.len() == after_indent.len() {
        // No `>` directly after the indentation.
        return None;
    }
    let content = after_markers.trim_start_matches(&BLANKS[..]);

    let indent_end = line.len() - after_indent.len();
    let markers_end = line.len() - after_markers.len();
    let spaces_end = line.len() - content.len();

    Some(BlockquoteComponents {
        indent: &line[..indent_end],
        markers: &line[indent_end..markers_end],
        spaces_after: &line[markers_end..spaces_end],
        content,
    })
}
556
557impl<'a> LintContext<'a> {
558 pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
559 #[cfg(not(target_arch = "wasm32"))]
560 let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
561 #[cfg(target_arch = "wasm32")]
562 let profile = false;
563
564 let line_offsets = profile_section!("Line offsets", profile, {
565 let mut offsets = vec![0];
566 for (i, c) in content.char_indices() {
567 if c == '\n' {
568 offsets.push(i + 1);
569 }
570 }
571 offsets
572 });
573
574 let code_blocks = profile_section!("Code blocks", profile, CodeBlockUtils::detect_code_blocks(content));
576
577 let html_comment_ranges = profile_section!(
579 "HTML comment ranges",
580 profile,
581 crate::utils::skip_context::compute_html_comment_ranges(content)
582 );
583
584 let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
586 if flavor == MarkdownFlavor::MkDocs {
587 crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
588 } else {
589 Vec::new()
590 }
591 });
592
593 let mut lines = profile_section!(
595 "Basic line info",
596 profile,
597 Self::compute_basic_line_info(
598 content,
599 &line_offsets,
600 &code_blocks,
601 flavor,
602 &html_comment_ranges,
603 &autodoc_ranges,
604 )
605 );
606
607 profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));
609
610 profile_section!(
612 "ESM blocks",
613 profile,
614 Self::detect_esm_blocks(content, &mut lines, flavor)
615 );
616
617 let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));
619
620 profile_section!(
622 "Headings & blockquotes",
623 profile,
624 Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
625 );
626
627 let code_spans = profile_section!("Code spans", profile, Self::parse_code_spans(content, &lines));
629
630 for span in &code_spans {
633 if span.end_line > span.line {
634 for line_num in (span.line + 1)..=span.end_line {
636 if let Some(line_info) = lines.get_mut(line_num - 1) {
637 line_info.in_code_span_continuation = true;
638 }
639 }
640 }
641 }
642
643 let (links, broken_links, footnote_refs) = profile_section!(
645 "Links",
646 profile,
647 Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
648 );
649
650 let images = profile_section!(
651 "Images",
652 profile,
653 Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
654 );
655
656 let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));
657
658 let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));
659
660 let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));
662
663 let table_blocks = profile_section!(
665 "Table blocks",
666 profile,
667 crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
668 content,
669 &code_blocks,
670 &code_spans,
671 &html_comment_ranges,
672 )
673 );
674
675 let line_index = profile_section!(
677 "Line index",
678 profile,
679 crate::utils::range_utils::LineIndex::new(content)
680 );
681
682 let jinja_ranges = profile_section!(
684 "Jinja ranges",
685 profile,
686 crate::utils::jinja_utils::find_jinja_ranges(content)
687 );
688
689 Self {
690 content,
691 line_offsets,
692 code_blocks,
693 lines,
694 links,
695 images,
696 broken_links,
697 footnote_refs,
698 reference_defs,
699 code_spans_cache: OnceLock::from(Arc::new(code_spans)),
700 list_blocks,
701 char_frequency,
702 html_tags_cache: OnceLock::new(),
703 emphasis_spans_cache: OnceLock::new(),
704 table_rows_cache: OnceLock::new(),
705 bare_urls_cache: OnceLock::new(),
706 has_mixed_list_nesting_cache: OnceLock::new(),
707 html_comment_ranges,
708 table_blocks,
709 line_index,
710 jinja_ranges,
711 flavor,
712 source_file,
713 }
714 }
715
716 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
718 Arc::clone(
719 self.code_spans_cache
720 .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
721 )
722 }
723
724 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
726 &self.html_comment_ranges
727 }
728
729 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
731 Arc::clone(self.html_tags_cache.get_or_init(|| {
732 Arc::new(Self::parse_html_tags(
733 self.content,
734 &self.lines,
735 &self.code_blocks,
736 self.flavor,
737 ))
738 }))
739 }
740
741 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
743 Arc::clone(
744 self.emphasis_spans_cache
745 .get_or_init(|| Arc::new(Self::parse_emphasis_spans(self.content, &self.lines, &self.code_blocks))),
746 )
747 }
748
749 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
751 Arc::clone(
752 self.table_rows_cache
753 .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
754 )
755 }
756
757 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
759 Arc::clone(
760 self.bare_urls_cache
761 .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
762 )
763 }
764
765 pub fn has_mixed_list_nesting(&self) -> bool {
769 *self
770 .has_mixed_list_nesting_cache
771 .get_or_init(|| self.compute_mixed_list_nesting())
772 }
773
774 fn compute_mixed_list_nesting(&self) -> bool {
776 let mut stack: Vec<(usize, bool)> = Vec::new();
781 let mut last_was_blank = false;
782
783 for line_info in &self.lines {
784 if line_info.in_code_block
786 || line_info.in_front_matter
787 || line_info.in_mkdocstrings
788 || line_info.in_html_comment
789 || line_info.in_esm_block
790 {
791 continue;
792 }
793
794 if line_info.is_blank {
796 last_was_blank = true;
797 continue;
798 }
799
800 if let Some(list_item) = &line_info.list_item {
801 let current_pos = if list_item.marker_column == 1 {
803 0
804 } else {
805 list_item.marker_column
806 };
807
808 if last_was_blank && current_pos == 0 {
810 stack.clear();
811 }
812 last_was_blank = false;
813
814 while let Some(&(pos, _)) = stack.last() {
816 if pos >= current_pos {
817 stack.pop();
818 } else {
819 break;
820 }
821 }
822
823 if let Some(&(_, parent_is_ordered)) = stack.last()
825 && parent_is_ordered != list_item.is_ordered
826 {
827 return true; }
829
830 stack.push((current_pos, list_item.is_ordered));
831 } else {
832 last_was_blank = false;
834 }
835 }
836
837 false
838 }
839
840 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
842 match self.line_offsets.binary_search(&offset) {
843 Ok(line) => (line + 1, 1),
844 Err(line) => {
845 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
846 (line, offset - line_start + 1)
847 }
848 }
849 }
850
851 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
853 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
855 return true;
856 }
857
858 self.code_spans()
860 .iter()
861 .any(|span| pos >= span.byte_offset && pos < span.byte_end)
862 }
863
864 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
866 if line_num > 0 {
867 self.lines.get(line_num - 1)
868 } else {
869 None
870 }
871 }
872
873 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
875 self.line_info(line_num).map(|info| info.byte_offset)
876 }
877
878 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
880 let normalized_id = ref_id.to_lowercase();
881 self.reference_defs
882 .iter()
883 .find(|def| def.id == normalized_id)
884 .map(|def| def.url.as_str())
885 }
886
887 pub fn is_in_list_block(&self, line_num: usize) -> bool {
889 self.list_blocks
890 .iter()
891 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
892 }
893
894 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
896 self.list_blocks
897 .iter()
898 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
899 }
900
901 pub fn is_in_code_block(&self, line_num: usize) -> bool {
905 if line_num == 0 || line_num > self.lines.len() {
906 return false;
907 }
908 self.lines[line_num - 1].in_code_block
909 }
910
911 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
913 if line_num == 0 || line_num > self.lines.len() {
914 return false;
915 }
916 self.lines[line_num - 1].in_front_matter
917 }
918
919 pub fn is_in_html_block(&self, line_num: usize) -> bool {
921 if line_num == 0 || line_num > self.lines.len() {
922 return false;
923 }
924 self.lines[line_num - 1].in_html_block
925 }
926
927 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
929 if line_num == 0 || line_num > self.lines.len() {
930 return false;
931 }
932
933 let col_0indexed = if col > 0 { col - 1 } else { 0 };
937 let code_spans = self.code_spans();
938 code_spans.iter().any(|span| {
939 if line_num < span.line || line_num > span.end_line {
941 return false;
942 }
943
944 if span.line == span.end_line {
945 col_0indexed >= span.start_col && col_0indexed < span.end_col
947 } else if line_num == span.line {
948 col_0indexed >= span.start_col
950 } else if line_num == span.end_line {
951 col_0indexed < span.end_col
953 } else {
954 true
956 }
957 })
958 }
959
960 #[inline]
962 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
963 let code_spans = self.code_spans();
964 code_spans
965 .iter()
966 .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
967 }
968
969 #[inline]
972 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
973 self.reference_defs
974 .iter()
975 .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
976 }
977
978 #[inline]
982 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
983 self.html_comment_ranges
984 .iter()
985 .any(|range| byte_pos >= range.start && byte_pos < range.end)
986 }
987
988 #[inline]
991 pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
992 self.html_tags()
993 .iter()
994 .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
995 }
996
997 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
999 self.jinja_ranges
1000 .iter()
1001 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1002 }
1003
1004 pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1006 self.reference_defs.iter().any(|def| {
1007 if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1008 byte_pos >= start && byte_pos < end
1009 } else {
1010 false
1011 }
1012 })
1013 }
1014
1015 pub fn has_char(&self, ch: char) -> bool {
1017 match ch {
1018 '#' => self.char_frequency.hash_count > 0,
1019 '*' => self.char_frequency.asterisk_count > 0,
1020 '_' => self.char_frequency.underscore_count > 0,
1021 '-' => self.char_frequency.hyphen_count > 0,
1022 '+' => self.char_frequency.plus_count > 0,
1023 '>' => self.char_frequency.gt_count > 0,
1024 '|' => self.char_frequency.pipe_count > 0,
1025 '[' => self.char_frequency.bracket_count > 0,
1026 '`' => self.char_frequency.backtick_count > 0,
1027 '<' => self.char_frequency.lt_count > 0,
1028 '!' => self.char_frequency.exclamation_count > 0,
1029 '\n' => self.char_frequency.newline_count > 0,
1030 _ => self.content.contains(ch), }
1032 }
1033
1034 pub fn char_count(&self, ch: char) -> usize {
1036 match ch {
1037 '#' => self.char_frequency.hash_count,
1038 '*' => self.char_frequency.asterisk_count,
1039 '_' => self.char_frequency.underscore_count,
1040 '-' => self.char_frequency.hyphen_count,
1041 '+' => self.char_frequency.plus_count,
1042 '>' => self.char_frequency.gt_count,
1043 '|' => self.char_frequency.pipe_count,
1044 '[' => self.char_frequency.bracket_count,
1045 '`' => self.char_frequency.backtick_count,
1046 '<' => self.char_frequency.lt_count,
1047 '!' => self.char_frequency.exclamation_count,
1048 '\n' => self.char_frequency.newline_count,
1049 _ => self.content.matches(ch).count(), }
1051 }
1052
1053 pub fn likely_has_headings(&self) -> bool {
1055 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
1057
1058 pub fn likely_has_lists(&self) -> bool {
1060 self.char_frequency.asterisk_count > 0
1061 || self.char_frequency.hyphen_count > 0
1062 || self.char_frequency.plus_count > 0
1063 }
1064
1065 pub fn likely_has_emphasis(&self) -> bool {
1067 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1068 }
1069
1070 pub fn likely_has_tables(&self) -> bool {
1072 self.char_frequency.pipe_count > 2
1073 }
1074
1075 pub fn likely_has_blockquotes(&self) -> bool {
1077 self.char_frequency.gt_count > 0
1078 }
1079
1080 pub fn likely_has_code(&self) -> bool {
1082 self.char_frequency.backtick_count > 0
1083 }
1084
1085 pub fn likely_has_links_or_images(&self) -> bool {
1087 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1088 }
1089
1090 pub fn likely_has_html(&self) -> bool {
1092 self.char_frequency.lt_count > 0
1093 }
1094
1095 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1097 self.html_tags()
1098 .iter()
1099 .filter(|tag| tag.line == line_num)
1100 .cloned()
1101 .collect()
1102 }
1103
1104 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1106 self.emphasis_spans()
1107 .iter()
1108 .filter(|span| span.line == line_num)
1109 .cloned()
1110 .collect()
1111 }
1112
1113 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1115 self.table_rows()
1116 .iter()
1117 .filter(|row| row.line == line_num)
1118 .cloned()
1119 .collect()
1120 }
1121
1122 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1124 self.bare_urls()
1125 .iter()
1126 .filter(|url| url.line == line_num)
1127 .cloned()
1128 .collect()
1129 }
1130
1131 #[inline]
1137 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1138 let idx = match lines.binary_search_by(|line| {
1140 if byte_offset < line.byte_offset {
1141 std::cmp::Ordering::Greater
1142 } else if byte_offset > line.byte_offset + line.byte_len {
1143 std::cmp::Ordering::Less
1144 } else {
1145 std::cmp::Ordering::Equal
1146 }
1147 }) {
1148 Ok(idx) => idx,
1149 Err(idx) => idx.saturating_sub(1),
1150 };
1151
1152 let line = &lines[idx];
1153 let line_num = idx + 1;
1154 let col = byte_offset.saturating_sub(line.byte_offset);
1155
1156 (idx, line_num, col)
1157 }
1158
1159 #[inline]
1161 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1162 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1164
1165 if idx > 0 {
1167 let span = &code_spans[idx - 1];
1168 if offset >= span.byte_offset && offset < span.byte_end {
1169 return true;
1170 }
1171 }
1172
1173 false
1174 }
1175
1176 fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1180 use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1181
1182 let mut link_ranges = Vec::new();
1183 let mut options = Options::empty();
1184 options.insert(Options::ENABLE_WIKILINKS);
1185 options.insert(Options::ENABLE_FOOTNOTES);
1186
1187 let parser = Parser::new_ext(content, options).into_offset_iter();
1188 let mut link_stack: Vec<usize> = Vec::new();
1189
1190 for (event, range) in parser {
1191 match event {
1192 Event::Start(Tag::Link { .. }) => {
1193 link_stack.push(range.start);
1194 }
1195 Event::End(TagEnd::Link) => {
1196 if let Some(start_pos) = link_stack.pop() {
1197 link_ranges.push((start_pos, range.end));
1198 }
1199 }
1200 _ => {}
1201 }
1202 }
1203
1204 link_ranges
1205 }
1206
1207 fn parse_links(
1209 content: &'a str,
1210 lines: &[LineInfo],
1211 code_blocks: &[(usize, usize)],
1212 code_spans: &[CodeSpan],
1213 flavor: MarkdownFlavor,
1214 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1215 ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1216 use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1217 use std::collections::HashSet;
1218
1219 let mut links = Vec::with_capacity(content.len() / 500);
1220 let mut broken_links = Vec::new();
1221 let mut footnote_refs = Vec::new();
1222
1223 let mut found_positions = HashSet::new();
1225
1226 let mut options = Options::empty();
1236 options.insert(Options::ENABLE_WIKILINKS);
1237 options.insert(Options::ENABLE_FOOTNOTES);
1238
1239 let parser = Parser::new_with_broken_link_callback(
1240 content,
1241 options,
1242 Some(|link: BrokenLink<'_>| {
1243 broken_links.push(BrokenLinkInfo {
1244 reference: link.reference.to_string(),
1245 span: link.span.clone(),
1246 });
1247 None
1248 }),
1249 )
1250 .into_offset_iter();
1251
1252 let mut link_stack: Vec<(
1253 usize,
1254 usize,
1255 pulldown_cmark::CowStr<'a>,
1256 LinkType,
1257 pulldown_cmark::CowStr<'a>,
1258 )> = Vec::new();
1259 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1262 match event {
1263 Event::Start(Tag::Link {
1264 link_type,
1265 dest_url,
1266 id,
1267 ..
1268 }) => {
1269 link_stack.push((range.start, range.end, dest_url, link_type, id));
1271 text_chunks.clear();
1272 }
1273 Event::Text(text) if !link_stack.is_empty() => {
1274 text_chunks.push((text.to_string(), range.start, range.end));
1276 }
1277 Event::Code(code) if !link_stack.is_empty() => {
1278 let code_text = format!("`{code}`");
1280 text_chunks.push((code_text, range.start, range.end));
1281 }
1282 Event::End(TagEnd::Link) => {
1283 if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1284 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1286 text_chunks.clear();
1287 continue;
1288 }
1289
1290 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1292
1293 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1295 text_chunks.clear();
1296 continue;
1297 }
1298
1299 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1300
1301 let is_reference = matches!(
1302 link_type,
1303 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1304 );
1305
1306 let link_text = if start_pos < content.len() {
1309 let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1310
1311 let mut close_pos = None;
1315 let mut depth = 0;
1316 let mut in_code_span = false;
1317
1318 for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1319 let mut backslash_count = 0;
1321 let mut j = i;
1322 while j > 0 && link_bytes[j - 1] == b'\\' {
1323 backslash_count += 1;
1324 j -= 1;
1325 }
1326 let is_escaped = backslash_count % 2 != 0;
1327
1328 if byte == b'`' && !is_escaped {
1330 in_code_span = !in_code_span;
1331 }
1332
1333 if !is_escaped && !in_code_span {
1335 if byte == b'[' {
1336 depth += 1;
1337 } else if byte == b']' {
1338 if depth == 0 {
1339 close_pos = Some(i);
1341 break;
1342 } else {
1343 depth -= 1;
1344 }
1345 }
1346 }
1347 }
1348
1349 if let Some(pos) = close_pos {
1350 Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1351 } else {
1352 Cow::Borrowed("")
1353 }
1354 } else {
1355 Cow::Borrowed("")
1356 };
1357
1358 let reference_id = if is_reference && !ref_id.is_empty() {
1360 Some(Cow::Owned(ref_id.to_lowercase()))
1361 } else if is_reference {
1362 Some(Cow::Owned(link_text.to_lowercase()))
1364 } else {
1365 None
1366 };
1367
1368 let has_escaped_bang = start_pos >= 2
1372 && content.as_bytes().get(start_pos - 2) == Some(&b'\\')
1373 && content.as_bytes().get(start_pos - 1) == Some(&b'!');
1374
1375 let has_escaped_bracket =
1378 start_pos >= 1 && content.as_bytes().get(start_pos - 1) == Some(&b'\\');
1379
1380 if has_escaped_bang || has_escaped_bracket {
1381 text_chunks.clear();
1382 continue; }
1384
1385 found_positions.insert(start_pos);
1387
1388 links.push(ParsedLink {
1389 line: line_num,
1390 start_col: col_start,
1391 end_col: col_end,
1392 byte_offset: start_pos,
1393 byte_end: range.end,
1394 text: link_text,
1395 url: Cow::Owned(url.to_string()),
1396 is_reference,
1397 reference_id,
1398 link_type,
1399 });
1400
1401 text_chunks.clear();
1402 }
1403 }
1404 Event::FootnoteReference(footnote_id) => {
1405 if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1408 continue;
1409 }
1410
1411 let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1412 footnote_refs.push(FootnoteRef {
1413 id: footnote_id.to_string(),
1414 line: line_num,
1415 byte_offset: range.start,
1416 byte_end: range.end,
1417 });
1418 }
1419 _ => {}
1420 }
1421 }
1422
1423 for cap in LINK_PATTERN.captures_iter(content) {
1427 let full_match = cap.get(0).unwrap();
1428 let match_start = full_match.start();
1429 let match_end = full_match.end();
1430
1431 if found_positions.contains(&match_start) {
1433 continue;
1434 }
1435
1436 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1438 continue;
1439 }
1440
1441 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1443 continue;
1444 }
1445
1446 if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1448 continue;
1449 }
1450
1451 if Self::is_offset_in_code_span(code_spans, match_start) {
1453 continue;
1454 }
1455
1456 if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1458 continue;
1459 }
1460
1461 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1463
1464 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1466 continue;
1467 }
1468
1469 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1470
1471 let text = cap.get(1).map_or("", |m| m.as_str());
1472
1473 if let Some(ref_id) = cap.get(6) {
1475 let ref_id_str = ref_id.as_str();
1476 let normalized_ref = if ref_id_str.is_empty() {
1477 Cow::Owned(text.to_lowercase()) } else {
1479 Cow::Owned(ref_id_str.to_lowercase())
1480 };
1481
1482 links.push(ParsedLink {
1484 line: line_num,
1485 start_col: col_start,
1486 end_col: col_end,
1487 byte_offset: match_start,
1488 byte_end: match_end,
1489 text: Cow::Borrowed(text),
1490 url: Cow::Borrowed(""), is_reference: true,
1492 reference_id: Some(normalized_ref),
1493 link_type: LinkType::Reference, });
1495 }
1496 }
1497
1498 (links, broken_links, footnote_refs)
1499 }
1500
1501 fn parse_images(
1503 content: &'a str,
1504 lines: &[LineInfo],
1505 code_blocks: &[(usize, usize)],
1506 code_spans: &[CodeSpan],
1507 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1508 ) -> Vec<ParsedImage<'a>> {
1509 use crate::utils::skip_context::is_in_html_comment_ranges;
1510 use std::collections::HashSet;
1511
1512 let mut images = Vec::with_capacity(content.len() / 1000);
1514 let mut found_positions = HashSet::new();
1515
1516 let parser = Parser::new(content).into_offset_iter();
1518 let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1519 Vec::new();
1520 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1523 match event {
1524 Event::Start(Tag::Image {
1525 link_type,
1526 dest_url,
1527 id,
1528 ..
1529 }) => {
1530 image_stack.push((range.start, dest_url, link_type, id));
1531 text_chunks.clear();
1532 }
1533 Event::Text(text) if !image_stack.is_empty() => {
1534 text_chunks.push((text.to_string(), range.start, range.end));
1535 }
1536 Event::Code(code) if !image_stack.is_empty() => {
1537 let code_text = format!("`{code}`");
1538 text_chunks.push((code_text, range.start, range.end));
1539 }
1540 Event::End(TagEnd::Image) => {
1541 if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1542 if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1544 continue;
1545 }
1546
1547 if Self::is_offset_in_code_span(code_spans, start_pos) {
1549 continue;
1550 }
1551
1552 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1554 continue;
1555 }
1556
1557 let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1559 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1560
1561 let is_reference = matches!(
1562 link_type,
1563 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1564 );
1565
1566 let alt_text = if start_pos < content.len() {
1569 let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1570
1571 let mut close_pos = None;
1574 let mut depth = 0;
1575
1576 if image_bytes.len() > 2 {
1577 for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1578 let mut backslash_count = 0;
1580 let mut j = i;
1581 while j > 0 && image_bytes[j - 1] == b'\\' {
1582 backslash_count += 1;
1583 j -= 1;
1584 }
1585 let is_escaped = backslash_count % 2 != 0;
1586
1587 if !is_escaped {
1588 if byte == b'[' {
1589 depth += 1;
1590 } else if byte == b']' {
1591 if depth == 0 {
1592 close_pos = Some(i);
1594 break;
1595 } else {
1596 depth -= 1;
1597 }
1598 }
1599 }
1600 }
1601 }
1602
1603 if let Some(pos) = close_pos {
1604 Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1605 } else {
1606 Cow::Borrowed("")
1607 }
1608 } else {
1609 Cow::Borrowed("")
1610 };
1611
1612 let reference_id = if is_reference && !ref_id.is_empty() {
1613 Some(Cow::Owned(ref_id.to_lowercase()))
1614 } else if is_reference {
1615 Some(Cow::Owned(alt_text.to_lowercase())) } else {
1617 None
1618 };
1619
1620 found_positions.insert(start_pos);
1621 images.push(ParsedImage {
1622 line: line_num,
1623 start_col: col_start,
1624 end_col: col_end,
1625 byte_offset: start_pos,
1626 byte_end: range.end,
1627 alt_text,
1628 url: Cow::Owned(url.to_string()),
1629 is_reference,
1630 reference_id,
1631 link_type,
1632 });
1633 }
1634 }
1635 _ => {}
1636 }
1637 }
1638
1639 for cap in IMAGE_PATTERN.captures_iter(content) {
1641 let full_match = cap.get(0).unwrap();
1642 let match_start = full_match.start();
1643 let match_end = full_match.end();
1644
1645 if found_positions.contains(&match_start) {
1647 continue;
1648 }
1649
1650 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1652 continue;
1653 }
1654
1655 if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1657 || Self::is_offset_in_code_span(code_spans, match_start)
1658 || is_in_html_comment_ranges(html_comment_ranges, match_start)
1659 {
1660 continue;
1661 }
1662
1663 if let Some(ref_id) = cap.get(6) {
1665 let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1666 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1667 let alt_text = cap.get(1).map_or("", |m| m.as_str());
1668 let ref_id_str = ref_id.as_str();
1669 let normalized_ref = if ref_id_str.is_empty() {
1670 Cow::Owned(alt_text.to_lowercase())
1671 } else {
1672 Cow::Owned(ref_id_str.to_lowercase())
1673 };
1674
1675 images.push(ParsedImage {
1676 line: line_num,
1677 start_col: col_start,
1678 end_col: col_end,
1679 byte_offset: match_start,
1680 byte_end: match_end,
1681 alt_text: Cow::Borrowed(alt_text),
1682 url: Cow::Borrowed(""),
1683 is_reference: true,
1684 reference_id: Some(normalized_ref),
1685 link_type: LinkType::Reference, });
1687 }
1688 }
1689
1690 images
1691 }
1692
1693 fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1695 let mut refs = Vec::with_capacity(lines.len() / 20); for (line_idx, line_info) in lines.iter().enumerate() {
1699 if line_info.in_code_block {
1701 continue;
1702 }
1703
1704 let line = line_info.content(content);
1705 let line_num = line_idx + 1;
1706
1707 if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1708 let id = cap.get(1).unwrap().as_str().to_lowercase();
1709 let url = cap.get(2).unwrap().as_str().to_string();
1710 let title_match = cap.get(3).or_else(|| cap.get(4));
1711 let title = title_match.map(|m| m.as_str().to_string());
1712
1713 let match_obj = cap.get(0).unwrap();
1716 let byte_offset = line_info.byte_offset + match_obj.start();
1717 let byte_end = line_info.byte_offset + match_obj.end();
1718
1719 let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1721 let start = line_info.byte_offset + m.start().saturating_sub(1);
1723 let end = line_info.byte_offset + m.end() + 1; (Some(start), Some(end))
1725 } else {
1726 (None, None)
1727 };
1728
1729 refs.push(ReferenceDef {
1730 line: line_num,
1731 id,
1732 url,
1733 title,
1734 byte_offset,
1735 byte_end,
1736 title_byte_start,
1737 title_byte_end,
1738 });
1739 }
1740 }
1741
1742 refs
1743 }
1744
1745 #[inline]
1749 fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
1750 let trimmed_start = line.trim_start();
1751 if !trimmed_start.starts_with('>') {
1752 return None;
1753 }
1754
1755 let mut remaining = line;
1757 let mut total_prefix_len = 0;
1758
1759 loop {
1760 let trimmed = remaining.trim_start();
1761 if !trimmed.starts_with('>') {
1762 break;
1763 }
1764
1765 let leading_ws_len = remaining.len() - trimmed.len();
1767 total_prefix_len += leading_ws_len + 1;
1768
1769 let after_gt = &trimmed[1..];
1770
1771 if let Some(stripped) = after_gt.strip_prefix(' ') {
1773 total_prefix_len += 1;
1774 remaining = stripped;
1775 } else if let Some(stripped) = after_gt.strip_prefix('\t') {
1776 total_prefix_len += 1;
1777 remaining = stripped;
1778 } else {
1779 remaining = after_gt;
1780 }
1781 }
1782
1783 Some((&line[..total_prefix_len], remaining))
1784 }
1785
1786 #[inline]
1790 fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
1791 let bytes = line.as_bytes();
1792 let mut i = 0;
1793
1794 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1796 i += 1;
1797 }
1798
1799 if i >= bytes.len() {
1801 return None;
1802 }
1803 let marker = bytes[i] as char;
1804 if marker != '-' && marker != '*' && marker != '+' {
1805 return None;
1806 }
1807 let marker_pos = i;
1808 i += 1;
1809
1810 let spacing_start = i;
1812 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1813 i += 1;
1814 }
1815
1816 Some((&line[..marker_pos], marker, &line[spacing_start..i], &line[i..]))
1817 }
1818
1819 #[inline]
1823 fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
1824 let bytes = line.as_bytes();
1825 let mut i = 0;
1826
1827 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1829 i += 1;
1830 }
1831
1832 let number_start = i;
1834 while i < bytes.len() && bytes[i].is_ascii_digit() {
1835 i += 1;
1836 }
1837 if i == number_start {
1838 return None; }
1840
1841 if i >= bytes.len() {
1843 return None;
1844 }
1845 let delimiter = bytes[i] as char;
1846 if delimiter != '.' && delimiter != ')' {
1847 return None;
1848 }
1849 let delimiter_pos = i;
1850 i += 1;
1851
1852 let spacing_start = i;
1854 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1855 i += 1;
1856 }
1857
1858 Some((
1859 &line[..number_start],
1860 &line[number_start..delimiter_pos],
1861 delimiter,
1862 &line[spacing_start..i],
1863 &line[i..],
1864 ))
1865 }
1866
1867 fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
1870 let num_lines = line_offsets.len();
1871 let mut in_code_block = vec![false; num_lines];
1872
1873 for &(start, end) in code_blocks {
1875 let safe_start = if start > 0 && !content.is_char_boundary(start) {
1877 let mut boundary = start;
1878 while boundary > 0 && !content.is_char_boundary(boundary) {
1879 boundary -= 1;
1880 }
1881 boundary
1882 } else {
1883 start
1884 };
1885
1886 let safe_end = if end < content.len() && !content.is_char_boundary(end) {
1887 let mut boundary = end;
1888 while boundary < content.len() && !content.is_char_boundary(boundary) {
1889 boundary += 1;
1890 }
1891 boundary
1892 } else {
1893 end.min(content.len())
1894 };
1895
1896 let first_line_after = line_offsets.partition_point(|&offset| offset <= safe_start);
1915 let first_line = first_line_after.saturating_sub(1);
1916 let last_line = line_offsets.partition_point(|&offset| offset < safe_end);
1917
1918 for flag in in_code_block.iter_mut().take(last_line).skip(first_line) {
1920 *flag = true;
1921 }
1922 }
1923
1924 in_code_block
1925 }
1926
1927 fn compute_basic_line_info(
1929 content: &str,
1930 line_offsets: &[usize],
1931 code_blocks: &[(usize, usize)],
1932 flavor: MarkdownFlavor,
1933 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1934 autodoc_ranges: &[crate::utils::skip_context::ByteRange],
1935 ) -> Vec<LineInfo> {
1936 let content_lines: Vec<&str> = content.lines().collect();
1937 let mut lines = Vec::with_capacity(content_lines.len());
1938
1939 let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
1941
1942 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
1945
1946 for (i, line) in content_lines.iter().enumerate() {
1947 let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
1948 let indent = line.len() - line.trim_start().len();
1949
1950 let blockquote_parse = Self::parse_blockquote_prefix(line);
1952
1953 let is_blank = if let Some((_, content)) = blockquote_parse {
1955 content.trim().is_empty()
1957 } else {
1958 line.trim().is_empty()
1959 };
1960
1961 let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
1963
1964 let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
1966 && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
1967 let line_end_offset = byte_offset + line.len();
1970 let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
1971 html_comment_ranges,
1972 byte_offset,
1973 line_end_offset,
1974 );
1975 let list_item = if !(in_code_block
1976 || is_blank
1977 || in_mkdocstrings
1978 || in_html_comment
1979 || (front_matter_end > 0 && i < front_matter_end))
1980 {
1981 let (line_for_list_check, blockquote_prefix_len) = if let Some((prefix, content)) = blockquote_parse {
1983 (content, prefix.len())
1984 } else {
1985 (&**line, 0)
1986 };
1987
1988 if let Some((leading_spaces, marker, spacing, _content)) =
1989 Self::parse_unordered_list(line_for_list_check)
1990 {
1991 let marker_column = blockquote_prefix_len + leading_spaces.len();
1992 let content_column = marker_column + 1 + spacing.len();
1993
1994 if spacing.is_empty() {
2001 None
2002 } else {
2003 Some(ListItemInfo {
2004 marker: marker.to_string(),
2005 is_ordered: false,
2006 number: None,
2007 marker_column,
2008 content_column,
2009 })
2010 }
2011 } else if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
2012 Self::parse_ordered_list(line_for_list_check)
2013 {
2014 let marker = format!("{number_str}{delimiter}");
2015 let marker_column = blockquote_prefix_len + leading_spaces.len();
2016 let content_column = marker_column + marker.len() + spacing.len();
2017
2018 if spacing.is_empty() {
2021 None
2022 } else {
2023 Some(ListItemInfo {
2024 marker,
2025 is_ordered: true,
2026 number: number_str.parse().ok(),
2027 marker_column,
2028 content_column,
2029 })
2030 }
2031 } else {
2032 None
2033 }
2034 } else {
2035 None
2036 };
2037
2038 lines.push(LineInfo {
2039 byte_offset,
2040 byte_len: line.len(),
2041 indent,
2042 is_blank,
2043 in_code_block,
2044 in_front_matter: front_matter_end > 0 && i < front_matter_end,
2045 in_html_block: false, in_html_comment,
2047 list_item,
2048 heading: None, blockquote: None, in_mkdocstrings,
2051 in_esm_block: false, in_code_span_continuation: false, });
2054 }
2055
2056 lines
2057 }
2058
2059 fn detect_headings_and_blockquotes(
2061 content: &str,
2062 lines: &mut [LineInfo],
2063 flavor: MarkdownFlavor,
2064 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2065 link_byte_ranges: &[(usize, usize)],
2066 ) {
2067 static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
2069 LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
2070 static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
2071 LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
2072
2073 let content_lines: Vec<&str> = content.lines().collect();
2074
2075 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2077
2078 for i in 0..lines.len() {
2080 if lines[i].in_code_block {
2081 continue;
2082 }
2083
2084 if front_matter_end > 0 && i < front_matter_end {
2086 continue;
2087 }
2088
2089 if lines[i].in_html_block {
2091 continue;
2092 }
2093
2094 let line = content_lines[i];
2095
2096 if let Some(bq) = parse_blockquote_detailed(line) {
2098 let nesting_level = bq.markers.len(); let marker_column = bq.indent.len();
2100
2101 let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
2103
2104 let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
2106 let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
2109
2110 let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
2114
2115 lines[i].blockquote = Some(BlockquoteInfo {
2116 nesting_level,
2117 indent: bq.indent.to_string(),
2118 marker_column,
2119 prefix,
2120 content: bq.content.to_string(),
2121 has_no_space_after_marker: has_no_space,
2122 has_multiple_spaces_after_marker: has_multiple_spaces,
2123 needs_md028_fix,
2124 });
2125 }
2126
2127 if lines[i].is_blank {
2129 continue;
2130 }
2131
2132 let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
2135 crate::utils::mkdocs_snippets::is_snippet_section_start(line)
2136 || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
2137 } else {
2138 false
2139 };
2140
2141 if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
2142 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
2144 continue;
2145 }
2146 let line_offset = lines[i].byte_offset;
2149 if link_byte_ranges
2150 .iter()
2151 .any(|&(start, end)| line_offset > start && line_offset < end)
2152 {
2153 continue;
2154 }
2155 let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
2156 let hashes = caps.get(2).map_or("", |m| m.as_str());
2157 let spaces_after = caps.get(3).map_or("", |m| m.as_str());
2158 let rest = caps.get(4).map_or("", |m| m.as_str());
2159
2160 let level = hashes.len() as u8;
2161 let marker_column = leading_spaces.len();
2162
2163 let (text, has_closing, closing_seq) = {
2165 let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
2167 if rest[id_start..].trim_end().ends_with('}') {
2169 (&rest[..id_start], &rest[id_start..])
2171 } else {
2172 (rest, "")
2173 }
2174 } else {
2175 (rest, "")
2176 };
2177
2178 let trimmed_rest = rest_without_id.trim_end();
2180 if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
2181 let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();
2184
2185 let last_hash_char_idx = char_positions
2187 .iter()
2188 .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);
2189
2190 if let Some(mut char_idx) = last_hash_char_idx {
2191 while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
2193 char_idx -= 1;
2194 }
2195
2196 let start_of_hashes = char_positions[char_idx].0;
2198
2199 let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();
2201
2202 let potential_closing = &trimmed_rest[start_of_hashes..];
2204 let is_all_hashes = potential_closing.chars().all(|c| c == '#');
2205
2206 if is_all_hashes && has_space_before {
2207 let closing_hashes = potential_closing.to_string();
2209 let text_part = if !custom_id_part.is_empty() {
2212 format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
2215 } else {
2216 trimmed_rest[..start_of_hashes].trim_end().to_string()
2217 };
2218 (text_part, true, closing_hashes)
2219 } else {
2220 (rest.to_string(), false, String::new())
2222 }
2223 } else {
2224 (rest.to_string(), false, String::new())
2226 }
2227 } else {
2228 (rest.to_string(), false, String::new())
2230 }
2231 };
2232
2233 let content_column = marker_column + hashes.len() + spaces_after.len();
2234
2235 let raw_text = text.trim().to_string();
2237 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2238
2239 if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
2241 let next_line = content_lines[i + 1];
2242 if !lines[i + 1].in_code_block
2243 && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
2244 && let Some(next_line_id) =
2245 crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
2246 {
2247 custom_id = Some(next_line_id);
2248 }
2249 }
2250
2251 let is_valid = !spaces_after.is_empty()
2261 || rest.is_empty()
2262 || level > 1
2263 || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());
2264
2265 lines[i].heading = Some(HeadingInfo {
2266 level,
2267 style: HeadingStyle::ATX,
2268 marker: hashes.to_string(),
2269 marker_column,
2270 content_column,
2271 text: clean_text,
2272 custom_id,
2273 raw_text,
2274 has_closing_sequence: has_closing,
2275 closing_sequence: closing_seq,
2276 is_valid,
2277 });
2278 }
2279 else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2281 let next_line = content_lines[i + 1];
2282 if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2283 if front_matter_end > 0 && i < front_matter_end {
2285 continue;
2286 }
2287
2288 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2290 {
2291 continue;
2292 }
2293
2294 let underline = next_line.trim();
2295
2296 let level = if underline.starts_with('=') { 1 } else { 2 };
2297 let style = if level == 1 {
2298 HeadingStyle::Setext1
2299 } else {
2300 HeadingStyle::Setext2
2301 };
2302
2303 let raw_text = line.trim().to_string();
2305 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2306
2307 if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2309 let attr_line = content_lines[i + 2];
2310 if !lines[i + 2].in_code_block
2311 && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2312 && let Some(attr_line_id) =
2313 crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2314 {
2315 custom_id = Some(attr_line_id);
2316 }
2317 }
2318
2319 lines[i].heading = Some(HeadingInfo {
2320 level,
2321 style,
2322 marker: underline.to_string(),
2323 marker_column: next_line.len() - next_line.trim_start().len(),
2324 content_column: lines[i].indent,
2325 text: clean_text,
2326 custom_id,
2327 raw_text,
2328 has_closing_sequence: false,
2329 closing_sequence: String::new(),
2330 is_valid: true, });
2332 }
2333 }
2334 }
2335 }
2336
2337 fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2339 const BLOCK_ELEMENTS: &[&str] = &[
2342 "address",
2343 "article",
2344 "aside",
2345 "audio",
2346 "blockquote",
2347 "canvas",
2348 "details",
2349 "dialog",
2350 "dd",
2351 "div",
2352 "dl",
2353 "dt",
2354 "embed",
2355 "fieldset",
2356 "figcaption",
2357 "figure",
2358 "footer",
2359 "form",
2360 "h1",
2361 "h2",
2362 "h3",
2363 "h4",
2364 "h5",
2365 "h6",
2366 "header",
2367 "hr",
2368 "iframe",
2369 "li",
2370 "main",
2371 "menu",
2372 "nav",
2373 "noscript",
2374 "object",
2375 "ol",
2376 "p",
2377 "picture",
2378 "pre",
2379 "script",
2380 "search",
2381 "section",
2382 "source",
2383 "style",
2384 "summary",
2385 "svg",
2386 "table",
2387 "tbody",
2388 "td",
2389 "template",
2390 "textarea",
2391 "tfoot",
2392 "th",
2393 "thead",
2394 "tr",
2395 "track",
2396 "ul",
2397 "video",
2398 ];
2399
2400 let mut i = 0;
2401 while i < lines.len() {
2402 if lines[i].in_code_block || lines[i].in_front_matter {
2404 i += 1;
2405 continue;
2406 }
2407
2408 let trimmed = lines[i].content(content).trim_start();
2409
2410 if trimmed.starts_with('<') && trimmed.len() > 1 {
2412 let after_bracket = &trimmed[1..];
2414 let is_closing = after_bracket.starts_with('/');
2415 let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2416
2417 let tag_name = tag_start
2419 .chars()
2420 .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2421 .collect::<String>()
2422 .to_lowercase();
2423
2424 if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2426 lines[i].in_html_block = true;
2428
2429 if !is_closing {
2432 let closing_tag = format!("</{tag_name}>");
2433 let allow_blank_lines = tag_name == "style" || tag_name == "script";
2435 let mut j = i + 1;
2436 while j < lines.len() && j < i + 100 {
2437 if !allow_blank_lines && lines[j].is_blank {
2440 break;
2441 }
2442
2443 lines[j].in_html_block = true;
2444
2445 if lines[j].content(content).contains(&closing_tag) {
2447 break;
2448 }
2449 j += 1;
2450 }
2451 }
2452 }
2453 }
2454
2455 i += 1;
2456 }
2457 }
2458
2459 fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2462 if !flavor.supports_esm_blocks() {
2464 return;
2465 }
2466
2467 let mut in_multiline_comment = false;
2468
2469 for line in lines.iter_mut() {
2470 if line.is_blank || line.in_html_comment {
2472 continue;
2473 }
2474
2475 let trimmed = line.content(content).trim_start();
2476
2477 if in_multiline_comment {
2479 if trimmed.contains("*/") {
2480 in_multiline_comment = false;
2481 }
2482 continue;
2483 }
2484
2485 if trimmed.starts_with("//") {
2487 continue;
2488 }
2489
2490 if trimmed.starts_with("/*") {
2492 if !trimmed.contains("*/") {
2493 in_multiline_comment = true;
2494 }
2495 continue;
2496 }
2497
2498 if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
2500 line.in_esm_block = true;
2501 } else {
2502 break;
2504 }
2505 }
2506 }
2507
2508 fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
2510 let mut code_spans = Vec::new();
2511
2512 if !content.contains('`') {
2514 return code_spans;
2515 }
2516
2517 let parser = Parser::new(content).into_offset_iter();
2519
2520 for (event, range) in parser {
2521 if let Event::Code(_) = event {
2522 let start_pos = range.start;
2523 let end_pos = range.end;
2524
2525 let full_span = &content[start_pos..end_pos];
2527 let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
2528
2529 let content_start = start_pos + backtick_count;
2531 let content_end = end_pos - backtick_count;
2532 let span_content = if content_start < content_end {
2533 content[content_start..content_end].to_string()
2534 } else {
2535 String::new()
2536 };
2537
2538 let line_idx = lines
2541 .partition_point(|line| line.byte_offset <= start_pos)
2542 .saturating_sub(1);
2543 let line_num = line_idx + 1;
2544 let byte_col_start = start_pos - lines[line_idx].byte_offset;
2545
2546 let end_line_idx = lines
2548 .partition_point(|line| line.byte_offset <= end_pos)
2549 .saturating_sub(1);
2550 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
2551
2552 let line_content = lines[line_idx].content(content);
2555 let col_start = if byte_col_start <= line_content.len() {
2556 line_content[..byte_col_start].chars().count()
2557 } else {
2558 line_content.chars().count()
2559 };
2560
2561 let end_line_content = lines[end_line_idx].content(content);
2562 let col_end = if byte_col_end <= end_line_content.len() {
2563 end_line_content[..byte_col_end].chars().count()
2564 } else {
2565 end_line_content.chars().count()
2566 };
2567
2568 code_spans.push(CodeSpan {
2569 line: line_num,
2570 end_line: end_line_idx + 1,
2571 start_col: col_start,
2572 end_col: col_end,
2573 byte_offset: start_pos,
2574 byte_end: end_pos,
2575 backtick_count,
2576 content: span_content,
2577 });
2578 }
2579 }
2580
2581 code_spans.sort_by_key(|span| span.byte_offset);
2583
2584 code_spans
2585 }
2586
2587 fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
2598 const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
2600
2601 #[inline]
2604 fn reset_tracking_state(
2605 list_item: &ListItemInfo,
2606 has_list_breaking_content: &mut bool,
2607 min_continuation: &mut usize,
2608 ) {
2609 *has_list_breaking_content = false;
2610 let marker_width = if list_item.is_ordered {
2611 list_item.marker.len() + 1 } else {
2613 list_item.marker.len()
2614 };
2615 *min_continuation = if list_item.is_ordered {
2616 marker_width
2617 } else {
2618 UNORDERED_LIST_MIN_CONTINUATION_INDENT
2619 };
2620 }
2621
2622 let mut list_blocks = Vec::with_capacity(lines.len() / 10); let mut current_block: Option<ListBlock> = None;
2625 let mut last_list_item_line = 0;
2626 let mut current_indent_level = 0;
2627 let mut last_marker_width = 0;
2628
2629 let mut has_list_breaking_content_since_last_item = false;
2631 let mut min_continuation_for_tracking = 0;
2632
2633 for (line_idx, line_info) in lines.iter().enumerate() {
2634 let line_num = line_idx + 1;
2635
2636 if line_info.in_code_block {
2638 if let Some(ref mut block) = current_block {
2639 let min_continuation_indent =
2641 CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
2642
2643 let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
2645
2646 match context {
2647 CodeBlockContext::Indented => {
2648 block.end_line = line_num;
2650 continue;
2651 }
2652 CodeBlockContext::Standalone => {
2653 let completed_block = current_block.take().unwrap();
2655 list_blocks.push(completed_block);
2656 continue;
2657 }
2658 CodeBlockContext::Adjacent => {
2659 block.end_line = line_num;
2661 continue;
2662 }
2663 }
2664 } else {
2665 continue;
2667 }
2668 }
2669
2670 let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
2672 caps.get(0).unwrap().as_str().to_string()
2673 } else {
2674 String::new()
2675 };
2676
2677 if current_block.is_some()
2680 && line_info.list_item.is_none()
2681 && !line_info.is_blank
2682 && !line_info.in_code_span_continuation
2683 {
2684 let line_content = line_info.content(content).trim();
2685
2686 let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
2691 let breaks_list = line_info.heading.is_some()
2692 || line_content.starts_with("---")
2693 || line_content.starts_with("***")
2694 || line_content.starts_with("___")
2695 || crate::utils::skip_context::is_table_line(line_content)
2696 || line_content.starts_with(">")
2697 || (line_info.indent > 0
2698 && line_info.indent < min_continuation_for_tracking
2699 && !is_lazy_continuation);
2700
2701 if breaks_list {
2702 has_list_breaking_content_since_last_item = true;
2703 }
2704 }
2705
2706 if line_info.in_code_span_continuation
2709 && line_info.list_item.is_none()
2710 && let Some(ref mut block) = current_block
2711 {
2712 block.end_line = line_num;
2713 }
2714
2715 let is_valid_continuation =
2720 line_info.indent >= min_continuation_for_tracking || (line_info.indent == 0 && !line_info.is_blank); if !line_info.in_code_span_continuation
2722 && line_info.list_item.is_none()
2723 && !line_info.is_blank
2724 && !line_info.in_code_block
2725 && is_valid_continuation
2726 && let Some(ref mut block) = current_block
2727 {
2728 block.end_line = line_num;
2729 }
2730
2731 if let Some(list_item) = &line_info.list_item {
2733 let item_indent = list_item.marker_column;
2735 let nesting = item_indent / 2; if let Some(ref mut block) = current_block {
2738 let is_nested = nesting > block.nesting_level;
2742 let same_type =
2743 (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
2744 let same_context = block.blockquote_prefix == blockquote_prefix;
2745 let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
2747
2748 let marker_compatible =
2750 block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
2751
2752 let has_non_list_content = has_list_breaking_content_since_last_item;
2755
2756 let mut continues_list = if is_nested {
2760 same_context && reasonable_distance && !has_non_list_content
2762 } else {
2763 same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
2765 };
2766
2767 if !continues_list && reasonable_distance && line_num > 0 && block.end_line == line_num - 1 {
2770 if block.item_lines.contains(&(line_num - 1)) {
2773 continues_list = true;
2775 } else {
2776 continues_list = true;
2780 }
2781 }
2782
2783 if continues_list {
2784 block.end_line = line_num;
2786 block.item_lines.push(line_num);
2787
2788 block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
2790 list_item.marker.len() + 1
2791 } else {
2792 list_item.marker.len()
2793 });
2794
2795 if !block.is_ordered
2797 && block.marker.is_some()
2798 && block.marker.as_ref() != Some(&list_item.marker)
2799 {
2800 block.marker = None;
2802 }
2803
2804 reset_tracking_state(
2806 list_item,
2807 &mut has_list_breaking_content_since_last_item,
2808 &mut min_continuation_for_tracking,
2809 );
2810 } else {
2811 list_blocks.push(block.clone());
2814
2815 *block = ListBlock {
2816 start_line: line_num,
2817 end_line: line_num,
2818 is_ordered: list_item.is_ordered,
2819 marker: if list_item.is_ordered {
2820 None
2821 } else {
2822 Some(list_item.marker.clone())
2823 },
2824 blockquote_prefix: blockquote_prefix.clone(),
2825 item_lines: vec![line_num],
2826 nesting_level: nesting,
2827 max_marker_width: if list_item.is_ordered {
2828 list_item.marker.len() + 1
2829 } else {
2830 list_item.marker.len()
2831 },
2832 };
2833
2834 reset_tracking_state(
2836 list_item,
2837 &mut has_list_breaking_content_since_last_item,
2838 &mut min_continuation_for_tracking,
2839 );
2840 }
2841 } else {
2842 current_block = Some(ListBlock {
2844 start_line: line_num,
2845 end_line: line_num,
2846 is_ordered: list_item.is_ordered,
2847 marker: if list_item.is_ordered {
2848 None
2849 } else {
2850 Some(list_item.marker.clone())
2851 },
2852 blockquote_prefix,
2853 item_lines: vec![line_num],
2854 nesting_level: nesting,
2855 max_marker_width: list_item.marker.len(),
2856 });
2857
2858 reset_tracking_state(
2860 list_item,
2861 &mut has_list_breaking_content_since_last_item,
2862 &mut min_continuation_for_tracking,
2863 );
2864 }
2865
2866 last_list_item_line = line_num;
2867 current_indent_level = item_indent;
2868 last_marker_width = if list_item.is_ordered {
2869 list_item.marker.len() + 1 } else {
2871 list_item.marker.len()
2872 };
2873 } else if let Some(ref mut block) = current_block {
2874 let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
2884 lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
2885 } else {
2886 false
2887 };
2888
2889 let min_continuation_indent = if block.is_ordered {
2893 current_indent_level + last_marker_width
2894 } else {
2895 current_indent_level + 2 };
2897
2898 if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
2899 block.end_line = line_num;
2901 } else if line_info.is_blank {
2902 let mut check_idx = line_idx + 1;
2905 let mut found_continuation = false;
2906
2907 while check_idx < lines.len() && lines[check_idx].is_blank {
2909 check_idx += 1;
2910 }
2911
2912 if check_idx < lines.len() {
2913 let next_line = &lines[check_idx];
2914 if !next_line.in_code_block && next_line.indent >= min_continuation_indent {
2916 found_continuation = true;
2917 }
2918 else if !next_line.in_code_block
2920 && next_line.list_item.is_some()
2921 && let Some(item) = &next_line.list_item
2922 {
2923 let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
2924 .find(next_line.content(content))
2925 .map_or(String::new(), |m| m.as_str().to_string());
2926 if item.marker_column == current_indent_level
2927 && item.is_ordered == block.is_ordered
2928 && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
2929 {
2930 let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
2933 if let Some(between_line) = lines.get(idx) {
2934 let between_content = between_line.content(content);
2935 let trimmed = between_content.trim();
2936 if trimmed.is_empty() {
2938 return false;
2939 }
2940 let line_indent = between_content.len() - between_content.trim_start().len();
2942
2943 if trimmed.starts_with("```")
2945 || trimmed.starts_with("~~~")
2946 || trimmed.starts_with("---")
2947 || trimmed.starts_with("***")
2948 || trimmed.starts_with("___")
2949 || trimmed.starts_with(">")
2950 || crate::utils::skip_context::is_table_line(trimmed)
2951 || between_line.heading.is_some()
2952 {
2953 return true; }
2955
2956 line_indent >= min_continuation_indent
2958 } else {
2959 false
2960 }
2961 });
2962
2963 if block.is_ordered {
2964 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2967 if let Some(between_line) = lines.get(idx) {
2968 let trimmed = between_line.content(content).trim();
2969 if trimmed.is_empty() {
2970 return false;
2971 }
2972 trimmed.starts_with("```")
2974 || trimmed.starts_with("~~~")
2975 || trimmed.starts_with("---")
2976 || trimmed.starts_with("***")
2977 || trimmed.starts_with("___")
2978 || trimmed.starts_with(">")
2979 || crate::utils::skip_context::is_table_line(trimmed)
2980 || between_line.heading.is_some()
2981 } else {
2982 false
2983 }
2984 });
2985 found_continuation = !has_structural_separators;
2986 } else {
2987 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2989 if let Some(between_line) = lines.get(idx) {
2990 let trimmed = between_line.content(content).trim();
2991 if trimmed.is_empty() {
2992 return false;
2993 }
2994 trimmed.starts_with("```")
2996 || trimmed.starts_with("~~~")
2997 || trimmed.starts_with("---")
2998 || trimmed.starts_with("***")
2999 || trimmed.starts_with("___")
3000 || trimmed.starts_with(">")
3001 || crate::utils::skip_context::is_table_line(trimmed)
3002 || between_line.heading.is_some()
3003 } else {
3004 false
3005 }
3006 });
3007 found_continuation = !has_structural_separators;
3008 }
3009 }
3010 }
3011 }
3012
3013 if found_continuation {
3014 block.end_line = line_num;
3016 } else {
3017 list_blocks.push(block.clone());
3019 current_block = None;
3020 }
3021 } else {
3022 let min_required_indent = if block.is_ordered {
3025 current_indent_level + last_marker_width
3026 } else {
3027 current_indent_level + 2
3028 };
3029
3030 let line_content = line_info.content(content).trim();
3035
3036 let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
3038
3039 let is_structural_separator = line_info.heading.is_some()
3040 || line_content.starts_with("```")
3041 || line_content.starts_with("~~~")
3042 || line_content.starts_with("---")
3043 || line_content.starts_with("***")
3044 || line_content.starts_with("___")
3045 || line_content.starts_with(">")
3046 || looks_like_table;
3047
3048 let is_lazy_continuation = !is_structural_separator
3051 && !line_info.is_blank
3052 && (line_info.indent == 0 || line_info.indent >= min_required_indent);
3053
3054 if is_lazy_continuation {
3055 let content_to_check = if !blockquote_prefix.is_empty() {
3058 line_info
3060 .content(content)
3061 .strip_prefix(&blockquote_prefix)
3062 .unwrap_or(line_info.content(content))
3063 .trim()
3064 } else {
3065 line_info.content(content).trim()
3066 };
3067
3068 let starts_with_uppercase = content_to_check.chars().next().is_some_and(|c| c.is_uppercase());
3069
3070 if starts_with_uppercase && last_list_item_line > 0 {
3073 list_blocks.push(block.clone());
3075 current_block = None;
3076 } else {
3077 block.end_line = line_num;
3079 }
3080 } else {
3081 list_blocks.push(block.clone());
3083 current_block = None;
3084 }
3085 }
3086 }
3087 }
3088
3089 if let Some(block) = current_block {
3091 list_blocks.push(block);
3092 }
3093
3094 merge_adjacent_list_blocks(content, &mut list_blocks, lines);
3096
3097 list_blocks
3098 }
3099
3100 fn compute_char_frequency(content: &str) -> CharFrequency {
3102 let mut frequency = CharFrequency::default();
3103
3104 for ch in content.chars() {
3105 match ch {
3106 '#' => frequency.hash_count += 1,
3107 '*' => frequency.asterisk_count += 1,
3108 '_' => frequency.underscore_count += 1,
3109 '-' => frequency.hyphen_count += 1,
3110 '+' => frequency.plus_count += 1,
3111 '>' => frequency.gt_count += 1,
3112 '|' => frequency.pipe_count += 1,
3113 '[' => frequency.bracket_count += 1,
3114 '`' => frequency.backtick_count += 1,
3115 '<' => frequency.lt_count += 1,
3116 '!' => frequency.exclamation_count += 1,
3117 '\n' => frequency.newline_count += 1,
3118 _ => {}
3119 }
3120 }
3121
3122 frequency
3123 }
3124
3125 fn parse_html_tags(
3127 content: &str,
3128 lines: &[LineInfo],
3129 code_blocks: &[(usize, usize)],
3130 flavor: MarkdownFlavor,
3131 ) -> Vec<HtmlTag> {
3132 static HTML_TAG_REGEX: LazyLock<regex::Regex> =
3133 LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
3134
3135 let mut html_tags = Vec::with_capacity(content.matches('<').count());
3136
3137 for cap in HTML_TAG_REGEX.captures_iter(content) {
3138 let full_match = cap.get(0).unwrap();
3139 let match_start = full_match.start();
3140 let match_end = full_match.end();
3141
3142 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3144 continue;
3145 }
3146
3147 let is_closing = !cap.get(1).unwrap().as_str().is_empty();
3148 let tag_name_original = cap.get(2).unwrap().as_str();
3149 let tag_name = tag_name_original.to_lowercase();
3150 let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
3151
3152 if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
3155 continue;
3156 }
3157
3158 let mut line_num = 1;
3160 let mut col_start = match_start;
3161 let mut col_end = match_end;
3162 for (idx, line_info) in lines.iter().enumerate() {
3163 if match_start >= line_info.byte_offset {
3164 line_num = idx + 1;
3165 col_start = match_start - line_info.byte_offset;
3166 col_end = match_end - line_info.byte_offset;
3167 } else {
3168 break;
3169 }
3170 }
3171
3172 html_tags.push(HtmlTag {
3173 line: line_num,
3174 start_col: col_start,
3175 end_col: col_end,
3176 byte_offset: match_start,
3177 byte_end: match_end,
3178 tag_name,
3179 is_closing,
3180 is_self_closing,
3181 raw_content: full_match.as_str().to_string(),
3182 });
3183 }
3184
3185 html_tags
3186 }
3187
3188 fn parse_emphasis_spans(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<EmphasisSpan> {
3190 static EMPHASIS_REGEX: LazyLock<regex::Regex> =
3191 LazyLock::new(|| regex::Regex::new(r"(\*{1,3}|_{1,3})([^*_\s][^*_]*?)(\*{1,3}|_{1,3})").unwrap());
3192
3193 let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
3194
3195 for cap in EMPHASIS_REGEX.captures_iter(content) {
3196 let full_match = cap.get(0).unwrap();
3197 let match_start = full_match.start();
3198 let match_end = full_match.end();
3199
3200 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3202 continue;
3203 }
3204
3205 let opening_markers = cap.get(1).unwrap().as_str();
3206 let content_part = cap.get(2).unwrap().as_str();
3207 let closing_markers = cap.get(3).unwrap().as_str();
3208
3209 if opening_markers.chars().next() != closing_markers.chars().next()
3211 || opening_markers.len() != closing_markers.len()
3212 {
3213 continue;
3214 }
3215
3216 let marker = opening_markers.chars().next().unwrap();
3217 let marker_count = opening_markers.len();
3218
3219 let mut line_num = 1;
3221 let mut col_start = match_start;
3222 let mut col_end = match_end;
3223 for (idx, line_info) in lines.iter().enumerate() {
3224 if match_start >= line_info.byte_offset {
3225 line_num = idx + 1;
3226 col_start = match_start - line_info.byte_offset;
3227 col_end = match_end - line_info.byte_offset;
3228 } else {
3229 break;
3230 }
3231 }
3232
3233 emphasis_spans.push(EmphasisSpan {
3234 line: line_num,
3235 start_col: col_start,
3236 end_col: col_end,
3237 byte_offset: match_start,
3238 byte_end: match_end,
3239 marker,
3240 marker_count,
3241 content: content_part.to_string(),
3242 });
3243 }
3244
3245 emphasis_spans
3246 }
3247
3248 fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
3250 let mut table_rows = Vec::with_capacity(lines.len() / 20);
3251
3252 for (line_idx, line_info) in lines.iter().enumerate() {
3253 if line_info.in_code_block || line_info.is_blank {
3255 continue;
3256 }
3257
3258 let line = line_info.content(content);
3259 let line_num = line_idx + 1;
3260
3261 if !line.contains('|') {
3263 continue;
3264 }
3265
3266 let parts: Vec<&str> = line.split('|').collect();
3268 let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
3269
3270 let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
3272 let mut column_alignments = Vec::new();
3273
3274 if is_separator {
3275 for part in &parts[1..parts.len() - 1] {
3276 let trimmed = part.trim();
3278 let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
3279 "center".to_string()
3280 } else if trimmed.ends_with(':') {
3281 "right".to_string()
3282 } else if trimmed.starts_with(':') {
3283 "left".to_string()
3284 } else {
3285 "none".to_string()
3286 };
3287 column_alignments.push(alignment);
3288 }
3289 }
3290
3291 table_rows.push(TableRow {
3292 line: line_num,
3293 is_separator,
3294 column_count,
3295 column_alignments,
3296 });
3297 }
3298
3299 table_rows
3300 }
3301
3302 fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
3304 let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
3305
3306 for cap in BARE_URL_PATTERN.captures_iter(content) {
3308 let full_match = cap.get(0).unwrap();
3309 let match_start = full_match.start();
3310 let match_end = full_match.end();
3311
3312 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3314 continue;
3315 }
3316
3317 let preceding_char = if match_start > 0 {
3319 content.chars().nth(match_start - 1)
3320 } else {
3321 None
3322 };
3323 let following_char = content.chars().nth(match_end);
3324
3325 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3326 continue;
3327 }
3328 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3329 continue;
3330 }
3331
3332 let url = full_match.as_str();
3333 let url_type = if url.starts_with("https://") {
3334 "https"
3335 } else if url.starts_with("http://") {
3336 "http"
3337 } else if url.starts_with("ftp://") {
3338 "ftp"
3339 } else {
3340 "other"
3341 };
3342
3343 let mut line_num = 1;
3345 let mut col_start = match_start;
3346 let mut col_end = match_end;
3347 for (idx, line_info) in lines.iter().enumerate() {
3348 if match_start >= line_info.byte_offset {
3349 line_num = idx + 1;
3350 col_start = match_start - line_info.byte_offset;
3351 col_end = match_end - line_info.byte_offset;
3352 } else {
3353 break;
3354 }
3355 }
3356
3357 bare_urls.push(BareUrl {
3358 line: line_num,
3359 start_col: col_start,
3360 end_col: col_end,
3361 byte_offset: match_start,
3362 byte_end: match_end,
3363 url: url.to_string(),
3364 url_type: url_type.to_string(),
3365 });
3366 }
3367
3368 for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
3370 let full_match = cap.get(0).unwrap();
3371 let match_start = full_match.start();
3372 let match_end = full_match.end();
3373
3374 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3376 continue;
3377 }
3378
3379 let preceding_char = if match_start > 0 {
3381 content.chars().nth(match_start - 1)
3382 } else {
3383 None
3384 };
3385 let following_char = content.chars().nth(match_end);
3386
3387 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3388 continue;
3389 }
3390 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3391 continue;
3392 }
3393
3394 let email = full_match.as_str();
3395
3396 let mut line_num = 1;
3398 let mut col_start = match_start;
3399 let mut col_end = match_end;
3400 for (idx, line_info) in lines.iter().enumerate() {
3401 if match_start >= line_info.byte_offset {
3402 line_num = idx + 1;
3403 col_start = match_start - line_info.byte_offset;
3404 col_end = match_end - line_info.byte_offset;
3405 } else {
3406 break;
3407 }
3408 }
3409
3410 bare_urls.push(BareUrl {
3411 line: line_num,
3412 start_col: col_start,
3413 end_col: col_end,
3414 byte_offset: match_start,
3415 byte_end: match_end,
3416 url: email.to_string(),
3417 url_type: "email".to_string(),
3418 });
3419 }
3420
3421 bare_urls
3422 }
3423
3424 #[must_use]
3444 pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
3445 ValidHeadingsIter::new(&self.lines)
3446 }
3447
3448 #[must_use]
3452 pub fn has_valid_headings(&self) -> bool {
3453 self.lines
3454 .iter()
3455 .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
3456 }
3457}
3458
3459fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
3461 if list_blocks.len() < 2 {
3462 return;
3463 }
3464
3465 let mut merger = ListBlockMerger::new(content, lines);
3466 *list_blocks = merger.merge(list_blocks);
3467}
3468
3469struct ListBlockMerger<'a> {
3471 content: &'a str,
3472 lines: &'a [LineInfo],
3473}
3474
3475impl<'a> ListBlockMerger<'a> {
3476 fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
3477 Self { content, lines }
3478 }
3479
3480 fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
3481 let mut merged = Vec::with_capacity(list_blocks.len());
3482 let mut current = list_blocks[0].clone();
3483
3484 for next in list_blocks.iter().skip(1) {
3485 if self.should_merge_blocks(¤t, next) {
3486 current = self.merge_two_blocks(current, next);
3487 } else {
3488 merged.push(current);
3489 current = next.clone();
3490 }
3491 }
3492
3493 merged.push(current);
3494 merged
3495 }
3496
3497 fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
3499 if !self.blocks_are_compatible(current, next) {
3501 return false;
3502 }
3503
3504 let spacing = self.analyze_spacing_between(current, next);
3506 match spacing {
3507 BlockSpacing::Consecutive => true,
3508 BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
3509 BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
3510 self.can_merge_with_content_between(current, next)
3511 }
3512 }
3513 }
3514
3515 fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
3517 current.is_ordered == next.is_ordered
3518 && current.blockquote_prefix == next.blockquote_prefix
3519 && current.nesting_level == next.nesting_level
3520 }
3521
3522 fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
3524 let gap = next.start_line - current.end_line;
3525
3526 match gap {
3527 1 => BlockSpacing::Consecutive,
3528 2 => BlockSpacing::SingleBlank,
3529 _ if gap > 2 => {
3530 if self.has_only_blank_lines_between(current, next) {
3531 BlockSpacing::MultipleBlanks
3532 } else {
3533 BlockSpacing::ContentBetween
3534 }
3535 }
3536 _ => BlockSpacing::Consecutive, }
3538 }
3539
3540 fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3542 if has_meaningful_content_between(self.content, current, next, self.lines) {
3545 return false; }
3547
3548 !current.is_ordered && current.marker == next.marker
3550 }
3551
3552 fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3554 if has_meaningful_content_between(self.content, current, next, self.lines) {
3556 return false; }
3558
3559 current.is_ordered && next.is_ordered
3561 }
3562
3563 fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3565 for line_num in (current.end_line + 1)..next.start_line {
3566 if let Some(line_info) = self.lines.get(line_num - 1)
3567 && !line_info.content(self.content).trim().is_empty()
3568 {
3569 return false;
3570 }
3571 }
3572 true
3573 }
3574
3575 fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
3577 current.end_line = next.end_line;
3578 current.item_lines.extend_from_slice(&next.item_lines);
3579
3580 current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
3582
3583 if !current.is_ordered && self.markers_differ(¤t, next) {
3585 current.marker = None; }
3587
3588 current
3589 }
3590
3591 fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
3593 current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
3594 }
3595}
3596
/// How two neighbouring list blocks are separated, expressed via the distance
/// between the first block's end line and the second block's start line.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// The second block starts on the line right after the first ends.
    Consecutive,
    /// Exactly one line lies between the two blocks.
    SingleBlank,
    /// Several lines lie between the blocks and all of them are blank.
    MultipleBlanks,
    /// Several lines lie between the blocks and at least one is not blank.
    ContentBetween,
}
3605
3606fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
3608 for line_num in (current.end_line + 1)..next.start_line {
3610 if let Some(line_info) = lines.get(line_num - 1) {
3611 let trimmed = line_info.content(content).trim();
3613
3614 if trimmed.is_empty() {
3616 continue;
3617 }
3618
3619 if line_info.heading.is_some() {
3623 return true; }
3625
3626 if is_horizontal_rule(trimmed) {
3628 return true; }
3630
3631 if crate::utils::skip_context::is_table_line(trimmed) {
3633 return true; }
3635
3636 if trimmed.starts_with('>') {
3638 return true; }
3640
3641 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
3643 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3644
3645 let min_continuation_indent = if current.is_ordered {
3647 current.nesting_level + current.max_marker_width + 1 } else {
3649 current.nesting_level + 2
3650 };
3651
3652 if line_indent < min_continuation_indent {
3653 return true; }
3656 }
3657
3658 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3660
3661 let min_indent = if current.is_ordered {
3663 current.nesting_level + current.max_marker_width
3664 } else {
3665 current.nesting_level + 2
3666 };
3667
3668 if line_indent < min_indent {
3670 return true; }
3672
3673 }
3676 }
3677
3678 false
3680}
3681
/// Returns `true` when `trimmed` looks like a horizontal rule.
///
/// The text must be at least three bytes long, start with `-`, `*` or `_`,
/// contain that same character at least three times, and otherwise hold only
/// spaces and tabs. The caller is expected to pass already-trimmed text: a
/// leading space makes the check fail.
fn is_horizontal_rule(trimmed: &str) -> bool {
    if trimmed.len() < 3 {
        return false;
    }

    let Some(rule_char) = trimmed.chars().next().filter(|c| matches!(c, '-' | '*' | '_')) else {
        return false;
    };

    // Single pass over the text, no intermediate buffer.
    let mut rule_chars = 0usize;
    for ch in trimmed.chars() {
        if ch == rule_char {
            rule_chars += 1;
        } else if ch != ' ' && ch != '\t' {
            // Anything other than the rule character or blank space disqualifies.
            return false;
        }
    }

    rule_chars >= 3
}
3705
#[cfg(test)]
mod tests {
    use super::*;

    /// Empty input: one line offset, no lines.
    #[test]
    fn test_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }

    /// A single line without a trailing newline.
    #[test]
    fn test_single_line() {
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
    }

    /// Line offsets and offset-to-position mapping across several lines.
    #[test]
    fn test_multi_line() {
        let content = "# Title\n\nSecond line\nThird line";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(8), (2, 1));
        assert_eq!(ctx.offset_to_line_col(9), (3, 1));
        assert_eq!(ctx.offset_to_line_col(15), (3, 7));
        assert_eq!(ctx.offset_to_line_col(21), (4, 1));
    }

    /// Per-line metadata: content, byte offset, indent and blank detection.
    #[test]
    fn test_line_info() {
        // The second line is indented by four spaces, as asserted below.
        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.lines.len(), 7);

        let line1 = &ctx.lines[0];
        assert_eq!(line1.content(ctx.content), "# Title");
        assert_eq!(line1.byte_offset, 0);
        assert_eq!(line1.indent, 0);
        assert!(!line1.is_blank);
        assert!(!line1.in_code_block);
        assert!(line1.list_item.is_none());

        let line2 = &ctx.lines[1];
        assert_eq!(line2.content(ctx.content), "    indented");
        assert_eq!(line2.byte_offset, 8);
        assert_eq!(line2.indent, 4);
        assert!(!line2.is_blank);

        let line3 = &ctx.lines[2];
        assert_eq!(line3.content(ctx.content), "");
        assert!(line3.is_blank);

        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
    }

    /// Detection of unordered and ordered list items.
    #[test]
    fn test_list_item_detection() {
        // The nested bullet sits at column 2 (asserted below). The nested
        // ordered item is indented to the content column of `1. `; nothing is
        // asserted about that line.
        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let line1 = &ctx.lines[0];
        assert!(line1.list_item.is_some());
        let list1 = line1.list_item.as_ref().unwrap();
        assert_eq!(list1.marker, "-");
        assert!(!list1.is_ordered);
        assert_eq!(list1.marker_column, 0);
        assert_eq!(list1.content_column, 2);

        let line2 = &ctx.lines[1];
        assert!(line2.list_item.is_some());
        let list2 = line2.list_item.as_ref().unwrap();
        assert_eq!(list2.marker, "*");
        assert_eq!(list2.marker_column, 2);

        let line3 = &ctx.lines[2];
        assert!(line3.list_item.is_some());
        let list3 = line3.list_item.as_ref().unwrap();
        assert_eq!(list3.marker, "1.");
        assert!(list3.is_ordered);
        assert_eq!(list3.number, Some(1));

        let line6 = &ctx.lines[5];
        assert!(line6.list_item.is_none());
    }

    /// Offsets that fall on newline characters and on the last line.
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        let content = "a\nb\nc";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(1), (1, 2));
        assert_eq!(ctx.offset_to_line_col(2), (2, 1));
        assert_eq!(ctx.offset_to_line_col(3), (2, 2));
        assert_eq!(ctx.offset_to_line_col(4), (3, 1));
        assert_eq!(ctx.offset_to_line_col(5), (3, 2));
    }

    /// In MDX flavor the leading import/export lines are flagged as ESM.
    #[test]
    fn test_mdx_esm_blocks() {
        let content = r##"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall

In {year}, the snowfall was above average.
It was followed by a warm spring which caused
flood conditions in many of the nearby rivers.

<Chart color="#fcb32c" year={year} />
"##;

        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);

        assert_eq!(ctx.lines.len(), 10);
        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
        assert!(
            !ctx.lines[3].in_esm_block,
            "Line 4 (heading) should NOT be in_esm_block"
        );
        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
    }

    /// The same import/export lines are not flagged in Standard flavor.
    #[test]
    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
        let content = r#"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(
            !ctx.lines[0].in_esm_block,
            "Line 1 should NOT be in_esm_block in Standard flavor"
        );
        assert!(
            !ctx.lines[1].in_esm_block,
            "Line 2 should NOT be in_esm_block in Standard flavor"
        );
    }
}