1use crate::config::MarkdownFlavor;
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
4use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
5use regex::Regex;
6use std::borrow::Cow;
7use std::path::PathBuf;
8use std::sync::LazyLock;
9
/// Evaluates `$code` and yields its value.
///
/// When `$profile` is true, the wall-clock time spent evaluating `$code` is
/// printed to stderr as `[PROFILE] <name>: <duration>`.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let timer = std::time::Instant::now();
        let value = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, timer.elapsed());
        }
        value
    }};
}
22
/// wasm32 variant of `profile_section!`: evaluates `$code` and yields its value
/// without any timing or output; `$name` and `$profile` are ignored.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
27
/// Matches `[text](url "title")` inline links and `[text][id]` reference links.
///
/// Capture groups: 1 = link text, 2 = angle-bracketed URL, 3 = bare URL,
/// 4 = double-quoted title, 5 = single-quoted title, 6 = reference ID.
/// The pattern itself does not exclude a leading `!`; callers must filter images.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\] # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
        \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
        |
        \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
41
/// Matches `![alt](url "title")` inline images and `![alt][id]` reference images.
///
/// Capture groups: 1 = alt text, 2 = angle-bracketed URL, 3 = bare URL,
/// 4 = double-quoted title, 5 = single-quoted title, 6 = reference ID.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\] # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
        \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
        |
        \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
55
/// Matches a single-line reference definition such as `[id]: url "title"`,
/// indented by at most three spaces (multi-line mode, anchored per line).
///
/// Capture groups: 1 = ID, 2 = URL (no whitespace), 3 = double-quoted title,
/// 4 = single-quoted title.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
59
/// Matches `http://`, `https://` and `ftp://` URLs written directly in text.
///
/// Group 1 is the scheme. The match stops at whitespace, angle/square
/// brackets, parentheses, backslashes, quotes and backticks.
static BARE_URL_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(https?|ftp)://[^\s<>\[\]()\\'"`]+(?:\.[^\s<>\[\]()\\'"`]+)*(?::\d+)?(?:/[^\s<>\[\]()\\'"`]*)?(?:\?[^\s<>\[\]()\\'"`]*)?(?:#[^\s<>\[\]()\\'"`]*)?"#
    ).unwrap()
});
66
/// Matches simple `local@domain.tld` email addresses (ASCII letters, digits and
/// a small set of punctuation; TLD of at least two letters).
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
70
/// Captures (group 1) a blockquote prefix at the start of a string: optional
/// whitespace, one or more `>` characters, then optional whitespace.
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
73
/// Pre-computed facts about a single line of the document.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset of the start of the line within the source text.
    pub byte_offset: usize,
    /// Length of the line in bytes; `content()` slices `byte_offset..byte_offset + byte_len`.
    pub byte_len: usize,
    /// Indentation of the line (NOTE(review): unit — bytes vs. columns — not visible here).
    pub indent: usize,
    /// Whether the line is blank.
    pub is_blank: bool,
    /// Whether the line lies inside a code block.
    pub in_code_block: bool,
    /// Whether the line lies inside front matter.
    pub in_front_matter: bool,
    /// Whether the line lies inside an HTML block.
    pub in_html_block: bool,
    /// Whether the line lies inside an HTML comment.
    pub in_html_comment: bool,
    /// List item details when the line starts a list item.
    pub list_item: Option<ListItemInfo>,
    /// Heading details when the line is (or was detected as) a heading.
    pub heading: Option<HeadingInfo>,
    /// Blockquote details when the line is part of a blockquote.
    pub blockquote: Option<BlockquoteInfo>,
    /// Whether the line lies inside a mkdocstrings block.
    pub in_mkdocstrings: bool,
    /// Whether the line lies inside an ESM block.
    pub in_esm_block: bool,
    /// Set by `LintContext::new` for every line after the first of a code span
    /// that extends over several lines.
    pub in_code_span_continuation: bool,
    /// Whether the line is a horizontal rule.
    pub is_horizontal_rule: bool,
}
109
110impl LineInfo {
111 pub fn content<'a>(&self, source: &'a str) -> &'a str {
113 &source[self.byte_offset..self.byte_offset + self.byte_len]
114 }
115}
116
/// Details of a list item found on a line.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The list marker text.
    pub marker: String,
    /// `true` for ordered list items, `false` for unordered ones.
    pub is_ordered: bool,
    /// The item's number, when present (presumably only for ordered items — confirm).
    pub number: Option<usize>,
    /// Column at which the marker starts.
    pub marker_column: usize,
    /// Column at which the item's content starts.
    pub content_column: usize,
}
131
/// Syntactic style used to write a heading.
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    /// ATX-style heading (presumably the `#`-prefixed form — confirm).
    ATX,
    /// First setext variant (presumably the `=` underline / level 1 — confirm).
    Setext1,
    /// Second setext variant (presumably the `-` underline / level 2 — confirm).
    Setext2,
}
142
/// A link found in the document, as produced by `LintContext::parse_links`.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// 1-based line number on which the link starts.
    pub line: usize,
    /// Byte distance of the link start from the start of its line.
    pub start_col: usize,
    /// Byte distance of the link end from the start of the line containing the end.
    pub end_col: usize,
    /// Byte offset of the link start in the document.
    pub byte_offset: usize,
    /// Byte offset just past the end of the link.
    pub byte_end: usize,
    /// Link text (the content between the first pair of square brackets).
    pub text: Cow<'a, str>,
    /// Destination URL; empty for reference links found by the regex fallback.
    pub url: Cow<'a, str>,
    /// Whether this is a reference-style link (full, collapsed or shortcut).
    pub is_reference: bool,
    /// Lowercased reference ID for reference-style links.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type as reported by pulldown-cmark.
    pub link_type: LinkType,
}
167
/// A reference that pulldown-cmark reported as broken (no matching definition).
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text reported by the parser's broken-link callback.
    pub reference: String,
    /// Byte span of the broken link, as reported by the parser.
    pub span: std::ops::Range<usize>,
}
176
/// A footnote reference found in the document.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// Footnote identifier as reported by the parser.
    pub id: String,
    /// 1-based line number on which the reference starts.
    pub line: usize,
    /// Byte offset of the start of the reference.
    pub byte_offset: usize,
    /// Byte offset just past the end of the reference.
    pub byte_end: usize,
}
189
/// An image found in the document.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// 1-based line number on which the image starts.
    pub line: usize,
    /// Byte distance of the image start from the start of its line.
    pub start_col: usize,
    /// Byte distance of the image end from the start of the line containing the end.
    pub end_col: usize,
    /// Byte offset of the image start in the document.
    pub byte_offset: usize,
    /// Byte offset of the image end in the document.
    pub byte_end: usize,
    /// Alternative text of the image.
    pub alt_text: Cow<'a, str>,
    /// Image URL.
    pub url: Cow<'a, str>,
    /// Whether this is a reference-style image.
    pub is_reference: bool,
    /// Reference ID for reference-style images.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type as reported by pulldown-cmark.
    pub link_type: LinkType,
}
214
/// A link reference definition (`[id]: url "title"`).
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line number of the definition.
    pub line: usize,
    /// Reference ID; `get_reference_url` compares it against a lowercased
    /// lookup key, so it is presumably stored lowercased — confirm.
    pub id: String,
    /// Destination URL.
    pub url: String,
    /// Optional title.
    pub title: Option<String>,
    /// Byte offset of the start of the definition.
    pub byte_offset: usize,
    /// Byte offset just past the end of the definition (exclusive).
    pub byte_end: usize,
    /// Byte offset of the start of the title, when a title is present.
    pub title_byte_start: Option<usize>,
    /// Byte offset just past the end of the title (exclusive), when present.
    pub title_byte_end: Option<usize>,
}
235
/// An inline code span.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// 1-based line number on which the span starts.
    pub line: usize,
    /// 1-based line number on which the span ends (greater than `line` for multi-line spans).
    pub end_line: usize,
    /// Start column of the span on its first line.
    pub start_col: usize,
    /// End column of the span on its last line (treated as exclusive by `is_in_code_span`).
    pub end_col: usize,
    /// Byte offset of the start of the span.
    pub byte_offset: usize,
    /// Byte offset just past the end of the span (exclusive).
    pub byte_end: usize,
    /// Number of backticks in the delimiter.
    pub backtick_count: usize,
    /// Content of the span.
    pub content: String,
}
256
/// Details of a heading detected on a line.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level.
    pub level: u8,
    /// Syntactic style of the heading.
    pub style: HeadingStyle,
    /// The heading marker text.
    pub marker: String,
    /// Column at which the marker starts.
    pub marker_column: usize,
    /// Column at which the heading content starts.
    pub content_column: usize,
    /// Heading text (presumably cleaned of markers and custom ID — confirm).
    pub text: String,
    /// Custom ID attached to the heading, if any.
    pub custom_id: Option<String>,
    /// Heading text as written in the source.
    pub raw_text: String,
    /// Whether the heading has a closing sequence.
    pub has_closing_sequence: bool,
    /// The closing sequence text.
    pub closing_sequence: String,
    /// Whether the heading is considered valid; `ValidHeadingsIter` yields
    /// only headings with this flag set.
    pub is_valid: bool,
}
284
/// A valid heading together with its line, as yielded by `ValidHeadingsIter`.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// 1-based line number of the heading.
    pub line_num: usize,
    /// The heading details (its `is_valid` flag is always true).
    pub heading: &'a HeadingInfo,
    /// The line on which the heading was found.
    pub line_info: &'a LineInfo,
}
298
/// Iterator over the lines of a document that carry a valid heading.
pub struct ValidHeadingsIter<'a> {
    /// All lines of the document.
    lines: &'a [LineInfo],
    /// Index of the next line to examine.
    current_index: usize,
}
307
308impl<'a> ValidHeadingsIter<'a> {
309 fn new(lines: &'a [LineInfo]) -> Self {
310 Self {
311 lines,
312 current_index: 0,
313 }
314 }
315}
316
317impl<'a> Iterator for ValidHeadingsIter<'a> {
318 type Item = ValidHeading<'a>;
319
320 fn next(&mut self) -> Option<Self::Item> {
321 while self.current_index < self.lines.len() {
322 let idx = self.current_index;
323 self.current_index += 1;
324
325 let line_info = &self.lines[idx];
326 if let Some(heading) = &line_info.heading
327 && heading.is_valid
328 {
329 return Some(ValidHeading {
330 line_num: idx + 1, heading,
332 line_info,
333 });
334 }
335 }
336 None
337 }
338}
339
/// Details of a blockquote line.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting depth of the blockquote.
    pub nesting_level: usize,
    /// Indentation text before the blockquote marker.
    pub indent: String,
    /// Column at which the marker starts.
    pub marker_column: usize,
    /// The blockquote prefix text.
    pub prefix: String,
    /// Content following the prefix.
    pub content: String,
    /// Whether no space follows the marker.
    pub has_no_space_after_marker: bool,
    /// Whether more than one space follows the marker.
    pub has_multiple_spaces_after_marker: bool,
    /// Whether the line needs an MD028 fix (NOTE(review): criteria are computed elsewhere).
    pub needs_md028_fix: bool,
}
360
/// A contiguous block of list items.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// First line of the block (inclusive; see `is_in_list_block`).
    pub start_line: usize,
    /// Last line of the block (inclusive; see `is_in_list_block`).
    pub end_line: usize,
    /// Whether the block is an ordered list.
    pub is_ordered: bool,
    /// Marker associated with the block, if any.
    pub marker: Option<String>,
    /// Blockquote prefix shared by the block.
    pub blockquote_prefix: String,
    /// Line numbers of the list items in the block.
    pub item_lines: Vec<usize>,
    /// Nesting level of the block.
    pub nesting_level: usize,
    /// Widest marker seen in the block.
    pub max_marker_width: usize,
}
381
382use std::sync::{Arc, OnceLock};
383
/// Per-character counters used by `LintContext::has_char`, `char_count` and
/// the `likely_has_*` heuristics. Each comment names the character that
/// `has_char`/`char_count` map to the field.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Counter reported for `#`.
    pub hash_count: usize,
    /// Counter reported for `*`.
    pub asterisk_count: usize,
    /// Counter reported for `_`.
    pub underscore_count: usize,
    /// Counter reported for `-`.
    pub hyphen_count: usize,
    /// Counter reported for `+`.
    pub plus_count: usize,
    /// Counter reported for `>`.
    pub gt_count: usize,
    /// Counter reported for `|`.
    pub pipe_count: usize,
    /// Counter reported for `[` (NOTE(review): may also include `]` — confirm in `compute_char_frequency`).
    pub bracket_count: usize,
    /// Counter reported for `` ` ``.
    pub backtick_count: usize,
    /// Counter reported for `<`.
    pub lt_count: usize,
    /// Counter reported for `!`.
    pub exclamation_count: usize,
    /// Counter reported for `\n`.
    pub newline_count: usize,
}
412
/// An HTML tag found in the document.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number of the tag (matched against by `html_tags_on_line`).
    pub line: usize,
    /// Start column of the tag.
    pub start_col: usize,
    /// End column of the tag.
    pub end_col: usize,
    /// Byte offset of the start of the tag.
    pub byte_offset: usize,
    /// Byte offset just past the end of the tag (exclusive; see `is_in_html_tag`).
    pub byte_end: usize,
    /// Name of the tag.
    pub tag_name: String,
    /// Whether this is a closing tag.
    pub is_closing: bool,
    /// Whether this is a self-closing tag.
    pub is_self_closing: bool,
    /// The tag text as written in the source.
    pub raw_content: String,
}
435
/// An emphasis span found in the document.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number of the span (matched against by `emphasis_spans_on_line`).
    pub line: usize,
    /// Start column of the span.
    pub start_col: usize,
    /// End column of the span.
    pub end_col: usize,
    /// Byte offset of the start of the span.
    pub byte_offset: usize,
    /// Byte offset of the end of the span.
    pub byte_end: usize,
    /// The emphasis marker character.
    pub marker: char,
    /// Number of marker characters in the delimiter.
    pub marker_count: usize,
    /// Content of the span.
    pub content: String,
}
456
/// A table row found in the document.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number of the row (matched against by `table_rows_on_line`).
    pub line: usize,
    /// Whether the row is a separator row.
    pub is_separator: bool,
    /// Number of columns in the row.
    pub column_count: usize,
    /// Alignment of each column, as strings (NOTE(review): value vocabulary defined elsewhere).
    pub column_alignments: Vec<String>,
}
469
/// A bare URL (or similar) found in the document text.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number of the URL (matched against by `bare_urls_on_line`).
    pub line: usize,
    /// Start column of the URL.
    pub start_col: usize,
    /// End column of the URL.
    pub end_col: usize,
    /// Byte offset of the start of the URL.
    pub byte_offset: usize,
    /// Byte offset of the end of the URL.
    pub byte_end: usize,
    /// The URL text.
    pub url: String,
    /// Kind of URL, as a string (NOTE(review): value vocabulary defined elsewhere).
    pub url_type: String,
}
488
/// Pre-parsed view of a markdown document shared by lint rules.
///
/// Built once by `LintContext::new`; some derived data (HTML tags, emphasis
/// spans, table rows, bare URLs, mixed-list-nesting flag) is computed lazily
/// on first access and cached.
pub struct LintContext<'a> {
    /// The full document text.
    pub content: &'a str,
    /// Byte offset of the start of each line (the first entry is 0).
    pub line_offsets: Vec<usize>,
    /// Byte ranges of code blocks, as returned by `CodeBlockUtils::detect_code_blocks`.
    pub code_blocks: Vec<(usize, usize)>,
    /// Per-line information, indexed by line number minus one.
    pub lines: Vec<LineInfo>,
    /// Links found in the document.
    pub links: Vec<ParsedLink<'a>>,
    /// Images found in the document.
    pub images: Vec<ParsedImage<'a>>,
    /// References the parser reported as broken.
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Footnote references found in the document.
    pub footnote_refs: Vec<FootnoteRef>,
    /// Link reference definitions found in the document.
    pub reference_defs: Vec<ReferenceDef>,
    /// Cached code spans; pre-populated by `new`.
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>,
    /// List blocks found in the document.
    pub list_blocks: Vec<ListBlock>,
    /// Per-character counters for quick pre-checks.
    pub char_frequency: CharFrequency,
    /// Lazily computed HTML tags.
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>,
    /// Lazily computed emphasis spans.
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>,
    /// Lazily computed table rows.
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>,
    /// Lazily computed bare URLs.
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>,
    /// Lazily computed result of `has_mixed_list_nesting`.
    has_mixed_list_nesting_cache: OnceLock<bool>,
    /// Byte ranges of HTML comments.
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Table blocks found in the document.
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Line index built from the document text.
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    /// Byte ranges (start inclusive, end exclusive) of Jinja constructs.
    jinja_ranges: Vec<(usize, usize)>,
    /// Markdown flavor the document is linted as.
    pub flavor: MarkdownFlavor,
    /// Path of the source file, when known.
    pub source_file: Option<PathBuf>,
}
514
/// The pieces of a blockquote line, all borrowed from the original line.
struct BlockquoteComponents<'a> {
    /// Leading spaces/tabs before the first `>`.
    indent: &'a str,
    /// The run of consecutive `>` characters.
    markers: &'a str,
    /// Spaces/tabs immediately after the markers.
    spaces_after: &'a str,
    /// Everything after `spaces_after`, up to the end of the line.
    content: &'a str,
}

/// Splits `line` into indent, `>` markers, whitespace after the markers and
/// the remaining content.
///
/// Returns `None` when the first character after the leading spaces/tabs is
/// not `>`. Only a run of directly adjacent `>` characters counts as markers;
/// a `>` separated by whitespace ends up in `content`.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    const BLANKS: [char; 2] = [' ', '\t'];

    let after_indent = line.trim_start_matches(BLANKS);
    let indent = &line[..line.len() - after_indent.len()];

    if !after_indent.starts_with('>') {
        return None;
    }

    let after_markers = after_indent.trim_start_matches('>');
    let markers = &after_indent[..after_indent.len() - after_markers.len()];

    let content = after_markers.trim_start_matches(BLANKS);
    let spaces_after = &after_markers[..after_markers.len() - content.len()];

    Some(BlockquoteComponents {
        indent,
        markers,
        spaces_after,
        content,
    })
}
559
560impl<'a> LintContext<'a> {
    /// Builds a lint context for `content`.
    ///
    /// Runs every eager analysis pass in a fixed order (later passes read the
    /// results of earlier ones) and leaves the lazy caches empty, except for
    /// the code-span cache, which is pre-populated.
    ///
    /// On non-wasm targets, setting the `RUMDL_PROFILE_QUADRATIC` environment
    /// variable makes each pass print its elapsed time to stderr.
    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
        #[cfg(not(target_arch = "wasm32"))]
        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
        #[cfg(target_arch = "wasm32")]
        let profile = false;

        // Byte offset of the start of every line: 0, then one past each '\n'.
        let line_offsets = profile_section!("Line offsets", profile, {
            let mut offsets = vec![0];
            for (i, c) in content.char_indices() {
                if c == '\n' {
                    offsets.push(i + 1);
                }
            }
            offsets
        });

        let code_blocks = profile_section!("Code blocks", profile, CodeBlockUtils::detect_code_blocks(content));

        let html_comment_ranges = profile_section!(
            "HTML comment ranges",
            profile,
            crate::utils::skip_context::compute_html_comment_ranges(content)
        );

        // Autodoc block ranges are only computed for the MkDocs flavor.
        let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
            if flavor == MarkdownFlavor::MkDocs {
                crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
            } else {
                Vec::new()
            }
        });

        // `lines` is created here and then refined in place by the passes below.
        let mut lines = profile_section!(
            "Basic line info",
            profile,
            Self::compute_basic_line_info(
                content,
                &line_offsets,
                &code_blocks,
                flavor,
                &html_comment_ranges,
                &autodoc_ranges,
            )
        );

        profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));

        profile_section!(
            "ESM blocks",
            profile,
            Self::detect_esm_blocks(content, &mut lines, flavor)
        );

        let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));

        profile_section!(
            "Headings & blockquotes",
            profile,
            Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
        );

        let code_spans = profile_section!("Code spans", profile, Self::parse_code_spans(content, &lines));

        // Flag every line after the first of a multi-line code span.
        for span in &code_spans {
            if span.end_line > span.line {
                for line_num in (span.line + 1)..=span.end_line {
                    if let Some(line_info) = lines.get_mut(line_num - 1) {
                        line_info.in_code_span_continuation = true;
                    }
                }
            }
        }

        let (links, broken_links, footnote_refs) = profile_section!(
            "Links",
            profile,
            Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
        );

        let images = profile_section!(
            "Images",
            profile,
            Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
        );

        let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));

        let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));

        let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));

        let table_blocks = profile_section!(
            "Table blocks",
            profile,
            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
                content,
                &code_blocks,
                &code_spans,
                &html_comment_ranges,
            )
        );

        let line_index = profile_section!(
            "Line index",
            profile,
            crate::utils::range_utils::LineIndex::new(content)
        );

        let jinja_ranges = profile_section!(
            "Jinja ranges",
            profile,
            crate::utils::jinja_utils::find_jinja_ranges(content)
        );

        Self {
            content,
            line_offsets,
            code_blocks,
            lines,
            links,
            images,
            broken_links,
            footnote_refs,
            reference_defs,
            // Code spans were already parsed above, so seed the cache with them.
            code_spans_cache: OnceLock::from(Arc::new(code_spans)),
            list_blocks,
            char_frequency,
            html_tags_cache: OnceLock::new(),
            emphasis_spans_cache: OnceLock::new(),
            table_rows_cache: OnceLock::new(),
            bare_urls_cache: OnceLock::new(),
            has_mixed_list_nesting_cache: OnceLock::new(),
            html_comment_ranges,
            table_blocks,
            line_index,
            jinja_ranges,
            flavor,
            source_file,
        }
    }
718
719 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
721 Arc::clone(
722 self.code_spans_cache
723 .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
724 )
725 }
726
727 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
729 &self.html_comment_ranges
730 }
731
732 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
734 Arc::clone(self.html_tags_cache.get_or_init(|| {
735 Arc::new(Self::parse_html_tags(
736 self.content,
737 &self.lines,
738 &self.code_blocks,
739 self.flavor,
740 ))
741 }))
742 }
743
744 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
746 Arc::clone(
747 self.emphasis_spans_cache
748 .get_or_init(|| Arc::new(Self::parse_emphasis_spans(self.content, &self.lines, &self.code_blocks))),
749 )
750 }
751
752 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
754 Arc::clone(
755 self.table_rows_cache
756 .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
757 )
758 }
759
760 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
762 Arc::clone(
763 self.bare_urls_cache
764 .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
765 )
766 }
767
768 pub fn has_mixed_list_nesting(&self) -> bool {
772 *self
773 .has_mixed_list_nesting_cache
774 .get_or_init(|| self.compute_mixed_list_nesting())
775 }
776
777 fn compute_mixed_list_nesting(&self) -> bool {
779 let mut stack: Vec<(usize, bool)> = Vec::new();
784 let mut last_was_blank = false;
785
786 for line_info in &self.lines {
787 if line_info.in_code_block
789 || line_info.in_front_matter
790 || line_info.in_mkdocstrings
791 || line_info.in_html_comment
792 || line_info.in_esm_block
793 {
794 continue;
795 }
796
797 if line_info.is_blank {
799 last_was_blank = true;
800 continue;
801 }
802
803 if let Some(list_item) = &line_info.list_item {
804 let current_pos = if list_item.marker_column == 1 {
806 0
807 } else {
808 list_item.marker_column
809 };
810
811 if last_was_blank && current_pos == 0 {
813 stack.clear();
814 }
815 last_was_blank = false;
816
817 while let Some(&(pos, _)) = stack.last() {
819 if pos >= current_pos {
820 stack.pop();
821 } else {
822 break;
823 }
824 }
825
826 if let Some(&(_, parent_is_ordered)) = stack.last()
828 && parent_is_ordered != list_item.is_ordered
829 {
830 return true; }
832
833 stack.push((current_pos, list_item.is_ordered));
834 } else {
835 last_was_blank = false;
837 }
838 }
839
840 false
841 }
842
843 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
845 match self.line_offsets.binary_search(&offset) {
846 Ok(line) => (line + 1, 1),
847 Err(line) => {
848 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
849 (line, offset - line_start + 1)
850 }
851 }
852 }
853
854 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
856 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
858 return true;
859 }
860
861 self.code_spans()
863 .iter()
864 .any(|span| pos >= span.byte_offset && pos < span.byte_end)
865 }
866
867 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
869 if line_num > 0 {
870 self.lines.get(line_num - 1)
871 } else {
872 None
873 }
874 }
875
876 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
878 self.line_info(line_num).map(|info| info.byte_offset)
879 }
880
881 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
883 let normalized_id = ref_id.to_lowercase();
884 self.reference_defs
885 .iter()
886 .find(|def| def.id == normalized_id)
887 .map(|def| def.url.as_str())
888 }
889
890 pub fn is_in_list_block(&self, line_num: usize) -> bool {
892 self.list_blocks
893 .iter()
894 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
895 }
896
897 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
899 self.list_blocks
900 .iter()
901 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
902 }
903
904 pub fn is_in_code_block(&self, line_num: usize) -> bool {
908 if line_num == 0 || line_num > self.lines.len() {
909 return false;
910 }
911 self.lines[line_num - 1].in_code_block
912 }
913
914 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
916 if line_num == 0 || line_num > self.lines.len() {
917 return false;
918 }
919 self.lines[line_num - 1].in_front_matter
920 }
921
922 pub fn is_in_html_block(&self, line_num: usize) -> bool {
924 if line_num == 0 || line_num > self.lines.len() {
925 return false;
926 }
927 self.lines[line_num - 1].in_html_block
928 }
929
930 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
932 if line_num == 0 || line_num > self.lines.len() {
933 return false;
934 }
935
936 let col_0indexed = if col > 0 { col - 1 } else { 0 };
940 let code_spans = self.code_spans();
941 code_spans.iter().any(|span| {
942 if line_num < span.line || line_num > span.end_line {
944 return false;
945 }
946
947 if span.line == span.end_line {
948 col_0indexed >= span.start_col && col_0indexed < span.end_col
950 } else if line_num == span.line {
951 col_0indexed >= span.start_col
953 } else if line_num == span.end_line {
954 col_0indexed < span.end_col
956 } else {
957 true
959 }
960 })
961 }
962
963 #[inline]
965 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
966 let code_spans = self.code_spans();
967 code_spans
968 .iter()
969 .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
970 }
971
972 #[inline]
975 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
976 self.reference_defs
977 .iter()
978 .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
979 }
980
981 #[inline]
985 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
986 self.html_comment_ranges
987 .iter()
988 .any(|range| byte_pos >= range.start && byte_pos < range.end)
989 }
990
991 #[inline]
994 pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
995 self.html_tags()
996 .iter()
997 .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
998 }
999
1000 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1002 self.jinja_ranges
1003 .iter()
1004 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1005 }
1006
1007 pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1009 self.reference_defs.iter().any(|def| {
1010 if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1011 byte_pos >= start && byte_pos < end
1012 } else {
1013 false
1014 }
1015 })
1016 }
1017
1018 pub fn has_char(&self, ch: char) -> bool {
1020 match ch {
1021 '#' => self.char_frequency.hash_count > 0,
1022 '*' => self.char_frequency.asterisk_count > 0,
1023 '_' => self.char_frequency.underscore_count > 0,
1024 '-' => self.char_frequency.hyphen_count > 0,
1025 '+' => self.char_frequency.plus_count > 0,
1026 '>' => self.char_frequency.gt_count > 0,
1027 '|' => self.char_frequency.pipe_count > 0,
1028 '[' => self.char_frequency.bracket_count > 0,
1029 '`' => self.char_frequency.backtick_count > 0,
1030 '<' => self.char_frequency.lt_count > 0,
1031 '!' => self.char_frequency.exclamation_count > 0,
1032 '\n' => self.char_frequency.newline_count > 0,
1033 _ => self.content.contains(ch), }
1035 }
1036
1037 pub fn char_count(&self, ch: char) -> usize {
1039 match ch {
1040 '#' => self.char_frequency.hash_count,
1041 '*' => self.char_frequency.asterisk_count,
1042 '_' => self.char_frequency.underscore_count,
1043 '-' => self.char_frequency.hyphen_count,
1044 '+' => self.char_frequency.plus_count,
1045 '>' => self.char_frequency.gt_count,
1046 '|' => self.char_frequency.pipe_count,
1047 '[' => self.char_frequency.bracket_count,
1048 '`' => self.char_frequency.backtick_count,
1049 '<' => self.char_frequency.lt_count,
1050 '!' => self.char_frequency.exclamation_count,
1051 '\n' => self.char_frequency.newline_count,
1052 _ => self.content.matches(ch).count(), }
1054 }
1055
1056 pub fn likely_has_headings(&self) -> bool {
1058 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
1060
1061 pub fn likely_has_lists(&self) -> bool {
1063 self.char_frequency.asterisk_count > 0
1064 || self.char_frequency.hyphen_count > 0
1065 || self.char_frequency.plus_count > 0
1066 }
1067
1068 pub fn likely_has_emphasis(&self) -> bool {
1070 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1071 }
1072
1073 pub fn likely_has_tables(&self) -> bool {
1075 self.char_frequency.pipe_count > 2
1076 }
1077
1078 pub fn likely_has_blockquotes(&self) -> bool {
1080 self.char_frequency.gt_count > 0
1081 }
1082
1083 pub fn likely_has_code(&self) -> bool {
1085 self.char_frequency.backtick_count > 0
1086 }
1087
1088 pub fn likely_has_links_or_images(&self) -> bool {
1090 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1091 }
1092
1093 pub fn likely_has_html(&self) -> bool {
1095 self.char_frequency.lt_count > 0
1096 }
1097
1098 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1100 self.html_tags()
1101 .iter()
1102 .filter(|tag| tag.line == line_num)
1103 .cloned()
1104 .collect()
1105 }
1106
1107 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1109 self.emphasis_spans()
1110 .iter()
1111 .filter(|span| span.line == line_num)
1112 .cloned()
1113 .collect()
1114 }
1115
1116 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1118 self.table_rows()
1119 .iter()
1120 .filter(|row| row.line == line_num)
1121 .cloned()
1122 .collect()
1123 }
1124
1125 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1127 self.bare_urls()
1128 .iter()
1129 .filter(|url| url.line == line_num)
1130 .cloned()
1131 .collect()
1132 }
1133
1134 #[inline]
1140 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1141 let idx = match lines.binary_search_by(|line| {
1143 if byte_offset < line.byte_offset {
1144 std::cmp::Ordering::Greater
1145 } else if byte_offset > line.byte_offset + line.byte_len {
1146 std::cmp::Ordering::Less
1147 } else {
1148 std::cmp::Ordering::Equal
1149 }
1150 }) {
1151 Ok(idx) => idx,
1152 Err(idx) => idx.saturating_sub(1),
1153 };
1154
1155 let line = &lines[idx];
1156 let line_num = idx + 1;
1157 let col = byte_offset.saturating_sub(line.byte_offset);
1158
1159 (idx, line_num, col)
1160 }
1161
1162 #[inline]
1164 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1165 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1167
1168 if idx > 0 {
1170 let span = &code_spans[idx - 1];
1171 if offset >= span.byte_offset && offset < span.byte_end {
1172 return true;
1173 }
1174 }
1175
1176 false
1177 }
1178
1179 fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1183 use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1184
1185 let mut link_ranges = Vec::new();
1186 let mut options = Options::empty();
1187 options.insert(Options::ENABLE_WIKILINKS);
1188 options.insert(Options::ENABLE_FOOTNOTES);
1189
1190 let parser = Parser::new_ext(content, options).into_offset_iter();
1191 let mut link_stack: Vec<usize> = Vec::new();
1192
1193 for (event, range) in parser {
1194 match event {
1195 Event::Start(Tag::Link { .. }) => {
1196 link_stack.push(range.start);
1197 }
1198 Event::End(TagEnd::Link) => {
1199 if let Some(start_pos) = link_stack.pop() {
1200 link_ranges.push((start_pos, range.end));
1201 }
1202 }
1203 _ => {}
1204 }
1205 }
1206
1207 link_ranges
1208 }
1209
1210 fn parse_links(
1212 content: &'a str,
1213 lines: &[LineInfo],
1214 code_blocks: &[(usize, usize)],
1215 code_spans: &[CodeSpan],
1216 flavor: MarkdownFlavor,
1217 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1218 ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1219 use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1220 use std::collections::HashSet;
1221
1222 let mut links = Vec::with_capacity(content.len() / 500);
1223 let mut broken_links = Vec::new();
1224 let mut footnote_refs = Vec::new();
1225
1226 let mut found_positions = HashSet::new();
1228
1229 let mut options = Options::empty();
1239 options.insert(Options::ENABLE_WIKILINKS);
1240 options.insert(Options::ENABLE_FOOTNOTES);
1241
1242 let parser = Parser::new_with_broken_link_callback(
1243 content,
1244 options,
1245 Some(|link: BrokenLink<'_>| {
1246 broken_links.push(BrokenLinkInfo {
1247 reference: link.reference.to_string(),
1248 span: link.span.clone(),
1249 });
1250 None
1251 }),
1252 )
1253 .into_offset_iter();
1254
1255 let mut link_stack: Vec<(
1256 usize,
1257 usize,
1258 pulldown_cmark::CowStr<'a>,
1259 LinkType,
1260 pulldown_cmark::CowStr<'a>,
1261 )> = Vec::new();
1262 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1265 match event {
1266 Event::Start(Tag::Link {
1267 link_type,
1268 dest_url,
1269 id,
1270 ..
1271 }) => {
1272 link_stack.push((range.start, range.end, dest_url, link_type, id));
1274 text_chunks.clear();
1275 }
1276 Event::Text(text) if !link_stack.is_empty() => {
1277 text_chunks.push((text.to_string(), range.start, range.end));
1279 }
1280 Event::Code(code) if !link_stack.is_empty() => {
1281 let code_text = format!("`{code}`");
1283 text_chunks.push((code_text, range.start, range.end));
1284 }
1285 Event::End(TagEnd::Link) => {
1286 if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1287 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1289 text_chunks.clear();
1290 continue;
1291 }
1292
1293 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1295
1296 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1298 text_chunks.clear();
1299 continue;
1300 }
1301
1302 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1303
1304 let is_reference = matches!(
1305 link_type,
1306 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1307 );
1308
1309 let link_text = if start_pos < content.len() {
1312 let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1313
1314 let mut close_pos = None;
1318 let mut depth = 0;
1319 let mut in_code_span = false;
1320
1321 for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1322 let mut backslash_count = 0;
1324 let mut j = i;
1325 while j > 0 && link_bytes[j - 1] == b'\\' {
1326 backslash_count += 1;
1327 j -= 1;
1328 }
1329 let is_escaped = backslash_count % 2 != 0;
1330
1331 if byte == b'`' && !is_escaped {
1333 in_code_span = !in_code_span;
1334 }
1335
1336 if !is_escaped && !in_code_span {
1338 if byte == b'[' {
1339 depth += 1;
1340 } else if byte == b']' {
1341 if depth == 0 {
1342 close_pos = Some(i);
1344 break;
1345 } else {
1346 depth -= 1;
1347 }
1348 }
1349 }
1350 }
1351
1352 if let Some(pos) = close_pos {
1353 Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1354 } else {
1355 Cow::Borrowed("")
1356 }
1357 } else {
1358 Cow::Borrowed("")
1359 };
1360
1361 let reference_id = if is_reference && !ref_id.is_empty() {
1363 Some(Cow::Owned(ref_id.to_lowercase()))
1364 } else if is_reference {
1365 Some(Cow::Owned(link_text.to_lowercase()))
1367 } else {
1368 None
1369 };
1370
1371 found_positions.insert(start_pos);
1373
1374 links.push(ParsedLink {
1375 line: line_num,
1376 start_col: col_start,
1377 end_col: col_end,
1378 byte_offset: start_pos,
1379 byte_end: range.end,
1380 text: link_text,
1381 url: Cow::Owned(url.to_string()),
1382 is_reference,
1383 reference_id,
1384 link_type,
1385 });
1386
1387 text_chunks.clear();
1388 }
1389 }
1390 Event::FootnoteReference(footnote_id) => {
1391 if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1394 continue;
1395 }
1396
1397 let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1398 footnote_refs.push(FootnoteRef {
1399 id: footnote_id.to_string(),
1400 line: line_num,
1401 byte_offset: range.start,
1402 byte_end: range.end,
1403 });
1404 }
1405 _ => {}
1406 }
1407 }
1408
1409 for cap in LINK_PATTERN.captures_iter(content) {
1413 let full_match = cap.get(0).unwrap();
1414 let match_start = full_match.start();
1415 let match_end = full_match.end();
1416
1417 if found_positions.contains(&match_start) {
1419 continue;
1420 }
1421
1422 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1424 continue;
1425 }
1426
1427 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1429 continue;
1430 }
1431
1432 if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1434 continue;
1435 }
1436
1437 if Self::is_offset_in_code_span(code_spans, match_start) {
1439 continue;
1440 }
1441
1442 if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1444 continue;
1445 }
1446
1447 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1449
1450 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1452 continue;
1453 }
1454
1455 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1456
1457 let text = cap.get(1).map_or("", |m| m.as_str());
1458
1459 if let Some(ref_id) = cap.get(6) {
1461 let ref_id_str = ref_id.as_str();
1462 let normalized_ref = if ref_id_str.is_empty() {
1463 Cow::Owned(text.to_lowercase()) } else {
1465 Cow::Owned(ref_id_str.to_lowercase())
1466 };
1467
1468 links.push(ParsedLink {
1470 line: line_num,
1471 start_col: col_start,
1472 end_col: col_end,
1473 byte_offset: match_start,
1474 byte_end: match_end,
1475 text: Cow::Borrowed(text),
1476 url: Cow::Borrowed(""), is_reference: true,
1478 reference_id: Some(normalized_ref),
1479 link_type: LinkType::Reference, });
1481 }
1482 }
1483
1484 (links, broken_links, footnote_refs)
1485 }
1486
1487 fn parse_images(
1489 content: &'a str,
1490 lines: &[LineInfo],
1491 code_blocks: &[(usize, usize)],
1492 code_spans: &[CodeSpan],
1493 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1494 ) -> Vec<ParsedImage<'a>> {
1495 use crate::utils::skip_context::is_in_html_comment_ranges;
1496 use std::collections::HashSet;
1497
1498 let mut images = Vec::with_capacity(content.len() / 1000);
1500 let mut found_positions = HashSet::new();
1501
1502 let parser = Parser::new(content).into_offset_iter();
1504 let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1505 Vec::new();
1506 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1509 match event {
1510 Event::Start(Tag::Image {
1511 link_type,
1512 dest_url,
1513 id,
1514 ..
1515 }) => {
1516 image_stack.push((range.start, dest_url, link_type, id));
1517 text_chunks.clear();
1518 }
1519 Event::Text(text) if !image_stack.is_empty() => {
1520 text_chunks.push((text.to_string(), range.start, range.end));
1521 }
1522 Event::Code(code) if !image_stack.is_empty() => {
1523 let code_text = format!("`{code}`");
1524 text_chunks.push((code_text, range.start, range.end));
1525 }
1526 Event::End(TagEnd::Image) => {
1527 if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1528 if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1530 continue;
1531 }
1532
1533 if Self::is_offset_in_code_span(code_spans, start_pos) {
1535 continue;
1536 }
1537
1538 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1540 continue;
1541 }
1542
1543 let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1545 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1546
1547 let is_reference = matches!(
1548 link_type,
1549 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1550 );
1551
1552 let alt_text = if start_pos < content.len() {
1555 let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1556
1557 let mut close_pos = None;
1560 let mut depth = 0;
1561
1562 if image_bytes.len() > 2 {
1563 for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1564 let mut backslash_count = 0;
1566 let mut j = i;
1567 while j > 0 && image_bytes[j - 1] == b'\\' {
1568 backslash_count += 1;
1569 j -= 1;
1570 }
1571 let is_escaped = backslash_count % 2 != 0;
1572
1573 if !is_escaped {
1574 if byte == b'[' {
1575 depth += 1;
1576 } else if byte == b']' {
1577 if depth == 0 {
1578 close_pos = Some(i);
1580 break;
1581 } else {
1582 depth -= 1;
1583 }
1584 }
1585 }
1586 }
1587 }
1588
1589 if let Some(pos) = close_pos {
1590 Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1591 } else {
1592 Cow::Borrowed("")
1593 }
1594 } else {
1595 Cow::Borrowed("")
1596 };
1597
1598 let reference_id = if is_reference && !ref_id.is_empty() {
1599 Some(Cow::Owned(ref_id.to_lowercase()))
1600 } else if is_reference {
1601 Some(Cow::Owned(alt_text.to_lowercase())) } else {
1603 None
1604 };
1605
1606 found_positions.insert(start_pos);
1607 images.push(ParsedImage {
1608 line: line_num,
1609 start_col: col_start,
1610 end_col: col_end,
1611 byte_offset: start_pos,
1612 byte_end: range.end,
1613 alt_text,
1614 url: Cow::Owned(url.to_string()),
1615 is_reference,
1616 reference_id,
1617 link_type,
1618 });
1619 }
1620 }
1621 _ => {}
1622 }
1623 }
1624
1625 for cap in IMAGE_PATTERN.captures_iter(content) {
1627 let full_match = cap.get(0).unwrap();
1628 let match_start = full_match.start();
1629 let match_end = full_match.end();
1630
1631 if found_positions.contains(&match_start) {
1633 continue;
1634 }
1635
1636 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1638 continue;
1639 }
1640
1641 if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1643 || Self::is_offset_in_code_span(code_spans, match_start)
1644 || is_in_html_comment_ranges(html_comment_ranges, match_start)
1645 {
1646 continue;
1647 }
1648
1649 if let Some(ref_id) = cap.get(6) {
1651 let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1652 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1653 let alt_text = cap.get(1).map_or("", |m| m.as_str());
1654 let ref_id_str = ref_id.as_str();
1655 let normalized_ref = if ref_id_str.is_empty() {
1656 Cow::Owned(alt_text.to_lowercase())
1657 } else {
1658 Cow::Owned(ref_id_str.to_lowercase())
1659 };
1660
1661 images.push(ParsedImage {
1662 line: line_num,
1663 start_col: col_start,
1664 end_col: col_end,
1665 byte_offset: match_start,
1666 byte_end: match_end,
1667 alt_text: Cow::Borrowed(alt_text),
1668 url: Cow::Borrowed(""),
1669 is_reference: true,
1670 reference_id: Some(normalized_ref),
1671 link_type: LinkType::Reference, });
1673 }
1674 }
1675
1676 images
1677 }
1678
1679 fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1681 let mut refs = Vec::with_capacity(lines.len() / 20); for (line_idx, line_info) in lines.iter().enumerate() {
1685 if line_info.in_code_block {
1687 continue;
1688 }
1689
1690 let line = line_info.content(content);
1691 let line_num = line_idx + 1;
1692
1693 if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1694 let id = cap.get(1).unwrap().as_str().to_lowercase();
1695 let url = cap.get(2).unwrap().as_str().to_string();
1696 let title_match = cap.get(3).or_else(|| cap.get(4));
1697 let title = title_match.map(|m| m.as_str().to_string());
1698
1699 let match_obj = cap.get(0).unwrap();
1702 let byte_offset = line_info.byte_offset + match_obj.start();
1703 let byte_end = line_info.byte_offset + match_obj.end();
1704
1705 let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1707 let start = line_info.byte_offset + m.start().saturating_sub(1);
1709 let end = line_info.byte_offset + m.end() + 1; (Some(start), Some(end))
1711 } else {
1712 (None, None)
1713 };
1714
1715 refs.push(ReferenceDef {
1716 line: line_num,
1717 id,
1718 url,
1719 title,
1720 byte_offset,
1721 byte_end,
1722 title_byte_start,
1723 title_byte_end,
1724 });
1725 }
1726 }
1727
1728 refs
1729 }
1730
/// Splits a line into its blockquote prefix and the remaining content.
///
/// The prefix consists of every leading `>` marker together with the
/// whitespace in front of it and at most one space or tab after it, so
/// nested quotes such as `> > text` are consumed completely.
///
/// Returns `None` when the first non-whitespace character is not `>`;
/// otherwise `Some((prefix, content))`, where `prefix` is a slice from the
/// start of `line` and `content` is everything after it.
#[inline]
fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
    let mut rest = line;
    let mut prefix_len = 0usize;

    while let Some(after_marker) = rest.trim_start().strip_prefix('>') {
        // Leading whitespace plus the `>` itself.
        prefix_len += rest.len() - after_marker.len();

        rest = match after_marker.strip_prefix([' ', '\t']) {
            Some(tail) => {
                // Exactly one separator character belongs to the prefix.
                prefix_len += 1;
                tail
            }
            None => after_marker,
        };
    }

    if prefix_len == 0 {
        None
    } else {
        Some((&line[..prefix_len], rest))
    }
}
1771
/// Recognises an unordered list marker (`-`, `*` or `+`) at the start of a
/// line, after optional space/tab indentation.
///
/// Returns `(indentation, marker, spacing, content)`, where `spacing` is the
/// run of spaces/tabs directly after the marker (possibly empty) and
/// `content` is the rest of the line. Returns `None` when the first
/// non-indent character is not one of the three markers.
#[inline]
fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
    const PAD: [char; 2] = [' ', '\t'];

    let after_indent = line.trim_start_matches(PAD);
    let indentation = &line[..line.len() - after_indent.len()];

    let mut chars = after_indent.chars();
    let marker = chars.next().filter(|c| matches!(c, '-' | '*' | '+'))?;
    let after_marker = chars.as_str();

    let body = after_marker.trim_start_matches(PAD);
    let spacing = &after_marker[..after_marker.len() - body.len()];

    Some((indentation, marker, spacing, body))
}
1804
/// Recognises an ordered list marker (ASCII digits followed by `.` or `)`)
/// at the start of a line, after optional space/tab indentation.
///
/// Returns `(indentation, number, delimiter, spacing, content)`, where
/// `number` is the digit run, `spacing` is the run of spaces/tabs after the
/// delimiter (possibly empty) and `content` is the rest of the line.
/// Returns `None` when there are no digits or no valid delimiter.
#[inline]
fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
    const PAD: [char; 2] = [' ', '\t'];

    let after_indent = line.trim_start_matches(PAD);
    let indentation = &line[..line.len() - after_indent.len()];

    let digit_count = after_indent.bytes().take_while(u8::is_ascii_digit).count();
    if digit_count == 0 {
        return None;
    }
    let (number, after_number) = after_indent.split_at(digit_count);

    let mut chars = after_number.chars();
    let delimiter = chars.next().filter(|c| matches!(c, '.' | ')'))?;
    let after_delimiter = chars.as_str();

    let body = after_delimiter.trim_start_matches(PAD);
    let spacing = &after_delimiter[..after_delimiter.len() - body.len()];

    Some((indentation, number, delimiter, spacing, body))
}
1852
/// Builds a per-line flag vector telling whether each line overlaps a code
/// block.
///
/// `line_offsets` holds the starting byte offset of every line (ascending)
/// and `code_blocks` holds `(start, end)` byte ranges. Range ends that fall
/// inside a multi-byte character are widened to the nearest character
/// boundary (start moves backwards, end moves forwards) and the end is
/// clamped to `content.len()`. The result has one entry per element of
/// `line_offsets`.
fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
    let content_len = content.len();
    let mut line_flags = vec![false; line_offsets.len()];

    for &(block_start, block_end) in code_blocks {
        let mut lo = block_start;
        while lo > 0 && !content.is_char_boundary(lo) {
            lo -= 1;
        }

        let mut hi = block_end.min(content_len);
        while hi < content_len && !content.is_char_boundary(hi) {
            hi += 1;
        }

        // First line: the last line starting at or before `lo`.
        // End (exclusive): the first line starting at or after `hi`.
        let first_line = line_offsets.partition_point(|&offset| offset <= lo).saturating_sub(1);
        let end_line = line_offsets.partition_point(|&offset| offset < hi);

        // `get_mut` yields `None` for an inverted range, leaving the flags
        // untouched in that case.
        if let Some(covered) = line_flags.get_mut(first_line..end_line) {
            covered.fill(true);
        }
    }

    line_flags
}
1912
1913 fn compute_basic_line_info(
1915 content: &str,
1916 line_offsets: &[usize],
1917 code_blocks: &[(usize, usize)],
1918 flavor: MarkdownFlavor,
1919 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1920 autodoc_ranges: &[crate::utils::skip_context::ByteRange],
1921 ) -> Vec<LineInfo> {
1922 let content_lines: Vec<&str> = content.lines().collect();
1923 let mut lines = Vec::with_capacity(content_lines.len());
1924
1925 let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
1927
1928 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
1931
1932 for (i, line) in content_lines.iter().enumerate() {
1933 let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
1934 let indent = line.len() - line.trim_start().len();
1935
1936 let blockquote_parse = Self::parse_blockquote_prefix(line);
1938
1939 let is_blank = if let Some((_, content)) = blockquote_parse {
1941 content.trim().is_empty()
1943 } else {
1944 line.trim().is_empty()
1945 };
1946
1947 let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
1949
1950 let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
1952 && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
1953 let line_end_offset = byte_offset + line.len();
1956 let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
1957 html_comment_ranges,
1958 byte_offset,
1959 line_end_offset,
1960 );
1961 let list_item = if !(in_code_block
1962 || is_blank
1963 || in_mkdocstrings
1964 || in_html_comment
1965 || (front_matter_end > 0 && i < front_matter_end))
1966 {
1967 let (line_for_list_check, blockquote_prefix_len) = if let Some((prefix, content)) = blockquote_parse {
1969 (content, prefix.len())
1970 } else {
1971 (&**line, 0)
1972 };
1973
1974 if let Some((leading_spaces, marker, spacing, _content)) =
1975 Self::parse_unordered_list(line_for_list_check)
1976 {
1977 let marker_column = blockquote_prefix_len + leading_spaces.len();
1978 let content_column = marker_column + 1 + spacing.len();
1979
1980 if spacing.is_empty() {
1987 None
1988 } else {
1989 Some(ListItemInfo {
1990 marker: marker.to_string(),
1991 is_ordered: false,
1992 number: None,
1993 marker_column,
1994 content_column,
1995 })
1996 }
1997 } else if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
1998 Self::parse_ordered_list(line_for_list_check)
1999 {
2000 let marker = format!("{number_str}{delimiter}");
2001 let marker_column = blockquote_prefix_len + leading_spaces.len();
2002 let content_column = marker_column + marker.len() + spacing.len();
2003
2004 if spacing.is_empty() {
2007 None
2008 } else {
2009 Some(ListItemInfo {
2010 marker,
2011 is_ordered: true,
2012 number: number_str.parse().ok(),
2013 marker_column,
2014 content_column,
2015 })
2016 }
2017 } else {
2018 None
2019 }
2020 } else {
2021 None
2022 };
2023
2024 let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2027 let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2028
2029 lines.push(LineInfo {
2030 byte_offset,
2031 byte_len: line.len(),
2032 indent,
2033 is_blank,
2034 in_code_block,
2035 in_front_matter,
2036 in_html_block: false, in_html_comment,
2038 list_item,
2039 heading: None, blockquote: None, in_mkdocstrings,
2042 in_esm_block: false, in_code_span_continuation: false, is_horizontal_rule: is_hr,
2045 });
2046 }
2047
2048 lines
2049 }
2050
2051 fn detect_headings_and_blockquotes(
2053 content: &str,
2054 lines: &mut [LineInfo],
2055 flavor: MarkdownFlavor,
2056 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2057 link_byte_ranges: &[(usize, usize)],
2058 ) {
2059 static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
2061 LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
2062 static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
2063 LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
2064
2065 let content_lines: Vec<&str> = content.lines().collect();
2066
2067 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2069
2070 for i in 0..lines.len() {
2072 if lines[i].in_code_block {
2073 continue;
2074 }
2075
2076 if front_matter_end > 0 && i < front_matter_end {
2078 continue;
2079 }
2080
2081 if lines[i].in_html_block {
2083 continue;
2084 }
2085
2086 let line = content_lines[i];
2087
2088 if let Some(bq) = parse_blockquote_detailed(line) {
2090 let nesting_level = bq.markers.len(); let marker_column = bq.indent.len();
2092
2093 let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
2095
2096 let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
2098 let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
2101
2102 let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
2106
2107 lines[i].blockquote = Some(BlockquoteInfo {
2108 nesting_level,
2109 indent: bq.indent.to_string(),
2110 marker_column,
2111 prefix,
2112 content: bq.content.to_string(),
2113 has_no_space_after_marker: has_no_space,
2114 has_multiple_spaces_after_marker: has_multiple_spaces,
2115 needs_md028_fix,
2116 });
2117 }
2118
2119 if lines[i].is_blank {
2121 continue;
2122 }
2123
2124 let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
2127 crate::utils::mkdocs_snippets::is_snippet_section_start(line)
2128 || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
2129 } else {
2130 false
2131 };
2132
2133 if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
2134 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
2136 continue;
2137 }
2138 let line_offset = lines[i].byte_offset;
2141 if link_byte_ranges
2142 .iter()
2143 .any(|&(start, end)| line_offset > start && line_offset < end)
2144 {
2145 continue;
2146 }
2147 let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
2148 let hashes = caps.get(2).map_or("", |m| m.as_str());
2149 let spaces_after = caps.get(3).map_or("", |m| m.as_str());
2150 let rest = caps.get(4).map_or("", |m| m.as_str());
2151
2152 let level = hashes.len() as u8;
2153 let marker_column = leading_spaces.len();
2154
2155 let (text, has_closing, closing_seq) = {
2157 let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
2159 if rest[id_start..].trim_end().ends_with('}') {
2161 (&rest[..id_start], &rest[id_start..])
2163 } else {
2164 (rest, "")
2165 }
2166 } else {
2167 (rest, "")
2168 };
2169
2170 let trimmed_rest = rest_without_id.trim_end();
2172 if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
2173 let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();
2176
2177 let last_hash_char_idx = char_positions
2179 .iter()
2180 .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);
2181
2182 if let Some(mut char_idx) = last_hash_char_idx {
2183 while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
2185 char_idx -= 1;
2186 }
2187
2188 let start_of_hashes = char_positions[char_idx].0;
2190
2191 let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();
2193
2194 let potential_closing = &trimmed_rest[start_of_hashes..];
2196 let is_all_hashes = potential_closing.chars().all(|c| c == '#');
2197
2198 if is_all_hashes && has_space_before {
2199 let closing_hashes = potential_closing.to_string();
2201 let text_part = if !custom_id_part.is_empty() {
2204 format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
2207 } else {
2208 trimmed_rest[..start_of_hashes].trim_end().to_string()
2209 };
2210 (text_part, true, closing_hashes)
2211 } else {
2212 (rest.to_string(), false, String::new())
2214 }
2215 } else {
2216 (rest.to_string(), false, String::new())
2218 }
2219 } else {
2220 (rest.to_string(), false, String::new())
2222 }
2223 };
2224
2225 let content_column = marker_column + hashes.len() + spaces_after.len();
2226
2227 let raw_text = text.trim().to_string();
2229 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2230
2231 if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
2233 let next_line = content_lines[i + 1];
2234 if !lines[i + 1].in_code_block
2235 && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
2236 && let Some(next_line_id) =
2237 crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
2238 {
2239 custom_id = Some(next_line_id);
2240 }
2241 }
2242
2243 let is_valid = !spaces_after.is_empty()
2253 || rest.is_empty()
2254 || level > 1
2255 || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());
2256
2257 lines[i].heading = Some(HeadingInfo {
2258 level,
2259 style: HeadingStyle::ATX,
2260 marker: hashes.to_string(),
2261 marker_column,
2262 content_column,
2263 text: clean_text,
2264 custom_id,
2265 raw_text,
2266 has_closing_sequence: has_closing,
2267 closing_sequence: closing_seq,
2268 is_valid,
2269 });
2270 }
2271 else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2273 let next_line = content_lines[i + 1];
2274 if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2275 if front_matter_end > 0 && i < front_matter_end {
2277 continue;
2278 }
2279
2280 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2282 {
2283 continue;
2284 }
2285
2286 let underline = next_line.trim();
2287
2288 let level = if underline.starts_with('=') { 1 } else { 2 };
2289 let style = if level == 1 {
2290 HeadingStyle::Setext1
2291 } else {
2292 HeadingStyle::Setext2
2293 };
2294
2295 let raw_text = line.trim().to_string();
2297 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2298
2299 if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2301 let attr_line = content_lines[i + 2];
2302 if !lines[i + 2].in_code_block
2303 && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2304 && let Some(attr_line_id) =
2305 crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2306 {
2307 custom_id = Some(attr_line_id);
2308 }
2309 }
2310
2311 lines[i].heading = Some(HeadingInfo {
2312 level,
2313 style,
2314 marker: underline.to_string(),
2315 marker_column: next_line.len() - next_line.trim_start().len(),
2316 content_column: lines[i].indent,
2317 text: clean_text,
2318 custom_id,
2319 raw_text,
2320 has_closing_sequence: false,
2321 closing_sequence: String::new(),
2322 is_valid: true, });
2324 }
2325 }
2326 }
2327 }
2328
2329 fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2331 const BLOCK_ELEMENTS: &[&str] = &[
2334 "address",
2335 "article",
2336 "aside",
2337 "audio",
2338 "blockquote",
2339 "canvas",
2340 "details",
2341 "dialog",
2342 "dd",
2343 "div",
2344 "dl",
2345 "dt",
2346 "embed",
2347 "fieldset",
2348 "figcaption",
2349 "figure",
2350 "footer",
2351 "form",
2352 "h1",
2353 "h2",
2354 "h3",
2355 "h4",
2356 "h5",
2357 "h6",
2358 "header",
2359 "hr",
2360 "iframe",
2361 "li",
2362 "main",
2363 "menu",
2364 "nav",
2365 "noscript",
2366 "object",
2367 "ol",
2368 "p",
2369 "picture",
2370 "pre",
2371 "script",
2372 "search",
2373 "section",
2374 "source",
2375 "style",
2376 "summary",
2377 "svg",
2378 "table",
2379 "tbody",
2380 "td",
2381 "template",
2382 "textarea",
2383 "tfoot",
2384 "th",
2385 "thead",
2386 "tr",
2387 "track",
2388 "ul",
2389 "video",
2390 ];
2391
2392 let mut i = 0;
2393 while i < lines.len() {
2394 if lines[i].in_code_block || lines[i].in_front_matter {
2396 i += 1;
2397 continue;
2398 }
2399
2400 let trimmed = lines[i].content(content).trim_start();
2401
2402 if trimmed.starts_with('<') && trimmed.len() > 1 {
2404 let after_bracket = &trimmed[1..];
2406 let is_closing = after_bracket.starts_with('/');
2407 let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2408
2409 let tag_name = tag_start
2411 .chars()
2412 .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2413 .collect::<String>()
2414 .to_lowercase();
2415
2416 if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2418 lines[i].in_html_block = true;
2420
2421 if !is_closing {
2424 let closing_tag = format!("</{tag_name}>");
2425 let allow_blank_lines = tag_name == "style" || tag_name == "script";
2427 let mut j = i + 1;
2428 while j < lines.len() && j < i + 100 {
2429 if !allow_blank_lines && lines[j].is_blank {
2432 break;
2433 }
2434
2435 lines[j].in_html_block = true;
2436
2437 if lines[j].content(content).contains(&closing_tag) {
2439 break;
2440 }
2441 j += 1;
2442 }
2443 }
2444 }
2445 }
2446
2447 i += 1;
2448 }
2449 }
2450
2451 fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2454 if !flavor.supports_esm_blocks() {
2456 return;
2457 }
2458
2459 let mut in_multiline_comment = false;
2460
2461 for line in lines.iter_mut() {
2462 if line.is_blank || line.in_html_comment {
2464 continue;
2465 }
2466
2467 let trimmed = line.content(content).trim_start();
2468
2469 if in_multiline_comment {
2471 if trimmed.contains("*/") {
2472 in_multiline_comment = false;
2473 }
2474 continue;
2475 }
2476
2477 if trimmed.starts_with("//") {
2479 continue;
2480 }
2481
2482 if trimmed.starts_with("/*") {
2484 if !trimmed.contains("*/") {
2485 in_multiline_comment = true;
2486 }
2487 continue;
2488 }
2489
2490 if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
2492 line.in_esm_block = true;
2493 } else {
2494 break;
2496 }
2497 }
2498 }
2499
2500 fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
2502 let mut code_spans = Vec::new();
2503
2504 if !content.contains('`') {
2506 return code_spans;
2507 }
2508
2509 let parser = Parser::new(content).into_offset_iter();
2511
2512 for (event, range) in parser {
2513 if let Event::Code(_) = event {
2514 let start_pos = range.start;
2515 let end_pos = range.end;
2516
2517 let full_span = &content[start_pos..end_pos];
2519 let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
2520
2521 let content_start = start_pos + backtick_count;
2523 let content_end = end_pos - backtick_count;
2524 let span_content = if content_start < content_end {
2525 content[content_start..content_end].to_string()
2526 } else {
2527 String::new()
2528 };
2529
2530 let line_idx = lines
2533 .partition_point(|line| line.byte_offset <= start_pos)
2534 .saturating_sub(1);
2535 let line_num = line_idx + 1;
2536 let byte_col_start = start_pos - lines[line_idx].byte_offset;
2537
2538 let end_line_idx = lines
2540 .partition_point(|line| line.byte_offset <= end_pos)
2541 .saturating_sub(1);
2542 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
2543
2544 let line_content = lines[line_idx].content(content);
2547 let col_start = if byte_col_start <= line_content.len() {
2548 line_content[..byte_col_start].chars().count()
2549 } else {
2550 line_content.chars().count()
2551 };
2552
2553 let end_line_content = lines[end_line_idx].content(content);
2554 let col_end = if byte_col_end <= end_line_content.len() {
2555 end_line_content[..byte_col_end].chars().count()
2556 } else {
2557 end_line_content.chars().count()
2558 };
2559
2560 code_spans.push(CodeSpan {
2561 line: line_num,
2562 end_line: end_line_idx + 1,
2563 start_col: col_start,
2564 end_col: col_end,
2565 byte_offset: start_pos,
2566 byte_end: end_pos,
2567 backtick_count,
2568 content: span_content,
2569 });
2570 }
2571 }
2572
2573 code_spans.sort_by_key(|span| span.byte_offset);
2575
2576 code_spans
2577 }
2578
2579 fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
2590 const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
2592
2593 #[inline]
2596 fn reset_tracking_state(
2597 list_item: &ListItemInfo,
2598 has_list_breaking_content: &mut bool,
2599 min_continuation: &mut usize,
2600 ) {
2601 *has_list_breaking_content = false;
2602 let marker_width = if list_item.is_ordered {
2603 list_item.marker.len() + 1 } else {
2605 list_item.marker.len()
2606 };
2607 *min_continuation = if list_item.is_ordered {
2608 marker_width
2609 } else {
2610 UNORDERED_LIST_MIN_CONTINUATION_INDENT
2611 };
2612 }
2613
2614 let mut list_blocks = Vec::with_capacity(lines.len() / 10); let mut current_block: Option<ListBlock> = None;
2617 let mut last_list_item_line = 0;
2618 let mut current_indent_level = 0;
2619 let mut last_marker_width = 0;
2620
2621 let mut has_list_breaking_content_since_last_item = false;
2623 let mut min_continuation_for_tracking = 0;
2624
2625 for (line_idx, line_info) in lines.iter().enumerate() {
2626 let line_num = line_idx + 1;
2627
2628 if line_info.in_code_block {
2630 if let Some(ref mut block) = current_block {
2631 let min_continuation_indent =
2633 CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
2634
2635 let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
2637
2638 match context {
2639 CodeBlockContext::Indented => {
2640 block.end_line = line_num;
2642 continue;
2643 }
2644 CodeBlockContext::Standalone => {
2645 let completed_block = current_block.take().unwrap();
2647 list_blocks.push(completed_block);
2648 continue;
2649 }
2650 CodeBlockContext::Adjacent => {
2651 block.end_line = line_num;
2653 continue;
2654 }
2655 }
2656 } else {
2657 continue;
2659 }
2660 }
2661
2662 let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
2664 caps.get(0).unwrap().as_str().to_string()
2665 } else {
2666 String::new()
2667 };
2668
2669 if current_block.is_some()
2672 && line_info.list_item.is_none()
2673 && !line_info.is_blank
2674 && !line_info.in_code_span_continuation
2675 {
2676 let line_content = line_info.content(content).trim();
2677
2678 let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
2683 let breaks_list = line_info.heading.is_some()
2684 || line_content.starts_with("---")
2685 || line_content.starts_with("***")
2686 || line_content.starts_with("___")
2687 || crate::utils::skip_context::is_table_line(line_content)
2688 || line_content.starts_with(">")
2689 || (line_info.indent > 0
2690 && line_info.indent < min_continuation_for_tracking
2691 && !is_lazy_continuation);
2692
2693 if breaks_list {
2694 has_list_breaking_content_since_last_item = true;
2695 }
2696 }
2697
2698 if line_info.in_code_span_continuation
2701 && line_info.list_item.is_none()
2702 && let Some(ref mut block) = current_block
2703 {
2704 block.end_line = line_num;
2705 }
2706
2707 let is_valid_continuation =
2712 line_info.indent >= min_continuation_for_tracking || (line_info.indent == 0 && !line_info.is_blank); if !line_info.in_code_span_continuation
2714 && line_info.list_item.is_none()
2715 && !line_info.is_blank
2716 && !line_info.in_code_block
2717 && is_valid_continuation
2718 && let Some(ref mut block) = current_block
2719 {
2720 block.end_line = line_num;
2721 }
2722
2723 if let Some(list_item) = &line_info.list_item {
2725 let item_indent = list_item.marker_column;
2727 let nesting = item_indent / 2; if let Some(ref mut block) = current_block {
2730 let is_nested = nesting > block.nesting_level;
2734 let same_type =
2735 (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
2736 let same_context = block.blockquote_prefix == blockquote_prefix;
2737 let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
2739
2740 let marker_compatible =
2742 block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
2743
2744 let has_non_list_content = has_list_breaking_content_since_last_item;
2747
2748 let mut continues_list = if is_nested {
2752 same_context && reasonable_distance && !has_non_list_content
2754 } else {
2755 same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
2757 };
2758
2759 if !continues_list && reasonable_distance && line_num > 0 && block.end_line == line_num - 1 {
2762 if block.item_lines.contains(&(line_num - 1)) {
2765 continues_list = true;
2767 } else {
2768 continues_list = true;
2772 }
2773 }
2774
2775 if continues_list {
2776 block.end_line = line_num;
2778 block.item_lines.push(line_num);
2779
2780 block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
2782 list_item.marker.len() + 1
2783 } else {
2784 list_item.marker.len()
2785 });
2786
2787 if !block.is_ordered
2789 && block.marker.is_some()
2790 && block.marker.as_ref() != Some(&list_item.marker)
2791 {
2792 block.marker = None;
2794 }
2795
2796 reset_tracking_state(
2798 list_item,
2799 &mut has_list_breaking_content_since_last_item,
2800 &mut min_continuation_for_tracking,
2801 );
2802 } else {
2803 list_blocks.push(block.clone());
2806
2807 *block = ListBlock {
2808 start_line: line_num,
2809 end_line: line_num,
2810 is_ordered: list_item.is_ordered,
2811 marker: if list_item.is_ordered {
2812 None
2813 } else {
2814 Some(list_item.marker.clone())
2815 },
2816 blockquote_prefix: blockquote_prefix.clone(),
2817 item_lines: vec![line_num],
2818 nesting_level: nesting,
2819 max_marker_width: if list_item.is_ordered {
2820 list_item.marker.len() + 1
2821 } else {
2822 list_item.marker.len()
2823 },
2824 };
2825
2826 reset_tracking_state(
2828 list_item,
2829 &mut has_list_breaking_content_since_last_item,
2830 &mut min_continuation_for_tracking,
2831 );
2832 }
2833 } else {
2834 current_block = Some(ListBlock {
2836 start_line: line_num,
2837 end_line: line_num,
2838 is_ordered: list_item.is_ordered,
2839 marker: if list_item.is_ordered {
2840 None
2841 } else {
2842 Some(list_item.marker.clone())
2843 },
2844 blockquote_prefix,
2845 item_lines: vec![line_num],
2846 nesting_level: nesting,
2847 max_marker_width: list_item.marker.len(),
2848 });
2849
2850 reset_tracking_state(
2852 list_item,
2853 &mut has_list_breaking_content_since_last_item,
2854 &mut min_continuation_for_tracking,
2855 );
2856 }
2857
2858 last_list_item_line = line_num;
2859 current_indent_level = item_indent;
2860 last_marker_width = if list_item.is_ordered {
2861 list_item.marker.len() + 1 } else {
2863 list_item.marker.len()
2864 };
2865 } else if let Some(ref mut block) = current_block {
2866 let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
2876 lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
2877 } else {
2878 false
2879 };
2880
2881 let min_continuation_indent = if block.is_ordered {
2885 current_indent_level + last_marker_width
2886 } else {
2887 current_indent_level + 2 };
2889
2890 if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
2891 block.end_line = line_num;
2893 } else if line_info.is_blank {
2894 let mut check_idx = line_idx + 1;
2897 let mut found_continuation = false;
2898
2899 while check_idx < lines.len() && lines[check_idx].is_blank {
2901 check_idx += 1;
2902 }
2903
2904 if check_idx < lines.len() {
2905 let next_line = &lines[check_idx];
2906 if !next_line.in_code_block && next_line.indent >= min_continuation_indent {
2908 found_continuation = true;
2909 }
2910 else if !next_line.in_code_block
2912 && next_line.list_item.is_some()
2913 && let Some(item) = &next_line.list_item
2914 {
2915 let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
2916 .find(next_line.content(content))
2917 .map_or(String::new(), |m| m.as_str().to_string());
2918 if item.marker_column == current_indent_level
2919 && item.is_ordered == block.is_ordered
2920 && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
2921 {
2922 let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
2925 if let Some(between_line) = lines.get(idx) {
2926 let between_content = between_line.content(content);
2927 let trimmed = between_content.trim();
2928 if trimmed.is_empty() {
2930 return false;
2931 }
2932 let line_indent = between_content.len() - between_content.trim_start().len();
2934
2935 if trimmed.starts_with("```")
2937 || trimmed.starts_with("~~~")
2938 || trimmed.starts_with("---")
2939 || trimmed.starts_with("***")
2940 || trimmed.starts_with("___")
2941 || trimmed.starts_with(">")
2942 || crate::utils::skip_context::is_table_line(trimmed)
2943 || between_line.heading.is_some()
2944 {
2945 return true; }
2947
2948 line_indent >= min_continuation_indent
2950 } else {
2951 false
2952 }
2953 });
2954
2955 if block.is_ordered {
2956 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2959 if let Some(between_line) = lines.get(idx) {
2960 let trimmed = between_line.content(content).trim();
2961 if trimmed.is_empty() {
2962 return false;
2963 }
2964 trimmed.starts_with("```")
2966 || trimmed.starts_with("~~~")
2967 || trimmed.starts_with("---")
2968 || trimmed.starts_with("***")
2969 || trimmed.starts_with("___")
2970 || trimmed.starts_with(">")
2971 || crate::utils::skip_context::is_table_line(trimmed)
2972 || between_line.heading.is_some()
2973 } else {
2974 false
2975 }
2976 });
2977 found_continuation = !has_structural_separators;
2978 } else {
2979 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2981 if let Some(between_line) = lines.get(idx) {
2982 let trimmed = between_line.content(content).trim();
2983 if trimmed.is_empty() {
2984 return false;
2985 }
2986 trimmed.starts_with("```")
2988 || trimmed.starts_with("~~~")
2989 || trimmed.starts_with("---")
2990 || trimmed.starts_with("***")
2991 || trimmed.starts_with("___")
2992 || trimmed.starts_with(">")
2993 || crate::utils::skip_context::is_table_line(trimmed)
2994 || between_line.heading.is_some()
2995 } else {
2996 false
2997 }
2998 });
2999 found_continuation = !has_structural_separators;
3000 }
3001 }
3002 }
3003 }
3004
3005 if found_continuation {
3006 block.end_line = line_num;
3008 } else {
3009 list_blocks.push(block.clone());
3011 current_block = None;
3012 }
3013 } else {
3014 let min_required_indent = if block.is_ordered {
3017 current_indent_level + last_marker_width
3018 } else {
3019 current_indent_level + 2
3020 };
3021
3022 let line_content = line_info.content(content).trim();
3027
3028 let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
3030
3031 let is_structural_separator = line_info.heading.is_some()
3032 || line_content.starts_with("```")
3033 || line_content.starts_with("~~~")
3034 || line_content.starts_with("---")
3035 || line_content.starts_with("***")
3036 || line_content.starts_with("___")
3037 || line_content.starts_with(">")
3038 || looks_like_table;
3039
3040 let is_lazy_continuation = !is_structural_separator
3043 && !line_info.is_blank
3044 && (line_info.indent == 0 || line_info.indent >= min_required_indent);
3045
3046 if is_lazy_continuation {
3047 let content_to_check = if !blockquote_prefix.is_empty() {
3050 line_info
3052 .content(content)
3053 .strip_prefix(&blockquote_prefix)
3054 .unwrap_or(line_info.content(content))
3055 .trim()
3056 } else {
3057 line_info.content(content).trim()
3058 };
3059
3060 let starts_with_uppercase = content_to_check.chars().next().is_some_and(|c| c.is_uppercase());
3061
3062 if starts_with_uppercase && last_list_item_line > 0 {
3065 list_blocks.push(block.clone());
3067 current_block = None;
3068 } else {
3069 block.end_line = line_num;
3071 }
3072 } else {
3073 list_blocks.push(block.clone());
3075 current_block = None;
3076 }
3077 }
3078 }
3079 }
3080
3081 if let Some(block) = current_block {
3083 list_blocks.push(block);
3084 }
3085
3086 merge_adjacent_list_blocks(content, &mut list_blocks, lines);
3088
3089 list_blocks
3090 }
3091
3092 fn compute_char_frequency(content: &str) -> CharFrequency {
3094 let mut frequency = CharFrequency::default();
3095
3096 for ch in content.chars() {
3097 match ch {
3098 '#' => frequency.hash_count += 1,
3099 '*' => frequency.asterisk_count += 1,
3100 '_' => frequency.underscore_count += 1,
3101 '-' => frequency.hyphen_count += 1,
3102 '+' => frequency.plus_count += 1,
3103 '>' => frequency.gt_count += 1,
3104 '|' => frequency.pipe_count += 1,
3105 '[' => frequency.bracket_count += 1,
3106 '`' => frequency.backtick_count += 1,
3107 '<' => frequency.lt_count += 1,
3108 '!' => frequency.exclamation_count += 1,
3109 '\n' => frequency.newline_count += 1,
3110 _ => {}
3111 }
3112 }
3113
3114 frequency
3115 }
3116
3117 fn parse_html_tags(
3119 content: &str,
3120 lines: &[LineInfo],
3121 code_blocks: &[(usize, usize)],
3122 flavor: MarkdownFlavor,
3123 ) -> Vec<HtmlTag> {
3124 static HTML_TAG_REGEX: LazyLock<regex::Regex> =
3125 LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
3126
3127 let mut html_tags = Vec::with_capacity(content.matches('<').count());
3128
3129 for cap in HTML_TAG_REGEX.captures_iter(content) {
3130 let full_match = cap.get(0).unwrap();
3131 let match_start = full_match.start();
3132 let match_end = full_match.end();
3133
3134 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3136 continue;
3137 }
3138
3139 let is_closing = !cap.get(1).unwrap().as_str().is_empty();
3140 let tag_name_original = cap.get(2).unwrap().as_str();
3141 let tag_name = tag_name_original.to_lowercase();
3142 let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
3143
3144 if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
3147 continue;
3148 }
3149
3150 let mut line_num = 1;
3152 let mut col_start = match_start;
3153 let mut col_end = match_end;
3154 for (idx, line_info) in lines.iter().enumerate() {
3155 if match_start >= line_info.byte_offset {
3156 line_num = idx + 1;
3157 col_start = match_start - line_info.byte_offset;
3158 col_end = match_end - line_info.byte_offset;
3159 } else {
3160 break;
3161 }
3162 }
3163
3164 html_tags.push(HtmlTag {
3165 line: line_num,
3166 start_col: col_start,
3167 end_col: col_end,
3168 byte_offset: match_start,
3169 byte_end: match_end,
3170 tag_name,
3171 is_closing,
3172 is_self_closing,
3173 raw_content: full_match.as_str().to_string(),
3174 });
3175 }
3176
3177 html_tags
3178 }
3179
3180 fn parse_emphasis_spans(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<EmphasisSpan> {
3182 static EMPHASIS_REGEX: LazyLock<regex::Regex> =
3183 LazyLock::new(|| regex::Regex::new(r"(\*{1,3}|_{1,3})([^*_\s][^*_]*?)(\*{1,3}|_{1,3})").unwrap());
3184
3185 let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
3186
3187 for cap in EMPHASIS_REGEX.captures_iter(content) {
3188 let full_match = cap.get(0).unwrap();
3189 let match_start = full_match.start();
3190 let match_end = full_match.end();
3191
3192 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3194 continue;
3195 }
3196
3197 let opening_markers = cap.get(1).unwrap().as_str();
3198 let content_part = cap.get(2).unwrap().as_str();
3199 let closing_markers = cap.get(3).unwrap().as_str();
3200
3201 if opening_markers.chars().next() != closing_markers.chars().next()
3203 || opening_markers.len() != closing_markers.len()
3204 {
3205 continue;
3206 }
3207
3208 let marker = opening_markers.chars().next().unwrap();
3209 let marker_count = opening_markers.len();
3210
3211 let mut line_num = 1;
3213 let mut col_start = match_start;
3214 let mut col_end = match_end;
3215 for (idx, line_info) in lines.iter().enumerate() {
3216 if match_start >= line_info.byte_offset {
3217 line_num = idx + 1;
3218 col_start = match_start - line_info.byte_offset;
3219 col_end = match_end - line_info.byte_offset;
3220 } else {
3221 break;
3222 }
3223 }
3224
3225 emphasis_spans.push(EmphasisSpan {
3226 line: line_num,
3227 start_col: col_start,
3228 end_col: col_end,
3229 byte_offset: match_start,
3230 byte_end: match_end,
3231 marker,
3232 marker_count,
3233 content: content_part.to_string(),
3234 });
3235 }
3236
3237 emphasis_spans
3238 }
3239
3240 fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
3242 let mut table_rows = Vec::with_capacity(lines.len() / 20);
3243
3244 for (line_idx, line_info) in lines.iter().enumerate() {
3245 if line_info.in_code_block || line_info.is_blank {
3247 continue;
3248 }
3249
3250 let line = line_info.content(content);
3251 let line_num = line_idx + 1;
3252
3253 if !line.contains('|') {
3255 continue;
3256 }
3257
3258 let parts: Vec<&str> = line.split('|').collect();
3260 let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
3261
3262 let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
3264 let mut column_alignments = Vec::new();
3265
3266 if is_separator {
3267 for part in &parts[1..parts.len() - 1] {
3268 let trimmed = part.trim();
3270 let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
3271 "center".to_string()
3272 } else if trimmed.ends_with(':') {
3273 "right".to_string()
3274 } else if trimmed.starts_with(':') {
3275 "left".to_string()
3276 } else {
3277 "none".to_string()
3278 };
3279 column_alignments.push(alignment);
3280 }
3281 }
3282
3283 table_rows.push(TableRow {
3284 line: line_num,
3285 is_separator,
3286 column_count,
3287 column_alignments,
3288 });
3289 }
3290
3291 table_rows
3292 }
3293
3294 fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
3296 let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
3297
3298 for cap in BARE_URL_PATTERN.captures_iter(content) {
3300 let full_match = cap.get(0).unwrap();
3301 let match_start = full_match.start();
3302 let match_end = full_match.end();
3303
3304 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3306 continue;
3307 }
3308
3309 let preceding_char = if match_start > 0 {
3311 content.chars().nth(match_start - 1)
3312 } else {
3313 None
3314 };
3315 let following_char = content.chars().nth(match_end);
3316
3317 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3318 continue;
3319 }
3320 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3321 continue;
3322 }
3323
3324 let url = full_match.as_str();
3325 let url_type = if url.starts_with("https://") {
3326 "https"
3327 } else if url.starts_with("http://") {
3328 "http"
3329 } else if url.starts_with("ftp://") {
3330 "ftp"
3331 } else {
3332 "other"
3333 };
3334
3335 let mut line_num = 1;
3337 let mut col_start = match_start;
3338 let mut col_end = match_end;
3339 for (idx, line_info) in lines.iter().enumerate() {
3340 if match_start >= line_info.byte_offset {
3341 line_num = idx + 1;
3342 col_start = match_start - line_info.byte_offset;
3343 col_end = match_end - line_info.byte_offset;
3344 } else {
3345 break;
3346 }
3347 }
3348
3349 bare_urls.push(BareUrl {
3350 line: line_num,
3351 start_col: col_start,
3352 end_col: col_end,
3353 byte_offset: match_start,
3354 byte_end: match_end,
3355 url: url.to_string(),
3356 url_type: url_type.to_string(),
3357 });
3358 }
3359
3360 for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
3362 let full_match = cap.get(0).unwrap();
3363 let match_start = full_match.start();
3364 let match_end = full_match.end();
3365
3366 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3368 continue;
3369 }
3370
3371 let preceding_char = if match_start > 0 {
3373 content.chars().nth(match_start - 1)
3374 } else {
3375 None
3376 };
3377 let following_char = content.chars().nth(match_end);
3378
3379 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3380 continue;
3381 }
3382 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3383 continue;
3384 }
3385
3386 let email = full_match.as_str();
3387
3388 let mut line_num = 1;
3390 let mut col_start = match_start;
3391 let mut col_end = match_end;
3392 for (idx, line_info) in lines.iter().enumerate() {
3393 if match_start >= line_info.byte_offset {
3394 line_num = idx + 1;
3395 col_start = match_start - line_info.byte_offset;
3396 col_end = match_end - line_info.byte_offset;
3397 } else {
3398 break;
3399 }
3400 }
3401
3402 bare_urls.push(BareUrl {
3403 line: line_num,
3404 start_col: col_start,
3405 end_col: col_end,
3406 byte_offset: match_start,
3407 byte_end: match_end,
3408 url: email.to_string(),
3409 url_type: "email".to_string(),
3410 });
3411 }
3412
3413 bare_urls
3414 }
3415
/// Creates a `ValidHeadingsIter` over this context's `lines`.
///
/// NOTE(review): presumably the iterator yields only headings flagged as valid —
/// confirm against `ValidHeadingsIter`, which is defined outside this block.
#[must_use]
pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
    ValidHeadingsIter::new(&self.lines)
}
3439
3440 #[must_use]
3444 pub fn has_valid_headings(&self) -> bool {
3445 self.lines
3446 .iter()
3447 .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
3448 }
3449}
3450
3451fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
3453 if list_blocks.len() < 2 {
3454 return;
3455 }
3456
3457 let mut merger = ListBlockMerger::new(content, lines);
3458 *list_blocks = merger.merge(list_blocks);
3459}
3460
3461struct ListBlockMerger<'a> {
3463 content: &'a str,
3464 lines: &'a [LineInfo],
3465}
3466
3467impl<'a> ListBlockMerger<'a> {
3468 fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
3469 Self { content, lines }
3470 }
3471
3472 fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
3473 let mut merged = Vec::with_capacity(list_blocks.len());
3474 let mut current = list_blocks[0].clone();
3475
3476 for next in list_blocks.iter().skip(1) {
3477 if self.should_merge_blocks(¤t, next) {
3478 current = self.merge_two_blocks(current, next);
3479 } else {
3480 merged.push(current);
3481 current = next.clone();
3482 }
3483 }
3484
3485 merged.push(current);
3486 merged
3487 }
3488
3489 fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
3491 if !self.blocks_are_compatible(current, next) {
3493 return false;
3494 }
3495
3496 let spacing = self.analyze_spacing_between(current, next);
3498 match spacing {
3499 BlockSpacing::Consecutive => true,
3500 BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
3501 BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
3502 self.can_merge_with_content_between(current, next)
3503 }
3504 }
3505 }
3506
3507 fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
3509 current.is_ordered == next.is_ordered
3510 && current.blockquote_prefix == next.blockquote_prefix
3511 && current.nesting_level == next.nesting_level
3512 }
3513
3514 fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
3516 let gap = next.start_line - current.end_line;
3517
3518 match gap {
3519 1 => BlockSpacing::Consecutive,
3520 2 => BlockSpacing::SingleBlank,
3521 _ if gap > 2 => {
3522 if self.has_only_blank_lines_between(current, next) {
3523 BlockSpacing::MultipleBlanks
3524 } else {
3525 BlockSpacing::ContentBetween
3526 }
3527 }
3528 _ => BlockSpacing::Consecutive, }
3530 }
3531
3532 fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3534 if has_meaningful_content_between(self.content, current, next, self.lines) {
3537 return false; }
3539
3540 !current.is_ordered && current.marker == next.marker
3542 }
3543
3544 fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3546 if has_meaningful_content_between(self.content, current, next, self.lines) {
3548 return false; }
3550
3551 current.is_ordered && next.is_ordered
3553 }
3554
3555 fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3557 for line_num in (current.end_line + 1)..next.start_line {
3558 if let Some(line_info) = self.lines.get(line_num - 1)
3559 && !line_info.content(self.content).trim().is_empty()
3560 {
3561 return false;
3562 }
3563 }
3564 true
3565 }
3566
3567 fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
3569 current.end_line = next.end_line;
3570 current.item_lines.extend_from_slice(&next.item_lines);
3571
3572 current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
3574
3575 if !current.is_ordered && self.markers_differ(¤t, next) {
3577 current.marker = None; }
3579
3580 current
3581 }
3582
3583 fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
3585 current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
3586 }
3587}
3588
/// Distance classification between the end of one list block and the start of the
/// next, as produced by `ListBlockMerger::analyze_spacing_between`.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// The next block starts on the line right after the current one ends (gap of 1).
    /// Also used as the fallback when the gap is 0.
    Consecutive,
    /// Exactly one line separates the blocks (gap of 2). The classifier itself does
    /// not check that this line is blank.
    SingleBlank,
    /// More than one line separates the blocks and all of them are blank.
    MultipleBlanks,
    /// More than one line separates the blocks and at least one is not blank.
    ContentBetween,
}
3597
3598fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
3600 for line_num in (current.end_line + 1)..next.start_line {
3602 if let Some(line_info) = lines.get(line_num - 1) {
3603 let trimmed = line_info.content(content).trim();
3605
3606 if trimmed.is_empty() {
3608 continue;
3609 }
3610
3611 if line_info.heading.is_some() {
3615 return true; }
3617
3618 if is_horizontal_rule(trimmed) {
3620 return true; }
3622
3623 if crate::utils::skip_context::is_table_line(trimmed) {
3625 return true; }
3627
3628 if trimmed.starts_with('>') {
3630 return true; }
3632
3633 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
3635 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3636
3637 let min_continuation_indent = if current.is_ordered {
3639 current.nesting_level + current.max_marker_width + 1 } else {
3641 current.nesting_level + 2
3642 };
3643
3644 if line_indent < min_continuation_indent {
3645 return true; }
3648 }
3649
3650 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3652
3653 let min_indent = if current.is_ordered {
3655 current.nesting_level + current.max_marker_width
3656 } else {
3657 current.nesting_level + 2
3658 };
3659
3660 if line_indent < min_indent {
3662 return true; }
3664
3665 }
3668 }
3669
3670 false
3672}
3673
3674pub fn is_horizontal_rule_line(line: &str) -> bool {
3681 let leading_spaces = line.len() - line.trim_start_matches(' ').len();
3683 if leading_spaces > 3 || line.starts_with('\t') {
3684 return false;
3685 }
3686
3687 is_horizontal_rule_content(line.trim())
3688}
3689
/// Checks whether already-trimmed text is a horizontal rule.
///
/// The text must be at least three bytes long, start with `-`, `*` or `_`, contain
/// nothing but that character plus spaces and tabs, and contain the character at
/// least three times.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    if trimmed.len() < 3 {
        return false;
    }

    let Some(rule_char) = trimmed.chars().next().filter(|c| matches!(c, '-' | '*' | '_')) else {
        return false;
    };

    let mut rule_char_count = 0;
    for ch in trimmed.chars() {
        match ch {
            c if c == rule_char => rule_char_count += 1,
            ' ' | '\t' => {}
            // Any other character disqualifies the line.
            _ => return false,
        }
    }

    rule_char_count >= 3
}
3714
/// Alias for `is_horizontal_rule_content`: forwards `trimmed` unchanged and returns
/// its result. No indentation check is performed here.
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}
3719
#[cfg(test)]
mod tests {
    use super::*;

    // Empty input: no lines, but offset 0 still maps to line 1, column 1.
    #[test]
    fn test_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }

    // A single line without a trailing newline.
    #[test]
    fn test_single_line() {
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
    }

    // Line offsets and offset-to-position mapping across several lines.
    #[test]
    fn test_multi_line() {
        let content = "# Title\n\nSecond line\nThird line";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(8), (2, 1));
        assert_eq!(ctx.offset_to_line_col(9), (3, 1));
        assert_eq!(ctx.offset_to_line_col(15), (3, 7));
        assert_eq!(ctx.offset_to_line_col(21), (4, 1));
    }

    // Per-line metadata: content, byte offset, indentation and blank detection.
    #[test]
    fn test_line_info() {
        // The second line is indented by four spaces, matching the `indent == 4`
        // assertions below.
        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.lines.len(), 7);

        let line1 = &ctx.lines[0];
        assert_eq!(line1.content(ctx.content), "# Title");
        assert_eq!(line1.byte_offset, 0);
        assert_eq!(line1.indent, 0);
        assert!(!line1.is_blank);
        assert!(!line1.in_code_block);
        assert!(line1.list_item.is_none());

        let line2 = &ctx.lines[1];
        assert_eq!(line2.content(ctx.content), "    indented");
        assert_eq!(line2.byte_offset, 8);
        assert_eq!(line2.indent, 4);
        assert!(!line2.is_blank);

        let line3 = &ctx.lines[2];
        assert_eq!(line3.content(ctx.content), "");
        assert!(line3.is_blank);

        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
    }

    // List item detection for unordered, nested and ordered markers.
    #[test]
    fn test_list_item_detection() {
        // The nested bullet is indented by two spaces, matching the
        // `marker_column == 2` assertion below. The nested ordered item is indented
        // under the content of "1. "; no assertion depends on that line.
        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let line1 = &ctx.lines[0];
        assert!(line1.list_item.is_some());
        let list1 = line1.list_item.as_ref().unwrap();
        assert_eq!(list1.marker, "-");
        assert!(!list1.is_ordered);
        assert_eq!(list1.marker_column, 0);
        assert_eq!(list1.content_column, 2);

        let line2 = &ctx.lines[1];
        assert!(line2.list_item.is_some());
        let list2 = line2.list_item.as_ref().unwrap();
        assert_eq!(list2.marker, "*");
        assert_eq!(list2.marker_column, 2);

        let line3 = &ctx.lines[2];
        assert!(line3.list_item.is_some());
        let list3 = line3.list_item.as_ref().unwrap();
        assert_eq!(list3.marker, "1.");
        assert!(list3.is_ordered);
        assert_eq!(list3.number, Some(1));

        let line6 = &ctx.lines[5];
        assert!(line6.list_item.is_none());
    }

    // Offsets that land on newline characters and on the end of the content.
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        let content = "a\nb\nc";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(1), (1, 2));
        assert_eq!(ctx.offset_to_line_col(2), (2, 1));
        assert_eq!(ctx.offset_to_line_col(3), (2, 2));
        assert_eq!(ctx.offset_to_line_col(4), (3, 1));
        assert_eq!(ctx.offset_to_line_col(5), (3, 2));
    }

    // In the MDX flavor, leading import/export lines are flagged as ESM blocks.
    #[test]
    fn test_mdx_esm_blocks() {
        let content = r##"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall

In {year}, the snowfall was above average.
It was followed by a warm spring which caused
flood conditions in many of the nearby rivers.

<Chart color="#fcb32c" year={year} />
"##;

        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);

        assert_eq!(ctx.lines.len(), 10);
        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
        assert!(
            !ctx.lines[3].in_esm_block,
            "Line 4 (heading) should NOT be in_esm_block"
        );
        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
    }

    // The same import/export lines are not flagged in the Standard flavor.
    #[test]
    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
        let content = r#"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(
            !ctx.lines[0].in_esm_block,
            "Line 1 should NOT be in_esm_block in Standard flavor"
        );
        assert!(
            !ctx.lines[1].in_esm_block,
            "Line 2 should NOT be in_esm_block in Standard flavor"
        );
    }
}