1use crate::config::MarkdownFlavor;
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
4use crate::utils::element_cache::ElementCache;
5use crate::utils::regex_cache::URL_SIMPLE_REGEX;
6use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
7use regex::Regex;
8use std::borrow::Cow;
9use std::path::PathBuf;
10use std::sync::LazyLock;
11
/// Evaluates `$code` and yields its value, timing the evaluation.
///
/// When `$profile` is true the elapsed time is written to stderr, tagged with
/// `$name`. The clock is started before `$code` runs and `$profile` is only
/// inspected afterwards.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let timer = std::time::Instant::now();
        let value = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, timer.elapsed());
        }
        value
    }};
}

/// wasm32 variant: expands to `$code` alone; `$name` and `$profile` are not
/// expanded and no timing is performed.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
29
/// Regex for `[text](url "title")` and `[text][ref]` link syntax.
///
/// Capture groups: 1 = link text, 2 = URL in angle brackets, 3 = bare URL,
/// 4/5 = double-/single-quoted title, 6 = reference id. `parse_links` uses it
/// as a fallback and only records matches that have group 6 (reference form).
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\] # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
        \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
        |
        \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
43
/// Regex for `![alt](url "title")` and `![alt][ref]` image syntax.
///
/// Same shape as `LINK_PATTERN` with a leading `!`. Capture groups: 1 = alt
/// text, 2 = URL in angle brackets, 3 = bare URL, 4/5 = double-/single-quoted
/// title, 6 = reference id.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\] # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
        \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
        |
        \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
57
/// Regex for a reference definition line: up to three leading spaces,
/// `[id]:`, a whitespace-free destination, and an optional quoted title.
///
/// Capture groups: 1 = id, 2 = destination, 3/4 = double-/single-quoted title.
/// Multi-line mode, so `^`/`$` anchor at line boundaries.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
61
/// Regex for a plain e-mail address: local part, `@`, domain, and a top-level
/// domain of at least two ASCII letters. Not anchored, so it matches anywhere
/// in the haystack.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
67
/// Regex capturing (group 1) a blockquote prefix at the start of the haystack:
/// optional whitespace, one or more `>`, then optional whitespace.
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
70
/// Per-line facts computed once while building a `LintContext`.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset of the start of this line within the source text.
    pub byte_offset: usize,
    /// Length of this line in bytes (`content` slices `byte_offset..byte_offset + byte_len`).
    pub byte_len: usize,
    /// Indentation of the line (NOTE(review): unit/definition not visible here — confirm).
    pub indent: usize,
    /// Visual indentation (NOTE(review): presumably tab-expanded — confirm).
    pub visual_indent: usize,
    /// Whether the line is blank.
    pub is_blank: bool,
    /// Whether the line is inside a code block.
    pub in_code_block: bool,
    /// Whether the line is inside front matter.
    pub in_front_matter: bool,
    /// Whether the line is inside an HTML block.
    pub in_html_block: bool,
    /// Whether the line is inside an HTML comment.
    pub in_html_comment: bool,
    /// List item details when the line starts a list item.
    pub list_item: Option<ListItemInfo>,
    /// Heading details when the line is a heading.
    pub heading: Option<HeadingInfo>,
    /// Blockquote details when the line is a blockquote line.
    pub blockquote: Option<BlockquoteInfo>,
    /// Whether the line is inside a mkdocstrings block.
    pub in_mkdocstrings: bool,
    /// Whether the line is inside an ESM block.
    pub in_esm_block: bool,
    /// Set by `LintContext::new` on every line after the first line of a
    /// code span that extends over several lines.
    pub in_code_span_continuation: bool,
    /// Whether the line is a horizontal rule.
    pub is_horizontal_rule: bool,
}
110
111impl LineInfo {
112 pub fn content<'a>(&self, source: &'a str) -> &'a str {
114 &source[self.byte_offset..self.byte_offset + self.byte_len]
115 }
116}
117
/// Details of a list item marker found on a line.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker text as written.
    pub marker: String,
    /// Whether the item belongs to an ordered list.
    pub is_ordered: bool,
    /// Item number, when present (NOTE(review): presumably only for ordered items — confirm).
    pub number: Option<usize>,
    /// Column of the marker (NOTE(review): indexing base not visible here — confirm).
    pub marker_column: usize,
    /// Column where the item content starts.
    pub content_column: usize,
}
132
/// Syntactic form a heading was written in.
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    /// ATX heading (NOTE(review): presumably `#`-prefixed — confirm).
    ATX,
    /// Setext heading, variant 1 (NOTE(review): presumably `=` underline — confirm).
    Setext1,
    /// Setext heading, variant 2 (NOTE(review): presumably `-` underline — confirm).
    Setext2,
}
143
/// A link found by `LintContext::parse_links`.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// 1-based line number of the link start.
    pub line: usize,
    /// Byte distance of the link start from the start of its line.
    pub start_col: usize,
    /// Byte distance of the link end from the start of the line containing the end.
    pub end_col: usize,
    /// Byte offset of the link start in the document.
    pub byte_offset: usize,
    /// Byte offset just past the link in the document.
    pub byte_end: usize,
    /// Link text between the opening `[` and its matching `]`.
    pub text: Cow<'a, str>,
    /// Destination URL; empty for reference links found by the regex fallback.
    pub url: Cow<'a, str>,
    /// Whether the link uses reference, collapsed, or shortcut syntax.
    pub is_reference: bool,
    /// Lowercased reference id for reference links; `None` otherwise.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type as classified by pulldown-cmark.
    pub link_type: LinkType,
}
168
/// A broken link reported through pulldown-cmark's broken-link callback.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text reported by the callback.
    pub reference: String,
    /// The span reported by the callback.
    pub span: std::ops::Range<usize>,
}
177
/// A footnote reference reported by pulldown-cmark.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// Footnote identifier.
    pub id: String,
    /// 1-based line number where the reference starts.
    pub line: usize,
    /// Byte offset of the start of the reference.
    pub byte_offset: usize,
    /// Byte offset just past the reference.
    pub byte_end: usize,
}
190
/// An image found by `LintContext::parse_images`.
///
/// NOTE(review): field conventions presumably mirror `ParsedLink`; confirm
/// against the full body of `parse_images`.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// Line number of the image start.
    pub line: usize,
    /// Column of the image start.
    pub start_col: usize,
    /// Column of the image end.
    pub end_col: usize,
    /// Byte offset of the image start in the document.
    pub byte_offset: usize,
    /// Byte offset of the image end in the document.
    pub byte_end: usize,
    /// Alternative text of the image.
    pub alt_text: Cow<'a, str>,
    /// Image URL.
    pub url: Cow<'a, str>,
    /// Whether the image uses reference syntax.
    pub is_reference: bool,
    /// Reference id for reference-style images.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type as classified by pulldown-cmark.
    pub link_type: LinkType,
}
215
/// A reference definition (`[id]: url "title"`).
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line number of the definition.
    pub line: usize,
    /// Reference id; `get_reference_url` compares it against a lowercased key.
    pub id: String,
    /// Destination URL.
    pub url: String,
    /// Optional title.
    pub title: Option<String>,
    /// Byte offset of the start of the definition (inclusive).
    pub byte_offset: usize,
    /// Byte offset of the end of the definition (exclusive in `is_in_reference_def`).
    pub byte_end: usize,
    /// Byte offset of the start of the title, when a title exists.
    pub title_byte_start: Option<usize>,
    /// Byte offset of the end of the title (exclusive in `is_in_link_title`).
    pub title_byte_end: Option<usize>,
}
236
/// An inline code span.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// 1-based line on which the span starts.
    pub line: usize,
    /// 1-based line on which the span ends.
    pub end_line: usize,
    /// Start column, compared against a 0-based column in `is_in_code_span`.
    pub start_col: usize,
    /// End column, treated as exclusive in `is_in_code_span`.
    pub end_col: usize,
    /// Byte offset of the span start (inclusive).
    pub byte_offset: usize,
    /// Byte offset of the span end (exclusive in the containment checks).
    pub byte_end: usize,
    /// Number of backticks in the delimiter.
    pub backtick_count: usize,
    /// Content of the span.
    pub content: String,
}
257
/// Details of a heading found on a line.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level.
    pub level: u8,
    /// Syntactic style of the heading.
    pub style: HeadingStyle,
    /// The heading marker as written.
    pub marker: String,
    /// Column of the marker.
    pub marker_column: usize,
    /// Column where the heading text starts.
    pub content_column: usize,
    /// Heading text (NOTE(review): presumably cleaned of markers/ids — confirm).
    pub text: String,
    /// Custom id attached to the heading, if any.
    pub custom_id: Option<String>,
    /// Heading text as written.
    pub raw_text: String,
    /// Whether a closing sequence is present.
    pub has_closing_sequence: bool,
    /// The closing sequence as written.
    pub closing_sequence: String,
    /// Whether the heading counts as valid; `ValidHeadingsIter` skips headings
    /// where this is false.
    pub is_valid: bool,
}
285
/// Item yielded by `ValidHeadingsIter`: a heading whose `is_valid` flag is set,
/// together with its line.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// 1-based line number of the heading.
    pub line_num: usize,
    /// The heading stored on that line.
    pub heading: &'a HeadingInfo,
    /// The line the heading was found on.
    pub line_info: &'a LineInfo,
}
299
/// Iterator over the lines of a document that yields only valid headings.
pub struct ValidHeadingsIter<'a> {
    /// All lines of the document.
    lines: &'a [LineInfo],
    /// Index of the next line to inspect.
    current_index: usize,
}
308
309impl<'a> ValidHeadingsIter<'a> {
310 fn new(lines: &'a [LineInfo]) -> Self {
311 Self {
312 lines,
313 current_index: 0,
314 }
315 }
316}
317
318impl<'a> Iterator for ValidHeadingsIter<'a> {
319 type Item = ValidHeading<'a>;
320
321 fn next(&mut self) -> Option<Self::Item> {
322 while self.current_index < self.lines.len() {
323 let idx = self.current_index;
324 self.current_index += 1;
325
326 let line_info = &self.lines[idx];
327 if let Some(heading) = &line_info.heading
328 && heading.is_valid
329 {
330 return Some(ValidHeading {
331 line_num: idx + 1, heading,
333 line_info,
334 });
335 }
336 }
337 None
338 }
339}
340
/// Details of a blockquote line.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting level of the blockquote.
    pub nesting_level: usize,
    /// Indentation preceding the marker.
    pub indent: String,
    /// Column of the marker.
    pub marker_column: usize,
    /// The blockquote prefix as written.
    pub prefix: String,
    /// Content following the prefix.
    pub content: String,
    /// Whether no space follows the marker.
    pub has_no_space_after_marker: bool,
    /// Whether more than one space follows the marker.
    pub has_multiple_spaces_after_marker: bool,
    /// Whether the line needs an MD028 fix (NOTE(review): rule semantics not visible here).
    pub needs_md028_fix: bool,
}
361
/// A contiguous list in the document.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// First line of the block (inclusive in `is_in_list_block`).
    pub start_line: usize,
    /// Last line of the block (inclusive in `is_in_list_block`).
    pub end_line: usize,
    /// Whether the list is ordered.
    pub is_ordered: bool,
    /// Marker used by the list, if recorded.
    pub marker: Option<String>,
    /// Blockquote prefix associated with the list.
    pub blockquote_prefix: String,
    /// Line numbers of the list's items.
    pub item_lines: Vec<usize>,
    /// Nesting level of the list.
    pub nesting_level: usize,
    /// Largest marker width in the list.
    pub max_marker_width: usize,
}
382
383use std::sync::{Arc, OnceLock};
384
/// Occurrence counts of selected characters, consulted by
/// `LintContext::has_char`, `char_count`, and the `likely_has_*` heuristics.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count of `#`.
    pub hash_count: usize,
    /// Count of `*`.
    pub asterisk_count: usize,
    /// Count of `_`.
    pub underscore_count: usize,
    /// Count of `-`.
    pub hyphen_count: usize,
    /// Count of `+`.
    pub plus_count: usize,
    /// Count of `>`.
    pub gt_count: usize,
    /// Count of `|`.
    pub pipe_count: usize,
    /// Count reported for `[`.
    pub bracket_count: usize,
    /// Count of `` ` ``.
    pub backtick_count: usize,
    /// Count of `<`.
    pub lt_count: usize,
    /// Count of `!`.
    pub exclamation_count: usize,
    /// Count of newline characters.
    pub newline_count: usize,
}
413
/// An HTML tag found in the document.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number of the tag (matched against in `html_tags_on_line`).
    pub line: usize,
    /// Column of the tag start.
    pub start_col: usize,
    /// Column of the tag end.
    pub end_col: usize,
    /// Byte offset of the tag start (inclusive).
    pub byte_offset: usize,
    /// Byte offset of the tag end (exclusive in `is_in_html_tag`).
    pub byte_end: usize,
    /// Name of the tag.
    pub tag_name: String,
    /// Whether this is a closing tag.
    pub is_closing: bool,
    /// Whether the tag is self-closing.
    pub is_self_closing: bool,
    /// The tag text as written.
    pub raw_content: String,
}
436
/// An emphasis span found in the document.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number of the span (matched against in `emphasis_spans_on_line`).
    pub line: usize,
    /// Column of the span start.
    pub start_col: usize,
    /// Column of the span end.
    pub end_col: usize,
    /// Byte offset of the span start.
    pub byte_offset: usize,
    /// Byte offset of the span end.
    pub byte_end: usize,
    /// Marker character used for the emphasis.
    pub marker: char,
    /// Number of marker characters.
    pub marker_count: usize,
    /// Content of the span.
    pub content: String,
}
457
/// A table row found in the document.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number of the row (matched against in `table_rows_on_line`).
    pub line: usize,
    /// Whether the row is a separator row.
    pub is_separator: bool,
    /// Number of columns in the row.
    pub column_count: usize,
    /// Alignment per column (NOTE(review): string vocabulary not visible here — confirm).
    pub column_alignments: Vec<String>,
}
470
/// A bare URL found in the document.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number of the URL (matched against in `bare_urls_on_line`).
    pub line: usize,
    /// Column of the URL start.
    pub start_col: usize,
    /// Column of the URL end.
    pub end_col: usize,
    /// Byte offset of the URL start.
    pub byte_offset: usize,
    /// Byte offset of the URL end.
    pub byte_end: usize,
    /// The URL text.
    pub url: String,
    /// Kind of URL (NOTE(review): value vocabulary not visible here — confirm).
    pub url_type: String,
}
489
/// Pre-computed view of a Markdown document shared by lint rules.
///
/// Built once by `LintContext::new`; some element lists are computed lazily
/// on first access and cached in `OnceLock`s.
pub struct LintContext<'a> {
    /// The document text.
    pub content: &'a str,
    /// Byte offset at which each line starts: `0`, then one entry after every `\n`.
    pub line_offsets: Vec<usize>,
    /// Code block ranges from `CodeBlockUtils::detect_code_blocks`.
    pub code_blocks: Vec<(usize, usize)>,
    /// Per-line information, indexed by line number minus one.
    pub lines: Vec<LineInfo>,
    /// Links found by `parse_links`.
    pub links: Vec<ParsedLink<'a>>,
    /// Images found by `parse_images`.
    pub images: Vec<ParsedImage<'a>>,
    /// Broken links reported by pulldown-cmark while parsing links.
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Footnote references reported by pulldown-cmark while parsing links.
    pub footnote_refs: Vec<FootnoteRef>,
    /// Reference definitions found by `parse_reference_defs`.
    pub reference_defs: Vec<ReferenceDef>,
    /// Code spans; pre-filled by `new`, read through `code_spans()`.
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>,
    /// List blocks found by `parse_list_blocks`.
    pub list_blocks: Vec<ListBlock>,
    /// Counts of selected characters in the document.
    pub char_frequency: CharFrequency,
    /// Lazily computed HTML tags, read through `html_tags()`.
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>,
    /// Lazily computed emphasis spans, read through `emphasis_spans()`.
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>,
    /// Lazily computed table rows, read through `table_rows()`.
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>,
    /// Lazily computed bare URLs, read through `bare_urls()`.
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>,
    /// Lazily computed result of `compute_mixed_list_nesting`.
    has_mixed_list_nesting_cache: OnceLock<bool>,
    /// Byte ranges of HTML comments.
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Table blocks found by `TableUtils::find_table_blocks_with_code_info`.
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Line index built over `content`.
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    /// Jinja ranges as `(start, end)` byte pairs; `end` is exclusive in `is_in_jinja_range`.
    jinja_ranges: Vec<(usize, usize)>,
    /// Markdown flavor the document is linted as.
    pub flavor: MarkdownFlavor,
    /// Path of the source file, when known.
    pub source_file: Option<PathBuf>,
}
515
/// The four consecutive pieces of a blockquote line, each borrowed from it.
struct BlockquoteComponents<'a> {
    /// Leading spaces/tabs before the first `>`.
    indent: &'a str,
    /// The run of consecutive `>` characters.
    markers: &'a str,
    /// Spaces/tabs directly after the markers.
    spaces_after: &'a str,
    /// Everything after that whitespace.
    content: &'a str,
}

/// Splits `line` into indent, `>` markers, following whitespace, and content.
///
/// Returns `None` when the first character after the leading spaces/tabs is
/// not `>` (including when the line is empty or whitespace only). Only a run
/// of directly adjacent `>` characters counts as markers.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    // Length of the leading run of bytes satisfying `pred`. The predicates
    // used below only accept ASCII, so the result is a char boundary.
    fn run_len(text: &str, pred: impl Fn(u8) -> bool) -> usize {
        text.bytes().take_while(|&b| pred(b)).count()
    }
    let is_blank = |b: u8| b == b' ' || b == b'\t';

    let (indent, rest) = line.split_at(run_len(line, is_blank));
    if !rest.starts_with('>') {
        return None;
    }
    let (markers, rest) = rest.split_at(run_len(rest, |b| b == b'>'));
    let (spaces_after, content) = rest.split_at(run_len(rest, is_blank));

    Some(BlockquoteComponents {
        indent,
        markers,
        spaces_after,
        content,
    })
}
560
561impl<'a> LintContext<'a> {
    /// Builds the full lint context for `content`.
    ///
    /// Runs each analysis pass in sequence (later passes consume the results
    /// of earlier ones) and stores the results. On non-wasm targets, setting
    /// the `RUMDL_PROFILE_QUADRATIC` environment variable makes every pass
    /// print its elapsed time to stderr.
    ///
    /// * `content` - the Markdown text to analyse.
    /// * `flavor` - the Markdown flavor, passed to flavor-sensitive passes.
    /// * `source_file` - stored as-is on the context.
    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
        #[cfg(not(target_arch = "wasm32"))]
        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
        #[cfg(target_arch = "wasm32")]
        let profile = false;

        // Start offset of every line: 0, plus the byte after each '\n'.
        let line_offsets = profile_section!("Line offsets", profile, {
            let mut offsets = vec![0];
            for (i, c) in content.char_indices() {
                if c == '\n' {
                    offsets.push(i + 1);
                }
            }
            offsets
        });

        let code_blocks = profile_section!("Code blocks", profile, CodeBlockUtils::detect_code_blocks(content));

        let html_comment_ranges = profile_section!(
            "HTML comment ranges",
            profile,
            crate::utils::skip_context::compute_html_comment_ranges(content)
        );

        // Autodoc ranges are only detected for the MkDocs flavor.
        let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
            if flavor == MarkdownFlavor::MkDocs {
                crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
            } else {
                Vec::new()
            }
        });

        let mut lines = profile_section!(
            "Basic line info",
            profile,
            Self::compute_basic_line_info(
                content,
                &line_offsets,
                &code_blocks,
                flavor,
                &html_comment_ranges,
                &autodoc_ranges,
            )
        );

        // The following passes annotate `lines` in place.
        profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));

        profile_section!(
            "ESM blocks",
            profile,
            Self::detect_esm_blocks(content, &mut lines, flavor)
        );

        let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));

        profile_section!(
            "Headings & blockquotes",
            profile,
            Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
        );

        let code_spans = profile_section!("Code spans", profile, Self::parse_code_spans(content, &lines));

        // Flag every line after the first of a multi-line code span.
        for span in &code_spans {
            if span.end_line > span.line {
                for line_num in (span.line + 1)..=span.end_line {
                    if let Some(line_info) = lines.get_mut(line_num - 1) {
                        line_info.in_code_span_continuation = true;
                    }
                }
            }
        }

        let (links, broken_links, footnote_refs) = profile_section!(
            "Links",
            profile,
            Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
        );

        let images = profile_section!(
            "Images",
            profile,
            Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
        );

        let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));

        let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));

        let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));

        let table_blocks = profile_section!(
            "Table blocks",
            profile,
            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
                content,
                &code_blocks,
                &code_spans,
                &html_comment_ranges,
            )
        );

        let line_index = profile_section!(
            "Line index",
            profile,
            crate::utils::range_utils::LineIndex::new(content)
        );

        let jinja_ranges = profile_section!(
            "Jinja ranges",
            profile,
            crate::utils::jinja_utils::find_jinja_ranges(content)
        );

        Self {
            content,
            line_offsets,
            code_blocks,
            lines,
            links,
            images,
            broken_links,
            footnote_refs,
            reference_defs,
            // Code spans were already computed above, so the cache starts filled.
            code_spans_cache: OnceLock::from(Arc::new(code_spans)),
            list_blocks,
            char_frequency,
            // The remaining caches are filled lazily by their accessors.
            html_tags_cache: OnceLock::new(),
            emphasis_spans_cache: OnceLock::new(),
            table_rows_cache: OnceLock::new(),
            bare_urls_cache: OnceLock::new(),
            has_mixed_list_nesting_cache: OnceLock::new(),
            html_comment_ranges,
            table_blocks,
            line_index,
            jinja_ranges,
            flavor,
            source_file,
        }
    }
719
720 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
722 Arc::clone(
723 self.code_spans_cache
724 .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
725 )
726 }
727
728 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
730 &self.html_comment_ranges
731 }
732
733 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
735 Arc::clone(self.html_tags_cache.get_or_init(|| {
736 Arc::new(Self::parse_html_tags(
737 self.content,
738 &self.lines,
739 &self.code_blocks,
740 self.flavor,
741 ))
742 }))
743 }
744
745 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
747 Arc::clone(
748 self.emphasis_spans_cache
749 .get_or_init(|| Arc::new(Self::parse_emphasis_spans(self.content, &self.lines, &self.code_blocks))),
750 )
751 }
752
753 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
755 Arc::clone(
756 self.table_rows_cache
757 .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
758 )
759 }
760
761 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
763 Arc::clone(
764 self.bare_urls_cache
765 .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
766 )
767 }
768
769 pub fn has_mixed_list_nesting(&self) -> bool {
773 *self
774 .has_mixed_list_nesting_cache
775 .get_or_init(|| self.compute_mixed_list_nesting())
776 }
777
778 fn compute_mixed_list_nesting(&self) -> bool {
780 let mut stack: Vec<(usize, bool)> = Vec::new();
785 let mut last_was_blank = false;
786
787 for line_info in &self.lines {
788 if line_info.in_code_block
790 || line_info.in_front_matter
791 || line_info.in_mkdocstrings
792 || line_info.in_html_comment
793 || line_info.in_esm_block
794 {
795 continue;
796 }
797
798 if line_info.is_blank {
800 last_was_blank = true;
801 continue;
802 }
803
804 if let Some(list_item) = &line_info.list_item {
805 let current_pos = if list_item.marker_column == 1 {
807 0
808 } else {
809 list_item.marker_column
810 };
811
812 if last_was_blank && current_pos == 0 {
814 stack.clear();
815 }
816 last_was_blank = false;
817
818 while let Some(&(pos, _)) = stack.last() {
820 if pos >= current_pos {
821 stack.pop();
822 } else {
823 break;
824 }
825 }
826
827 if let Some(&(_, parent_is_ordered)) = stack.last()
829 && parent_is_ordered != list_item.is_ordered
830 {
831 return true; }
833
834 stack.push((current_pos, list_item.is_ordered));
835 } else {
836 last_was_blank = false;
838 }
839 }
840
841 false
842 }
843
844 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
846 match self.line_offsets.binary_search(&offset) {
847 Ok(line) => (line + 1, 1),
848 Err(line) => {
849 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
850 (line, offset - line_start + 1)
851 }
852 }
853 }
854
855 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
857 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
859 return true;
860 }
861
862 self.code_spans()
864 .iter()
865 .any(|span| pos >= span.byte_offset && pos < span.byte_end)
866 }
867
868 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
870 if line_num > 0 {
871 self.lines.get(line_num - 1)
872 } else {
873 None
874 }
875 }
876
877 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
879 self.line_info(line_num).map(|info| info.byte_offset)
880 }
881
882 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
884 let normalized_id = ref_id.to_lowercase();
885 self.reference_defs
886 .iter()
887 .find(|def| def.id == normalized_id)
888 .map(|def| def.url.as_str())
889 }
890
891 pub fn is_in_list_block(&self, line_num: usize) -> bool {
893 self.list_blocks
894 .iter()
895 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
896 }
897
898 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
900 self.list_blocks
901 .iter()
902 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
903 }
904
905 pub fn is_in_code_block(&self, line_num: usize) -> bool {
909 if line_num == 0 || line_num > self.lines.len() {
910 return false;
911 }
912 self.lines[line_num - 1].in_code_block
913 }
914
915 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
917 if line_num == 0 || line_num > self.lines.len() {
918 return false;
919 }
920 self.lines[line_num - 1].in_front_matter
921 }
922
923 pub fn is_in_html_block(&self, line_num: usize) -> bool {
925 if line_num == 0 || line_num > self.lines.len() {
926 return false;
927 }
928 self.lines[line_num - 1].in_html_block
929 }
930
931 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
933 if line_num == 0 || line_num > self.lines.len() {
934 return false;
935 }
936
937 let col_0indexed = if col > 0 { col - 1 } else { 0 };
941 let code_spans = self.code_spans();
942 code_spans.iter().any(|span| {
943 if line_num < span.line || line_num > span.end_line {
945 return false;
946 }
947
948 if span.line == span.end_line {
949 col_0indexed >= span.start_col && col_0indexed < span.end_col
951 } else if line_num == span.line {
952 col_0indexed >= span.start_col
954 } else if line_num == span.end_line {
955 col_0indexed < span.end_col
957 } else {
958 true
960 }
961 })
962 }
963
964 #[inline]
966 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
967 let code_spans = self.code_spans();
968 code_spans
969 .iter()
970 .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
971 }
972
973 #[inline]
976 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
977 self.reference_defs
978 .iter()
979 .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
980 }
981
982 #[inline]
986 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
987 self.html_comment_ranges
988 .iter()
989 .any(|range| byte_pos >= range.start && byte_pos < range.end)
990 }
991
992 #[inline]
995 pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
996 self.html_tags()
997 .iter()
998 .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
999 }
1000
1001 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1003 self.jinja_ranges
1004 .iter()
1005 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1006 }
1007
1008 pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1010 self.reference_defs.iter().any(|def| {
1011 if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1012 byte_pos >= start && byte_pos < end
1013 } else {
1014 false
1015 }
1016 })
1017 }
1018
1019 pub fn has_char(&self, ch: char) -> bool {
1021 match ch {
1022 '#' => self.char_frequency.hash_count > 0,
1023 '*' => self.char_frequency.asterisk_count > 0,
1024 '_' => self.char_frequency.underscore_count > 0,
1025 '-' => self.char_frequency.hyphen_count > 0,
1026 '+' => self.char_frequency.plus_count > 0,
1027 '>' => self.char_frequency.gt_count > 0,
1028 '|' => self.char_frequency.pipe_count > 0,
1029 '[' => self.char_frequency.bracket_count > 0,
1030 '`' => self.char_frequency.backtick_count > 0,
1031 '<' => self.char_frequency.lt_count > 0,
1032 '!' => self.char_frequency.exclamation_count > 0,
1033 '\n' => self.char_frequency.newline_count > 0,
1034 _ => self.content.contains(ch), }
1036 }
1037
1038 pub fn char_count(&self, ch: char) -> usize {
1040 match ch {
1041 '#' => self.char_frequency.hash_count,
1042 '*' => self.char_frequency.asterisk_count,
1043 '_' => self.char_frequency.underscore_count,
1044 '-' => self.char_frequency.hyphen_count,
1045 '+' => self.char_frequency.plus_count,
1046 '>' => self.char_frequency.gt_count,
1047 '|' => self.char_frequency.pipe_count,
1048 '[' => self.char_frequency.bracket_count,
1049 '`' => self.char_frequency.backtick_count,
1050 '<' => self.char_frequency.lt_count,
1051 '!' => self.char_frequency.exclamation_count,
1052 '\n' => self.char_frequency.newline_count,
1053 _ => self.content.matches(ch).count(), }
1055 }
1056
1057 pub fn likely_has_headings(&self) -> bool {
1059 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
1061
1062 pub fn likely_has_lists(&self) -> bool {
1064 self.char_frequency.asterisk_count > 0
1065 || self.char_frequency.hyphen_count > 0
1066 || self.char_frequency.plus_count > 0
1067 }
1068
1069 pub fn likely_has_emphasis(&self) -> bool {
1071 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1072 }
1073
1074 pub fn likely_has_tables(&self) -> bool {
1076 self.char_frequency.pipe_count > 2
1077 }
1078
1079 pub fn likely_has_blockquotes(&self) -> bool {
1081 self.char_frequency.gt_count > 0
1082 }
1083
1084 pub fn likely_has_code(&self) -> bool {
1086 self.char_frequency.backtick_count > 0
1087 }
1088
1089 pub fn likely_has_links_or_images(&self) -> bool {
1091 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1092 }
1093
1094 pub fn likely_has_html(&self) -> bool {
1096 self.char_frequency.lt_count > 0
1097 }
1098
1099 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1101 self.html_tags()
1102 .iter()
1103 .filter(|tag| tag.line == line_num)
1104 .cloned()
1105 .collect()
1106 }
1107
1108 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1110 self.emphasis_spans()
1111 .iter()
1112 .filter(|span| span.line == line_num)
1113 .cloned()
1114 .collect()
1115 }
1116
1117 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1119 self.table_rows()
1120 .iter()
1121 .filter(|row| row.line == line_num)
1122 .cloned()
1123 .collect()
1124 }
1125
1126 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1128 self.bare_urls()
1129 .iter()
1130 .filter(|url| url.line == line_num)
1131 .cloned()
1132 .collect()
1133 }
1134
1135 #[inline]
1141 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1142 let idx = match lines.binary_search_by(|line| {
1144 if byte_offset < line.byte_offset {
1145 std::cmp::Ordering::Greater
1146 } else if byte_offset > line.byte_offset + line.byte_len {
1147 std::cmp::Ordering::Less
1148 } else {
1149 std::cmp::Ordering::Equal
1150 }
1151 }) {
1152 Ok(idx) => idx,
1153 Err(idx) => idx.saturating_sub(1),
1154 };
1155
1156 let line = &lines[idx];
1157 let line_num = idx + 1;
1158 let col = byte_offset.saturating_sub(line.byte_offset);
1159
1160 (idx, line_num, col)
1161 }
1162
1163 #[inline]
1165 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1166 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1168
1169 if idx > 0 {
1171 let span = &code_spans[idx - 1];
1172 if offset >= span.byte_offset && offset < span.byte_end {
1173 return true;
1174 }
1175 }
1176
1177 false
1178 }
1179
1180 fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1184 use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1185
1186 let mut link_ranges = Vec::new();
1187 let mut options = Options::empty();
1188 options.insert(Options::ENABLE_WIKILINKS);
1189 options.insert(Options::ENABLE_FOOTNOTES);
1190
1191 let parser = Parser::new_ext(content, options).into_offset_iter();
1192 let mut link_stack: Vec<usize> = Vec::new();
1193
1194 for (event, range) in parser {
1195 match event {
1196 Event::Start(Tag::Link { .. }) => {
1197 link_stack.push(range.start);
1198 }
1199 Event::End(TagEnd::Link) => {
1200 if let Some(start_pos) = link_stack.pop() {
1201 link_ranges.push((start_pos, range.end));
1202 }
1203 }
1204 _ => {}
1205 }
1206 }
1207
1208 link_ranges
1209 }
1210
1211 fn parse_links(
1213 content: &'a str,
1214 lines: &[LineInfo],
1215 code_blocks: &[(usize, usize)],
1216 code_spans: &[CodeSpan],
1217 flavor: MarkdownFlavor,
1218 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1219 ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1220 use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1221 use std::collections::HashSet;
1222
1223 let mut links = Vec::with_capacity(content.len() / 500);
1224 let mut broken_links = Vec::new();
1225 let mut footnote_refs = Vec::new();
1226
1227 let mut found_positions = HashSet::new();
1229
1230 let mut options = Options::empty();
1240 options.insert(Options::ENABLE_WIKILINKS);
1241 options.insert(Options::ENABLE_FOOTNOTES);
1242
1243 let parser = Parser::new_with_broken_link_callback(
1244 content,
1245 options,
1246 Some(|link: BrokenLink<'_>| {
1247 broken_links.push(BrokenLinkInfo {
1248 reference: link.reference.to_string(),
1249 span: link.span.clone(),
1250 });
1251 None
1252 }),
1253 )
1254 .into_offset_iter();
1255
1256 let mut link_stack: Vec<(
1257 usize,
1258 usize,
1259 pulldown_cmark::CowStr<'a>,
1260 LinkType,
1261 pulldown_cmark::CowStr<'a>,
1262 )> = Vec::new();
1263 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1266 match event {
1267 Event::Start(Tag::Link {
1268 link_type,
1269 dest_url,
1270 id,
1271 ..
1272 }) => {
1273 link_stack.push((range.start, range.end, dest_url, link_type, id));
1275 text_chunks.clear();
1276 }
1277 Event::Text(text) if !link_stack.is_empty() => {
1278 text_chunks.push((text.to_string(), range.start, range.end));
1280 }
1281 Event::Code(code) if !link_stack.is_empty() => {
1282 let code_text = format!("`{code}`");
1284 text_chunks.push((code_text, range.start, range.end));
1285 }
1286 Event::End(TagEnd::Link) => {
1287 if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1288 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1290 text_chunks.clear();
1291 continue;
1292 }
1293
1294 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1296
1297 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1299 text_chunks.clear();
1300 continue;
1301 }
1302
1303 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1304
1305 let is_reference = matches!(
1306 link_type,
1307 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1308 );
1309
1310 let link_text = if start_pos < content.len() {
1313 let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1314
1315 let mut close_pos = None;
1319 let mut depth = 0;
1320 let mut in_code_span = false;
1321
1322 for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1323 let mut backslash_count = 0;
1325 let mut j = i;
1326 while j > 0 && link_bytes[j - 1] == b'\\' {
1327 backslash_count += 1;
1328 j -= 1;
1329 }
1330 let is_escaped = backslash_count % 2 != 0;
1331
1332 if byte == b'`' && !is_escaped {
1334 in_code_span = !in_code_span;
1335 }
1336
1337 if !is_escaped && !in_code_span {
1339 if byte == b'[' {
1340 depth += 1;
1341 } else if byte == b']' {
1342 if depth == 0 {
1343 close_pos = Some(i);
1345 break;
1346 } else {
1347 depth -= 1;
1348 }
1349 }
1350 }
1351 }
1352
1353 if let Some(pos) = close_pos {
1354 Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1355 } else {
1356 Cow::Borrowed("")
1357 }
1358 } else {
1359 Cow::Borrowed("")
1360 };
1361
1362 let reference_id = if is_reference && !ref_id.is_empty() {
1364 Some(Cow::Owned(ref_id.to_lowercase()))
1365 } else if is_reference {
1366 Some(Cow::Owned(link_text.to_lowercase()))
1368 } else {
1369 None
1370 };
1371
1372 found_positions.insert(start_pos);
1374
1375 links.push(ParsedLink {
1376 line: line_num,
1377 start_col: col_start,
1378 end_col: col_end,
1379 byte_offset: start_pos,
1380 byte_end: range.end,
1381 text: link_text,
1382 url: Cow::Owned(url.to_string()),
1383 is_reference,
1384 reference_id,
1385 link_type,
1386 });
1387
1388 text_chunks.clear();
1389 }
1390 }
1391 Event::FootnoteReference(footnote_id) => {
1392 if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1395 continue;
1396 }
1397
1398 let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1399 footnote_refs.push(FootnoteRef {
1400 id: footnote_id.to_string(),
1401 line: line_num,
1402 byte_offset: range.start,
1403 byte_end: range.end,
1404 });
1405 }
1406 _ => {}
1407 }
1408 }
1409
1410 for cap in LINK_PATTERN.captures_iter(content) {
1414 let full_match = cap.get(0).unwrap();
1415 let match_start = full_match.start();
1416 let match_end = full_match.end();
1417
1418 if found_positions.contains(&match_start) {
1420 continue;
1421 }
1422
1423 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1425 continue;
1426 }
1427
1428 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1430 continue;
1431 }
1432
1433 if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1435 continue;
1436 }
1437
1438 if Self::is_offset_in_code_span(code_spans, match_start) {
1440 continue;
1441 }
1442
1443 if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1445 continue;
1446 }
1447
1448 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1450
1451 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1453 continue;
1454 }
1455
1456 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1457
1458 let text = cap.get(1).map_or("", |m| m.as_str());
1459
1460 if let Some(ref_id) = cap.get(6) {
1462 let ref_id_str = ref_id.as_str();
1463 let normalized_ref = if ref_id_str.is_empty() {
1464 Cow::Owned(text.to_lowercase()) } else {
1466 Cow::Owned(ref_id_str.to_lowercase())
1467 };
1468
1469 links.push(ParsedLink {
1471 line: line_num,
1472 start_col: col_start,
1473 end_col: col_end,
1474 byte_offset: match_start,
1475 byte_end: match_end,
1476 text: Cow::Borrowed(text),
1477 url: Cow::Borrowed(""), is_reference: true,
1479 reference_id: Some(normalized_ref),
1480 link_type: LinkType::Reference, });
1482 }
1483 }
1484
1485 (links, broken_links, footnote_refs)
1486 }
1487
1488 fn parse_images(
1490 content: &'a str,
1491 lines: &[LineInfo],
1492 code_blocks: &[(usize, usize)],
1493 code_spans: &[CodeSpan],
1494 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1495 ) -> Vec<ParsedImage<'a>> {
1496 use crate::utils::skip_context::is_in_html_comment_ranges;
1497 use std::collections::HashSet;
1498
1499 let mut images = Vec::with_capacity(content.len() / 1000);
1501 let mut found_positions = HashSet::new();
1502
1503 let parser = Parser::new(content).into_offset_iter();
1505 let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1506 Vec::new();
1507 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1510 match event {
1511 Event::Start(Tag::Image {
1512 link_type,
1513 dest_url,
1514 id,
1515 ..
1516 }) => {
1517 image_stack.push((range.start, dest_url, link_type, id));
1518 text_chunks.clear();
1519 }
1520 Event::Text(text) if !image_stack.is_empty() => {
1521 text_chunks.push((text.to_string(), range.start, range.end));
1522 }
1523 Event::Code(code) if !image_stack.is_empty() => {
1524 let code_text = format!("`{code}`");
1525 text_chunks.push((code_text, range.start, range.end));
1526 }
1527 Event::End(TagEnd::Image) => {
1528 if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1529 if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1531 continue;
1532 }
1533
1534 if Self::is_offset_in_code_span(code_spans, start_pos) {
1536 continue;
1537 }
1538
1539 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1541 continue;
1542 }
1543
1544 let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1546 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1547
1548 let is_reference = matches!(
1549 link_type,
1550 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1551 );
1552
1553 let alt_text = if start_pos < content.len() {
1556 let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1557
1558 let mut close_pos = None;
1561 let mut depth = 0;
1562
1563 if image_bytes.len() > 2 {
1564 for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1565 let mut backslash_count = 0;
1567 let mut j = i;
1568 while j > 0 && image_bytes[j - 1] == b'\\' {
1569 backslash_count += 1;
1570 j -= 1;
1571 }
1572 let is_escaped = backslash_count % 2 != 0;
1573
1574 if !is_escaped {
1575 if byte == b'[' {
1576 depth += 1;
1577 } else if byte == b']' {
1578 if depth == 0 {
1579 close_pos = Some(i);
1581 break;
1582 } else {
1583 depth -= 1;
1584 }
1585 }
1586 }
1587 }
1588 }
1589
1590 if let Some(pos) = close_pos {
1591 Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1592 } else {
1593 Cow::Borrowed("")
1594 }
1595 } else {
1596 Cow::Borrowed("")
1597 };
1598
1599 let reference_id = if is_reference && !ref_id.is_empty() {
1600 Some(Cow::Owned(ref_id.to_lowercase()))
1601 } else if is_reference {
1602 Some(Cow::Owned(alt_text.to_lowercase())) } else {
1604 None
1605 };
1606
1607 found_positions.insert(start_pos);
1608 images.push(ParsedImage {
1609 line: line_num,
1610 start_col: col_start,
1611 end_col: col_end,
1612 byte_offset: start_pos,
1613 byte_end: range.end,
1614 alt_text,
1615 url: Cow::Owned(url.to_string()),
1616 is_reference,
1617 reference_id,
1618 link_type,
1619 });
1620 }
1621 }
1622 _ => {}
1623 }
1624 }
1625
1626 for cap in IMAGE_PATTERN.captures_iter(content) {
1628 let full_match = cap.get(0).unwrap();
1629 let match_start = full_match.start();
1630 let match_end = full_match.end();
1631
1632 if found_positions.contains(&match_start) {
1634 continue;
1635 }
1636
1637 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1639 continue;
1640 }
1641
1642 if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1644 || Self::is_offset_in_code_span(code_spans, match_start)
1645 || is_in_html_comment_ranges(html_comment_ranges, match_start)
1646 {
1647 continue;
1648 }
1649
1650 if let Some(ref_id) = cap.get(6) {
1652 let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1653 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1654 let alt_text = cap.get(1).map_or("", |m| m.as_str());
1655 let ref_id_str = ref_id.as_str();
1656 let normalized_ref = if ref_id_str.is_empty() {
1657 Cow::Owned(alt_text.to_lowercase())
1658 } else {
1659 Cow::Owned(ref_id_str.to_lowercase())
1660 };
1661
1662 images.push(ParsedImage {
1663 line: line_num,
1664 start_col: col_start,
1665 end_col: col_end,
1666 byte_offset: match_start,
1667 byte_end: match_end,
1668 alt_text: Cow::Borrowed(alt_text),
1669 url: Cow::Borrowed(""),
1670 is_reference: true,
1671 reference_id: Some(normalized_ref),
1672 link_type: LinkType::Reference, });
1674 }
1675 }
1676
1677 images
1678 }
1679
1680 fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1682 let mut refs = Vec::with_capacity(lines.len() / 20); for (line_idx, line_info) in lines.iter().enumerate() {
1686 if line_info.in_code_block {
1688 continue;
1689 }
1690
1691 let line = line_info.content(content);
1692 let line_num = line_idx + 1;
1693
1694 if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1695 let id = cap.get(1).unwrap().as_str().to_lowercase();
1696 let url = cap.get(2).unwrap().as_str().to_string();
1697 let title_match = cap.get(3).or_else(|| cap.get(4));
1698 let title = title_match.map(|m| m.as_str().to_string());
1699
1700 let match_obj = cap.get(0).unwrap();
1703 let byte_offset = line_info.byte_offset + match_obj.start();
1704 let byte_end = line_info.byte_offset + match_obj.end();
1705
1706 let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1708 let start = line_info.byte_offset + m.start().saturating_sub(1);
1710 let end = line_info.byte_offset + m.end() + 1; (Some(start), Some(end))
1712 } else {
1713 (None, None)
1714 };
1715
1716 refs.push(ReferenceDef {
1717 line: line_num,
1718 id,
1719 url,
1720 title,
1721 byte_offset,
1722 byte_end,
1723 title_byte_start,
1724 title_byte_end,
1725 });
1726 }
1727 }
1728
1729 refs
1730 }
1731
/// Splits a line into its blockquote prefix and the content that follows.
///
/// The prefix is made of one or more `>` markers, each optionally preceded by
/// whitespace and optionally followed by a single space or tab. Returns
/// `Some((prefix, content))` when the line starts with at least one marker
/// (after leading whitespace), and `None` otherwise.
#[inline]
fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
    let mut rest = line;
    let mut saw_marker = false;

    // Peel one `>` marker per iteration, together with the whitespace in front of
    // it and at most one space/tab right after it.
    while let Some(after_marker) = rest.trim_start().strip_prefix('>') {
        saw_marker = true;
        rest = after_marker
            .strip_prefix(' ')
            .or_else(|| after_marker.strip_prefix('\t'))
            .unwrap_or(after_marker);
    }

    // `rest` is always a suffix of `line`, so the prefix length is the difference.
    saw_marker.then(|| line.split_at(line.len() - rest.len()))
}
1772
1773 fn detect_list_items_with_pulldown(
1794 content: &str,
1795 line_offsets: &[usize],
1796 flavor: MarkdownFlavor,
1797 front_matter_end: usize,
1798 ) -> std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)> {
1799 use std::collections::HashMap;
1800
1801 let mut list_items = HashMap::new();
1802
1803 let mut options = Options::empty();
1804 options.insert(Options::ENABLE_TABLES);
1805 options.insert(Options::ENABLE_FOOTNOTES);
1806 options.insert(Options::ENABLE_STRIKETHROUGH);
1807 options.insert(Options::ENABLE_TASKLISTS);
1808 options.insert(Options::ENABLE_GFM);
1810
1811 let _ = flavor;
1813
1814 let parser = Parser::new_ext(content, options).into_offset_iter();
1815 let mut list_depth: usize = 0;
1816 let mut list_stack: Vec<bool> = Vec::new();
1817
1818 for (event, range) in parser {
1819 match event {
1820 Event::Start(Tag::List(start_number)) => {
1821 list_depth += 1;
1822 list_stack.push(start_number.is_some());
1823 }
1824 Event::End(TagEnd::List(_)) => {
1825 list_depth = list_depth.saturating_sub(1);
1826 list_stack.pop();
1827 }
1828 Event::Start(Tag::Item) if list_depth > 0 => {
1829 let current_list_is_ordered = list_stack.last().copied().unwrap_or(false);
1831 let item_start = range.start;
1833
1834 let mut line_idx = match line_offsets.binary_search(&item_start) {
1836 Ok(idx) => idx,
1837 Err(idx) => idx.saturating_sub(1),
1838 };
1839
1840 if item_start < content.len() && content.as_bytes()[item_start] == b'\n' {
1844 line_idx += 1;
1845 }
1846
1847 if front_matter_end > 0 && line_idx < front_matter_end {
1849 continue;
1850 }
1851
1852 if line_idx < line_offsets.len() {
1853 let line_start_byte = line_offsets[line_idx];
1854 let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
1855 let line = &content[line_start_byte..line_end.min(content.len())];
1856
1857 let line = line
1859 .strip_suffix('\n')
1860 .or_else(|| line.strip_suffix("\r\n"))
1861 .unwrap_or(line);
1862
1863 let blockquote_parse = Self::parse_blockquote_prefix(line);
1865 let (blockquote_prefix_len, line_to_parse) = if let Some((prefix, content)) = blockquote_parse {
1866 (prefix.len(), content)
1867 } else {
1868 (0, line)
1869 };
1870
1871 if current_list_is_ordered {
1873 if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
1874 Self::parse_ordered_list(line_to_parse)
1875 {
1876 let marker = format!("{number_str}{delimiter}");
1877 let marker_column = blockquote_prefix_len + leading_spaces.len();
1878 let content_column = marker_column + marker.len() + spacing.len();
1879 let number = number_str.parse().ok();
1880
1881 list_items.entry(line_start_byte).or_insert((
1882 true,
1883 marker,
1884 marker_column,
1885 content_column,
1886 number,
1887 ));
1888 }
1889 } else if let Some((leading_spaces, marker, spacing, _content)) =
1890 Self::parse_unordered_list(line_to_parse)
1891 {
1892 let marker_column = blockquote_prefix_len + leading_spaces.len();
1893 let content_column = marker_column + 1 + spacing.len();
1894
1895 list_items.entry(line_start_byte).or_insert((
1896 false,
1897 marker.to_string(),
1898 marker_column,
1899 content_column,
1900 None,
1901 ));
1902 }
1903 }
1904 }
1905 _ => {}
1906 }
1907 }
1908
1909 list_items
1910 }
1911
/// Splits a line that starts with a bullet marker (`-`, `*` or `+`) into its parts.
///
/// Returns `(leading_whitespace, marker, spacing_after_marker, content)`, where
/// both whitespace runs consist of spaces and tabs only. Returns `None` when the
/// first non-blank byte is not a bullet marker or the line is blank.
#[inline]
fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
    fn blank_run(bytes: &[u8]) -> usize {
        bytes.iter().take_while(|&&b| b == b' ' || b == b'\t').count()
    }

    let bytes = line.as_bytes();
    let marker_pos = blank_run(bytes);

    let marker = char::from(*bytes.get(marker_pos)?);
    if !matches!(marker, '-' | '*' | '+') {
        return None;
    }

    let spacing_start = marker_pos + 1;
    let content_start = spacing_start + blank_run(&bytes[spacing_start..]);

    Some((
        &line[..marker_pos],
        marker,
        &line[spacing_start..content_start],
        &line[content_start..],
    ))
}
1944
/// Splits a line that starts with an ordered-list marker (`1.` or `1)`) into its parts.
///
/// Returns `(leading_whitespace, number, delimiter, spacing_after_marker, content)`.
/// The number is a non-empty run of ASCII digits and the delimiter is `.` or `)`.
/// Returns `None` when the digits or the delimiter are missing.
#[inline]
fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
    fn run_len(bytes: &[u8], accept: impl Fn(u8) -> bool) -> usize {
        bytes.iter().take_while(|&&b| accept(b)).count()
    }
    let is_blank = |b: u8| b == b' ' || b == b'\t';

    let bytes = line.as_bytes();

    let number_start = run_len(bytes, is_blank);
    let delimiter_pos = number_start + run_len(&bytes[number_start..], |b| b.is_ascii_digit());
    if delimiter_pos == number_start {
        // No digits at all.
        return None;
    }

    let delimiter = match bytes.get(delimiter_pos) {
        Some(b'.') => '.',
        Some(b')') => ')',
        _ => return None,
    };

    let spacing_start = delimiter_pos + 1;
    let content_start = spacing_start + run_len(&bytes[spacing_start..], is_blank);

    Some((
        &line[..number_start],
        &line[number_start..delimiter_pos],
        delimiter,
        &line[spacing_start..content_start],
        &line[content_start..],
    ))
}
1992
/// Builds a per-line flag vector telling which lines overlap a code block.
///
/// `line_offsets[i]` is the byte offset at which line `i` starts; the result has
/// one entry per offset. For each `(start, end)` byte range in `code_blocks`, the
/// line containing `start` and every following line that begins before `end` is
/// flagged. Range ends that fall inside a multi-byte character are first widened
/// outwards to the nearest character boundary, and `end` is clamped to the
/// content length.
fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
    // Nearest char boundary at or before `pos`.
    let floor_boundary = |mut pos: usize| {
        while pos > 0 && !content.is_char_boundary(pos) {
            pos -= 1;
        }
        pos
    };
    // Nearest char boundary at or after `pos`, never past the end of `content`.
    let ceil_boundary = |mut pos: usize| {
        while pos < content.len() && !content.is_char_boundary(pos) {
            pos += 1;
        }
        pos.min(content.len())
    };

    let mut flags = vec![false; line_offsets.len()];

    for &(block_start, block_end) in code_blocks {
        let from = floor_boundary(block_start);
        let to = ceil_boundary(block_end);

        // Line containing `from`: the last line whose offset is <= `from`.
        let first_line = line_offsets
            .partition_point(|&offset| offset <= from)
            .saturating_sub(1);
        // One past the last line that starts before `to`.
        let line_limit = line_offsets.partition_point(|&offset| offset < to);

        // `get_mut` yields `None` for an inverted range, which flags nothing.
        if let Some(covered) = flags.get_mut(first_line..line_limit) {
            covered.fill(true);
        }
    }

    flags
}
2052
2053 fn compute_basic_line_info(
2055 content: &str,
2056 line_offsets: &[usize],
2057 code_blocks: &[(usize, usize)],
2058 flavor: MarkdownFlavor,
2059 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2060 autodoc_ranges: &[crate::utils::skip_context::ByteRange],
2061 ) -> Vec<LineInfo> {
2062 let content_lines: Vec<&str> = content.lines().collect();
2063 let mut lines = Vec::with_capacity(content_lines.len());
2064
2065 let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
2067
2068 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2071
2072 let list_item_map = Self::detect_list_items_with_pulldown(content, line_offsets, flavor, front_matter_end);
2074
2075 for (i, line) in content_lines.iter().enumerate() {
2076 let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
2077 let indent = line.len() - line.trim_start().len();
2078 let visual_indent = ElementCache::calculate_indentation_width_default(line);
2080
2081 let blockquote_parse = Self::parse_blockquote_prefix(line);
2083
2084 let is_blank = if let Some((_, content)) = blockquote_parse {
2086 content.trim().is_empty()
2088 } else {
2089 line.trim().is_empty()
2090 };
2091
2092 let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
2094
2095 let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
2097 && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
2098 let line_end_offset = byte_offset + line.len();
2101 let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
2102 html_comment_ranges,
2103 byte_offset,
2104 line_end_offset,
2105 );
2106 let list_item =
2109 list_item_map
2110 .get(&byte_offset)
2111 .map(
2112 |(is_ordered, marker, marker_column, content_column, number)| ListItemInfo {
2113 marker: marker.clone(),
2114 is_ordered: *is_ordered,
2115 number: *number,
2116 marker_column: *marker_column,
2117 content_column: *content_column,
2118 },
2119 );
2120
2121 let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2124 let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2125
2126 lines.push(LineInfo {
2127 byte_offset,
2128 byte_len: line.len(),
2129 indent,
2130 visual_indent,
2131 is_blank,
2132 in_code_block,
2133 in_front_matter,
2134 in_html_block: false, in_html_comment,
2136 list_item,
2137 heading: None, blockquote: None, in_mkdocstrings,
2140 in_esm_block: false, in_code_span_continuation: false, is_horizontal_rule: is_hr,
2143 });
2144 }
2145
2146 lines
2147 }
2148
2149 fn detect_headings_and_blockquotes(
2151 content: &str,
2152 lines: &mut [LineInfo],
2153 flavor: MarkdownFlavor,
2154 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2155 link_byte_ranges: &[(usize, usize)],
2156 ) {
2157 static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
2159 LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
2160 static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
2161 LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
2162
2163 let content_lines: Vec<&str> = content.lines().collect();
2164
2165 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2167
2168 for i in 0..lines.len() {
2170 if lines[i].in_code_block {
2171 continue;
2172 }
2173
2174 if front_matter_end > 0 && i < front_matter_end {
2176 continue;
2177 }
2178
2179 if lines[i].in_html_block {
2181 continue;
2182 }
2183
2184 let line = content_lines[i];
2185
2186 if let Some(bq) = parse_blockquote_detailed(line) {
2188 let nesting_level = bq.markers.len(); let marker_column = bq.indent.len();
2190
2191 let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
2193
2194 let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
2196 let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
2199
2200 let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
2204
2205 lines[i].blockquote = Some(BlockquoteInfo {
2206 nesting_level,
2207 indent: bq.indent.to_string(),
2208 marker_column,
2209 prefix,
2210 content: bq.content.to_string(),
2211 has_no_space_after_marker: has_no_space,
2212 has_multiple_spaces_after_marker: has_multiple_spaces,
2213 needs_md028_fix,
2214 });
2215 }
2216
2217 if lines[i].is_blank {
2219 continue;
2220 }
2221
2222 let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
2225 crate::utils::mkdocs_snippets::is_snippet_section_start(line)
2226 || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
2227 } else {
2228 false
2229 };
2230
2231 if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
2232 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
2234 continue;
2235 }
2236 let line_offset = lines[i].byte_offset;
2239 if link_byte_ranges
2240 .iter()
2241 .any(|&(start, end)| line_offset > start && line_offset < end)
2242 {
2243 continue;
2244 }
2245 let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
2246 let hashes = caps.get(2).map_or("", |m| m.as_str());
2247 let spaces_after = caps.get(3).map_or("", |m| m.as_str());
2248 let rest = caps.get(4).map_or("", |m| m.as_str());
2249
2250 let level = hashes.len() as u8;
2251 let marker_column = leading_spaces.len();
2252
2253 let (text, has_closing, closing_seq) = {
2255 let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
2257 if rest[id_start..].trim_end().ends_with('}') {
2259 (&rest[..id_start], &rest[id_start..])
2261 } else {
2262 (rest, "")
2263 }
2264 } else {
2265 (rest, "")
2266 };
2267
2268 let trimmed_rest = rest_without_id.trim_end();
2270 if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
2271 let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();
2274
2275 let last_hash_char_idx = char_positions
2277 .iter()
2278 .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);
2279
2280 if let Some(mut char_idx) = last_hash_char_idx {
2281 while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
2283 char_idx -= 1;
2284 }
2285
2286 let start_of_hashes = char_positions[char_idx].0;
2288
2289 let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();
2291
2292 let potential_closing = &trimmed_rest[start_of_hashes..];
2294 let is_all_hashes = potential_closing.chars().all(|c| c == '#');
2295
2296 if is_all_hashes && has_space_before {
2297 let closing_hashes = potential_closing.to_string();
2299 let text_part = if !custom_id_part.is_empty() {
2302 format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
2305 } else {
2306 trimmed_rest[..start_of_hashes].trim_end().to_string()
2307 };
2308 (text_part, true, closing_hashes)
2309 } else {
2310 (rest.to_string(), false, String::new())
2312 }
2313 } else {
2314 (rest.to_string(), false, String::new())
2316 }
2317 } else {
2318 (rest.to_string(), false, String::new())
2320 }
2321 };
2322
2323 let content_column = marker_column + hashes.len() + spaces_after.len();
2324
2325 let raw_text = text.trim().to_string();
2327 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2328
2329 if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
2331 let next_line = content_lines[i + 1];
2332 if !lines[i + 1].in_code_block
2333 && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
2334 && let Some(next_line_id) =
2335 crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
2336 {
2337 custom_id = Some(next_line_id);
2338 }
2339 }
2340
2341 let is_valid = !spaces_after.is_empty()
2351 || rest.is_empty()
2352 || level > 1
2353 || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());
2354
2355 lines[i].heading = Some(HeadingInfo {
2356 level,
2357 style: HeadingStyle::ATX,
2358 marker: hashes.to_string(),
2359 marker_column,
2360 content_column,
2361 text: clean_text,
2362 custom_id,
2363 raw_text,
2364 has_closing_sequence: has_closing,
2365 closing_sequence: closing_seq,
2366 is_valid,
2367 });
2368 }
2369 else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2371 let next_line = content_lines[i + 1];
2372 if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2373 if front_matter_end > 0 && i < front_matter_end {
2375 continue;
2376 }
2377
2378 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2380 {
2381 continue;
2382 }
2383
2384 let content_line = line.trim();
2387
2388 if content_line.starts_with('-') || content_line.starts_with('*') || content_line.starts_with('+') {
2390 continue;
2391 }
2392
2393 if content_line.starts_with('_') {
2395 let non_ws: String = content_line.chars().filter(|c| !c.is_whitespace()).collect();
2396 if non_ws.len() >= 3 && non_ws.chars().all(|c| c == '_') {
2397 continue;
2398 }
2399 }
2400
2401 if let Some(first_char) = content_line.chars().next()
2403 && first_char.is_ascii_digit()
2404 {
2405 let num_end = content_line.chars().take_while(|c| c.is_ascii_digit()).count();
2406 if num_end < content_line.len() {
2407 let next = content_line.chars().nth(num_end);
2408 if next == Some('.') || next == Some(')') {
2409 continue;
2410 }
2411 }
2412 }
2413
2414 if ATX_HEADING_REGEX.is_match(line) {
2416 continue;
2417 }
2418
2419 if content_line.starts_with('>') {
2421 continue;
2422 }
2423
2424 let trimmed_start = line.trim_start();
2426 if trimmed_start.len() >= 3 {
2427 let first_three: String = trimmed_start.chars().take(3).collect();
2428 if first_three == "```" || first_three == "~~~" {
2429 continue;
2430 }
2431 }
2432
2433 if content_line.starts_with('<') {
2435 continue;
2436 }
2437
2438 let underline = next_line.trim();
2439
2440 let level = if underline.starts_with('=') { 1 } else { 2 };
2441 let style = if level == 1 {
2442 HeadingStyle::Setext1
2443 } else {
2444 HeadingStyle::Setext2
2445 };
2446
2447 let raw_text = line.trim().to_string();
2449 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2450
2451 if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2453 let attr_line = content_lines[i + 2];
2454 if !lines[i + 2].in_code_block
2455 && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2456 && let Some(attr_line_id) =
2457 crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2458 {
2459 custom_id = Some(attr_line_id);
2460 }
2461 }
2462
2463 lines[i].heading = Some(HeadingInfo {
2464 level,
2465 style,
2466 marker: underline.to_string(),
2467 marker_column: next_line.len() - next_line.trim_start().len(),
2468 content_column: lines[i].indent,
2469 text: clean_text,
2470 custom_id,
2471 raw_text,
2472 has_closing_sequence: false,
2473 closing_sequence: String::new(),
2474 is_valid: true, });
2476 }
2477 }
2478 }
2479 }
2480
2481 fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2483 const BLOCK_ELEMENTS: &[&str] = &[
2486 "address",
2487 "article",
2488 "aside",
2489 "audio",
2490 "blockquote",
2491 "canvas",
2492 "details",
2493 "dialog",
2494 "dd",
2495 "div",
2496 "dl",
2497 "dt",
2498 "embed",
2499 "fieldset",
2500 "figcaption",
2501 "figure",
2502 "footer",
2503 "form",
2504 "h1",
2505 "h2",
2506 "h3",
2507 "h4",
2508 "h5",
2509 "h6",
2510 "header",
2511 "hr",
2512 "iframe",
2513 "li",
2514 "main",
2515 "menu",
2516 "nav",
2517 "noscript",
2518 "object",
2519 "ol",
2520 "p",
2521 "picture",
2522 "pre",
2523 "script",
2524 "search",
2525 "section",
2526 "source",
2527 "style",
2528 "summary",
2529 "svg",
2530 "table",
2531 "tbody",
2532 "td",
2533 "template",
2534 "textarea",
2535 "tfoot",
2536 "th",
2537 "thead",
2538 "tr",
2539 "track",
2540 "ul",
2541 "video",
2542 ];
2543
2544 let mut i = 0;
2545 while i < lines.len() {
2546 if lines[i].in_code_block || lines[i].in_front_matter {
2548 i += 1;
2549 continue;
2550 }
2551
2552 let trimmed = lines[i].content(content).trim_start();
2553
2554 if trimmed.starts_with('<') && trimmed.len() > 1 {
2556 let after_bracket = &trimmed[1..];
2558 let is_closing = after_bracket.starts_with('/');
2559 let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2560
2561 let tag_name = tag_start
2563 .chars()
2564 .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2565 .collect::<String>()
2566 .to_lowercase();
2567
2568 if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2570 lines[i].in_html_block = true;
2572
2573 if !is_closing {
2576 let closing_tag = format!("</{tag_name}>");
2577 let allow_blank_lines = tag_name == "style" || tag_name == "script";
2579 let mut j = i + 1;
2580 let mut found_closing_tag = false;
2581 while j < lines.len() && j < i + 100 {
2582 if !allow_blank_lines && lines[j].is_blank {
2585 break;
2586 }
2587
2588 lines[j].in_html_block = true;
2589
2590 if lines[j].content(content).contains(&closing_tag) {
2592 found_closing_tag = true;
2593 }
2594
2595 if found_closing_tag {
2598 j += 1;
2599 while j < lines.len() && j < i + 100 {
2601 if lines[j].is_blank {
2602 break;
2603 }
2604 lines[j].in_html_block = true;
2605 j += 1;
2606 }
2607 break;
2608 }
2609 j += 1;
2610 }
2611 }
2612 }
2613 }
2614
2615 i += 1;
2616 }
2617 }
2618
2619 fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2622 if !flavor.supports_esm_blocks() {
2624 return;
2625 }
2626
2627 let mut in_multiline_comment = false;
2628
2629 for line in lines.iter_mut() {
2630 if line.is_blank || line.in_html_comment {
2632 continue;
2633 }
2634
2635 let trimmed = line.content(content).trim_start();
2636
2637 if in_multiline_comment {
2639 if trimmed.contains("*/") {
2640 in_multiline_comment = false;
2641 }
2642 continue;
2643 }
2644
2645 if trimmed.starts_with("//") {
2647 continue;
2648 }
2649
2650 if trimmed.starts_with("/*") {
2652 if !trimmed.contains("*/") {
2653 in_multiline_comment = true;
2654 }
2655 continue;
2656 }
2657
2658 if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
2660 line.in_esm_block = true;
2661 } else {
2662 break;
2664 }
2665 }
2666 }
2667
2668 fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
2670 let mut code_spans = Vec::new();
2671
2672 if !content.contains('`') {
2674 return code_spans;
2675 }
2676
2677 let parser = Parser::new(content).into_offset_iter();
2679
2680 for (event, range) in parser {
2681 if let Event::Code(_) = event {
2682 let start_pos = range.start;
2683 let end_pos = range.end;
2684
2685 let full_span = &content[start_pos..end_pos];
2687 let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
2688
2689 let content_start = start_pos + backtick_count;
2691 let content_end = end_pos - backtick_count;
2692 let span_content = if content_start < content_end {
2693 content[content_start..content_end].to_string()
2694 } else {
2695 String::new()
2696 };
2697
2698 let line_idx = lines
2701 .partition_point(|line| line.byte_offset <= start_pos)
2702 .saturating_sub(1);
2703 let line_num = line_idx + 1;
2704 let byte_col_start = start_pos - lines[line_idx].byte_offset;
2705
2706 let end_line_idx = lines
2708 .partition_point(|line| line.byte_offset <= end_pos)
2709 .saturating_sub(1);
2710 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
2711
2712 let line_content = lines[line_idx].content(content);
2715 let col_start = if byte_col_start <= line_content.len() {
2716 line_content[..byte_col_start].chars().count()
2717 } else {
2718 line_content.chars().count()
2719 };
2720
2721 let end_line_content = lines[end_line_idx].content(content);
2722 let col_end = if byte_col_end <= end_line_content.len() {
2723 end_line_content[..byte_col_end].chars().count()
2724 } else {
2725 end_line_content.chars().count()
2726 };
2727
2728 code_spans.push(CodeSpan {
2729 line: line_num,
2730 end_line: end_line_idx + 1,
2731 start_col: col_start,
2732 end_col: col_end,
2733 byte_offset: start_pos,
2734 byte_end: end_pos,
2735 backtick_count,
2736 content: span_content,
2737 });
2738 }
2739 }
2740
2741 code_spans.sort_by_key(|span| span.byte_offset);
2743
2744 code_spans
2745 }
2746
2747 fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
2758 const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
2760
2761 #[inline]
2764 fn reset_tracking_state(
2765 list_item: &ListItemInfo,
2766 has_list_breaking_content: &mut bool,
2767 min_continuation: &mut usize,
2768 ) {
2769 *has_list_breaking_content = false;
2770 let marker_width = if list_item.is_ordered {
2771 list_item.marker.len() + 1 } else {
2773 list_item.marker.len()
2774 };
2775 *min_continuation = if list_item.is_ordered {
2776 marker_width
2777 } else {
2778 UNORDERED_LIST_MIN_CONTINUATION_INDENT
2779 };
2780 }
2781
2782 let mut list_blocks = Vec::with_capacity(lines.len() / 10); let mut current_block: Option<ListBlock> = None;
2785 let mut last_list_item_line = 0;
2786 let mut current_indent_level = 0;
2787 let mut last_marker_width = 0;
2788
2789 let mut has_list_breaking_content_since_last_item = false;
2791 let mut min_continuation_for_tracking = 0;
2792
2793 for (line_idx, line_info) in lines.iter().enumerate() {
2794 let line_num = line_idx + 1;
2795
2796 if line_info.in_code_block {
2798 if let Some(ref mut block) = current_block {
2799 let min_continuation_indent =
2801 CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
2802
2803 let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
2805
2806 match context {
2807 CodeBlockContext::Indented => {
2808 block.end_line = line_num;
2810 continue;
2811 }
2812 CodeBlockContext::Standalone => {
2813 let completed_block = current_block.take().unwrap();
2815 list_blocks.push(completed_block);
2816 continue;
2817 }
2818 CodeBlockContext::Adjacent => {
2819 block.end_line = line_num;
2821 continue;
2822 }
2823 }
2824 } else {
2825 continue;
2827 }
2828 }
2829
2830 let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
2832 caps.get(0).unwrap().as_str().to_string()
2833 } else {
2834 String::new()
2835 };
2836
2837 if current_block.is_some()
2840 && line_info.list_item.is_none()
2841 && !line_info.is_blank
2842 && !line_info.in_code_span_continuation
2843 {
2844 let line_content = line_info.content(content).trim();
2845
2846 let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
2851 let breaks_list = line_info.heading.is_some()
2852 || line_content.starts_with("---")
2853 || line_content.starts_with("***")
2854 || line_content.starts_with("___")
2855 || crate::utils::skip_context::is_table_line(line_content)
2856 || line_content.starts_with(">")
2857 || (line_info.indent > 0
2858 && line_info.indent < min_continuation_for_tracking
2859 && !is_lazy_continuation);
2860
2861 if breaks_list {
2862 has_list_breaking_content_since_last_item = true;
2863 }
2864 }
2865
2866 if line_info.in_code_span_continuation
2869 && line_info.list_item.is_none()
2870 && let Some(ref mut block) = current_block
2871 {
2872 block.end_line = line_num;
2873 }
2874
2875 let is_valid_continuation =
2880 line_info.indent >= min_continuation_for_tracking || (line_info.indent == 0 && !line_info.is_blank); if !line_info.in_code_span_continuation
2882 && line_info.list_item.is_none()
2883 && !line_info.is_blank
2884 && !line_info.in_code_block
2885 && is_valid_continuation
2886 && let Some(ref mut block) = current_block
2887 {
2888 block.end_line = line_num;
2889 }
2890
2891 if let Some(list_item) = &line_info.list_item {
2893 let item_indent = list_item.marker_column;
2895 let nesting = item_indent / 2; if let Some(ref mut block) = current_block {
2898 let is_nested = nesting > block.nesting_level;
2902 let same_type =
2903 (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
2904 let same_context = block.blockquote_prefix == blockquote_prefix;
2905 let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
2907
2908 let marker_compatible =
2910 block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
2911
2912 let has_non_list_content = has_list_breaking_content_since_last_item;
2915
2916 let mut continues_list = if is_nested {
2920 same_context && reasonable_distance && !has_non_list_content
2922 } else {
2923 same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
2925 };
2926
2927 if !continues_list && reasonable_distance && line_num > 0 && block.end_line == line_num - 1 {
2930 if block.item_lines.contains(&(line_num - 1)) {
2933 continues_list = true;
2935 } else {
2936 continues_list = true;
2940 }
2941 }
2942
2943 if continues_list {
2944 block.end_line = line_num;
2946 block.item_lines.push(line_num);
2947
2948 block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
2950 list_item.marker.len() + 1
2951 } else {
2952 list_item.marker.len()
2953 });
2954
2955 if !block.is_ordered
2957 && block.marker.is_some()
2958 && block.marker.as_ref() != Some(&list_item.marker)
2959 {
2960 block.marker = None;
2962 }
2963
2964 reset_tracking_state(
2966 list_item,
2967 &mut has_list_breaking_content_since_last_item,
2968 &mut min_continuation_for_tracking,
2969 );
2970 } else {
2971 list_blocks.push(block.clone());
2974
2975 *block = ListBlock {
2976 start_line: line_num,
2977 end_line: line_num,
2978 is_ordered: list_item.is_ordered,
2979 marker: if list_item.is_ordered {
2980 None
2981 } else {
2982 Some(list_item.marker.clone())
2983 },
2984 blockquote_prefix: blockquote_prefix.clone(),
2985 item_lines: vec![line_num],
2986 nesting_level: nesting,
2987 max_marker_width: if list_item.is_ordered {
2988 list_item.marker.len() + 1
2989 } else {
2990 list_item.marker.len()
2991 },
2992 };
2993
2994 reset_tracking_state(
2996 list_item,
2997 &mut has_list_breaking_content_since_last_item,
2998 &mut min_continuation_for_tracking,
2999 );
3000 }
3001 } else {
3002 current_block = Some(ListBlock {
3004 start_line: line_num,
3005 end_line: line_num,
3006 is_ordered: list_item.is_ordered,
3007 marker: if list_item.is_ordered {
3008 None
3009 } else {
3010 Some(list_item.marker.clone())
3011 },
3012 blockquote_prefix,
3013 item_lines: vec![line_num],
3014 nesting_level: nesting,
3015 max_marker_width: list_item.marker.len(),
3016 });
3017
3018 reset_tracking_state(
3020 list_item,
3021 &mut has_list_breaking_content_since_last_item,
3022 &mut min_continuation_for_tracking,
3023 );
3024 }
3025
3026 last_list_item_line = line_num;
3027 current_indent_level = item_indent;
3028 last_marker_width = if list_item.is_ordered {
3029 list_item.marker.len() + 1 } else {
3031 list_item.marker.len()
3032 };
3033 } else if let Some(ref mut block) = current_block {
3034 let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
3044 lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
3045 } else {
3046 false
3047 };
3048
3049 let min_continuation_indent = if block.is_ordered {
3053 current_indent_level + last_marker_width
3054 } else {
3055 current_indent_level + 2 };
3057
3058 if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
3059 block.end_line = line_num;
3061 } else if line_info.is_blank {
3062 let mut check_idx = line_idx + 1;
3065 let mut found_continuation = false;
3066
3067 while check_idx < lines.len() && lines[check_idx].is_blank {
3069 check_idx += 1;
3070 }
3071
3072 if check_idx < lines.len() {
3073 let next_line = &lines[check_idx];
3074 if !next_line.in_code_block && next_line.indent >= min_continuation_indent {
3076 found_continuation = true;
3077 }
3078 else if !next_line.in_code_block
3080 && next_line.list_item.is_some()
3081 && let Some(item) = &next_line.list_item
3082 {
3083 let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
3084 .find(next_line.content(content))
3085 .map_or(String::new(), |m| m.as_str().to_string());
3086 if item.marker_column == current_indent_level
3087 && item.is_ordered == block.is_ordered
3088 && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
3089 {
3090 let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
3093 if let Some(between_line) = lines.get(idx) {
3094 let between_content = between_line.content(content);
3095 let trimmed = between_content.trim();
3096 if trimmed.is_empty() {
3098 return false;
3099 }
3100 let line_indent = between_content.len() - between_content.trim_start().len();
3102
3103 if trimmed.starts_with("```")
3105 || trimmed.starts_with("~~~")
3106 || trimmed.starts_with("---")
3107 || trimmed.starts_with("***")
3108 || trimmed.starts_with("___")
3109 || trimmed.starts_with(">")
3110 || crate::utils::skip_context::is_table_line(trimmed)
3111 || between_line.heading.is_some()
3112 {
3113 return true; }
3115
3116 line_indent >= min_continuation_indent
3118 } else {
3119 false
3120 }
3121 });
3122
3123 if block.is_ordered {
3124 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
3127 if let Some(between_line) = lines.get(idx) {
3128 let trimmed = between_line.content(content).trim();
3129 if trimmed.is_empty() {
3130 return false;
3131 }
3132 trimmed.starts_with("```")
3134 || trimmed.starts_with("~~~")
3135 || trimmed.starts_with("---")
3136 || trimmed.starts_with("***")
3137 || trimmed.starts_with("___")
3138 || trimmed.starts_with(">")
3139 || crate::utils::skip_context::is_table_line(trimmed)
3140 || between_line.heading.is_some()
3141 } else {
3142 false
3143 }
3144 });
3145 found_continuation = !has_structural_separators;
3146 } else {
3147 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
3149 if let Some(between_line) = lines.get(idx) {
3150 let trimmed = between_line.content(content).trim();
3151 if trimmed.is_empty() {
3152 return false;
3153 }
3154 trimmed.starts_with("```")
3156 || trimmed.starts_with("~~~")
3157 || trimmed.starts_with("---")
3158 || trimmed.starts_with("***")
3159 || trimmed.starts_with("___")
3160 || trimmed.starts_with(">")
3161 || crate::utils::skip_context::is_table_line(trimmed)
3162 || between_line.heading.is_some()
3163 } else {
3164 false
3165 }
3166 });
3167 found_continuation = !has_structural_separators;
3168 }
3169 }
3170 }
3171 }
3172
3173 if found_continuation {
3174 block.end_line = line_num;
3176 } else {
3177 list_blocks.push(block.clone());
3179 current_block = None;
3180 }
3181 } else {
3182 let min_required_indent = if block.is_ordered {
3185 current_indent_level + last_marker_width
3186 } else {
3187 current_indent_level + 2
3188 };
3189
3190 let line_content = line_info.content(content).trim();
3195
3196 let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
3198
3199 let is_structural_separator = line_info.heading.is_some()
3200 || line_content.starts_with("```")
3201 || line_content.starts_with("~~~")
3202 || line_content.starts_with("---")
3203 || line_content.starts_with("***")
3204 || line_content.starts_with("___")
3205 || line_content.starts_with(">")
3206 || looks_like_table;
3207
3208 let is_lazy_continuation = !is_structural_separator
3211 && !line_info.is_blank
3212 && (line_info.indent == 0 || line_info.indent >= min_required_indent);
3213
3214 if is_lazy_continuation {
3215 let content_to_check = if !blockquote_prefix.is_empty() {
3218 line_info
3220 .content(content)
3221 .strip_prefix(&blockquote_prefix)
3222 .unwrap_or(line_info.content(content))
3223 .trim()
3224 } else {
3225 line_info.content(content).trim()
3226 };
3227
3228 let starts_with_uppercase = content_to_check.chars().next().is_some_and(|c| c.is_uppercase());
3229
3230 if starts_with_uppercase && last_list_item_line > 0 {
3233 list_blocks.push(block.clone());
3235 current_block = None;
3236 } else {
3237 block.end_line = line_num;
3239 }
3240 } else {
3241 list_blocks.push(block.clone());
3243 current_block = None;
3244 }
3245 }
3246 }
3247 }
3248
3249 if let Some(block) = current_block {
3251 list_blocks.push(block);
3252 }
3253
3254 merge_adjacent_list_blocks(content, &mut list_blocks, lines);
3256
3257 list_blocks
3258 }
3259
3260 fn compute_char_frequency(content: &str) -> CharFrequency {
3262 let mut frequency = CharFrequency::default();
3263
3264 for ch in content.chars() {
3265 match ch {
3266 '#' => frequency.hash_count += 1,
3267 '*' => frequency.asterisk_count += 1,
3268 '_' => frequency.underscore_count += 1,
3269 '-' => frequency.hyphen_count += 1,
3270 '+' => frequency.plus_count += 1,
3271 '>' => frequency.gt_count += 1,
3272 '|' => frequency.pipe_count += 1,
3273 '[' => frequency.bracket_count += 1,
3274 '`' => frequency.backtick_count += 1,
3275 '<' => frequency.lt_count += 1,
3276 '!' => frequency.exclamation_count += 1,
3277 '\n' => frequency.newline_count += 1,
3278 _ => {}
3279 }
3280 }
3281
3282 frequency
3283 }
3284
3285 fn parse_html_tags(
3287 content: &str,
3288 lines: &[LineInfo],
3289 code_blocks: &[(usize, usize)],
3290 flavor: MarkdownFlavor,
3291 ) -> Vec<HtmlTag> {
3292 static HTML_TAG_REGEX: LazyLock<regex::Regex> =
3293 LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
3294
3295 let mut html_tags = Vec::with_capacity(content.matches('<').count());
3296
3297 for cap in HTML_TAG_REGEX.captures_iter(content) {
3298 let full_match = cap.get(0).unwrap();
3299 let match_start = full_match.start();
3300 let match_end = full_match.end();
3301
3302 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3304 continue;
3305 }
3306
3307 let is_closing = !cap.get(1).unwrap().as_str().is_empty();
3308 let tag_name_original = cap.get(2).unwrap().as_str();
3309 let tag_name = tag_name_original.to_lowercase();
3310 let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
3311
3312 if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
3315 continue;
3316 }
3317
3318 let mut line_num = 1;
3320 let mut col_start = match_start;
3321 let mut col_end = match_end;
3322 for (idx, line_info) in lines.iter().enumerate() {
3323 if match_start >= line_info.byte_offset {
3324 line_num = idx + 1;
3325 col_start = match_start - line_info.byte_offset;
3326 col_end = match_end - line_info.byte_offset;
3327 } else {
3328 break;
3329 }
3330 }
3331
3332 html_tags.push(HtmlTag {
3333 line: line_num,
3334 start_col: col_start,
3335 end_col: col_end,
3336 byte_offset: match_start,
3337 byte_end: match_end,
3338 tag_name,
3339 is_closing,
3340 is_self_closing,
3341 raw_content: full_match.as_str().to_string(),
3342 });
3343 }
3344
3345 html_tags
3346 }
3347
3348 fn parse_emphasis_spans(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<EmphasisSpan> {
3350 static EMPHASIS_REGEX: LazyLock<regex::Regex> =
3351 LazyLock::new(|| regex::Regex::new(r"(\*{1,3}|_{1,3})([^*_\s][^*_]*?)(\*{1,3}|_{1,3})").unwrap());
3352
3353 let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
3354
3355 for cap in EMPHASIS_REGEX.captures_iter(content) {
3356 let full_match = cap.get(0).unwrap();
3357 let match_start = full_match.start();
3358 let match_end = full_match.end();
3359
3360 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3362 continue;
3363 }
3364
3365 let opening_markers = cap.get(1).unwrap().as_str();
3366 let content_part = cap.get(2).unwrap().as_str();
3367 let closing_markers = cap.get(3).unwrap().as_str();
3368
3369 if opening_markers.chars().next() != closing_markers.chars().next()
3371 || opening_markers.len() != closing_markers.len()
3372 {
3373 continue;
3374 }
3375
3376 let marker = opening_markers.chars().next().unwrap();
3377 let marker_count = opening_markers.len();
3378
3379 let mut line_num = 1;
3381 let mut col_start = match_start;
3382 let mut col_end = match_end;
3383 for (idx, line_info) in lines.iter().enumerate() {
3384 if match_start >= line_info.byte_offset {
3385 line_num = idx + 1;
3386 col_start = match_start - line_info.byte_offset;
3387 col_end = match_end - line_info.byte_offset;
3388 } else {
3389 break;
3390 }
3391 }
3392
3393 emphasis_spans.push(EmphasisSpan {
3394 line: line_num,
3395 start_col: col_start,
3396 end_col: col_end,
3397 byte_offset: match_start,
3398 byte_end: match_end,
3399 marker,
3400 marker_count,
3401 content: content_part.to_string(),
3402 });
3403 }
3404
3405 emphasis_spans
3406 }
3407
3408 fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
3410 let mut table_rows = Vec::with_capacity(lines.len() / 20);
3411
3412 for (line_idx, line_info) in lines.iter().enumerate() {
3413 if line_info.in_code_block || line_info.is_blank {
3415 continue;
3416 }
3417
3418 let line = line_info.content(content);
3419 let line_num = line_idx + 1;
3420
3421 if !line.contains('|') {
3423 continue;
3424 }
3425
3426 let parts: Vec<&str> = line.split('|').collect();
3428 let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
3429
3430 let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
3432 let mut column_alignments = Vec::new();
3433
3434 if is_separator {
3435 for part in &parts[1..parts.len() - 1] {
3436 let trimmed = part.trim();
3438 let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
3439 "center".to_string()
3440 } else if trimmed.ends_with(':') {
3441 "right".to_string()
3442 } else if trimmed.starts_with(':') {
3443 "left".to_string()
3444 } else {
3445 "none".to_string()
3446 };
3447 column_alignments.push(alignment);
3448 }
3449 }
3450
3451 table_rows.push(TableRow {
3452 line: line_num,
3453 is_separator,
3454 column_count,
3455 column_alignments,
3456 });
3457 }
3458
3459 table_rows
3460 }
3461
3462 fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
3464 let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
3465
3466 for cap in URL_SIMPLE_REGEX.captures_iter(content) {
3468 let full_match = cap.get(0).unwrap();
3469 let match_start = full_match.start();
3470 let match_end = full_match.end();
3471
3472 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3474 continue;
3475 }
3476
3477 let preceding_char = if match_start > 0 {
3479 content.chars().nth(match_start - 1)
3480 } else {
3481 None
3482 };
3483 let following_char = content.chars().nth(match_end);
3484
3485 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3486 continue;
3487 }
3488 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3489 continue;
3490 }
3491
3492 let url = full_match.as_str();
3493 let url_type = if url.starts_with("https://") {
3494 "https"
3495 } else if url.starts_with("http://") {
3496 "http"
3497 } else if url.starts_with("ftp://") {
3498 "ftp"
3499 } else {
3500 "other"
3501 };
3502
3503 let mut line_num = 1;
3505 let mut col_start = match_start;
3506 let mut col_end = match_end;
3507 for (idx, line_info) in lines.iter().enumerate() {
3508 if match_start >= line_info.byte_offset {
3509 line_num = idx + 1;
3510 col_start = match_start - line_info.byte_offset;
3511 col_end = match_end - line_info.byte_offset;
3512 } else {
3513 break;
3514 }
3515 }
3516
3517 bare_urls.push(BareUrl {
3518 line: line_num,
3519 start_col: col_start,
3520 end_col: col_end,
3521 byte_offset: match_start,
3522 byte_end: match_end,
3523 url: url.to_string(),
3524 url_type: url_type.to_string(),
3525 });
3526 }
3527
3528 for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
3530 let full_match = cap.get(0).unwrap();
3531 let match_start = full_match.start();
3532 let match_end = full_match.end();
3533
3534 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3536 continue;
3537 }
3538
3539 let preceding_char = if match_start > 0 {
3541 content.chars().nth(match_start - 1)
3542 } else {
3543 None
3544 };
3545 let following_char = content.chars().nth(match_end);
3546
3547 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3548 continue;
3549 }
3550 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3551 continue;
3552 }
3553
3554 let email = full_match.as_str();
3555
3556 let mut line_num = 1;
3558 let mut col_start = match_start;
3559 let mut col_end = match_end;
3560 for (idx, line_info) in lines.iter().enumerate() {
3561 if match_start >= line_info.byte_offset {
3562 line_num = idx + 1;
3563 col_start = match_start - line_info.byte_offset;
3564 col_end = match_end - line_info.byte_offset;
3565 } else {
3566 break;
3567 }
3568 }
3569
3570 bare_urls.push(BareUrl {
3571 line: line_num,
3572 start_col: col_start,
3573 end_col: col_end,
3574 byte_offset: match_start,
3575 byte_end: match_end,
3576 url: email.to_string(),
3577 url_type: "email".to_string(),
3578 });
3579 }
3580
3581 bare_urls
3582 }
3583
3584 #[must_use]
3604 pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
3605 ValidHeadingsIter::new(&self.lines)
3606 }
3607
3608 #[must_use]
3612 pub fn has_valid_headings(&self) -> bool {
3613 self.lines
3614 .iter()
3615 .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
3616 }
3617}
3618
3619fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
3621 if list_blocks.len() < 2 {
3622 return;
3623 }
3624
3625 let mut merger = ListBlockMerger::new(content, lines);
3626 *list_blocks = merger.merge(list_blocks);
3627}
3628
/// Helper that decides which neighbouring [`ListBlock`]s belong together and
/// folds them into one.
struct ListBlockMerger<'a> {
    /// Full document text; handed to `LineInfo::content` to read a line's text.
    content: &'a str,
    /// Per-line metadata; line number `n` (1-based) is looked up at index `n - 1`.
    lines: &'a [LineInfo],
}
3634
3635impl<'a> ListBlockMerger<'a> {
3636 fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
3637 Self { content, lines }
3638 }
3639
3640 fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
3641 let mut merged = Vec::with_capacity(list_blocks.len());
3642 let mut current = list_blocks[0].clone();
3643
3644 for next in list_blocks.iter().skip(1) {
3645 if self.should_merge_blocks(¤t, next) {
3646 current = self.merge_two_blocks(current, next);
3647 } else {
3648 merged.push(current);
3649 current = next.clone();
3650 }
3651 }
3652
3653 merged.push(current);
3654 merged
3655 }
3656
3657 fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
3659 if !self.blocks_are_compatible(current, next) {
3661 return false;
3662 }
3663
3664 let spacing = self.analyze_spacing_between(current, next);
3666 match spacing {
3667 BlockSpacing::Consecutive => true,
3668 BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
3669 BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
3670 self.can_merge_with_content_between(current, next)
3671 }
3672 }
3673 }
3674
3675 fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
3677 current.is_ordered == next.is_ordered
3678 && current.blockquote_prefix == next.blockquote_prefix
3679 && current.nesting_level == next.nesting_level
3680 }
3681
3682 fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
3684 let gap = next.start_line - current.end_line;
3685
3686 match gap {
3687 1 => BlockSpacing::Consecutive,
3688 2 => BlockSpacing::SingleBlank,
3689 _ if gap > 2 => {
3690 if self.has_only_blank_lines_between(current, next) {
3691 BlockSpacing::MultipleBlanks
3692 } else {
3693 BlockSpacing::ContentBetween
3694 }
3695 }
3696 _ => BlockSpacing::Consecutive, }
3698 }
3699
3700 fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3702 if has_meaningful_content_between(self.content, current, next, self.lines) {
3705 return false; }
3707
3708 !current.is_ordered && current.marker == next.marker
3710 }
3711
3712 fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3714 if has_meaningful_content_between(self.content, current, next, self.lines) {
3716 return false; }
3718
3719 current.is_ordered && next.is_ordered
3721 }
3722
3723 fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3725 for line_num in (current.end_line + 1)..next.start_line {
3726 if let Some(line_info) = self.lines.get(line_num - 1)
3727 && !line_info.content(self.content).trim().is_empty()
3728 {
3729 return false;
3730 }
3731 }
3732 true
3733 }
3734
3735 fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
3737 current.end_line = next.end_line;
3738 current.item_lines.extend_from_slice(&next.item_lines);
3739
3740 current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
3742
3743 if !current.is_ordered && self.markers_differ(¤t, next) {
3745 current.marker = None; }
3747
3748 current
3749 }
3750
3751 fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
3753 current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
3754 }
3755}
3756
/// How two neighbouring list blocks are separated, measured as the distance
/// between the first block's end line and the second block's start line.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// The second block starts on the line right after the first one ends.
    Consecutive,
    /// Exactly one line lies between the two blocks.
    SingleBlank,
    /// Several lines lie between the blocks and all of them are blank.
    MultipleBlanks,
    /// Several lines lie between the blocks and at least one is not blank.
    ContentBetween,
}
3765
3766fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
3768 for line_num in (current.end_line + 1)..next.start_line {
3770 if let Some(line_info) = lines.get(line_num - 1) {
3771 let trimmed = line_info.content(content).trim();
3773
3774 if trimmed.is_empty() {
3776 continue;
3777 }
3778
3779 if line_info.heading.is_some() {
3783 return true; }
3785
3786 if is_horizontal_rule(trimmed) {
3788 return true; }
3790
3791 if crate::utils::skip_context::is_table_line(trimmed) {
3793 return true; }
3795
3796 if trimmed.starts_with('>') {
3798 return true; }
3800
3801 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
3803 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3804
3805 let min_continuation_indent = if current.is_ordered {
3807 current.nesting_level + current.max_marker_width + 1 } else {
3809 current.nesting_level + 2
3810 };
3811
3812 if line_indent < min_continuation_indent {
3813 return true; }
3816 }
3817
3818 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3820
3821 let min_indent = if current.is_ordered {
3823 current.nesting_level + current.max_marker_width
3824 } else {
3825 current.nesting_level + 2
3826 };
3827
3828 if line_indent < min_indent {
3830 return true; }
3832
3833 }
3836 }
3837
3838 false
3840}
3841
3842pub fn is_horizontal_rule_line(line: &str) -> bool {
3849 let leading_spaces = line.len() - line.trim_start_matches(' ').len();
3851 if leading_spaces > 3 || line.starts_with('\t') {
3852 return false;
3853 }
3854
3855 is_horizontal_rule_content(line.trim())
3856}
3857
/// Checks whether already-trimmed text forms a horizontal rule.
///
/// The text must be at least three bytes long, start with `-`, `*` or `_`,
/// contain that same marker at least three times, and contain nothing else
/// except spaces and tabs.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    if trimmed.len() < 3 {
        return false;
    }

    let marker = match trimmed.chars().next() {
        Some(ch @ ('-' | '*' | '_')) => ch,
        _ => return false,
    };

    let mut marker_count = 0usize;
    for ch in trimmed.chars() {
        match ch {
            c if c == marker => marker_count += 1,
            // Spaces and tabs may be interleaved freely between markers.
            ' ' | '\t' => {}
            _ => return false,
        }
    }

    marker_count >= 3
}
3882
/// Delegates to [`is_horizontal_rule_content`] with `trimmed` unchanged and
/// returns its result.
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}
3887
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty document has a single line offset and no line records.
    #[test]
    fn test_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }

    /// Offsets within a single line map to line 1 and a 1-based column.
    #[test]
    fn test_single_line() {
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
    }

    /// Line offsets and offset-to-position mapping across several lines.
    #[test]
    fn test_multi_line() {
        let content = "# Title\n\nSecond line\nThird line";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(8), (2, 1));
        assert_eq!(ctx.offset_to_line_col(9), (3, 1));
        assert_eq!(ctx.offset_to_line_col(15), (3, 7));
        assert_eq!(ctx.offset_to_line_col(21), (4, 1));
    }

    /// Per-line metadata: content, byte offset, indent and blank detection.
    #[test]
    fn test_line_info() {
        // The second line is indented by four spaces, matching the `indent == 4`
        // assertions below.
        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.lines.len(), 7);

        let line1 = &ctx.lines[0];
        assert_eq!(line1.content(ctx.content), "# Title");
        assert_eq!(line1.byte_offset, 0);
        assert_eq!(line1.indent, 0);
        assert!(!line1.is_blank);
        assert!(!line1.in_code_block);
        assert!(line1.list_item.is_none());

        let line2 = &ctx.lines[1];
        assert_eq!(line2.content(ctx.content), "    indented");
        assert_eq!(line2.byte_offset, 8);
        assert_eq!(line2.indent, 4);
        assert!(!line2.is_blank);

        let line3 = &ctx.lines[2];
        assert_eq!(line3.content(ctx.content), "");
        assert!(line3.is_blank);

        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
    }

    /// List markers, orderedness and marker/content columns are detected per line.
    #[test]
    fn test_list_item_detection() {
        // The nested bullet is indented by two spaces, matching the
        // `marker_column == 2` assertion below.
        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let line1 = &ctx.lines[0];
        assert!(line1.list_item.is_some());
        let list1 = line1.list_item.as_ref().unwrap();
        assert_eq!(list1.marker, "-");
        assert!(!list1.is_ordered);
        assert_eq!(list1.marker_column, 0);
        assert_eq!(list1.content_column, 2);

        let line2 = &ctx.lines[1];
        assert!(line2.list_item.is_some());
        let list2 = line2.list_item.as_ref().unwrap();
        assert_eq!(list2.marker, "*");
        assert_eq!(list2.marker_column, 2);

        let line3 = &ctx.lines[2];
        assert!(line3.list_item.is_some());
        let list3 = line3.list_item.as_ref().unwrap();
        assert_eq!(list3.marker, "1.");
        assert!(list3.is_ordered);
        assert_eq!(list3.number, Some(1));

        let line6 = &ctx.lines[5];
        assert!(line6.list_item.is_none());
    }

    /// Offsets on newline characters and at end of input.
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        let content = "a\nb\nc";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(1), (1, 2));
        assert_eq!(ctx.offset_to_line_col(2), (2, 1));
        assert_eq!(ctx.offset_to_line_col(3), (2, 2));
        assert_eq!(ctx.offset_to_line_col(4), (3, 1));
        assert_eq!(ctx.offset_to_line_col(5), (3, 2));
    }

    /// In MDX flavor, leading import/export lines are flagged as ESM blocks.
    #[test]
    fn test_mdx_esm_blocks() {
        let content = r##"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall

In {year}, the snowfall was above average.
It was followed by a warm spring which caused
flood conditions in many of the nearby rivers.

<Chart color="#fcb32c" year={year} />
"##;

        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);

        assert_eq!(ctx.lines.len(), 10);
        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
        assert!(
            !ctx.lines[3].in_esm_block,
            "Line 4 (heading) should NOT be in_esm_block"
        );
        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
    }

    /// The same import/export lines are not flagged outside MDX flavor.
    #[test]
    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
        let content = r#"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(
            !ctx.lines[0].in_esm_block,
            "Line 1 should NOT be in_esm_block in Standard flavor"
        );
        assert!(
            !ctx.lines[1].in_esm_block,
            "Line 2 should NOT be in_esm_block in Standard flavor"
        );
    }
}