1use crate::config::MarkdownFlavor;
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
4use crate::utils::element_cache::ElementCache;
5use crate::utils::regex_cache::URL_SIMPLE_REGEX;
6use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
7use regex::Regex;
8use std::borrow::Cow;
9use std::path::PathBuf;
10use std::sync::LazyLock;
11
/// Evaluates `$code` and returns its value; when `$profile` is true the
/// elapsed wall-clock time is reported on stderr under the label `$name`.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let started_at = std::time::Instant::now();
        let output = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, started_at.elapsed());
        }
        output
    }};
}

/// wasm32 variant of `profile_section!`: evaluates `$code` without timing it.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let output = $code;
        // Reference the label and flag so callers that compute them do not
        // trigger unused-variable warnings on wasm32 builds.
        let _ = (&$name, &$profile);
        output
    }};
}
29
/// Regex matching `[text](url "title")` and `[text][ref]` link syntax.
///
/// Capture groups: 1 = link text, 2 = angle-bracketed URL, 3 = bare URL,
/// 4/5 = double-/single-quoted title, 6 = reference id.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\] # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
        \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
        |
        \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
43
/// Regex matching `![alt](url "title")` and `![alt][ref]` image syntax.
///
/// Capture groups: 1 = alt text, 2 = angle-bracketed URL, 3 = bare URL,
/// 4/5 = double-/single-quoted title, 6 = reference id.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\] # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
        \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
        |
        \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
57
/// Regex matching a single-line reference definition `[id]: url "title"`
/// with up to three leading spaces.
///
/// Capture groups: 1 = id, 2 = URL, 3/4 = double-/single-quoted title.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
61
/// Regex matching a plain `local@domain.tld` email address (TLD of at least
/// two ASCII letters).
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
67
/// Regex capturing a leading blockquote prefix: optional whitespace, one or
/// more `>` characters, and any whitespace that follows (group 1).
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
70
/// Per-line facts computed once while building a `LintContext`.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset of the line's first byte within the source text.
    pub byte_offset: usize,
    /// Length of the line in bytes (used together with `byte_offset` to slice the source).
    pub byte_len: usize,
    /// Indentation of the line. NOTE(review): unit (bytes vs. columns) is not visible here.
    pub indent: usize,
    /// Visual indentation. NOTE(review): presumably tab-expanded columns; confirm.
    pub visual_indent: usize,
    /// Whether the line is blank.
    pub is_blank: bool,
    /// Whether the line lies inside a code block.
    pub in_code_block: bool,
    /// Whether the line lies inside front matter.
    pub in_front_matter: bool,
    /// Whether the line lies inside an HTML block.
    pub in_html_block: bool,
    /// Whether the line lies inside an HTML comment.
    pub in_html_comment: bool,
    /// List-item details when the line starts a list item.
    pub list_item: Option<ListItemInfo>,
    /// Heading details when the line is a heading.
    pub heading: Option<HeadingInfo>,
    /// Blockquote details when the line is part of a blockquote.
    pub blockquote: Option<BlockquoteInfo>,
    /// Whether the line lies inside a mkdocstrings block.
    pub in_mkdocstrings: bool,
    /// Whether the line lies inside an ESM block.
    pub in_esm_block: bool,
    /// Set for every line after the first one of a code span that covers several lines.
    pub in_code_span_continuation: bool,
    /// Whether the line is a horizontal rule.
    pub is_horizontal_rule: bool,
}
110
111impl LineInfo {
112 pub fn content<'a>(&self, source: &'a str) -> &'a str {
114 &source[self.byte_offset..self.byte_offset + self.byte_len]
115 }
116}
117
/// Details about a list item found on a line.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The list marker text.
    pub marker: String,
    /// Whether the item belongs to an ordered list.
    pub is_ordered: bool,
    /// Item number, when present. NOTE(review): presumably only set for ordered items; confirm.
    pub number: Option<usize>,
    /// Column at which the marker starts.
    pub marker_column: usize,
    /// Column at which the item's content starts.
    pub content_column: usize,
}
132
/// Syntax style in which a heading is written.
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    /// ATX-style heading.
    ATX,
    /// Setext-style heading, first variant. NOTE(review): presumably the `=` underline (level 1); confirm.
    Setext1,
    /// Setext-style heading, second variant. NOTE(review): presumably the `-` underline (level 2); confirm.
    Setext2,
}
143
/// A link discovered in the document.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// 1-based line number on which the link starts.
    pub line: usize,
    /// Byte offset of the link start relative to the start of its line.
    pub start_col: usize,
    /// Byte offset of the link end relative to the start of the line it ends on.
    pub end_col: usize,
    /// Byte offset of the link start within the document.
    pub byte_offset: usize,
    /// Byte offset just past the end of the link within the document.
    pub byte_end: usize,
    /// Text between the link's opening `[` and its matching `]`.
    pub text: Cow<'a, str>,
    /// Destination URL (empty for references found only by the regex fallback).
    pub url: Cow<'a, str>,
    /// Whether the link uses reference, collapsed or shortcut syntax.
    pub is_reference: bool,
    /// Lower-cased reference id for reference-style links.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type as classified by pulldown-cmark.
    pub link_type: LinkType,
}
168
/// A reference that the Markdown parser reported through its broken-link callback.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text reported by the parser.
    pub reference: String,
    /// Byte range of the broken link in the document.
    pub span: std::ops::Range<usize>,
}
177
/// A footnote reference reported by the Markdown parser.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// Footnote identifier.
    pub id: String,
    /// 1-based line number on which the reference starts.
    pub line: usize,
    /// Byte offset of the reference start within the document.
    pub byte_offset: usize,
    /// Byte offset just past the end of the reference.
    pub byte_end: usize,
}
190
/// An image discovered in the document.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// 1-based line number on which the image starts.
    pub line: usize,
    /// Byte offset of the image start relative to the start of its line.
    pub start_col: usize,
    /// Byte offset of the image end relative to the start of the line it ends on.
    pub end_col: usize,
    /// Byte offset of the image start within the document.
    pub byte_offset: usize,
    /// Byte offset just past the end of the image within the document.
    pub byte_end: usize,
    /// Alternative text of the image.
    pub alt_text: Cow<'a, str>,
    /// Image URL.
    pub url: Cow<'a, str>,
    /// Whether the image uses reference, collapsed or shortcut syntax.
    pub is_reference: bool,
    /// Reference id for reference-style images.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type as classified by pulldown-cmark.
    pub link_type: LinkType,
}
215
/// A reference definition (`[id]: url "title"`).
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line number of the definition.
    pub line: usize,
    /// Definition id; `get_reference_url` compares it against a lower-cased lookup key.
    pub id: String,
    /// Target URL.
    pub url: String,
    /// Optional title.
    pub title: Option<String>,
    /// Byte offset of the definition start within the document.
    pub byte_offset: usize,
    /// Byte offset just past the end of the definition.
    pub byte_end: usize,
    /// Byte offset of the title start, when a title is present.
    pub title_byte_start: Option<usize>,
    /// Byte offset just past the title, when a title is present.
    pub title_byte_end: Option<usize>,
}
236
/// An inline code span, possibly covering several lines.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// 1-based line number on which the span starts.
    pub line: usize,
    /// 1-based line number on which the span ends.
    pub end_line: usize,
    /// Start column on the first line (compared against 0-based columns in `is_in_code_span`).
    pub start_col: usize,
    /// End column (exclusive) on the last line.
    pub end_col: usize,
    /// Byte offset of the span start within the document.
    pub byte_offset: usize,
    /// Byte offset just past the end of the span.
    pub byte_end: usize,
    /// Number of backticks delimiting the span.
    pub backtick_count: usize,
    /// Content of the span.
    pub content: String,
}
257
/// Details about a heading found on a line.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level.
    pub level: u8,
    /// Syntax style of the heading.
    pub style: HeadingStyle,
    /// Marker text of the heading.
    pub marker: String,
    /// Column at which the marker starts.
    pub marker_column: usize,
    /// Column at which the heading content starts.
    pub content_column: usize,
    /// Heading text.
    pub text: String,
    /// Custom id attached to the heading, if any.
    pub custom_id: Option<String>,
    /// Raw heading text. NOTE(review): exact difference from `text` is not visible here.
    pub raw_text: String,
    /// Whether the heading has a closing sequence.
    pub has_closing_sequence: bool,
    /// The closing sequence text.
    pub closing_sequence: String,
    /// Whether the heading is valid; `ValidHeadingsIter` skips headings where this is false.
    pub is_valid: bool,
}
285
/// A valid heading together with its location, as yielded by `ValidHeadingsIter`.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// 1-based line number of the heading.
    pub line_num: usize,
    /// The heading's details.
    pub heading: &'a HeadingInfo,
    /// The line the heading was found on.
    pub line_info: &'a LineInfo,
}
299
300pub struct ValidHeadingsIter<'a> {
305 lines: &'a [LineInfo],
306 current_index: usize,
307}
308
309impl<'a> ValidHeadingsIter<'a> {
310 fn new(lines: &'a [LineInfo]) -> Self {
311 Self {
312 lines,
313 current_index: 0,
314 }
315 }
316}
317
318impl<'a> Iterator for ValidHeadingsIter<'a> {
319 type Item = ValidHeading<'a>;
320
321 fn next(&mut self) -> Option<Self::Item> {
322 while self.current_index < self.lines.len() {
323 let idx = self.current_index;
324 self.current_index += 1;
325
326 let line_info = &self.lines[idx];
327 if let Some(heading) = &line_info.heading
328 && heading.is_valid
329 {
330 return Some(ValidHeading {
331 line_num: idx + 1, heading,
333 line_info,
334 });
335 }
336 }
337 None
338 }
339}
340
/// Details about a blockquote line.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting depth of the blockquote.
    pub nesting_level: usize,
    /// Indentation text preceding the marker.
    pub indent: String,
    /// Column at which the marker starts.
    pub marker_column: usize,
    /// Blockquote prefix text.
    pub prefix: String,
    /// Content following the prefix.
    pub content: String,
    /// Whether no space follows the marker.
    pub has_no_space_after_marker: bool,
    /// Whether more than one space follows the marker.
    pub has_multiple_spaces_after_marker: bool,
    /// Whether the line needs the fix associated with rule MD028.
    pub needs_md028_fix: bool,
}
361
/// A contiguous block of list lines.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// First line of the block (inclusive; compared against 1-based line numbers).
    pub start_line: usize,
    /// Last line of the block (inclusive).
    pub end_line: usize,
    /// Whether the block is an ordered list.
    pub is_ordered: bool,
    /// Marker used by the block, if recorded.
    pub marker: Option<String>,
    /// Blockquote prefix the block sits under.
    pub blockquote_prefix: String,
    /// Line numbers of the list items in the block.
    pub item_lines: Vec<usize>,
    /// Nesting level of the block.
    pub nesting_level: usize,
    /// Width of the widest marker in the block.
    pub max_marker_width: usize,
}
382
383use std::sync::{Arc, OnceLock};
384
/// Occurrence counts for characters that matter to Markdown syntax, used by
/// `has_char`, `char_count` and the `likely_has_*` pre-checks.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count reported for `#`.
    pub hash_count: usize,
    /// Count reported for `*`.
    pub asterisk_count: usize,
    /// Count reported for `_`.
    pub underscore_count: usize,
    /// Count reported for `-`.
    pub hyphen_count: usize,
    /// Count reported for `+`.
    pub plus_count: usize,
    /// Count reported for `>`.
    pub gt_count: usize,
    /// Count reported for `|`.
    pub pipe_count: usize,
    /// Count reported for `[`. NOTE(review): may also include `]`; the counting code is not visible here.
    pub bracket_count: usize,
    /// Count reported for the backtick character.
    pub backtick_count: usize,
    /// Count reported for `<`.
    pub lt_count: usize,
    /// Count reported for `!`.
    pub exclamation_count: usize,
    /// Count reported for the newline character.
    pub newline_count: usize,
}
413
/// An HTML tag found in the document.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number of the tag (matched against the argument of `html_tags_on_line`).
    pub line: usize,
    /// Start column of the tag.
    pub start_col: usize,
    /// End column of the tag.
    pub end_col: usize,
    /// Byte offset of the tag start within the document.
    pub byte_offset: usize,
    /// Byte offset just past the end of the tag.
    pub byte_end: usize,
    /// Tag name.
    pub tag_name: String,
    /// Whether this is a closing tag.
    pub is_closing: bool,
    /// Whether the tag is self-closing.
    pub is_self_closing: bool,
    /// Raw text of the tag.
    pub raw_content: String,
}
436
/// An emphasis span found in the document.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number of the span (matched against the argument of `emphasis_spans_on_line`).
    pub line: usize,
    /// Start column of the span.
    pub start_col: usize,
    /// End column of the span.
    pub end_col: usize,
    /// Byte offset of the span start within the document.
    pub byte_offset: usize,
    /// Byte offset of the span end within the document.
    pub byte_end: usize,
    /// Emphasis marker character.
    pub marker: char,
    /// Number of marker characters.
    pub marker_count: usize,
    /// Content of the span.
    pub content: String,
}
457
/// A table row found in the document.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number of the row (matched against the argument of `table_rows_on_line`).
    pub line: usize,
    /// Whether the row is a separator row.
    pub is_separator: bool,
    /// Number of columns in the row.
    pub column_count: usize,
    /// Column alignments, one string per column.
    pub column_alignments: Vec<String>,
}
470
/// A bare URL (or similar) found in the document.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number of the URL (matched against the argument of `bare_urls_on_line`).
    pub line: usize,
    /// Start column of the URL.
    pub start_col: usize,
    /// End column of the URL.
    pub end_col: usize,
    /// Byte offset of the URL start within the document.
    pub byte_offset: usize,
    /// Byte offset of the URL end within the document.
    pub byte_end: usize,
    /// The URL text.
    pub url: String,
    /// Kind of URL, as a string. NOTE(review): the set of values is not visible here.
    pub url_type: String,
}
489
/// Pre-computed view of a Markdown document shared by lint rules.
///
/// Eagerly computed data is stored in public fields; more expensive or
/// rarely needed data sits behind `OnceLock` caches exposed through accessor
/// methods.
pub struct LintContext<'a> {
    /// The document text.
    pub content: &'a str,
    /// Byte offset of the start of each line (first entry is 0).
    pub line_offsets: Vec<usize>,
    /// Code block byte ranges as reported by `CodeBlockUtils::detect_code_blocks`.
    pub code_blocks: Vec<(usize, usize)>,
    /// Per-line information, indexed by line number minus one.
    pub lines: Vec<LineInfo>,
    /// Links found in the document.
    pub links: Vec<ParsedLink<'a>>,
    /// Images found in the document.
    pub images: Vec<ParsedImage<'a>>,
    /// Broken links reported by the Markdown parser.
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Footnote references found in the document.
    pub footnote_refs: Vec<FootnoteRef>,
    /// Reference definitions found in the document.
    pub reference_defs: Vec<ReferenceDef>,
    /// Code spans; filled in by `new`, read through `code_spans()`.
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>,
    /// List blocks found in the document.
    pub list_blocks: Vec<ListBlock>,
    /// Character counts for quick pre-checks.
    pub char_frequency: CharFrequency,
    /// Lazily computed HTML tags; read through `html_tags()`.
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>,
    /// Lazily computed emphasis spans; read through `emphasis_spans()`.
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>,
    /// Lazily computed table rows; read through `table_rows()`.
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>,
    /// Lazily computed bare URLs; read through `bare_urls()`.
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>,
    /// Lazily computed answer for `has_mixed_list_nesting()`.
    has_mixed_list_nesting_cache: OnceLock<bool>,
    /// Byte ranges of HTML comments.
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Table blocks found in the document.
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Line index built over `content`.
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    /// Byte ranges of Jinja constructs (half-open, as tested by `is_in_jinja_range`).
    jinja_ranges: Vec<(usize, usize)>,
    /// Markdown flavor the document is linted as.
    pub flavor: MarkdownFlavor,
    /// Path of the source file, when known.
    pub source_file: Option<PathBuf>,
}
515
/// The four consecutive pieces of a blockquote line, borrowed from the line.
struct BlockquoteComponents<'a> {
    /// Leading spaces/tabs before the first `>`.
    indent: &'a str,
    /// The run of consecutive `>` characters.
    markers: &'a str,
    /// Spaces/tabs directly after the markers.
    spaces_after: &'a str,
    /// Everything after that whitespace.
    content: &'a str,
}

/// Splits `line` into indent, `>` markers, following whitespace and content.
///
/// Returns `None` when the first character after optional leading spaces or
/// tabs is not `>`. Only one uninterrupted run of `>` is treated as markers,
/// so in `> > x` the content is `> x`.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    /// Index of the first byte at or after `from` that `accept` rejects.
    fn run_end(bytes: &[u8], from: usize, accept: fn(u8) -> bool) -> usize {
        bytes[from..]
            .iter()
            .position(|&b| !accept(b))
            .map_or(bytes.len(), |len| from + len)
    }
    fn is_space_or_tab(b: u8) -> bool {
        matches!(b, b' ' | b'\t')
    }
    fn is_marker(b: u8) -> bool {
        b == b'>'
    }

    let raw = line.as_bytes();

    let indent_end = run_end(raw, 0, is_space_or_tab);
    if raw.get(indent_end) != Some(&b'>') {
        return None;
    }
    let markers_end = run_end(raw, indent_end, is_marker);
    let spaces_end = run_end(raw, markers_end, is_space_or_tab);

    // Every boundary sits next to an ASCII byte, so slicing the &str is safe.
    let (indent, rest) = line.split_at(indent_end);
    let (markers, rest) = rest.split_at(markers_end - indent_end);
    let (spaces_after, content) = rest.split_at(spaces_end - markers_end);

    Some(BlockquoteComponents {
        indent,
        markers,
        spaces_after,
        content,
    })
}
560
561impl<'a> LintContext<'a> {
562 pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
563 #[cfg(not(target_arch = "wasm32"))]
564 let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
565 #[cfg(target_arch = "wasm32")]
566 let profile = false;
567
568 let line_offsets = profile_section!("Line offsets", profile, {
569 let mut offsets = vec![0];
570 for (i, c) in content.char_indices() {
571 if c == '\n' {
572 offsets.push(i + 1);
573 }
574 }
575 offsets
576 });
577
578 let code_blocks = profile_section!("Code blocks", profile, CodeBlockUtils::detect_code_blocks(content));
580
581 let html_comment_ranges = profile_section!(
583 "HTML comment ranges",
584 profile,
585 crate::utils::skip_context::compute_html_comment_ranges(content)
586 );
587
588 let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
590 if flavor == MarkdownFlavor::MkDocs {
591 crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
592 } else {
593 Vec::new()
594 }
595 });
596
597 let mut lines = profile_section!(
599 "Basic line info",
600 profile,
601 Self::compute_basic_line_info(
602 content,
603 &line_offsets,
604 &code_blocks,
605 flavor,
606 &html_comment_ranges,
607 &autodoc_ranges,
608 )
609 );
610
611 profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));
613
614 profile_section!(
616 "ESM blocks",
617 profile,
618 Self::detect_esm_blocks(content, &mut lines, flavor)
619 );
620
621 let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));
623
624 profile_section!(
626 "Headings & blockquotes",
627 profile,
628 Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
629 );
630
631 let code_spans = profile_section!("Code spans", profile, Self::parse_code_spans(content, &lines));
633
634 for span in &code_spans {
637 if span.end_line > span.line {
638 for line_num in (span.line + 1)..=span.end_line {
640 if let Some(line_info) = lines.get_mut(line_num - 1) {
641 line_info.in_code_span_continuation = true;
642 }
643 }
644 }
645 }
646
647 let (links, broken_links, footnote_refs) = profile_section!(
649 "Links",
650 profile,
651 Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
652 );
653
654 let images = profile_section!(
655 "Images",
656 profile,
657 Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
658 );
659
660 let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));
661
662 let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));
663
664 let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));
666
667 let table_blocks = profile_section!(
669 "Table blocks",
670 profile,
671 crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
672 content,
673 &code_blocks,
674 &code_spans,
675 &html_comment_ranges,
676 )
677 );
678
679 let line_index = profile_section!(
681 "Line index",
682 profile,
683 crate::utils::range_utils::LineIndex::new(content)
684 );
685
686 let jinja_ranges = profile_section!(
688 "Jinja ranges",
689 profile,
690 crate::utils::jinja_utils::find_jinja_ranges(content)
691 );
692
693 Self {
694 content,
695 line_offsets,
696 code_blocks,
697 lines,
698 links,
699 images,
700 broken_links,
701 footnote_refs,
702 reference_defs,
703 code_spans_cache: OnceLock::from(Arc::new(code_spans)),
704 list_blocks,
705 char_frequency,
706 html_tags_cache: OnceLock::new(),
707 emphasis_spans_cache: OnceLock::new(),
708 table_rows_cache: OnceLock::new(),
709 bare_urls_cache: OnceLock::new(),
710 has_mixed_list_nesting_cache: OnceLock::new(),
711 html_comment_ranges,
712 table_blocks,
713 line_index,
714 jinja_ranges,
715 flavor,
716 source_file,
717 }
718 }
719
720 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
722 Arc::clone(
723 self.code_spans_cache
724 .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
725 )
726 }
727
728 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
730 &self.html_comment_ranges
731 }
732
733 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
735 Arc::clone(self.html_tags_cache.get_or_init(|| {
736 Arc::new(Self::parse_html_tags(
737 self.content,
738 &self.lines,
739 &self.code_blocks,
740 self.flavor,
741 ))
742 }))
743 }
744
745 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
747 Arc::clone(
748 self.emphasis_spans_cache
749 .get_or_init(|| Arc::new(Self::parse_emphasis_spans(self.content, &self.lines, &self.code_blocks))),
750 )
751 }
752
753 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
755 Arc::clone(
756 self.table_rows_cache
757 .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
758 )
759 }
760
761 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
763 Arc::clone(
764 self.bare_urls_cache
765 .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
766 )
767 }
768
769 pub fn has_mixed_list_nesting(&self) -> bool {
773 *self
774 .has_mixed_list_nesting_cache
775 .get_or_init(|| self.compute_mixed_list_nesting())
776 }
777
778 fn compute_mixed_list_nesting(&self) -> bool {
780 let mut stack: Vec<(usize, bool)> = Vec::new();
785 let mut last_was_blank = false;
786
787 for line_info in &self.lines {
788 if line_info.in_code_block
790 || line_info.in_front_matter
791 || line_info.in_mkdocstrings
792 || line_info.in_html_comment
793 || line_info.in_esm_block
794 {
795 continue;
796 }
797
798 if line_info.is_blank {
800 last_was_blank = true;
801 continue;
802 }
803
804 if let Some(list_item) = &line_info.list_item {
805 let current_pos = if list_item.marker_column == 1 {
807 0
808 } else {
809 list_item.marker_column
810 };
811
812 if last_was_blank && current_pos == 0 {
814 stack.clear();
815 }
816 last_was_blank = false;
817
818 while let Some(&(pos, _)) = stack.last() {
820 if pos >= current_pos {
821 stack.pop();
822 } else {
823 break;
824 }
825 }
826
827 if let Some(&(_, parent_is_ordered)) = stack.last()
829 && parent_is_ordered != list_item.is_ordered
830 {
831 return true; }
833
834 stack.push((current_pos, list_item.is_ordered));
835 } else {
836 last_was_blank = false;
838 }
839 }
840
841 false
842 }
843
844 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
846 match self.line_offsets.binary_search(&offset) {
847 Ok(line) => (line + 1, 1),
848 Err(line) => {
849 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
850 (line, offset - line_start + 1)
851 }
852 }
853 }
854
855 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
857 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
859 return true;
860 }
861
862 self.code_spans()
864 .iter()
865 .any(|span| pos >= span.byte_offset && pos < span.byte_end)
866 }
867
868 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
870 if line_num > 0 {
871 self.lines.get(line_num - 1)
872 } else {
873 None
874 }
875 }
876
877 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
879 self.line_info(line_num).map(|info| info.byte_offset)
880 }
881
882 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
884 let normalized_id = ref_id.to_lowercase();
885 self.reference_defs
886 .iter()
887 .find(|def| def.id == normalized_id)
888 .map(|def| def.url.as_str())
889 }
890
891 pub fn is_in_list_block(&self, line_num: usize) -> bool {
893 self.list_blocks
894 .iter()
895 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
896 }
897
898 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
900 self.list_blocks
901 .iter()
902 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
903 }
904
905 pub fn is_in_code_block(&self, line_num: usize) -> bool {
909 if line_num == 0 || line_num > self.lines.len() {
910 return false;
911 }
912 self.lines[line_num - 1].in_code_block
913 }
914
915 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
917 if line_num == 0 || line_num > self.lines.len() {
918 return false;
919 }
920 self.lines[line_num - 1].in_front_matter
921 }
922
923 pub fn is_in_html_block(&self, line_num: usize) -> bool {
925 if line_num == 0 || line_num > self.lines.len() {
926 return false;
927 }
928 self.lines[line_num - 1].in_html_block
929 }
930
931 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
933 if line_num == 0 || line_num > self.lines.len() {
934 return false;
935 }
936
937 let col_0indexed = if col > 0 { col - 1 } else { 0 };
941 let code_spans = self.code_spans();
942 code_spans.iter().any(|span| {
943 if line_num < span.line || line_num > span.end_line {
945 return false;
946 }
947
948 if span.line == span.end_line {
949 col_0indexed >= span.start_col && col_0indexed < span.end_col
951 } else if line_num == span.line {
952 col_0indexed >= span.start_col
954 } else if line_num == span.end_line {
955 col_0indexed < span.end_col
957 } else {
958 true
960 }
961 })
962 }
963
964 #[inline]
966 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
967 let code_spans = self.code_spans();
968 code_spans
969 .iter()
970 .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
971 }
972
973 #[inline]
976 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
977 self.reference_defs
978 .iter()
979 .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
980 }
981
982 #[inline]
986 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
987 self.html_comment_ranges
988 .iter()
989 .any(|range| byte_pos >= range.start && byte_pos < range.end)
990 }
991
992 #[inline]
995 pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
996 self.html_tags()
997 .iter()
998 .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
999 }
1000
1001 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1003 self.jinja_ranges
1004 .iter()
1005 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1006 }
1007
1008 pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1010 self.reference_defs.iter().any(|def| {
1011 if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1012 byte_pos >= start && byte_pos < end
1013 } else {
1014 false
1015 }
1016 })
1017 }
1018
1019 pub fn has_char(&self, ch: char) -> bool {
1021 match ch {
1022 '#' => self.char_frequency.hash_count > 0,
1023 '*' => self.char_frequency.asterisk_count > 0,
1024 '_' => self.char_frequency.underscore_count > 0,
1025 '-' => self.char_frequency.hyphen_count > 0,
1026 '+' => self.char_frequency.plus_count > 0,
1027 '>' => self.char_frequency.gt_count > 0,
1028 '|' => self.char_frequency.pipe_count > 0,
1029 '[' => self.char_frequency.bracket_count > 0,
1030 '`' => self.char_frequency.backtick_count > 0,
1031 '<' => self.char_frequency.lt_count > 0,
1032 '!' => self.char_frequency.exclamation_count > 0,
1033 '\n' => self.char_frequency.newline_count > 0,
1034 _ => self.content.contains(ch), }
1036 }
1037
1038 pub fn char_count(&self, ch: char) -> usize {
1040 match ch {
1041 '#' => self.char_frequency.hash_count,
1042 '*' => self.char_frequency.asterisk_count,
1043 '_' => self.char_frequency.underscore_count,
1044 '-' => self.char_frequency.hyphen_count,
1045 '+' => self.char_frequency.plus_count,
1046 '>' => self.char_frequency.gt_count,
1047 '|' => self.char_frequency.pipe_count,
1048 '[' => self.char_frequency.bracket_count,
1049 '`' => self.char_frequency.backtick_count,
1050 '<' => self.char_frequency.lt_count,
1051 '!' => self.char_frequency.exclamation_count,
1052 '\n' => self.char_frequency.newline_count,
1053 _ => self.content.matches(ch).count(), }
1055 }
1056
1057 pub fn likely_has_headings(&self) -> bool {
1059 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
1061
1062 pub fn likely_has_lists(&self) -> bool {
1064 self.char_frequency.asterisk_count > 0
1065 || self.char_frequency.hyphen_count > 0
1066 || self.char_frequency.plus_count > 0
1067 }
1068
1069 pub fn likely_has_emphasis(&self) -> bool {
1071 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1072 }
1073
1074 pub fn likely_has_tables(&self) -> bool {
1076 self.char_frequency.pipe_count > 2
1077 }
1078
1079 pub fn likely_has_blockquotes(&self) -> bool {
1081 self.char_frequency.gt_count > 0
1082 }
1083
1084 pub fn likely_has_code(&self) -> bool {
1086 self.char_frequency.backtick_count > 0
1087 }
1088
1089 pub fn likely_has_links_or_images(&self) -> bool {
1091 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1092 }
1093
1094 pub fn likely_has_html(&self) -> bool {
1096 self.char_frequency.lt_count > 0
1097 }
1098
1099 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1101 self.html_tags()
1102 .iter()
1103 .filter(|tag| tag.line == line_num)
1104 .cloned()
1105 .collect()
1106 }
1107
1108 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1110 self.emphasis_spans()
1111 .iter()
1112 .filter(|span| span.line == line_num)
1113 .cloned()
1114 .collect()
1115 }
1116
1117 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1119 self.table_rows()
1120 .iter()
1121 .filter(|row| row.line == line_num)
1122 .cloned()
1123 .collect()
1124 }
1125
1126 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1128 self.bare_urls()
1129 .iter()
1130 .filter(|url| url.line == line_num)
1131 .cloned()
1132 .collect()
1133 }
1134
1135 #[inline]
1141 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1142 let idx = match lines.binary_search_by(|line| {
1144 if byte_offset < line.byte_offset {
1145 std::cmp::Ordering::Greater
1146 } else if byte_offset > line.byte_offset + line.byte_len {
1147 std::cmp::Ordering::Less
1148 } else {
1149 std::cmp::Ordering::Equal
1150 }
1151 }) {
1152 Ok(idx) => idx,
1153 Err(idx) => idx.saturating_sub(1),
1154 };
1155
1156 let line = &lines[idx];
1157 let line_num = idx + 1;
1158 let col = byte_offset.saturating_sub(line.byte_offset);
1159
1160 (idx, line_num, col)
1161 }
1162
1163 #[inline]
1165 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1166 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1168
1169 if idx > 0 {
1171 let span = &code_spans[idx - 1];
1172 if offset >= span.byte_offset && offset < span.byte_end {
1173 return true;
1174 }
1175 }
1176
1177 false
1178 }
1179
1180 fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1184 use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1185
1186 let mut link_ranges = Vec::new();
1187 let mut options = Options::empty();
1188 options.insert(Options::ENABLE_WIKILINKS);
1189 options.insert(Options::ENABLE_FOOTNOTES);
1190
1191 let parser = Parser::new_ext(content, options).into_offset_iter();
1192 let mut link_stack: Vec<usize> = Vec::new();
1193
1194 for (event, range) in parser {
1195 match event {
1196 Event::Start(Tag::Link { .. }) => {
1197 link_stack.push(range.start);
1198 }
1199 Event::End(TagEnd::Link) => {
1200 if let Some(start_pos) = link_stack.pop() {
1201 link_ranges.push((start_pos, range.end));
1202 }
1203 }
1204 _ => {}
1205 }
1206 }
1207
1208 link_ranges
1209 }
1210
1211 fn parse_links(
1213 content: &'a str,
1214 lines: &[LineInfo],
1215 code_blocks: &[(usize, usize)],
1216 code_spans: &[CodeSpan],
1217 flavor: MarkdownFlavor,
1218 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1219 ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1220 use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1221 use std::collections::HashSet;
1222
1223 let mut links = Vec::with_capacity(content.len() / 500);
1224 let mut broken_links = Vec::new();
1225 let mut footnote_refs = Vec::new();
1226
1227 let mut found_positions = HashSet::new();
1229
1230 let mut options = Options::empty();
1240 options.insert(Options::ENABLE_WIKILINKS);
1241 options.insert(Options::ENABLE_FOOTNOTES);
1242
1243 let parser = Parser::new_with_broken_link_callback(
1244 content,
1245 options,
1246 Some(|link: BrokenLink<'_>| {
1247 broken_links.push(BrokenLinkInfo {
1248 reference: link.reference.to_string(),
1249 span: link.span.clone(),
1250 });
1251 None
1252 }),
1253 )
1254 .into_offset_iter();
1255
1256 let mut link_stack: Vec<(
1257 usize,
1258 usize,
1259 pulldown_cmark::CowStr<'a>,
1260 LinkType,
1261 pulldown_cmark::CowStr<'a>,
1262 )> = Vec::new();
1263 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1266 match event {
1267 Event::Start(Tag::Link {
1268 link_type,
1269 dest_url,
1270 id,
1271 ..
1272 }) => {
1273 link_stack.push((range.start, range.end, dest_url, link_type, id));
1275 text_chunks.clear();
1276 }
1277 Event::Text(text) if !link_stack.is_empty() => {
1278 text_chunks.push((text.to_string(), range.start, range.end));
1280 }
1281 Event::Code(code) if !link_stack.is_empty() => {
1282 let code_text = format!("`{code}`");
1284 text_chunks.push((code_text, range.start, range.end));
1285 }
1286 Event::End(TagEnd::Link) => {
1287 if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1288 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1290 text_chunks.clear();
1291 continue;
1292 }
1293
1294 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1296
1297 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1299 text_chunks.clear();
1300 continue;
1301 }
1302
1303 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1304
1305 let is_reference = matches!(
1306 link_type,
1307 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1308 );
1309
1310 let link_text = if start_pos < content.len() {
1313 let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1314
1315 let mut close_pos = None;
1319 let mut depth = 0;
1320 let mut in_code_span = false;
1321
1322 for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1323 let mut backslash_count = 0;
1325 let mut j = i;
1326 while j > 0 && link_bytes[j - 1] == b'\\' {
1327 backslash_count += 1;
1328 j -= 1;
1329 }
1330 let is_escaped = backslash_count % 2 != 0;
1331
1332 if byte == b'`' && !is_escaped {
1334 in_code_span = !in_code_span;
1335 }
1336
1337 if !is_escaped && !in_code_span {
1339 if byte == b'[' {
1340 depth += 1;
1341 } else if byte == b']' {
1342 if depth == 0 {
1343 close_pos = Some(i);
1345 break;
1346 } else {
1347 depth -= 1;
1348 }
1349 }
1350 }
1351 }
1352
1353 if let Some(pos) = close_pos {
1354 Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1355 } else {
1356 Cow::Borrowed("")
1357 }
1358 } else {
1359 Cow::Borrowed("")
1360 };
1361
1362 let reference_id = if is_reference && !ref_id.is_empty() {
1364 Some(Cow::Owned(ref_id.to_lowercase()))
1365 } else if is_reference {
1366 Some(Cow::Owned(link_text.to_lowercase()))
1368 } else {
1369 None
1370 };
1371
1372 found_positions.insert(start_pos);
1374
1375 links.push(ParsedLink {
1376 line: line_num,
1377 start_col: col_start,
1378 end_col: col_end,
1379 byte_offset: start_pos,
1380 byte_end: range.end,
1381 text: link_text,
1382 url: Cow::Owned(url.to_string()),
1383 is_reference,
1384 reference_id,
1385 link_type,
1386 });
1387
1388 text_chunks.clear();
1389 }
1390 }
1391 Event::FootnoteReference(footnote_id) => {
1392 if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1395 continue;
1396 }
1397
1398 let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1399 footnote_refs.push(FootnoteRef {
1400 id: footnote_id.to_string(),
1401 line: line_num,
1402 byte_offset: range.start,
1403 byte_end: range.end,
1404 });
1405 }
1406 _ => {}
1407 }
1408 }
1409
1410 for cap in LINK_PATTERN.captures_iter(content) {
1414 let full_match = cap.get(0).unwrap();
1415 let match_start = full_match.start();
1416 let match_end = full_match.end();
1417
1418 if found_positions.contains(&match_start) {
1420 continue;
1421 }
1422
1423 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1425 continue;
1426 }
1427
1428 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1430 continue;
1431 }
1432
1433 if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1435 continue;
1436 }
1437
1438 if Self::is_offset_in_code_span(code_spans, match_start) {
1440 continue;
1441 }
1442
1443 if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1445 continue;
1446 }
1447
1448 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1450
1451 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1453 continue;
1454 }
1455
1456 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1457
1458 let text = cap.get(1).map_or("", |m| m.as_str());
1459
1460 if let Some(ref_id) = cap.get(6) {
1462 let ref_id_str = ref_id.as_str();
1463 let normalized_ref = if ref_id_str.is_empty() {
1464 Cow::Owned(text.to_lowercase()) } else {
1466 Cow::Owned(ref_id_str.to_lowercase())
1467 };
1468
1469 links.push(ParsedLink {
1471 line: line_num,
1472 start_col: col_start,
1473 end_col: col_end,
1474 byte_offset: match_start,
1475 byte_end: match_end,
1476 text: Cow::Borrowed(text),
1477 url: Cow::Borrowed(""), is_reference: true,
1479 reference_id: Some(normalized_ref),
1480 link_type: LinkType::Reference, });
1482 }
1483 }
1484
1485 (links, broken_links, footnote_refs)
1486 }
1487
1488 fn parse_images(
1490 content: &'a str,
1491 lines: &[LineInfo],
1492 code_blocks: &[(usize, usize)],
1493 code_spans: &[CodeSpan],
1494 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1495 ) -> Vec<ParsedImage<'a>> {
1496 use crate::utils::skip_context::is_in_html_comment_ranges;
1497 use std::collections::HashSet;
1498
1499 let mut images = Vec::with_capacity(content.len() / 1000);
1501 let mut found_positions = HashSet::new();
1502
1503 let parser = Parser::new(content).into_offset_iter();
1505 let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1506 Vec::new();
1507 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1510 match event {
1511 Event::Start(Tag::Image {
1512 link_type,
1513 dest_url,
1514 id,
1515 ..
1516 }) => {
1517 image_stack.push((range.start, dest_url, link_type, id));
1518 text_chunks.clear();
1519 }
1520 Event::Text(text) if !image_stack.is_empty() => {
1521 text_chunks.push((text.to_string(), range.start, range.end));
1522 }
1523 Event::Code(code) if !image_stack.is_empty() => {
1524 let code_text = format!("`{code}`");
1525 text_chunks.push((code_text, range.start, range.end));
1526 }
1527 Event::End(TagEnd::Image) => {
1528 if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1529 if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1531 continue;
1532 }
1533
1534 if Self::is_offset_in_code_span(code_spans, start_pos) {
1536 continue;
1537 }
1538
1539 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1541 continue;
1542 }
1543
1544 let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1546 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1547
1548 let is_reference = matches!(
1549 link_type,
1550 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1551 );
1552
1553 let alt_text = if start_pos < content.len() {
1556 let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1557
1558 let mut close_pos = None;
1561 let mut depth = 0;
1562
1563 if image_bytes.len() > 2 {
1564 for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1565 let mut backslash_count = 0;
1567 let mut j = i;
1568 while j > 0 && image_bytes[j - 1] == b'\\' {
1569 backslash_count += 1;
1570 j -= 1;
1571 }
1572 let is_escaped = backslash_count % 2 != 0;
1573
1574 if !is_escaped {
1575 if byte == b'[' {
1576 depth += 1;
1577 } else if byte == b']' {
1578 if depth == 0 {
1579 close_pos = Some(i);
1581 break;
1582 } else {
1583 depth -= 1;
1584 }
1585 }
1586 }
1587 }
1588 }
1589
1590 if let Some(pos) = close_pos {
1591 Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1592 } else {
1593 Cow::Borrowed("")
1594 }
1595 } else {
1596 Cow::Borrowed("")
1597 };
1598
1599 let reference_id = if is_reference && !ref_id.is_empty() {
1600 Some(Cow::Owned(ref_id.to_lowercase()))
1601 } else if is_reference {
1602 Some(Cow::Owned(alt_text.to_lowercase())) } else {
1604 None
1605 };
1606
1607 found_positions.insert(start_pos);
1608 images.push(ParsedImage {
1609 line: line_num,
1610 start_col: col_start,
1611 end_col: col_end,
1612 byte_offset: start_pos,
1613 byte_end: range.end,
1614 alt_text,
1615 url: Cow::Owned(url.to_string()),
1616 is_reference,
1617 reference_id,
1618 link_type,
1619 });
1620 }
1621 }
1622 _ => {}
1623 }
1624 }
1625
1626 for cap in IMAGE_PATTERN.captures_iter(content) {
1628 let full_match = cap.get(0).unwrap();
1629 let match_start = full_match.start();
1630 let match_end = full_match.end();
1631
1632 if found_positions.contains(&match_start) {
1634 continue;
1635 }
1636
1637 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1639 continue;
1640 }
1641
1642 if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1644 || Self::is_offset_in_code_span(code_spans, match_start)
1645 || is_in_html_comment_ranges(html_comment_ranges, match_start)
1646 {
1647 continue;
1648 }
1649
1650 if let Some(ref_id) = cap.get(6) {
1652 let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1653 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1654 let alt_text = cap.get(1).map_or("", |m| m.as_str());
1655 let ref_id_str = ref_id.as_str();
1656 let normalized_ref = if ref_id_str.is_empty() {
1657 Cow::Owned(alt_text.to_lowercase())
1658 } else {
1659 Cow::Owned(ref_id_str.to_lowercase())
1660 };
1661
1662 images.push(ParsedImage {
1663 line: line_num,
1664 start_col: col_start,
1665 end_col: col_end,
1666 byte_offset: match_start,
1667 byte_end: match_end,
1668 alt_text: Cow::Borrowed(alt_text),
1669 url: Cow::Borrowed(""),
1670 is_reference: true,
1671 reference_id: Some(normalized_ref),
1672 link_type: LinkType::Reference, });
1674 }
1675 }
1676
1677 images
1678 }
1679
1680 fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1682 let mut refs = Vec::with_capacity(lines.len() / 20); for (line_idx, line_info) in lines.iter().enumerate() {
1686 if line_info.in_code_block {
1688 continue;
1689 }
1690
1691 let line = line_info.content(content);
1692 let line_num = line_idx + 1;
1693
1694 if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1695 let id = cap.get(1).unwrap().as_str().to_lowercase();
1696 let url = cap.get(2).unwrap().as_str().to_string();
1697 let title_match = cap.get(3).or_else(|| cap.get(4));
1698 let title = title_match.map(|m| m.as_str().to_string());
1699
1700 let match_obj = cap.get(0).unwrap();
1703 let byte_offset = line_info.byte_offset + match_obj.start();
1704 let byte_end = line_info.byte_offset + match_obj.end();
1705
1706 let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1708 let start = line_info.byte_offset + m.start().saturating_sub(1);
1710 let end = line_info.byte_offset + m.end() + 1; (Some(start), Some(end))
1712 } else {
1713 (None, None)
1714 };
1715
1716 refs.push(ReferenceDef {
1717 line: line_num,
1718 id,
1719 url,
1720 title,
1721 byte_offset,
1722 byte_end,
1723 title_byte_start,
1724 title_byte_end,
1725 });
1726 }
1727 }
1728
1729 refs
1730 }
1731
/// Split a line into its blockquote prefix and the remaining content.
///
/// The prefix covers every `>` marker together with the whitespace in front
/// of it and at most one space or tab directly after it. Returns `None` when
/// the line, ignoring leading whitespace, does not start with `>`.
#[inline]
fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
    let mut rest = line;
    let mut prefix_len = 0;

    while let Some(after_marker) = rest.trim_start().strip_prefix('>') {
        // Everything consumed so far in this round: leading whitespace plus the `>`.
        prefix_len += rest.len() - after_marker.len();

        // A single space (preferred) or tab after the marker belongs to the prefix.
        let separator_stripped = after_marker
            .strip_prefix(' ')
            .or_else(|| after_marker.strip_prefix('\t'));
        rest = match separator_stripped {
            Some(tail) => {
                prefix_len += 1;
                tail
            }
            None => after_marker,
        };
    }

    // Each marker adds at least one byte, so zero means no marker was found.
    (prefix_len > 0).then(|| (&line[..prefix_len], rest))
}
1772
1773 fn detect_list_items_with_pulldown(
1794 content: &str,
1795 line_offsets: &[usize],
1796 flavor: MarkdownFlavor,
1797 front_matter_end: usize,
1798 ) -> std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)> {
1799 use std::collections::HashMap;
1800
1801 let mut list_items = HashMap::new();
1802
1803 let mut options = Options::empty();
1804 options.insert(Options::ENABLE_TABLES);
1805 options.insert(Options::ENABLE_FOOTNOTES);
1806 options.insert(Options::ENABLE_STRIKETHROUGH);
1807 options.insert(Options::ENABLE_TASKLISTS);
1808 options.insert(Options::ENABLE_GFM);
1810
1811 let _ = flavor;
1813
1814 let parser = Parser::new_ext(content, options).into_offset_iter();
1815 let mut list_depth: usize = 0;
1816 let mut list_stack: Vec<bool> = Vec::new();
1817
1818 for (event, range) in parser {
1819 match event {
1820 Event::Start(Tag::List(start_number)) => {
1821 list_depth += 1;
1822 list_stack.push(start_number.is_some());
1823 }
1824 Event::End(TagEnd::List(_)) => {
1825 list_depth = list_depth.saturating_sub(1);
1826 list_stack.pop();
1827 }
1828 Event::Start(Tag::Item) if list_depth > 0 => {
1829 let current_list_is_ordered = list_stack.last().copied().unwrap_or(false);
1831 let item_start = range.start;
1833
1834 let mut line_idx = match line_offsets.binary_search(&item_start) {
1836 Ok(idx) => idx,
1837 Err(idx) => idx.saturating_sub(1),
1838 };
1839
1840 if item_start < content.len() && content.as_bytes()[item_start] == b'\n' {
1844 line_idx += 1;
1845 }
1846
1847 if front_matter_end > 0 && line_idx < front_matter_end {
1849 continue;
1850 }
1851
1852 if line_idx < line_offsets.len() {
1853 let line_start_byte = line_offsets[line_idx];
1854 let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
1855 let line = &content[line_start_byte..line_end.min(content.len())];
1856
1857 let line = line
1859 .strip_suffix('\n')
1860 .or_else(|| line.strip_suffix("\r\n"))
1861 .unwrap_or(line);
1862
1863 let blockquote_parse = Self::parse_blockquote_prefix(line);
1865 let (blockquote_prefix_len, line_to_parse) = if let Some((prefix, content)) = blockquote_parse {
1866 (prefix.len(), content)
1867 } else {
1868 (0, line)
1869 };
1870
1871 if current_list_is_ordered {
1873 if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
1874 Self::parse_ordered_list(line_to_parse)
1875 {
1876 let marker = format!("{number_str}{delimiter}");
1877 let marker_column = blockquote_prefix_len + leading_spaces.len();
1878 let content_column = marker_column + marker.len() + spacing.len();
1879 let number = number_str.parse().ok();
1880
1881 list_items.entry(line_start_byte).or_insert((
1882 true,
1883 marker,
1884 marker_column,
1885 content_column,
1886 number,
1887 ));
1888 }
1889 } else if let Some((leading_spaces, marker, spacing, _content)) =
1890 Self::parse_unordered_list(line_to_parse)
1891 {
1892 let marker_column = blockquote_prefix_len + leading_spaces.len();
1893 let content_column = marker_column + 1 + spacing.len();
1894
1895 list_items.entry(line_start_byte).or_insert((
1896 false,
1897 marker.to_string(),
1898 marker_column,
1899 content_column,
1900 None,
1901 ));
1902 }
1903 }
1904 }
1905 _ => {}
1906 }
1907 }
1908
1909 list_items
1910 }
1911
/// Split a line into `(indent, marker, spacing, content)` for an unordered
/// list item.
///
/// `indent` and `spacing` are runs of spaces and tabs; `marker` is one of
/// `-`, `*` or `+`. Returns `None` when the first character after the indent
/// is not such a marker. The spacing after the marker may be empty.
#[inline]
fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
    // Length in bytes of the leading run of spaces and tabs.
    let blank_run = |s: &str| s.bytes().take_while(|&b| b == b' ' || b == b'\t').count();

    let (indent, tail) = line.split_at(blank_run(line));

    let marker = match tail.bytes().next()? {
        b @ (b'-' | b'*' | b'+') => char::from(b),
        _ => return None,
    };

    // The marker is a single ASCII byte, so slicing at 1 is a char boundary.
    let after_marker = &tail[1..];
    let (spacing, content) = after_marker.split_at(blank_run(after_marker));

    Some((indent, marker, spacing, content))
}
1944
/// Split a line into `(indent, number, delimiter, spacing, content)` for an
/// ordered list item.
///
/// `indent` and `spacing` are runs of spaces and tabs, `number` is a non-empty
/// run of ASCII digits and `delimiter` is `.` or `)`. Returns `None` when the
/// digits or the delimiter are missing. The spacing after the delimiter may
/// be empty.
#[inline]
fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
    // Length in bytes of the leading run of spaces and tabs.
    let blank_run = |s: &str| s.bytes().take_while(|&b| b == b' ' || b == b'\t').count();

    let (indent, tail) = line.split_at(blank_run(line));

    let digit_count = tail.bytes().take_while(u8::is_ascii_digit).count();
    if digit_count == 0 {
        return None;
    }
    let (number, tail) = tail.split_at(digit_count);

    let delimiter = match tail.bytes().next()? {
        b @ (b'.' | b')') => char::from(b),
        _ => return None,
    };

    // The delimiter is a single ASCII byte, so slicing at 1 is a char boundary.
    let after_delimiter = &tail[1..];
    let (spacing, content) = after_delimiter.split_at(blank_run(after_delimiter));

    Some((indent, number, delimiter, spacing, content))
}
1992
/// Build a per-line flag vector telling whether each line overlaps a code block.
///
/// `line_offsets` holds the ascending byte offset of each line start and
/// `code_blocks` holds `(start, end)` byte ranges. Range ends are clamped to
/// the content length, and both ends are snapped outwards to the nearest char
/// boundary before being mapped to lines. The result has one entry per
/// element of `line_offsets`.
fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
    let total_len = content.len();
    let mut flags = vec![false; line_offsets.len()];

    for &(block_start, block_end) in code_blocks {
        // Move the start backwards until it rests on a char boundary.
        let mut lo = block_start;
        while lo > 0 && !content.is_char_boundary(lo) {
            lo -= 1;
        }

        // Clamp the end to the content, then move it forwards to a char boundary.
        let mut hi = block_end.min(total_len);
        while hi < total_len && !content.is_char_boundary(hi) {
            hi += 1;
        }

        // First flagged line: the last line starting at or before `lo`.
        let first = line_offsets.partition_point(|&off| off <= lo).saturating_sub(1);
        // One past the last flagged line: the first line starting at or after `hi`.
        let last = line_offsets.partition_point(|&off| off < hi);

        // `get_mut` yields `None` for an inverted range, leaving the flags untouched.
        if let Some(covered) = flags.get_mut(first..last) {
            covered.fill(true);
        }
    }

    flags
}
2052
2053 fn compute_basic_line_info(
2055 content: &str,
2056 line_offsets: &[usize],
2057 code_blocks: &[(usize, usize)],
2058 flavor: MarkdownFlavor,
2059 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2060 autodoc_ranges: &[crate::utils::skip_context::ByteRange],
2061 ) -> Vec<LineInfo> {
2062 let content_lines: Vec<&str> = content.lines().collect();
2063 let mut lines = Vec::with_capacity(content_lines.len());
2064
2065 let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
2067
2068 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2071
2072 let list_item_map = Self::detect_list_items_with_pulldown(content, line_offsets, flavor, front_matter_end);
2074
2075 for (i, line) in content_lines.iter().enumerate() {
2076 let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
2077 let indent = line.len() - line.trim_start().len();
2078 let visual_indent = ElementCache::calculate_indentation_width_default(line);
2080
2081 let blockquote_parse = Self::parse_blockquote_prefix(line);
2083
2084 let is_blank = if let Some((_, content)) = blockquote_parse {
2086 content.trim().is_empty()
2088 } else {
2089 line.trim().is_empty()
2090 };
2091
2092 let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
2094
2095 let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
2097 && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
2098 let line_end_offset = byte_offset + line.len();
2101 let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
2102 html_comment_ranges,
2103 byte_offset,
2104 line_end_offset,
2105 );
2106 let list_item =
2109 list_item_map
2110 .get(&byte_offset)
2111 .map(
2112 |(is_ordered, marker, marker_column, content_column, number)| ListItemInfo {
2113 marker: marker.clone(),
2114 is_ordered: *is_ordered,
2115 number: *number,
2116 marker_column: *marker_column,
2117 content_column: *content_column,
2118 },
2119 );
2120
2121 let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2124 let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2125
2126 lines.push(LineInfo {
2127 byte_offset,
2128 byte_len: line.len(),
2129 indent,
2130 visual_indent,
2131 is_blank,
2132 in_code_block,
2133 in_front_matter,
2134 in_html_block: false, in_html_comment,
2136 list_item,
2137 heading: None, blockquote: None, in_mkdocstrings,
2140 in_esm_block: false, in_code_span_continuation: false, is_horizontal_rule: is_hr,
2143 });
2144 }
2145
2146 lines
2147 }
2148
/// Fill in `blockquote` and `heading` on each entry of `lines`.
///
/// Lines inside code blocks, front matter or HTML blocks are skipped
/// entirely. For the remaining lines:
/// - blockquote details are recorded when `parse_blockquote_detailed` matches;
/// - an ATX heading (`# Title`) is recorded when the line matches the ATX
///   pattern, is not an MkDocs snippet marker, and does not start inside an
///   HTML comment or strictly inside a link byte range;
/// - otherwise a Setext heading is recorded when the next line is an
///   underline of `=` or `-` and the current line passes a series of
///   exclusion checks.
///
/// # Parameters
/// - `content`: the full document text.
/// - `lines`: per-line info, updated in place.
/// - `flavor`: enables the MkDocs snippet-marker check.
/// - `html_comment_ranges`: byte ranges of HTML comments.
/// - `link_byte_ranges`: `(start, end)` byte ranges of links.
fn detect_headings_and_blockquotes(
    content: &str,
    lines: &mut [LineInfo],
    flavor: MarkdownFlavor,
    html_comment_ranges: &[crate::utils::skip_context::ByteRange],
    link_byte_ranges: &[(usize, usize)],
) {
    // Groups: 1 = leading whitespace, 2 = one to six hashes,
    // 3 = whitespace after the hashes, 4 = rest of the line.
    static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
        LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
    // A line made only of `=` or only of `-`, with optional surrounding whitespace.
    static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
        LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());

    let content_lines: Vec<&str> = content.lines().collect();

    let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);

    for i in 0..lines.len() {
        if lines[i].in_code_block {
            continue;
        }

        if front_matter_end > 0 && i < front_matter_end {
            continue;
        }

        if lines[i].in_html_block {
            continue;
        }

        // NOTE(review): assumes `lines` and `content.lines()` have the same
        // length; a longer `lines` would panic here — confirm against callers.
        let line = content_lines[i];

        // Blockquote info is recorded before the blank-line check below, so
        // marker-only lines such as ">" still get it.
        if let Some(bq) = parse_blockquote_detailed(line) {
            // One nesting level per byte of the marker string.
            let nesting_level = bq.markers.len();
            let marker_column = bq.indent.len();

            let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);

            // Content follows the marker directly, with no whitespace in between.
            let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
            // More than one literal space after the marker (tabs are not counted).
            let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;

            // Set for a marker with neither trailing whitespace nor content.
            let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();

            lines[i].blockquote = Some(BlockquoteInfo {
                nesting_level,
                indent: bq.indent.to_string(),
                marker_column,
                prefix,
                content: bq.content.to_string(),
                has_no_space_after_marker: has_no_space,
                has_multiple_spaces_after_marker: has_multiple_spaces,
                needs_md028_fix,
            });
        }

        if lines[i].is_blank {
            continue;
        }

        // MkDocs snippet section markers are never treated as ATX headings.
        let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
            crate::utils::mkdocs_snippets::is_snippet_section_start(line)
                || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
        } else {
            false
        };

        if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
            if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
                continue;
            }
            let line_offset = lines[i].byte_offset;
            // Skip lines that begin strictly inside a link's byte range.
            if link_byte_ranges
                .iter()
                .any(|&(start, end)| line_offset > start && line_offset < end)
            {
                continue;
            }
            let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
            let hashes = caps.get(2).map_or("", |m| m.as_str());
            let spaces_after = caps.get(3).map_or("", |m| m.as_str());
            let rest = caps.get(4).map_or("", |m| m.as_str());

            let level = hashes.len() as u8;
            let marker_column = leading_spaces.len();

            // Separate the heading text from an optional closing hash sequence
            // (`# Title ##`), keeping a trailing ` {#custom-id}` attached to the text.
            let (text, has_closing, closing_seq) = {
                // Split off a trailing ` {#...}` part so that the hash inside it
                // is not mistaken for a closing sequence.
                let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
                    if rest[id_start..].trim_end().ends_with('}') {
                        (&rest[..id_start], &rest[id_start..])
                    } else {
                        (rest, "")
                    }
                } else {
                    (rest, "")
                };

                let trimmed_rest = rest_without_id.trim_end();
                if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
                    // Work on (byte position, char) pairs so the backwards walk
                    // steps over whole characters.
                    let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();

                    let last_hash_char_idx = char_positions
                        .iter()
                        .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);

                    if let Some(mut char_idx) = last_hash_char_idx {
                        // Walk back to the first hash of the run that contains the last hash.
                        while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
                            char_idx -= 1;
                        }

                        let start_of_hashes = char_positions[char_idx].0;

                        // The run only closes the heading when it is preceded by
                        // whitespace or starts the text.
                        let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();

                        // The run must also extend to the end of the trimmed text.
                        let potential_closing = &trimmed_rest[start_of_hashes..];
                        let is_all_hashes = potential_closing.chars().all(|c| c == '#');

                        if is_all_hashes && has_space_before {
                            let closing_hashes = potential_closing.to_string();
                            // Re-attach the custom id part after removing the closing hashes.
                            let text_part = if !custom_id_part.is_empty() {
                                format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
                            } else {
                                trimmed_rest[..start_of_hashes].trim_end().to_string()
                            };
                            (text_part, true, closing_hashes)
                        } else {
                            (rest.to_string(), false, String::new())
                        }
                    } else {
                        (rest.to_string(), false, String::new())
                    }
                } else {
                    (rest.to_string(), false, String::new())
                }
            };

            let content_column = marker_column + hashes.len() + spaces_after.len();

            let raw_text = text.trim().to_string();
            let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);

            // Without an inline id, look for a standalone attribute list on the next line.
            if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
                let next_line = content_lines[i + 1];
                if !lines[i + 1].in_code_block
                    && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
                    && let Some(next_line_id) =
                        crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
                {
                    custom_id = Some(next_line_id);
                }
            }

            // Valid when whitespace follows the hashes, nothing follows them,
            // the level is above 1, or the text starts with an uppercase
            // character. NOTE(review): presumably this keeps `#tag`-style text
            // from counting as a real heading — confirm intent.
            let is_valid = !spaces_after.is_empty()
                || rest.is_empty()
                || level > 1
                || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());

            lines[i].heading = Some(HeadingInfo {
                level,
                style: HeadingStyle::ATX,
                marker: hashes.to_string(),
                marker_column,
                content_column,
                text: clean_text,
                custom_id,
                raw_text,
                has_closing_sequence: has_closing,
                closing_sequence: closing_seq,
                is_valid,
            });
        } else if i + 1 < content_lines.len() && i + 1 < lines.len() {
            // Setext candidate: the current line is the text, the next line the underline.
            let next_line = content_lines[i + 1];
            if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
                if front_matter_end > 0 && i < front_matter_end {
                    continue;
                }

                if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
                {
                    continue;
                }

                let content_line = line.trim();

                // Text starting with a list marker character is not a Setext heading.
                if content_line.starts_with('-') || content_line.starts_with('*') || content_line.starts_with('+') {
                    continue;
                }

                // Three or more underscores (ignoring whitespace) and nothing else: skipped.
                if content_line.starts_with('_') {
                    let non_ws: String = content_line.chars().filter(|c| !c.is_whitespace()).collect();
                    if non_ws.len() >= 3 && non_ws.chars().all(|c| c == '_') {
                        continue;
                    }
                }

                // Digits followed by `.` or `)` look like an ordered list item: skipped.
                if let Some(first_char) = content_line.chars().next()
                    && first_char.is_ascii_digit()
                {
                    let num_end = content_line.chars().take_while(|c| c.is_ascii_digit()).count();
                    if num_end < content_line.len() {
                        let next = content_line.chars().nth(num_end);
                        if next == Some('.') || next == Some(')') {
                            continue;
                        }
                    }
                }

                // Lines matching the ATX pattern (reachable here for MkDocs
                // snippet markers) are skipped.
                if ATX_HEADING_REGEX.is_match(line) {
                    continue;
                }

                // Blockquote lines are skipped.
                if content_line.starts_with('>') {
                    continue;
                }

                // Lines opening with ``` or ~~~ are skipped.
                let trimmed_start = line.trim_start();
                if trimmed_start.len() >= 3 {
                    let first_three: String = trimmed_start.chars().take(3).collect();
                    if first_three == "```" || first_three == "~~~" {
                        continue;
                    }
                }

                // Lines starting with `<` are skipped.
                if content_line.starts_with('<') {
                    continue;
                }

                let underline = next_line.trim();

                // `=` underlines give level 1, `-` underlines level 2.
                let level = if underline.starts_with('=') { 1 } else { 2 };
                let style = if level == 1 {
                    HeadingStyle::Setext1
                } else {
                    HeadingStyle::Setext2
                };

                let raw_text = line.trim().to_string();
                let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);

                // Without an inline id, look for a standalone attribute list
                // on the line after the underline.
                if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
                    let attr_line = content_lines[i + 2];
                    if !lines[i + 2].in_code_block
                        && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
                        && let Some(attr_line_id) =
                            crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
                    {
                        custom_id = Some(attr_line_id);
                    }
                }

                lines[i].heading = Some(HeadingInfo {
                    level,
                    style,
                    // For Setext headings the marker is the trimmed underline,
                    // and `marker_column` is the underline's indentation.
                    marker: underline.to_string(),
                    marker_column: next_line.len() - next_line.trim_start().len(),
                    content_column: lines[i].indent,
                    text: clean_text,
                    custom_id,
                    raw_text,
                    has_closing_sequence: false,
                    closing_sequence: String::new(),
                    is_valid: true,
                });
            }
        }
    }
}
2480
2481 fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2483 const BLOCK_ELEMENTS: &[&str] = &[
2486 "address",
2487 "article",
2488 "aside",
2489 "audio",
2490 "blockquote",
2491 "canvas",
2492 "details",
2493 "dialog",
2494 "dd",
2495 "div",
2496 "dl",
2497 "dt",
2498 "embed",
2499 "fieldset",
2500 "figcaption",
2501 "figure",
2502 "footer",
2503 "form",
2504 "h1",
2505 "h2",
2506 "h3",
2507 "h4",
2508 "h5",
2509 "h6",
2510 "header",
2511 "hr",
2512 "iframe",
2513 "li",
2514 "main",
2515 "menu",
2516 "nav",
2517 "noscript",
2518 "object",
2519 "ol",
2520 "p",
2521 "picture",
2522 "pre",
2523 "script",
2524 "search",
2525 "section",
2526 "source",
2527 "style",
2528 "summary",
2529 "svg",
2530 "table",
2531 "tbody",
2532 "td",
2533 "template",
2534 "textarea",
2535 "tfoot",
2536 "th",
2537 "thead",
2538 "tr",
2539 "track",
2540 "ul",
2541 "video",
2542 ];
2543
2544 let mut i = 0;
2545 while i < lines.len() {
2546 if lines[i].in_code_block || lines[i].in_front_matter {
2548 i += 1;
2549 continue;
2550 }
2551
2552 let trimmed = lines[i].content(content).trim_start();
2553
2554 if trimmed.starts_with('<') && trimmed.len() > 1 {
2556 let after_bracket = &trimmed[1..];
2558 let is_closing = after_bracket.starts_with('/');
2559 let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2560
2561 let tag_name = tag_start
2563 .chars()
2564 .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2565 .collect::<String>()
2566 .to_lowercase();
2567
2568 if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2570 lines[i].in_html_block = true;
2572
2573 if !is_closing {
2576 let closing_tag = format!("</{tag_name}>");
2577 let allow_blank_lines = tag_name == "style" || tag_name == "script";
2579 let mut j = i + 1;
2580 let mut found_closing_tag = false;
2581 while j < lines.len() && j < i + 100 {
2582 if !allow_blank_lines && lines[j].is_blank {
2585 break;
2586 }
2587
2588 lines[j].in_html_block = true;
2589
2590 if lines[j].content(content).contains(&closing_tag) {
2592 found_closing_tag = true;
2593 }
2594
2595 if found_closing_tag {
2598 j += 1;
2599 while j < lines.len() && j < i + 100 {
2601 if lines[j].is_blank {
2602 break;
2603 }
2604 lines[j].in_html_block = true;
2605 j += 1;
2606 }
2607 break;
2608 }
2609 j += 1;
2610 }
2611 }
2612 }
2613 }
2614
2615 i += 1;
2616 }
2617 }
2618
2619 fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2622 if !flavor.supports_esm_blocks() {
2624 return;
2625 }
2626
2627 let mut in_multiline_comment = false;
2628
2629 for line in lines.iter_mut() {
2630 if line.is_blank || line.in_html_comment {
2632 continue;
2633 }
2634
2635 let trimmed = line.content(content).trim_start();
2636
2637 if in_multiline_comment {
2639 if trimmed.contains("*/") {
2640 in_multiline_comment = false;
2641 }
2642 continue;
2643 }
2644
2645 if trimmed.starts_with("//") {
2647 continue;
2648 }
2649
2650 if trimmed.starts_with("/*") {
2652 if !trimmed.contains("*/") {
2653 in_multiline_comment = true;
2654 }
2655 continue;
2656 }
2657
2658 if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
2660 line.in_esm_block = true;
2661 } else {
2662 break;
2664 }
2665 }
2666 }
2667
2668 fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
2670 let mut code_spans = Vec::new();
2671
2672 if !content.contains('`') {
2674 return code_spans;
2675 }
2676
2677 let parser = Parser::new(content).into_offset_iter();
2679
2680 for (event, range) in parser {
2681 if let Event::Code(_) = event {
2682 let start_pos = range.start;
2683 let end_pos = range.end;
2684
2685 let full_span = &content[start_pos..end_pos];
2687 let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
2688
2689 let content_start = start_pos + backtick_count;
2691 let content_end = end_pos - backtick_count;
2692 let span_content = if content_start < content_end {
2693 content[content_start..content_end].to_string()
2694 } else {
2695 String::new()
2696 };
2697
2698 let line_idx = lines
2701 .partition_point(|line| line.byte_offset <= start_pos)
2702 .saturating_sub(1);
2703 let line_num = line_idx + 1;
2704 let byte_col_start = start_pos - lines[line_idx].byte_offset;
2705
2706 let end_line_idx = lines
2708 .partition_point(|line| line.byte_offset <= end_pos)
2709 .saturating_sub(1);
2710 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
2711
2712 let line_content = lines[line_idx].content(content);
2715 let col_start = if byte_col_start <= line_content.len() {
2716 line_content[..byte_col_start].chars().count()
2717 } else {
2718 line_content.chars().count()
2719 };
2720
2721 let end_line_content = lines[end_line_idx].content(content);
2722 let col_end = if byte_col_end <= end_line_content.len() {
2723 end_line_content[..byte_col_end].chars().count()
2724 } else {
2725 end_line_content.chars().count()
2726 };
2727
2728 code_spans.push(CodeSpan {
2729 line: line_num,
2730 end_line: end_line_idx + 1,
2731 start_col: col_start,
2732 end_col: col_end,
2733 byte_offset: start_pos,
2734 byte_end: end_pos,
2735 backtick_count,
2736 content: span_content,
2737 });
2738 }
2739 }
2740
2741 code_spans.sort_by_key(|span| span.byte_offset);
2743
2744 code_spans
2745 }
2746
2747 fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
2758 const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
2760
2761 #[inline]
2764 fn reset_tracking_state(
2765 list_item: &ListItemInfo,
2766 has_list_breaking_content: &mut bool,
2767 min_continuation: &mut usize,
2768 ) {
2769 *has_list_breaking_content = false;
2770 let marker_width = if list_item.is_ordered {
2771 list_item.marker.len() + 1 } else {
2773 list_item.marker.len()
2774 };
2775 *min_continuation = if list_item.is_ordered {
2776 marker_width
2777 } else {
2778 UNORDERED_LIST_MIN_CONTINUATION_INDENT
2779 };
2780 }
2781
2782 let mut list_blocks = Vec::with_capacity(lines.len() / 10); let mut current_block: Option<ListBlock> = None;
2785 let mut last_list_item_line = 0;
2786 let mut current_indent_level = 0;
2787 let mut last_marker_width = 0;
2788
2789 let mut has_list_breaking_content_since_last_item = false;
2791 let mut min_continuation_for_tracking = 0;
2792
2793 for (line_idx, line_info) in lines.iter().enumerate() {
2794 let line_num = line_idx + 1;
2795
2796 if line_info.in_code_block {
2798 if let Some(ref mut block) = current_block {
2799 let min_continuation_indent =
2801 CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
2802
2803 let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
2805
2806 match context {
2807 CodeBlockContext::Indented => {
2808 block.end_line = line_num;
2810 continue;
2811 }
2812 CodeBlockContext::Standalone => {
2813 let completed_block = current_block.take().unwrap();
2815 list_blocks.push(completed_block);
2816 continue;
2817 }
2818 CodeBlockContext::Adjacent => {
2819 block.end_line = line_num;
2821 continue;
2822 }
2823 }
2824 } else {
2825 continue;
2827 }
2828 }
2829
2830 let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
2832 caps.get(0).unwrap().as_str().to_string()
2833 } else {
2834 String::new()
2835 };
2836
2837 if current_block.is_some()
2840 && line_info.list_item.is_none()
2841 && !line_info.is_blank
2842 && !line_info.in_code_span_continuation
2843 {
2844 let line_content = line_info.content(content).trim();
2845
2846 let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
2851 let breaks_list = line_info.heading.is_some()
2852 || line_content.starts_with("---")
2853 || line_content.starts_with("***")
2854 || line_content.starts_with("___")
2855 || crate::utils::skip_context::is_table_line(line_content)
2856 || line_content.starts_with(">")
2857 || (line_info.indent > 0
2858 && line_info.indent < min_continuation_for_tracking
2859 && !is_lazy_continuation);
2860
2861 if breaks_list {
2862 has_list_breaking_content_since_last_item = true;
2863 }
2864 }
2865
2866 if line_info.in_code_span_continuation
2869 && line_info.list_item.is_none()
2870 && let Some(ref mut block) = current_block
2871 {
2872 block.end_line = line_num;
2873 }
2874
2875 let is_valid_continuation =
2880 line_info.indent >= min_continuation_for_tracking || (line_info.indent == 0 && !line_info.is_blank); if !line_info.in_code_span_continuation
2882 && line_info.list_item.is_none()
2883 && !line_info.is_blank
2884 && !line_info.in_code_block
2885 && is_valid_continuation
2886 && let Some(ref mut block) = current_block
2887 {
2888 block.end_line = line_num;
2889 }
2890
2891 if let Some(list_item) = &line_info.list_item {
2893 let item_indent = list_item.marker_column;
2895 let nesting = item_indent / 2; if let Some(ref mut block) = current_block {
2898 let is_nested = nesting > block.nesting_level;
2902 let same_type =
2903 (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
2904 let same_context = block.blockquote_prefix == blockquote_prefix;
2905 let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
2907
2908 let marker_compatible =
2910 block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
2911
2912 let has_non_list_content = has_list_breaking_content_since_last_item;
2915
2916 let mut continues_list = if is_nested {
2920 same_context && reasonable_distance && !has_non_list_content
2922 } else {
2923 same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
2925 };
2926
2927 if !continues_list && reasonable_distance && line_num > 0 && block.end_line == line_num - 1 {
2930 if block.item_lines.contains(&(line_num - 1)) {
2933 continues_list = true;
2935 } else {
2936 continues_list = true;
2940 }
2941 }
2942
2943 if continues_list {
2944 block.end_line = line_num;
2946 block.item_lines.push(line_num);
2947
2948 block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
2950 list_item.marker.len() + 1
2951 } else {
2952 list_item.marker.len()
2953 });
2954
2955 if !block.is_ordered
2957 && block.marker.is_some()
2958 && block.marker.as_ref() != Some(&list_item.marker)
2959 {
2960 block.marker = None;
2962 }
2963
2964 reset_tracking_state(
2966 list_item,
2967 &mut has_list_breaking_content_since_last_item,
2968 &mut min_continuation_for_tracking,
2969 );
2970 } else {
2971 list_blocks.push(block.clone());
2974
2975 *block = ListBlock {
2976 start_line: line_num,
2977 end_line: line_num,
2978 is_ordered: list_item.is_ordered,
2979 marker: if list_item.is_ordered {
2980 None
2981 } else {
2982 Some(list_item.marker.clone())
2983 },
2984 blockquote_prefix: blockquote_prefix.clone(),
2985 item_lines: vec![line_num],
2986 nesting_level: nesting,
2987 max_marker_width: if list_item.is_ordered {
2988 list_item.marker.len() + 1
2989 } else {
2990 list_item.marker.len()
2991 },
2992 };
2993
2994 reset_tracking_state(
2996 list_item,
2997 &mut has_list_breaking_content_since_last_item,
2998 &mut min_continuation_for_tracking,
2999 );
3000 }
3001 } else {
3002 current_block = Some(ListBlock {
3004 start_line: line_num,
3005 end_line: line_num,
3006 is_ordered: list_item.is_ordered,
3007 marker: if list_item.is_ordered {
3008 None
3009 } else {
3010 Some(list_item.marker.clone())
3011 },
3012 blockquote_prefix,
3013 item_lines: vec![line_num],
3014 nesting_level: nesting,
3015 max_marker_width: list_item.marker.len(),
3016 });
3017
3018 reset_tracking_state(
3020 list_item,
3021 &mut has_list_breaking_content_since_last_item,
3022 &mut min_continuation_for_tracking,
3023 );
3024 }
3025
3026 last_list_item_line = line_num;
3027 current_indent_level = item_indent;
3028 last_marker_width = if list_item.is_ordered {
3029 list_item.marker.len() + 1 } else {
3031 list_item.marker.len()
3032 };
3033 } else if let Some(ref mut block) = current_block {
3034 let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
3044 lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
3045 } else {
3046 false
3047 };
3048
3049 let min_continuation_indent = if block.is_ordered {
3053 current_indent_level + last_marker_width
3054 } else {
3055 current_indent_level + 2 };
3057
3058 if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
3059 block.end_line = line_num;
3061 } else if line_info.is_blank {
3062 let mut check_idx = line_idx + 1;
3065 let mut found_continuation = false;
3066
3067 while check_idx < lines.len() && lines[check_idx].is_blank {
3069 check_idx += 1;
3070 }
3071
3072 if check_idx < lines.len() {
3073 let next_line = &lines[check_idx];
3074 if !next_line.in_code_block && next_line.indent >= min_continuation_indent {
3076 found_continuation = true;
3077 }
3078 else if !next_line.in_code_block
3080 && next_line.list_item.is_some()
3081 && let Some(item) = &next_line.list_item
3082 {
3083 let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
3084 .find(next_line.content(content))
3085 .map_or(String::new(), |m| m.as_str().to_string());
3086 if item.marker_column == current_indent_level
3087 && item.is_ordered == block.is_ordered
3088 && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
3089 {
3090 let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
3093 if let Some(between_line) = lines.get(idx) {
3094 let between_content = between_line.content(content);
3095 let trimmed = between_content.trim();
3096 if trimmed.is_empty() {
3098 return false;
3099 }
3100 let line_indent = between_content.len() - between_content.trim_start().len();
3102
3103 if trimmed.starts_with("```")
3105 || trimmed.starts_with("~~~")
3106 || trimmed.starts_with("---")
3107 || trimmed.starts_with("***")
3108 || trimmed.starts_with("___")
3109 || trimmed.starts_with(">")
3110 || crate::utils::skip_context::is_table_line(trimmed)
3111 || between_line.heading.is_some()
3112 {
3113 return true; }
3115
3116 line_indent >= min_continuation_indent
3118 } else {
3119 false
3120 }
3121 });
3122
3123 if block.is_ordered {
3124 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
3127 if let Some(between_line) = lines.get(idx) {
3128 let trimmed = between_line.content(content).trim();
3129 if trimmed.is_empty() {
3130 return false;
3131 }
3132 trimmed.starts_with("```")
3134 || trimmed.starts_with("~~~")
3135 || trimmed.starts_with("---")
3136 || trimmed.starts_with("***")
3137 || trimmed.starts_with("___")
3138 || trimmed.starts_with(">")
3139 || crate::utils::skip_context::is_table_line(trimmed)
3140 || between_line.heading.is_some()
3141 } else {
3142 false
3143 }
3144 });
3145 found_continuation = !has_structural_separators;
3146 } else {
3147 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
3149 if let Some(between_line) = lines.get(idx) {
3150 let trimmed = between_line.content(content).trim();
3151 if trimmed.is_empty() {
3152 return false;
3153 }
3154 trimmed.starts_with("```")
3156 || trimmed.starts_with("~~~")
3157 || trimmed.starts_with("---")
3158 || trimmed.starts_with("***")
3159 || trimmed.starts_with("___")
3160 || trimmed.starts_with(">")
3161 || crate::utils::skip_context::is_table_line(trimmed)
3162 || between_line.heading.is_some()
3163 } else {
3164 false
3165 }
3166 });
3167 found_continuation = !has_structural_separators;
3168 }
3169 }
3170 }
3171 }
3172
3173 if found_continuation {
3174 block.end_line = line_num;
3176 } else {
3177 list_blocks.push(block.clone());
3179 current_block = None;
3180 }
3181 } else {
3182 let min_required_indent = if block.is_ordered {
3185 current_indent_level + last_marker_width
3186 } else {
3187 current_indent_level + 2
3188 };
3189
3190 let line_content = line_info.content(content).trim();
3195
3196 let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
3198
3199 let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3202 let current_bq_level = blockquote_prefix.chars().filter(|&c| c == '>').count();
3203 let blockquote_level_changed = line_content.starts_with(">") && current_bq_level != block_bq_level;
3204
3205 let is_structural_separator = line_info.heading.is_some()
3206 || line_content.starts_with("```")
3207 || line_content.starts_with("~~~")
3208 || line_content.starts_with("---")
3209 || line_content.starts_with("***")
3210 || line_content.starts_with("___")
3211 || blockquote_level_changed
3212 || looks_like_table;
3213
3214 let is_lazy_continuation = !is_structural_separator
3217 && !line_info.is_blank
3218 && (line_info.indent == 0 || line_info.indent >= min_required_indent);
3219
3220 if is_lazy_continuation {
3221 let content_to_check = if !blockquote_prefix.is_empty() {
3224 line_info
3226 .content(content)
3227 .strip_prefix(&blockquote_prefix)
3228 .unwrap_or(line_info.content(content))
3229 .trim()
3230 } else {
3231 line_info.content(content).trim()
3232 };
3233
3234 let starts_with_uppercase = content_to_check.chars().next().is_some_and(|c| c.is_uppercase());
3235
3236 if starts_with_uppercase && last_list_item_line > 0 {
3239 list_blocks.push(block.clone());
3241 current_block = None;
3242 } else {
3243 block.end_line = line_num;
3245 }
3246 } else {
3247 list_blocks.push(block.clone());
3249 current_block = None;
3250 }
3251 }
3252 }
3253 }
3254
3255 if let Some(block) = current_block {
3257 list_blocks.push(block);
3258 }
3259
3260 merge_adjacent_list_blocks(content, &mut list_blocks, lines);
3262
3263 list_blocks
3264 }
3265
3266 fn compute_char_frequency(content: &str) -> CharFrequency {
3268 let mut frequency = CharFrequency::default();
3269
3270 for ch in content.chars() {
3271 match ch {
3272 '#' => frequency.hash_count += 1,
3273 '*' => frequency.asterisk_count += 1,
3274 '_' => frequency.underscore_count += 1,
3275 '-' => frequency.hyphen_count += 1,
3276 '+' => frequency.plus_count += 1,
3277 '>' => frequency.gt_count += 1,
3278 '|' => frequency.pipe_count += 1,
3279 '[' => frequency.bracket_count += 1,
3280 '`' => frequency.backtick_count += 1,
3281 '<' => frequency.lt_count += 1,
3282 '!' => frequency.exclamation_count += 1,
3283 '\n' => frequency.newline_count += 1,
3284 _ => {}
3285 }
3286 }
3287
3288 frequency
3289 }
3290
3291 fn parse_html_tags(
3293 content: &str,
3294 lines: &[LineInfo],
3295 code_blocks: &[(usize, usize)],
3296 flavor: MarkdownFlavor,
3297 ) -> Vec<HtmlTag> {
3298 static HTML_TAG_REGEX: LazyLock<regex::Regex> =
3299 LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
3300
3301 let mut html_tags = Vec::with_capacity(content.matches('<').count());
3302
3303 for cap in HTML_TAG_REGEX.captures_iter(content) {
3304 let full_match = cap.get(0).unwrap();
3305 let match_start = full_match.start();
3306 let match_end = full_match.end();
3307
3308 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3310 continue;
3311 }
3312
3313 let is_closing = !cap.get(1).unwrap().as_str().is_empty();
3314 let tag_name_original = cap.get(2).unwrap().as_str();
3315 let tag_name = tag_name_original.to_lowercase();
3316 let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
3317
3318 if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
3321 continue;
3322 }
3323
3324 let mut line_num = 1;
3326 let mut col_start = match_start;
3327 let mut col_end = match_end;
3328 for (idx, line_info) in lines.iter().enumerate() {
3329 if match_start >= line_info.byte_offset {
3330 line_num = idx + 1;
3331 col_start = match_start - line_info.byte_offset;
3332 col_end = match_end - line_info.byte_offset;
3333 } else {
3334 break;
3335 }
3336 }
3337
3338 html_tags.push(HtmlTag {
3339 line: line_num,
3340 start_col: col_start,
3341 end_col: col_end,
3342 byte_offset: match_start,
3343 byte_end: match_end,
3344 tag_name,
3345 is_closing,
3346 is_self_closing,
3347 raw_content: full_match.as_str().to_string(),
3348 });
3349 }
3350
3351 html_tags
3352 }
3353
3354 fn parse_emphasis_spans(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<EmphasisSpan> {
3356 static EMPHASIS_REGEX: LazyLock<regex::Regex> =
3357 LazyLock::new(|| regex::Regex::new(r"(\*{1,3}|_{1,3})([^*_\s][^*_]*?)(\*{1,3}|_{1,3})").unwrap());
3358
3359 let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
3360
3361 for cap in EMPHASIS_REGEX.captures_iter(content) {
3362 let full_match = cap.get(0).unwrap();
3363 let match_start = full_match.start();
3364 let match_end = full_match.end();
3365
3366 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3368 continue;
3369 }
3370
3371 let opening_markers = cap.get(1).unwrap().as_str();
3372 let content_part = cap.get(2).unwrap().as_str();
3373 let closing_markers = cap.get(3).unwrap().as_str();
3374
3375 if opening_markers.chars().next() != closing_markers.chars().next()
3377 || opening_markers.len() != closing_markers.len()
3378 {
3379 continue;
3380 }
3381
3382 let marker = opening_markers.chars().next().unwrap();
3383 let marker_count = opening_markers.len();
3384
3385 let mut line_num = 1;
3387 let mut col_start = match_start;
3388 let mut col_end = match_end;
3389 for (idx, line_info) in lines.iter().enumerate() {
3390 if match_start >= line_info.byte_offset {
3391 line_num = idx + 1;
3392 col_start = match_start - line_info.byte_offset;
3393 col_end = match_end - line_info.byte_offset;
3394 } else {
3395 break;
3396 }
3397 }
3398
3399 emphasis_spans.push(EmphasisSpan {
3400 line: line_num,
3401 start_col: col_start,
3402 end_col: col_end,
3403 byte_offset: match_start,
3404 byte_end: match_end,
3405 marker,
3406 marker_count,
3407 content: content_part.to_string(),
3408 });
3409 }
3410
3411 emphasis_spans
3412 }
3413
3414 fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
3416 let mut table_rows = Vec::with_capacity(lines.len() / 20);
3417
3418 for (line_idx, line_info) in lines.iter().enumerate() {
3419 if line_info.in_code_block || line_info.is_blank {
3421 continue;
3422 }
3423
3424 let line = line_info.content(content);
3425 let line_num = line_idx + 1;
3426
3427 if !line.contains('|') {
3429 continue;
3430 }
3431
3432 let parts: Vec<&str> = line.split('|').collect();
3434 let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
3435
3436 let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
3438 let mut column_alignments = Vec::new();
3439
3440 if is_separator {
3441 for part in &parts[1..parts.len() - 1] {
3442 let trimmed = part.trim();
3444 let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
3445 "center".to_string()
3446 } else if trimmed.ends_with(':') {
3447 "right".to_string()
3448 } else if trimmed.starts_with(':') {
3449 "left".to_string()
3450 } else {
3451 "none".to_string()
3452 };
3453 column_alignments.push(alignment);
3454 }
3455 }
3456
3457 table_rows.push(TableRow {
3458 line: line_num,
3459 is_separator,
3460 column_count,
3461 column_alignments,
3462 });
3463 }
3464
3465 table_rows
3466 }
3467
3468 fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
3470 let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
3471
3472 for cap in URL_SIMPLE_REGEX.captures_iter(content) {
3474 let full_match = cap.get(0).unwrap();
3475 let match_start = full_match.start();
3476 let match_end = full_match.end();
3477
3478 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3480 continue;
3481 }
3482
3483 let preceding_char = if match_start > 0 {
3485 content.chars().nth(match_start - 1)
3486 } else {
3487 None
3488 };
3489 let following_char = content.chars().nth(match_end);
3490
3491 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3492 continue;
3493 }
3494 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3495 continue;
3496 }
3497
3498 let url = full_match.as_str();
3499 let url_type = if url.starts_with("https://") {
3500 "https"
3501 } else if url.starts_with("http://") {
3502 "http"
3503 } else if url.starts_with("ftp://") {
3504 "ftp"
3505 } else {
3506 "other"
3507 };
3508
3509 let mut line_num = 1;
3511 let mut col_start = match_start;
3512 let mut col_end = match_end;
3513 for (idx, line_info) in lines.iter().enumerate() {
3514 if match_start >= line_info.byte_offset {
3515 line_num = idx + 1;
3516 col_start = match_start - line_info.byte_offset;
3517 col_end = match_end - line_info.byte_offset;
3518 } else {
3519 break;
3520 }
3521 }
3522
3523 bare_urls.push(BareUrl {
3524 line: line_num,
3525 start_col: col_start,
3526 end_col: col_end,
3527 byte_offset: match_start,
3528 byte_end: match_end,
3529 url: url.to_string(),
3530 url_type: url_type.to_string(),
3531 });
3532 }
3533
3534 for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
3536 let full_match = cap.get(0).unwrap();
3537 let match_start = full_match.start();
3538 let match_end = full_match.end();
3539
3540 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3542 continue;
3543 }
3544
3545 let preceding_char = if match_start > 0 {
3547 content.chars().nth(match_start - 1)
3548 } else {
3549 None
3550 };
3551 let following_char = content.chars().nth(match_end);
3552
3553 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3554 continue;
3555 }
3556 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3557 continue;
3558 }
3559
3560 let email = full_match.as_str();
3561
3562 let mut line_num = 1;
3564 let mut col_start = match_start;
3565 let mut col_end = match_end;
3566 for (idx, line_info) in lines.iter().enumerate() {
3567 if match_start >= line_info.byte_offset {
3568 line_num = idx + 1;
3569 col_start = match_start - line_info.byte_offset;
3570 col_end = match_end - line_info.byte_offset;
3571 } else {
3572 break;
3573 }
3574 }
3575
3576 bare_urls.push(BareUrl {
3577 line: line_num,
3578 start_col: col_start,
3579 end_col: col_end,
3580 byte_offset: match_start,
3581 byte_end: match_end,
3582 url: email.to_string(),
3583 url_type: "email".to_string(),
3584 });
3585 }
3586
3587 bare_urls
3588 }
3589
/// Returns a `ValidHeadingsIter` constructed over this context's `lines`.
///
/// NOTE(review): judging by the type name the iterator presumably yields only
/// headings whose `is_valid` flag is set; confirm against `ValidHeadingsIter`.
#[must_use]
pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
    ValidHeadingsIter::new(&self.lines)
}
3613
3614 #[must_use]
3618 pub fn has_valid_headings(&self) -> bool {
3619 self.lines
3620 .iter()
3621 .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
3622 }
3623}
3624
3625fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
3627 if list_blocks.len() < 2 {
3628 return;
3629 }
3630
3631 let mut merger = ListBlockMerger::new(content, lines);
3632 *list_blocks = merger.merge(list_blocks);
3633}
3634
/// Helper that decides which neighbouring list blocks belong together and
/// merges them.
///
/// It only borrows the document, so it is cheap to construct per merge pass.
struct ListBlockMerger<'a> {
    /// Document text; passed to `LineInfo::content` to read a line's text.
    content: &'a str,
    /// Per-line metadata, indexed by `line_number - 1`.
    lines: &'a [LineInfo],
}
3640
3641impl<'a> ListBlockMerger<'a> {
3642 fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
3643 Self { content, lines }
3644 }
3645
3646 fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
3647 let mut merged = Vec::with_capacity(list_blocks.len());
3648 let mut current = list_blocks[0].clone();
3649
3650 for next in list_blocks.iter().skip(1) {
3651 if self.should_merge_blocks(¤t, next) {
3652 current = self.merge_two_blocks(current, next);
3653 } else {
3654 merged.push(current);
3655 current = next.clone();
3656 }
3657 }
3658
3659 merged.push(current);
3660 merged
3661 }
3662
3663 fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
3665 if !self.blocks_are_compatible(current, next) {
3667 return false;
3668 }
3669
3670 let spacing = self.analyze_spacing_between(current, next);
3672 match spacing {
3673 BlockSpacing::Consecutive => true,
3674 BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
3675 BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
3676 self.can_merge_with_content_between(current, next)
3677 }
3678 }
3679 }
3680
3681 fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
3683 current.is_ordered == next.is_ordered
3684 && current.blockquote_prefix == next.blockquote_prefix
3685 && current.nesting_level == next.nesting_level
3686 }
3687
3688 fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
3690 let gap = next.start_line - current.end_line;
3691
3692 match gap {
3693 1 => BlockSpacing::Consecutive,
3694 2 => BlockSpacing::SingleBlank,
3695 _ if gap > 2 => {
3696 if self.has_only_blank_lines_between(current, next) {
3697 BlockSpacing::MultipleBlanks
3698 } else {
3699 BlockSpacing::ContentBetween
3700 }
3701 }
3702 _ => BlockSpacing::Consecutive, }
3704 }
3705
3706 fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3708 if has_meaningful_content_between(self.content, current, next, self.lines) {
3711 return false; }
3713
3714 !current.is_ordered && current.marker == next.marker
3716 }
3717
3718 fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3720 if has_meaningful_content_between(self.content, current, next, self.lines) {
3722 return false; }
3724
3725 current.is_ordered && next.is_ordered
3727 }
3728
3729 fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3731 for line_num in (current.end_line + 1)..next.start_line {
3732 if let Some(line_info) = self.lines.get(line_num - 1)
3733 && !line_info.content(self.content).trim().is_empty()
3734 {
3735 return false;
3736 }
3737 }
3738 true
3739 }
3740
3741 fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
3743 current.end_line = next.end_line;
3744 current.item_lines.extend_from_slice(&next.item_lines);
3745
3746 current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
3748
3749 if !current.is_ordered && self.markers_differ(¤t, next) {
3751 current.marker = None; }
3753
3754 current
3755 }
3756
3757 fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
3759 current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
3760 }
3761}
3762
/// How two neighbouring list blocks are separated, as classified by
/// `ListBlockMerger::analyze_spacing_between`.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// The next block starts on the line right after the current one ends.
    Consecutive,
    /// Exactly one line lies between the two blocks.
    SingleBlank,
    /// More than one line lies between the blocks and all of them are blank.
    MultipleBlanks,
    /// More than one line lies between the blocks and at least one is not blank.
    ContentBetween,
}
3771
3772fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
3774 for line_num in (current.end_line + 1)..next.start_line {
3776 if let Some(line_info) = lines.get(line_num - 1) {
3777 let trimmed = line_info.content(content).trim();
3779
3780 if trimmed.is_empty() {
3782 continue;
3783 }
3784
3785 if line_info.heading.is_some() {
3789 return true; }
3791
3792 if is_horizontal_rule(trimmed) {
3794 return true; }
3796
3797 if crate::utils::skip_context::is_table_line(trimmed) {
3799 return true; }
3801
3802 if trimmed.starts_with('>') {
3804 return true; }
3806
3807 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
3809 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3810
3811 let min_continuation_indent = if current.is_ordered {
3813 current.nesting_level + current.max_marker_width + 1 } else {
3815 current.nesting_level + 2
3816 };
3817
3818 if line_indent < min_continuation_indent {
3819 return true; }
3822 }
3823
3824 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3826
3827 let min_indent = if current.is_ordered {
3829 current.nesting_level + current.max_marker_width
3830 } else {
3831 current.nesting_level + 2
3832 };
3833
3834 if line_indent < min_indent {
3836 return true; }
3838
3839 }
3842 }
3843
3844 false
3846}
3847
3848pub fn is_horizontal_rule_line(line: &str) -> bool {
3855 let leading_spaces = line.len() - line.trim_start_matches(' ').len();
3857 if leading_spaces > 3 || line.starts_with('\t') {
3858 return false;
3859 }
3860
3861 is_horizontal_rule_content(line.trim())
3862}
3863
/// Checks whether already-trimmed text is a horizontal rule.
///
/// The text must start with `-`, `*` or `_`, contain that same character at
/// least three times, and contain nothing else except spaces and tabs.
/// Leading whitespace is not skipped here, so callers pass trimmed text.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    // The first character picks the marker; anything else cannot be a rule.
    let marker = match trimmed.chars().next() {
        Some(c @ ('-' | '*' | '_')) => c,
        _ => return false,
    };

    // Single pass over the characters, with no intermediate `Vec<char>`.
    let mut marker_count = 0usize;
    for ch in trimmed.chars() {
        match ch {
            c if c == marker => marker_count += 1,
            ' ' | '\t' => {}
            _ => return false,
        }
    }

    marker_count >= 3
}
3888
/// Thin alias that forwards to `is_horizontal_rule_content`.
///
/// `trimmed` is passed through unchanged, so callers supply text with the
/// surrounding whitespace already removed.
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}
3893
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty document has a single line offset and no line entries.
    #[test]
    fn test_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }

    /// A document without a newline maps every offset onto line 1.
    #[test]
    fn test_single_line() {
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
    }

    /// Line offsets and offset-to-position mapping across several lines.
    #[test]
    fn test_multi_line() {
        let content = "# Title\n\nSecond line\nThird line";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // start of the title
        assert_eq!(ctx.offset_to_line_col(8), (2, 1)); // the blank line
        assert_eq!(ctx.offset_to_line_col(9), (3, 1)); // start of "Second line"
        assert_eq!(ctx.offset_to_line_col(15), (3, 7)); // inside "Second line"
        assert_eq!(ctx.offset_to_line_col(21), (4, 1)); // start of "Third line"
    }

    /// Per-line metadata: content, byte offset, indent and blank detection.
    #[test]
    fn test_line_info() {
        // The second line is indented by four spaces, matching the
        // `indent == 4` assertions below.
        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.lines.len(), 7);

        // Line 1: the heading.
        let line1 = &ctx.lines[0];
        assert_eq!(line1.content(ctx.content), "# Title");
        assert_eq!(line1.byte_offset, 0);
        assert_eq!(line1.indent, 0);
        assert!(!line1.is_blank);
        assert!(!line1.in_code_block);
        assert!(line1.list_item.is_none());

        // Line 2: the indented text.
        let line2 = &ctx.lines[1];
        assert_eq!(line2.content(ctx.content), "    indented");
        assert_eq!(line2.byte_offset, 8);
        assert_eq!(line2.indent, 4);
        assert!(!line2.is_blank);

        // Line 3: blank.
        let line3 = &ctx.lines[2];
        assert_eq!(line3.content(ctx.content), "");
        assert!(line3.is_blank);

        // Lookup helpers take 1-based line numbers.
        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
    }

    /// List markers are recognised for unordered, nested and ordered items.
    #[test]
    fn test_list_item_detection() {
        // The nested bullet is indented by two spaces, matching the
        // `marker_column == 2` assertion below.
        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Line 1: top-level unordered item.
        let line1 = &ctx.lines[0];
        assert!(line1.list_item.is_some());
        let list1 = line1.list_item.as_ref().unwrap();
        assert_eq!(list1.marker, "-");
        assert!(!list1.is_ordered);
        assert_eq!(list1.marker_column, 0);
        assert_eq!(list1.content_column, 2);

        // Line 2: nested unordered item.
        let line2 = &ctx.lines[1];
        assert!(line2.list_item.is_some());
        let list2 = line2.list_item.as_ref().unwrap();
        assert_eq!(list2.marker, "*");
        assert_eq!(list2.marker_column, 2);

        // Line 3: ordered item.
        let line3 = &ctx.lines[2];
        assert!(line3.list_item.is_some());
        let list3 = line3.list_item.as_ref().unwrap();
        assert_eq!(list3.marker, "1.");
        assert!(list3.is_ordered);
        assert_eq!(list3.number, Some(1));

        // Line 6: plain text is not a list item.
        let line6 = &ctx.lines[5];
        assert!(line6.list_item.is_none());
    }

    /// Offsets that land on newline characters and at the end of the content.
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        let content = "a\nb\nc";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // 'a'
        assert_eq!(ctx.offset_to_line_col(1), (1, 2)); // newline after 'a'
        assert_eq!(ctx.offset_to_line_col(2), (2, 1)); // 'b'
        assert_eq!(ctx.offset_to_line_col(3), (2, 2)); // newline after 'b'
        assert_eq!(ctx.offset_to_line_col(4), (3, 1)); // 'c'
        assert_eq!(ctx.offset_to_line_col(5), (3, 2)); // one past 'c'
    }

    /// In the MDX flavor, leading `import`/`export` lines are flagged as ESM.
    #[test]
    fn test_mdx_esm_blocks() {
        let content = r##"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall

In {year}, the snowfall was above average.
It was followed by a warm spring which caused
flood conditions in many of the nearby rivers.

<Chart color="#fcb32c" year={year} />
"##;

        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);

        assert_eq!(ctx.lines.len(), 10);
        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
        assert!(
            !ctx.lines[3].in_esm_block,
            "Line 4 (heading) should NOT be in_esm_block"
        );
        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
    }

    /// The Standard flavor never flags lines as ESM, even `import`/`export`.
    #[test]
    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
        let content = r#"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(
            !ctx.lines[0].in_esm_block,
            "Line 1 should NOT be in_esm_block in Standard flavor"
        );
        assert!(
            !ctx.lines[1].in_esm_block,
            "Line 2 should NOT be in_esm_block in Standard flavor"
        );
    }
}