1use crate::config::MarkdownFlavor;
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
4use crate::utils::element_cache::ElementCache;
5use crate::utils::regex_cache::URL_SIMPLE_REGEX;
6use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
7use regex::Regex;
8use std::borrow::Cow;
9use std::path::PathBuf;
10use std::sync::LazyLock;
11
/// Evaluates `$code`, returns its value, and optionally reports how long it took.
///
/// When `$profile` is true the elapsed time is written to stderr as
/// `[PROFILE] <name>: <duration>`. This variant is compiled for every target
/// except wasm32.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        // The clock is started unconditionally; only the report is gated on `$profile`.
        let start = std::time::Instant::now();
        let result = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, start.elapsed());
        }
        result
    }};
}
24
/// wasm32 variant of `profile_section!`: expands to `$code` alone, with no
/// timing and no output.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    // `$name` and `$profile` are accepted so call sites are identical on every target,
    // but they are not expanded here.
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
29
/// Regex for Markdown link syntax: `[text](url "title")` or `[text][ref]`.
///
/// Compiled once on first use. Flags: `s` (dot matches newline) and `x`
/// (verbose mode: whitespace and `#` comments inside the pattern are ignored).
/// Capture groups: 1 = link text, 2 = angle-bracketed URL, 3 = bare URL,
/// 4 / 5 = double- / single-quoted title, 6 = reference ID.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\] # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
        \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
        |
        \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
43
/// Regex for Markdown image syntax: `![alt](url "title")` or `![alt][ref]`.
///
/// Compiled once on first use. Flags: `s` (dot matches newline) and `x`
/// (verbose mode: whitespace and `#` comments inside the pattern are ignored).
/// Capture groups: 1 = alt text, 2 = angle-bracketed URL, 3 = bare URL,
/// 4 / 5 = double- / single-quoted title, 6 = reference ID.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\] # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
        \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
        |
        \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
57
/// Multi-line regex for reference definitions such as `[id]: url "title"`.
///
/// Allows up to three leading spaces. Capture groups: 1 = id, 2 = URL
/// (a run of non-whitespace), 3 / 4 = double- / single-quoted title.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
61
/// Regex for email-address-shaped text: `local@domain.tld` with a TLD of at
/// least two ASCII letters.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
67
/// Regex capturing a leading blockquote prefix in group 1: optional whitespace,
/// one or more consecutive `>`, then any trailing whitespace.
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
70
/// Pre-computed facts about a single line of the document.
///
/// `LintContext` stores one entry per line in `lines`; line number `n`
/// (1-based) lives at index `n - 1`.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset of the start of this line within the source text.
    pub byte_offset: usize,
    /// Length in bytes of the slice returned by [`LineInfo::content`].
    pub byte_len: usize,
    /// Indentation of the line. NOTE(review): unit (bytes vs. chars) is not visible here — confirm.
    pub indent: usize,
    /// Indentation as rendered. NOTE(review): presumably tab-expanded — confirm.
    pub visual_indent: usize,
    /// Whether the line is blank.
    pub is_blank: bool,
    /// Whether the line is inside a code block.
    pub in_code_block: bool,
    /// Whether the line is inside front matter.
    pub in_front_matter: bool,
    /// Whether the line is inside an HTML block.
    pub in_html_block: bool,
    /// Whether the line is inside an HTML comment.
    pub in_html_comment: bool,
    /// List item details, if this line carries a list marker.
    pub list_item: Option<ListItemInfo>,
    /// Heading details, if this line is a heading.
    pub heading: Option<HeadingInfo>,
    /// Blockquote details, if this line is part of a blockquote.
    pub blockquote: Option<BlockquoteInfo>,
    /// Whether the line is inside a mkdocstrings block.
    pub in_mkdocstrings: bool,
    /// Whether the line is inside an ESM block.
    pub in_esm_block: bool,
    /// Set by `LintContext::new` on every line after the first that a
    /// multi-line code span extends onto.
    pub in_code_span_continuation: bool,
    /// Whether the line is a horizontal rule.
    pub is_horizontal_rule: bool,
}
110
111impl LineInfo {
112 pub fn content<'a>(&self, source: &'a str) -> &'a str {
114 &source[self.byte_offset..self.byte_offset + self.byte_len]
115 }
116}
117
/// Details of the list marker found on a line.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker text as written in the source.
    pub marker: String,
    /// `true` for an ordered (numbered) item, `false` for a bullet item.
    pub is_ordered: bool,
    /// The item's number. NOTE(review): presumably `None` for unordered items — confirm.
    pub number: Option<usize>,
    /// Column at which the marker starts. NOTE(review): indexing base is not visible here — confirm.
    pub marker_column: usize,
    /// Column at which the item's content starts.
    pub content_column: usize,
}
132
/// Syntactic form of a heading.
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    /// ATX heading (`#`-prefixed).
    ATX,
    /// Setext heading, level 1.
    Setext1,
    /// Setext heading, level 2.
    Setext2,
}
143
/// A link found in the document by `LintContext::parse_links`.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// 1-based line number on which the link starts.
    pub line: usize,
    /// Byte distance from the start of the line to the start of the link.
    pub start_col: usize,
    /// Byte distance from the start of the line containing the link's end to that end.
    pub end_col: usize,
    /// Byte offset of the link's start in the document.
    pub byte_offset: usize,
    /// Byte offset of the link's end in the document.
    pub byte_end: usize,
    /// Link text exactly as written between the brackets (escapes preserved).
    pub text: Cow<'a, str>,
    /// Destination URL; empty for reference links found by the regex fallback.
    pub url: Cow<'a, str>,
    /// `true` for reference, collapsed, and shortcut links.
    pub is_reference: bool,
    /// Lowercased reference ID for reference-style links, otherwise `None`.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link kind as classified by pulldown-cmark.
    pub link_type: LinkType,
}
168
/// A reference that pulldown-cmark reported through its broken-link callback.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text that was reported.
    pub reference: String,
    /// Byte range of the broken link in the document.
    pub span: std::ops::Range<usize>,
}
177
/// A footnote reference emitted by pulldown-cmark while parsing links.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// The footnote identifier.
    pub id: String,
    /// 1-based line number on which the reference starts.
    pub line: usize,
    /// Byte offset of the start of the reference.
    pub byte_offset: usize,
    /// Byte offset of the end of the reference.
    pub byte_end: usize,
}
190
/// An image found in the document by `LintContext::parse_images`.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// 1-based line number on which the image starts.
    pub line: usize,
    /// Byte distance from the start of the line to the start of the image.
    pub start_col: usize,
    /// Byte distance from the start of the line containing the image's end to that end.
    pub end_col: usize,
    /// Byte offset of the image's start in the document.
    pub byte_offset: usize,
    /// Byte offset of the image's end in the document.
    pub byte_end: usize,
    /// Alternative text of the image.
    pub alt_text: Cow<'a, str>,
    /// Image URL.
    pub url: Cow<'a, str>,
    /// `true` for reference, collapsed, and shortcut images.
    pub is_reference: bool,
    /// Reference ID for reference-style images, if any.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link kind as classified by pulldown-cmark.
    pub link_type: LinkType,
}
215
/// A reference definition (`[id]: url "title"`).
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line number of the definition.
    pub line: usize,
    /// Reference ID. `LintContext::get_reference_url` compares it against a
    /// lowercased lookup key, so it is presumably stored lowercased — confirm.
    pub id: String,
    /// Target URL.
    pub url: String,
    /// Optional title.
    pub title: Option<String>,
    /// Byte offset where the definition starts (inclusive).
    pub byte_offset: usize,
    /// Byte offset where the definition ends (exclusive).
    pub byte_end: usize,
    /// Byte offset where the title starts (inclusive), when a title is present.
    pub title_byte_start: Option<usize>,
    /// Byte offset where the title ends (exclusive), when a title is present.
    pub title_byte_end: Option<usize>,
}
236
/// An inline code span.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// 1-based line number on which the span starts.
    pub line: usize,
    /// 1-based line number on which the span ends (greater than `line` for multi-line spans).
    pub end_line: usize,
    /// Start column on `line` (inclusive); compared against 0-based columns.
    pub start_col: usize,
    /// End column on `end_line` (exclusive); compared against 0-based columns.
    pub end_col: usize,
    /// Byte offset where the span starts (inclusive).
    pub byte_offset: usize,
    /// Byte offset where the span ends (exclusive).
    pub byte_end: usize,
    /// Number of backticks in the delimiter.
    pub backtick_count: usize,
    /// Text of the span.
    pub content: String,
}
257
/// Details of a heading found on a line.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level.
    pub level: u8,
    /// Syntactic style of the heading (ATX or Setext).
    pub style: HeadingStyle,
    /// The marker text as written.
    pub marker: String,
    /// Column at which the marker starts.
    pub marker_column: usize,
    /// Column at which the heading content starts.
    pub content_column: usize,
    /// Heading text. NOTE(review): presumably with the custom ID removed — confirm against `raw_text`.
    pub text: String,
    /// Custom ID attached to the heading, if any.
    pub custom_id: Option<String>,
    /// Heading text as written.
    pub raw_text: String,
    /// Whether a closing sequence is present.
    pub has_closing_sequence: bool,
    /// The closing sequence text.
    pub closing_sequence: String,
    /// Whether this is considered a valid heading; `ValidHeadingsIter` yields
    /// only headings for which this is `true`.
    pub is_valid: bool,
}
285
/// A heading whose `is_valid` flag is set, together with its line.
///
/// Produced by `ValidHeadingsIter`.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// 1-based line number of the heading.
    pub line_num: usize,
    /// The heading details.
    pub heading: &'a HeadingInfo,
    /// The line that carries the heading.
    pub line_info: &'a LineInfo,
}
299
300pub struct ValidHeadingsIter<'a> {
305 lines: &'a [LineInfo],
306 current_index: usize,
307}
308
309impl<'a> ValidHeadingsIter<'a> {
310 fn new(lines: &'a [LineInfo]) -> Self {
311 Self {
312 lines,
313 current_index: 0,
314 }
315 }
316}
317
318impl<'a> Iterator for ValidHeadingsIter<'a> {
319 type Item = ValidHeading<'a>;
320
321 fn next(&mut self) -> Option<Self::Item> {
322 while self.current_index < self.lines.len() {
323 let idx = self.current_index;
324 self.current_index += 1;
325
326 let line_info = &self.lines[idx];
327 if let Some(heading) = &line_info.heading
328 && heading.is_valid
329 {
330 return Some(ValidHeading {
331 line_num: idx + 1, heading,
333 line_info,
334 });
335 }
336 }
337 None
338 }
339}
340
/// Details of a blockquote line.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting depth of the blockquote.
    pub nesting_level: usize,
    /// Whitespace preceding the first marker.
    pub indent: String,
    /// Column at which the marker starts.
    pub marker_column: usize,
    /// The blockquote prefix text.
    pub prefix: String,
    /// Text following the prefix.
    pub content: String,
    /// Whether no space follows the marker.
    pub has_no_space_after_marker: bool,
    /// Whether more than one space follows the marker.
    pub has_multiple_spaces_after_marker: bool,
    /// Whether the line needs an MD028 fix. NOTE(review): exact criteria are not visible here.
    pub needs_md028_fix: bool,
}
361
/// A contiguous block of list lines.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// First line of the block (inclusive; compared against 1-based line numbers).
    pub start_line: usize,
    /// Last line of the block (inclusive).
    pub end_line: usize,
    /// Whether the block is an ordered list.
    pub is_ordered: bool,
    /// Marker used by the block. NOTE(review): presumably `None` when markers vary — confirm.
    pub marker: Option<String>,
    /// Blockquote prefix shared by the block's lines.
    pub blockquote_prefix: String,
    /// Line numbers of the list items in the block.
    pub item_lines: Vec<usize>,
    /// Nesting level of the block.
    pub nesting_level: usize,
    /// Width of the widest marker in the block.
    pub max_marker_width: usize,
}
382
383use std::sync::{Arc, OnceLock};
384
/// Occurrence counts for characters that matter to Markdown syntax.
///
/// `LintContext::has_char` and `LintContext::char_count` answer from these
/// counters for the characters listed below.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count reported for `#`.
    pub hash_count: usize,
    /// Count reported for `*`.
    pub asterisk_count: usize,
    /// Count reported for `_`.
    pub underscore_count: usize,
    /// Count reported for `-`.
    pub hyphen_count: usize,
    /// Count reported for `+`.
    pub plus_count: usize,
    /// Count reported for `>`.
    pub gt_count: usize,
    /// Count reported for `|`.
    pub pipe_count: usize,
    /// Count reported for `[`. NOTE(review): whether `]` is also counted is not visible here.
    pub bracket_count: usize,
    /// Count reported for `` ` ``.
    pub backtick_count: usize,
    /// Count reported for `<`.
    pub lt_count: usize,
    /// Count reported for `!`.
    pub exclamation_count: usize,
    /// Count reported for `\n`.
    pub newline_count: usize,
}
413
/// An HTML tag found in the document.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number of the tag (matched against the argument of `html_tags_on_line`).
    pub line: usize,
    /// Start column of the tag.
    pub start_col: usize,
    /// End column of the tag.
    pub end_col: usize,
    /// Byte offset where the tag starts (inclusive).
    pub byte_offset: usize,
    /// Byte offset where the tag ends (exclusive).
    pub byte_end: usize,
    /// Name of the tag.
    pub tag_name: String,
    /// Whether this is a closing tag.
    pub is_closing: bool,
    /// Whether the tag is self-closing.
    pub is_self_closing: bool,
    /// The tag text as written.
    pub raw_content: String,
}
436
/// An emphasis span found in the document.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number of the span (matched against the argument of `emphasis_spans_on_line`).
    pub line: usize,
    /// Start column of the span.
    pub start_col: usize,
    /// End column of the span.
    pub end_col: usize,
    /// Byte offset where the span starts.
    pub byte_offset: usize,
    /// Byte offset where the span ends.
    pub byte_end: usize,
    /// The emphasis marker character.
    pub marker: char,
    /// Number of marker characters in the delimiter.
    pub marker_count: usize,
    /// Text inside the emphasis.
    pub content: String,
}
457
/// A table row found in the document.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number of the row (matched against the argument of `table_rows_on_line`).
    pub line: usize,
    /// Whether this row is the header separator row.
    pub is_separator: bool,
    /// Number of columns in the row.
    pub column_count: usize,
    /// Per-column alignment. NOTE(review): the string values used are not visible here.
    pub column_alignments: Vec<String>,
}
470
/// A bare URL (or similar auto-linkable text) found in the document.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number of the URL (matched against the argument of `bare_urls_on_line`).
    pub line: usize,
    /// Start column of the URL.
    pub start_col: usize,
    /// End column of the URL.
    pub end_col: usize,
    /// Byte offset where the URL starts.
    pub byte_offset: usize,
    /// Byte offset where the URL ends.
    pub byte_end: usize,
    /// The URL text.
    pub url: String,
    /// Kind of URL. NOTE(review): the set of values is not visible here.
    pub url_type: String,
}
489
/// Parsed view of one Markdown document.
///
/// `LintContext::new` fills most fields eagerly; the `*_cache` fields are
/// filled on first use by their accessor methods.
pub struct LintContext<'a> {
    /// The document text.
    pub content: &'a str,
    /// Byte offset of the start of each line: `0`, then one past every `\n`.
    pub line_offsets: Vec<usize>,
    /// Code block ranges as returned by `CodeBlockUtils::detect_code_blocks`.
    pub code_blocks: Vec<(usize, usize)>,
    /// Per-line information; line `n` (1-based) is at index `n - 1`.
    pub lines: Vec<LineInfo>,
    /// Links found in the document.
    pub links: Vec<ParsedLink<'a>>,
    /// Images found in the document.
    pub images: Vec<ParsedImage<'a>>,
    /// References reported through pulldown-cmark's broken-link callback.
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Footnote references found in the document.
    pub footnote_refs: Vec<FootnoteRef>,
    /// Reference definitions found in the document.
    pub reference_defs: Vec<ReferenceDef>,
    /// Code spans; pre-populated by `new`, exposed through `code_spans()`.
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>,
    /// List blocks found in the document.
    pub list_blocks: Vec<ListBlock>,
    /// Character counts used by the `has_char` / `likely_has_*` checks.
    pub char_frequency: CharFrequency,
    /// Lazily computed HTML tags; see `html_tags()`.
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>,
    /// Lazily computed emphasis spans; see `emphasis_spans()`.
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>,
    /// Lazily computed table rows; see `table_rows()`.
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>,
    /// Lazily computed bare URLs; see `bare_urls()`.
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>,
    /// Lazily computed result of `has_mixed_list_nesting()`.
    has_mixed_list_nesting_cache: OnceLock<bool>,
    /// Byte ranges of HTML comments; see `html_comment_ranges()`.
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Table blocks found in the document.
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Line index built from `content`.
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    /// Half-open byte ranges of Jinja constructs; see `is_in_jinja_range()`.
    jinja_ranges: Vec<(usize, usize)>,
    /// Markdown flavor the document is parsed as.
    pub flavor: MarkdownFlavor,
    /// Path of the source file, when known.
    pub source_file: Option<PathBuf>,
}
515
/// Borrowed pieces of a blockquote line, as split by `parse_blockquote_detailed`.
struct BlockquoteComponents<'a> {
    /// Leading spaces/tabs before the first `>`.
    indent: &'a str,
    /// The consecutive run of `>` characters.
    markers: &'a str,
    /// Spaces/tabs immediately after the markers.
    spaces_after: &'a str,
    /// Everything after `spaces_after`.
    content: &'a str,
}

/// Splits `line` into indent, `>` markers, the whitespace after them, and the
/// remaining content.
///
/// Returns `None` when the first character after any leading spaces/tabs is
/// not `>` (including when the line is empty or all whitespace). Only the
/// first unbroken run of `>` is treated as markers; a later `>` separated by
/// whitespace is part of `content`.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    const BLANKS: &[char] = &[' ', '\t'];

    let from_marker = line.trim_start_matches(BLANKS);
    if !from_marker.starts_with('>') {
        return None;
    }
    let after_markers = from_marker.trim_start_matches('>');
    let content = after_markers.trim_start_matches(BLANKS);

    // Each piece is what the corresponding trim step removed.
    let (indent, _) = line.split_at(line.len() - from_marker.len());
    let (markers, _) = from_marker.split_at(from_marker.len() - after_markers.len());
    let (spaces_after, _) = after_markers.split_at(after_markers.len() - content.len());

    Some(BlockquoteComponents {
        indent,
        markers,
        spaces_after,
        content,
    })
}
560
561impl<'a> LintContext<'a> {
/// Builds a `LintContext` for `content` by running every eager parsing stage.
///
/// `flavor` selects flavor-specific handling (for example, autodoc block
/// detection only runs for `MarkdownFlavor::MkDocs`). `source_file` is stored
/// as given.
///
/// On non-wasm32 targets, when the `RUMDL_PROFILE_QUADRATIC` environment
/// variable can be read, each stage prints its elapsed time to stderr.
pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
    #[cfg(not(target_arch = "wasm32"))]
    let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
    #[cfg(target_arch = "wasm32")]
    let profile = false;

    // Line start offsets: 0, then the byte after every '\n'.
    let line_offsets = profile_section!("Line offsets", profile, {
        let mut offsets = vec![0];
        for (i, c) in content.char_indices() {
            if c == '\n' {
                offsets.push(i + 1);
            }
        }
        offsets
    });

    let code_blocks = profile_section!("Code blocks", profile, CodeBlockUtils::detect_code_blocks(content));

    let html_comment_ranges = profile_section!(
        "HTML comment ranges",
        profile,
        crate::utils::skip_context::compute_html_comment_ranges(content)
    );

    // Autodoc blocks are only looked for in the MkDocs flavor.
    let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
        if flavor == MarkdownFlavor::MkDocs {
            crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
        } else {
            Vec::new()
        }
    });

    // `lines` is created here and then refined in place by the stages below.
    let mut lines = profile_section!(
        "Basic line info",
        profile,
        Self::compute_basic_line_info(
            content,
            &line_offsets,
            &code_blocks,
            flavor,
            &html_comment_ranges,
            &autodoc_ranges,
        )
    );

    profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));

    profile_section!(
        "ESM blocks",
        profile,
        Self::detect_esm_blocks(content, &mut lines, flavor)
    );

    // Link ranges are an input to heading/blockquote detection.
    let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));

    profile_section!(
        "Headings & blockquotes",
        profile,
        Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
    );

    let code_spans = profile_section!("Code spans", profile, Self::parse_code_spans(content, &lines));

    // Flag every line after the first that a multi-line code span reaches.
    for span in &code_spans {
        if span.end_line > span.line {
            for line_num in (span.line + 1)..=span.end_line {
                if let Some(line_info) = lines.get_mut(line_num - 1) {
                    line_info.in_code_span_continuation = true;
                }
            }
        }
    }

    let (links, broken_links, footnote_refs) = profile_section!(
        "Links",
        profile,
        Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
    );

    let images = profile_section!(
        "Images",
        profile,
        Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
    );

    let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));

    let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));

    let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));

    let table_blocks = profile_section!(
        "Table blocks",
        profile,
        crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
            content,
            &code_blocks,
            &code_spans,
            &html_comment_ranges,
        )
    );

    let line_index = profile_section!(
        "Line index",
        profile,
        crate::utils::range_utils::LineIndex::new(content)
    );

    let jinja_ranges = profile_section!(
        "Jinja ranges",
        profile,
        crate::utils::jinja_utils::find_jinja_ranges(content)
    );

    Self {
        content,
        line_offsets,
        code_blocks,
        lines,
        links,
        images,
        broken_links,
        footnote_refs,
        reference_defs,
        // Code spans were already computed above, so the cache starts populated.
        code_spans_cache: OnceLock::from(Arc::new(code_spans)),
        list_blocks,
        char_frequency,
        html_tags_cache: OnceLock::new(),
        emphasis_spans_cache: OnceLock::new(),
        table_rows_cache: OnceLock::new(),
        bare_urls_cache: OnceLock::new(),
        has_mixed_list_nesting_cache: OnceLock::new(),
        html_comment_ranges,
        table_blocks,
        line_index,
        jinja_ranges,
        flavor,
        source_file,
    }
}
719
720 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
722 Arc::clone(
723 self.code_spans_cache
724 .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
725 )
726 }
727
728 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
730 &self.html_comment_ranges
731 }
732
733 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
735 Arc::clone(self.html_tags_cache.get_or_init(|| {
736 Arc::new(Self::parse_html_tags(
737 self.content,
738 &self.lines,
739 &self.code_blocks,
740 self.flavor,
741 ))
742 }))
743 }
744
745 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
747 Arc::clone(
748 self.emphasis_spans_cache
749 .get_or_init(|| Arc::new(Self::parse_emphasis_spans(self.content, &self.lines, &self.code_blocks))),
750 )
751 }
752
753 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
755 Arc::clone(
756 self.table_rows_cache
757 .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
758 )
759 }
760
761 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
763 Arc::clone(
764 self.bare_urls_cache
765 .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
766 )
767 }
768
769 pub fn has_mixed_list_nesting(&self) -> bool {
773 *self
774 .has_mixed_list_nesting_cache
775 .get_or_init(|| self.compute_mixed_list_nesting())
776 }
777
778 fn compute_mixed_list_nesting(&self) -> bool {
780 let mut stack: Vec<(usize, bool)> = Vec::new();
785 let mut last_was_blank = false;
786
787 for line_info in &self.lines {
788 if line_info.in_code_block
790 || line_info.in_front_matter
791 || line_info.in_mkdocstrings
792 || line_info.in_html_comment
793 || line_info.in_esm_block
794 {
795 continue;
796 }
797
798 if line_info.is_blank {
800 last_was_blank = true;
801 continue;
802 }
803
804 if let Some(list_item) = &line_info.list_item {
805 let current_pos = if list_item.marker_column == 1 {
807 0
808 } else {
809 list_item.marker_column
810 };
811
812 if last_was_blank && current_pos == 0 {
814 stack.clear();
815 }
816 last_was_blank = false;
817
818 while let Some(&(pos, _)) = stack.last() {
820 if pos >= current_pos {
821 stack.pop();
822 } else {
823 break;
824 }
825 }
826
827 if let Some(&(_, parent_is_ordered)) = stack.last()
829 && parent_is_ordered != list_item.is_ordered
830 {
831 return true; }
833
834 stack.push((current_pos, list_item.is_ordered));
835 } else {
836 last_was_blank = false;
838 }
839 }
840
841 false
842 }
843
844 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
846 match self.line_offsets.binary_search(&offset) {
847 Ok(line) => (line + 1, 1),
848 Err(line) => {
849 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
850 (line, offset - line_start + 1)
851 }
852 }
853 }
854
855 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
857 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
859 return true;
860 }
861
862 self.code_spans()
864 .iter()
865 .any(|span| pos >= span.byte_offset && pos < span.byte_end)
866 }
867
868 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
870 if line_num > 0 {
871 self.lines.get(line_num - 1)
872 } else {
873 None
874 }
875 }
876
877 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
879 self.line_info(line_num).map(|info| info.byte_offset)
880 }
881
882 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
884 let normalized_id = ref_id.to_lowercase();
885 self.reference_defs
886 .iter()
887 .find(|def| def.id == normalized_id)
888 .map(|def| def.url.as_str())
889 }
890
891 pub fn is_in_list_block(&self, line_num: usize) -> bool {
893 self.list_blocks
894 .iter()
895 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
896 }
897
898 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
900 self.list_blocks
901 .iter()
902 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
903 }
904
905 pub fn is_in_code_block(&self, line_num: usize) -> bool {
909 if line_num == 0 || line_num > self.lines.len() {
910 return false;
911 }
912 self.lines[line_num - 1].in_code_block
913 }
914
915 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
917 if line_num == 0 || line_num > self.lines.len() {
918 return false;
919 }
920 self.lines[line_num - 1].in_front_matter
921 }
922
923 pub fn is_in_html_block(&self, line_num: usize) -> bool {
925 if line_num == 0 || line_num > self.lines.len() {
926 return false;
927 }
928 self.lines[line_num - 1].in_html_block
929 }
930
931 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
933 if line_num == 0 || line_num > self.lines.len() {
934 return false;
935 }
936
937 let col_0indexed = if col > 0 { col - 1 } else { 0 };
941 let code_spans = self.code_spans();
942 code_spans.iter().any(|span| {
943 if line_num < span.line || line_num > span.end_line {
945 return false;
946 }
947
948 if span.line == span.end_line {
949 col_0indexed >= span.start_col && col_0indexed < span.end_col
951 } else if line_num == span.line {
952 col_0indexed >= span.start_col
954 } else if line_num == span.end_line {
955 col_0indexed < span.end_col
957 } else {
958 true
960 }
961 })
962 }
963
964 #[inline]
966 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
967 let code_spans = self.code_spans();
968 code_spans
969 .iter()
970 .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
971 }
972
973 #[inline]
976 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
977 self.reference_defs
978 .iter()
979 .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
980 }
981
982 #[inline]
986 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
987 self.html_comment_ranges
988 .iter()
989 .any(|range| byte_pos >= range.start && byte_pos < range.end)
990 }
991
992 #[inline]
995 pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
996 self.html_tags()
997 .iter()
998 .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
999 }
1000
1001 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1003 self.jinja_ranges
1004 .iter()
1005 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1006 }
1007
1008 pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1010 self.reference_defs.iter().any(|def| {
1011 if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1012 byte_pos >= start && byte_pos < end
1013 } else {
1014 false
1015 }
1016 })
1017 }
1018
1019 pub fn has_char(&self, ch: char) -> bool {
1021 match ch {
1022 '#' => self.char_frequency.hash_count > 0,
1023 '*' => self.char_frequency.asterisk_count > 0,
1024 '_' => self.char_frequency.underscore_count > 0,
1025 '-' => self.char_frequency.hyphen_count > 0,
1026 '+' => self.char_frequency.plus_count > 0,
1027 '>' => self.char_frequency.gt_count > 0,
1028 '|' => self.char_frequency.pipe_count > 0,
1029 '[' => self.char_frequency.bracket_count > 0,
1030 '`' => self.char_frequency.backtick_count > 0,
1031 '<' => self.char_frequency.lt_count > 0,
1032 '!' => self.char_frequency.exclamation_count > 0,
1033 '\n' => self.char_frequency.newline_count > 0,
1034 _ => self.content.contains(ch), }
1036 }
1037
1038 pub fn char_count(&self, ch: char) -> usize {
1040 match ch {
1041 '#' => self.char_frequency.hash_count,
1042 '*' => self.char_frequency.asterisk_count,
1043 '_' => self.char_frequency.underscore_count,
1044 '-' => self.char_frequency.hyphen_count,
1045 '+' => self.char_frequency.plus_count,
1046 '>' => self.char_frequency.gt_count,
1047 '|' => self.char_frequency.pipe_count,
1048 '[' => self.char_frequency.bracket_count,
1049 '`' => self.char_frequency.backtick_count,
1050 '<' => self.char_frequency.lt_count,
1051 '!' => self.char_frequency.exclamation_count,
1052 '\n' => self.char_frequency.newline_count,
1053 _ => self.content.matches(ch).count(), }
1055 }
1056
1057 pub fn likely_has_headings(&self) -> bool {
1059 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
1061
1062 pub fn likely_has_lists(&self) -> bool {
1064 self.char_frequency.asterisk_count > 0
1065 || self.char_frequency.hyphen_count > 0
1066 || self.char_frequency.plus_count > 0
1067 }
1068
1069 pub fn likely_has_emphasis(&self) -> bool {
1071 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1072 }
1073
1074 pub fn likely_has_tables(&self) -> bool {
1076 self.char_frequency.pipe_count > 2
1077 }
1078
1079 pub fn likely_has_blockquotes(&self) -> bool {
1081 self.char_frequency.gt_count > 0
1082 }
1083
1084 pub fn likely_has_code(&self) -> bool {
1086 self.char_frequency.backtick_count > 0
1087 }
1088
1089 pub fn likely_has_links_or_images(&self) -> bool {
1091 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1092 }
1093
1094 pub fn likely_has_html(&self) -> bool {
1096 self.char_frequency.lt_count > 0
1097 }
1098
1099 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1101 self.html_tags()
1102 .iter()
1103 .filter(|tag| tag.line == line_num)
1104 .cloned()
1105 .collect()
1106 }
1107
1108 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1110 self.emphasis_spans()
1111 .iter()
1112 .filter(|span| span.line == line_num)
1113 .cloned()
1114 .collect()
1115 }
1116
1117 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1119 self.table_rows()
1120 .iter()
1121 .filter(|row| row.line == line_num)
1122 .cloned()
1123 .collect()
1124 }
1125
1126 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1128 self.bare_urls()
1129 .iter()
1130 .filter(|url| url.line == line_num)
1131 .cloned()
1132 .collect()
1133 }
1134
1135 #[inline]
1141 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1142 let idx = match lines.binary_search_by(|line| {
1144 if byte_offset < line.byte_offset {
1145 std::cmp::Ordering::Greater
1146 } else if byte_offset > line.byte_offset + line.byte_len {
1147 std::cmp::Ordering::Less
1148 } else {
1149 std::cmp::Ordering::Equal
1150 }
1151 }) {
1152 Ok(idx) => idx,
1153 Err(idx) => idx.saturating_sub(1),
1154 };
1155
1156 let line = &lines[idx];
1157 let line_num = idx + 1;
1158 let col = byte_offset.saturating_sub(line.byte_offset);
1159
1160 (idx, line_num, col)
1161 }
1162
1163 #[inline]
1165 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1166 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1168
1169 if idx > 0 {
1171 let span = &code_spans[idx - 1];
1172 if offset >= span.byte_offset && offset < span.byte_end {
1173 return true;
1174 }
1175 }
1176
1177 false
1178 }
1179
1180 fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1184 use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1185
1186 let mut link_ranges = Vec::new();
1187 let mut options = Options::empty();
1188 options.insert(Options::ENABLE_WIKILINKS);
1189 options.insert(Options::ENABLE_FOOTNOTES);
1190
1191 let parser = Parser::new_ext(content, options).into_offset_iter();
1192 let mut link_stack: Vec<usize> = Vec::new();
1193
1194 for (event, range) in parser {
1195 match event {
1196 Event::Start(Tag::Link { .. }) => {
1197 link_stack.push(range.start);
1198 }
1199 Event::End(TagEnd::Link) => {
1200 if let Some(start_pos) = link_stack.pop() {
1201 link_ranges.push((start_pos, range.end));
1202 }
1203 }
1204 _ => {}
1205 }
1206 }
1207
1208 link_ranges
1209 }
1210
1211 fn parse_links(
1213 content: &'a str,
1214 lines: &[LineInfo],
1215 code_blocks: &[(usize, usize)],
1216 code_spans: &[CodeSpan],
1217 flavor: MarkdownFlavor,
1218 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1219 ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1220 use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1221 use std::collections::HashSet;
1222
1223 let mut links = Vec::with_capacity(content.len() / 500);
1224 let mut broken_links = Vec::new();
1225 let mut footnote_refs = Vec::new();
1226
1227 let mut found_positions = HashSet::new();
1229
1230 let mut options = Options::empty();
1240 options.insert(Options::ENABLE_WIKILINKS);
1241 options.insert(Options::ENABLE_FOOTNOTES);
1242
1243 let parser = Parser::new_with_broken_link_callback(
1244 content,
1245 options,
1246 Some(|link: BrokenLink<'_>| {
1247 broken_links.push(BrokenLinkInfo {
1248 reference: link.reference.to_string(),
1249 span: link.span.clone(),
1250 });
1251 None
1252 }),
1253 )
1254 .into_offset_iter();
1255
1256 let mut link_stack: Vec<(
1257 usize,
1258 usize,
1259 pulldown_cmark::CowStr<'a>,
1260 LinkType,
1261 pulldown_cmark::CowStr<'a>,
1262 )> = Vec::new();
1263 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1266 match event {
1267 Event::Start(Tag::Link {
1268 link_type,
1269 dest_url,
1270 id,
1271 ..
1272 }) => {
1273 link_stack.push((range.start, range.end, dest_url, link_type, id));
1275 text_chunks.clear();
1276 }
1277 Event::Text(text) if !link_stack.is_empty() => {
1278 text_chunks.push((text.to_string(), range.start, range.end));
1280 }
1281 Event::Code(code) if !link_stack.is_empty() => {
1282 let code_text = format!("`{code}`");
1284 text_chunks.push((code_text, range.start, range.end));
1285 }
1286 Event::End(TagEnd::Link) => {
1287 if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1288 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1290 text_chunks.clear();
1291 continue;
1292 }
1293
1294 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1296
1297 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1299 text_chunks.clear();
1300 continue;
1301 }
1302
1303 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1304
1305 let is_reference = matches!(
1306 link_type,
1307 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1308 );
1309
1310 let link_text = if start_pos < content.len() {
1313 let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1314
1315 let mut close_pos = None;
1319 let mut depth = 0;
1320 let mut in_code_span = false;
1321
1322 for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1323 let mut backslash_count = 0;
1325 let mut j = i;
1326 while j > 0 && link_bytes[j - 1] == b'\\' {
1327 backslash_count += 1;
1328 j -= 1;
1329 }
1330 let is_escaped = backslash_count % 2 != 0;
1331
1332 if byte == b'`' && !is_escaped {
1334 in_code_span = !in_code_span;
1335 }
1336
1337 if !is_escaped && !in_code_span {
1339 if byte == b'[' {
1340 depth += 1;
1341 } else if byte == b']' {
1342 if depth == 0 {
1343 close_pos = Some(i);
1345 break;
1346 } else {
1347 depth -= 1;
1348 }
1349 }
1350 }
1351 }
1352
1353 if let Some(pos) = close_pos {
1354 Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1355 } else {
1356 Cow::Borrowed("")
1357 }
1358 } else {
1359 Cow::Borrowed("")
1360 };
1361
1362 let reference_id = if is_reference && !ref_id.is_empty() {
1364 Some(Cow::Owned(ref_id.to_lowercase()))
1365 } else if is_reference {
1366 Some(Cow::Owned(link_text.to_lowercase()))
1368 } else {
1369 None
1370 };
1371
1372 found_positions.insert(start_pos);
1374
1375 links.push(ParsedLink {
1376 line: line_num,
1377 start_col: col_start,
1378 end_col: col_end,
1379 byte_offset: start_pos,
1380 byte_end: range.end,
1381 text: link_text,
1382 url: Cow::Owned(url.to_string()),
1383 is_reference,
1384 reference_id,
1385 link_type,
1386 });
1387
1388 text_chunks.clear();
1389 }
1390 }
1391 Event::FootnoteReference(footnote_id) => {
1392 if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1395 continue;
1396 }
1397
1398 let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1399 footnote_refs.push(FootnoteRef {
1400 id: footnote_id.to_string(),
1401 line: line_num,
1402 byte_offset: range.start,
1403 byte_end: range.end,
1404 });
1405 }
1406 _ => {}
1407 }
1408 }
1409
1410 for cap in LINK_PATTERN.captures_iter(content) {
1414 let full_match = cap.get(0).unwrap();
1415 let match_start = full_match.start();
1416 let match_end = full_match.end();
1417
1418 if found_positions.contains(&match_start) {
1420 continue;
1421 }
1422
1423 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1425 continue;
1426 }
1427
1428 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1430 continue;
1431 }
1432
1433 if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1435 continue;
1436 }
1437
1438 if Self::is_offset_in_code_span(code_spans, match_start) {
1440 continue;
1441 }
1442
1443 if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1445 continue;
1446 }
1447
1448 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1450
1451 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1453 continue;
1454 }
1455
1456 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1457
1458 let text = cap.get(1).map_or("", |m| m.as_str());
1459
1460 if let Some(ref_id) = cap.get(6) {
1462 let ref_id_str = ref_id.as_str();
1463 let normalized_ref = if ref_id_str.is_empty() {
1464 Cow::Owned(text.to_lowercase()) } else {
1466 Cow::Owned(ref_id_str.to_lowercase())
1467 };
1468
1469 links.push(ParsedLink {
1471 line: line_num,
1472 start_col: col_start,
1473 end_col: col_end,
1474 byte_offset: match_start,
1475 byte_end: match_end,
1476 text: Cow::Borrowed(text),
1477 url: Cow::Borrowed(""), is_reference: true,
1479 reference_id: Some(normalized_ref),
1480 link_type: LinkType::Reference, });
1482 }
1483 }
1484
1485 (links, broken_links, footnote_refs)
1486 }
1487
1488 fn parse_images(
1490 content: &'a str,
1491 lines: &[LineInfo],
1492 code_blocks: &[(usize, usize)],
1493 code_spans: &[CodeSpan],
1494 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1495 ) -> Vec<ParsedImage<'a>> {
1496 use crate::utils::skip_context::is_in_html_comment_ranges;
1497 use std::collections::HashSet;
1498
1499 let mut images = Vec::with_capacity(content.len() / 1000);
1501 let mut found_positions = HashSet::new();
1502
1503 let parser = Parser::new(content).into_offset_iter();
1505 let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1506 Vec::new();
1507 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1510 match event {
1511 Event::Start(Tag::Image {
1512 link_type,
1513 dest_url,
1514 id,
1515 ..
1516 }) => {
1517 image_stack.push((range.start, dest_url, link_type, id));
1518 text_chunks.clear();
1519 }
1520 Event::Text(text) if !image_stack.is_empty() => {
1521 text_chunks.push((text.to_string(), range.start, range.end));
1522 }
1523 Event::Code(code) if !image_stack.is_empty() => {
1524 let code_text = format!("`{code}`");
1525 text_chunks.push((code_text, range.start, range.end));
1526 }
1527 Event::End(TagEnd::Image) => {
1528 if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1529 if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1531 continue;
1532 }
1533
1534 if Self::is_offset_in_code_span(code_spans, start_pos) {
1536 continue;
1537 }
1538
1539 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1541 continue;
1542 }
1543
1544 let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1546 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1547
1548 let is_reference = matches!(
1549 link_type,
1550 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1551 );
1552
1553 let alt_text = if start_pos < content.len() {
1556 let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1557
1558 let mut close_pos = None;
1561 let mut depth = 0;
1562
1563 if image_bytes.len() > 2 {
1564 for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1565 let mut backslash_count = 0;
1567 let mut j = i;
1568 while j > 0 && image_bytes[j - 1] == b'\\' {
1569 backslash_count += 1;
1570 j -= 1;
1571 }
1572 let is_escaped = backslash_count % 2 != 0;
1573
1574 if !is_escaped {
1575 if byte == b'[' {
1576 depth += 1;
1577 } else if byte == b']' {
1578 if depth == 0 {
1579 close_pos = Some(i);
1581 break;
1582 } else {
1583 depth -= 1;
1584 }
1585 }
1586 }
1587 }
1588 }
1589
1590 if let Some(pos) = close_pos {
1591 Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1592 } else {
1593 Cow::Borrowed("")
1594 }
1595 } else {
1596 Cow::Borrowed("")
1597 };
1598
1599 let reference_id = if is_reference && !ref_id.is_empty() {
1600 Some(Cow::Owned(ref_id.to_lowercase()))
1601 } else if is_reference {
1602 Some(Cow::Owned(alt_text.to_lowercase())) } else {
1604 None
1605 };
1606
1607 found_positions.insert(start_pos);
1608 images.push(ParsedImage {
1609 line: line_num,
1610 start_col: col_start,
1611 end_col: col_end,
1612 byte_offset: start_pos,
1613 byte_end: range.end,
1614 alt_text,
1615 url: Cow::Owned(url.to_string()),
1616 is_reference,
1617 reference_id,
1618 link_type,
1619 });
1620 }
1621 }
1622 _ => {}
1623 }
1624 }
1625
1626 for cap in IMAGE_PATTERN.captures_iter(content) {
1628 let full_match = cap.get(0).unwrap();
1629 let match_start = full_match.start();
1630 let match_end = full_match.end();
1631
1632 if found_positions.contains(&match_start) {
1634 continue;
1635 }
1636
1637 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1639 continue;
1640 }
1641
1642 if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1644 || Self::is_offset_in_code_span(code_spans, match_start)
1645 || is_in_html_comment_ranges(html_comment_ranges, match_start)
1646 {
1647 continue;
1648 }
1649
1650 if let Some(ref_id) = cap.get(6) {
1652 let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1653 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1654 let alt_text = cap.get(1).map_or("", |m| m.as_str());
1655 let ref_id_str = ref_id.as_str();
1656 let normalized_ref = if ref_id_str.is_empty() {
1657 Cow::Owned(alt_text.to_lowercase())
1658 } else {
1659 Cow::Owned(ref_id_str.to_lowercase())
1660 };
1661
1662 images.push(ParsedImage {
1663 line: line_num,
1664 start_col: col_start,
1665 end_col: col_end,
1666 byte_offset: match_start,
1667 byte_end: match_end,
1668 alt_text: Cow::Borrowed(alt_text),
1669 url: Cow::Borrowed(""),
1670 is_reference: true,
1671 reference_id: Some(normalized_ref),
1672 link_type: LinkType::Reference, });
1674 }
1675 }
1676
1677 images
1678 }
1679
1680 fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1682 let mut refs = Vec::with_capacity(lines.len() / 20); for (line_idx, line_info) in lines.iter().enumerate() {
1686 if line_info.in_code_block {
1688 continue;
1689 }
1690
1691 let line = line_info.content(content);
1692 let line_num = line_idx + 1;
1693
1694 if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1695 let id = cap.get(1).unwrap().as_str().to_lowercase();
1696 let url = cap.get(2).unwrap().as_str().to_string();
1697 let title_match = cap.get(3).or_else(|| cap.get(4));
1698 let title = title_match.map(|m| m.as_str().to_string());
1699
1700 let match_obj = cap.get(0).unwrap();
1703 let byte_offset = line_info.byte_offset + match_obj.start();
1704 let byte_end = line_info.byte_offset + match_obj.end();
1705
1706 let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1708 let start = line_info.byte_offset + m.start().saturating_sub(1);
1710 let end = line_info.byte_offset + m.end() + 1; (Some(start), Some(end))
1712 } else {
1713 (None, None)
1714 };
1715
1716 refs.push(ReferenceDef {
1717 line: line_num,
1718 id,
1719 url,
1720 title,
1721 byte_offset,
1722 byte_end,
1723 title_byte_start,
1724 title_byte_end,
1725 });
1726 }
1727 }
1728
1729 refs
1730 }
1731
1732 #[inline]
1736 fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
1737 let trimmed_start = line.trim_start();
1738 if !trimmed_start.starts_with('>') {
1739 return None;
1740 }
1741
1742 let mut remaining = line;
1744 let mut total_prefix_len = 0;
1745
1746 loop {
1747 let trimmed = remaining.trim_start();
1748 if !trimmed.starts_with('>') {
1749 break;
1750 }
1751
1752 let leading_ws_len = remaining.len() - trimmed.len();
1754 total_prefix_len += leading_ws_len + 1;
1755
1756 let after_gt = &trimmed[1..];
1757
1758 if let Some(stripped) = after_gt.strip_prefix(' ') {
1760 total_prefix_len += 1;
1761 remaining = stripped;
1762 } else if let Some(stripped) = after_gt.strip_prefix('\t') {
1763 total_prefix_len += 1;
1764 remaining = stripped;
1765 } else {
1766 remaining = after_gt;
1767 }
1768 }
1769
1770 Some((&line[..total_prefix_len], remaining))
1771 }
1772
1773 #[inline]
1777 fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
1778 let bytes = line.as_bytes();
1779 let mut i = 0;
1780
1781 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1783 i += 1;
1784 }
1785
1786 if i >= bytes.len() {
1788 return None;
1789 }
1790 let marker = bytes[i] as char;
1791 if marker != '-' && marker != '*' && marker != '+' {
1792 return None;
1793 }
1794 let marker_pos = i;
1795 i += 1;
1796
1797 let spacing_start = i;
1799 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1800 i += 1;
1801 }
1802
1803 Some((&line[..marker_pos], marker, &line[spacing_start..i], &line[i..]))
1804 }
1805
1806 #[inline]
1810 fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
1811 let bytes = line.as_bytes();
1812 let mut i = 0;
1813
1814 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1816 i += 1;
1817 }
1818
1819 let number_start = i;
1821 while i < bytes.len() && bytes[i].is_ascii_digit() {
1822 i += 1;
1823 }
1824 if i == number_start {
1825 return None; }
1827
1828 if i >= bytes.len() {
1830 return None;
1831 }
1832 let delimiter = bytes[i] as char;
1833 if delimiter != '.' && delimiter != ')' {
1834 return None;
1835 }
1836 let delimiter_pos = i;
1837 i += 1;
1838
1839 let spacing_start = i;
1841 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1842 i += 1;
1843 }
1844
1845 Some((
1846 &line[..number_start],
1847 &line[number_start..delimiter_pos],
1848 delimiter,
1849 &line[spacing_start..i],
1850 &line[i..],
1851 ))
1852 }
1853
1854 fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
1857 let num_lines = line_offsets.len();
1858 let mut in_code_block = vec![false; num_lines];
1859
1860 for &(start, end) in code_blocks {
1862 let safe_start = if start > 0 && !content.is_char_boundary(start) {
1864 let mut boundary = start;
1865 while boundary > 0 && !content.is_char_boundary(boundary) {
1866 boundary -= 1;
1867 }
1868 boundary
1869 } else {
1870 start
1871 };
1872
1873 let safe_end = if end < content.len() && !content.is_char_boundary(end) {
1874 let mut boundary = end;
1875 while boundary < content.len() && !content.is_char_boundary(boundary) {
1876 boundary += 1;
1877 }
1878 boundary
1879 } else {
1880 end.min(content.len())
1881 };
1882
1883 let first_line_after = line_offsets.partition_point(|&offset| offset <= safe_start);
1902 let first_line = first_line_after.saturating_sub(1);
1903 let last_line = line_offsets.partition_point(|&offset| offset < safe_end);
1904
1905 for flag in in_code_block.iter_mut().take(last_line).skip(first_line) {
1907 *flag = true;
1908 }
1909 }
1910
1911 in_code_block
1912 }
1913
1914 fn compute_basic_line_info(
1916 content: &str,
1917 line_offsets: &[usize],
1918 code_blocks: &[(usize, usize)],
1919 flavor: MarkdownFlavor,
1920 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1921 autodoc_ranges: &[crate::utils::skip_context::ByteRange],
1922 ) -> Vec<LineInfo> {
1923 let content_lines: Vec<&str> = content.lines().collect();
1924 let mut lines = Vec::with_capacity(content_lines.len());
1925
1926 let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
1928
1929 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
1932
1933 for (i, line) in content_lines.iter().enumerate() {
1934 let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
1935 let indent = line.len() - line.trim_start().len();
1936 let visual_indent = ElementCache::calculate_indentation_width_default(line);
1938
1939 let blockquote_parse = Self::parse_blockquote_prefix(line);
1941
1942 let is_blank = if let Some((_, content)) = blockquote_parse {
1944 content.trim().is_empty()
1946 } else {
1947 line.trim().is_empty()
1948 };
1949
1950 let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
1952
1953 let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
1955 && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
1956 let line_end_offset = byte_offset + line.len();
1959 let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
1960 html_comment_ranges,
1961 byte_offset,
1962 line_end_offset,
1963 );
1964 let list_item = if !(in_code_block
1965 || is_blank
1966 || in_mkdocstrings
1967 || in_html_comment
1968 || (front_matter_end > 0 && i < front_matter_end))
1969 {
1970 let (line_for_list_check, blockquote_prefix_len) = if let Some((prefix, content)) = blockquote_parse {
1972 (content, prefix.len())
1973 } else {
1974 (&**line, 0)
1975 };
1976
1977 if let Some((leading_spaces, marker, spacing, _content)) =
1978 Self::parse_unordered_list(line_for_list_check)
1979 {
1980 let marker_column = blockquote_prefix_len + leading_spaces.len();
1981 let content_column = marker_column + 1 + spacing.len();
1982
1983 if spacing.is_empty() {
1990 None
1991 } else {
1992 Some(ListItemInfo {
1993 marker: marker.to_string(),
1994 is_ordered: false,
1995 number: None,
1996 marker_column,
1997 content_column,
1998 })
1999 }
2000 } else if let Some((leading_spaces, number_str, delimiter, spacing, content)) =
2001 Self::parse_ordered_list(line_for_list_check)
2002 {
2003 let marker = format!("{number_str}{delimiter}");
2004 let marker_column = blockquote_prefix_len + leading_spaces.len();
2005 let content_column = marker_column + marker.len() + spacing.len();
2006
2007 let content_after_spacing = content.trim();
2015 if spacing.is_empty() && !content_after_spacing.is_empty() {
2016 None
2017 } else {
2018 Some(ListItemInfo {
2019 marker,
2020 is_ordered: true,
2021 number: number_str.parse().ok(),
2022 marker_column,
2023 content_column,
2024 })
2025 }
2026 } else {
2027 None
2028 }
2029 } else {
2030 None
2031 };
2032
2033 let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2036 let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2037
2038 lines.push(LineInfo {
2039 byte_offset,
2040 byte_len: line.len(),
2041 indent,
2042 visual_indent,
2043 is_blank,
2044 in_code_block,
2045 in_front_matter,
2046 in_html_block: false, in_html_comment,
2048 list_item,
2049 heading: None, blockquote: None, in_mkdocstrings,
2052 in_esm_block: false, in_code_span_continuation: false, is_horizontal_rule: is_hr,
2055 });
2056 }
2057
2058 lines
2059 }
2060
    /// Fills in `blockquote` and `heading` on each entry of `lines`.
    ///
    /// Lines inside code blocks, front matter or HTML blocks are skipped
    /// entirely. For every other line:
    ///
    /// * if `parse_blockquote_detailed` recognises it, a `BlockquoteInfo` is
    ///   stored (this happens before the blank-line check, so blank lines can
    ///   still receive blockquote info);
    /// * non-blank lines are then tested for an ATX heading (`#` .. `######`)
    ///   and, failing that, for a Setext heading (the *next* line is an
    ///   underline of `=` or `-`).
    ///
    /// `link_byte_ranges` lists `(start, end)` byte ranges of links; a line
    /// whose first byte lies strictly inside one of them is not treated as an
    /// ATX heading. `html_comment_ranges` suppresses headings whose line
    /// starts inside an HTML comment.
    ///
    /// Indexing `content_lines[i]` assumes `lines` was built from the same
    /// `content` and has one entry per `content.lines()` item.
    fn detect_headings_and_blockquotes(
        content: &str,
        lines: &mut [LineInfo],
        flavor: MarkdownFlavor,
        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
        link_byte_ranges: &[(usize, usize)],
    ) {
        // Groups: 1 = leading whitespace, 2 = 1-6 hashes, 3 = whitespace after
        // the hashes (possibly empty), 4 = rest of the line.
        static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
            LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
        // A line made only of `=` or only of `-`, with optional surrounding whitespace.
        static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
            LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());

        let content_lines: Vec<&str> = content.lines().collect();

        // Lines with index below this value are treated as front matter (0 = none).
        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);

        for i in 0..lines.len() {
            if lines[i].in_code_block {
                continue;
            }

            if front_matter_end > 0 && i < front_matter_end {
                continue;
            }

            if lines[i].in_html_block {
                continue;
            }

            let line = content_lines[i];

            // --- Blockquote detection ---
            if let Some(bq) = parse_blockquote_detailed(line) {
                // Nesting level is the length of the marker string
                // (presumably one `>` per level — confirm against the helper).
                let nesting_level = bq.markers.len();
                let marker_column = bq.indent.len();

                let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);

                // `>text`: content follows the marker with nothing in between.
                let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
                // More than one literal space character after the marker.
                let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;

                // A bare marker with neither spacing nor content.
                let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();

                lines[i].blockquote = Some(BlockquoteInfo {
                    nesting_level,
                    indent: bq.indent.to_string(),
                    marker_column,
                    prefix,
                    content: bq.content.to_string(),
                    has_no_space_after_marker: has_no_space,
                    has_multiple_spaces_after_marker: has_multiple_spaces,
                    needs_md028_fix,
                });
            }

            // Blank lines cannot be headings.
            if lines[i].is_blank {
                continue;
            }

            // In the MkDocs flavor, snippet section start/end lines are never
            // treated as ATX headings.
            let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
                crate::utils::mkdocs_snippets::is_snippet_section_start(line)
                    || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
            } else {
                false
            };

            // --- ATX heading detection ---
            if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
                if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
                    continue;
                }
                // Skip lines that begin strictly inside a link's byte range.
                let line_offset = lines[i].byte_offset;
                if link_byte_ranges
                    .iter()
                    .any(|&(start, end)| line_offset > start && line_offset < end)
                {
                    continue;
                }
                let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
                let hashes = caps.get(2).map_or("", |m| m.as_str());
                let spaces_after = caps.get(3).map_or("", |m| m.as_str());
                let rest = caps.get(4).map_or("", |m| m.as_str());

                let level = hashes.len() as u8;
                let marker_column = leading_spaces.len();

                // Work out the heading text and whether it ends with a closing
                // sequence of hashes (e.g. `## Title ##`).
                let (text, has_closing, closing_seq) = {
                    // Split off a trailing ` {#...}` custom-id suffix so it does
                    // not interfere with closing-sequence detection.
                    let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
                        if rest[id_start..].trim_end().ends_with('}') {
                            (&rest[..id_start], &rest[id_start..])
                        } else {
                            (rest, "")
                        }
                    } else {
                        (rest, "")
                    };

                    let trimmed_rest = rest_without_id.trim_end();
                    if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
                        // Character-indexed view so we can step backwards one
                        // char at a time regardless of UTF-8 width.
                        let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();

                        let last_hash_char_idx = char_positions
                            .iter()
                            .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);

                        if let Some(mut char_idx) = last_hash_char_idx {
                            // Walk back to the first hash of the contiguous run.
                            while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
                                char_idx -= 1;
                            }

                            let start_of_hashes = char_positions[char_idx].0;

                            // The run must start the text or follow whitespace.
                            let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();

                            // True only when the run of hashes reaches the end
                            // of the trimmed text.
                            let potential_closing = &trimmed_rest[start_of_hashes..];
                            let is_all_hashes = potential_closing.chars().all(|c| c == '#');

                            if is_all_hashes && has_space_before {
                                let closing_hashes = potential_closing.to_string();
                                // Re-attach the custom-id suffix (if any) to the
                                // text that precedes the closing hashes.
                                let text_part = if !custom_id_part.is_empty() {
                                    format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
                                } else {
                                    trimmed_rest[..start_of_hashes].trim_end().to_string()
                                };
                                (text_part, true, closing_hashes)
                            } else {
                                // Hashes are part of the text, not a closing sequence.
                                (rest.to_string(), false, String::new())
                            }
                        } else {
                            (rest.to_string(), false, String::new())
                        }
                    } else {
                        // No `#` in the text at all.
                        (rest.to_string(), false, String::new())
                    }
                };

                let content_column = marker_column + hashes.len() + spaces_after.len();

                let raw_text = text.trim().to_string();
                let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);

                // No inline id: look for a standalone attribute list on the
                // following line (outside code blocks).
                if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
                    let next_line = content_lines[i + 1];
                    if !lines[i + 1].in_code_block
                        && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
                        && let Some(next_line_id) =
                            crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
                    {
                        custom_id = Some(next_line_id);
                    }
                }

                // A heading without a space after the hashes is only considered
                // valid when it has no text, is level 2+, or its text starts with
                // an uppercase character (presumably to avoid flagging
                // `#tag`-style text as a malformed heading — confirm).
                let is_valid = !spaces_after.is_empty()
                    || rest.is_empty()
                    || level > 1
                    || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());

                lines[i].heading = Some(HeadingInfo {
                    level,
                    style: HeadingStyle::ATX,
                    marker: hashes.to_string(),
                    marker_column,
                    content_column,
                    text: clean_text,
                    custom_id,
                    raw_text,
                    has_closing_sequence: has_closing,
                    closing_sequence: closing_seq,
                    is_valid,
                });
            }
            // --- Setext heading detection (needs a following underline line) ---
            else if i + 1 < content_lines.len() && i + 1 < lines.len() {
                let next_line = content_lines[i + 1];
                if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
                    // NOTE(review): this repeats the front-matter check made at
                    // the top of the loop body, so it cannot trigger here.
                    if front_matter_end > 0 && i < front_matter_end {
                        continue;
                    }

                    if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
                    {
                        continue;
                    }

                    let underline = next_line.trim();

                    // `=` underline -> level 1, `-` underline -> level 2.
                    let level = if underline.starts_with('=') { 1 } else { 2 };
                    let style = if level == 1 {
                        HeadingStyle::Setext1
                    } else {
                        HeadingStyle::Setext2
                    };

                    let raw_text = line.trim().to_string();
                    let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);

                    // No inline id: look for a standalone attribute list on the
                    // line after the underline.
                    if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
                        let attr_line = content_lines[i + 2];
                        if !lines[i + 2].in_code_block
                            && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
                            && let Some(attr_line_id) =
                                crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
                        {
                            custom_id = Some(attr_line_id);
                        }
                    }

                    // For Setext headings the marker is the underline text, the
                    // marker column is the underline's indentation and the
                    // content column is the text line's indentation.
                    lines[i].heading = Some(HeadingInfo {
                        level,
                        style,
                        marker: underline.to_string(),
                        marker_column: next_line.len() - next_line.trim_start().len(),
                        content_column: lines[i].indent,
                        text: clean_text,
                        custom_id,
                        raw_text,
                        has_closing_sequence: false,
                        closing_sequence: String::new(),
                        is_valid: true,
                    });
                }
            }
        }
    }
2338
2339 fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2341 const BLOCK_ELEMENTS: &[&str] = &[
2344 "address",
2345 "article",
2346 "aside",
2347 "audio",
2348 "blockquote",
2349 "canvas",
2350 "details",
2351 "dialog",
2352 "dd",
2353 "div",
2354 "dl",
2355 "dt",
2356 "embed",
2357 "fieldset",
2358 "figcaption",
2359 "figure",
2360 "footer",
2361 "form",
2362 "h1",
2363 "h2",
2364 "h3",
2365 "h4",
2366 "h5",
2367 "h6",
2368 "header",
2369 "hr",
2370 "iframe",
2371 "li",
2372 "main",
2373 "menu",
2374 "nav",
2375 "noscript",
2376 "object",
2377 "ol",
2378 "p",
2379 "picture",
2380 "pre",
2381 "script",
2382 "search",
2383 "section",
2384 "source",
2385 "style",
2386 "summary",
2387 "svg",
2388 "table",
2389 "tbody",
2390 "td",
2391 "template",
2392 "textarea",
2393 "tfoot",
2394 "th",
2395 "thead",
2396 "tr",
2397 "track",
2398 "ul",
2399 "video",
2400 ];
2401
2402 let mut i = 0;
2403 while i < lines.len() {
2404 if lines[i].in_code_block || lines[i].in_front_matter {
2406 i += 1;
2407 continue;
2408 }
2409
2410 let trimmed = lines[i].content(content).trim_start();
2411
2412 if trimmed.starts_with('<') && trimmed.len() > 1 {
2414 let after_bracket = &trimmed[1..];
2416 let is_closing = after_bracket.starts_with('/');
2417 let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2418
2419 let tag_name = tag_start
2421 .chars()
2422 .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2423 .collect::<String>()
2424 .to_lowercase();
2425
2426 if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2428 lines[i].in_html_block = true;
2430
2431 if !is_closing {
2434 let closing_tag = format!("</{tag_name}>");
2435 let allow_blank_lines = tag_name == "style" || tag_name == "script";
2437 let mut j = i + 1;
2438 while j < lines.len() && j < i + 100 {
2439 if !allow_blank_lines && lines[j].is_blank {
2442 break;
2443 }
2444
2445 lines[j].in_html_block = true;
2446
2447 if lines[j].content(content).contains(&closing_tag) {
2449 break;
2450 }
2451 j += 1;
2452 }
2453 }
2454 }
2455 }
2456
2457 i += 1;
2458 }
2459 }
2460
2461 fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2464 if !flavor.supports_esm_blocks() {
2466 return;
2467 }
2468
2469 let mut in_multiline_comment = false;
2470
2471 for line in lines.iter_mut() {
2472 if line.is_blank || line.in_html_comment {
2474 continue;
2475 }
2476
2477 let trimmed = line.content(content).trim_start();
2478
2479 if in_multiline_comment {
2481 if trimmed.contains("*/") {
2482 in_multiline_comment = false;
2483 }
2484 continue;
2485 }
2486
2487 if trimmed.starts_with("//") {
2489 continue;
2490 }
2491
2492 if trimmed.starts_with("/*") {
2494 if !trimmed.contains("*/") {
2495 in_multiline_comment = true;
2496 }
2497 continue;
2498 }
2499
2500 if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
2502 line.in_esm_block = true;
2503 } else {
2504 break;
2506 }
2507 }
2508 }
2509
2510 fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
2512 let mut code_spans = Vec::new();
2513
2514 if !content.contains('`') {
2516 return code_spans;
2517 }
2518
2519 let parser = Parser::new(content).into_offset_iter();
2521
2522 for (event, range) in parser {
2523 if let Event::Code(_) = event {
2524 let start_pos = range.start;
2525 let end_pos = range.end;
2526
2527 let full_span = &content[start_pos..end_pos];
2529 let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
2530
2531 let content_start = start_pos + backtick_count;
2533 let content_end = end_pos - backtick_count;
2534 let span_content = if content_start < content_end {
2535 content[content_start..content_end].to_string()
2536 } else {
2537 String::new()
2538 };
2539
2540 let line_idx = lines
2543 .partition_point(|line| line.byte_offset <= start_pos)
2544 .saturating_sub(1);
2545 let line_num = line_idx + 1;
2546 let byte_col_start = start_pos - lines[line_idx].byte_offset;
2547
2548 let end_line_idx = lines
2550 .partition_point(|line| line.byte_offset <= end_pos)
2551 .saturating_sub(1);
2552 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
2553
2554 let line_content = lines[line_idx].content(content);
2557 let col_start = if byte_col_start <= line_content.len() {
2558 line_content[..byte_col_start].chars().count()
2559 } else {
2560 line_content.chars().count()
2561 };
2562
2563 let end_line_content = lines[end_line_idx].content(content);
2564 let col_end = if byte_col_end <= end_line_content.len() {
2565 end_line_content[..byte_col_end].chars().count()
2566 } else {
2567 end_line_content.chars().count()
2568 };
2569
2570 code_spans.push(CodeSpan {
2571 line: line_num,
2572 end_line: end_line_idx + 1,
2573 start_col: col_start,
2574 end_col: col_end,
2575 byte_offset: start_pos,
2576 byte_end: end_pos,
2577 backtick_count,
2578 content: span_content,
2579 });
2580 }
2581 }
2582
2583 code_spans.sort_by_key(|span| span.byte_offset);
2585
2586 code_spans
2587 }
2588
2589 fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
2600 const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
2602
2603 #[inline]
2606 fn reset_tracking_state(
2607 list_item: &ListItemInfo,
2608 has_list_breaking_content: &mut bool,
2609 min_continuation: &mut usize,
2610 ) {
2611 *has_list_breaking_content = false;
2612 let marker_width = if list_item.is_ordered {
2613 list_item.marker.len() + 1 } else {
2615 list_item.marker.len()
2616 };
2617 *min_continuation = if list_item.is_ordered {
2618 marker_width
2619 } else {
2620 UNORDERED_LIST_MIN_CONTINUATION_INDENT
2621 };
2622 }
2623
2624 let mut list_blocks = Vec::with_capacity(lines.len() / 10); let mut current_block: Option<ListBlock> = None;
2627 let mut last_list_item_line = 0;
2628 let mut current_indent_level = 0;
2629 let mut last_marker_width = 0;
2630
2631 let mut has_list_breaking_content_since_last_item = false;
2633 let mut min_continuation_for_tracking = 0;
2634
2635 for (line_idx, line_info) in lines.iter().enumerate() {
2636 let line_num = line_idx + 1;
2637
2638 if line_info.in_code_block {
2640 if let Some(ref mut block) = current_block {
2641 let min_continuation_indent =
2643 CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
2644
2645 let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
2647
2648 match context {
2649 CodeBlockContext::Indented => {
2650 block.end_line = line_num;
2652 continue;
2653 }
2654 CodeBlockContext::Standalone => {
2655 let completed_block = current_block.take().unwrap();
2657 list_blocks.push(completed_block);
2658 continue;
2659 }
2660 CodeBlockContext::Adjacent => {
2661 block.end_line = line_num;
2663 continue;
2664 }
2665 }
2666 } else {
2667 continue;
2669 }
2670 }
2671
2672 let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
2674 caps.get(0).unwrap().as_str().to_string()
2675 } else {
2676 String::new()
2677 };
2678
2679 if current_block.is_some()
2682 && line_info.list_item.is_none()
2683 && !line_info.is_blank
2684 && !line_info.in_code_span_continuation
2685 {
2686 let line_content = line_info.content(content).trim();
2687
2688 let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
2693 let breaks_list = line_info.heading.is_some()
2694 || line_content.starts_with("---")
2695 || line_content.starts_with("***")
2696 || line_content.starts_with("___")
2697 || crate::utils::skip_context::is_table_line(line_content)
2698 || line_content.starts_with(">")
2699 || (line_info.indent > 0
2700 && line_info.indent < min_continuation_for_tracking
2701 && !is_lazy_continuation);
2702
2703 if breaks_list {
2704 has_list_breaking_content_since_last_item = true;
2705 }
2706 }
2707
2708 if line_info.in_code_span_continuation
2711 && line_info.list_item.is_none()
2712 && let Some(ref mut block) = current_block
2713 {
2714 block.end_line = line_num;
2715 }
2716
2717 let is_valid_continuation =
2722 line_info.indent >= min_continuation_for_tracking || (line_info.indent == 0 && !line_info.is_blank); if !line_info.in_code_span_continuation
2724 && line_info.list_item.is_none()
2725 && !line_info.is_blank
2726 && !line_info.in_code_block
2727 && is_valid_continuation
2728 && let Some(ref mut block) = current_block
2729 {
2730 block.end_line = line_num;
2731 }
2732
2733 if let Some(list_item) = &line_info.list_item {
2735 let item_indent = list_item.marker_column;
2737 let nesting = item_indent / 2; if let Some(ref mut block) = current_block {
2740 let is_nested = nesting > block.nesting_level;
2744 let same_type =
2745 (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
2746 let same_context = block.blockquote_prefix == blockquote_prefix;
2747 let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
2749
2750 let marker_compatible =
2752 block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
2753
2754 let has_non_list_content = has_list_breaking_content_since_last_item;
2757
2758 let mut continues_list = if is_nested {
2762 same_context && reasonable_distance && !has_non_list_content
2764 } else {
2765 same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
2767 };
2768
2769 if !continues_list && reasonable_distance && line_num > 0 && block.end_line == line_num - 1 {
2772 if block.item_lines.contains(&(line_num - 1)) {
2775 continues_list = true;
2777 } else {
2778 continues_list = true;
2782 }
2783 }
2784
2785 if continues_list {
2786 block.end_line = line_num;
2788 block.item_lines.push(line_num);
2789
2790 block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
2792 list_item.marker.len() + 1
2793 } else {
2794 list_item.marker.len()
2795 });
2796
2797 if !block.is_ordered
2799 && block.marker.is_some()
2800 && block.marker.as_ref() != Some(&list_item.marker)
2801 {
2802 block.marker = None;
2804 }
2805
2806 reset_tracking_state(
2808 list_item,
2809 &mut has_list_breaking_content_since_last_item,
2810 &mut min_continuation_for_tracking,
2811 );
2812 } else {
2813 list_blocks.push(block.clone());
2816
2817 *block = ListBlock {
2818 start_line: line_num,
2819 end_line: line_num,
2820 is_ordered: list_item.is_ordered,
2821 marker: if list_item.is_ordered {
2822 None
2823 } else {
2824 Some(list_item.marker.clone())
2825 },
2826 blockquote_prefix: blockquote_prefix.clone(),
2827 item_lines: vec![line_num],
2828 nesting_level: nesting,
2829 max_marker_width: if list_item.is_ordered {
2830 list_item.marker.len() + 1
2831 } else {
2832 list_item.marker.len()
2833 },
2834 };
2835
2836 reset_tracking_state(
2838 list_item,
2839 &mut has_list_breaking_content_since_last_item,
2840 &mut min_continuation_for_tracking,
2841 );
2842 }
2843 } else {
2844 current_block = Some(ListBlock {
2846 start_line: line_num,
2847 end_line: line_num,
2848 is_ordered: list_item.is_ordered,
2849 marker: if list_item.is_ordered {
2850 None
2851 } else {
2852 Some(list_item.marker.clone())
2853 },
2854 blockquote_prefix,
2855 item_lines: vec![line_num],
2856 nesting_level: nesting,
2857 max_marker_width: list_item.marker.len(),
2858 });
2859
2860 reset_tracking_state(
2862 list_item,
2863 &mut has_list_breaking_content_since_last_item,
2864 &mut min_continuation_for_tracking,
2865 );
2866 }
2867
2868 last_list_item_line = line_num;
2869 current_indent_level = item_indent;
2870 last_marker_width = if list_item.is_ordered {
2871 list_item.marker.len() + 1 } else {
2873 list_item.marker.len()
2874 };
2875 } else if let Some(ref mut block) = current_block {
2876 let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
2886 lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
2887 } else {
2888 false
2889 };
2890
2891 let min_continuation_indent = if block.is_ordered {
2895 current_indent_level + last_marker_width
2896 } else {
2897 current_indent_level + 2 };
2899
2900 if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
2901 block.end_line = line_num;
2903 } else if line_info.is_blank {
2904 let mut check_idx = line_idx + 1;
2907 let mut found_continuation = false;
2908
2909 while check_idx < lines.len() && lines[check_idx].is_blank {
2911 check_idx += 1;
2912 }
2913
2914 if check_idx < lines.len() {
2915 let next_line = &lines[check_idx];
2916 if !next_line.in_code_block && next_line.indent >= min_continuation_indent {
2918 found_continuation = true;
2919 }
2920 else if !next_line.in_code_block
2922 && next_line.list_item.is_some()
2923 && let Some(item) = &next_line.list_item
2924 {
2925 let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
2926 .find(next_line.content(content))
2927 .map_or(String::new(), |m| m.as_str().to_string());
2928 if item.marker_column == current_indent_level
2929 && item.is_ordered == block.is_ordered
2930 && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
2931 {
2932 let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
2935 if let Some(between_line) = lines.get(idx) {
2936 let between_content = between_line.content(content);
2937 let trimmed = between_content.trim();
2938 if trimmed.is_empty() {
2940 return false;
2941 }
2942 let line_indent = between_content.len() - between_content.trim_start().len();
2944
2945 if trimmed.starts_with("```")
2947 || trimmed.starts_with("~~~")
2948 || trimmed.starts_with("---")
2949 || trimmed.starts_with("***")
2950 || trimmed.starts_with("___")
2951 || trimmed.starts_with(">")
2952 || crate::utils::skip_context::is_table_line(trimmed)
2953 || between_line.heading.is_some()
2954 {
2955 return true; }
2957
2958 line_indent >= min_continuation_indent
2960 } else {
2961 false
2962 }
2963 });
2964
2965 if block.is_ordered {
2966 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2969 if let Some(between_line) = lines.get(idx) {
2970 let trimmed = between_line.content(content).trim();
2971 if trimmed.is_empty() {
2972 return false;
2973 }
2974 trimmed.starts_with("```")
2976 || trimmed.starts_with("~~~")
2977 || trimmed.starts_with("---")
2978 || trimmed.starts_with("***")
2979 || trimmed.starts_with("___")
2980 || trimmed.starts_with(">")
2981 || crate::utils::skip_context::is_table_line(trimmed)
2982 || between_line.heading.is_some()
2983 } else {
2984 false
2985 }
2986 });
2987 found_continuation = !has_structural_separators;
2988 } else {
2989 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2991 if let Some(between_line) = lines.get(idx) {
2992 let trimmed = between_line.content(content).trim();
2993 if trimmed.is_empty() {
2994 return false;
2995 }
2996 trimmed.starts_with("```")
2998 || trimmed.starts_with("~~~")
2999 || trimmed.starts_with("---")
3000 || trimmed.starts_with("***")
3001 || trimmed.starts_with("___")
3002 || trimmed.starts_with(">")
3003 || crate::utils::skip_context::is_table_line(trimmed)
3004 || between_line.heading.is_some()
3005 } else {
3006 false
3007 }
3008 });
3009 found_continuation = !has_structural_separators;
3010 }
3011 }
3012 }
3013 }
3014
3015 if found_continuation {
3016 block.end_line = line_num;
3018 } else {
3019 list_blocks.push(block.clone());
3021 current_block = None;
3022 }
3023 } else {
3024 let min_required_indent = if block.is_ordered {
3027 current_indent_level + last_marker_width
3028 } else {
3029 current_indent_level + 2
3030 };
3031
3032 let line_content = line_info.content(content).trim();
3037
3038 let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
3040
3041 let is_structural_separator = line_info.heading.is_some()
3042 || line_content.starts_with("```")
3043 || line_content.starts_with("~~~")
3044 || line_content.starts_with("---")
3045 || line_content.starts_with("***")
3046 || line_content.starts_with("___")
3047 || line_content.starts_with(">")
3048 || looks_like_table;
3049
3050 let is_lazy_continuation = !is_structural_separator
3053 && !line_info.is_blank
3054 && (line_info.indent == 0 || line_info.indent >= min_required_indent);
3055
3056 if is_lazy_continuation {
3057 let content_to_check = if !blockquote_prefix.is_empty() {
3060 line_info
3062 .content(content)
3063 .strip_prefix(&blockquote_prefix)
3064 .unwrap_or(line_info.content(content))
3065 .trim()
3066 } else {
3067 line_info.content(content).trim()
3068 };
3069
3070 let starts_with_uppercase = content_to_check.chars().next().is_some_and(|c| c.is_uppercase());
3071
3072 if starts_with_uppercase && last_list_item_line > 0 {
3075 list_blocks.push(block.clone());
3077 current_block = None;
3078 } else {
3079 block.end_line = line_num;
3081 }
3082 } else {
3083 list_blocks.push(block.clone());
3085 current_block = None;
3086 }
3087 }
3088 }
3089 }
3090
3091 if let Some(block) = current_block {
3093 list_blocks.push(block);
3094 }
3095
3096 merge_adjacent_list_blocks(content, &mut list_blocks, lines);
3098
3099 list_blocks
3100 }
3101
3102 fn compute_char_frequency(content: &str) -> CharFrequency {
3104 let mut frequency = CharFrequency::default();
3105
3106 for ch in content.chars() {
3107 match ch {
3108 '#' => frequency.hash_count += 1,
3109 '*' => frequency.asterisk_count += 1,
3110 '_' => frequency.underscore_count += 1,
3111 '-' => frequency.hyphen_count += 1,
3112 '+' => frequency.plus_count += 1,
3113 '>' => frequency.gt_count += 1,
3114 '|' => frequency.pipe_count += 1,
3115 '[' => frequency.bracket_count += 1,
3116 '`' => frequency.backtick_count += 1,
3117 '<' => frequency.lt_count += 1,
3118 '!' => frequency.exclamation_count += 1,
3119 '\n' => frequency.newline_count += 1,
3120 _ => {}
3121 }
3122 }
3123
3124 frequency
3125 }
3126
3127 fn parse_html_tags(
3129 content: &str,
3130 lines: &[LineInfo],
3131 code_blocks: &[(usize, usize)],
3132 flavor: MarkdownFlavor,
3133 ) -> Vec<HtmlTag> {
3134 static HTML_TAG_REGEX: LazyLock<regex::Regex> =
3135 LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
3136
3137 let mut html_tags = Vec::with_capacity(content.matches('<').count());
3138
3139 for cap in HTML_TAG_REGEX.captures_iter(content) {
3140 let full_match = cap.get(0).unwrap();
3141 let match_start = full_match.start();
3142 let match_end = full_match.end();
3143
3144 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3146 continue;
3147 }
3148
3149 let is_closing = !cap.get(1).unwrap().as_str().is_empty();
3150 let tag_name_original = cap.get(2).unwrap().as_str();
3151 let tag_name = tag_name_original.to_lowercase();
3152 let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
3153
3154 if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
3157 continue;
3158 }
3159
3160 let mut line_num = 1;
3162 let mut col_start = match_start;
3163 let mut col_end = match_end;
3164 for (idx, line_info) in lines.iter().enumerate() {
3165 if match_start >= line_info.byte_offset {
3166 line_num = idx + 1;
3167 col_start = match_start - line_info.byte_offset;
3168 col_end = match_end - line_info.byte_offset;
3169 } else {
3170 break;
3171 }
3172 }
3173
3174 html_tags.push(HtmlTag {
3175 line: line_num,
3176 start_col: col_start,
3177 end_col: col_end,
3178 byte_offset: match_start,
3179 byte_end: match_end,
3180 tag_name,
3181 is_closing,
3182 is_self_closing,
3183 raw_content: full_match.as_str().to_string(),
3184 });
3185 }
3186
3187 html_tags
3188 }
3189
3190 fn parse_emphasis_spans(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<EmphasisSpan> {
3192 static EMPHASIS_REGEX: LazyLock<regex::Regex> =
3193 LazyLock::new(|| regex::Regex::new(r"(\*{1,3}|_{1,3})([^*_\s][^*_]*?)(\*{1,3}|_{1,3})").unwrap());
3194
3195 let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
3196
3197 for cap in EMPHASIS_REGEX.captures_iter(content) {
3198 let full_match = cap.get(0).unwrap();
3199 let match_start = full_match.start();
3200 let match_end = full_match.end();
3201
3202 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3204 continue;
3205 }
3206
3207 let opening_markers = cap.get(1).unwrap().as_str();
3208 let content_part = cap.get(2).unwrap().as_str();
3209 let closing_markers = cap.get(3).unwrap().as_str();
3210
3211 if opening_markers.chars().next() != closing_markers.chars().next()
3213 || opening_markers.len() != closing_markers.len()
3214 {
3215 continue;
3216 }
3217
3218 let marker = opening_markers.chars().next().unwrap();
3219 let marker_count = opening_markers.len();
3220
3221 let mut line_num = 1;
3223 let mut col_start = match_start;
3224 let mut col_end = match_end;
3225 for (idx, line_info) in lines.iter().enumerate() {
3226 if match_start >= line_info.byte_offset {
3227 line_num = idx + 1;
3228 col_start = match_start - line_info.byte_offset;
3229 col_end = match_end - line_info.byte_offset;
3230 } else {
3231 break;
3232 }
3233 }
3234
3235 emphasis_spans.push(EmphasisSpan {
3236 line: line_num,
3237 start_col: col_start,
3238 end_col: col_end,
3239 byte_offset: match_start,
3240 byte_end: match_end,
3241 marker,
3242 marker_count,
3243 content: content_part.to_string(),
3244 });
3245 }
3246
3247 emphasis_spans
3248 }
3249
3250 fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
3252 let mut table_rows = Vec::with_capacity(lines.len() / 20);
3253
3254 for (line_idx, line_info) in lines.iter().enumerate() {
3255 if line_info.in_code_block || line_info.is_blank {
3257 continue;
3258 }
3259
3260 let line = line_info.content(content);
3261 let line_num = line_idx + 1;
3262
3263 if !line.contains('|') {
3265 continue;
3266 }
3267
3268 let parts: Vec<&str> = line.split('|').collect();
3270 let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
3271
3272 let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
3274 let mut column_alignments = Vec::new();
3275
3276 if is_separator {
3277 for part in &parts[1..parts.len() - 1] {
3278 let trimmed = part.trim();
3280 let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
3281 "center".to_string()
3282 } else if trimmed.ends_with(':') {
3283 "right".to_string()
3284 } else if trimmed.starts_with(':') {
3285 "left".to_string()
3286 } else {
3287 "none".to_string()
3288 };
3289 column_alignments.push(alignment);
3290 }
3291 }
3292
3293 table_rows.push(TableRow {
3294 line: line_num,
3295 is_separator,
3296 column_count,
3297 column_alignments,
3298 });
3299 }
3300
3301 table_rows
3302 }
3303
3304 fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
3306 let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
3307
3308 for cap in URL_SIMPLE_REGEX.captures_iter(content) {
3310 let full_match = cap.get(0).unwrap();
3311 let match_start = full_match.start();
3312 let match_end = full_match.end();
3313
3314 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3316 continue;
3317 }
3318
3319 let preceding_char = if match_start > 0 {
3321 content.chars().nth(match_start - 1)
3322 } else {
3323 None
3324 };
3325 let following_char = content.chars().nth(match_end);
3326
3327 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3328 continue;
3329 }
3330 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3331 continue;
3332 }
3333
3334 let url = full_match.as_str();
3335 let url_type = if url.starts_with("https://") {
3336 "https"
3337 } else if url.starts_with("http://") {
3338 "http"
3339 } else if url.starts_with("ftp://") {
3340 "ftp"
3341 } else {
3342 "other"
3343 };
3344
3345 let mut line_num = 1;
3347 let mut col_start = match_start;
3348 let mut col_end = match_end;
3349 for (idx, line_info) in lines.iter().enumerate() {
3350 if match_start >= line_info.byte_offset {
3351 line_num = idx + 1;
3352 col_start = match_start - line_info.byte_offset;
3353 col_end = match_end - line_info.byte_offset;
3354 } else {
3355 break;
3356 }
3357 }
3358
3359 bare_urls.push(BareUrl {
3360 line: line_num,
3361 start_col: col_start,
3362 end_col: col_end,
3363 byte_offset: match_start,
3364 byte_end: match_end,
3365 url: url.to_string(),
3366 url_type: url_type.to_string(),
3367 });
3368 }
3369
3370 for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
3372 let full_match = cap.get(0).unwrap();
3373 let match_start = full_match.start();
3374 let match_end = full_match.end();
3375
3376 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3378 continue;
3379 }
3380
3381 let preceding_char = if match_start > 0 {
3383 content.chars().nth(match_start - 1)
3384 } else {
3385 None
3386 };
3387 let following_char = content.chars().nth(match_end);
3388
3389 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3390 continue;
3391 }
3392 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3393 continue;
3394 }
3395
3396 let email = full_match.as_str();
3397
3398 let mut line_num = 1;
3400 let mut col_start = match_start;
3401 let mut col_end = match_end;
3402 for (idx, line_info) in lines.iter().enumerate() {
3403 if match_start >= line_info.byte_offset {
3404 line_num = idx + 1;
3405 col_start = match_start - line_info.byte_offset;
3406 col_end = match_end - line_info.byte_offset;
3407 } else {
3408 break;
3409 }
3410 }
3411
3412 bare_urls.push(BareUrl {
3413 line: line_num,
3414 start_col: col_start,
3415 end_col: col_end,
3416 byte_offset: match_start,
3417 byte_end: match_end,
3418 url: email.to_string(),
3419 url_type: "email".to_string(),
3420 });
3421 }
3422
3423 bare_urls
3424 }
3425
/// Returns a [`ValidHeadingsIter`] constructed over this context's `lines`.
///
/// NOTE(review): presumably the iterator yields only headings whose `is_valid`
/// flag is set (mirroring `has_valid_headings`) — confirm against the
/// `ValidHeadingsIter` implementation.
#[must_use]
pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
    ValidHeadingsIter::new(&self.lines)
}
3449
3450 #[must_use]
3454 pub fn has_valid_headings(&self) -> bool {
3455 self.lines
3456 .iter()
3457 .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
3458 }
3459}
3460
3461fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
3463 if list_blocks.len() < 2 {
3464 return;
3465 }
3466
3467 let mut merger = ListBlockMerger::new(content, lines);
3468 *list_blocks = merger.merge(list_blocks);
3469}
3470
/// Borrowed view of a document used while deciding which neighbouring list blocks
/// should be merged into one.
struct ListBlockMerger<'a> {
    /// Full document text; handed to `LineInfo::content` to obtain a line's text.
    content: &'a str,
    /// Per-line metadata, looked up with zero-based indices (`line_num - 1`).
    lines: &'a [LineInfo],
}
3476
3477impl<'a> ListBlockMerger<'a> {
3478 fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
3479 Self { content, lines }
3480 }
3481
3482 fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
3483 let mut merged = Vec::with_capacity(list_blocks.len());
3484 let mut current = list_blocks[0].clone();
3485
3486 for next in list_blocks.iter().skip(1) {
3487 if self.should_merge_blocks(¤t, next) {
3488 current = self.merge_two_blocks(current, next);
3489 } else {
3490 merged.push(current);
3491 current = next.clone();
3492 }
3493 }
3494
3495 merged.push(current);
3496 merged
3497 }
3498
3499 fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
3501 if !self.blocks_are_compatible(current, next) {
3503 return false;
3504 }
3505
3506 let spacing = self.analyze_spacing_between(current, next);
3508 match spacing {
3509 BlockSpacing::Consecutive => true,
3510 BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
3511 BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
3512 self.can_merge_with_content_between(current, next)
3513 }
3514 }
3515 }
3516
3517 fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
3519 current.is_ordered == next.is_ordered
3520 && current.blockquote_prefix == next.blockquote_prefix
3521 && current.nesting_level == next.nesting_level
3522 }
3523
3524 fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
3526 let gap = next.start_line - current.end_line;
3527
3528 match gap {
3529 1 => BlockSpacing::Consecutive,
3530 2 => BlockSpacing::SingleBlank,
3531 _ if gap > 2 => {
3532 if self.has_only_blank_lines_between(current, next) {
3533 BlockSpacing::MultipleBlanks
3534 } else {
3535 BlockSpacing::ContentBetween
3536 }
3537 }
3538 _ => BlockSpacing::Consecutive, }
3540 }
3541
3542 fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3544 if has_meaningful_content_between(self.content, current, next, self.lines) {
3547 return false; }
3549
3550 !current.is_ordered && current.marker == next.marker
3552 }
3553
3554 fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3556 if has_meaningful_content_between(self.content, current, next, self.lines) {
3558 return false; }
3560
3561 current.is_ordered && next.is_ordered
3563 }
3564
3565 fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3567 for line_num in (current.end_line + 1)..next.start_line {
3568 if let Some(line_info) = self.lines.get(line_num - 1)
3569 && !line_info.content(self.content).trim().is_empty()
3570 {
3571 return false;
3572 }
3573 }
3574 true
3575 }
3576
3577 fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
3579 current.end_line = next.end_line;
3580 current.item_lines.extend_from_slice(&next.item_lines);
3581
3582 current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
3584
3585 if !current.is_ordered && self.markers_differ(¤t, next) {
3587 current.marker = None; }
3589
3590 current
3591 }
3592
3593 fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
3595 current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
3596 }
3597}
3598
/// How two neighbouring list blocks are separated, as classified by
/// `ListBlockMerger::analyze_spacing_between` from
/// `next.start_line - current.end_line`.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// The difference is 1 (or 0): `next` starts on the line right after `current` ends.
    Consecutive,
    /// The difference is 2: exactly one line lies between the blocks.
    SingleBlank,
    /// The difference is greater than 2 and every line in between is blank.
    MultipleBlanks,
    /// The difference is greater than 2 and at least one line in between is not blank.
    ContentBetween,
}
3607
3608fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
3610 for line_num in (current.end_line + 1)..next.start_line {
3612 if let Some(line_info) = lines.get(line_num - 1) {
3613 let trimmed = line_info.content(content).trim();
3615
3616 if trimmed.is_empty() {
3618 continue;
3619 }
3620
3621 if line_info.heading.is_some() {
3625 return true; }
3627
3628 if is_horizontal_rule(trimmed) {
3630 return true; }
3632
3633 if crate::utils::skip_context::is_table_line(trimmed) {
3635 return true; }
3637
3638 if trimmed.starts_with('>') {
3640 return true; }
3642
3643 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
3645 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3646
3647 let min_continuation_indent = if current.is_ordered {
3649 current.nesting_level + current.max_marker_width + 1 } else {
3651 current.nesting_level + 2
3652 };
3653
3654 if line_indent < min_continuation_indent {
3655 return true; }
3658 }
3659
3660 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3662
3663 let min_indent = if current.is_ordered {
3665 current.nesting_level + current.max_marker_width
3666 } else {
3667 current.nesting_level + 2
3668 };
3669
3670 if line_indent < min_indent {
3672 return true; }
3674
3675 }
3678 }
3679
3680 false
3682}
3683
3684pub fn is_horizontal_rule_line(line: &str) -> bool {
3691 let leading_spaces = line.len() - line.trim_start_matches(' ').len();
3693 if leading_spaces > 3 || line.starts_with('\t') {
3694 return false;
3695 }
3696
3697 is_horizontal_rule_content(line.trim())
3698}
3699
/// Checks whether already-trimmed text forms a horizontal rule.
///
/// The text qualifies when its first character is `-`, `*` or `_`, that same
/// character occurs at least three times, and every other character is a space
/// or a tab.
///
/// The characters are scanned in place, so no per-call buffer is allocated.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    // Fewer than three bytes cannot hold three marker characters.
    if trimmed.len() < 3 {
        return false;
    }

    let Some(marker) = trimmed.chars().next().filter(|c| matches!(c, '-' | '*' | '_')) else {
        return false;
    };

    let mut marker_count = 0usize;
    for ch in trimmed.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' && ch != '\t' {
            // Anything besides the marker and blanks disqualifies the line.
            return false;
        }
    }

    marker_count >= 3
}
3724
/// Alias for [`is_horizontal_rule_content`]: forwards `trimmed` unchanged and
/// returns that function's result.
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}
3729
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty document still has one line offset but no line entries.
    #[test]
    fn test_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }

    /// A document without a newline is a single line starting at offset 0.
    #[test]
    fn test_single_line() {
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
    }

    /// Line offsets follow the newline positions; lines and columns are 1-based.
    #[test]
    fn test_multi_line() {
        let content = "# Title\n\nSecond line\nThird line";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(8), (2, 1));
        assert_eq!(ctx.offset_to_line_col(9), (3, 1));
        assert_eq!(ctx.offset_to_line_col(15), (3, 7));
        assert_eq!(ctx.offset_to_line_col(21), (4, 1));
    }

    /// Per-line metadata: content, byte offset, indent, blank and code-block flags.
    #[test]
    fn test_line_info() {
        // The second line is indented by four spaces, matching the `indent == 4`
        // assertions below.
        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.lines.len(), 7);

        let line1 = &ctx.lines[0];
        assert_eq!(line1.content(ctx.content), "# Title");
        assert_eq!(line1.byte_offset, 0);
        assert_eq!(line1.indent, 0);
        assert!(!line1.is_blank);
        assert!(!line1.in_code_block);
        assert!(line1.list_item.is_none());

        let line2 = &ctx.lines[1];
        assert_eq!(line2.content(ctx.content), "    indented");
        assert_eq!(line2.byte_offset, 8);
        assert_eq!(line2.indent, 4);
        assert!(!line2.is_blank);

        let line3 = &ctx.lines[2];
        assert_eq!(line3.content(ctx.content), "");
        assert!(line3.is_blank);

        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
    }

    /// List markers, their columns and ordered-ness are detected per line.
    #[test]
    fn test_list_item_detection() {
        // The nested bullet sits at column 2 (asserted below); the nested ordered
        // item is indented to the content column of "1. ".
        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let line1 = &ctx.lines[0];
        assert!(line1.list_item.is_some());
        let list1 = line1.list_item.as_ref().unwrap();
        assert_eq!(list1.marker, "-");
        assert!(!list1.is_ordered);
        assert_eq!(list1.marker_column, 0);
        assert_eq!(list1.content_column, 2);

        let line2 = &ctx.lines[1];
        assert!(line2.list_item.is_some());
        let list2 = line2.list_item.as_ref().unwrap();
        assert_eq!(list2.marker, "*");
        assert_eq!(list2.marker_column, 2);

        let line3 = &ctx.lines[2];
        assert!(line3.list_item.is_some());
        let list3 = line3.list_item.as_ref().unwrap();
        assert_eq!(list3.marker, "1.");
        assert!(list3.is_ordered);
        assert_eq!(list3.number, Some(1));

        let line6 = &ctx.lines[5];
        assert!(line6.list_item.is_none());
    }

    /// Offsets of newline characters map to the end of their own line, and the
    /// offset one past the end of the content maps past the last character.
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        let content = "a\nb\nc";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(1), (1, 2));
        assert_eq!(ctx.offset_to_line_col(2), (2, 1));
        assert_eq!(ctx.offset_to_line_col(3), (2, 2));
        assert_eq!(ctx.offset_to_line_col(4), (3, 1));
        assert_eq!(ctx.offset_to_line_col(5), (3, 2));
    }

    /// In the MDX flavor, leading `import`/`export` lines are flagged as ESM blocks.
    #[test]
    fn test_mdx_esm_blocks() {
        let content = r##"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall

In {year}, the snowfall was above average.
It was followed by a warm spring which caused
flood conditions in many of the nearby rivers.

<Chart color="#fcb32c" year={year} />
"##;

        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);

        assert_eq!(ctx.lines.len(), 10);
        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
        assert!(
            !ctx.lines[3].in_esm_block,
            "Line 4 (heading) should NOT be in_esm_block"
        );
        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
    }

    /// The same `import`/`export` lines are not flagged in the Standard flavor.
    #[test]
    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
        let content = r#"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(
            !ctx.lines[0].in_esm_block,
            "Line 1 should NOT be in_esm_block in Standard flavor"
        );
        assert!(
            !ctx.lines[1].in_esm_block,
            "Line 2 should NOT be in_esm_block in Standard flavor"
        );
    }
}