1use crate::config::MarkdownFlavor;
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
4use crate::utils::element_cache::ElementCache;
5use crate::utils::regex_cache::URL_SIMPLE_REGEX;
6use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
7use regex::Regex;
8use std::borrow::Cow;
9use std::path::PathBuf;
10use std::sync::LazyLock;
11
/// Evaluates `$body`, returning its value, and optionally reports how long it took.
///
/// When `$enabled` is true a `[PROFILE] <label>: <elapsed>` line is written to stderr.
/// The body is always evaluated exactly once, whether or not profiling is enabled.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($label:expr, $enabled:expr, $body:expr) => {{
        let timer = std::time::Instant::now();
        let outcome = $body;
        if $enabled {
            eprintln!("[PROFILE] {}: {:?}", $label, timer.elapsed());
        }
        outcome
    }};
}
24
/// wasm32 variant of `profile_section!`: evaluates `$body` and yields its value.
///
/// The label and the enable flag are accepted for call-site compatibility but ignored;
/// no timing is performed on this target.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($label:expr, $enabled:expr, $body:expr) => {{ $body }};
}
29
/// Lazily compiled regex matching inline links (`[text](url "title")`) and
/// reference links (`[text][id]`).
///
/// Capture groups: 1 = link text, 2 = URL written in angle brackets, 3 = bare URL,
/// 4 = double-quoted title, 5 = single-quoted title, 6 = reference ID.
/// The text group deliberately does not allow nested brackets.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\] # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
        \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
        |
        \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
43
/// Lazily compiled regex matching inline images (`![alt](url "title")`) and
/// reference images (`![alt][id]`).
///
/// Capture groups: 1 = alt text, 2 = URL written in angle brackets, 3 = bare URL,
/// 4 = double-quoted title, 5 = single-quoted title, 6 = reference ID.
/// The alt-text group deliberately does not allow nested brackets.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\] # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
        \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
        |
        \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
57
/// Lazily compiled multi-line regex for reference definitions (`[id]: url "title"`).
///
/// Allows up to three leading spaces. Capture groups: 1 = id, 2 = URL (a run of
/// non-whitespace), 3 = double-quoted title, 4 = single-quoted title.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
61
/// Lazily compiled regex matching a simple `local@domain.tld` e-mail address shape
/// (ASCII letters, digits and a few punctuation characters; TLD of at least two letters).
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
67
/// Lazily compiled regex capturing a leading blockquote prefix: optional whitespace,
/// one or more `>` characters, then optional whitespace (all in group 1).
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
70
/// Per-line facts computed once when a `LintContext` is built.
///
/// The line's text is not stored; it is sliced out of the source on demand from
/// `byte_offset` and `byte_len` (see `LineInfo::content`).
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset of the start of this line within the source document.
    pub byte_offset: usize,
    /// Length of this line's content in bytes.
    pub byte_len: usize,
    /// Indentation of the line (NOTE(review): presumably a raw leading-whitespace count — confirm).
    pub indent: usize,
    /// Visual indentation (NOTE(review): presumably with tabs expanded — confirm).
    pub visual_indent: usize,
    /// Whether the line is blank.
    pub is_blank: bool,
    /// Whether the line lies inside a code block.
    pub in_code_block: bool,
    /// Whether the line lies inside front matter.
    pub in_front_matter: bool,
    /// Whether the line lies inside an HTML block.
    pub in_html_block: bool,
    /// Whether the line lies inside an HTML comment.
    pub in_html_comment: bool,
    /// List item details when this line starts a list item.
    pub list_item: Option<ListItemInfo>,
    /// Heading details when this line is a heading.
    pub heading: Option<HeadingInfo>,
    /// Blockquote details when this line is part of a blockquote.
    pub blockquote: Option<BlockquoteInfo>,
    /// Whether the line lies inside a mkdocstrings block.
    pub in_mkdocstrings: bool,
    /// Whether the line lies inside an ESM block.
    pub in_esm_block: bool,
    /// Set for every line after the first of a code span that covers several lines.
    pub in_code_span_continuation: bool,
    /// Whether the line is a horizontal rule.
    pub is_horizontal_rule: bool,
}
110
111impl LineInfo {
112 pub fn content<'a>(&self, source: &'a str) -> &'a str {
114 &source[self.byte_offset..self.byte_offset + self.byte_len]
115 }
116}
117
/// Details about the list marker found on a line.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker text as written.
    pub marker: String,
    /// `true` for ordered list items, `false` for bullet items.
    pub is_ordered: bool,
    /// The item's number (NOTE(review): presumably only set for ordered items — confirm).
    pub number: Option<usize>,
    /// Column of the marker; used as the item's nesting position when detecting mixed nesting.
    pub marker_column: usize,
    /// Column at which the item's content starts.
    pub content_column: usize,
}
132
/// Syntactic form in which a heading was written.
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    /// ATX-style heading.
    ATX,
    /// First setext variant (NOTE(review): presumably the `=` underline, level 1 — confirm).
    Setext1,
    /// Second setext variant (NOTE(review): presumably the `-` underline, level 2 — confirm).
    Setext2,
}
143
/// A link found in the document by `LintContext::parse_links`.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// 1-based number of the line on which the link starts.
    pub line: usize,
    /// Byte offset of the link start relative to the start of its line.
    pub start_col: usize,
    /// Byte offset of the link end relative to the start of the line containing the end.
    pub end_col: usize,
    /// Byte offset of the link start within the document.
    pub byte_offset: usize,
    /// Byte offset just past the end of the link within the document.
    pub byte_end: usize,
    /// Text between the link's square brackets.
    pub text: Cow<'a, str>,
    /// Destination URL; empty for reference links picked up only by the regex fallback.
    pub url: Cow<'a, str>,
    /// Whether the link is reference-style (full, collapsed or shortcut).
    pub is_reference: bool,
    /// Lower-cased reference label for reference-style links, `None` otherwise.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type as classified by pulldown-cmark.
    pub link_type: LinkType,
}
168
/// A broken link reported by pulldown-cmark's broken-link callback.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text reported by the parser.
    pub reference: String,
    /// Byte span of the broken link in the document.
    pub span: std::ops::Range<usize>,
}
177
/// A footnote reference reported by pulldown-cmark.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// Footnote identifier as reported by the parser.
    pub id: String,
    /// 1-based number of the line containing the reference.
    pub line: usize,
    /// Byte offset of the start of the reference within the document.
    pub byte_offset: usize,
    /// Byte offset just past the end of the reference within the document.
    pub byte_end: usize,
}
190
/// An image found in the document by `LintContext::parse_images`.
///
/// Fields mirror `ParsedLink`, with `alt_text` in place of the link text.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// Number of the line on which the image starts.
    pub line: usize,
    /// Byte offset of the image start relative to the start of its line.
    pub start_col: usize,
    /// Byte offset of the image end relative to the start of the line containing the end.
    pub end_col: usize,
    /// Byte offset of the image start within the document.
    pub byte_offset: usize,
    /// Byte offset just past the end of the image within the document.
    pub byte_end: usize,
    /// The image's alt text.
    pub alt_text: Cow<'a, str>,
    /// The image's destination URL.
    pub url: Cow<'a, str>,
    /// Whether the image is reference-style.
    pub is_reference: bool,
    /// Reference label for reference-style images, if any.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type as classified by pulldown-cmark.
    pub link_type: LinkType,
}
215
/// A reference definition such as `[id]: url "title"`.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Number of the line holding the definition.
    pub line: usize,
    /// Definition label; `get_reference_url` compares it against a lower-cased lookup key.
    pub id: String,
    /// Destination URL of the definition.
    pub url: String,
    /// Optional title.
    pub title: Option<String>,
    /// Byte offset of the start of the definition (inclusive).
    pub byte_offset: usize,
    /// Byte offset of the end of the definition (exclusive).
    pub byte_end: usize,
    /// Byte offset where the title starts (inclusive), when a title is present.
    pub title_byte_start: Option<usize>,
    /// Byte offset where the title ends (exclusive), when a title is present.
    pub title_byte_end: Option<usize>,
}
236
/// An inline code span, possibly covering several lines.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// 1-based number of the line on which the span starts.
    pub line: usize,
    /// 1-based number of the line on which the span ends.
    pub end_line: usize,
    /// 0-based start column on `line` (inclusive).
    pub start_col: usize,
    /// 0-based end column on `end_line` (exclusive).
    pub end_col: usize,
    /// Byte offset of the start of the span within the document (inclusive).
    pub byte_offset: usize,
    /// Byte offset of the end of the span within the document (exclusive).
    pub byte_end: usize,
    /// Number of backticks delimiting the span.
    pub backtick_count: usize,
    /// Content of the span.
    pub content: String,
}
257
/// Details about a heading detected on a line.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level.
    pub level: u8,
    /// Syntactic style of the heading.
    pub style: HeadingStyle,
    /// The heading marker as written.
    pub marker: String,
    /// Column of the marker.
    pub marker_column: usize,
    /// Column at which the heading text starts.
    pub content_column: usize,
    /// Heading text (NOTE(review): presumably cleaned of markers/custom id — confirm).
    pub text: String,
    /// Custom id attached to the heading, if any.
    pub custom_id: Option<String>,
    /// Heading text as written (NOTE(review): exact difference from `text` not visible here).
    pub raw_text: String,
    /// Whether the heading has a closing sequence.
    pub has_closing_sequence: bool,
    /// The closing sequence text.
    pub closing_sequence: String,
    /// Whether this is a valid heading; `ValidHeadingsIter` skips entries where this is `false`.
    pub is_valid: bool,
}
285
/// A valid heading together with its location, as yielded by `ValidHeadingsIter`.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// 1-based line number of the heading.
    pub line_num: usize,
    /// The heading details (always has `is_valid == true`).
    pub heading: &'a HeadingInfo,
    /// The full line record the heading belongs to.
    pub line_info: &'a LineInfo,
}
299
/// Iterator over the lines of a document that carry a valid heading.
pub struct ValidHeadingsIter<'a> {
    /// All line records of the document, in order.
    lines: &'a [LineInfo],
    /// Index of the next line to examine.
    current_index: usize,
}
308
309impl<'a> ValidHeadingsIter<'a> {
310 fn new(lines: &'a [LineInfo]) -> Self {
311 Self {
312 lines,
313 current_index: 0,
314 }
315 }
316}
317
318impl<'a> Iterator for ValidHeadingsIter<'a> {
319 type Item = ValidHeading<'a>;
320
321 fn next(&mut self) -> Option<Self::Item> {
322 while self.current_index < self.lines.len() {
323 let idx = self.current_index;
324 self.current_index += 1;
325
326 let line_info = &self.lines[idx];
327 if let Some(heading) = &line_info.heading
328 && heading.is_valid
329 {
330 return Some(ValidHeading {
331 line_num: idx + 1, heading,
333 line_info,
334 });
335 }
336 }
337 None
338 }
339}
340
/// Details about the blockquote prefix found on a line.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Blockquote nesting depth.
    pub nesting_level: usize,
    /// Whitespace preceding the blockquote marker.
    pub indent: String,
    /// Column of the blockquote marker.
    pub marker_column: usize,
    /// The blockquote prefix as written.
    pub prefix: String,
    /// Line content following the prefix.
    pub content: String,
    /// Whether the marker is followed directly by content with no space.
    pub has_no_space_after_marker: bool,
    /// Whether the marker is followed by more than one space.
    pub has_multiple_spaces_after_marker: bool,
    /// Whether the line needs an MD028 fix (NOTE(review): criteria not visible here).
    pub needs_md028_fix: bool,
}
361
/// A contiguous block of list lines.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// First line of the block (inclusive).
    pub start_line: usize,
    /// Last line of the block (inclusive).
    pub end_line: usize,
    /// Whether the block is an ordered list.
    pub is_ordered: bool,
    /// Marker used by the block, if recorded.
    pub marker: Option<String>,
    /// Blockquote prefix in front of the list.
    pub blockquote_prefix: String,
    /// Line numbers of the list items in this block.
    pub item_lines: Vec<usize>,
    /// Nesting level of the block.
    pub nesting_level: usize,
    /// Largest marker width within the block.
    pub max_marker_width: usize,
}
382
383use std::sync::{Arc, OnceLock};
384
/// Pre-computed counts of characters that matter to Markdown syntax.
///
/// `LintContext::has_char` / `char_count` answer from these counters for the
/// characters noted on each field, and fall back to scanning the content otherwise.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count reported for `#`.
    pub hash_count: usize,
    /// Count reported for `*`.
    pub asterisk_count: usize,
    /// Count reported for `_`.
    pub underscore_count: usize,
    /// Count reported for `-`.
    pub hyphen_count: usize,
    /// Count reported for `+`.
    pub plus_count: usize,
    /// Count reported for `>`.
    pub gt_count: usize,
    /// Count reported for `|`.
    pub pipe_count: usize,
    /// Count reported for `[` (NOTE(review): may also include `]` — confirm in the counter).
    pub bracket_count: usize,
    /// Count reported for the backtick character.
    pub backtick_count: usize,
    /// Count reported for `<`.
    pub lt_count: usize,
    /// Count reported for `!`.
    pub exclamation_count: usize,
    /// Count reported for the newline character.
    pub newline_count: usize,
}
413
/// An HTML tag found in the document.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Number of the line containing the tag.
    pub line: usize,
    /// Start column of the tag.
    pub start_col: usize,
    /// End column of the tag.
    pub end_col: usize,
    /// Byte offset of the start of the tag within the document (inclusive).
    pub byte_offset: usize,
    /// Byte offset of the end of the tag within the document (exclusive).
    pub byte_end: usize,
    /// Name of the tag.
    pub tag_name: String,
    /// Whether this is a closing tag.
    pub is_closing: bool,
    /// Whether the tag is self-closing.
    pub is_self_closing: bool,
    /// The tag text as written.
    pub raw_content: String,
}
436
/// An emphasis span found in the document.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Number of the line containing the span.
    pub line: usize,
    /// Start column of the span.
    pub start_col: usize,
    /// End column of the span.
    pub end_col: usize,
    /// Byte offset of the start of the span within the document.
    pub byte_offset: usize,
    /// Byte offset of the end of the span within the document.
    pub byte_end: usize,
    /// The emphasis marker character.
    pub marker: char,
    /// How many marker characters delimit the span.
    pub marker_count: usize,
    /// Content of the span.
    pub content: String,
}
457
/// A table row found in the document.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Number of the line containing the row.
    pub line: usize,
    /// Whether the row is a separator row.
    pub is_separator: bool,
    /// Number of columns in the row.
    pub column_count: usize,
    /// Column alignments (NOTE(review): string vocabulary not visible here).
    pub column_alignments: Vec<String>,
}
470
/// A bare URL found in the document.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Number of the line containing the URL.
    pub line: usize,
    /// Start column of the URL.
    pub start_col: usize,
    /// End column of the URL.
    pub end_col: usize,
    /// Byte offset of the start of the URL within the document.
    pub byte_offset: usize,
    /// Byte offset of the end of the URL within the document.
    pub byte_end: usize,
    /// The URL text.
    pub url: String,
    /// Kind of URL (NOTE(review): string vocabulary not visible here).
    pub url_type: String,
}
489
/// Pre-analysed view of one Markdown document, shared by lint rules.
///
/// Most data is computed eagerly in `LintContext::new`; the `*_cache` fields are
/// filled lazily by their accessor methods (the code-span cache is pre-seeded by `new`).
pub struct LintContext<'a> {
    /// The document text.
    pub content: &'a str,
    /// Byte offset of the start of every line (the first entry is always 0).
    pub line_offsets: Vec<usize>,
    /// Code block ranges as reported by `CodeBlockUtils::detect_code_blocks`.
    pub code_blocks: Vec<(usize, usize)>,
    /// Per-line information, indexed by line number minus one.
    pub lines: Vec<LineInfo>,
    /// Links found in the document.
    pub links: Vec<ParsedLink<'a>>,
    /// Images found in the document.
    pub images: Vec<ParsedImage<'a>>,
    /// Broken links reported by the Markdown parser.
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Footnote references reported by the Markdown parser.
    pub footnote_refs: Vec<FootnoteRef>,
    /// Reference definitions found in the document.
    pub reference_defs: Vec<ReferenceDef>,
    /// Cached code spans; access through `code_spans()`.
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>,
    /// List blocks found in the document.
    pub list_blocks: Vec<ListBlock>,
    /// Counts of syntax-relevant characters.
    pub char_frequency: CharFrequency,
    /// Lazily computed HTML tags; access through `html_tags()`.
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>,
    /// Lazily computed emphasis spans; access through `emphasis_spans()`.
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>,
    /// Lazily computed table rows; access through `table_rows()`.
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>,
    /// Lazily computed bare URLs; access through `bare_urls()`.
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>,
    /// Lazily computed answer for `has_mixed_list_nesting()`.
    has_mixed_list_nesting_cache: OnceLock<bool>,
    /// Byte ranges of HTML comments; access through `html_comment_ranges()`.
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Table blocks found in the document.
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Line index built over the document text.
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    /// Byte ranges (start inclusive, end exclusive) of Jinja constructs.
    jinja_ranges: Vec<(usize, usize)>,
    /// Markdown flavor the document is linted as.
    pub flavor: MarkdownFlavor,
    /// Path of the source file, when known.
    pub source_file: Option<PathBuf>,
}
515
/// The four consecutive pieces of a blockquote line, each borrowed from the line.
struct BlockquoteComponents<'a> {
    /// Leading spaces/tabs before the first `>`.
    indent: &'a str,
    /// The run of consecutive `>` characters.
    markers: &'a str,
    /// Spaces/tabs directly after the markers.
    spaces_after: &'a str,
    /// Everything after `spaces_after`.
    content: &'a str,
}

/// Splits `line` into indent, `>` markers, whitespace after the markers, and content.
///
/// Returns `None` when the first character after any leading spaces/tabs is not `>`
/// (including empty and whitespace-only lines). Only a contiguous run of `>` counts
/// as markers; a later `>` separated by whitespace ends up in `content`.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    fn is_space_or_tab(ch: char) -> bool {
        ch == ' ' || ch == '\t'
    }

    // Peel the pieces off the front one at a time; every split point sits next to
    // an ASCII character, so the slices are always on char boundaries.
    let after_indent = line.trim_start_matches(is_space_or_tab);
    let (indent, _) = line.split_at(line.len() - after_indent.len());

    let after_markers = after_indent.trim_start_matches('>');
    let marker_len = after_indent.len() - after_markers.len();
    if marker_len == 0 {
        return None;
    }
    let markers = &after_indent[..marker_len];

    let content = after_markers.trim_start_matches(is_space_or_tab);
    let spaces_after = &after_markers[..after_markers.len() - content.len()];

    Some(BlockquoteComponents {
        indent,
        markers,
        spaces_after,
        content,
    })
}
560
561impl<'a> LintContext<'a> {
562 pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
563 #[cfg(not(target_arch = "wasm32"))]
564 let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
565 #[cfg(target_arch = "wasm32")]
566 let profile = false;
567
568 let line_offsets = profile_section!("Line offsets", profile, {
569 let mut offsets = vec![0];
570 for (i, c) in content.char_indices() {
571 if c == '\n' {
572 offsets.push(i + 1);
573 }
574 }
575 offsets
576 });
577
578 let code_blocks = profile_section!("Code blocks", profile, CodeBlockUtils::detect_code_blocks(content));
580
581 let html_comment_ranges = profile_section!(
583 "HTML comment ranges",
584 profile,
585 crate::utils::skip_context::compute_html_comment_ranges(content)
586 );
587
588 let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
590 if flavor == MarkdownFlavor::MkDocs {
591 crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
592 } else {
593 Vec::new()
594 }
595 });
596
597 let mut lines = profile_section!(
599 "Basic line info",
600 profile,
601 Self::compute_basic_line_info(
602 content,
603 &line_offsets,
604 &code_blocks,
605 flavor,
606 &html_comment_ranges,
607 &autodoc_ranges,
608 )
609 );
610
611 profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));
613
614 profile_section!(
616 "ESM blocks",
617 profile,
618 Self::detect_esm_blocks(content, &mut lines, flavor)
619 );
620
621 let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));
623
624 profile_section!(
626 "Headings & blockquotes",
627 profile,
628 Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
629 );
630
631 let code_spans = profile_section!("Code spans", profile, Self::parse_code_spans(content, &lines));
633
634 for span in &code_spans {
637 if span.end_line > span.line {
638 for line_num in (span.line + 1)..=span.end_line {
640 if let Some(line_info) = lines.get_mut(line_num - 1) {
641 line_info.in_code_span_continuation = true;
642 }
643 }
644 }
645 }
646
647 let (links, broken_links, footnote_refs) = profile_section!(
649 "Links",
650 profile,
651 Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
652 );
653
654 let images = profile_section!(
655 "Images",
656 profile,
657 Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
658 );
659
660 let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));
661
662 let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));
663
664 let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));
666
667 let table_blocks = profile_section!(
669 "Table blocks",
670 profile,
671 crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
672 content,
673 &code_blocks,
674 &code_spans,
675 &html_comment_ranges,
676 )
677 );
678
679 let line_index = profile_section!(
681 "Line index",
682 profile,
683 crate::utils::range_utils::LineIndex::new(content)
684 );
685
686 let jinja_ranges = profile_section!(
688 "Jinja ranges",
689 profile,
690 crate::utils::jinja_utils::find_jinja_ranges(content)
691 );
692
693 Self {
694 content,
695 line_offsets,
696 code_blocks,
697 lines,
698 links,
699 images,
700 broken_links,
701 footnote_refs,
702 reference_defs,
703 code_spans_cache: OnceLock::from(Arc::new(code_spans)),
704 list_blocks,
705 char_frequency,
706 html_tags_cache: OnceLock::new(),
707 emphasis_spans_cache: OnceLock::new(),
708 table_rows_cache: OnceLock::new(),
709 bare_urls_cache: OnceLock::new(),
710 has_mixed_list_nesting_cache: OnceLock::new(),
711 html_comment_ranges,
712 table_blocks,
713 line_index,
714 jinja_ranges,
715 flavor,
716 source_file,
717 }
718 }
719
720 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
722 Arc::clone(
723 self.code_spans_cache
724 .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
725 )
726 }
727
728 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
730 &self.html_comment_ranges
731 }
732
733 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
735 Arc::clone(self.html_tags_cache.get_or_init(|| {
736 Arc::new(Self::parse_html_tags(
737 self.content,
738 &self.lines,
739 &self.code_blocks,
740 self.flavor,
741 ))
742 }))
743 }
744
745 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
747 Arc::clone(
748 self.emphasis_spans_cache
749 .get_or_init(|| Arc::new(Self::parse_emphasis_spans(self.content, &self.lines, &self.code_blocks))),
750 )
751 }
752
753 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
755 Arc::clone(
756 self.table_rows_cache
757 .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
758 )
759 }
760
761 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
763 Arc::clone(
764 self.bare_urls_cache
765 .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
766 )
767 }
768
769 pub fn has_mixed_list_nesting(&self) -> bool {
773 *self
774 .has_mixed_list_nesting_cache
775 .get_or_init(|| self.compute_mixed_list_nesting())
776 }
777
778 fn compute_mixed_list_nesting(&self) -> bool {
780 let mut stack: Vec<(usize, bool)> = Vec::new();
785 let mut last_was_blank = false;
786
787 for line_info in &self.lines {
788 if line_info.in_code_block
790 || line_info.in_front_matter
791 || line_info.in_mkdocstrings
792 || line_info.in_html_comment
793 || line_info.in_esm_block
794 {
795 continue;
796 }
797
798 if line_info.is_blank {
800 last_was_blank = true;
801 continue;
802 }
803
804 if let Some(list_item) = &line_info.list_item {
805 let current_pos = if list_item.marker_column == 1 {
807 0
808 } else {
809 list_item.marker_column
810 };
811
812 if last_was_blank && current_pos == 0 {
814 stack.clear();
815 }
816 last_was_blank = false;
817
818 while let Some(&(pos, _)) = stack.last() {
820 if pos >= current_pos {
821 stack.pop();
822 } else {
823 break;
824 }
825 }
826
827 if let Some(&(_, parent_is_ordered)) = stack.last()
829 && parent_is_ordered != list_item.is_ordered
830 {
831 return true; }
833
834 stack.push((current_pos, list_item.is_ordered));
835 } else {
836 last_was_blank = false;
838 }
839 }
840
841 false
842 }
843
844 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
846 match self.line_offsets.binary_search(&offset) {
847 Ok(line) => (line + 1, 1),
848 Err(line) => {
849 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
850 (line, offset - line_start + 1)
851 }
852 }
853 }
854
855 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
857 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
859 return true;
860 }
861
862 self.code_spans()
864 .iter()
865 .any(|span| pos >= span.byte_offset && pos < span.byte_end)
866 }
867
868 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
870 if line_num > 0 {
871 self.lines.get(line_num - 1)
872 } else {
873 None
874 }
875 }
876
877 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
879 self.line_info(line_num).map(|info| info.byte_offset)
880 }
881
882 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
884 let normalized_id = ref_id.to_lowercase();
885 self.reference_defs
886 .iter()
887 .find(|def| def.id == normalized_id)
888 .map(|def| def.url.as_str())
889 }
890
891 pub fn is_in_list_block(&self, line_num: usize) -> bool {
893 self.list_blocks
894 .iter()
895 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
896 }
897
898 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
900 self.list_blocks
901 .iter()
902 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
903 }
904
905 pub fn is_in_code_block(&self, line_num: usize) -> bool {
909 if line_num == 0 || line_num > self.lines.len() {
910 return false;
911 }
912 self.lines[line_num - 1].in_code_block
913 }
914
915 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
917 if line_num == 0 || line_num > self.lines.len() {
918 return false;
919 }
920 self.lines[line_num - 1].in_front_matter
921 }
922
923 pub fn is_in_html_block(&self, line_num: usize) -> bool {
925 if line_num == 0 || line_num > self.lines.len() {
926 return false;
927 }
928 self.lines[line_num - 1].in_html_block
929 }
930
931 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
933 if line_num == 0 || line_num > self.lines.len() {
934 return false;
935 }
936
937 let col_0indexed = if col > 0 { col - 1 } else { 0 };
941 let code_spans = self.code_spans();
942 code_spans.iter().any(|span| {
943 if line_num < span.line || line_num > span.end_line {
945 return false;
946 }
947
948 if span.line == span.end_line {
949 col_0indexed >= span.start_col && col_0indexed < span.end_col
951 } else if line_num == span.line {
952 col_0indexed >= span.start_col
954 } else if line_num == span.end_line {
955 col_0indexed < span.end_col
957 } else {
958 true
960 }
961 })
962 }
963
964 #[inline]
966 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
967 let code_spans = self.code_spans();
968 code_spans
969 .iter()
970 .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
971 }
972
973 #[inline]
976 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
977 self.reference_defs
978 .iter()
979 .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
980 }
981
982 #[inline]
986 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
987 self.html_comment_ranges
988 .iter()
989 .any(|range| byte_pos >= range.start && byte_pos < range.end)
990 }
991
992 #[inline]
995 pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
996 self.html_tags()
997 .iter()
998 .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
999 }
1000
1001 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1003 self.jinja_ranges
1004 .iter()
1005 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1006 }
1007
1008 pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1010 self.reference_defs.iter().any(|def| {
1011 if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1012 byte_pos >= start && byte_pos < end
1013 } else {
1014 false
1015 }
1016 })
1017 }
1018
1019 pub fn has_char(&self, ch: char) -> bool {
1021 match ch {
1022 '#' => self.char_frequency.hash_count > 0,
1023 '*' => self.char_frequency.asterisk_count > 0,
1024 '_' => self.char_frequency.underscore_count > 0,
1025 '-' => self.char_frequency.hyphen_count > 0,
1026 '+' => self.char_frequency.plus_count > 0,
1027 '>' => self.char_frequency.gt_count > 0,
1028 '|' => self.char_frequency.pipe_count > 0,
1029 '[' => self.char_frequency.bracket_count > 0,
1030 '`' => self.char_frequency.backtick_count > 0,
1031 '<' => self.char_frequency.lt_count > 0,
1032 '!' => self.char_frequency.exclamation_count > 0,
1033 '\n' => self.char_frequency.newline_count > 0,
1034 _ => self.content.contains(ch), }
1036 }
1037
1038 pub fn char_count(&self, ch: char) -> usize {
1040 match ch {
1041 '#' => self.char_frequency.hash_count,
1042 '*' => self.char_frequency.asterisk_count,
1043 '_' => self.char_frequency.underscore_count,
1044 '-' => self.char_frequency.hyphen_count,
1045 '+' => self.char_frequency.plus_count,
1046 '>' => self.char_frequency.gt_count,
1047 '|' => self.char_frequency.pipe_count,
1048 '[' => self.char_frequency.bracket_count,
1049 '`' => self.char_frequency.backtick_count,
1050 '<' => self.char_frequency.lt_count,
1051 '!' => self.char_frequency.exclamation_count,
1052 '\n' => self.char_frequency.newline_count,
1053 _ => self.content.matches(ch).count(), }
1055 }
1056
1057 pub fn likely_has_headings(&self) -> bool {
1059 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
1061
1062 pub fn likely_has_lists(&self) -> bool {
1064 self.char_frequency.asterisk_count > 0
1065 || self.char_frequency.hyphen_count > 0
1066 || self.char_frequency.plus_count > 0
1067 }
1068
1069 pub fn likely_has_emphasis(&self) -> bool {
1071 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1072 }
1073
1074 pub fn likely_has_tables(&self) -> bool {
1076 self.char_frequency.pipe_count > 2
1077 }
1078
1079 pub fn likely_has_blockquotes(&self) -> bool {
1081 self.char_frequency.gt_count > 0
1082 }
1083
1084 pub fn likely_has_code(&self) -> bool {
1086 self.char_frequency.backtick_count > 0
1087 }
1088
1089 pub fn likely_has_links_or_images(&self) -> bool {
1091 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1092 }
1093
1094 pub fn likely_has_html(&self) -> bool {
1096 self.char_frequency.lt_count > 0
1097 }
1098
1099 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1101 self.html_tags()
1102 .iter()
1103 .filter(|tag| tag.line == line_num)
1104 .cloned()
1105 .collect()
1106 }
1107
1108 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1110 self.emphasis_spans()
1111 .iter()
1112 .filter(|span| span.line == line_num)
1113 .cloned()
1114 .collect()
1115 }
1116
1117 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1119 self.table_rows()
1120 .iter()
1121 .filter(|row| row.line == line_num)
1122 .cloned()
1123 .collect()
1124 }
1125
1126 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1128 self.bare_urls()
1129 .iter()
1130 .filter(|url| url.line == line_num)
1131 .cloned()
1132 .collect()
1133 }
1134
1135 #[inline]
1141 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1142 let idx = match lines.binary_search_by(|line| {
1144 if byte_offset < line.byte_offset {
1145 std::cmp::Ordering::Greater
1146 } else if byte_offset > line.byte_offset + line.byte_len {
1147 std::cmp::Ordering::Less
1148 } else {
1149 std::cmp::Ordering::Equal
1150 }
1151 }) {
1152 Ok(idx) => idx,
1153 Err(idx) => idx.saturating_sub(1),
1154 };
1155
1156 let line = &lines[idx];
1157 let line_num = idx + 1;
1158 let col = byte_offset.saturating_sub(line.byte_offset);
1159
1160 (idx, line_num, col)
1161 }
1162
1163 #[inline]
1165 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1166 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1168
1169 if idx > 0 {
1171 let span = &code_spans[idx - 1];
1172 if offset >= span.byte_offset && offset < span.byte_end {
1173 return true;
1174 }
1175 }
1176
1177 false
1178 }
1179
1180 fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1184 use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1185
1186 let mut link_ranges = Vec::new();
1187 let mut options = Options::empty();
1188 options.insert(Options::ENABLE_WIKILINKS);
1189 options.insert(Options::ENABLE_FOOTNOTES);
1190
1191 let parser = Parser::new_ext(content, options).into_offset_iter();
1192 let mut link_stack: Vec<usize> = Vec::new();
1193
1194 for (event, range) in parser {
1195 match event {
1196 Event::Start(Tag::Link { .. }) => {
1197 link_stack.push(range.start);
1198 }
1199 Event::End(TagEnd::Link) => {
1200 if let Some(start_pos) = link_stack.pop() {
1201 link_ranges.push((start_pos, range.end));
1202 }
1203 }
1204 _ => {}
1205 }
1206 }
1207
1208 link_ranges
1209 }
1210
1211 fn parse_links(
1213 content: &'a str,
1214 lines: &[LineInfo],
1215 code_blocks: &[(usize, usize)],
1216 code_spans: &[CodeSpan],
1217 flavor: MarkdownFlavor,
1218 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1219 ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1220 use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1221 use std::collections::HashSet;
1222
1223 let mut links = Vec::with_capacity(content.len() / 500);
1224 let mut broken_links = Vec::new();
1225 let mut footnote_refs = Vec::new();
1226
1227 let mut found_positions = HashSet::new();
1229
1230 let mut options = Options::empty();
1240 options.insert(Options::ENABLE_WIKILINKS);
1241 options.insert(Options::ENABLE_FOOTNOTES);
1242
1243 let parser = Parser::new_with_broken_link_callback(
1244 content,
1245 options,
1246 Some(|link: BrokenLink<'_>| {
1247 broken_links.push(BrokenLinkInfo {
1248 reference: link.reference.to_string(),
1249 span: link.span.clone(),
1250 });
1251 None
1252 }),
1253 )
1254 .into_offset_iter();
1255
1256 let mut link_stack: Vec<(
1257 usize,
1258 usize,
1259 pulldown_cmark::CowStr<'a>,
1260 LinkType,
1261 pulldown_cmark::CowStr<'a>,
1262 )> = Vec::new();
1263 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1266 match event {
1267 Event::Start(Tag::Link {
1268 link_type,
1269 dest_url,
1270 id,
1271 ..
1272 }) => {
1273 link_stack.push((range.start, range.end, dest_url, link_type, id));
1275 text_chunks.clear();
1276 }
1277 Event::Text(text) if !link_stack.is_empty() => {
1278 text_chunks.push((text.to_string(), range.start, range.end));
1280 }
1281 Event::Code(code) if !link_stack.is_empty() => {
1282 let code_text = format!("`{code}`");
1284 text_chunks.push((code_text, range.start, range.end));
1285 }
1286 Event::End(TagEnd::Link) => {
1287 if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1288 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1290 text_chunks.clear();
1291 continue;
1292 }
1293
1294 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1296
1297 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1299 text_chunks.clear();
1300 continue;
1301 }
1302
1303 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1304
1305 let is_reference = matches!(
1306 link_type,
1307 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1308 );
1309
1310 let link_text = if start_pos < content.len() {
1313 let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1314
1315 let mut close_pos = None;
1319 let mut depth = 0;
1320 let mut in_code_span = false;
1321
1322 for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1323 let mut backslash_count = 0;
1325 let mut j = i;
1326 while j > 0 && link_bytes[j - 1] == b'\\' {
1327 backslash_count += 1;
1328 j -= 1;
1329 }
1330 let is_escaped = backslash_count % 2 != 0;
1331
1332 if byte == b'`' && !is_escaped {
1334 in_code_span = !in_code_span;
1335 }
1336
1337 if !is_escaped && !in_code_span {
1339 if byte == b'[' {
1340 depth += 1;
1341 } else if byte == b']' {
1342 if depth == 0 {
1343 close_pos = Some(i);
1345 break;
1346 } else {
1347 depth -= 1;
1348 }
1349 }
1350 }
1351 }
1352
1353 if let Some(pos) = close_pos {
1354 Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1355 } else {
1356 Cow::Borrowed("")
1357 }
1358 } else {
1359 Cow::Borrowed("")
1360 };
1361
1362 let reference_id = if is_reference && !ref_id.is_empty() {
1364 Some(Cow::Owned(ref_id.to_lowercase()))
1365 } else if is_reference {
1366 Some(Cow::Owned(link_text.to_lowercase()))
1368 } else {
1369 None
1370 };
1371
1372 found_positions.insert(start_pos);
1374
1375 links.push(ParsedLink {
1376 line: line_num,
1377 start_col: col_start,
1378 end_col: col_end,
1379 byte_offset: start_pos,
1380 byte_end: range.end,
1381 text: link_text,
1382 url: Cow::Owned(url.to_string()),
1383 is_reference,
1384 reference_id,
1385 link_type,
1386 });
1387
1388 text_chunks.clear();
1389 }
1390 }
1391 Event::FootnoteReference(footnote_id) => {
1392 if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1395 continue;
1396 }
1397
1398 let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1399 footnote_refs.push(FootnoteRef {
1400 id: footnote_id.to_string(),
1401 line: line_num,
1402 byte_offset: range.start,
1403 byte_end: range.end,
1404 });
1405 }
1406 _ => {}
1407 }
1408 }
1409
1410 for cap in LINK_PATTERN.captures_iter(content) {
1414 let full_match = cap.get(0).unwrap();
1415 let match_start = full_match.start();
1416 let match_end = full_match.end();
1417
1418 if found_positions.contains(&match_start) {
1420 continue;
1421 }
1422
1423 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1425 continue;
1426 }
1427
1428 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1430 continue;
1431 }
1432
1433 if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1435 continue;
1436 }
1437
1438 if Self::is_offset_in_code_span(code_spans, match_start) {
1440 continue;
1441 }
1442
1443 if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1445 continue;
1446 }
1447
1448 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1450
1451 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1453 continue;
1454 }
1455
1456 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1457
1458 let text = cap.get(1).map_or("", |m| m.as_str());
1459
1460 if let Some(ref_id) = cap.get(6) {
1462 let ref_id_str = ref_id.as_str();
1463 let normalized_ref = if ref_id_str.is_empty() {
1464 Cow::Owned(text.to_lowercase()) } else {
1466 Cow::Owned(ref_id_str.to_lowercase())
1467 };
1468
1469 links.push(ParsedLink {
1471 line: line_num,
1472 start_col: col_start,
1473 end_col: col_end,
1474 byte_offset: match_start,
1475 byte_end: match_end,
1476 text: Cow::Borrowed(text),
1477 url: Cow::Borrowed(""), is_reference: true,
1479 reference_id: Some(normalized_ref),
1480 link_type: LinkType::Reference, });
1482 }
1483 }
1484
1485 (links, broken_links, footnote_refs)
1486 }
1487
1488 fn parse_images(
1490 content: &'a str,
1491 lines: &[LineInfo],
1492 code_blocks: &[(usize, usize)],
1493 code_spans: &[CodeSpan],
1494 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1495 ) -> Vec<ParsedImage<'a>> {
1496 use crate::utils::skip_context::is_in_html_comment_ranges;
1497 use std::collections::HashSet;
1498
1499 let mut images = Vec::with_capacity(content.len() / 1000);
1501 let mut found_positions = HashSet::new();
1502
1503 let parser = Parser::new(content).into_offset_iter();
1505 let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1506 Vec::new();
1507 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1510 match event {
1511 Event::Start(Tag::Image {
1512 link_type,
1513 dest_url,
1514 id,
1515 ..
1516 }) => {
1517 image_stack.push((range.start, dest_url, link_type, id));
1518 text_chunks.clear();
1519 }
1520 Event::Text(text) if !image_stack.is_empty() => {
1521 text_chunks.push((text.to_string(), range.start, range.end));
1522 }
1523 Event::Code(code) if !image_stack.is_empty() => {
1524 let code_text = format!("`{code}`");
1525 text_chunks.push((code_text, range.start, range.end));
1526 }
1527 Event::End(TagEnd::Image) => {
1528 if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1529 if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1531 continue;
1532 }
1533
1534 if Self::is_offset_in_code_span(code_spans, start_pos) {
1536 continue;
1537 }
1538
1539 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1541 continue;
1542 }
1543
1544 let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1546 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1547
1548 let is_reference = matches!(
1549 link_type,
1550 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1551 );
1552
1553 let alt_text = if start_pos < content.len() {
1556 let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1557
1558 let mut close_pos = None;
1561 let mut depth = 0;
1562
1563 if image_bytes.len() > 2 {
1564 for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1565 let mut backslash_count = 0;
1567 let mut j = i;
1568 while j > 0 && image_bytes[j - 1] == b'\\' {
1569 backslash_count += 1;
1570 j -= 1;
1571 }
1572 let is_escaped = backslash_count % 2 != 0;
1573
1574 if !is_escaped {
1575 if byte == b'[' {
1576 depth += 1;
1577 } else if byte == b']' {
1578 if depth == 0 {
1579 close_pos = Some(i);
1581 break;
1582 } else {
1583 depth -= 1;
1584 }
1585 }
1586 }
1587 }
1588 }
1589
1590 if let Some(pos) = close_pos {
1591 Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1592 } else {
1593 Cow::Borrowed("")
1594 }
1595 } else {
1596 Cow::Borrowed("")
1597 };
1598
1599 let reference_id = if is_reference && !ref_id.is_empty() {
1600 Some(Cow::Owned(ref_id.to_lowercase()))
1601 } else if is_reference {
1602 Some(Cow::Owned(alt_text.to_lowercase())) } else {
1604 None
1605 };
1606
1607 found_positions.insert(start_pos);
1608 images.push(ParsedImage {
1609 line: line_num,
1610 start_col: col_start,
1611 end_col: col_end,
1612 byte_offset: start_pos,
1613 byte_end: range.end,
1614 alt_text,
1615 url: Cow::Owned(url.to_string()),
1616 is_reference,
1617 reference_id,
1618 link_type,
1619 });
1620 }
1621 }
1622 _ => {}
1623 }
1624 }
1625
1626 for cap in IMAGE_PATTERN.captures_iter(content) {
1628 let full_match = cap.get(0).unwrap();
1629 let match_start = full_match.start();
1630 let match_end = full_match.end();
1631
1632 if found_positions.contains(&match_start) {
1634 continue;
1635 }
1636
1637 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1639 continue;
1640 }
1641
1642 if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1644 || Self::is_offset_in_code_span(code_spans, match_start)
1645 || is_in_html_comment_ranges(html_comment_ranges, match_start)
1646 {
1647 continue;
1648 }
1649
1650 if let Some(ref_id) = cap.get(6) {
1652 let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1653 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1654 let alt_text = cap.get(1).map_or("", |m| m.as_str());
1655 let ref_id_str = ref_id.as_str();
1656 let normalized_ref = if ref_id_str.is_empty() {
1657 Cow::Owned(alt_text.to_lowercase())
1658 } else {
1659 Cow::Owned(ref_id_str.to_lowercase())
1660 };
1661
1662 images.push(ParsedImage {
1663 line: line_num,
1664 start_col: col_start,
1665 end_col: col_end,
1666 byte_offset: match_start,
1667 byte_end: match_end,
1668 alt_text: Cow::Borrowed(alt_text),
1669 url: Cow::Borrowed(""),
1670 is_reference: true,
1671 reference_id: Some(normalized_ref),
1672 link_type: LinkType::Reference, });
1674 }
1675 }
1676
1677 images
1678 }
1679
1680 fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1682 let mut refs = Vec::with_capacity(lines.len() / 20); for (line_idx, line_info) in lines.iter().enumerate() {
1686 if line_info.in_code_block {
1688 continue;
1689 }
1690
1691 let line = line_info.content(content);
1692 let line_num = line_idx + 1;
1693
1694 if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1695 let id = cap.get(1).unwrap().as_str().to_lowercase();
1696 let url = cap.get(2).unwrap().as_str().to_string();
1697 let title_match = cap.get(3).or_else(|| cap.get(4));
1698 let title = title_match.map(|m| m.as_str().to_string());
1699
1700 let match_obj = cap.get(0).unwrap();
1703 let byte_offset = line_info.byte_offset + match_obj.start();
1704 let byte_end = line_info.byte_offset + match_obj.end();
1705
1706 let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1708 let start = line_info.byte_offset + m.start().saturating_sub(1);
1710 let end = line_info.byte_offset + m.end() + 1; (Some(start), Some(end))
1712 } else {
1713 (None, None)
1714 };
1715
1716 refs.push(ReferenceDef {
1717 line: line_num,
1718 id,
1719 url,
1720 title,
1721 byte_offset,
1722 byte_end,
1723 title_byte_start,
1724 title_byte_end,
1725 });
1726 }
1727 }
1728
1729 refs
1730 }
1731
/// Splits a blockquote line into its marker prefix and the quoted content.
///
/// The prefix is made of one or more `>` markers, each optionally preceded by
/// whitespace and optionally followed by a single space or tab. Returns
/// `None` when the line (ignoring leading whitespace) does not start with `>`.
///
/// Both returned slices borrow from `line`; concatenated they give `line` back.
#[inline]
fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
    let mut rest = line;

    // Peel off one `>` marker per iteration, together with the whitespace in
    // front of it and at most one space/tab behind it.
    while let Some(after_marker) = rest.trim_start().strip_prefix('>') {
        rest = after_marker.strip_prefix([' ', '\t']).unwrap_or(after_marker);
    }

    // `rest` is always a suffix of `line`, so the length difference is the prefix size.
    let prefix_len = line.len() - rest.len();
    if prefix_len == 0 {
        return None;
    }

    let (prefix, _) = line.split_at(prefix_len);
    Some((prefix, rest))
}
1772
/// Recognises an unordered list marker at the start of `line`.
///
/// Returns `(leading_whitespace, marker, spacing, content)` where `marker` is
/// one of `-`, `*` or `+`, `spacing` is the run of spaces/tabs directly after
/// the marker (possibly empty) and `content` is everything that follows.
/// Returns `None` when the first non-blank character is not a list marker or
/// the line holds only spaces/tabs.
#[inline]
fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
    let blank = [' ', '\t'];

    let after_indent = line.trim_start_matches(blank);
    let (indent, _) = line.split_at(line.len() - after_indent.len());

    let mut tail = after_indent.chars();
    let marker = tail.next().filter(|&c| matches!(c, '-' | '*' | '+'))?;

    let after_marker = tail.as_str();
    let body = after_marker.trim_start_matches(blank);
    let (gap, _) = after_marker.split_at(after_marker.len() - body.len());

    Some((indent, marker, gap, body))
}
1805
/// Recognises an ordered list marker at the start of `line`.
///
/// Returns `(leading_whitespace, number, delimiter, spacing, content)` where
/// `number` is a non-empty run of ASCII digits, `delimiter` is `.` or `)`,
/// `spacing` is the run of spaces/tabs after the delimiter (possibly empty)
/// and `content` is the remainder. Returns `None` when there are no digits or
/// the digits are not followed by a valid delimiter.
#[inline]
fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
    let blank = [' ', '\t'];

    let after_indent = line.trim_start_matches(blank);
    let (indent, _) = line.split_at(line.len() - after_indent.len());

    let after_digits = after_indent.trim_start_matches(|c: char| c.is_ascii_digit());
    let (digits, _) = after_indent.split_at(after_indent.len() - after_digits.len());
    if digits.is_empty() {
        return None;
    }

    let mut tail = after_digits.chars();
    let delimiter = tail.next().filter(|&c| matches!(c, '.' | ')'))?;

    let after_delimiter = tail.as_str();
    let body = after_delimiter.trim_start_matches(blank);
    let (gap, _) = after_delimiter.split_at(after_delimiter.len() - body.len());

    Some((indent, digits, delimiter, gap, body))
}
1853
/// Builds a per-line flag vector telling whether each line overlaps a code block.
///
/// `line_offsets` holds the starting byte offset of every line and
/// `code_blocks` the `(start, end)` byte ranges of the code blocks. Range ends
/// are first snapped to UTF-8 character boundaries of `content` (start moves
/// backwards, end moves forwards and is capped at `content.len()`). Every
/// line from the one containing the start up to, but excluding, the first
/// line whose offset is `>=` the end is then flagged.
///
/// The result has exactly `line_offsets.len()` entries.
fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
    let total = content.len();
    let mut flags = vec![false; line_offsets.len()];

    for &(block_start, block_end) in code_blocks {
        // Snap the start backwards onto a character boundary.
        let mut lo = block_start;
        while lo > 0 && !content.is_char_boundary(lo) {
            lo -= 1;
        }

        // Clamp the end to the content, then snap it forwards onto a boundary.
        let mut hi = block_end.min(total);
        while hi < total && !content.is_char_boundary(hi) {
            hi += 1;
        }

        // Line containing `lo`: the last line whose offset is <= lo.
        let first = line_offsets.partition_point(|&offset| offset <= lo).saturating_sub(1);
        // One past the last line that starts before `hi`.
        let past_last = line_offsets.partition_point(|&offset| offset < hi);

        // `get_mut` yields `None` for an inverted range, leaving the flags untouched.
        if let Some(covered) = flags.get_mut(first..past_last) {
            covered.fill(true);
        }
    }

    flags
}
1913
1914 fn compute_basic_line_info(
1916 content: &str,
1917 line_offsets: &[usize],
1918 code_blocks: &[(usize, usize)],
1919 flavor: MarkdownFlavor,
1920 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1921 autodoc_ranges: &[crate::utils::skip_context::ByteRange],
1922 ) -> Vec<LineInfo> {
1923 let content_lines: Vec<&str> = content.lines().collect();
1924 let mut lines = Vec::with_capacity(content_lines.len());
1925
1926 let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
1928
1929 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
1932
1933 for (i, line) in content_lines.iter().enumerate() {
1934 let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
1935 let indent = line.len() - line.trim_start().len();
1936 let visual_indent = ElementCache::calculate_indentation_width_default(line);
1938
1939 let blockquote_parse = Self::parse_blockquote_prefix(line);
1941
1942 let is_blank = if let Some((_, content)) = blockquote_parse {
1944 content.trim().is_empty()
1946 } else {
1947 line.trim().is_empty()
1948 };
1949
1950 let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
1952
1953 let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
1955 && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
1956 let line_end_offset = byte_offset + line.len();
1959 let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
1960 html_comment_ranges,
1961 byte_offset,
1962 line_end_offset,
1963 );
1964 let list_item = if !(in_code_block
1965 || is_blank
1966 || in_mkdocstrings
1967 || in_html_comment
1968 || (front_matter_end > 0 && i < front_matter_end))
1969 {
1970 let (line_for_list_check, blockquote_prefix_len) = if let Some((prefix, content)) = blockquote_parse {
1972 (content, prefix.len())
1973 } else {
1974 (&**line, 0)
1975 };
1976
1977 if let Some((leading_spaces, marker, spacing, _content)) =
1978 Self::parse_unordered_list(line_for_list_check)
1979 {
1980 let marker_column = blockquote_prefix_len + leading_spaces.len();
1981 let content_column = marker_column + 1 + spacing.len();
1982
1983 if spacing.is_empty() {
1990 None
1991 } else {
1992 Some(ListItemInfo {
1993 marker: marker.to_string(),
1994 is_ordered: false,
1995 number: None,
1996 marker_column,
1997 content_column,
1998 })
1999 }
2000 } else if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
2001 Self::parse_ordered_list(line_for_list_check)
2002 {
2003 let marker = format!("{number_str}{delimiter}");
2004 let marker_column = blockquote_prefix_len + leading_spaces.len();
2005 let content_column = marker_column + marker.len() + spacing.len();
2006
2007 if spacing.is_empty() {
2010 None
2011 } else {
2012 Some(ListItemInfo {
2013 marker,
2014 is_ordered: true,
2015 number: number_str.parse().ok(),
2016 marker_column,
2017 content_column,
2018 })
2019 }
2020 } else {
2021 None
2022 }
2023 } else {
2024 None
2025 };
2026
2027 let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2030 let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2031
2032 lines.push(LineInfo {
2033 byte_offset,
2034 byte_len: line.len(),
2035 indent,
2036 visual_indent,
2037 is_blank,
2038 in_code_block,
2039 in_front_matter,
2040 in_html_block: false, in_html_comment,
2042 list_item,
2043 heading: None, blockquote: None, in_mkdocstrings,
2046 in_esm_block: false, in_code_span_continuation: false, is_horizontal_rule: is_hr,
2049 });
2050 }
2051
2052 lines
2053 }
2054
2055 fn detect_headings_and_blockquotes(
2057 content: &str,
2058 lines: &mut [LineInfo],
2059 flavor: MarkdownFlavor,
2060 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2061 link_byte_ranges: &[(usize, usize)],
2062 ) {
2063 static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
2065 LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
2066 static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
2067 LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
2068
2069 let content_lines: Vec<&str> = content.lines().collect();
2070
2071 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2073
2074 for i in 0..lines.len() {
2076 if lines[i].in_code_block {
2077 continue;
2078 }
2079
2080 if front_matter_end > 0 && i < front_matter_end {
2082 continue;
2083 }
2084
2085 if lines[i].in_html_block {
2087 continue;
2088 }
2089
2090 let line = content_lines[i];
2091
2092 if let Some(bq) = parse_blockquote_detailed(line) {
2094 let nesting_level = bq.markers.len(); let marker_column = bq.indent.len();
2096
2097 let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
2099
2100 let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
2102 let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
2105
2106 let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
2110
2111 lines[i].blockquote = Some(BlockquoteInfo {
2112 nesting_level,
2113 indent: bq.indent.to_string(),
2114 marker_column,
2115 prefix,
2116 content: bq.content.to_string(),
2117 has_no_space_after_marker: has_no_space,
2118 has_multiple_spaces_after_marker: has_multiple_spaces,
2119 needs_md028_fix,
2120 });
2121 }
2122
2123 if lines[i].is_blank {
2125 continue;
2126 }
2127
2128 let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
2131 crate::utils::mkdocs_snippets::is_snippet_section_start(line)
2132 || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
2133 } else {
2134 false
2135 };
2136
2137 if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
2138 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
2140 continue;
2141 }
2142 let line_offset = lines[i].byte_offset;
2145 if link_byte_ranges
2146 .iter()
2147 .any(|&(start, end)| line_offset > start && line_offset < end)
2148 {
2149 continue;
2150 }
2151 let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
2152 let hashes = caps.get(2).map_or("", |m| m.as_str());
2153 let spaces_after = caps.get(3).map_or("", |m| m.as_str());
2154 let rest = caps.get(4).map_or("", |m| m.as_str());
2155
2156 let level = hashes.len() as u8;
2157 let marker_column = leading_spaces.len();
2158
2159 let (text, has_closing, closing_seq) = {
2161 let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
2163 if rest[id_start..].trim_end().ends_with('}') {
2165 (&rest[..id_start], &rest[id_start..])
2167 } else {
2168 (rest, "")
2169 }
2170 } else {
2171 (rest, "")
2172 };
2173
2174 let trimmed_rest = rest_without_id.trim_end();
2176 if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
2177 let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();
2180
2181 let last_hash_char_idx = char_positions
2183 .iter()
2184 .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);
2185
2186 if let Some(mut char_idx) = last_hash_char_idx {
2187 while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
2189 char_idx -= 1;
2190 }
2191
2192 let start_of_hashes = char_positions[char_idx].0;
2194
2195 let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();
2197
2198 let potential_closing = &trimmed_rest[start_of_hashes..];
2200 let is_all_hashes = potential_closing.chars().all(|c| c == '#');
2201
2202 if is_all_hashes && has_space_before {
2203 let closing_hashes = potential_closing.to_string();
2205 let text_part = if !custom_id_part.is_empty() {
2208 format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
2211 } else {
2212 trimmed_rest[..start_of_hashes].trim_end().to_string()
2213 };
2214 (text_part, true, closing_hashes)
2215 } else {
2216 (rest.to_string(), false, String::new())
2218 }
2219 } else {
2220 (rest.to_string(), false, String::new())
2222 }
2223 } else {
2224 (rest.to_string(), false, String::new())
2226 }
2227 };
2228
2229 let content_column = marker_column + hashes.len() + spaces_after.len();
2230
2231 let raw_text = text.trim().to_string();
2233 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2234
2235 if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
2237 let next_line = content_lines[i + 1];
2238 if !lines[i + 1].in_code_block
2239 && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
2240 && let Some(next_line_id) =
2241 crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
2242 {
2243 custom_id = Some(next_line_id);
2244 }
2245 }
2246
2247 let is_valid = !spaces_after.is_empty()
2257 || rest.is_empty()
2258 || level > 1
2259 || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());
2260
2261 lines[i].heading = Some(HeadingInfo {
2262 level,
2263 style: HeadingStyle::ATX,
2264 marker: hashes.to_string(),
2265 marker_column,
2266 content_column,
2267 text: clean_text,
2268 custom_id,
2269 raw_text,
2270 has_closing_sequence: has_closing,
2271 closing_sequence: closing_seq,
2272 is_valid,
2273 });
2274 }
2275 else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2277 let next_line = content_lines[i + 1];
2278 if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2279 if front_matter_end > 0 && i < front_matter_end {
2281 continue;
2282 }
2283
2284 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2286 {
2287 continue;
2288 }
2289
2290 let underline = next_line.trim();
2291
2292 let level = if underline.starts_with('=') { 1 } else { 2 };
2293 let style = if level == 1 {
2294 HeadingStyle::Setext1
2295 } else {
2296 HeadingStyle::Setext2
2297 };
2298
2299 let raw_text = line.trim().to_string();
2301 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2302
2303 if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2305 let attr_line = content_lines[i + 2];
2306 if !lines[i + 2].in_code_block
2307 && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2308 && let Some(attr_line_id) =
2309 crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2310 {
2311 custom_id = Some(attr_line_id);
2312 }
2313 }
2314
2315 lines[i].heading = Some(HeadingInfo {
2316 level,
2317 style,
2318 marker: underline.to_string(),
2319 marker_column: next_line.len() - next_line.trim_start().len(),
2320 content_column: lines[i].indent,
2321 text: clean_text,
2322 custom_id,
2323 raw_text,
2324 has_closing_sequence: false,
2325 closing_sequence: String::new(),
2326 is_valid: true, });
2328 }
2329 }
2330 }
2331 }
2332
2333 fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2335 const BLOCK_ELEMENTS: &[&str] = &[
2338 "address",
2339 "article",
2340 "aside",
2341 "audio",
2342 "blockquote",
2343 "canvas",
2344 "details",
2345 "dialog",
2346 "dd",
2347 "div",
2348 "dl",
2349 "dt",
2350 "embed",
2351 "fieldset",
2352 "figcaption",
2353 "figure",
2354 "footer",
2355 "form",
2356 "h1",
2357 "h2",
2358 "h3",
2359 "h4",
2360 "h5",
2361 "h6",
2362 "header",
2363 "hr",
2364 "iframe",
2365 "li",
2366 "main",
2367 "menu",
2368 "nav",
2369 "noscript",
2370 "object",
2371 "ol",
2372 "p",
2373 "picture",
2374 "pre",
2375 "script",
2376 "search",
2377 "section",
2378 "source",
2379 "style",
2380 "summary",
2381 "svg",
2382 "table",
2383 "tbody",
2384 "td",
2385 "template",
2386 "textarea",
2387 "tfoot",
2388 "th",
2389 "thead",
2390 "tr",
2391 "track",
2392 "ul",
2393 "video",
2394 ];
2395
2396 let mut i = 0;
2397 while i < lines.len() {
2398 if lines[i].in_code_block || lines[i].in_front_matter {
2400 i += 1;
2401 continue;
2402 }
2403
2404 let trimmed = lines[i].content(content).trim_start();
2405
2406 if trimmed.starts_with('<') && trimmed.len() > 1 {
2408 let after_bracket = &trimmed[1..];
2410 let is_closing = after_bracket.starts_with('/');
2411 let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2412
2413 let tag_name = tag_start
2415 .chars()
2416 .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2417 .collect::<String>()
2418 .to_lowercase();
2419
2420 if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2422 lines[i].in_html_block = true;
2424
2425 if !is_closing {
2428 let closing_tag = format!("</{tag_name}>");
2429 let allow_blank_lines = tag_name == "style" || tag_name == "script";
2431 let mut j = i + 1;
2432 while j < lines.len() && j < i + 100 {
2433 if !allow_blank_lines && lines[j].is_blank {
2436 break;
2437 }
2438
2439 lines[j].in_html_block = true;
2440
2441 if lines[j].content(content).contains(&closing_tag) {
2443 break;
2444 }
2445 j += 1;
2446 }
2447 }
2448 }
2449 }
2450
2451 i += 1;
2452 }
2453 }
2454
2455 fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2458 if !flavor.supports_esm_blocks() {
2460 return;
2461 }
2462
2463 let mut in_multiline_comment = false;
2464
2465 for line in lines.iter_mut() {
2466 if line.is_blank || line.in_html_comment {
2468 continue;
2469 }
2470
2471 let trimmed = line.content(content).trim_start();
2472
2473 if in_multiline_comment {
2475 if trimmed.contains("*/") {
2476 in_multiline_comment = false;
2477 }
2478 continue;
2479 }
2480
2481 if trimmed.starts_with("//") {
2483 continue;
2484 }
2485
2486 if trimmed.starts_with("/*") {
2488 if !trimmed.contains("*/") {
2489 in_multiline_comment = true;
2490 }
2491 continue;
2492 }
2493
2494 if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
2496 line.in_esm_block = true;
2497 } else {
2498 break;
2500 }
2501 }
2502 }
2503
2504 fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
2506 let mut code_spans = Vec::new();
2507
2508 if !content.contains('`') {
2510 return code_spans;
2511 }
2512
2513 let parser = Parser::new(content).into_offset_iter();
2515
2516 for (event, range) in parser {
2517 if let Event::Code(_) = event {
2518 let start_pos = range.start;
2519 let end_pos = range.end;
2520
2521 let full_span = &content[start_pos..end_pos];
2523 let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
2524
2525 let content_start = start_pos + backtick_count;
2527 let content_end = end_pos - backtick_count;
2528 let span_content = if content_start < content_end {
2529 content[content_start..content_end].to_string()
2530 } else {
2531 String::new()
2532 };
2533
2534 let line_idx = lines
2537 .partition_point(|line| line.byte_offset <= start_pos)
2538 .saturating_sub(1);
2539 let line_num = line_idx + 1;
2540 let byte_col_start = start_pos - lines[line_idx].byte_offset;
2541
2542 let end_line_idx = lines
2544 .partition_point(|line| line.byte_offset <= end_pos)
2545 .saturating_sub(1);
2546 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
2547
2548 let line_content = lines[line_idx].content(content);
2551 let col_start = if byte_col_start <= line_content.len() {
2552 line_content[..byte_col_start].chars().count()
2553 } else {
2554 line_content.chars().count()
2555 };
2556
2557 let end_line_content = lines[end_line_idx].content(content);
2558 let col_end = if byte_col_end <= end_line_content.len() {
2559 end_line_content[..byte_col_end].chars().count()
2560 } else {
2561 end_line_content.chars().count()
2562 };
2563
2564 code_spans.push(CodeSpan {
2565 line: line_num,
2566 end_line: end_line_idx + 1,
2567 start_col: col_start,
2568 end_col: col_end,
2569 byte_offset: start_pos,
2570 byte_end: end_pos,
2571 backtick_count,
2572 content: span_content,
2573 });
2574 }
2575 }
2576
2577 code_spans.sort_by_key(|span| span.byte_offset);
2579
2580 code_spans
2581 }
2582
2583 fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
2594 const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
2596
2597 #[inline]
2600 fn reset_tracking_state(
2601 list_item: &ListItemInfo,
2602 has_list_breaking_content: &mut bool,
2603 min_continuation: &mut usize,
2604 ) {
2605 *has_list_breaking_content = false;
2606 let marker_width = if list_item.is_ordered {
2607 list_item.marker.len() + 1 } else {
2609 list_item.marker.len()
2610 };
2611 *min_continuation = if list_item.is_ordered {
2612 marker_width
2613 } else {
2614 UNORDERED_LIST_MIN_CONTINUATION_INDENT
2615 };
2616 }
2617
2618 let mut list_blocks = Vec::with_capacity(lines.len() / 10); let mut current_block: Option<ListBlock> = None;
2621 let mut last_list_item_line = 0;
2622 let mut current_indent_level = 0;
2623 let mut last_marker_width = 0;
2624
2625 let mut has_list_breaking_content_since_last_item = false;
2627 let mut min_continuation_for_tracking = 0;
2628
2629 for (line_idx, line_info) in lines.iter().enumerate() {
2630 let line_num = line_idx + 1;
2631
2632 if line_info.in_code_block {
2634 if let Some(ref mut block) = current_block {
2635 let min_continuation_indent =
2637 CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
2638
2639 let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
2641
2642 match context {
2643 CodeBlockContext::Indented => {
2644 block.end_line = line_num;
2646 continue;
2647 }
2648 CodeBlockContext::Standalone => {
2649 let completed_block = current_block.take().unwrap();
2651 list_blocks.push(completed_block);
2652 continue;
2653 }
2654 CodeBlockContext::Adjacent => {
2655 block.end_line = line_num;
2657 continue;
2658 }
2659 }
2660 } else {
2661 continue;
2663 }
2664 }
2665
2666 let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
2668 caps.get(0).unwrap().as_str().to_string()
2669 } else {
2670 String::new()
2671 };
2672
2673 if current_block.is_some()
2676 && line_info.list_item.is_none()
2677 && !line_info.is_blank
2678 && !line_info.in_code_span_continuation
2679 {
2680 let line_content = line_info.content(content).trim();
2681
2682 let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
2687 let breaks_list = line_info.heading.is_some()
2688 || line_content.starts_with("---")
2689 || line_content.starts_with("***")
2690 || line_content.starts_with("___")
2691 || crate::utils::skip_context::is_table_line(line_content)
2692 || line_content.starts_with(">")
2693 || (line_info.indent > 0
2694 && line_info.indent < min_continuation_for_tracking
2695 && !is_lazy_continuation);
2696
2697 if breaks_list {
2698 has_list_breaking_content_since_last_item = true;
2699 }
2700 }
2701
2702 if line_info.in_code_span_continuation
2705 && line_info.list_item.is_none()
2706 && let Some(ref mut block) = current_block
2707 {
2708 block.end_line = line_num;
2709 }
2710
2711 let is_valid_continuation =
2716 line_info.indent >= min_continuation_for_tracking || (line_info.indent == 0 && !line_info.is_blank); if !line_info.in_code_span_continuation
2718 && line_info.list_item.is_none()
2719 && !line_info.is_blank
2720 && !line_info.in_code_block
2721 && is_valid_continuation
2722 && let Some(ref mut block) = current_block
2723 {
2724 block.end_line = line_num;
2725 }
2726
2727 if let Some(list_item) = &line_info.list_item {
2729 let item_indent = list_item.marker_column;
2731 let nesting = item_indent / 2; if let Some(ref mut block) = current_block {
2734 let is_nested = nesting > block.nesting_level;
2738 let same_type =
2739 (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
2740 let same_context = block.blockquote_prefix == blockquote_prefix;
2741 let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
2743
2744 let marker_compatible =
2746 block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
2747
2748 let has_non_list_content = has_list_breaking_content_since_last_item;
2751
2752 let mut continues_list = if is_nested {
2756 same_context && reasonable_distance && !has_non_list_content
2758 } else {
2759 same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
2761 };
2762
2763 if !continues_list && reasonable_distance && line_num > 0 && block.end_line == line_num - 1 {
2766 if block.item_lines.contains(&(line_num - 1)) {
2769 continues_list = true;
2771 } else {
2772 continues_list = true;
2776 }
2777 }
2778
2779 if continues_list {
2780 block.end_line = line_num;
2782 block.item_lines.push(line_num);
2783
2784 block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
2786 list_item.marker.len() + 1
2787 } else {
2788 list_item.marker.len()
2789 });
2790
2791 if !block.is_ordered
2793 && block.marker.is_some()
2794 && block.marker.as_ref() != Some(&list_item.marker)
2795 {
2796 block.marker = None;
2798 }
2799
2800 reset_tracking_state(
2802 list_item,
2803 &mut has_list_breaking_content_since_last_item,
2804 &mut min_continuation_for_tracking,
2805 );
2806 } else {
2807 list_blocks.push(block.clone());
2810
2811 *block = ListBlock {
2812 start_line: line_num,
2813 end_line: line_num,
2814 is_ordered: list_item.is_ordered,
2815 marker: if list_item.is_ordered {
2816 None
2817 } else {
2818 Some(list_item.marker.clone())
2819 },
2820 blockquote_prefix: blockquote_prefix.clone(),
2821 item_lines: vec![line_num],
2822 nesting_level: nesting,
2823 max_marker_width: if list_item.is_ordered {
2824 list_item.marker.len() + 1
2825 } else {
2826 list_item.marker.len()
2827 },
2828 };
2829
2830 reset_tracking_state(
2832 list_item,
2833 &mut has_list_breaking_content_since_last_item,
2834 &mut min_continuation_for_tracking,
2835 );
2836 }
2837 } else {
2838 current_block = Some(ListBlock {
2840 start_line: line_num,
2841 end_line: line_num,
2842 is_ordered: list_item.is_ordered,
2843 marker: if list_item.is_ordered {
2844 None
2845 } else {
2846 Some(list_item.marker.clone())
2847 },
2848 blockquote_prefix,
2849 item_lines: vec![line_num],
2850 nesting_level: nesting,
2851 max_marker_width: list_item.marker.len(),
2852 });
2853
2854 reset_tracking_state(
2856 list_item,
2857 &mut has_list_breaking_content_since_last_item,
2858 &mut min_continuation_for_tracking,
2859 );
2860 }
2861
2862 last_list_item_line = line_num;
2863 current_indent_level = item_indent;
2864 last_marker_width = if list_item.is_ordered {
2865 list_item.marker.len() + 1 } else {
2867 list_item.marker.len()
2868 };
2869 } else if let Some(ref mut block) = current_block {
2870 let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
2880 lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
2881 } else {
2882 false
2883 };
2884
2885 let min_continuation_indent = if block.is_ordered {
2889 current_indent_level + last_marker_width
2890 } else {
2891 current_indent_level + 2 };
2893
2894 if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
2895 block.end_line = line_num;
2897 } else if line_info.is_blank {
2898 let mut check_idx = line_idx + 1;
2901 let mut found_continuation = false;
2902
2903 while check_idx < lines.len() && lines[check_idx].is_blank {
2905 check_idx += 1;
2906 }
2907
2908 if check_idx < lines.len() {
2909 let next_line = &lines[check_idx];
2910 if !next_line.in_code_block && next_line.indent >= min_continuation_indent {
2912 found_continuation = true;
2913 }
2914 else if !next_line.in_code_block
2916 && next_line.list_item.is_some()
2917 && let Some(item) = &next_line.list_item
2918 {
2919 let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
2920 .find(next_line.content(content))
2921 .map_or(String::new(), |m| m.as_str().to_string());
2922 if item.marker_column == current_indent_level
2923 && item.is_ordered == block.is_ordered
2924 && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
2925 {
2926 let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
2929 if let Some(between_line) = lines.get(idx) {
2930 let between_content = between_line.content(content);
2931 let trimmed = between_content.trim();
2932 if trimmed.is_empty() {
2934 return false;
2935 }
2936 let line_indent = between_content.len() - between_content.trim_start().len();
2938
2939 if trimmed.starts_with("```")
2941 || trimmed.starts_with("~~~")
2942 || trimmed.starts_with("---")
2943 || trimmed.starts_with("***")
2944 || trimmed.starts_with("___")
2945 || trimmed.starts_with(">")
2946 || crate::utils::skip_context::is_table_line(trimmed)
2947 || between_line.heading.is_some()
2948 {
2949 return true; }
2951
2952 line_indent >= min_continuation_indent
2954 } else {
2955 false
2956 }
2957 });
2958
2959 if block.is_ordered {
2960 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2963 if let Some(between_line) = lines.get(idx) {
2964 let trimmed = between_line.content(content).trim();
2965 if trimmed.is_empty() {
2966 return false;
2967 }
2968 trimmed.starts_with("```")
2970 || trimmed.starts_with("~~~")
2971 || trimmed.starts_with("---")
2972 || trimmed.starts_with("***")
2973 || trimmed.starts_with("___")
2974 || trimmed.starts_with(">")
2975 || crate::utils::skip_context::is_table_line(trimmed)
2976 || between_line.heading.is_some()
2977 } else {
2978 false
2979 }
2980 });
2981 found_continuation = !has_structural_separators;
2982 } else {
2983 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2985 if let Some(between_line) = lines.get(idx) {
2986 let trimmed = between_line.content(content).trim();
2987 if trimmed.is_empty() {
2988 return false;
2989 }
2990 trimmed.starts_with("```")
2992 || trimmed.starts_with("~~~")
2993 || trimmed.starts_with("---")
2994 || trimmed.starts_with("***")
2995 || trimmed.starts_with("___")
2996 || trimmed.starts_with(">")
2997 || crate::utils::skip_context::is_table_line(trimmed)
2998 || between_line.heading.is_some()
2999 } else {
3000 false
3001 }
3002 });
3003 found_continuation = !has_structural_separators;
3004 }
3005 }
3006 }
3007 }
3008
3009 if found_continuation {
3010 block.end_line = line_num;
3012 } else {
3013 list_blocks.push(block.clone());
3015 current_block = None;
3016 }
3017 } else {
3018 let min_required_indent = if block.is_ordered {
3021 current_indent_level + last_marker_width
3022 } else {
3023 current_indent_level + 2
3024 };
3025
3026 let line_content = line_info.content(content).trim();
3031
3032 let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
3034
3035 let is_structural_separator = line_info.heading.is_some()
3036 || line_content.starts_with("```")
3037 || line_content.starts_with("~~~")
3038 || line_content.starts_with("---")
3039 || line_content.starts_with("***")
3040 || line_content.starts_with("___")
3041 || line_content.starts_with(">")
3042 || looks_like_table;
3043
3044 let is_lazy_continuation = !is_structural_separator
3047 && !line_info.is_blank
3048 && (line_info.indent == 0 || line_info.indent >= min_required_indent);
3049
3050 if is_lazy_continuation {
3051 let content_to_check = if !blockquote_prefix.is_empty() {
3054 line_info
3056 .content(content)
3057 .strip_prefix(&blockquote_prefix)
3058 .unwrap_or(line_info.content(content))
3059 .trim()
3060 } else {
3061 line_info.content(content).trim()
3062 };
3063
3064 let starts_with_uppercase = content_to_check.chars().next().is_some_and(|c| c.is_uppercase());
3065
3066 if starts_with_uppercase && last_list_item_line > 0 {
3069 list_blocks.push(block.clone());
3071 current_block = None;
3072 } else {
3073 block.end_line = line_num;
3075 }
3076 } else {
3077 list_blocks.push(block.clone());
3079 current_block = None;
3080 }
3081 }
3082 }
3083 }
3084
3085 if let Some(block) = current_block {
3087 list_blocks.push(block);
3088 }
3089
3090 merge_adjacent_list_blocks(content, &mut list_blocks, lines);
3092
3093 list_blocks
3094 }
3095
3096 fn compute_char_frequency(content: &str) -> CharFrequency {
3098 let mut frequency = CharFrequency::default();
3099
3100 for ch in content.chars() {
3101 match ch {
3102 '#' => frequency.hash_count += 1,
3103 '*' => frequency.asterisk_count += 1,
3104 '_' => frequency.underscore_count += 1,
3105 '-' => frequency.hyphen_count += 1,
3106 '+' => frequency.plus_count += 1,
3107 '>' => frequency.gt_count += 1,
3108 '|' => frequency.pipe_count += 1,
3109 '[' => frequency.bracket_count += 1,
3110 '`' => frequency.backtick_count += 1,
3111 '<' => frequency.lt_count += 1,
3112 '!' => frequency.exclamation_count += 1,
3113 '\n' => frequency.newline_count += 1,
3114 _ => {}
3115 }
3116 }
3117
3118 frequency
3119 }
3120
3121 fn parse_html_tags(
3123 content: &str,
3124 lines: &[LineInfo],
3125 code_blocks: &[(usize, usize)],
3126 flavor: MarkdownFlavor,
3127 ) -> Vec<HtmlTag> {
3128 static HTML_TAG_REGEX: LazyLock<regex::Regex> =
3129 LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
3130
3131 let mut html_tags = Vec::with_capacity(content.matches('<').count());
3132
3133 for cap in HTML_TAG_REGEX.captures_iter(content) {
3134 let full_match = cap.get(0).unwrap();
3135 let match_start = full_match.start();
3136 let match_end = full_match.end();
3137
3138 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3140 continue;
3141 }
3142
3143 let is_closing = !cap.get(1).unwrap().as_str().is_empty();
3144 let tag_name_original = cap.get(2).unwrap().as_str();
3145 let tag_name = tag_name_original.to_lowercase();
3146 let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
3147
3148 if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
3151 continue;
3152 }
3153
3154 let mut line_num = 1;
3156 let mut col_start = match_start;
3157 let mut col_end = match_end;
3158 for (idx, line_info) in lines.iter().enumerate() {
3159 if match_start >= line_info.byte_offset {
3160 line_num = idx + 1;
3161 col_start = match_start - line_info.byte_offset;
3162 col_end = match_end - line_info.byte_offset;
3163 } else {
3164 break;
3165 }
3166 }
3167
3168 html_tags.push(HtmlTag {
3169 line: line_num,
3170 start_col: col_start,
3171 end_col: col_end,
3172 byte_offset: match_start,
3173 byte_end: match_end,
3174 tag_name,
3175 is_closing,
3176 is_self_closing,
3177 raw_content: full_match.as_str().to_string(),
3178 });
3179 }
3180
3181 html_tags
3182 }
3183
3184 fn parse_emphasis_spans(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<EmphasisSpan> {
3186 static EMPHASIS_REGEX: LazyLock<regex::Regex> =
3187 LazyLock::new(|| regex::Regex::new(r"(\*{1,3}|_{1,3})([^*_\s][^*_]*?)(\*{1,3}|_{1,3})").unwrap());
3188
3189 let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
3190
3191 for cap in EMPHASIS_REGEX.captures_iter(content) {
3192 let full_match = cap.get(0).unwrap();
3193 let match_start = full_match.start();
3194 let match_end = full_match.end();
3195
3196 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3198 continue;
3199 }
3200
3201 let opening_markers = cap.get(1).unwrap().as_str();
3202 let content_part = cap.get(2).unwrap().as_str();
3203 let closing_markers = cap.get(3).unwrap().as_str();
3204
3205 if opening_markers.chars().next() != closing_markers.chars().next()
3207 || opening_markers.len() != closing_markers.len()
3208 {
3209 continue;
3210 }
3211
3212 let marker = opening_markers.chars().next().unwrap();
3213 let marker_count = opening_markers.len();
3214
3215 let mut line_num = 1;
3217 let mut col_start = match_start;
3218 let mut col_end = match_end;
3219 for (idx, line_info) in lines.iter().enumerate() {
3220 if match_start >= line_info.byte_offset {
3221 line_num = idx + 1;
3222 col_start = match_start - line_info.byte_offset;
3223 col_end = match_end - line_info.byte_offset;
3224 } else {
3225 break;
3226 }
3227 }
3228
3229 emphasis_spans.push(EmphasisSpan {
3230 line: line_num,
3231 start_col: col_start,
3232 end_col: col_end,
3233 byte_offset: match_start,
3234 byte_end: match_end,
3235 marker,
3236 marker_count,
3237 content: content_part.to_string(),
3238 });
3239 }
3240
3241 emphasis_spans
3242 }
3243
3244 fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
3246 let mut table_rows = Vec::with_capacity(lines.len() / 20);
3247
3248 for (line_idx, line_info) in lines.iter().enumerate() {
3249 if line_info.in_code_block || line_info.is_blank {
3251 continue;
3252 }
3253
3254 let line = line_info.content(content);
3255 let line_num = line_idx + 1;
3256
3257 if !line.contains('|') {
3259 continue;
3260 }
3261
3262 let parts: Vec<&str> = line.split('|').collect();
3264 let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
3265
3266 let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
3268 let mut column_alignments = Vec::new();
3269
3270 if is_separator {
3271 for part in &parts[1..parts.len() - 1] {
3272 let trimmed = part.trim();
3274 let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
3275 "center".to_string()
3276 } else if trimmed.ends_with(':') {
3277 "right".to_string()
3278 } else if trimmed.starts_with(':') {
3279 "left".to_string()
3280 } else {
3281 "none".to_string()
3282 };
3283 column_alignments.push(alignment);
3284 }
3285 }
3286
3287 table_rows.push(TableRow {
3288 line: line_num,
3289 is_separator,
3290 column_count,
3291 column_alignments,
3292 });
3293 }
3294
3295 table_rows
3296 }
3297
3298 fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
3300 let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
3301
3302 for cap in URL_SIMPLE_REGEX.captures_iter(content) {
3304 let full_match = cap.get(0).unwrap();
3305 let match_start = full_match.start();
3306 let match_end = full_match.end();
3307
3308 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3310 continue;
3311 }
3312
3313 let preceding_char = if match_start > 0 {
3315 content.chars().nth(match_start - 1)
3316 } else {
3317 None
3318 };
3319 let following_char = content.chars().nth(match_end);
3320
3321 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3322 continue;
3323 }
3324 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3325 continue;
3326 }
3327
3328 let url = full_match.as_str();
3329 let url_type = if url.starts_with("https://") {
3330 "https"
3331 } else if url.starts_with("http://") {
3332 "http"
3333 } else if url.starts_with("ftp://") {
3334 "ftp"
3335 } else {
3336 "other"
3337 };
3338
3339 let mut line_num = 1;
3341 let mut col_start = match_start;
3342 let mut col_end = match_end;
3343 for (idx, line_info) in lines.iter().enumerate() {
3344 if match_start >= line_info.byte_offset {
3345 line_num = idx + 1;
3346 col_start = match_start - line_info.byte_offset;
3347 col_end = match_end - line_info.byte_offset;
3348 } else {
3349 break;
3350 }
3351 }
3352
3353 bare_urls.push(BareUrl {
3354 line: line_num,
3355 start_col: col_start,
3356 end_col: col_end,
3357 byte_offset: match_start,
3358 byte_end: match_end,
3359 url: url.to_string(),
3360 url_type: url_type.to_string(),
3361 });
3362 }
3363
3364 for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
3366 let full_match = cap.get(0).unwrap();
3367 let match_start = full_match.start();
3368 let match_end = full_match.end();
3369
3370 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3372 continue;
3373 }
3374
3375 let preceding_char = if match_start > 0 {
3377 content.chars().nth(match_start - 1)
3378 } else {
3379 None
3380 };
3381 let following_char = content.chars().nth(match_end);
3382
3383 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3384 continue;
3385 }
3386 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3387 continue;
3388 }
3389
3390 let email = full_match.as_str();
3391
3392 let mut line_num = 1;
3394 let mut col_start = match_start;
3395 let mut col_end = match_end;
3396 for (idx, line_info) in lines.iter().enumerate() {
3397 if match_start >= line_info.byte_offset {
3398 line_num = idx + 1;
3399 col_start = match_start - line_info.byte_offset;
3400 col_end = match_end - line_info.byte_offset;
3401 } else {
3402 break;
3403 }
3404 }
3405
3406 bare_urls.push(BareUrl {
3407 line: line_num,
3408 start_col: col_start,
3409 end_col: col_end,
3410 byte_offset: match_start,
3411 byte_end: match_end,
3412 url: email.to_string(),
3413 url_type: "email".to_string(),
3414 });
3415 }
3416
3417 bare_urls
3418 }
3419
/// Returns a `ValidHeadingsIter` constructed over this context's lines.
///
/// NOTE(review): which headings the iterator yields is decided inside
/// `ValidHeadingsIter`, which is not visible here; presumably it yields only
/// headings whose `is_valid` flag is set - confirm against its definition.
#[must_use]
pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
    ValidHeadingsIter::new(&self.lines)
}
3443
3444 #[must_use]
3448 pub fn has_valid_headings(&self) -> bool {
3449 self.lines
3450 .iter()
3451 .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
3452 }
3453}
3454
3455fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
3457 if list_blocks.len() < 2 {
3458 return;
3459 }
3460
3461 let mut merger = ListBlockMerger::new(content, lines);
3462 *list_blocks = merger.merge(list_blocks);
3463}
3464
/// Borrowed view of a document used while deciding whether neighbouring list
/// blocks should be merged.
struct ListBlockMerger<'a> {
    /// Full document text; passed to `LineInfo::content` to obtain a line's text.
    content: &'a str,
    /// Per-line metadata, looked up with `line_number - 1` (line numbers are 1-based).
    lines: &'a [LineInfo],
}
3470
3471impl<'a> ListBlockMerger<'a> {
3472 fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
3473 Self { content, lines }
3474 }
3475
3476 fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
3477 let mut merged = Vec::with_capacity(list_blocks.len());
3478 let mut current = list_blocks[0].clone();
3479
3480 for next in list_blocks.iter().skip(1) {
3481 if self.should_merge_blocks(¤t, next) {
3482 current = self.merge_two_blocks(current, next);
3483 } else {
3484 merged.push(current);
3485 current = next.clone();
3486 }
3487 }
3488
3489 merged.push(current);
3490 merged
3491 }
3492
3493 fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
3495 if !self.blocks_are_compatible(current, next) {
3497 return false;
3498 }
3499
3500 let spacing = self.analyze_spacing_between(current, next);
3502 match spacing {
3503 BlockSpacing::Consecutive => true,
3504 BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
3505 BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
3506 self.can_merge_with_content_between(current, next)
3507 }
3508 }
3509 }
3510
3511 fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
3513 current.is_ordered == next.is_ordered
3514 && current.blockquote_prefix == next.blockquote_prefix
3515 && current.nesting_level == next.nesting_level
3516 }
3517
3518 fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
3520 let gap = next.start_line - current.end_line;
3521
3522 match gap {
3523 1 => BlockSpacing::Consecutive,
3524 2 => BlockSpacing::SingleBlank,
3525 _ if gap > 2 => {
3526 if self.has_only_blank_lines_between(current, next) {
3527 BlockSpacing::MultipleBlanks
3528 } else {
3529 BlockSpacing::ContentBetween
3530 }
3531 }
3532 _ => BlockSpacing::Consecutive, }
3534 }
3535
3536 fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3538 if has_meaningful_content_between(self.content, current, next, self.lines) {
3541 return false; }
3543
3544 !current.is_ordered && current.marker == next.marker
3546 }
3547
3548 fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3550 if has_meaningful_content_between(self.content, current, next, self.lines) {
3552 return false; }
3554
3555 current.is_ordered && next.is_ordered
3557 }
3558
3559 fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3561 for line_num in (current.end_line + 1)..next.start_line {
3562 if let Some(line_info) = self.lines.get(line_num - 1)
3563 && !line_info.content(self.content).trim().is_empty()
3564 {
3565 return false;
3566 }
3567 }
3568 true
3569 }
3570
3571 fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
3573 current.end_line = next.end_line;
3574 current.item_lines.extend_from_slice(&next.item_lines);
3575
3576 current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
3578
3579 if !current.is_ordered && self.markers_differ(¤t, next) {
3581 current.marker = None; }
3583
3584 current
3585 }
3586
3587 fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
3589 current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
3590 }
3591}
3592
/// How two neighbouring list blocks are separated, based on the difference
/// between the next block's start line and the current block's end line.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// The next block starts on the line right after the current block ends
    /// (also used as the fallback when the computed gap is zero).
    Consecutive,
    /// Exactly one line lies between the two blocks.
    SingleBlank,
    /// Two or more lines lie between the blocks and all of them are blank.
    MultipleBlanks,
    /// Two or more lines lie between the blocks and at least one is non-blank.
    ContentBetween,
}
3601
3602fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
3604 for line_num in (current.end_line + 1)..next.start_line {
3606 if let Some(line_info) = lines.get(line_num - 1) {
3607 let trimmed = line_info.content(content).trim();
3609
3610 if trimmed.is_empty() {
3612 continue;
3613 }
3614
3615 if line_info.heading.is_some() {
3619 return true; }
3621
3622 if is_horizontal_rule(trimmed) {
3624 return true; }
3626
3627 if crate::utils::skip_context::is_table_line(trimmed) {
3629 return true; }
3631
3632 if trimmed.starts_with('>') {
3634 return true; }
3636
3637 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
3639 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3640
3641 let min_continuation_indent = if current.is_ordered {
3643 current.nesting_level + current.max_marker_width + 1 } else {
3645 current.nesting_level + 2
3646 };
3647
3648 if line_indent < min_continuation_indent {
3649 return true; }
3652 }
3653
3654 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3656
3657 let min_indent = if current.is_ordered {
3659 current.nesting_level + current.max_marker_width
3660 } else {
3661 current.nesting_level + 2
3662 };
3663
3664 if line_indent < min_indent {
3666 return true; }
3668
3669 }
3672 }
3673
3674 false
3676}
3677
3678pub fn is_horizontal_rule_line(line: &str) -> bool {
3685 let leading_spaces = line.len() - line.trim_start_matches(' ').len();
3687 if leading_spaces > 3 || line.starts_with('\t') {
3688 return false;
3689 }
3690
3691 is_horizontal_rule_content(line.trim())
3692}
3693
/// Returns `true` when already-trimmed text forms a horizontal rule: at least
/// three occurrences of one marker character (`-`, `*` or `_`), optionally
/// interleaved with spaces or tabs, and nothing else.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    if trimmed.len() < 3 {
        return false;
    }

    // The first character selects the marker; anything else is not a rule.
    let marker = match trimmed.chars().next() {
        Some(c @ ('-' | '*' | '_')) => c,
        _ => return false,
    };

    let mut marker_total = 0usize;
    for c in trimmed.chars() {
        match c {
            _ if c == marker => marker_total += 1,
            ' ' | '\t' => {}
            _ => return false,
        }
    }

    marker_total >= 3
}
3718
/// Returns `true` when already-trimmed text is a horizontal rule.
///
/// Thin alias that forwards to `is_horizontal_rule_content`; it performs no
/// indentation check of its own.
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}
3723
#[cfg(test)]
mod tests {
    use super::*;

    /// Empty input produces no lines but still has a single line offset.
    #[test]
    fn test_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }

    /// A single line maps byte offsets to 1-based (line, column) pairs.
    #[test]
    fn test_single_line() {
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
    }

    /// Line offsets and offset-to-position mapping across several lines.
    #[test]
    fn test_multi_line() {
        let content = "# Title\n\nSecond line\nThird line";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(8), (2, 1));
        assert_eq!(ctx.offset_to_line_col(9), (3, 1));
        assert_eq!(ctx.offset_to_line_col(15), (3, 7));
        assert_eq!(ctx.offset_to_line_col(21), (4, 1));
    }

    /// Per-line metadata: content, byte offset, indent, blank and code flags.
    #[test]
    fn test_line_info() {
        // The second line is indented by four spaces, matching the
        // `indent == 4` assertions below.
        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.lines.len(), 7);

        let line1 = &ctx.lines[0];
        assert_eq!(line1.content(ctx.content), "# Title");
        assert_eq!(line1.byte_offset, 0);
        assert_eq!(line1.indent, 0);
        assert!(!line1.is_blank);
        assert!(!line1.in_code_block);
        assert!(line1.list_item.is_none());

        let line2 = &ctx.lines[1];
        assert_eq!(line2.content(ctx.content), "    indented");
        assert_eq!(line2.byte_offset, 8);
        assert_eq!(line2.indent, 4);
        assert!(!line2.is_blank);

        let line3 = &ctx.lines[2];
        assert_eq!(line3.content(ctx.content), "");
        assert!(line3.is_blank);

        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
    }

    /// List markers, orderedness, columns and numbers are detected per line.
    #[test]
    fn test_list_item_detection() {
        // Nested items are indented to their parent's content column: two
        // spaces under "- " (hence `marker_column == 2`) and three under "1. ".
        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        let line1 = &ctx.lines[0];
        assert!(line1.list_item.is_some());
        let list1 = line1.list_item.as_ref().unwrap();
        assert_eq!(list1.marker, "-");
        assert!(!list1.is_ordered);
        assert_eq!(list1.marker_column, 0);
        assert_eq!(list1.content_column, 2);

        let line2 = &ctx.lines[1];
        assert!(line2.list_item.is_some());
        let list2 = line2.list_item.as_ref().unwrap();
        assert_eq!(list2.marker, "*");
        assert_eq!(list2.marker_column, 2);

        let line3 = &ctx.lines[2];
        assert!(line3.list_item.is_some());
        let list3 = line3.list_item.as_ref().unwrap();
        assert_eq!(list3.marker, "1.");
        assert!(list3.is_ordered);
        assert_eq!(list3.number, Some(1));

        let line6 = &ctx.lines[5];
        assert!(line6.list_item.is_none());
    }

    /// Offsets on newline characters and at end of input.
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        let content = "a\nb\nc";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(1), (1, 2));
        assert_eq!(ctx.offset_to_line_col(2), (2, 1));
        assert_eq!(ctx.offset_to_line_col(3), (2, 2));
        assert_eq!(ctx.offset_to_line_col(4), (3, 1));
        assert_eq!(ctx.offset_to_line_col(5), (3, 2));
    }

    /// In the MDX flavor, leading import/export lines are flagged as ESM.
    #[test]
    fn test_mdx_esm_blocks() {
        let content = r##"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall

In {year}, the snowfall was above average.
It was followed by a warm spring which caused
flood conditions in many of the nearby rivers.

<Chart color="#fcb32c" year={year} />
"##;

        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);

        assert_eq!(ctx.lines.len(), 10);
        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
        assert!(
            !ctx.lines[3].in_esm_block,
            "Line 4 (heading) should NOT be in_esm_block"
        );
        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
    }

    /// Outside the MDX flavor, import/export lines are ordinary text.
    #[test]
    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
        let content = r#"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(
            !ctx.lines[0].in_esm_block,
            "Line 1 should NOT be in_esm_block in Standard flavor"
        );
        assert!(
            !ctx.lines[1].in_esm_block,
            "Line 2 should NOT be in_esm_block in Standard flavor"
        );
    }
}