1use crate::config::MarkdownFlavor;
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
4use crate::utils::element_cache::ElementCache;
5use crate::utils::regex_cache::URL_SIMPLE_REGEX;
6use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
7use regex::Regex;
8use std::borrow::Cow;
9use std::path::PathBuf;
10use std::sync::LazyLock;
11
/// Evaluates `$code` and yields its value, optionally reporting how long it took.
///
/// The timer starts before `$code` runs. When `$profile` is true, the elapsed
/// time is written to stderr as `[PROFILE] <name>: <duration>`.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let timer = std::time::Instant::now();
        let value = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, timer.elapsed());
        }
        value
    }};
}
24
/// wasm32 variant of `profile_section!`: expands to `$code` alone.
///
/// `$name` and `$profile` are accepted so call sites are identical on every
/// target, but they are not evaluated here and no timing is performed.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
29
/// Matches inline (`[text](url "title")`) and reference (`[text][id]`) link syntax.
///
/// Capture groups: 1 = link text, 2 = URL in angle brackets, 3 = bare URL,
/// 4/5 = double-/single-quoted title, 6 = reference id.
/// The pattern is compiled lazily on first use; it is written in verbose (`x`)
/// mode, so whitespace and `#` comments inside the literal are ignored.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\] # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
        \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
        |
        \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
43
/// Matches inline (`![alt](url "title")`) and reference (`![alt][id]`) image syntax.
///
/// Capture groups: 1 = alt text, 2 = URL in angle brackets, 3 = bare URL,
/// 4/5 = double-/single-quoted title, 6 = reference id.
/// Same shape as `LINK_PATTERN` with a leading `!`; compiled lazily on first use.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\] # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
        \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\) # URL in group 2 (angle) or 3 (bare), title in 4/5
        |
        \[([^\]]*)\] # Reference ID in group 6
        )"#
    ).unwrap()
});
57
/// Matches a reference definition line: `[id]: url "optional title"`.
///
/// Multi-line mode, so `^`/`$` anchor at line boundaries; up to three leading
/// spaces are allowed. Groups: 1 = id, 2 = URL (no whitespace),
/// 3/4 = double-/single-quoted title.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
61
/// Matches an email-like token: local part, `@`, dotted domain, and a final
/// label of at least two ASCII letters. Unanchored, so it finds matches
/// anywhere in the haystack.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
67
/// Captures a leading blockquote prefix: optional whitespace, one or more `>`,
/// then any trailing whitespace (group 1 is the whole prefix).
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
70
/// Per-line facts about the document; `LintContext::lines` holds one entry per
/// line, indexed by `line_number - 1`.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset in the source at which this line starts.
    pub byte_offset: usize,
    /// Length of the line in bytes; `content` slices `byte_offset..byte_offset + byte_len`.
    pub byte_len: usize,
    /// Leading indentation of the line (unit not visible here — TODO confirm bytes vs. chars).
    pub indent: usize,
    /// Visual indentation (presumably with tabs expanded — TODO confirm against the producer).
    pub visual_indent: usize,
    /// Whether the line is blank.
    pub is_blank: bool,
    /// Whether the line lies inside a code block.
    pub in_code_block: bool,
    /// Whether the line lies inside front matter.
    pub in_front_matter: bool,
    /// Whether the line lies inside an HTML block.
    pub in_html_block: bool,
    /// Whether the line lies inside an HTML comment.
    pub in_html_comment: bool,
    /// List-item details when this line starts a list item.
    pub list_item: Option<ListItemInfo>,
    /// Heading details when this line is a heading.
    pub heading: Option<HeadingInfo>,
    /// Blockquote details when this line is part of a blockquote.
    pub blockquote: Option<BlockquoteInfo>,
    /// Whether the line lies inside a mkdocstrings block.
    pub in_mkdocstrings: bool,
    /// Whether the line lies inside an ESM block.
    pub in_esm_block: bool,
    /// Set by `LintContext::new` on every line after the first line of a
    /// code span that extends over several lines.
    pub in_code_span_continuation: bool,
    /// Whether the line is a horizontal rule.
    pub is_horizontal_rule: bool,
}
110
111impl LineInfo {
112 pub fn content<'a>(&self, source: &'a str) -> &'a str {
114 &source[self.byte_offset..self.byte_offset + self.byte_len]
115 }
116}
117
/// Details of a list item found on a line (stored in `LineInfo::list_item`).
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The list marker text.
    pub marker: String,
    /// `true` for an ordered item, `false` for an unordered one.
    pub is_ordered: bool,
    /// Item number, when there is one (presumably only for ordered items — confirm).
    pub number: Option<usize>,
    /// Column of the marker. `compute_mixed_list_nesting` treats columns 0 and 1
    /// as the same top-level position.
    pub marker_column: usize,
    /// Column at which the item's content begins.
    pub content_column: usize,
}
132
/// Syntactic form of a heading.
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    /// ATX-style heading.
    ATX,
    /// Setext-style heading, first variant (presumably the `=` underline — confirm).
    Setext1,
    /// Setext-style heading, second variant (presumably the `-` underline — confirm).
    Setext2,
}
143
/// A link found by `LintContext::parse_links`.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// 1-based line number on which the link starts.
    pub line: usize,
    /// Byte offset of the link start relative to the start of its line (0-based).
    pub start_col: usize,
    /// Byte offset of the link end relative to the start of the line containing the end.
    pub end_col: usize,
    /// Byte offset of the link start in the document.
    pub byte_offset: usize,
    /// Byte offset just past the link in the document.
    pub byte_end: usize,
    /// Link text between the opening `[` and its matching `]`; empty when no
    /// closing bracket is found.
    pub text: Cow<'a, str>,
    /// Destination URL; empty for reference links recovered by the regex fallback.
    pub url: Cow<'a, str>,
    /// `true` for reference, collapsed, and shortcut links.
    pub is_reference: bool,
    /// Lowercased reference id for reference-style links (falls back to the
    /// lowercased link text when the id is empty); `None` otherwise.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type reported by pulldown-cmark (`LinkType::Reference` for regex-fallback entries).
    pub link_type: LinkType,
}
168
/// A broken link reported through pulldown-cmark's broken-link callback in
/// `LintContext::parse_links`.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text that failed to resolve.
    pub reference: String,
    /// Span of the broken link as reported by the parser.
    pub span: std::ops::Range<usize>,
}
177
/// A footnote reference, recorded from `Event::FootnoteReference` in
/// `LintContext::parse_links`.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// Footnote identifier as reported by the parser.
    pub id: String,
    /// 1-based line number of the reference.
    pub line: usize,
    /// Byte offset of the start of the reference in the document.
    pub byte_offset: usize,
    /// Byte offset just past the reference in the document.
    pub byte_end: usize,
}
190
/// An image found by `LintContext::parse_images`; mirrors `ParsedLink` with
/// `alt_text` in place of `text`.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// Line number on which the image starts.
    pub line: usize,
    /// Column at which the image starts.
    pub start_col: usize,
    /// Column at which the image ends.
    pub end_col: usize,
    /// Byte offset of the image start in the document.
    pub byte_offset: usize,
    /// Byte offset of the image end in the document.
    pub byte_end: usize,
    /// Alternative text of the image.
    pub alt_text: Cow<'a, str>,
    /// Image URL.
    pub url: Cow<'a, str>,
    /// Whether the image uses reference syntax.
    pub is_reference: bool,
    /// Reference id for reference-style images.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type reported by the parser.
    pub link_type: LinkType,
}
215
/// A link reference definition (`[id]: url "title"`).
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line number of the definition.
    pub line: usize,
    /// Reference id. `get_reference_url` compares it against a lowercased
    /// lookup key, so it is presumably stored lowercased — confirm in the producer.
    pub id: String,
    /// Target URL.
    pub url: String,
    /// Optional title.
    pub title: Option<String>,
    /// Start byte of the definition; `is_in_reference_def` treats
    /// `byte_offset..byte_end` as a half-open range.
    pub byte_offset: usize,
    /// End byte of the definition (exclusive).
    pub byte_end: usize,
    /// Start byte of the title, when present; `is_in_link_title` treats
    /// `title_byte_start..title_byte_end` as a half-open range.
    pub title_byte_start: Option<usize>,
    /// End byte of the title (exclusive), when present.
    pub title_byte_end: Option<usize>,
}
236
/// An inline code span.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// 1-based line on which the span starts.
    pub line: usize,
    /// 1-based line on which the span ends (greater than `line` for multi-line spans).
    pub end_line: usize,
    /// Start column; `is_in_code_span` compares it against a 0-based column.
    pub start_col: usize,
    /// End column (exclusive in `is_in_code_span`).
    pub end_col: usize,
    /// Start byte of the span in the document.
    pub byte_offset: usize,
    /// End byte of the span in the document (exclusive in the range checks).
    pub byte_end: usize,
    /// Number of backticks in the delimiter.
    pub backtick_count: usize,
    /// Text of the span.
    pub content: String,
}
257
/// Details of a heading found on a line (stored in `LineInfo::heading`).
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level.
    pub level: u8,
    /// Syntactic style of the heading.
    pub style: HeadingStyle,
    /// The heading marker text.
    pub marker: String,
    /// Column of the marker.
    pub marker_column: usize,
    /// Column at which the heading content begins.
    pub content_column: usize,
    /// Heading text (presumably cleaned of markers/custom id — confirm; see `raw_text`).
    pub text: String,
    /// Custom id attached to the heading, if any.
    pub custom_id: Option<String>,
    /// Heading text as written.
    pub raw_text: String,
    /// Whether the heading has a closing sequence.
    pub has_closing_sequence: bool,
    /// The closing sequence text.
    pub closing_sequence: String,
    /// Whether this heading counts as valid; `ValidHeadingsIter` skips
    /// headings where this is `false`.
    pub is_valid: bool,
}
285
/// Item yielded by `ValidHeadingsIter`: a heading whose `is_valid` flag is set,
/// together with its position.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// 1-based line number of the heading.
    pub line_num: usize,
    /// The heading details.
    pub heading: &'a HeadingInfo,
    /// The line record that carries the heading.
    pub line_info: &'a LineInfo,
}
299
/// Iterator over the lines of a document that carry a valid heading.
pub struct ValidHeadingsIter<'a> {
    /// All line records of the document.
    lines: &'a [LineInfo],
    /// Index of the next line to examine.
    current_index: usize,
}
308
309impl<'a> ValidHeadingsIter<'a> {
310 fn new(lines: &'a [LineInfo]) -> Self {
311 Self {
312 lines,
313 current_index: 0,
314 }
315 }
316}
317
318impl<'a> Iterator for ValidHeadingsIter<'a> {
319 type Item = ValidHeading<'a>;
320
321 fn next(&mut self) -> Option<Self::Item> {
322 while self.current_index < self.lines.len() {
323 let idx = self.current_index;
324 self.current_index += 1;
325
326 let line_info = &self.lines[idx];
327 if let Some(heading) = &line_info.heading
328 && heading.is_valid
329 {
330 return Some(ValidHeading {
331 line_num: idx + 1, heading,
333 line_info,
334 });
335 }
336 }
337 None
338 }
339}
340
/// Details of a blockquote line (stored in `LineInfo::blockquote`).
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting depth of the blockquote.
    pub nesting_level: usize,
    /// Indentation text before the marker.
    pub indent: String,
    /// Column of the blockquote marker.
    pub marker_column: usize,
    /// The blockquote prefix text.
    pub prefix: String,
    /// Line content after the prefix.
    pub content: String,
    /// Whether no space follows the marker.
    pub has_no_space_after_marker: bool,
    /// Whether more than one space follows the marker.
    pub has_multiple_spaces_after_marker: bool,
    /// Flag consumed by the MD028 rule (exact criteria are set by the producer, not shown here).
    pub needs_md028_fix: bool,
}
361
/// A contiguous list in the document.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// First line of the block (inclusive in `is_in_list_block`).
    pub start_line: usize,
    /// Last line of the block (inclusive in `is_in_list_block`).
    pub end_line: usize,
    /// Whether the list is ordered.
    pub is_ordered: bool,
    /// The list marker, when one is recorded for the block.
    pub marker: Option<String>,
    /// Blockquote prefix associated with the block.
    pub blockquote_prefix: String,
    /// Line numbers of the items in the block.
    pub item_lines: Vec<usize>,
    /// Nesting level of the block.
    pub nesting_level: usize,
    /// Widest marker among the block's items.
    pub max_marker_width: usize,
}
382
383use std::sync::{Arc, OnceLock};
384
/// Per-document character counts, consulted by `LintContext::has_char`,
/// `char_count`, and the `likely_has_*` quick checks.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count reported for `'#'`.
    pub hash_count: usize,
    /// Count reported for `'*'`.
    pub asterisk_count: usize,
    /// Count reported for `'_'`.
    pub underscore_count: usize,
    /// Count reported for `'-'`.
    pub hyphen_count: usize,
    /// Count reported for `'+'`.
    pub plus_count: usize,
    /// Count reported for `'>'`.
    pub gt_count: usize,
    /// Count reported for `'|'`.
    pub pipe_count: usize,
    /// Count reported for `'['` (whether `']'` is also included is decided by
    /// the producer, not shown here).
    pub bracket_count: usize,
    /// Count reported for a backtick.
    pub backtick_count: usize,
    /// Count reported for `'<'`.
    pub lt_count: usize,
    /// Count reported for `'!'`.
    pub exclamation_count: usize,
    /// Count reported for `'\n'`.
    pub newline_count: usize,
}
413
/// An HTML tag found in the document.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line of the tag; `html_tags_on_line` filters on this value.
    pub line: usize,
    /// Column at which the tag starts.
    pub start_col: usize,
    /// Column at which the tag ends.
    pub end_col: usize,
    /// Start byte of the tag; `is_in_html_tag` treats `byte_offset..byte_end`
    /// as a half-open range.
    pub byte_offset: usize,
    /// End byte of the tag (exclusive).
    pub byte_end: usize,
    /// Name of the tag.
    pub tag_name: String,
    /// Whether this is a closing tag.
    pub is_closing: bool,
    /// Whether the tag is self-closing.
    pub is_self_closing: bool,
    /// The tag text as written.
    pub raw_content: String,
}
436
/// An emphasis span found in the document.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line of the span; `emphasis_spans_on_line` filters on this value.
    pub line: usize,
    /// Column at which the span starts.
    pub start_col: usize,
    /// Column at which the span ends.
    pub end_col: usize,
    /// Start byte of the span in the document.
    pub byte_offset: usize,
    /// End byte of the span in the document.
    pub byte_end: usize,
    /// The emphasis marker character.
    pub marker: char,
    /// Number of marker characters in the delimiter.
    pub marker_count: usize,
    /// Text of the span.
    pub content: String,
}
457
/// A table row found in the document.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line of the row; `table_rows_on_line` filters on this value.
    pub line: usize,
    /// Whether the row is a separator row.
    pub is_separator: bool,
    /// Number of columns in the row.
    pub column_count: usize,
    /// Alignment of each column (string encoding is defined by the producer, not shown here).
    pub column_alignments: Vec<String>,
}
470
/// A bare URL (or similar bare token) found in the document.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line of the URL; `bare_urls_on_line` filters on this value.
    pub line: usize,
    /// Column at which the URL starts.
    pub start_col: usize,
    /// Column at which the URL ends.
    pub end_col: usize,
    /// Start byte of the URL in the document.
    pub byte_offset: usize,
    /// End byte of the URL in the document.
    pub byte_end: usize,
    /// The URL text.
    pub url: String,
    /// Kind of URL (set of values is defined by the producer, not shown here).
    pub url_type: String,
}
489
/// Pre-computed view of one markdown document, shared by lint rules.
///
/// `LintContext::new` fills the public fields eagerly; the `*_cache` fields
/// are populated on first use through their accessor methods.
pub struct LintContext<'a> {
    /// The full document text.
    pub content: &'a str,
    /// Byte offset at which each line starts (`0`, then one past every `'\n'`).
    pub line_offsets: Vec<usize>,
    /// Byte ranges of code blocks, from `CodeBlockUtils::detect_code_blocks`.
    pub code_blocks: Vec<(usize, usize)>,
    /// One record per line, indexed by `line_number - 1`.
    pub lines: Vec<LineInfo>,
    /// Links found by `parse_links`.
    pub links: Vec<ParsedLink<'a>>,
    /// Images found by `parse_images`.
    pub images: Vec<ParsedImage<'a>>,
    /// Broken links reported by the parser during `parse_links`.
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Footnote references found during `parse_links`.
    pub footnote_refs: Vec<FootnoteRef>,
    /// Link reference definitions.
    pub reference_defs: Vec<ReferenceDef>,
    /// Code spans; pre-filled by `new`, read through `code_spans()`.
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>,
    /// List blocks of the document.
    pub list_blocks: Vec<ListBlock>,
    /// Character counts used by the quick `has_char`/`likely_has_*` checks.
    pub char_frequency: CharFrequency,
    /// Lazily computed HTML tags; read through `html_tags()`.
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>,
    /// Lazily computed emphasis spans; read through `emphasis_spans()`.
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>,
    /// Lazily computed table rows; read through `table_rows()`.
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>,
    /// Lazily computed bare URLs; read through `bare_urls()`.
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>,
    /// Lazily computed result of `has_mixed_list_nesting()`.
    has_mixed_list_nesting_cache: OnceLock<bool>,
    /// Byte ranges of HTML comments; read through `html_comment_ranges()`.
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Table blocks of the document.
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Line index built over `content`.
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    /// Byte ranges of Jinja constructs, half-open in `is_in_jinja_range`.
    jinja_ranges: Vec<(usize, usize)>,
    /// Markdown flavor the document is linted as.
    pub flavor: MarkdownFlavor,
    /// Path of the source file, when known.
    pub source_file: Option<PathBuf>,
}
515
/// The four consecutive pieces of a blockquote line, each borrowed from the input.
struct BlockquoteComponents<'a> {
    /// Leading spaces/tabs before the first `>`.
    indent: &'a str,
    /// The run of consecutive `>` characters.
    markers: &'a str,
    /// Spaces/tabs directly after the markers.
    spaces_after: &'a str,
    /// Everything that follows.
    content: &'a str,
}

/// Splits `line` into indent, `>` markers, trailing blanks, and content.
///
/// Returns `None` when the first character after the leading spaces/tabs is
/// not `>` (including when the line is empty or all blanks). Only an unbroken
/// run of `>` counts as markers; a later `>` separated by a space ends up in
/// `content`.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    const BLANKS: &[char] = &[' ', '\t'];

    let after_indent = line.trim_start_matches(BLANKS);
    let (indent, _) = line.split_at(line.len() - after_indent.len());

    if !after_indent.starts_with('>') {
        return None;
    }

    let after_markers = after_indent.trim_start_matches('>');
    let (markers, _) = after_indent.split_at(after_indent.len() - after_markers.len());

    let content = after_markers.trim_start_matches(BLANKS);
    let (spaces_after, _) = after_markers.split_at(after_markers.len() - content.len());

    Some(BlockquoteComponents {
        indent,
        markers,
        spaces_after,
        content,
    })
}
560
561impl<'a> LintContext<'a> {
562 pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
563 #[cfg(not(target_arch = "wasm32"))]
564 let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
565 #[cfg(target_arch = "wasm32")]
566 let profile = false;
567
568 let line_offsets = profile_section!("Line offsets", profile, {
569 let mut offsets = vec![0];
570 for (i, c) in content.char_indices() {
571 if c == '\n' {
572 offsets.push(i + 1);
573 }
574 }
575 offsets
576 });
577
578 let code_blocks = profile_section!("Code blocks", profile, CodeBlockUtils::detect_code_blocks(content));
580
581 let html_comment_ranges = profile_section!(
583 "HTML comment ranges",
584 profile,
585 crate::utils::skip_context::compute_html_comment_ranges(content)
586 );
587
588 let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
590 if flavor == MarkdownFlavor::MkDocs {
591 crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
592 } else {
593 Vec::new()
594 }
595 });
596
597 let mut lines = profile_section!(
599 "Basic line info",
600 profile,
601 Self::compute_basic_line_info(
602 content,
603 &line_offsets,
604 &code_blocks,
605 flavor,
606 &html_comment_ranges,
607 &autodoc_ranges,
608 )
609 );
610
611 profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));
613
614 profile_section!(
616 "ESM blocks",
617 profile,
618 Self::detect_esm_blocks(content, &mut lines, flavor)
619 );
620
621 let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));
623
624 profile_section!(
626 "Headings & blockquotes",
627 profile,
628 Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
629 );
630
631 let code_spans = profile_section!("Code spans", profile, Self::parse_code_spans(content, &lines));
633
634 for span in &code_spans {
637 if span.end_line > span.line {
638 for line_num in (span.line + 1)..=span.end_line {
640 if let Some(line_info) = lines.get_mut(line_num - 1) {
641 line_info.in_code_span_continuation = true;
642 }
643 }
644 }
645 }
646
647 let (links, broken_links, footnote_refs) = profile_section!(
649 "Links",
650 profile,
651 Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
652 );
653
654 let images = profile_section!(
655 "Images",
656 profile,
657 Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
658 );
659
660 let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));
661
662 let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));
663
664 let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));
666
667 let table_blocks = profile_section!(
669 "Table blocks",
670 profile,
671 crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
672 content,
673 &code_blocks,
674 &code_spans,
675 &html_comment_ranges,
676 )
677 );
678
679 let line_index = profile_section!(
681 "Line index",
682 profile,
683 crate::utils::range_utils::LineIndex::new(content)
684 );
685
686 let jinja_ranges = profile_section!(
688 "Jinja ranges",
689 profile,
690 crate::utils::jinja_utils::find_jinja_ranges(content)
691 );
692
693 Self {
694 content,
695 line_offsets,
696 code_blocks,
697 lines,
698 links,
699 images,
700 broken_links,
701 footnote_refs,
702 reference_defs,
703 code_spans_cache: OnceLock::from(Arc::new(code_spans)),
704 list_blocks,
705 char_frequency,
706 html_tags_cache: OnceLock::new(),
707 emphasis_spans_cache: OnceLock::new(),
708 table_rows_cache: OnceLock::new(),
709 bare_urls_cache: OnceLock::new(),
710 has_mixed_list_nesting_cache: OnceLock::new(),
711 html_comment_ranges,
712 table_blocks,
713 line_index,
714 jinja_ranges,
715 flavor,
716 source_file,
717 }
718 }
719
720 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
722 Arc::clone(
723 self.code_spans_cache
724 .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
725 )
726 }
727
728 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
730 &self.html_comment_ranges
731 }
732
733 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
735 Arc::clone(self.html_tags_cache.get_or_init(|| {
736 Arc::new(Self::parse_html_tags(
737 self.content,
738 &self.lines,
739 &self.code_blocks,
740 self.flavor,
741 ))
742 }))
743 }
744
745 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
747 Arc::clone(
748 self.emphasis_spans_cache
749 .get_or_init(|| Arc::new(Self::parse_emphasis_spans(self.content, &self.lines, &self.code_blocks))),
750 )
751 }
752
753 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
755 Arc::clone(
756 self.table_rows_cache
757 .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
758 )
759 }
760
761 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
763 Arc::clone(
764 self.bare_urls_cache
765 .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
766 )
767 }
768
769 pub fn has_mixed_list_nesting(&self) -> bool {
773 *self
774 .has_mixed_list_nesting_cache
775 .get_or_init(|| self.compute_mixed_list_nesting())
776 }
777
778 fn compute_mixed_list_nesting(&self) -> bool {
780 let mut stack: Vec<(usize, bool)> = Vec::new();
785 let mut last_was_blank = false;
786
787 for line_info in &self.lines {
788 if line_info.in_code_block
790 || line_info.in_front_matter
791 || line_info.in_mkdocstrings
792 || line_info.in_html_comment
793 || line_info.in_esm_block
794 {
795 continue;
796 }
797
798 if line_info.is_blank {
800 last_was_blank = true;
801 continue;
802 }
803
804 if let Some(list_item) = &line_info.list_item {
805 let current_pos = if list_item.marker_column == 1 {
807 0
808 } else {
809 list_item.marker_column
810 };
811
812 if last_was_blank && current_pos == 0 {
814 stack.clear();
815 }
816 last_was_blank = false;
817
818 while let Some(&(pos, _)) = stack.last() {
820 if pos >= current_pos {
821 stack.pop();
822 } else {
823 break;
824 }
825 }
826
827 if let Some(&(_, parent_is_ordered)) = stack.last()
829 && parent_is_ordered != list_item.is_ordered
830 {
831 return true; }
833
834 stack.push((current_pos, list_item.is_ordered));
835 } else {
836 last_was_blank = false;
838 }
839 }
840
841 false
842 }
843
844 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
846 match self.line_offsets.binary_search(&offset) {
847 Ok(line) => (line + 1, 1),
848 Err(line) => {
849 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
850 (line, offset - line_start + 1)
851 }
852 }
853 }
854
855 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
857 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
859 return true;
860 }
861
862 self.code_spans()
864 .iter()
865 .any(|span| pos >= span.byte_offset && pos < span.byte_end)
866 }
867
868 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
870 if line_num > 0 {
871 self.lines.get(line_num - 1)
872 } else {
873 None
874 }
875 }
876
877 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
879 self.line_info(line_num).map(|info| info.byte_offset)
880 }
881
882 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
884 let normalized_id = ref_id.to_lowercase();
885 self.reference_defs
886 .iter()
887 .find(|def| def.id == normalized_id)
888 .map(|def| def.url.as_str())
889 }
890
891 pub fn is_in_list_block(&self, line_num: usize) -> bool {
893 self.list_blocks
894 .iter()
895 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
896 }
897
898 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
900 self.list_blocks
901 .iter()
902 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
903 }
904
905 pub fn is_in_code_block(&self, line_num: usize) -> bool {
909 if line_num == 0 || line_num > self.lines.len() {
910 return false;
911 }
912 self.lines[line_num - 1].in_code_block
913 }
914
915 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
917 if line_num == 0 || line_num > self.lines.len() {
918 return false;
919 }
920 self.lines[line_num - 1].in_front_matter
921 }
922
923 pub fn is_in_html_block(&self, line_num: usize) -> bool {
925 if line_num == 0 || line_num > self.lines.len() {
926 return false;
927 }
928 self.lines[line_num - 1].in_html_block
929 }
930
931 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
933 if line_num == 0 || line_num > self.lines.len() {
934 return false;
935 }
936
937 let col_0indexed = if col > 0 { col - 1 } else { 0 };
941 let code_spans = self.code_spans();
942 code_spans.iter().any(|span| {
943 if line_num < span.line || line_num > span.end_line {
945 return false;
946 }
947
948 if span.line == span.end_line {
949 col_0indexed >= span.start_col && col_0indexed < span.end_col
951 } else if line_num == span.line {
952 col_0indexed >= span.start_col
954 } else if line_num == span.end_line {
955 col_0indexed < span.end_col
957 } else {
958 true
960 }
961 })
962 }
963
964 #[inline]
966 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
967 let code_spans = self.code_spans();
968 code_spans
969 .iter()
970 .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
971 }
972
973 #[inline]
976 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
977 self.reference_defs
978 .iter()
979 .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
980 }
981
982 #[inline]
986 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
987 self.html_comment_ranges
988 .iter()
989 .any(|range| byte_pos >= range.start && byte_pos < range.end)
990 }
991
992 #[inline]
995 pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
996 self.html_tags()
997 .iter()
998 .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
999 }
1000
1001 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1003 self.jinja_ranges
1004 .iter()
1005 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1006 }
1007
1008 pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1010 self.reference_defs.iter().any(|def| {
1011 if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1012 byte_pos >= start && byte_pos < end
1013 } else {
1014 false
1015 }
1016 })
1017 }
1018
1019 pub fn has_char(&self, ch: char) -> bool {
1021 match ch {
1022 '#' => self.char_frequency.hash_count > 0,
1023 '*' => self.char_frequency.asterisk_count > 0,
1024 '_' => self.char_frequency.underscore_count > 0,
1025 '-' => self.char_frequency.hyphen_count > 0,
1026 '+' => self.char_frequency.plus_count > 0,
1027 '>' => self.char_frequency.gt_count > 0,
1028 '|' => self.char_frequency.pipe_count > 0,
1029 '[' => self.char_frequency.bracket_count > 0,
1030 '`' => self.char_frequency.backtick_count > 0,
1031 '<' => self.char_frequency.lt_count > 0,
1032 '!' => self.char_frequency.exclamation_count > 0,
1033 '\n' => self.char_frequency.newline_count > 0,
1034 _ => self.content.contains(ch), }
1036 }
1037
1038 pub fn char_count(&self, ch: char) -> usize {
1040 match ch {
1041 '#' => self.char_frequency.hash_count,
1042 '*' => self.char_frequency.asterisk_count,
1043 '_' => self.char_frequency.underscore_count,
1044 '-' => self.char_frequency.hyphen_count,
1045 '+' => self.char_frequency.plus_count,
1046 '>' => self.char_frequency.gt_count,
1047 '|' => self.char_frequency.pipe_count,
1048 '[' => self.char_frequency.bracket_count,
1049 '`' => self.char_frequency.backtick_count,
1050 '<' => self.char_frequency.lt_count,
1051 '!' => self.char_frequency.exclamation_count,
1052 '\n' => self.char_frequency.newline_count,
1053 _ => self.content.matches(ch).count(), }
1055 }
1056
1057 pub fn likely_has_headings(&self) -> bool {
1059 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
1061
1062 pub fn likely_has_lists(&self) -> bool {
1064 self.char_frequency.asterisk_count > 0
1065 || self.char_frequency.hyphen_count > 0
1066 || self.char_frequency.plus_count > 0
1067 }
1068
1069 pub fn likely_has_emphasis(&self) -> bool {
1071 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1072 }
1073
1074 pub fn likely_has_tables(&self) -> bool {
1076 self.char_frequency.pipe_count > 2
1077 }
1078
1079 pub fn likely_has_blockquotes(&self) -> bool {
1081 self.char_frequency.gt_count > 0
1082 }
1083
1084 pub fn likely_has_code(&self) -> bool {
1086 self.char_frequency.backtick_count > 0
1087 }
1088
1089 pub fn likely_has_links_or_images(&self) -> bool {
1091 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1092 }
1093
1094 pub fn likely_has_html(&self) -> bool {
1096 self.char_frequency.lt_count > 0
1097 }
1098
1099 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1101 self.html_tags()
1102 .iter()
1103 .filter(|tag| tag.line == line_num)
1104 .cloned()
1105 .collect()
1106 }
1107
1108 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1110 self.emphasis_spans()
1111 .iter()
1112 .filter(|span| span.line == line_num)
1113 .cloned()
1114 .collect()
1115 }
1116
1117 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1119 self.table_rows()
1120 .iter()
1121 .filter(|row| row.line == line_num)
1122 .cloned()
1123 .collect()
1124 }
1125
1126 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1128 self.bare_urls()
1129 .iter()
1130 .filter(|url| url.line == line_num)
1131 .cloned()
1132 .collect()
1133 }
1134
1135 #[inline]
1141 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1142 let idx = match lines.binary_search_by(|line| {
1144 if byte_offset < line.byte_offset {
1145 std::cmp::Ordering::Greater
1146 } else if byte_offset > line.byte_offset + line.byte_len {
1147 std::cmp::Ordering::Less
1148 } else {
1149 std::cmp::Ordering::Equal
1150 }
1151 }) {
1152 Ok(idx) => idx,
1153 Err(idx) => idx.saturating_sub(1),
1154 };
1155
1156 let line = &lines[idx];
1157 let line_num = idx + 1;
1158 let col = byte_offset.saturating_sub(line.byte_offset);
1159
1160 (idx, line_num, col)
1161 }
1162
1163 #[inline]
1165 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1166 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1168
1169 if idx > 0 {
1171 let span = &code_spans[idx - 1];
1172 if offset >= span.byte_offset && offset < span.byte_end {
1173 return true;
1174 }
1175 }
1176
1177 false
1178 }
1179
1180 fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1184 use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1185
1186 let mut link_ranges = Vec::new();
1187 let mut options = Options::empty();
1188 options.insert(Options::ENABLE_WIKILINKS);
1189 options.insert(Options::ENABLE_FOOTNOTES);
1190
1191 let parser = Parser::new_ext(content, options).into_offset_iter();
1192 let mut link_stack: Vec<usize> = Vec::new();
1193
1194 for (event, range) in parser {
1195 match event {
1196 Event::Start(Tag::Link { .. }) => {
1197 link_stack.push(range.start);
1198 }
1199 Event::End(TagEnd::Link) => {
1200 if let Some(start_pos) = link_stack.pop() {
1201 link_ranges.push((start_pos, range.end));
1202 }
1203 }
1204 _ => {}
1205 }
1206 }
1207
1208 link_ranges
1209 }
1210
1211 fn parse_links(
1213 content: &'a str,
1214 lines: &[LineInfo],
1215 code_blocks: &[(usize, usize)],
1216 code_spans: &[CodeSpan],
1217 flavor: MarkdownFlavor,
1218 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1219 ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1220 use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1221 use std::collections::HashSet;
1222
1223 let mut links = Vec::with_capacity(content.len() / 500);
1224 let mut broken_links = Vec::new();
1225 let mut footnote_refs = Vec::new();
1226
1227 let mut found_positions = HashSet::new();
1229
1230 let mut options = Options::empty();
1240 options.insert(Options::ENABLE_WIKILINKS);
1241 options.insert(Options::ENABLE_FOOTNOTES);
1242
1243 let parser = Parser::new_with_broken_link_callback(
1244 content,
1245 options,
1246 Some(|link: BrokenLink<'_>| {
1247 broken_links.push(BrokenLinkInfo {
1248 reference: link.reference.to_string(),
1249 span: link.span.clone(),
1250 });
1251 None
1252 }),
1253 )
1254 .into_offset_iter();
1255
1256 let mut link_stack: Vec<(
1257 usize,
1258 usize,
1259 pulldown_cmark::CowStr<'a>,
1260 LinkType,
1261 pulldown_cmark::CowStr<'a>,
1262 )> = Vec::new();
1263 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1266 match event {
1267 Event::Start(Tag::Link {
1268 link_type,
1269 dest_url,
1270 id,
1271 ..
1272 }) => {
1273 link_stack.push((range.start, range.end, dest_url, link_type, id));
1275 text_chunks.clear();
1276 }
1277 Event::Text(text) if !link_stack.is_empty() => {
1278 text_chunks.push((text.to_string(), range.start, range.end));
1280 }
1281 Event::Code(code) if !link_stack.is_empty() => {
1282 let code_text = format!("`{code}`");
1284 text_chunks.push((code_text, range.start, range.end));
1285 }
1286 Event::End(TagEnd::Link) => {
1287 if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1288 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1290 text_chunks.clear();
1291 continue;
1292 }
1293
1294 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1296
1297 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1299 text_chunks.clear();
1300 continue;
1301 }
1302
1303 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1304
1305 let is_reference = matches!(
1306 link_type,
1307 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1308 );
1309
1310 let link_text = if start_pos < content.len() {
1313 let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1314
1315 let mut close_pos = None;
1319 let mut depth = 0;
1320 let mut in_code_span = false;
1321
1322 for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1323 let mut backslash_count = 0;
1325 let mut j = i;
1326 while j > 0 && link_bytes[j - 1] == b'\\' {
1327 backslash_count += 1;
1328 j -= 1;
1329 }
1330 let is_escaped = backslash_count % 2 != 0;
1331
1332 if byte == b'`' && !is_escaped {
1334 in_code_span = !in_code_span;
1335 }
1336
1337 if !is_escaped && !in_code_span {
1339 if byte == b'[' {
1340 depth += 1;
1341 } else if byte == b']' {
1342 if depth == 0 {
1343 close_pos = Some(i);
1345 break;
1346 } else {
1347 depth -= 1;
1348 }
1349 }
1350 }
1351 }
1352
1353 if let Some(pos) = close_pos {
1354 Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1355 } else {
1356 Cow::Borrowed("")
1357 }
1358 } else {
1359 Cow::Borrowed("")
1360 };
1361
1362 let reference_id = if is_reference && !ref_id.is_empty() {
1364 Some(Cow::Owned(ref_id.to_lowercase()))
1365 } else if is_reference {
1366 Some(Cow::Owned(link_text.to_lowercase()))
1368 } else {
1369 None
1370 };
1371
1372 found_positions.insert(start_pos);
1374
1375 links.push(ParsedLink {
1376 line: line_num,
1377 start_col: col_start,
1378 end_col: col_end,
1379 byte_offset: start_pos,
1380 byte_end: range.end,
1381 text: link_text,
1382 url: Cow::Owned(url.to_string()),
1383 is_reference,
1384 reference_id,
1385 link_type,
1386 });
1387
1388 text_chunks.clear();
1389 }
1390 }
1391 Event::FootnoteReference(footnote_id) => {
1392 if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1395 continue;
1396 }
1397
1398 let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1399 footnote_refs.push(FootnoteRef {
1400 id: footnote_id.to_string(),
1401 line: line_num,
1402 byte_offset: range.start,
1403 byte_end: range.end,
1404 });
1405 }
1406 _ => {}
1407 }
1408 }
1409
1410 for cap in LINK_PATTERN.captures_iter(content) {
1414 let full_match = cap.get(0).unwrap();
1415 let match_start = full_match.start();
1416 let match_end = full_match.end();
1417
1418 if found_positions.contains(&match_start) {
1420 continue;
1421 }
1422
1423 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1425 continue;
1426 }
1427
1428 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1430 continue;
1431 }
1432
1433 if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1435 continue;
1436 }
1437
1438 if Self::is_offset_in_code_span(code_spans, match_start) {
1440 continue;
1441 }
1442
1443 if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1445 continue;
1446 }
1447
1448 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1450
1451 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1453 continue;
1454 }
1455
1456 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1457
1458 let text = cap.get(1).map_or("", |m| m.as_str());
1459
1460 if let Some(ref_id) = cap.get(6) {
1462 let ref_id_str = ref_id.as_str();
1463 let normalized_ref = if ref_id_str.is_empty() {
1464 Cow::Owned(text.to_lowercase()) } else {
1466 Cow::Owned(ref_id_str.to_lowercase())
1467 };
1468
1469 links.push(ParsedLink {
1471 line: line_num,
1472 start_col: col_start,
1473 end_col: col_end,
1474 byte_offset: match_start,
1475 byte_end: match_end,
1476 text: Cow::Borrowed(text),
1477 url: Cow::Borrowed(""), is_reference: true,
1479 reference_id: Some(normalized_ref),
1480 link_type: LinkType::Reference, });
1482 }
1483 }
1484
1485 (links, broken_links, footnote_refs)
1486 }
1487
1488 fn parse_images(
1490 content: &'a str,
1491 lines: &[LineInfo],
1492 code_blocks: &[(usize, usize)],
1493 code_spans: &[CodeSpan],
1494 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1495 ) -> Vec<ParsedImage<'a>> {
1496 use crate::utils::skip_context::is_in_html_comment_ranges;
1497 use std::collections::HashSet;
1498
1499 let mut images = Vec::with_capacity(content.len() / 1000);
1501 let mut found_positions = HashSet::new();
1502
1503 let parser = Parser::new(content).into_offset_iter();
1505 let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1506 Vec::new();
1507 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1510 match event {
1511 Event::Start(Tag::Image {
1512 link_type,
1513 dest_url,
1514 id,
1515 ..
1516 }) => {
1517 image_stack.push((range.start, dest_url, link_type, id));
1518 text_chunks.clear();
1519 }
1520 Event::Text(text) if !image_stack.is_empty() => {
1521 text_chunks.push((text.to_string(), range.start, range.end));
1522 }
1523 Event::Code(code) if !image_stack.is_empty() => {
1524 let code_text = format!("`{code}`");
1525 text_chunks.push((code_text, range.start, range.end));
1526 }
1527 Event::End(TagEnd::Image) => {
1528 if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1529 if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1531 continue;
1532 }
1533
1534 if Self::is_offset_in_code_span(code_spans, start_pos) {
1536 continue;
1537 }
1538
1539 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1541 continue;
1542 }
1543
1544 let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1546 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1547
1548 let is_reference = matches!(
1549 link_type,
1550 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1551 );
1552
1553 let alt_text = if start_pos < content.len() {
1556 let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1557
1558 let mut close_pos = None;
1561 let mut depth = 0;
1562
1563 if image_bytes.len() > 2 {
1564 for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1565 let mut backslash_count = 0;
1567 let mut j = i;
1568 while j > 0 && image_bytes[j - 1] == b'\\' {
1569 backslash_count += 1;
1570 j -= 1;
1571 }
1572 let is_escaped = backslash_count % 2 != 0;
1573
1574 if !is_escaped {
1575 if byte == b'[' {
1576 depth += 1;
1577 } else if byte == b']' {
1578 if depth == 0 {
1579 close_pos = Some(i);
1581 break;
1582 } else {
1583 depth -= 1;
1584 }
1585 }
1586 }
1587 }
1588 }
1589
1590 if let Some(pos) = close_pos {
1591 Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1592 } else {
1593 Cow::Borrowed("")
1594 }
1595 } else {
1596 Cow::Borrowed("")
1597 };
1598
1599 let reference_id = if is_reference && !ref_id.is_empty() {
1600 Some(Cow::Owned(ref_id.to_lowercase()))
1601 } else if is_reference {
1602 Some(Cow::Owned(alt_text.to_lowercase())) } else {
1604 None
1605 };
1606
1607 found_positions.insert(start_pos);
1608 images.push(ParsedImage {
1609 line: line_num,
1610 start_col: col_start,
1611 end_col: col_end,
1612 byte_offset: start_pos,
1613 byte_end: range.end,
1614 alt_text,
1615 url: Cow::Owned(url.to_string()),
1616 is_reference,
1617 reference_id,
1618 link_type,
1619 });
1620 }
1621 }
1622 _ => {}
1623 }
1624 }
1625
1626 for cap in IMAGE_PATTERN.captures_iter(content) {
1628 let full_match = cap.get(0).unwrap();
1629 let match_start = full_match.start();
1630 let match_end = full_match.end();
1631
1632 if found_positions.contains(&match_start) {
1634 continue;
1635 }
1636
1637 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1639 continue;
1640 }
1641
1642 if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1644 || Self::is_offset_in_code_span(code_spans, match_start)
1645 || is_in_html_comment_ranges(html_comment_ranges, match_start)
1646 {
1647 continue;
1648 }
1649
1650 if let Some(ref_id) = cap.get(6) {
1652 let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1653 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1654 let alt_text = cap.get(1).map_or("", |m| m.as_str());
1655 let ref_id_str = ref_id.as_str();
1656 let normalized_ref = if ref_id_str.is_empty() {
1657 Cow::Owned(alt_text.to_lowercase())
1658 } else {
1659 Cow::Owned(ref_id_str.to_lowercase())
1660 };
1661
1662 images.push(ParsedImage {
1663 line: line_num,
1664 start_col: col_start,
1665 end_col: col_end,
1666 byte_offset: match_start,
1667 byte_end: match_end,
1668 alt_text: Cow::Borrowed(alt_text),
1669 url: Cow::Borrowed(""),
1670 is_reference: true,
1671 reference_id: Some(normalized_ref),
1672 link_type: LinkType::Reference, });
1674 }
1675 }
1676
1677 images
1678 }
1679
1680 fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1682 let mut refs = Vec::with_capacity(lines.len() / 20); for (line_idx, line_info) in lines.iter().enumerate() {
1686 if line_info.in_code_block {
1688 continue;
1689 }
1690
1691 let line = line_info.content(content);
1692 let line_num = line_idx + 1;
1693
1694 if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1695 let id = cap.get(1).unwrap().as_str().to_lowercase();
1696 let url = cap.get(2).unwrap().as_str().to_string();
1697 let title_match = cap.get(3).or_else(|| cap.get(4));
1698 let title = title_match.map(|m| m.as_str().to_string());
1699
1700 let match_obj = cap.get(0).unwrap();
1703 let byte_offset = line_info.byte_offset + match_obj.start();
1704 let byte_end = line_info.byte_offset + match_obj.end();
1705
1706 let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1708 let start = line_info.byte_offset + m.start().saturating_sub(1);
1710 let end = line_info.byte_offset + m.end() + 1; (Some(start), Some(end))
1712 } else {
1713 (None, None)
1714 };
1715
1716 refs.push(ReferenceDef {
1717 line: line_num,
1718 id,
1719 url,
1720 title,
1721 byte_offset,
1722 byte_end,
1723 title_byte_start,
1724 title_byte_end,
1725 });
1726 }
1727 }
1728
1729 refs
1730 }
1731
/// Splits a blockquote line into its marker prefix and the text that follows it.
///
/// The prefix covers every `>` marker (each optionally preceded by whitespace) plus at
/// most one space or tab directly after each marker. Returns `None` when the line does
/// not begin with a `>` after leading whitespace.
#[inline]
fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
    let mut rest = line;
    let mut prefix_len = 0;

    // Peel off one `>` marker per iteration, including the whitespace in front of it.
    while let Some(after_marker) = rest.trim_start().strip_prefix('>') {
        prefix_len += rest.len() - after_marker.len();

        // A single space or tab after the marker belongs to the prefix.
        let separator = after_marker
            .strip_prefix(' ')
            .or_else(|| after_marker.strip_prefix('\t'));
        rest = match separator {
            Some(tail) => {
                prefix_len += 1;
                tail
            }
            None => after_marker,
        };
    }

    // No marker consumed means the line was not a blockquote at all.
    if prefix_len == 0 {
        None
    } else {
        Some((&line[..prefix_len], rest))
    }
}
1772
/// Recognises an unordered list marker (`-`, `*` or `+`) at the start of `line`.
///
/// On success returns `(leading whitespace, marker, spacing after marker, remainder)`,
/// where whitespace and spacing consist of spaces and tabs only. Returns `None` when
/// the first non-blank byte is not one of the three markers or the line has none.
#[inline]
fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
    fn is_space_or_tab(byte: u8) -> bool {
        byte == b' ' || byte == b'\t'
    }

    let marker_pos = line.bytes().take_while(|&b| is_space_or_tab(b)).count();
    let marker = match line.as_bytes().get(marker_pos).copied()? {
        b'-' => '-',
        b'*' => '*',
        b'+' => '+',
        _ => return None,
    };

    // The marker is a single ASCII byte, so the next index is a valid slice boundary.
    let spacing_start = marker_pos + 1;
    let spacing_len = line[spacing_start..]
        .bytes()
        .take_while(|&b| is_space_or_tab(b))
        .count();
    let content_start = spacing_start + spacing_len;

    Some((
        &line[..marker_pos],
        marker,
        &line[spacing_start..content_start],
        &line[content_start..],
    ))
}
1805
/// Recognises an ordered list marker (digits followed by `.` or `)`) at the start of
/// `line`.
///
/// On success returns
/// `(leading whitespace, digits, delimiter, spacing after delimiter, remainder)`.
/// Returns `None` when there are no digits after the leading spaces/tabs, when the
/// line ends right after the digits, or when the byte after them is not `.` or `)`.
#[inline]
fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
    fn is_space_or_tab(byte: u8) -> bool {
        byte == b' ' || byte == b'\t'
    }

    let number_start = line.bytes().take_while(|&b| is_space_or_tab(b)).count();
    let digit_count = line[number_start..]
        .bytes()
        .take_while(u8::is_ascii_digit)
        .count();
    if digit_count == 0 {
        return None;
    }

    let delimiter_pos = number_start + digit_count;
    let delimiter = match line.as_bytes().get(delimiter_pos).copied()? {
        b'.' => '.',
        b')' => ')',
        _ => return None,
    };

    // The delimiter is a single ASCII byte, so the next index is a valid slice boundary.
    let spacing_start = delimiter_pos + 1;
    let spacing_len = line[spacing_start..]
        .bytes()
        .take_while(|&b| is_space_or_tab(b))
        .count();
    let content_start = spacing_start + spacing_len;

    Some((
        &line[..number_start],
        &line[number_start..delimiter_pos],
        delimiter,
        &line[spacing_start..content_start],
        &line[content_start..],
    ))
}
1853
/// Produces one flag per entry of `line_offsets`, set to `true` when that line overlaps
/// any of the `code_blocks` byte ranges.
///
/// Range ends are first snapped to UTF-8 character boundaries of `content` (the start
/// moves backwards, the end moves forwards and is capped at `content.len()`). The
/// affected lines are then found by binary search, so `line_offsets` is expected to be
/// sorted in ascending order.
fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
    let total_len = content.len();
    let mut line_flags = vec![false; line_offsets.len()];

    for &(block_start, block_end) in code_blocks {
        // Snap the start down to the nearest character boundary.
        let mut span_start = block_start;
        while span_start > 0 && !content.is_char_boundary(span_start) {
            span_start -= 1;
        }

        // Cap the end at the content length, then snap it up to a character boundary.
        let mut span_end = block_end.min(total_len);
        while span_end < total_len && !content.is_char_boundary(span_end) {
            span_end += 1;
        }

        // First flagged line: the last line that begins at or before the span start.
        let first_line = line_offsets
            .partition_point(|&offset| offset <= span_start)
            .saturating_sub(1);
        // One past the last flagged line: the number of lines that begin before the span end.
        let last_line = line_offsets.partition_point(|&offset| offset < span_end);

        // `get_mut` yields `None` for an inverted range, in which case nothing is flagged.
        if let Some(covered) = line_flags.get_mut(first_line..last_line) {
            covered.fill(true);
        }
    }

    line_flags
}
1913
1914 fn compute_basic_line_info(
1916 content: &str,
1917 line_offsets: &[usize],
1918 code_blocks: &[(usize, usize)],
1919 flavor: MarkdownFlavor,
1920 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1921 autodoc_ranges: &[crate::utils::skip_context::ByteRange],
1922 ) -> Vec<LineInfo> {
1923 let content_lines: Vec<&str> = content.lines().collect();
1924 let mut lines = Vec::with_capacity(content_lines.len());
1925
1926 let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
1928
1929 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
1932
1933 for (i, line) in content_lines.iter().enumerate() {
1934 let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
1935 let indent = line.len() - line.trim_start().len();
1936 let visual_indent = ElementCache::calculate_indentation_width_default(line);
1938
1939 let blockquote_parse = Self::parse_blockquote_prefix(line);
1941
1942 let is_blank = if let Some((_, content)) = blockquote_parse {
1944 content.trim().is_empty()
1946 } else {
1947 line.trim().is_empty()
1948 };
1949
1950 let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
1952
1953 let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
1955 && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
1956 let line_end_offset = byte_offset + line.len();
1959 let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
1960 html_comment_ranges,
1961 byte_offset,
1962 line_end_offset,
1963 );
1964 let list_item = if !(in_code_block
1965 || is_blank
1966 || in_mkdocstrings
1967 || in_html_comment
1968 || (front_matter_end > 0 && i < front_matter_end))
1969 {
1970 let (line_for_list_check, blockquote_prefix_len) = if let Some((prefix, content)) = blockquote_parse {
1972 (content, prefix.len())
1973 } else {
1974 (&**line, 0)
1975 };
1976
1977 if let Some((leading_spaces, marker, spacing, _content)) =
1978 Self::parse_unordered_list(line_for_list_check)
1979 {
1980 let marker_column = blockquote_prefix_len + leading_spaces.len();
1981 let content_column = marker_column + 1 + spacing.len();
1982
1983 if spacing.is_empty() {
1990 None
1991 } else {
1992 Some(ListItemInfo {
1993 marker: marker.to_string(),
1994 is_ordered: false,
1995 number: None,
1996 marker_column,
1997 content_column,
1998 })
1999 }
2000 } else if let Some((leading_spaces, number_str, delimiter, spacing, content)) =
2001 Self::parse_ordered_list(line_for_list_check)
2002 {
2003 let marker = format!("{number_str}{delimiter}");
2004 let marker_column = blockquote_prefix_len + leading_spaces.len();
2005 let content_column = marker_column + marker.len() + spacing.len();
2006
2007 let content_after_spacing = content.trim();
2015 if spacing.is_empty() && !content_after_spacing.is_empty() {
2016 None
2017 } else {
2018 Some(ListItemInfo {
2019 marker,
2020 is_ordered: true,
2021 number: number_str.parse().ok(),
2022 marker_column,
2023 content_column,
2024 })
2025 }
2026 } else {
2027 None
2028 }
2029 } else {
2030 None
2031 };
2032
2033 let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2036 let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2037
2038 lines.push(LineInfo {
2039 byte_offset,
2040 byte_len: line.len(),
2041 indent,
2042 visual_indent,
2043 is_blank,
2044 in_code_block,
2045 in_front_matter,
2046 in_html_block: false, in_html_comment,
2048 list_item,
2049 heading: None, blockquote: None, in_mkdocstrings,
2052 in_esm_block: false, in_code_span_continuation: false, is_horizontal_rule: is_hr,
2055 });
2056 }
2057
2058 lines
2059 }
2060
2061 fn detect_headings_and_blockquotes(
2063 content: &str,
2064 lines: &mut [LineInfo],
2065 flavor: MarkdownFlavor,
2066 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2067 link_byte_ranges: &[(usize, usize)],
2068 ) {
2069 static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
2071 LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
2072 static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
2073 LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
2074
2075 let content_lines: Vec<&str> = content.lines().collect();
2076
2077 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2079
2080 for i in 0..lines.len() {
2082 if lines[i].in_code_block {
2083 continue;
2084 }
2085
2086 if front_matter_end > 0 && i < front_matter_end {
2088 continue;
2089 }
2090
2091 if lines[i].in_html_block {
2093 continue;
2094 }
2095
2096 let line = content_lines[i];
2097
2098 if let Some(bq) = parse_blockquote_detailed(line) {
2100 let nesting_level = bq.markers.len(); let marker_column = bq.indent.len();
2102
2103 let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
2105
2106 let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
2108 let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
2111
2112 let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
2116
2117 lines[i].blockquote = Some(BlockquoteInfo {
2118 nesting_level,
2119 indent: bq.indent.to_string(),
2120 marker_column,
2121 prefix,
2122 content: bq.content.to_string(),
2123 has_no_space_after_marker: has_no_space,
2124 has_multiple_spaces_after_marker: has_multiple_spaces,
2125 needs_md028_fix,
2126 });
2127 }
2128
2129 if lines[i].is_blank {
2131 continue;
2132 }
2133
2134 let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
2137 crate::utils::mkdocs_snippets::is_snippet_section_start(line)
2138 || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
2139 } else {
2140 false
2141 };
2142
2143 if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
2144 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
2146 continue;
2147 }
2148 let line_offset = lines[i].byte_offset;
2151 if link_byte_ranges
2152 .iter()
2153 .any(|&(start, end)| line_offset > start && line_offset < end)
2154 {
2155 continue;
2156 }
2157 let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
2158 let hashes = caps.get(2).map_or("", |m| m.as_str());
2159 let spaces_after = caps.get(3).map_or("", |m| m.as_str());
2160 let rest = caps.get(4).map_or("", |m| m.as_str());
2161
2162 let level = hashes.len() as u8;
2163 let marker_column = leading_spaces.len();
2164
2165 let (text, has_closing, closing_seq) = {
2167 let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
2169 if rest[id_start..].trim_end().ends_with('}') {
2171 (&rest[..id_start], &rest[id_start..])
2173 } else {
2174 (rest, "")
2175 }
2176 } else {
2177 (rest, "")
2178 };
2179
2180 let trimmed_rest = rest_without_id.trim_end();
2182 if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
2183 let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();
2186
2187 let last_hash_char_idx = char_positions
2189 .iter()
2190 .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);
2191
2192 if let Some(mut char_idx) = last_hash_char_idx {
2193 while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
2195 char_idx -= 1;
2196 }
2197
2198 let start_of_hashes = char_positions[char_idx].0;
2200
2201 let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();
2203
2204 let potential_closing = &trimmed_rest[start_of_hashes..];
2206 let is_all_hashes = potential_closing.chars().all(|c| c == '#');
2207
2208 if is_all_hashes && has_space_before {
2209 let closing_hashes = potential_closing.to_string();
2211 let text_part = if !custom_id_part.is_empty() {
2214 format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
2217 } else {
2218 trimmed_rest[..start_of_hashes].trim_end().to_string()
2219 };
2220 (text_part, true, closing_hashes)
2221 } else {
2222 (rest.to_string(), false, String::new())
2224 }
2225 } else {
2226 (rest.to_string(), false, String::new())
2228 }
2229 } else {
2230 (rest.to_string(), false, String::new())
2232 }
2233 };
2234
2235 let content_column = marker_column + hashes.len() + spaces_after.len();
2236
2237 let raw_text = text.trim().to_string();
2239 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2240
2241 if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
2243 let next_line = content_lines[i + 1];
2244 if !lines[i + 1].in_code_block
2245 && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
2246 && let Some(next_line_id) =
2247 crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
2248 {
2249 custom_id = Some(next_line_id);
2250 }
2251 }
2252
2253 let is_valid = !spaces_after.is_empty()
2263 || rest.is_empty()
2264 || level > 1
2265 || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());
2266
2267 lines[i].heading = Some(HeadingInfo {
2268 level,
2269 style: HeadingStyle::ATX,
2270 marker: hashes.to_string(),
2271 marker_column,
2272 content_column,
2273 text: clean_text,
2274 custom_id,
2275 raw_text,
2276 has_closing_sequence: has_closing,
2277 closing_sequence: closing_seq,
2278 is_valid,
2279 });
2280 }
2281 else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2283 let next_line = content_lines[i + 1];
2284 if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2285 if front_matter_end > 0 && i < front_matter_end {
2287 continue;
2288 }
2289
2290 if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2292 {
2293 continue;
2294 }
2295
2296 let underline = next_line.trim();
2297
2298 let level = if underline.starts_with('=') { 1 } else { 2 };
2299 let style = if level == 1 {
2300 HeadingStyle::Setext1
2301 } else {
2302 HeadingStyle::Setext2
2303 };
2304
2305 let raw_text = line.trim().to_string();
2307 let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2308
2309 if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2311 let attr_line = content_lines[i + 2];
2312 if !lines[i + 2].in_code_block
2313 && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2314 && let Some(attr_line_id) =
2315 crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2316 {
2317 custom_id = Some(attr_line_id);
2318 }
2319 }
2320
2321 lines[i].heading = Some(HeadingInfo {
2322 level,
2323 style,
2324 marker: underline.to_string(),
2325 marker_column: next_line.len() - next_line.trim_start().len(),
2326 content_column: lines[i].indent,
2327 text: clean_text,
2328 custom_id,
2329 raw_text,
2330 has_closing_sequence: false,
2331 closing_sequence: String::new(),
2332 is_valid: true, });
2334 }
2335 }
2336 }
2337 }
2338
2339 fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2341 const BLOCK_ELEMENTS: &[&str] = &[
2344 "address",
2345 "article",
2346 "aside",
2347 "audio",
2348 "blockquote",
2349 "canvas",
2350 "details",
2351 "dialog",
2352 "dd",
2353 "div",
2354 "dl",
2355 "dt",
2356 "embed",
2357 "fieldset",
2358 "figcaption",
2359 "figure",
2360 "footer",
2361 "form",
2362 "h1",
2363 "h2",
2364 "h3",
2365 "h4",
2366 "h5",
2367 "h6",
2368 "header",
2369 "hr",
2370 "iframe",
2371 "li",
2372 "main",
2373 "menu",
2374 "nav",
2375 "noscript",
2376 "object",
2377 "ol",
2378 "p",
2379 "picture",
2380 "pre",
2381 "script",
2382 "search",
2383 "section",
2384 "source",
2385 "style",
2386 "summary",
2387 "svg",
2388 "table",
2389 "tbody",
2390 "td",
2391 "template",
2392 "textarea",
2393 "tfoot",
2394 "th",
2395 "thead",
2396 "tr",
2397 "track",
2398 "ul",
2399 "video",
2400 ];
2401
2402 let mut i = 0;
2403 while i < lines.len() {
2404 if lines[i].in_code_block || lines[i].in_front_matter {
2406 i += 1;
2407 continue;
2408 }
2409
2410 let trimmed = lines[i].content(content).trim_start();
2411
2412 if trimmed.starts_with('<') && trimmed.len() > 1 {
2414 let after_bracket = &trimmed[1..];
2416 let is_closing = after_bracket.starts_with('/');
2417 let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2418
2419 let tag_name = tag_start
2421 .chars()
2422 .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2423 .collect::<String>()
2424 .to_lowercase();
2425
2426 if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2428 lines[i].in_html_block = true;
2430
2431 if !is_closing {
2434 let closing_tag = format!("</{tag_name}>");
2435 let allow_blank_lines = tag_name == "style" || tag_name == "script";
2437 let mut j = i + 1;
2438 let mut found_closing_tag = false;
2439 while j < lines.len() && j < i + 100 {
2440 if !allow_blank_lines && lines[j].is_blank {
2443 break;
2444 }
2445
2446 lines[j].in_html_block = true;
2447
2448 if lines[j].content(content).contains(&closing_tag) {
2450 found_closing_tag = true;
2451 }
2452
2453 if found_closing_tag {
2456 j += 1;
2457 while j < lines.len() && j < i + 100 {
2459 if lines[j].is_blank {
2460 break;
2461 }
2462 lines[j].in_html_block = true;
2463 j += 1;
2464 }
2465 break;
2466 }
2467 j += 1;
2468 }
2469 }
2470 }
2471 }
2472
2473 i += 1;
2474 }
2475 }
2476
2477 fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2480 if !flavor.supports_esm_blocks() {
2482 return;
2483 }
2484
2485 let mut in_multiline_comment = false;
2486
2487 for line in lines.iter_mut() {
2488 if line.is_blank || line.in_html_comment {
2490 continue;
2491 }
2492
2493 let trimmed = line.content(content).trim_start();
2494
2495 if in_multiline_comment {
2497 if trimmed.contains("*/") {
2498 in_multiline_comment = false;
2499 }
2500 continue;
2501 }
2502
2503 if trimmed.starts_with("//") {
2505 continue;
2506 }
2507
2508 if trimmed.starts_with("/*") {
2510 if !trimmed.contains("*/") {
2511 in_multiline_comment = true;
2512 }
2513 continue;
2514 }
2515
2516 if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
2518 line.in_esm_block = true;
2519 } else {
2520 break;
2522 }
2523 }
2524 }
2525
2526 fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
2528 let mut code_spans = Vec::new();
2529
2530 if !content.contains('`') {
2532 return code_spans;
2533 }
2534
2535 let parser = Parser::new(content).into_offset_iter();
2537
2538 for (event, range) in parser {
2539 if let Event::Code(_) = event {
2540 let start_pos = range.start;
2541 let end_pos = range.end;
2542
2543 let full_span = &content[start_pos..end_pos];
2545 let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
2546
2547 let content_start = start_pos + backtick_count;
2549 let content_end = end_pos - backtick_count;
2550 let span_content = if content_start < content_end {
2551 content[content_start..content_end].to_string()
2552 } else {
2553 String::new()
2554 };
2555
2556 let line_idx = lines
2559 .partition_point(|line| line.byte_offset <= start_pos)
2560 .saturating_sub(1);
2561 let line_num = line_idx + 1;
2562 let byte_col_start = start_pos - lines[line_idx].byte_offset;
2563
2564 let end_line_idx = lines
2566 .partition_point(|line| line.byte_offset <= end_pos)
2567 .saturating_sub(1);
2568 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
2569
2570 let line_content = lines[line_idx].content(content);
2573 let col_start = if byte_col_start <= line_content.len() {
2574 line_content[..byte_col_start].chars().count()
2575 } else {
2576 line_content.chars().count()
2577 };
2578
2579 let end_line_content = lines[end_line_idx].content(content);
2580 let col_end = if byte_col_end <= end_line_content.len() {
2581 end_line_content[..byte_col_end].chars().count()
2582 } else {
2583 end_line_content.chars().count()
2584 };
2585
2586 code_spans.push(CodeSpan {
2587 line: line_num,
2588 end_line: end_line_idx + 1,
2589 start_col: col_start,
2590 end_col: col_end,
2591 byte_offset: start_pos,
2592 byte_end: end_pos,
2593 backtick_count,
2594 content: span_content,
2595 });
2596 }
2597 }
2598
2599 code_spans.sort_by_key(|span| span.byte_offset);
2601
2602 code_spans
2603 }
2604
2605 fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
2616 const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
2618
2619 #[inline]
2622 fn reset_tracking_state(
2623 list_item: &ListItemInfo,
2624 has_list_breaking_content: &mut bool,
2625 min_continuation: &mut usize,
2626 ) {
2627 *has_list_breaking_content = false;
2628 let marker_width = if list_item.is_ordered {
2629 list_item.marker.len() + 1 } else {
2631 list_item.marker.len()
2632 };
2633 *min_continuation = if list_item.is_ordered {
2634 marker_width
2635 } else {
2636 UNORDERED_LIST_MIN_CONTINUATION_INDENT
2637 };
2638 }
2639
2640 let mut list_blocks = Vec::with_capacity(lines.len() / 10); let mut current_block: Option<ListBlock> = None;
2643 let mut last_list_item_line = 0;
2644 let mut current_indent_level = 0;
2645 let mut last_marker_width = 0;
2646
2647 let mut has_list_breaking_content_since_last_item = false;
2649 let mut min_continuation_for_tracking = 0;
2650
2651 for (line_idx, line_info) in lines.iter().enumerate() {
2652 let line_num = line_idx + 1;
2653
2654 if line_info.in_code_block {
2656 if let Some(ref mut block) = current_block {
2657 let min_continuation_indent =
2659 CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
2660
2661 let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
2663
2664 match context {
2665 CodeBlockContext::Indented => {
2666 block.end_line = line_num;
2668 continue;
2669 }
2670 CodeBlockContext::Standalone => {
2671 let completed_block = current_block.take().unwrap();
2673 list_blocks.push(completed_block);
2674 continue;
2675 }
2676 CodeBlockContext::Adjacent => {
2677 block.end_line = line_num;
2679 continue;
2680 }
2681 }
2682 } else {
2683 continue;
2685 }
2686 }
2687
2688 let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
2690 caps.get(0).unwrap().as_str().to_string()
2691 } else {
2692 String::new()
2693 };
2694
2695 if current_block.is_some()
2698 && line_info.list_item.is_none()
2699 && !line_info.is_blank
2700 && !line_info.in_code_span_continuation
2701 {
2702 let line_content = line_info.content(content).trim();
2703
2704 let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
2709 let breaks_list = line_info.heading.is_some()
2710 || line_content.starts_with("---")
2711 || line_content.starts_with("***")
2712 || line_content.starts_with("___")
2713 || crate::utils::skip_context::is_table_line(line_content)
2714 || line_content.starts_with(">")
2715 || (line_info.indent > 0
2716 && line_info.indent < min_continuation_for_tracking
2717 && !is_lazy_continuation);
2718
2719 if breaks_list {
2720 has_list_breaking_content_since_last_item = true;
2721 }
2722 }
2723
2724 if line_info.in_code_span_continuation
2727 && line_info.list_item.is_none()
2728 && let Some(ref mut block) = current_block
2729 {
2730 block.end_line = line_num;
2731 }
2732
2733 let is_valid_continuation =
2738 line_info.indent >= min_continuation_for_tracking || (line_info.indent == 0 && !line_info.is_blank); if !line_info.in_code_span_continuation
2740 && line_info.list_item.is_none()
2741 && !line_info.is_blank
2742 && !line_info.in_code_block
2743 && is_valid_continuation
2744 && let Some(ref mut block) = current_block
2745 {
2746 block.end_line = line_num;
2747 }
2748
2749 if let Some(list_item) = &line_info.list_item {
2751 let item_indent = list_item.marker_column;
2753 let nesting = item_indent / 2; if let Some(ref mut block) = current_block {
2756 let is_nested = nesting > block.nesting_level;
2760 let same_type =
2761 (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
2762 let same_context = block.blockquote_prefix == blockquote_prefix;
2763 let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
2765
2766 let marker_compatible =
2768 block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
2769
2770 let has_non_list_content = has_list_breaking_content_since_last_item;
2773
2774 let mut continues_list = if is_nested {
2778 same_context && reasonable_distance && !has_non_list_content
2780 } else {
2781 same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
2783 };
2784
2785 if !continues_list && reasonable_distance && line_num > 0 && block.end_line == line_num - 1 {
2788 if block.item_lines.contains(&(line_num - 1)) {
2791 continues_list = true;
2793 } else {
2794 continues_list = true;
2798 }
2799 }
2800
2801 if continues_list {
2802 block.end_line = line_num;
2804 block.item_lines.push(line_num);
2805
2806 block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
2808 list_item.marker.len() + 1
2809 } else {
2810 list_item.marker.len()
2811 });
2812
2813 if !block.is_ordered
2815 && block.marker.is_some()
2816 && block.marker.as_ref() != Some(&list_item.marker)
2817 {
2818 block.marker = None;
2820 }
2821
2822 reset_tracking_state(
2824 list_item,
2825 &mut has_list_breaking_content_since_last_item,
2826 &mut min_continuation_for_tracking,
2827 );
2828 } else {
2829 list_blocks.push(block.clone());
2832
2833 *block = ListBlock {
2834 start_line: line_num,
2835 end_line: line_num,
2836 is_ordered: list_item.is_ordered,
2837 marker: if list_item.is_ordered {
2838 None
2839 } else {
2840 Some(list_item.marker.clone())
2841 },
2842 blockquote_prefix: blockquote_prefix.clone(),
2843 item_lines: vec![line_num],
2844 nesting_level: nesting,
2845 max_marker_width: if list_item.is_ordered {
2846 list_item.marker.len() + 1
2847 } else {
2848 list_item.marker.len()
2849 },
2850 };
2851
2852 reset_tracking_state(
2854 list_item,
2855 &mut has_list_breaking_content_since_last_item,
2856 &mut min_continuation_for_tracking,
2857 );
2858 }
2859 } else {
2860 current_block = Some(ListBlock {
2862 start_line: line_num,
2863 end_line: line_num,
2864 is_ordered: list_item.is_ordered,
2865 marker: if list_item.is_ordered {
2866 None
2867 } else {
2868 Some(list_item.marker.clone())
2869 },
2870 blockquote_prefix,
2871 item_lines: vec![line_num],
2872 nesting_level: nesting,
2873 max_marker_width: list_item.marker.len(),
2874 });
2875
2876 reset_tracking_state(
2878 list_item,
2879 &mut has_list_breaking_content_since_last_item,
2880 &mut min_continuation_for_tracking,
2881 );
2882 }
2883
2884 last_list_item_line = line_num;
2885 current_indent_level = item_indent;
2886 last_marker_width = if list_item.is_ordered {
2887 list_item.marker.len() + 1 } else {
2889 list_item.marker.len()
2890 };
2891 } else if let Some(ref mut block) = current_block {
2892 let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
2902 lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
2903 } else {
2904 false
2905 };
2906
2907 let min_continuation_indent = if block.is_ordered {
2911 current_indent_level + last_marker_width
2912 } else {
2913 current_indent_level + 2 };
2915
2916 if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
2917 block.end_line = line_num;
2919 } else if line_info.is_blank {
2920 let mut check_idx = line_idx + 1;
2923 let mut found_continuation = false;
2924
2925 while check_idx < lines.len() && lines[check_idx].is_blank {
2927 check_idx += 1;
2928 }
2929
2930 if check_idx < lines.len() {
2931 let next_line = &lines[check_idx];
2932 if !next_line.in_code_block && next_line.indent >= min_continuation_indent {
2934 found_continuation = true;
2935 }
2936 else if !next_line.in_code_block
2938 && next_line.list_item.is_some()
2939 && let Some(item) = &next_line.list_item
2940 {
2941 let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
2942 .find(next_line.content(content))
2943 .map_or(String::new(), |m| m.as_str().to_string());
2944 if item.marker_column == current_indent_level
2945 && item.is_ordered == block.is_ordered
2946 && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
2947 {
2948 let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
2951 if let Some(between_line) = lines.get(idx) {
2952 let between_content = between_line.content(content);
2953 let trimmed = between_content.trim();
2954 if trimmed.is_empty() {
2956 return false;
2957 }
2958 let line_indent = between_content.len() - between_content.trim_start().len();
2960
2961 if trimmed.starts_with("```")
2963 || trimmed.starts_with("~~~")
2964 || trimmed.starts_with("---")
2965 || trimmed.starts_with("***")
2966 || trimmed.starts_with("___")
2967 || trimmed.starts_with(">")
2968 || crate::utils::skip_context::is_table_line(trimmed)
2969 || between_line.heading.is_some()
2970 {
2971 return true; }
2973
2974 line_indent >= min_continuation_indent
2976 } else {
2977 false
2978 }
2979 });
2980
2981 if block.is_ordered {
2982 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2985 if let Some(between_line) = lines.get(idx) {
2986 let trimmed = between_line.content(content).trim();
2987 if trimmed.is_empty() {
2988 return false;
2989 }
2990 trimmed.starts_with("```")
2992 || trimmed.starts_with("~~~")
2993 || trimmed.starts_with("---")
2994 || trimmed.starts_with("***")
2995 || trimmed.starts_with("___")
2996 || trimmed.starts_with(">")
2997 || crate::utils::skip_context::is_table_line(trimmed)
2998 || between_line.heading.is_some()
2999 } else {
3000 false
3001 }
3002 });
3003 found_continuation = !has_structural_separators;
3004 } else {
3005 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
3007 if let Some(between_line) = lines.get(idx) {
3008 let trimmed = between_line.content(content).trim();
3009 if trimmed.is_empty() {
3010 return false;
3011 }
3012 trimmed.starts_with("```")
3014 || trimmed.starts_with("~~~")
3015 || trimmed.starts_with("---")
3016 || trimmed.starts_with("***")
3017 || trimmed.starts_with("___")
3018 || trimmed.starts_with(">")
3019 || crate::utils::skip_context::is_table_line(trimmed)
3020 || between_line.heading.is_some()
3021 } else {
3022 false
3023 }
3024 });
3025 found_continuation = !has_structural_separators;
3026 }
3027 }
3028 }
3029 }
3030
3031 if found_continuation {
3032 block.end_line = line_num;
3034 } else {
3035 list_blocks.push(block.clone());
3037 current_block = None;
3038 }
3039 } else {
3040 let min_required_indent = if block.is_ordered {
3043 current_indent_level + last_marker_width
3044 } else {
3045 current_indent_level + 2
3046 };
3047
3048 let line_content = line_info.content(content).trim();
3053
3054 let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
3056
3057 let is_structural_separator = line_info.heading.is_some()
3058 || line_content.starts_with("```")
3059 || line_content.starts_with("~~~")
3060 || line_content.starts_with("---")
3061 || line_content.starts_with("***")
3062 || line_content.starts_with("___")
3063 || line_content.starts_with(">")
3064 || looks_like_table;
3065
3066 let is_lazy_continuation = !is_structural_separator
3069 && !line_info.is_blank
3070 && (line_info.indent == 0 || line_info.indent >= min_required_indent);
3071
3072 if is_lazy_continuation {
3073 let content_to_check = if !blockquote_prefix.is_empty() {
3076 line_info
3078 .content(content)
3079 .strip_prefix(&blockquote_prefix)
3080 .unwrap_or(line_info.content(content))
3081 .trim()
3082 } else {
3083 line_info.content(content).trim()
3084 };
3085
3086 let starts_with_uppercase = content_to_check.chars().next().is_some_and(|c| c.is_uppercase());
3087
3088 if starts_with_uppercase && last_list_item_line > 0 {
3091 list_blocks.push(block.clone());
3093 current_block = None;
3094 } else {
3095 block.end_line = line_num;
3097 }
3098 } else {
3099 list_blocks.push(block.clone());
3101 current_block = None;
3102 }
3103 }
3104 }
3105 }
3106
3107 if let Some(block) = current_block {
3109 list_blocks.push(block);
3110 }
3111
3112 merge_adjacent_list_blocks(content, &mut list_blocks, lines);
3114
3115 list_blocks
3116 }
3117
3118 fn compute_char_frequency(content: &str) -> CharFrequency {
3120 let mut frequency = CharFrequency::default();
3121
3122 for ch in content.chars() {
3123 match ch {
3124 '#' => frequency.hash_count += 1,
3125 '*' => frequency.asterisk_count += 1,
3126 '_' => frequency.underscore_count += 1,
3127 '-' => frequency.hyphen_count += 1,
3128 '+' => frequency.plus_count += 1,
3129 '>' => frequency.gt_count += 1,
3130 '|' => frequency.pipe_count += 1,
3131 '[' => frequency.bracket_count += 1,
3132 '`' => frequency.backtick_count += 1,
3133 '<' => frequency.lt_count += 1,
3134 '!' => frequency.exclamation_count += 1,
3135 '\n' => frequency.newline_count += 1,
3136 _ => {}
3137 }
3138 }
3139
3140 frequency
3141 }
3142
3143 fn parse_html_tags(
3145 content: &str,
3146 lines: &[LineInfo],
3147 code_blocks: &[(usize, usize)],
3148 flavor: MarkdownFlavor,
3149 ) -> Vec<HtmlTag> {
3150 static HTML_TAG_REGEX: LazyLock<regex::Regex> =
3151 LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
3152
3153 let mut html_tags = Vec::with_capacity(content.matches('<').count());
3154
3155 for cap in HTML_TAG_REGEX.captures_iter(content) {
3156 let full_match = cap.get(0).unwrap();
3157 let match_start = full_match.start();
3158 let match_end = full_match.end();
3159
3160 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3162 continue;
3163 }
3164
3165 let is_closing = !cap.get(1).unwrap().as_str().is_empty();
3166 let tag_name_original = cap.get(2).unwrap().as_str();
3167 let tag_name = tag_name_original.to_lowercase();
3168 let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
3169
3170 if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
3173 continue;
3174 }
3175
3176 let mut line_num = 1;
3178 let mut col_start = match_start;
3179 let mut col_end = match_end;
3180 for (idx, line_info) in lines.iter().enumerate() {
3181 if match_start >= line_info.byte_offset {
3182 line_num = idx + 1;
3183 col_start = match_start - line_info.byte_offset;
3184 col_end = match_end - line_info.byte_offset;
3185 } else {
3186 break;
3187 }
3188 }
3189
3190 html_tags.push(HtmlTag {
3191 line: line_num,
3192 start_col: col_start,
3193 end_col: col_end,
3194 byte_offset: match_start,
3195 byte_end: match_end,
3196 tag_name,
3197 is_closing,
3198 is_self_closing,
3199 raw_content: full_match.as_str().to_string(),
3200 });
3201 }
3202
3203 html_tags
3204 }
3205
3206 fn parse_emphasis_spans(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<EmphasisSpan> {
3208 static EMPHASIS_REGEX: LazyLock<regex::Regex> =
3209 LazyLock::new(|| regex::Regex::new(r"(\*{1,3}|_{1,3})([^*_\s][^*_]*?)(\*{1,3}|_{1,3})").unwrap());
3210
3211 let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
3212
3213 for cap in EMPHASIS_REGEX.captures_iter(content) {
3214 let full_match = cap.get(0).unwrap();
3215 let match_start = full_match.start();
3216 let match_end = full_match.end();
3217
3218 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3220 continue;
3221 }
3222
3223 let opening_markers = cap.get(1).unwrap().as_str();
3224 let content_part = cap.get(2).unwrap().as_str();
3225 let closing_markers = cap.get(3).unwrap().as_str();
3226
3227 if opening_markers.chars().next() != closing_markers.chars().next()
3229 || opening_markers.len() != closing_markers.len()
3230 {
3231 continue;
3232 }
3233
3234 let marker = opening_markers.chars().next().unwrap();
3235 let marker_count = opening_markers.len();
3236
3237 let mut line_num = 1;
3239 let mut col_start = match_start;
3240 let mut col_end = match_end;
3241 for (idx, line_info) in lines.iter().enumerate() {
3242 if match_start >= line_info.byte_offset {
3243 line_num = idx + 1;
3244 col_start = match_start - line_info.byte_offset;
3245 col_end = match_end - line_info.byte_offset;
3246 } else {
3247 break;
3248 }
3249 }
3250
3251 emphasis_spans.push(EmphasisSpan {
3252 line: line_num,
3253 start_col: col_start,
3254 end_col: col_end,
3255 byte_offset: match_start,
3256 byte_end: match_end,
3257 marker,
3258 marker_count,
3259 content: content_part.to_string(),
3260 });
3261 }
3262
3263 emphasis_spans
3264 }
3265
3266 fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
3268 let mut table_rows = Vec::with_capacity(lines.len() / 20);
3269
3270 for (line_idx, line_info) in lines.iter().enumerate() {
3271 if line_info.in_code_block || line_info.is_blank {
3273 continue;
3274 }
3275
3276 let line = line_info.content(content);
3277 let line_num = line_idx + 1;
3278
3279 if !line.contains('|') {
3281 continue;
3282 }
3283
3284 let parts: Vec<&str> = line.split('|').collect();
3286 let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
3287
3288 let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
3290 let mut column_alignments = Vec::new();
3291
3292 if is_separator {
3293 for part in &parts[1..parts.len() - 1] {
3294 let trimmed = part.trim();
3296 let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
3297 "center".to_string()
3298 } else if trimmed.ends_with(':') {
3299 "right".to_string()
3300 } else if trimmed.starts_with(':') {
3301 "left".to_string()
3302 } else {
3303 "none".to_string()
3304 };
3305 column_alignments.push(alignment);
3306 }
3307 }
3308
3309 table_rows.push(TableRow {
3310 line: line_num,
3311 is_separator,
3312 column_count,
3313 column_alignments,
3314 });
3315 }
3316
3317 table_rows
3318 }
3319
3320 fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
3322 let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
3323
3324 for cap in URL_SIMPLE_REGEX.captures_iter(content) {
3326 let full_match = cap.get(0).unwrap();
3327 let match_start = full_match.start();
3328 let match_end = full_match.end();
3329
3330 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3332 continue;
3333 }
3334
3335 let preceding_char = if match_start > 0 {
3337 content.chars().nth(match_start - 1)
3338 } else {
3339 None
3340 };
3341 let following_char = content.chars().nth(match_end);
3342
3343 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3344 continue;
3345 }
3346 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3347 continue;
3348 }
3349
3350 let url = full_match.as_str();
3351 let url_type = if url.starts_with("https://") {
3352 "https"
3353 } else if url.starts_with("http://") {
3354 "http"
3355 } else if url.starts_with("ftp://") {
3356 "ftp"
3357 } else {
3358 "other"
3359 };
3360
3361 let mut line_num = 1;
3363 let mut col_start = match_start;
3364 let mut col_end = match_end;
3365 for (idx, line_info) in lines.iter().enumerate() {
3366 if match_start >= line_info.byte_offset {
3367 line_num = idx + 1;
3368 col_start = match_start - line_info.byte_offset;
3369 col_end = match_end - line_info.byte_offset;
3370 } else {
3371 break;
3372 }
3373 }
3374
3375 bare_urls.push(BareUrl {
3376 line: line_num,
3377 start_col: col_start,
3378 end_col: col_end,
3379 byte_offset: match_start,
3380 byte_end: match_end,
3381 url: url.to_string(),
3382 url_type: url_type.to_string(),
3383 });
3384 }
3385
3386 for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
3388 let full_match = cap.get(0).unwrap();
3389 let match_start = full_match.start();
3390 let match_end = full_match.end();
3391
3392 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3394 continue;
3395 }
3396
3397 let preceding_char = if match_start > 0 {
3399 content.chars().nth(match_start - 1)
3400 } else {
3401 None
3402 };
3403 let following_char = content.chars().nth(match_end);
3404
3405 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3406 continue;
3407 }
3408 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3409 continue;
3410 }
3411
3412 let email = full_match.as_str();
3413
3414 let mut line_num = 1;
3416 let mut col_start = match_start;
3417 let mut col_end = match_end;
3418 for (idx, line_info) in lines.iter().enumerate() {
3419 if match_start >= line_info.byte_offset {
3420 line_num = idx + 1;
3421 col_start = match_start - line_info.byte_offset;
3422 col_end = match_end - line_info.byte_offset;
3423 } else {
3424 break;
3425 }
3426 }
3427
3428 bare_urls.push(BareUrl {
3429 line: line_num,
3430 start_col: col_start,
3431 end_col: col_end,
3432 byte_offset: match_start,
3433 byte_end: match_end,
3434 url: email.to_string(),
3435 url_type: "email".to_string(),
3436 });
3437 }
3438
3439 bare_urls
3440 }
3441
    /// Returns a [`ValidHeadingsIter`] constructed over this context's `lines`.
    ///
    /// NOTE(review): going by the name (and by what `has_valid_headings`
    /// checks), the iterator presumably yields only headings whose `is_valid`
    /// flag is set — confirm against the `ValidHeadingsIter` implementation.
    #[must_use]
    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
        ValidHeadingsIter::new(&self.lines)
    }
3465
3466 #[must_use]
3470 pub fn has_valid_headings(&self) -> bool {
3471 self.lines
3472 .iter()
3473 .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
3474 }
3475}
3476
3477fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
3479 if list_blocks.len() < 2 {
3480 return;
3481 }
3482
3483 let mut merger = ListBlockMerger::new(content, lines);
3484 *list_blocks = merger.merge(list_blocks);
3485}
3486
/// Helper that decides which neighbouring [`ListBlock`]s belong together and
/// fuses them.
///
/// It only borrows the document; the merging logic lives in its `impl`.
struct ListBlockMerger<'a> {
    /// Full document text, handed to `LineInfo::content` to read a line's text.
    content: &'a str,
    /// Per-line metadata; the merger looks lines up as `lines[line_number - 1]`.
    lines: &'a [LineInfo],
}
3492
3493impl<'a> ListBlockMerger<'a> {
3494 fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
3495 Self { content, lines }
3496 }
3497
3498 fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
3499 let mut merged = Vec::with_capacity(list_blocks.len());
3500 let mut current = list_blocks[0].clone();
3501
3502 for next in list_blocks.iter().skip(1) {
3503 if self.should_merge_blocks(¤t, next) {
3504 current = self.merge_two_blocks(current, next);
3505 } else {
3506 merged.push(current);
3507 current = next.clone();
3508 }
3509 }
3510
3511 merged.push(current);
3512 merged
3513 }
3514
3515 fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
3517 if !self.blocks_are_compatible(current, next) {
3519 return false;
3520 }
3521
3522 let spacing = self.analyze_spacing_between(current, next);
3524 match spacing {
3525 BlockSpacing::Consecutive => true,
3526 BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
3527 BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
3528 self.can_merge_with_content_between(current, next)
3529 }
3530 }
3531 }
3532
3533 fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
3535 current.is_ordered == next.is_ordered
3536 && current.blockquote_prefix == next.blockquote_prefix
3537 && current.nesting_level == next.nesting_level
3538 }
3539
3540 fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
3542 let gap = next.start_line - current.end_line;
3543
3544 match gap {
3545 1 => BlockSpacing::Consecutive,
3546 2 => BlockSpacing::SingleBlank,
3547 _ if gap > 2 => {
3548 if self.has_only_blank_lines_between(current, next) {
3549 BlockSpacing::MultipleBlanks
3550 } else {
3551 BlockSpacing::ContentBetween
3552 }
3553 }
3554 _ => BlockSpacing::Consecutive, }
3556 }
3557
3558 fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3560 if has_meaningful_content_between(self.content, current, next, self.lines) {
3563 return false; }
3565
3566 !current.is_ordered && current.marker == next.marker
3568 }
3569
3570 fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3572 if has_meaningful_content_between(self.content, current, next, self.lines) {
3574 return false; }
3576
3577 current.is_ordered && next.is_ordered
3579 }
3580
3581 fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3583 for line_num in (current.end_line + 1)..next.start_line {
3584 if let Some(line_info) = self.lines.get(line_num - 1)
3585 && !line_info.content(self.content).trim().is_empty()
3586 {
3587 return false;
3588 }
3589 }
3590 true
3591 }
3592
3593 fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
3595 current.end_line = next.end_line;
3596 current.item_lines.extend_from_slice(&next.item_lines);
3597
3598 current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
3600
3601 if !current.is_ordered && self.markers_differ(¤t, next) {
3603 current.marker = None; }
3605
3606 current
3607 }
3608
3609 fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
3611 current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
3612 }
3613}
3614
/// How far apart two neighbouring list blocks are, as classified by
/// `ListBlockMerger::analyze_spacing_between` from
/// `gap = next.start_line - current.end_line`:
///
/// * `Consecutive`: gap of 1 (also the fallback for any gap not covered below).
/// * `SingleBlank`: gap of exactly 2, i.e. one line in between. The classifier
///   does not itself inspect that line.
/// * `MultipleBlanks`: gap above 2 and every line in between is empty after trimming.
/// * `ContentBetween`: gap above 2 and at least one line in between is non-empty.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    Consecutive, SingleBlank, MultipleBlanks, ContentBetween, }
3623
3624fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
3626 for line_num in (current.end_line + 1)..next.start_line {
3628 if let Some(line_info) = lines.get(line_num - 1) {
3629 let trimmed = line_info.content(content).trim();
3631
3632 if trimmed.is_empty() {
3634 continue;
3635 }
3636
3637 if line_info.heading.is_some() {
3641 return true; }
3643
3644 if is_horizontal_rule(trimmed) {
3646 return true; }
3648
3649 if crate::utils::skip_context::is_table_line(trimmed) {
3651 return true; }
3653
3654 if trimmed.starts_with('>') {
3656 return true; }
3658
3659 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
3661 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3662
3663 let min_continuation_indent = if current.is_ordered {
3665 current.nesting_level + current.max_marker_width + 1 } else {
3667 current.nesting_level + 2
3668 };
3669
3670 if line_indent < min_continuation_indent {
3671 return true; }
3674 }
3675
3676 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3678
3679 let min_indent = if current.is_ordered {
3681 current.nesting_level + current.max_marker_width
3682 } else {
3683 current.nesting_level + 2
3684 };
3685
3686 if line_indent < min_indent {
3688 return true; }
3690
3691 }
3694 }
3695
3696 false
3698}
3699
3700pub fn is_horizontal_rule_line(line: &str) -> bool {
3707 let leading_spaces = line.len() - line.trim_start_matches(' ').len();
3709 if leading_spaces > 3 || line.starts_with('\t') {
3710 return false;
3711 }
3712
3713 is_horizontal_rule_content(line.trim())
3714}
3715
/// Checks whether already-trimmed text forms a horizontal rule.
///
/// The text must be at least three bytes long, start with `-`, `*` or `_`,
/// contain that same character at least three times, and otherwise consist
/// only of spaces and tabs.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    if trimmed.len() < 3 {
        return false;
    }

    // The first character fixes which rule character the rest must use.
    let rule_char = match trimmed.chars().next() {
        Some(c @ ('-' | '*' | '_')) => c,
        _ => return false,
    };

    let mut rule_chars = 0usize;
    for c in trimmed.chars() {
        if c == rule_char {
            rule_chars += 1;
        } else if !matches!(c, ' ' | '\t') {
            // Anything besides the rule character and blanks disqualifies.
            return false;
        }
    }

    rule_chars >= 3
}
3740
/// Alias for [`is_horizontal_rule_content`]: forwards `trimmed` unchanged and
/// returns its verdict. No indentation check is performed here.
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}
3745
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty document has a single line offset (0) and no line records.
    #[test]
    fn test_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }

    /// A document without a newline is one line; columns are 1-based.
    #[test]
    fn test_single_line() {
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
    }

    /// Line offsets point at the byte following each newline.
    #[test]
    fn test_multi_line() {
        let content = "# Title\n\nSecond line\nThird line";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
        // Start of line 1.
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        // Start of the empty line 2.
        assert_eq!(ctx.offset_to_line_col(8), (2, 1));
        // Start of line 3.
        assert_eq!(ctx.offset_to_line_col(9), (3, 1));
        // Inside line 3.
        assert_eq!(ctx.offset_to_line_col(15), (3, 7));
        // Start of line 4.
        assert_eq!(ctx.offset_to_line_col(21), (4, 1));
    }

    /// Per-line metadata: content, byte offset, indent and blank detection.
    #[test]
    fn test_line_info() {
        // The second line is indented by four spaces; the assertions below
        // (`indent == 4`, content with four leading spaces) depend on it.
        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.lines.len(), 7);

        // Line 1: heading text at column zero.
        let line1 = &ctx.lines[0];
        assert_eq!(line1.content(ctx.content), "# Title");
        assert_eq!(line1.byte_offset, 0);
        assert_eq!(line1.indent, 0);
        assert!(!line1.is_blank);
        assert!(!line1.in_code_block);
        assert!(line1.list_item.is_none());

        // Line 2: indented text, starting right after "# Title\n" (8 bytes).
        let line2 = &ctx.lines[1];
        assert_eq!(line2.content(ctx.content), "    indented");
        assert_eq!(line2.byte_offset, 8);
        assert_eq!(line2.indent, 4);
        assert!(!line2.is_blank);

        // Line 3: empty line.
        let line3 = &ctx.lines[2];
        assert_eq!(line3.content(ctx.content), "");
        assert!(line3.is_blank);

        // The 1-based accessors agree with the raw records.
        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
    }

    /// List markers, orderedness and marker/content columns are detected per line.
    #[test]
    fn test_list_item_detection() {
        // The nested `*` item is indented by two spaces; `marker_column == 2`
        // below depends on it.
        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n 2) Nested ordered\n\nNot a list";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Line 1: top-level unordered item.
        let line1 = &ctx.lines[0];
        assert!(line1.list_item.is_some());
        let list1 = line1.list_item.as_ref().unwrap();
        assert_eq!(list1.marker, "-");
        assert!(!list1.is_ordered);
        assert_eq!(list1.marker_column, 0);
        assert_eq!(list1.content_column, 2);

        // Line 2: nested unordered item.
        let line2 = &ctx.lines[1];
        assert!(line2.list_item.is_some());
        let list2 = line2.list_item.as_ref().unwrap();
        assert_eq!(list2.marker, "*");
        assert_eq!(list2.marker_column, 2);

        // Line 3: ordered item with its parsed number.
        let line3 = &ctx.lines[2];
        assert!(line3.list_item.is_some());
        let list3 = line3.list_item.as_ref().unwrap();
        assert_eq!(list3.marker, "1.");
        assert!(list3.is_ordered);
        assert_eq!(list3.number, Some(1));

        // Line 6: plain paragraph text.
        let line6 = &ctx.lines[5];
        assert!(line6.list_item.is_none());
    }

    /// Offsets on and around newline characters map to the expected positions.
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        let content = "a\nb\nc";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        // 'a'
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        // Newline ending line 1.
        assert_eq!(ctx.offset_to_line_col(1), (1, 2));
        // 'b'
        assert_eq!(ctx.offset_to_line_col(2), (2, 1));
        // Newline ending line 2.
        assert_eq!(ctx.offset_to_line_col(3), (2, 2));
        // 'c'
        assert_eq!(ctx.offset_to_line_col(4), (3, 1));
        // One past the end of the content.
        assert_eq!(ctx.offset_to_line_col(5), (3, 2));
    }

    /// In the MDX flavor, leading `import`/`export` lines are flagged as ESM.
    #[test]
    fn test_mdx_esm_blocks() {
        let content = r##"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall

In {year}, the snowfall was above average.
It was followed by a warm spring which caused
flood conditions in many of the nearby rivers.

<Chart color="#fcb32c" year={year} />
"##;

        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);

        assert_eq!(ctx.lines.len(), 10);
        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
        assert!(
            !ctx.lines[3].in_esm_block,
            "Line 4 (heading) should NOT be in_esm_block"
        );
        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
    }

    /// Outside the MDX flavor the same lines are not flagged as ESM.
    #[test]
    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
        let content = r#"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(
            !ctx.lines[0].in_esm_block,
            "Line 1 should NOT be in_esm_block in Standard flavor"
        );
        assert!(
            !ctx.lines[1].in_esm_block,
            "Line 2 should NOT be in_esm_block in Standard flavor"
        );
    }
}