1use crate::config::MarkdownFlavor;
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
4use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
5use regex::Regex;
6use std::borrow::Cow;
7use std::path::PathBuf;
8use std::sync::LazyLock;
9
// Native builds: evaluate `$code`, and when `$profile` is true print the elapsed
// wall-clock time for the section to stderr. The macro evaluates to the value
// of `$code` either way.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        // The timer is started unconditionally; only the report is gated on `$profile`.
        let start = std::time::Instant::now();
        let result = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, start.elapsed());
        }
        result
    }};
}
22
// wasm32 builds: no timing at all. The macro expands to `$code` alone; `$name`
// and `$profile` are accepted only so call sites look the same on both targets.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
27
/// Regex for `[text](url "title")` and `[text][ref]` links.
///
/// Compiled lazily on first use. The pattern runs in `(?sx)` mode, so whitespace
/// and `#` comments inside the literal are ignored by the regex engine.
/// `parse_links` uses it as a second pass and only records matches that carry
/// a reference ID (group 6).
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\]          # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap()
});
41
/// Regex for `![alt](url "title")` and `![alt][ref]` images.
///
/// Same shape as `LINK_PATTERN` with a leading `!`; compiled lazily on first
/// use. The pattern runs in `(?sx)` mode, so whitespace and `#` comments inside
/// the literal are ignored by the regex engine.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\]         # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap()
});
55
/// Regex for a one-line reference definition: `[label]: destination "title"`.
///
/// Multi-line mode (`(?m)`): up to three leading spaces, the label in group 1,
/// the whitespace-free destination in group 2, and an optional title in
/// group 3 (double-quoted) or group 4 (single-quoted).
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
59
/// Regex for bare `http://`, `https://` and `ftp://` URLs.
///
/// The scheme is captured in group 1. The host/path run stops at whitespace,
/// angle brackets, square brackets, parentheses, backslashes, quotes and
/// backticks; optional port, path, query and fragment parts follow.
static BARE_URL_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(https?|ftp)://[^\s<>\[\]()\\'"`]+(?:\.[^\s<>\[\]()\\'"`]+)*(?::\d+)?(?:/[^\s<>\[\]()\\'"`]*)?(?:\?[^\s<>\[\]()\\'"`]*)?(?:#[^\s<>\[\]()\\'"`]*)?"#
    ).unwrap()
});
66
/// Regex for a bare e-mail address: an ASCII local part, `@`, a dotted domain
/// and an alphabetic top-level label of at least two letters.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
70
/// Regex capturing a line's blockquote prefix in group 1: optional leading
/// whitespace, one or more `>` and any whitespace that follows.
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
73
/// Per-line facts about the document; `LintContext::lines` holds one entry per
/// line, looked up as `lines[line_num - 1]`.
///
/// Most flags are filled in by helpers outside this section; notes marked
/// "presumably" are inferred from the field name only.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset of the start of the line within the source text.
    pub byte_offset: usize,
    /// Length of the line in bytes; `content()` slices
    /// `byte_offset..byte_offset + byte_len`.
    pub byte_len: usize,
    /// Presumably the width of the line's leading whitespace — TODO confirm.
    pub indent: usize,
    /// Presumably true when the line holds only whitespace — TODO confirm.
    pub is_blank: bool,
    /// Whether the line is inside a code block (read by `is_in_code_block`).
    pub in_code_block: bool,
    /// Whether the line is inside front matter (read by `is_in_front_matter`).
    pub in_front_matter: bool,
    /// Whether the line is inside an HTML block (read by `is_in_html_block`).
    pub in_html_block: bool,
    /// Presumably true when the line lies inside an HTML comment — TODO confirm.
    pub in_html_comment: bool,
    /// List item details when the line is a list item.
    pub list_item: Option<ListItemInfo>,
    /// Heading details when the line is a heading.
    pub heading: Option<HeadingInfo>,
    /// Blockquote details when the line is part of a blockquote.
    pub blockquote: Option<BlockquoteInfo>,
    /// Presumably true inside a mkdocstrings/autodoc block — TODO confirm.
    pub in_mkdocstrings: bool,
    /// Presumably true inside an ESM import/export block — TODO confirm.
    pub in_esm_block: bool,
    /// Set by `LintContext::new` for every line after the first line of a code
    /// span that extends over several lines.
    pub in_code_span_continuation: bool,
}
106
107impl LineInfo {
108 pub fn content<'a>(&self, source: &'a str) -> &'a str {
110 &source[self.byte_offset..self.byte_offset + self.byte_len]
111 }
112}
113
/// Details of a list item found on a line.
///
/// Populated outside this section; the field notes are inferred from the names
/// and should be confirmed against the producer.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The list marker text (presumably e.g. `-`, `*`, `1.`).
    pub marker: String,
    /// Whether the item belongs to an ordered list.
    pub is_ordered: bool,
    /// Presumably the item's number for ordered lists, `None` otherwise.
    pub number: Option<usize>,
    /// Presumably the column at which the marker starts.
    pub marker_column: usize,
    /// Presumably the column at which the item's content starts.
    pub content_column: usize,
}
128
/// Syntactic style of a heading.
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    /// ATX heading (presumably the `# Title` form).
    ATX,
    /// Setext heading, variant 1 (presumably the `===` underline) — TODO confirm.
    Setext1,
    /// Setext heading, variant 2 (presumably the `---` underline) — TODO confirm.
    Setext2,
}
139
/// A link found by `parse_links`.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// 1-based number of the line on which the link starts.
    pub line: usize,
    /// Byte distance from the start of the starting line to the link start.
    pub start_col: usize,
    /// Byte distance from the start of the ending line to the link end.
    pub end_col: usize,
    /// Byte offset of the link start in the document.
    pub byte_offset: usize,
    /// Byte offset just past the link in the document.
    pub byte_end: usize,
    /// Text between the opening `[` and its matching `]`.
    pub text: Cow<'a, str>,
    /// Destination URL; empty for links found only by the regex pass.
    pub url: Cow<'a, str>,
    /// True for reference, collapsed and shortcut links.
    pub is_reference: bool,
    /// Lowercased reference label for reference-style links, `None` otherwise.
    pub reference_id: Option<Cow<'a, str>>,
    /// Link kind as classified by pulldown-cmark.
    pub link_type: LinkType,
}
164
/// A reference that the Markdown parser reported as unresolved through its
/// broken-link callback.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text as passed to the callback.
    pub reference: String,
    /// Byte range of the broken link as passed to the callback.
    pub span: std::ops::Range<usize>,
}
173
/// A footnote reference reported by the Markdown parser.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// Footnote identifier as given by the parser event.
    pub id: String,
    /// 1-based number of the line containing the start of the reference.
    pub line: usize,
    /// Byte offset of the start of the reference in the document.
    pub byte_offset: usize,
    /// Byte offset just past the reference in the document.
    pub byte_end: usize,
}
186
/// An image found by `parse_images`.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// 1-based number of the line on which the image starts.
    pub line: usize,
    /// Byte distance from the start of the starting line to the image start.
    pub start_col: usize,
    /// Byte distance from the start of the ending line to the image end.
    pub end_col: usize,
    /// Byte offset of the image start (the `!`) in the document.
    pub byte_offset: usize,
    /// Byte offset just past the image in the document.
    pub byte_end: usize,
    /// Text between `![` and its matching `]`.
    pub alt_text: Cow<'a, str>,
    /// Image destination URL.
    pub url: Cow<'a, str>,
    /// True for reference, collapsed and shortcut images.
    pub is_reference: bool,
    /// Lowercased reference label for reference-style images, `None` otherwise.
    pub reference_id: Option<Cow<'a, str>>,
    /// Image kind as classified by pulldown-cmark.
    pub link_type: LinkType,
}
211
/// A reference definition (`[id]: url "title"`) found in the document.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line of the definition (presumably 1-based, like other line fields).
    pub line: usize,
    /// Definition label. `get_reference_url` compares it against a lowercased
    /// lookup key, so it is expected to be stored lowercased — TODO confirm.
    pub id: String,
    /// Destination URL.
    pub url: String,
    /// Optional title.
    pub title: Option<String>,
    /// Byte offset of the start of the definition.
    pub byte_offset: usize,
    /// Byte offset just past the definition (exclusive bound in `is_in_reference_def`).
    pub byte_end: usize,
    /// Byte offset of the title start, when a title is present.
    pub title_byte_start: Option<usize>,
    /// Byte offset just past the title (exclusive bound in `is_in_link_title`).
    pub title_byte_end: Option<usize>,
}
232
/// An inline code span, possibly extending over several lines.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// 1-based line on which the span starts.
    pub line: usize,
    /// 1-based line on which the span ends (greater than `line` for multi-line spans).
    pub end_line: usize,
    /// 0-based start column on `line` (inclusive in `is_in_code_span`).
    pub start_col: usize,
    /// 0-based end column on `end_line` (exclusive in `is_in_code_span`).
    pub end_col: usize,
    /// Byte offset of the span start in the document.
    pub byte_offset: usize,
    /// Byte offset just past the span (exclusive bound in the lookups).
    pub byte_end: usize,
    /// Presumably the number of backticks in the delimiter — TODO confirm.
    pub backtick_count: usize,
    /// Presumably the text between the delimiters — TODO confirm.
    pub content: String,
}
253
/// Details of a heading found on a line.
///
/// Populated outside this section; notes marked "presumably" are inferred from
/// the field names only.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level (presumably 1 through 6).
    pub level: u8,
    /// Syntactic style of the heading.
    pub style: HeadingStyle,
    /// Presumably the marker text (e.g. `##` or the setext underline).
    pub marker: String,
    /// Presumably the column at which the marker starts.
    pub marker_column: usize,
    /// Presumably the column at which the heading text starts.
    pub content_column: usize,
    /// Heading text (presumably with any custom ID removed — TODO confirm).
    pub text: String,
    /// Optional custom ID attached to the heading.
    pub custom_id: Option<String>,
    /// Presumably the heading text as written, before clean-up.
    pub raw_text: String,
    /// Presumably true when an ATX heading ends with a closing `#` run.
    pub has_closing_sequence: bool,
    /// Presumably the closing `#` run itself, empty when absent.
    pub closing_sequence: String,
    /// Validity flag; `ValidHeadingsIter` only yields headings with this set.
    pub is_valid: bool,
}
281
/// Item yielded by `ValidHeadingsIter`: a valid heading together with its line.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// 1-based line number of the heading (slice index + 1).
    pub line_num: usize,
    /// The heading details stored on that line.
    pub heading: &'a HeadingInfo,
    /// The full line record that carries the heading.
    pub line_info: &'a LineInfo,
}
295
/// Iterator over the lines of a document that carry a heading whose
/// `is_valid` flag is set.
pub struct ValidHeadingsIter<'a> {
    /// All line records being scanned.
    lines: &'a [LineInfo],
    /// Index of the next line to inspect.
    current_index: usize,
}
304
305impl<'a> ValidHeadingsIter<'a> {
306 fn new(lines: &'a [LineInfo]) -> Self {
307 Self {
308 lines,
309 current_index: 0,
310 }
311 }
312}
313
314impl<'a> Iterator for ValidHeadingsIter<'a> {
315 type Item = ValidHeading<'a>;
316
317 fn next(&mut self) -> Option<Self::Item> {
318 while self.current_index < self.lines.len() {
319 let idx = self.current_index;
320 self.current_index += 1;
321
322 let line_info = &self.lines[idx];
323 if let Some(heading) = &line_info.heading
324 && heading.is_valid
325 {
326 return Some(ValidHeading {
327 line_num: idx + 1, heading,
329 line_info,
330 });
331 }
332 }
333 None
334 }
335}
336
/// Details of a blockquote line.
///
/// Populated outside this section; the field notes are inferred from the names
/// and should be confirmed against the producer.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Presumably the number of `>` markers on the line.
    pub nesting_level: usize,
    /// Presumably the whitespace before the first `>`.
    pub indent: String,
    /// Presumably the column of the first `>`.
    pub marker_column: usize,
    /// Presumably the full blockquote prefix (indent, markers, spacing).
    pub prefix: String,
    /// Presumably the line content after the prefix.
    pub content: String,
    /// Presumably true when content follows the marker with no space.
    pub has_no_space_after_marker: bool,
    /// Presumably true when more than one space follows the marker.
    pub has_multiple_spaces_after_marker: bool,
    /// Flag consumed by the MD028 rule (meaning not visible here — TODO confirm).
    pub needs_md028_fix: bool,
}
357
/// A contiguous list in the document.
///
/// Built by `parse_list_blocks` (outside this section); notes marked
/// "presumably" are inferred from the field names only.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// First line of the block (inclusive bound in `is_in_list_block`).
    pub start_line: usize,
    /// Last line of the block (inclusive bound in `is_in_list_block`).
    pub end_line: usize,
    /// Whether the list is ordered.
    pub is_ordered: bool,
    /// Presumably the shared marker when all items use the same one.
    pub marker: Option<String>,
    /// Presumably the blockquote prefix the list is nested under, if any.
    pub blockquote_prefix: String,
    /// Presumably the line numbers of the list items in the block.
    pub item_lines: Vec<usize>,
    /// Presumably the nesting depth of the list.
    pub nesting_level: usize,
    /// Presumably the widest marker among the items.
    pub max_marker_width: usize,
}
378
379use std::sync::{Arc, OnceLock};
380
/// Per-document character counts backing the `has_char`, `char_count` and
/// `likely_has_*` fast paths on `LintContext`.
///
/// The counts are computed by `compute_char_frequency` (outside this section).
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count reported for `#`.
    pub hash_count: usize,
    /// Count reported for `*`.
    pub asterisk_count: usize,
    /// Count reported for `_`.
    pub underscore_count: usize,
    /// Count reported for `-`.
    pub hyphen_count: usize,
    /// Count reported for `+`.
    pub plus_count: usize,
    /// Count reported for `>`.
    pub gt_count: usize,
    /// Count reported for `|`.
    pub pipe_count: usize,
    /// Count reported for `[` (whether `]` is also counted is not visible here).
    pub bracket_count: usize,
    /// Count reported for `` ` ``.
    pub backtick_count: usize,
    /// Count reported for `<`.
    pub lt_count: usize,
    /// Count reported for `!`.
    pub exclamation_count: usize,
    /// Count reported for `\n`.
    pub newline_count: usize,
}
409
/// An HTML tag found in the document by `parse_html_tags` (outside this section).
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line of the tag; compared against `line_num` in `html_tags_on_line`.
    pub line: usize,
    /// Presumably the start column of the tag on its line.
    pub start_col: usize,
    /// Presumably the end column of the tag on its line.
    pub end_col: usize,
    /// Byte offset of the tag start in the document.
    pub byte_offset: usize,
    /// Byte offset just past the tag (exclusive bound in `is_in_html_tag`).
    pub byte_end: usize,
    /// Tag name (case handling not visible here).
    pub tag_name: String,
    /// Presumably true for closing tags such as `</div>`.
    pub is_closing: bool,
    /// Presumably true for self-closing tags such as `<br/>`.
    pub is_self_closing: bool,
    /// Presumably the tag text exactly as written.
    pub raw_content: String,
}
432
/// An emphasis span found by `parse_emphasis_spans` (outside this section).
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line of the span; compared against `line_num` in `emphasis_spans_on_line`.
    pub line: usize,
    /// Presumably the start column of the span on its line.
    pub start_col: usize,
    /// Presumably the end column of the span on its line.
    pub end_col: usize,
    /// Presumably the byte offset of the span start in the document.
    pub byte_offset: usize,
    /// Presumably the byte offset just past the span.
    pub byte_end: usize,
    /// Emphasis marker character (presumably `*` or `_`).
    pub marker: char,
    /// Presumably how many marker characters delimit the span.
    pub marker_count: usize,
    /// Presumably the text between the markers.
    pub content: String,
}
453
/// A table row found by `parse_table_rows` (outside this section).
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line of the row; compared against `line_num` in `table_rows_on_line`.
    pub line: usize,
    /// Presumably true for the delimiter row (e.g. `|---|:--:|`).
    pub is_separator: bool,
    /// Presumably the number of columns in the row.
    pub column_count: usize,
    /// Column alignments as strings (value format not visible here — TODO confirm).
    pub column_alignments: Vec<String>,
}
466
/// A bare URL or e-mail address found by `parse_bare_urls` (outside this section).
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line of the URL; compared against `line_num` in `bare_urls_on_line`.
    pub line: usize,
    /// Presumably the start column of the URL on its line.
    pub start_col: usize,
    /// Presumably the end column of the URL on its line.
    pub end_col: usize,
    /// Presumably the byte offset of the URL start in the document.
    pub byte_offset: usize,
    /// Presumably the byte offset just past the URL.
    pub byte_end: usize,
    /// The matched URL text.
    pub url: String,
    /// Kind of URL as a string (value set not visible here — TODO confirm).
    pub url_type: String,
}
485
/// Pre-computed view of one Markdown document.
///
/// `LintContext::new` fills the public fields eagerly; the `*_cache` fields are
/// `OnceLock` cells filled on first use by the matching accessor (the
/// code-span cache is pre-populated by `new`).
pub struct LintContext<'a> {
    /// The document text.
    pub content: &'a str,
    /// Byte offset of the start of every line; the first entry is 0.
    pub line_offsets: Vec<usize>,
    /// Code block ranges as returned by `CodeBlockUtils::detect_code_blocks`
    /// (presumably `(start, end)` byte offsets).
    pub code_blocks: Vec<(usize, usize)>,
    /// Per-line records, indexed by `line_num - 1`.
    pub lines: Vec<LineInfo>,
    /// Links found by `parse_links`.
    pub links: Vec<ParsedLink<'a>>,
    /// Images found by `parse_images`.
    pub images: Vec<ParsedImage<'a>>,
    /// References reported as broken by the Markdown parser.
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Footnote references reported by the Markdown parser.
    pub footnote_refs: Vec<FootnoteRef>,
    /// Reference definitions found by `parse_reference_defs`.
    pub reference_defs: Vec<ReferenceDef>,
    /// Code spans; set in `new`, read through `code_spans()`.
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>,
    /// List blocks found by `parse_list_blocks`.
    pub list_blocks: Vec<ListBlock>,
    /// Character counts used by the `likely_has_*` fast paths.
    pub char_frequency: CharFrequency,
    /// Lazily computed HTML tags; read through `html_tags()`.
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>,
    /// Lazily computed emphasis spans; read through `emphasis_spans()`.
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>,
    /// Lazily computed table rows; read through `table_rows()`.
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>,
    /// Lazily computed bare URLs; read through `bare_urls()`.
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>,
    /// Byte ranges of HTML comments; read through `html_comment_ranges()`.
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Table blocks found by `TableUtils::find_table_blocks_with_code_info`.
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Line index built over the document text.
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    /// Byte ranges of Jinja constructs (end exclusive in `is_in_jinja_range`).
    jinja_ranges: Vec<(usize, usize)>,
    /// Markdown flavor the document is linted as.
    pub flavor: MarkdownFlavor,
    /// Path of the file the content came from, when known.
    pub source_file: Option<PathBuf>,
}
510
/// The four consecutive pieces of a blockquote line, as produced by
/// `parse_blockquote_detailed`; every field borrows from the input line.
struct BlockquoteComponents<'a> {
    /// Leading spaces/tabs before the first `>`.
    indent: &'a str,
    /// The uninterrupted run of `>` characters.
    markers: &'a str,
    /// Spaces/tabs directly after the markers.
    spaces_after: &'a str,
    /// Everything after that whitespace.
    content: &'a str,
}
518
519#[inline]
521fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
522 let bytes = line.as_bytes();
523 let mut pos = 0;
524
525 while pos < bytes.len() && (bytes[pos] == b' ' || bytes[pos] == b'\t') {
527 pos += 1;
528 }
529 let indent_end = pos;
530
531 if pos >= bytes.len() || bytes[pos] != b'>' {
533 return None;
534 }
535
536 while pos < bytes.len() && bytes[pos] == b'>' {
538 pos += 1;
539 }
540 let markers_end = pos;
541
542 while pos < bytes.len() && (bytes[pos] == b' ' || bytes[pos] == b'\t') {
544 pos += 1;
545 }
546 let spaces_end = pos;
547
548 Some(BlockquoteComponents {
549 indent: &line[0..indent_end],
550 markers: &line[indent_end..markers_end],
551 spaces_after: &line[markers_end..spaces_end],
552 content: &line[spaces_end..],
553 })
554}
555
556impl<'a> LintContext<'a> {
557 pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
558 #[cfg(not(target_arch = "wasm32"))]
559 let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
560 #[cfg(target_arch = "wasm32")]
561 let profile = false;
562
563 let line_offsets = profile_section!("Line offsets", profile, {
564 let mut offsets = vec![0];
565 for (i, c) in content.char_indices() {
566 if c == '\n' {
567 offsets.push(i + 1);
568 }
569 }
570 offsets
571 });
572
573 let code_blocks = profile_section!("Code blocks", profile, CodeBlockUtils::detect_code_blocks(content));
575
576 let html_comment_ranges = profile_section!(
578 "HTML comment ranges",
579 profile,
580 crate::utils::skip_context::compute_html_comment_ranges(content)
581 );
582
583 let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
585 if flavor == MarkdownFlavor::MkDocs {
586 crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
587 } else {
588 Vec::new()
589 }
590 });
591
592 let mut lines = profile_section!(
594 "Basic line info",
595 profile,
596 Self::compute_basic_line_info(
597 content,
598 &line_offsets,
599 &code_blocks,
600 flavor,
601 &html_comment_ranges,
602 &autodoc_ranges,
603 )
604 );
605
606 profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));
608
609 profile_section!(
611 "ESM blocks",
612 profile,
613 Self::detect_esm_blocks(content, &mut lines, flavor)
614 );
615
616 let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));
618
619 profile_section!(
621 "Headings & blockquotes",
622 profile,
623 Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
624 );
625
626 let code_spans = profile_section!("Code spans", profile, Self::parse_code_spans(content, &lines));
628
629 for span in &code_spans {
632 if span.end_line > span.line {
633 for line_num in (span.line + 1)..=span.end_line {
635 if let Some(line_info) = lines.get_mut(line_num - 1) {
636 line_info.in_code_span_continuation = true;
637 }
638 }
639 }
640 }
641
642 let (links, broken_links, footnote_refs) = profile_section!(
644 "Links",
645 profile,
646 Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
647 );
648
649 let images = profile_section!(
650 "Images",
651 profile,
652 Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
653 );
654
655 let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));
656
657 let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));
658
659 let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));
661
662 let table_blocks = profile_section!(
664 "Table blocks",
665 profile,
666 crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
667 content,
668 &code_blocks,
669 &code_spans,
670 &html_comment_ranges,
671 )
672 );
673
674 let line_index = profile_section!(
676 "Line index",
677 profile,
678 crate::utils::range_utils::LineIndex::new(content)
679 );
680
681 let jinja_ranges = profile_section!(
683 "Jinja ranges",
684 profile,
685 crate::utils::jinja_utils::find_jinja_ranges(content)
686 );
687
688 Self {
689 content,
690 line_offsets,
691 code_blocks,
692 lines,
693 links,
694 images,
695 broken_links,
696 footnote_refs,
697 reference_defs,
698 code_spans_cache: OnceLock::from(Arc::new(code_spans)),
699 list_blocks,
700 char_frequency,
701 html_tags_cache: OnceLock::new(),
702 emphasis_spans_cache: OnceLock::new(),
703 table_rows_cache: OnceLock::new(),
704 bare_urls_cache: OnceLock::new(),
705 html_comment_ranges,
706 table_blocks,
707 line_index,
708 jinja_ranges,
709 flavor,
710 source_file,
711 }
712 }
713
714 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
716 Arc::clone(
717 self.code_spans_cache
718 .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
719 )
720 }
721
722 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
724 &self.html_comment_ranges
725 }
726
727 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
729 Arc::clone(self.html_tags_cache.get_or_init(|| {
730 Arc::new(Self::parse_html_tags(
731 self.content,
732 &self.lines,
733 &self.code_blocks,
734 self.flavor,
735 ))
736 }))
737 }
738
739 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
741 Arc::clone(
742 self.emphasis_spans_cache
743 .get_or_init(|| Arc::new(Self::parse_emphasis_spans(self.content, &self.lines, &self.code_blocks))),
744 )
745 }
746
747 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
749 Arc::clone(
750 self.table_rows_cache
751 .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
752 )
753 }
754
755 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
757 Arc::clone(
758 self.bare_urls_cache
759 .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
760 )
761 }
762
763 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
765 match self.line_offsets.binary_search(&offset) {
766 Ok(line) => (line + 1, 1),
767 Err(line) => {
768 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
769 (line, offset - line_start + 1)
770 }
771 }
772 }
773
774 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
776 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
778 return true;
779 }
780
781 self.code_spans()
783 .iter()
784 .any(|span| pos >= span.byte_offset && pos < span.byte_end)
785 }
786
787 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
789 if line_num > 0 {
790 self.lines.get(line_num - 1)
791 } else {
792 None
793 }
794 }
795
796 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
798 self.line_info(line_num).map(|info| info.byte_offset)
799 }
800
801 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
803 let normalized_id = ref_id.to_lowercase();
804 self.reference_defs
805 .iter()
806 .find(|def| def.id == normalized_id)
807 .map(|def| def.url.as_str())
808 }
809
810 pub fn is_in_list_block(&self, line_num: usize) -> bool {
812 self.list_blocks
813 .iter()
814 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
815 }
816
817 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
819 self.list_blocks
820 .iter()
821 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
822 }
823
824 pub fn is_in_code_block(&self, line_num: usize) -> bool {
828 if line_num == 0 || line_num > self.lines.len() {
829 return false;
830 }
831 self.lines[line_num - 1].in_code_block
832 }
833
834 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
836 if line_num == 0 || line_num > self.lines.len() {
837 return false;
838 }
839 self.lines[line_num - 1].in_front_matter
840 }
841
842 pub fn is_in_html_block(&self, line_num: usize) -> bool {
844 if line_num == 0 || line_num > self.lines.len() {
845 return false;
846 }
847 self.lines[line_num - 1].in_html_block
848 }
849
850 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
852 if line_num == 0 || line_num > self.lines.len() {
853 return false;
854 }
855
856 let col_0indexed = if col > 0 { col - 1 } else { 0 };
860 let code_spans = self.code_spans();
861 code_spans.iter().any(|span| {
862 if line_num < span.line || line_num > span.end_line {
864 return false;
865 }
866
867 if span.line == span.end_line {
868 col_0indexed >= span.start_col && col_0indexed < span.end_col
870 } else if line_num == span.line {
871 col_0indexed >= span.start_col
873 } else if line_num == span.end_line {
874 col_0indexed < span.end_col
876 } else {
877 true
879 }
880 })
881 }
882
883 #[inline]
885 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
886 let code_spans = self.code_spans();
887 code_spans
888 .iter()
889 .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
890 }
891
892 #[inline]
895 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
896 self.reference_defs
897 .iter()
898 .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
899 }
900
901 #[inline]
905 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
906 self.html_comment_ranges
907 .iter()
908 .any(|range| byte_pos >= range.start && byte_pos < range.end)
909 }
910
911 #[inline]
914 pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
915 self.html_tags()
916 .iter()
917 .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
918 }
919
920 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
922 self.jinja_ranges
923 .iter()
924 .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
925 }
926
927 pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
929 self.reference_defs.iter().any(|def| {
930 if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
931 byte_pos >= start && byte_pos < end
932 } else {
933 false
934 }
935 })
936 }
937
938 pub fn has_char(&self, ch: char) -> bool {
940 match ch {
941 '#' => self.char_frequency.hash_count > 0,
942 '*' => self.char_frequency.asterisk_count > 0,
943 '_' => self.char_frequency.underscore_count > 0,
944 '-' => self.char_frequency.hyphen_count > 0,
945 '+' => self.char_frequency.plus_count > 0,
946 '>' => self.char_frequency.gt_count > 0,
947 '|' => self.char_frequency.pipe_count > 0,
948 '[' => self.char_frequency.bracket_count > 0,
949 '`' => self.char_frequency.backtick_count > 0,
950 '<' => self.char_frequency.lt_count > 0,
951 '!' => self.char_frequency.exclamation_count > 0,
952 '\n' => self.char_frequency.newline_count > 0,
953 _ => self.content.contains(ch), }
955 }
956
957 pub fn char_count(&self, ch: char) -> usize {
959 match ch {
960 '#' => self.char_frequency.hash_count,
961 '*' => self.char_frequency.asterisk_count,
962 '_' => self.char_frequency.underscore_count,
963 '-' => self.char_frequency.hyphen_count,
964 '+' => self.char_frequency.plus_count,
965 '>' => self.char_frequency.gt_count,
966 '|' => self.char_frequency.pipe_count,
967 '[' => self.char_frequency.bracket_count,
968 '`' => self.char_frequency.backtick_count,
969 '<' => self.char_frequency.lt_count,
970 '!' => self.char_frequency.exclamation_count,
971 '\n' => self.char_frequency.newline_count,
972 _ => self.content.matches(ch).count(), }
974 }
975
976 pub fn likely_has_headings(&self) -> bool {
978 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
980
981 pub fn likely_has_lists(&self) -> bool {
983 self.char_frequency.asterisk_count > 0
984 || self.char_frequency.hyphen_count > 0
985 || self.char_frequency.plus_count > 0
986 }
987
988 pub fn likely_has_emphasis(&self) -> bool {
990 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
991 }
992
993 pub fn likely_has_tables(&self) -> bool {
995 self.char_frequency.pipe_count > 2
996 }
997
998 pub fn likely_has_blockquotes(&self) -> bool {
1000 self.char_frequency.gt_count > 0
1001 }
1002
1003 pub fn likely_has_code(&self) -> bool {
1005 self.char_frequency.backtick_count > 0
1006 }
1007
1008 pub fn likely_has_links_or_images(&self) -> bool {
1010 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1011 }
1012
1013 pub fn likely_has_html(&self) -> bool {
1015 self.char_frequency.lt_count > 0
1016 }
1017
1018 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1020 self.html_tags()
1021 .iter()
1022 .filter(|tag| tag.line == line_num)
1023 .cloned()
1024 .collect()
1025 }
1026
1027 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1029 self.emphasis_spans()
1030 .iter()
1031 .filter(|span| span.line == line_num)
1032 .cloned()
1033 .collect()
1034 }
1035
1036 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1038 self.table_rows()
1039 .iter()
1040 .filter(|row| row.line == line_num)
1041 .cloned()
1042 .collect()
1043 }
1044
1045 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1047 self.bare_urls()
1048 .iter()
1049 .filter(|url| url.line == line_num)
1050 .cloned()
1051 .collect()
1052 }
1053
1054 #[inline]
1060 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1061 let idx = match lines.binary_search_by(|line| {
1063 if byte_offset < line.byte_offset {
1064 std::cmp::Ordering::Greater
1065 } else if byte_offset > line.byte_offset + line.byte_len {
1066 std::cmp::Ordering::Less
1067 } else {
1068 std::cmp::Ordering::Equal
1069 }
1070 }) {
1071 Ok(idx) => idx,
1072 Err(idx) => idx.saturating_sub(1),
1073 };
1074
1075 let line = &lines[idx];
1076 let line_num = idx + 1;
1077 let col = byte_offset.saturating_sub(line.byte_offset);
1078
1079 (idx, line_num, col)
1080 }
1081
1082 #[inline]
1084 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1085 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1087
1088 if idx > 0 {
1090 let span = &code_spans[idx - 1];
1091 if offset >= span.byte_offset && offset < span.byte_end {
1092 return true;
1093 }
1094 }
1095
1096 false
1097 }
1098
1099 fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1103 use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1104
1105 let mut link_ranges = Vec::new();
1106 let mut options = Options::empty();
1107 options.insert(Options::ENABLE_WIKILINKS);
1108 options.insert(Options::ENABLE_FOOTNOTES);
1109
1110 let parser = Parser::new_ext(content, options).into_offset_iter();
1111 let mut link_stack: Vec<usize> = Vec::new();
1112
1113 for (event, range) in parser {
1114 match event {
1115 Event::Start(Tag::Link { .. }) => {
1116 link_stack.push(range.start);
1117 }
1118 Event::End(TagEnd::Link) => {
1119 if let Some(start_pos) = link_stack.pop() {
1120 link_ranges.push((start_pos, range.end));
1121 }
1122 }
1123 _ => {}
1124 }
1125 }
1126
1127 link_ranges
1128 }
1129
1130 fn parse_links(
1132 content: &'a str,
1133 lines: &[LineInfo],
1134 code_blocks: &[(usize, usize)],
1135 code_spans: &[CodeSpan],
1136 flavor: MarkdownFlavor,
1137 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1138 ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1139 use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1140 use std::collections::HashSet;
1141
1142 let mut links = Vec::with_capacity(content.len() / 500);
1143 let mut broken_links = Vec::new();
1144 let mut footnote_refs = Vec::new();
1145
1146 let mut found_positions = HashSet::new();
1148
1149 let mut options = Options::empty();
1159 options.insert(Options::ENABLE_WIKILINKS);
1160 options.insert(Options::ENABLE_FOOTNOTES);
1161
1162 let parser = Parser::new_with_broken_link_callback(
1163 content,
1164 options,
1165 Some(|link: BrokenLink<'_>| {
1166 broken_links.push(BrokenLinkInfo {
1167 reference: link.reference.to_string(),
1168 span: link.span.clone(),
1169 });
1170 None
1171 }),
1172 )
1173 .into_offset_iter();
1174
1175 let mut link_stack: Vec<(
1176 usize,
1177 usize,
1178 pulldown_cmark::CowStr<'a>,
1179 LinkType,
1180 pulldown_cmark::CowStr<'a>,
1181 )> = Vec::new();
1182 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1185 match event {
1186 Event::Start(Tag::Link {
1187 link_type,
1188 dest_url,
1189 id,
1190 ..
1191 }) => {
1192 link_stack.push((range.start, range.end, dest_url, link_type, id));
1194 text_chunks.clear();
1195 }
1196 Event::Text(text) if !link_stack.is_empty() => {
1197 text_chunks.push((text.to_string(), range.start, range.end));
1199 }
1200 Event::Code(code) if !link_stack.is_empty() => {
1201 let code_text = format!("`{code}`");
1203 text_chunks.push((code_text, range.start, range.end));
1204 }
1205 Event::End(TagEnd::Link) => {
1206 if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1207 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1209 text_chunks.clear();
1210 continue;
1211 }
1212
1213 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1215
1216 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1218 text_chunks.clear();
1219 continue;
1220 }
1221
1222 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1223
1224 let is_reference = matches!(
1225 link_type,
1226 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1227 );
1228
1229 let link_text = if start_pos < content.len() {
1232 let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1233
1234 let mut close_pos = None;
1238 let mut depth = 0;
1239 let mut in_code_span = false;
1240
1241 for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1242 let mut backslash_count = 0;
1244 let mut j = i;
1245 while j > 0 && link_bytes[j - 1] == b'\\' {
1246 backslash_count += 1;
1247 j -= 1;
1248 }
1249 let is_escaped = backslash_count % 2 != 0;
1250
1251 if byte == b'`' && !is_escaped {
1253 in_code_span = !in_code_span;
1254 }
1255
1256 if !is_escaped && !in_code_span {
1258 if byte == b'[' {
1259 depth += 1;
1260 } else if byte == b']' {
1261 if depth == 0 {
1262 close_pos = Some(i);
1264 break;
1265 } else {
1266 depth -= 1;
1267 }
1268 }
1269 }
1270 }
1271
1272 if let Some(pos) = close_pos {
1273 Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1274 } else {
1275 Cow::Borrowed("")
1276 }
1277 } else {
1278 Cow::Borrowed("")
1279 };
1280
1281 let reference_id = if is_reference && !ref_id.is_empty() {
1283 Some(Cow::Owned(ref_id.to_lowercase()))
1284 } else if is_reference {
1285 Some(Cow::Owned(link_text.to_lowercase()))
1287 } else {
1288 None
1289 };
1290
1291 let has_escaped_bang = start_pos >= 2
1295 && content.as_bytes().get(start_pos - 2) == Some(&b'\\')
1296 && content.as_bytes().get(start_pos - 1) == Some(&b'!');
1297
1298 let has_escaped_bracket =
1301 start_pos >= 1 && content.as_bytes().get(start_pos - 1) == Some(&b'\\');
1302
1303 if has_escaped_bang || has_escaped_bracket {
1304 text_chunks.clear();
1305 continue; }
1307
1308 found_positions.insert(start_pos);
1310
1311 links.push(ParsedLink {
1312 line: line_num,
1313 start_col: col_start,
1314 end_col: col_end,
1315 byte_offset: start_pos,
1316 byte_end: range.end,
1317 text: link_text,
1318 url: Cow::Owned(url.to_string()),
1319 is_reference,
1320 reference_id,
1321 link_type,
1322 });
1323
1324 text_chunks.clear();
1325 }
1326 }
1327 Event::FootnoteReference(footnote_id) => {
1328 if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1331 continue;
1332 }
1333
1334 let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1335 footnote_refs.push(FootnoteRef {
1336 id: footnote_id.to_string(),
1337 line: line_num,
1338 byte_offset: range.start,
1339 byte_end: range.end,
1340 });
1341 }
1342 _ => {}
1343 }
1344 }
1345
1346 for cap in LINK_PATTERN.captures_iter(content) {
1350 let full_match = cap.get(0).unwrap();
1351 let match_start = full_match.start();
1352 let match_end = full_match.end();
1353
1354 if found_positions.contains(&match_start) {
1356 continue;
1357 }
1358
1359 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1361 continue;
1362 }
1363
1364 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1366 continue;
1367 }
1368
1369 if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1371 continue;
1372 }
1373
1374 if Self::is_offset_in_code_span(code_spans, match_start) {
1376 continue;
1377 }
1378
1379 if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1381 continue;
1382 }
1383
1384 let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1386
1387 if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1389 continue;
1390 }
1391
1392 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1393
1394 let text = cap.get(1).map_or("", |m| m.as_str());
1395
1396 if let Some(ref_id) = cap.get(6) {
1398 let ref_id_str = ref_id.as_str();
1399 let normalized_ref = if ref_id_str.is_empty() {
1400 Cow::Owned(text.to_lowercase()) } else {
1402 Cow::Owned(ref_id_str.to_lowercase())
1403 };
1404
1405 links.push(ParsedLink {
1407 line: line_num,
1408 start_col: col_start,
1409 end_col: col_end,
1410 byte_offset: match_start,
1411 byte_end: match_end,
1412 text: Cow::Borrowed(text),
1413 url: Cow::Borrowed(""), is_reference: true,
1415 reference_id: Some(normalized_ref),
1416 link_type: LinkType::Reference, });
1418 }
1419 }
1420
1421 (links, broken_links, footnote_refs)
1422 }
1423
1424 fn parse_images(
1426 content: &'a str,
1427 lines: &[LineInfo],
1428 code_blocks: &[(usize, usize)],
1429 code_spans: &[CodeSpan],
1430 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1431 ) -> Vec<ParsedImage<'a>> {
1432 use crate::utils::skip_context::is_in_html_comment_ranges;
1433 use std::collections::HashSet;
1434
1435 let mut images = Vec::with_capacity(content.len() / 1000);
1437 let mut found_positions = HashSet::new();
1438
1439 let parser = Parser::new(content).into_offset_iter();
1441 let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1442 Vec::new();
1443 let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); for (event, range) in parser {
1446 match event {
1447 Event::Start(Tag::Image {
1448 link_type,
1449 dest_url,
1450 id,
1451 ..
1452 }) => {
1453 image_stack.push((range.start, dest_url, link_type, id));
1454 text_chunks.clear();
1455 }
1456 Event::Text(text) if !image_stack.is_empty() => {
1457 text_chunks.push((text.to_string(), range.start, range.end));
1458 }
1459 Event::Code(code) if !image_stack.is_empty() => {
1460 let code_text = format!("`{code}`");
1461 text_chunks.push((code_text, range.start, range.end));
1462 }
1463 Event::End(TagEnd::Image) => {
1464 if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1465 if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1467 continue;
1468 }
1469
1470 if Self::is_offset_in_code_span(code_spans, start_pos) {
1472 continue;
1473 }
1474
1475 if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1477 continue;
1478 }
1479
1480 let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1482 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1483
1484 let is_reference = matches!(
1485 link_type,
1486 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1487 );
1488
1489 let alt_text = if start_pos < content.len() {
1492 let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1493
1494 let mut close_pos = None;
1497 let mut depth = 0;
1498
1499 if image_bytes.len() > 2 {
1500 for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1501 let mut backslash_count = 0;
1503 let mut j = i;
1504 while j > 0 && image_bytes[j - 1] == b'\\' {
1505 backslash_count += 1;
1506 j -= 1;
1507 }
1508 let is_escaped = backslash_count % 2 != 0;
1509
1510 if !is_escaped {
1511 if byte == b'[' {
1512 depth += 1;
1513 } else if byte == b']' {
1514 if depth == 0 {
1515 close_pos = Some(i);
1517 break;
1518 } else {
1519 depth -= 1;
1520 }
1521 }
1522 }
1523 }
1524 }
1525
1526 if let Some(pos) = close_pos {
1527 Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1528 } else {
1529 Cow::Borrowed("")
1530 }
1531 } else {
1532 Cow::Borrowed("")
1533 };
1534
1535 let reference_id = if is_reference && !ref_id.is_empty() {
1536 Some(Cow::Owned(ref_id.to_lowercase()))
1537 } else if is_reference {
1538 Some(Cow::Owned(alt_text.to_lowercase())) } else {
1540 None
1541 };
1542
1543 found_positions.insert(start_pos);
1544 images.push(ParsedImage {
1545 line: line_num,
1546 start_col: col_start,
1547 end_col: col_end,
1548 byte_offset: start_pos,
1549 byte_end: range.end,
1550 alt_text,
1551 url: Cow::Owned(url.to_string()),
1552 is_reference,
1553 reference_id,
1554 link_type,
1555 });
1556 }
1557 }
1558 _ => {}
1559 }
1560 }
1561
1562 for cap in IMAGE_PATTERN.captures_iter(content) {
1564 let full_match = cap.get(0).unwrap();
1565 let match_start = full_match.start();
1566 let match_end = full_match.end();
1567
1568 if found_positions.contains(&match_start) {
1570 continue;
1571 }
1572
1573 if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1575 continue;
1576 }
1577
1578 if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1580 || Self::is_offset_in_code_span(code_spans, match_start)
1581 || is_in_html_comment_ranges(html_comment_ranges, match_start)
1582 {
1583 continue;
1584 }
1585
1586 if let Some(ref_id) = cap.get(6) {
1588 let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1589 let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1590 let alt_text = cap.get(1).map_or("", |m| m.as_str());
1591 let ref_id_str = ref_id.as_str();
1592 let normalized_ref = if ref_id_str.is_empty() {
1593 Cow::Owned(alt_text.to_lowercase())
1594 } else {
1595 Cow::Owned(ref_id_str.to_lowercase())
1596 };
1597
1598 images.push(ParsedImage {
1599 line: line_num,
1600 start_col: col_start,
1601 end_col: col_end,
1602 byte_offset: match_start,
1603 byte_end: match_end,
1604 alt_text: Cow::Borrowed(alt_text),
1605 url: Cow::Borrowed(""),
1606 is_reference: true,
1607 reference_id: Some(normalized_ref),
1608 link_type: LinkType::Reference, });
1610 }
1611 }
1612
1613 images
1614 }
1615
1616 fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1618 let mut refs = Vec::with_capacity(lines.len() / 20); for (line_idx, line_info) in lines.iter().enumerate() {
1622 if line_info.in_code_block {
1624 continue;
1625 }
1626
1627 let line = line_info.content(content);
1628 let line_num = line_idx + 1;
1629
1630 if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1631 let id = cap.get(1).unwrap().as_str().to_lowercase();
1632 let url = cap.get(2).unwrap().as_str().to_string();
1633 let title_match = cap.get(3).or_else(|| cap.get(4));
1634 let title = title_match.map(|m| m.as_str().to_string());
1635
1636 let match_obj = cap.get(0).unwrap();
1639 let byte_offset = line_info.byte_offset + match_obj.start();
1640 let byte_end = line_info.byte_offset + match_obj.end();
1641
1642 let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1644 let start = line_info.byte_offset + m.start().saturating_sub(1);
1646 let end = line_info.byte_offset + m.end() + 1; (Some(start), Some(end))
1648 } else {
1649 (None, None)
1650 };
1651
1652 refs.push(ReferenceDef {
1653 line: line_num,
1654 id,
1655 url,
1656 title,
1657 byte_offset,
1658 byte_end,
1659 title_byte_start,
1660 title_byte_end,
1661 });
1662 }
1663 }
1664
1665 refs
1666 }
1667
/// Splits a blockquote line into `(prefix, content)`.
///
/// The prefix covers every leading `>` marker, the whitespace in front of each marker, and at
/// most one space or tab directly after each marker. Any further whitespace stays in `content`.
/// Returns `None` when the first non-whitespace character of `line` is not `>`.
#[inline]
fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
    let mut rest = line;
    let mut prefix_len = 0;

    // Each pass consumes optional whitespace, one `>`, and an optional single space/tab.
    while let Some(after_marker) = rest.trim_start().strip_prefix('>') {
        let through_marker = rest.len() - after_marker.len();
        let after_gap = after_marker.strip_prefix([' ', '\t']);

        prefix_len += through_marker + usize::from(after_gap.is_some());
        rest = after_gap.unwrap_or(after_marker);
    }

    // Every consumed marker adds at least one byte, so zero means "not a blockquote".
    (prefix_len > 0).then(|| (&line[..prefix_len], rest))
}
1708
/// Splits a line that starts with an unordered list marker.
///
/// Returns `(leading_whitespace, marker, spacing, content)`, where `marker` is `-`, `*` or `+`
/// and `spacing` is the run of spaces/tabs directly after it (possibly empty — callers decide
/// whether a marker without spacing counts as a list item). Returns `None` when the first
/// character after the leading spaces/tabs is not a marker.
#[inline]
fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
    const BLANKS: [char; 2] = [' ', '\t'];

    let after_indent = line.trim_start_matches(BLANKS);
    let (indent, _) = line.split_at(line.len() - after_indent.len());

    let marker = after_indent
        .chars()
        .next()
        .filter(|c| matches!(c, '-' | '*' | '+'))?;

    // All markers are single-byte ASCII, so slicing one byte past the marker is safe.
    let after_marker = &after_indent[1..];
    let content = after_marker.trim_start_matches(BLANKS);
    let spacing = &after_marker[..after_marker.len() - content.len()];

    Some((indent, marker, spacing, content))
}
1741
/// Splits a line that starts with an ordered list marker.
///
/// Returns `(leading_whitespace, number, delimiter, spacing, content)`, where `number` is the run
/// of ASCII digits, `delimiter` is `.` or `)`, and `spacing` is the run of spaces/tabs directly
/// after the delimiter (possibly empty). Returns `None` when the line has no digits after the
/// leading spaces/tabs or the digits are not followed by a delimiter.
#[inline]
fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
    let is_blank = |c: char| c == ' ' || c == '\t';

    let after_indent = line.trim_start_matches(is_blank);
    let (indent, _) = line.split_at(line.len() - after_indent.len());

    let digit_count = after_indent.bytes().take_while(u8::is_ascii_digit).count();
    if digit_count == 0 {
        return None;
    }
    let (number, after_number) = after_indent.split_at(digit_count);

    let delimiter = after_number
        .chars()
        .next()
        .filter(|c| matches!(c, '.' | ')'))?;

    // Both delimiters are single-byte ASCII, so slicing one byte past it is safe.
    let after_delimiter = &after_number[1..];
    let content = after_delimiter.trim_start_matches(is_blank);
    let spacing = &after_delimiter[..after_delimiter.len() - content.len()];

    Some((indent, number, delimiter, spacing, content))
}
1789
/// Builds a per-line flag vector telling whether each line overlaps a code block.
///
/// `line_offsets` holds the starting byte offset of every line (ascending) and `code_blocks`
/// holds `(start, end)` byte ranges. Range ends that fall inside a multi-byte character are
/// snapped outward to the nearest character boundary (start backward, end forward), and the end
/// is clamped to `content.len()`. A line is flagged when it contains the snapped start, or when
/// it begins before the snapped end and after the start line.
fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
    let mut flags = vec![false; line_offsets.len()];

    for &(block_start, block_end) in code_blocks {
        // Offset 0 is always a boundary, so the backward search cannot fail.
        let floor = (0..=block_start)
            .rev()
            .find(|&pos| content.is_char_boundary(pos))
            .unwrap_or(0);
        // An empty search range means the end lies past the content; clamp it.
        let ceil = (block_end..=content.len())
            .find(|&pos| content.is_char_boundary(pos))
            .unwrap_or(content.len());

        // Line containing `floor`: the last line whose offset is <= floor.
        let first_line = line_offsets.partition_point(|&off| off <= floor).saturating_sub(1);
        // One past the last line that starts before `ceil`.
        let end_line = line_offsets.partition_point(|&off| off < ceil);

        // `get_mut` yields `None` for an inverted range, which leaves the flags untouched.
        if let Some(covered) = flags.get_mut(first_line..end_line) {
            covered.fill(true);
        }
    }

    flags
}
1849
1850 fn compute_basic_line_info(
1852 content: &str,
1853 line_offsets: &[usize],
1854 code_blocks: &[(usize, usize)],
1855 flavor: MarkdownFlavor,
1856 html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1857 autodoc_ranges: &[crate::utils::skip_context::ByteRange],
1858 ) -> Vec<LineInfo> {
1859 let content_lines: Vec<&str> = content.lines().collect();
1860 let mut lines = Vec::with_capacity(content_lines.len());
1861
1862 let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
1864
1865 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
1868
1869 for (i, line) in content_lines.iter().enumerate() {
1870 let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
1871 let indent = line.len() - line.trim_start().len();
1872
1873 let blockquote_parse = Self::parse_blockquote_prefix(line);
1875
1876 let is_blank = if let Some((_, content)) = blockquote_parse {
1878 content.trim().is_empty()
1880 } else {
1881 line.trim().is_empty()
1882 };
1883
1884 let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
1886
1887 let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
1889 && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
1890 let line_end_offset = byte_offset + line.len();
1893 let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
1894 html_comment_ranges,
1895 byte_offset,
1896 line_end_offset,
1897 );
1898 let list_item = if !(in_code_block
1899 || is_blank
1900 || in_mkdocstrings
1901 || in_html_comment
1902 || (front_matter_end > 0 && i < front_matter_end))
1903 {
1904 let (line_for_list_check, blockquote_prefix_len) = if let Some((prefix, content)) = blockquote_parse {
1906 (content, prefix.len())
1907 } else {
1908 (&**line, 0)
1909 };
1910
1911 if let Some((leading_spaces, marker, spacing, _content)) =
1912 Self::parse_unordered_list(line_for_list_check)
1913 {
1914 let marker_column = blockquote_prefix_len + leading_spaces.len();
1915 let content_column = marker_column + 1 + spacing.len();
1916
1917 if spacing.is_empty() {
1924 None
1925 } else {
1926 Some(ListItemInfo {
1927 marker: marker.to_string(),
1928 is_ordered: false,
1929 number: None,
1930 marker_column,
1931 content_column,
1932 })
1933 }
1934 } else if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
1935 Self::parse_ordered_list(line_for_list_check)
1936 {
1937 let marker = format!("{number_str}{delimiter}");
1938 let marker_column = blockquote_prefix_len + leading_spaces.len();
1939 let content_column = marker_column + marker.len() + spacing.len();
1940
1941 if spacing.is_empty() {
1944 None
1945 } else {
1946 Some(ListItemInfo {
1947 marker,
1948 is_ordered: true,
1949 number: number_str.parse().ok(),
1950 marker_column,
1951 content_column,
1952 })
1953 }
1954 } else {
1955 None
1956 }
1957 } else {
1958 None
1959 };
1960
1961 lines.push(LineInfo {
1962 byte_offset,
1963 byte_len: line.len(),
1964 indent,
1965 is_blank,
1966 in_code_block,
1967 in_front_matter: front_matter_end > 0 && i < front_matter_end,
1968 in_html_block: false, in_html_comment,
1970 list_item,
1971 heading: None, blockquote: None, in_mkdocstrings,
1974 in_esm_block: false, in_code_span_continuation: false, });
1977 }
1978
1979 lines
1980 }
1981
/// Fills in the `blockquote` and `heading` fields of `lines`.
///
/// Lines inside code blocks, front matter or HTML blocks are skipped entirely. For the remaining
/// lines:
/// - blockquote details are recorded whenever `parse_blockquote_detailed` recognises the line
///   (this happens before the blank-line check, so blank blockquote lines are recorded too);
/// - a non-blank line matching the ATX pattern becomes an ATX heading, unless it is a MkDocs
///   snippet section marker, starts inside an HTML comment, or starts strictly inside one of
///   `link_byte_ranges`;
/// - otherwise, a non-blank line followed by a `===` / `---` underline outside a code block
///   becomes a setext heading of level 1 / 2.
///
/// A custom id is taken from the heading text itself or, failing that, from a standalone
/// attribute-list line directly after the heading (after the underline for setext headings).
fn detect_headings_and_blockquotes(
    content: &str,
    lines: &mut [LineInfo],
    flavor: MarkdownFlavor,
    html_comment_ranges: &[crate::utils::skip_context::ByteRange],
    link_byte_ranges: &[(usize, usize)],
) {
    // Groups: 1 = leading whitespace, 2 = one to six `#`, 3 = whitespace after the hashes,
    // 4 = the rest of the line.
    static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
        LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
    // A line made only of `=` or only of `-`, with optional surrounding whitespace.
    static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
        LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());

    let content_lines: Vec<&str> = content.lines().collect();

    let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);

    for i in 0..lines.len() {
        if lines[i].in_code_block {
            continue;
        }

        if front_matter_end > 0 && i < front_matter_end {
            continue;
        }

        if lines[i].in_html_block {
            continue;
        }

        // NOTE(review): indexes `content_lines` with an index bounded by `lines.len()`;
        // assumes `lines` was built from the same `content.lines()` split — confirm.
        let line = content_lines[i];

        if let Some(bq) = parse_blockquote_detailed(line) {
            let nesting_level = bq.markers.len();
            let marker_column = bq.indent.len();

            let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);

            // Marker directly followed by content, e.g. `>text`.
            let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
            // Only literal spaces are counted; tabs do not contribute.
            let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;

            // Set for a marker with neither content nor trailing whitespace.
            let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();

            lines[i].blockquote = Some(BlockquoteInfo {
                nesting_level,
                indent: bq.indent.to_string(),
                marker_column,
                prefix,
                content: bq.content.to_string(),
                has_no_space_after_marker: has_no_space,
                has_multiple_spaces_after_marker: has_multiple_spaces,
                needs_md028_fix,
            });
        }

        // Blank lines cannot be headings.
        if lines[i].is_blank {
            continue;
        }

        // In the MkDocs flavor, snippet section start/end markers are never treated as ATX headings.
        let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
            crate::utils::mkdocs_snippets::is_snippet_section_start(line)
                || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
        } else {
            false
        };

        if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
            // These `continue`s also bypass the setext branch below for this line.
            if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
                continue;
            }
            let line_offset = lines[i].byte_offset;
            // A line that starts strictly inside a link's byte range is not a heading.
            if link_byte_ranges
                .iter()
                .any(|&(start, end)| line_offset > start && line_offset < end)
            {
                continue;
            }
            let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
            let hashes = caps.get(2).map_or("", |m| m.as_str());
            let spaces_after = caps.get(3).map_or("", |m| m.as_str());
            let rest = caps.get(4).map_or("", |m| m.as_str());

            let level = hashes.len() as u8;
            let marker_column = leading_spaces.len();

            // Split `rest` into heading text and an optional closing `#` sequence.
            let (text, has_closing, closing_seq) = {
                // Set a trailing ` {#...}` custom id aside so that it is not mistaken for, and
                // does not hide, a closing sequence.
                let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
                    if rest[id_start..].trim_end().ends_with('}') {
                        (&rest[..id_start], &rest[id_start..])
                    } else {
                        (rest, "")
                    }
                } else {
                    (rest, "")
                };

                let trimmed_rest = rest_without_id.trim_end();
                if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
                    let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();

                    let last_hash_char_idx = char_positions
                        .iter()
                        .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);

                    if let Some(mut char_idx) = last_hash_char_idx {
                        // Walk left to the first `#` of the run that ends at the last `#`.
                        while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
                            char_idx -= 1;
                        }

                        let start_of_hashes = char_positions[char_idx].0;

                        let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();

                        // A closing sequence must reach the end of the trimmed text, consist only
                        // of `#`, and be preceded by whitespace (or start the text).
                        let potential_closing = &trimmed_rest[start_of_hashes..];
                        let is_all_hashes = potential_closing.chars().all(|c| c == '#');

                        if is_all_hashes && has_space_before {
                            let closing_hashes = potential_closing.to_string();
                            // Re-attach the custom id after removing the closing sequence.
                            let text_part = if !custom_id_part.is_empty() {
                                format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
                            } else {
                                trimmed_rest[..start_of_hashes].trim_end().to_string()
                            };
                            (text_part, true, closing_hashes)
                        } else {
                            (rest.to_string(), false, String::new())
                        }
                    } else {
                        (rest.to_string(), false, String::new())
                    }
                } else {
                    (rest.to_string(), false, String::new())
                }
            };

            let content_column = marker_column + hashes.len() + spaces_after.len();

            let raw_text = text.trim().to_string();
            let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);

            // No inline id: look for a standalone attribute list on the next line.
            if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
                let next_line = content_lines[i + 1];
                if !lines[i + 1].in_code_block
                    && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
                    && let Some(next_line_id) =
                        crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
                {
                    custom_id = Some(next_line_id);
                }
            }

            // A heading without a space after the hashes is only considered valid when it has no
            // text, is level 2 or deeper, or its text starts with an uppercase character.
            // NOTE(review): presumably filters out `#hashtag`-style text — confirm intent.
            let is_valid = !spaces_after.is_empty()
                || rest.is_empty()
                || level > 1
                || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());

            lines[i].heading = Some(HeadingInfo {
                level,
                style: HeadingStyle::ATX,
                marker: hashes.to_string(),
                marker_column,
                content_column,
                text: clean_text,
                custom_id,
                raw_text,
                has_closing_sequence: has_closing,
                closing_sequence: closing_seq,
                is_valid,
            });
        }
        // Not an ATX heading: check whether the next line underlines this one (setext heading).
        else if i + 1 < content_lines.len() && i + 1 < lines.len() {
            let next_line = content_lines[i + 1];
            if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
                // NOTE(review): redundant — the same front-matter guard at the top of the loop
                // has already skipped such lines.
                if front_matter_end > 0 && i < front_matter_end {
                    continue;
                }

                if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
                {
                    continue;
                }

                let underline = next_line.trim();

                // `=` underline -> level 1, `-` underline -> level 2.
                let level = if underline.starts_with('=') { 1 } else { 2 };
                let style = if level == 1 {
                    HeadingStyle::Setext1
                } else {
                    HeadingStyle::Setext2
                };

                let raw_text = line.trim().to_string();
                let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);

                // No inline id: look for a standalone attribute list on the line after the underline.
                if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
                    let attr_line = content_lines[i + 2];
                    if !lines[i + 2].in_code_block
                        && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
                        && let Some(attr_line_id) =
                            crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
                    {
                        custom_id = Some(attr_line_id);
                    }
                }

                // The marker is the underline text; `marker_column` is the underline's indent
                // and `content_column` is the heading line's indent.
                lines[i].heading = Some(HeadingInfo {
                    level,
                    style,
                    marker: underline.to_string(),
                    marker_column: next_line.len() - next_line.trim_start().len(),
                    content_column: lines[i].indent,
                    text: clean_text,
                    custom_id,
                    raw_text,
                    has_closing_sequence: false,
                    closing_sequence: String::new(),
                    is_valid: true,
                });
            }
        }
    }
}
2259
2260 fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2262 const BLOCK_ELEMENTS: &[&str] = &[
2265 "address",
2266 "article",
2267 "aside",
2268 "audio",
2269 "blockquote",
2270 "canvas",
2271 "details",
2272 "dialog",
2273 "dd",
2274 "div",
2275 "dl",
2276 "dt",
2277 "embed",
2278 "fieldset",
2279 "figcaption",
2280 "figure",
2281 "footer",
2282 "form",
2283 "h1",
2284 "h2",
2285 "h3",
2286 "h4",
2287 "h5",
2288 "h6",
2289 "header",
2290 "hr",
2291 "iframe",
2292 "li",
2293 "main",
2294 "menu",
2295 "nav",
2296 "noscript",
2297 "object",
2298 "ol",
2299 "p",
2300 "picture",
2301 "pre",
2302 "script",
2303 "search",
2304 "section",
2305 "source",
2306 "style",
2307 "summary",
2308 "svg",
2309 "table",
2310 "tbody",
2311 "td",
2312 "template",
2313 "textarea",
2314 "tfoot",
2315 "th",
2316 "thead",
2317 "tr",
2318 "track",
2319 "ul",
2320 "video",
2321 ];
2322
2323 let mut i = 0;
2324 while i < lines.len() {
2325 if lines[i].in_code_block || lines[i].in_front_matter {
2327 i += 1;
2328 continue;
2329 }
2330
2331 let trimmed = lines[i].content(content).trim_start();
2332
2333 if trimmed.starts_with('<') && trimmed.len() > 1 {
2335 let after_bracket = &trimmed[1..];
2337 let is_closing = after_bracket.starts_with('/');
2338 let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2339
2340 let tag_name = tag_start
2342 .chars()
2343 .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2344 .collect::<String>()
2345 .to_lowercase();
2346
2347 if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2349 lines[i].in_html_block = true;
2351
2352 if !is_closing {
2355 let closing_tag = format!("</{tag_name}>");
2356 let allow_blank_lines = tag_name == "style" || tag_name == "script";
2358 let mut j = i + 1;
2359 while j < lines.len() && j < i + 100 {
2360 if !allow_blank_lines && lines[j].is_blank {
2363 break;
2364 }
2365
2366 lines[j].in_html_block = true;
2367
2368 if lines[j].content(content).contains(&closing_tag) {
2370 break;
2371 }
2372 j += 1;
2373 }
2374 }
2375 }
2376 }
2377
2378 i += 1;
2379 }
2380 }
2381
2382 fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2385 if !flavor.supports_esm_blocks() {
2387 return;
2388 }
2389
2390 let mut in_multiline_comment = false;
2391
2392 for line in lines.iter_mut() {
2393 if line.is_blank || line.in_html_comment {
2395 continue;
2396 }
2397
2398 let trimmed = line.content(content).trim_start();
2399
2400 if in_multiline_comment {
2402 if trimmed.contains("*/") {
2403 in_multiline_comment = false;
2404 }
2405 continue;
2406 }
2407
2408 if trimmed.starts_with("//") {
2410 continue;
2411 }
2412
2413 if trimmed.starts_with("/*") {
2415 if !trimmed.contains("*/") {
2416 in_multiline_comment = true;
2417 }
2418 continue;
2419 }
2420
2421 if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
2423 line.in_esm_block = true;
2424 } else {
2425 break;
2427 }
2428 }
2429 }
2430
2431 fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
2433 let mut code_spans = Vec::new();
2434
2435 if !content.contains('`') {
2437 return code_spans;
2438 }
2439
2440 let parser = Parser::new(content).into_offset_iter();
2442
2443 for (event, range) in parser {
2444 if let Event::Code(_) = event {
2445 let start_pos = range.start;
2446 let end_pos = range.end;
2447
2448 let full_span = &content[start_pos..end_pos];
2450 let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
2451
2452 let content_start = start_pos + backtick_count;
2454 let content_end = end_pos - backtick_count;
2455 let span_content = if content_start < content_end {
2456 content[content_start..content_end].to_string()
2457 } else {
2458 String::new()
2459 };
2460
2461 let line_idx = lines
2464 .partition_point(|line| line.byte_offset <= start_pos)
2465 .saturating_sub(1);
2466 let line_num = line_idx + 1;
2467 let byte_col_start = start_pos - lines[line_idx].byte_offset;
2468
2469 let end_line_idx = lines
2471 .partition_point(|line| line.byte_offset <= end_pos)
2472 .saturating_sub(1);
2473 let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
2474
2475 let line_content = lines[line_idx].content(content);
2478 let col_start = if byte_col_start <= line_content.len() {
2479 line_content[..byte_col_start].chars().count()
2480 } else {
2481 line_content.chars().count()
2482 };
2483
2484 let end_line_content = lines[end_line_idx].content(content);
2485 let col_end = if byte_col_end <= end_line_content.len() {
2486 end_line_content[..byte_col_end].chars().count()
2487 } else {
2488 end_line_content.chars().count()
2489 };
2490
2491 code_spans.push(CodeSpan {
2492 line: line_num,
2493 end_line: end_line_idx + 1,
2494 start_col: col_start,
2495 end_col: col_end,
2496 byte_offset: start_pos,
2497 byte_end: end_pos,
2498 backtick_count,
2499 content: span_content,
2500 });
2501 }
2502 }
2503
2504 code_spans.sort_by_key(|span| span.byte_offset);
2506
2507 code_spans
2508 }
2509
2510 fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
2521 const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
2523
2524 #[inline]
2527 fn reset_tracking_state(
2528 list_item: &ListItemInfo,
2529 has_list_breaking_content: &mut bool,
2530 min_continuation: &mut usize,
2531 ) {
2532 *has_list_breaking_content = false;
2533 let marker_width = if list_item.is_ordered {
2534 list_item.marker.len() + 1 } else {
2536 list_item.marker.len()
2537 };
2538 *min_continuation = if list_item.is_ordered {
2539 marker_width
2540 } else {
2541 UNORDERED_LIST_MIN_CONTINUATION_INDENT
2542 };
2543 }
2544
2545 let mut list_blocks = Vec::with_capacity(lines.len() / 10); let mut current_block: Option<ListBlock> = None;
2548 let mut last_list_item_line = 0;
2549 let mut current_indent_level = 0;
2550 let mut last_marker_width = 0;
2551
2552 let mut has_list_breaking_content_since_last_item = false;
2554 let mut min_continuation_for_tracking = 0;
2555
2556 for (line_idx, line_info) in lines.iter().enumerate() {
2557 let line_num = line_idx + 1;
2558
2559 if line_info.in_code_block {
2561 if let Some(ref mut block) = current_block {
2562 let min_continuation_indent =
2564 CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
2565
2566 let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
2568
2569 match context {
2570 CodeBlockContext::Indented => {
2571 block.end_line = line_num;
2573 continue;
2574 }
2575 CodeBlockContext::Standalone => {
2576 let completed_block = current_block.take().unwrap();
2578 list_blocks.push(completed_block);
2579 continue;
2580 }
2581 CodeBlockContext::Adjacent => {
2582 block.end_line = line_num;
2584 continue;
2585 }
2586 }
2587 } else {
2588 continue;
2590 }
2591 }
2592
2593 let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
2595 caps.get(0).unwrap().as_str().to_string()
2596 } else {
2597 String::new()
2598 };
2599
2600 if current_block.is_some()
2603 && line_info.list_item.is_none()
2604 && !line_info.is_blank
2605 && !line_info.in_code_span_continuation
2606 {
2607 let line_content = line_info.content(content).trim();
2608
2609 let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
2614 let breaks_list = line_info.heading.is_some()
2615 || line_content.starts_with("---")
2616 || line_content.starts_with("***")
2617 || line_content.starts_with("___")
2618 || crate::utils::skip_context::is_table_line(line_content)
2619 || line_content.starts_with(">")
2620 || (line_info.indent > 0
2621 && line_info.indent < min_continuation_for_tracking
2622 && !is_lazy_continuation);
2623
2624 if breaks_list {
2625 has_list_breaking_content_since_last_item = true;
2626 }
2627 }
2628
2629 if line_info.in_code_span_continuation
2632 && line_info.list_item.is_none()
2633 && let Some(ref mut block) = current_block
2634 {
2635 block.end_line = line_num;
2636 }
2637
2638 let is_valid_continuation =
2643 line_info.indent >= min_continuation_for_tracking || (line_info.indent == 0 && !line_info.is_blank); if !line_info.in_code_span_continuation
2645 && line_info.list_item.is_none()
2646 && !line_info.is_blank
2647 && !line_info.in_code_block
2648 && is_valid_continuation
2649 && let Some(ref mut block) = current_block
2650 {
2651 block.end_line = line_num;
2652 }
2653
2654 if let Some(list_item) = &line_info.list_item {
2656 let item_indent = list_item.marker_column;
2658 let nesting = item_indent / 2; if let Some(ref mut block) = current_block {
2661 let is_nested = nesting > block.nesting_level;
2665 let same_type =
2666 (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
2667 let same_context = block.blockquote_prefix == blockquote_prefix;
2668 let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
2670
2671 let marker_compatible =
2673 block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
2674
2675 let has_non_list_content = has_list_breaking_content_since_last_item;
2678
2679 let mut continues_list = if is_nested {
2683 same_context && reasonable_distance && !has_non_list_content
2685 } else {
2686 same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
2688 };
2689
2690 if !continues_list && reasonable_distance && line_num > 0 && block.end_line == line_num - 1 {
2693 if block.item_lines.contains(&(line_num - 1)) {
2696 continues_list = true;
2698 } else {
2699 continues_list = true;
2703 }
2704 }
2705
2706 if continues_list {
2707 block.end_line = line_num;
2709 block.item_lines.push(line_num);
2710
2711 block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
2713 list_item.marker.len() + 1
2714 } else {
2715 list_item.marker.len()
2716 });
2717
2718 if !block.is_ordered
2720 && block.marker.is_some()
2721 && block.marker.as_ref() != Some(&list_item.marker)
2722 {
2723 block.marker = None;
2725 }
2726
2727 reset_tracking_state(
2729 list_item,
2730 &mut has_list_breaking_content_since_last_item,
2731 &mut min_continuation_for_tracking,
2732 );
2733 } else {
2734 list_blocks.push(block.clone());
2737
2738 *block = ListBlock {
2739 start_line: line_num,
2740 end_line: line_num,
2741 is_ordered: list_item.is_ordered,
2742 marker: if list_item.is_ordered {
2743 None
2744 } else {
2745 Some(list_item.marker.clone())
2746 },
2747 blockquote_prefix: blockquote_prefix.clone(),
2748 item_lines: vec![line_num],
2749 nesting_level: nesting,
2750 max_marker_width: if list_item.is_ordered {
2751 list_item.marker.len() + 1
2752 } else {
2753 list_item.marker.len()
2754 },
2755 };
2756
2757 reset_tracking_state(
2759 list_item,
2760 &mut has_list_breaking_content_since_last_item,
2761 &mut min_continuation_for_tracking,
2762 );
2763 }
2764 } else {
2765 current_block = Some(ListBlock {
2767 start_line: line_num,
2768 end_line: line_num,
2769 is_ordered: list_item.is_ordered,
2770 marker: if list_item.is_ordered {
2771 None
2772 } else {
2773 Some(list_item.marker.clone())
2774 },
2775 blockquote_prefix,
2776 item_lines: vec![line_num],
2777 nesting_level: nesting,
2778 max_marker_width: list_item.marker.len(),
2779 });
2780
2781 reset_tracking_state(
2783 list_item,
2784 &mut has_list_breaking_content_since_last_item,
2785 &mut min_continuation_for_tracking,
2786 );
2787 }
2788
2789 last_list_item_line = line_num;
2790 current_indent_level = item_indent;
2791 last_marker_width = if list_item.is_ordered {
2792 list_item.marker.len() + 1 } else {
2794 list_item.marker.len()
2795 };
2796 } else if let Some(ref mut block) = current_block {
2797 let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
2807 lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
2808 } else {
2809 false
2810 };
2811
2812 let min_continuation_indent = if block.is_ordered {
2816 current_indent_level + last_marker_width
2817 } else {
2818 current_indent_level + 2 };
2820
2821 if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
2822 block.end_line = line_num;
2824 } else if line_info.is_blank {
2825 let mut check_idx = line_idx + 1;
2828 let mut found_continuation = false;
2829
2830 while check_idx < lines.len() && lines[check_idx].is_blank {
2832 check_idx += 1;
2833 }
2834
2835 if check_idx < lines.len() {
2836 let next_line = &lines[check_idx];
2837 if !next_line.in_code_block && next_line.indent >= min_continuation_indent {
2839 found_continuation = true;
2840 }
2841 else if !next_line.in_code_block
2843 && next_line.list_item.is_some()
2844 && let Some(item) = &next_line.list_item
2845 {
2846 let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
2847 .find(next_line.content(content))
2848 .map_or(String::new(), |m| m.as_str().to_string());
2849 if item.marker_column == current_indent_level
2850 && item.is_ordered == block.is_ordered
2851 && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
2852 {
2853 let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
2856 if let Some(between_line) = lines.get(idx) {
2857 let between_content = between_line.content(content);
2858 let trimmed = between_content.trim();
2859 if trimmed.is_empty() {
2861 return false;
2862 }
2863 let line_indent = between_content.len() - between_content.trim_start().len();
2865
2866 if trimmed.starts_with("```")
2868 || trimmed.starts_with("~~~")
2869 || trimmed.starts_with("---")
2870 || trimmed.starts_with("***")
2871 || trimmed.starts_with("___")
2872 || trimmed.starts_with(">")
2873 || crate::utils::skip_context::is_table_line(trimmed)
2874 || between_line.heading.is_some()
2875 {
2876 return true; }
2878
2879 line_indent >= min_continuation_indent
2881 } else {
2882 false
2883 }
2884 });
2885
2886 if block.is_ordered {
2887 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2890 if let Some(between_line) = lines.get(idx) {
2891 let trimmed = between_line.content(content).trim();
2892 if trimmed.is_empty() {
2893 return false;
2894 }
2895 trimmed.starts_with("```")
2897 || trimmed.starts_with("~~~")
2898 || trimmed.starts_with("---")
2899 || trimmed.starts_with("***")
2900 || trimmed.starts_with("___")
2901 || trimmed.starts_with(">")
2902 || crate::utils::skip_context::is_table_line(trimmed)
2903 || between_line.heading.is_some()
2904 } else {
2905 false
2906 }
2907 });
2908 found_continuation = !has_structural_separators;
2909 } else {
2910 let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2912 if let Some(between_line) = lines.get(idx) {
2913 let trimmed = between_line.content(content).trim();
2914 if trimmed.is_empty() {
2915 return false;
2916 }
2917 trimmed.starts_with("```")
2919 || trimmed.starts_with("~~~")
2920 || trimmed.starts_with("---")
2921 || trimmed.starts_with("***")
2922 || trimmed.starts_with("___")
2923 || trimmed.starts_with(">")
2924 || crate::utils::skip_context::is_table_line(trimmed)
2925 || between_line.heading.is_some()
2926 } else {
2927 false
2928 }
2929 });
2930 found_continuation = !has_structural_separators;
2931 }
2932 }
2933 }
2934 }
2935
2936 if found_continuation {
2937 block.end_line = line_num;
2939 } else {
2940 list_blocks.push(block.clone());
2942 current_block = None;
2943 }
2944 } else {
2945 let min_required_indent = if block.is_ordered {
2948 current_indent_level + last_marker_width
2949 } else {
2950 current_indent_level + 2
2951 };
2952
2953 let line_content = line_info.content(content).trim();
2958
2959 let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
2961
2962 let is_structural_separator = line_info.heading.is_some()
2963 || line_content.starts_with("```")
2964 || line_content.starts_with("~~~")
2965 || line_content.starts_with("---")
2966 || line_content.starts_with("***")
2967 || line_content.starts_with("___")
2968 || line_content.starts_with(">")
2969 || looks_like_table;
2970
2971 let is_lazy_continuation = !is_structural_separator
2974 && !line_info.is_blank
2975 && (line_info.indent == 0 || line_info.indent >= min_required_indent);
2976
2977 if is_lazy_continuation {
2978 let content_to_check = if !blockquote_prefix.is_empty() {
2981 line_info
2983 .content(content)
2984 .strip_prefix(&blockquote_prefix)
2985 .unwrap_or(line_info.content(content))
2986 .trim()
2987 } else {
2988 line_info.content(content).trim()
2989 };
2990
2991 let starts_with_uppercase = content_to_check.chars().next().is_some_and(|c| c.is_uppercase());
2992
2993 if starts_with_uppercase && last_list_item_line > 0 {
2996 list_blocks.push(block.clone());
2998 current_block = None;
2999 } else {
3000 block.end_line = line_num;
3002 }
3003 } else {
3004 list_blocks.push(block.clone());
3006 current_block = None;
3007 }
3008 }
3009 }
3010 }
3011
3012 if let Some(block) = current_block {
3014 list_blocks.push(block);
3015 }
3016
3017 merge_adjacent_list_blocks(content, &mut list_blocks, lines);
3019
3020 list_blocks
3021 }
3022
3023 fn compute_char_frequency(content: &str) -> CharFrequency {
3025 let mut frequency = CharFrequency::default();
3026
3027 for ch in content.chars() {
3028 match ch {
3029 '#' => frequency.hash_count += 1,
3030 '*' => frequency.asterisk_count += 1,
3031 '_' => frequency.underscore_count += 1,
3032 '-' => frequency.hyphen_count += 1,
3033 '+' => frequency.plus_count += 1,
3034 '>' => frequency.gt_count += 1,
3035 '|' => frequency.pipe_count += 1,
3036 '[' => frequency.bracket_count += 1,
3037 '`' => frequency.backtick_count += 1,
3038 '<' => frequency.lt_count += 1,
3039 '!' => frequency.exclamation_count += 1,
3040 '\n' => frequency.newline_count += 1,
3041 _ => {}
3042 }
3043 }
3044
3045 frequency
3046 }
3047
3048 fn parse_html_tags(
3050 content: &str,
3051 lines: &[LineInfo],
3052 code_blocks: &[(usize, usize)],
3053 flavor: MarkdownFlavor,
3054 ) -> Vec<HtmlTag> {
3055 static HTML_TAG_REGEX: LazyLock<regex::Regex> =
3056 LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
3057
3058 let mut html_tags = Vec::with_capacity(content.matches('<').count());
3059
3060 for cap in HTML_TAG_REGEX.captures_iter(content) {
3061 let full_match = cap.get(0).unwrap();
3062 let match_start = full_match.start();
3063 let match_end = full_match.end();
3064
3065 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3067 continue;
3068 }
3069
3070 let is_closing = !cap.get(1).unwrap().as_str().is_empty();
3071 let tag_name_original = cap.get(2).unwrap().as_str();
3072 let tag_name = tag_name_original.to_lowercase();
3073 let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
3074
3075 if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
3078 continue;
3079 }
3080
3081 let mut line_num = 1;
3083 let mut col_start = match_start;
3084 let mut col_end = match_end;
3085 for (idx, line_info) in lines.iter().enumerate() {
3086 if match_start >= line_info.byte_offset {
3087 line_num = idx + 1;
3088 col_start = match_start - line_info.byte_offset;
3089 col_end = match_end - line_info.byte_offset;
3090 } else {
3091 break;
3092 }
3093 }
3094
3095 html_tags.push(HtmlTag {
3096 line: line_num,
3097 start_col: col_start,
3098 end_col: col_end,
3099 byte_offset: match_start,
3100 byte_end: match_end,
3101 tag_name,
3102 is_closing,
3103 is_self_closing,
3104 raw_content: full_match.as_str().to_string(),
3105 });
3106 }
3107
3108 html_tags
3109 }
3110
3111 fn parse_emphasis_spans(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<EmphasisSpan> {
3113 static EMPHASIS_REGEX: LazyLock<regex::Regex> =
3114 LazyLock::new(|| regex::Regex::new(r"(\*{1,3}|_{1,3})([^*_\s][^*_]*?)(\*{1,3}|_{1,3})").unwrap());
3115
3116 let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
3117
3118 for cap in EMPHASIS_REGEX.captures_iter(content) {
3119 let full_match = cap.get(0).unwrap();
3120 let match_start = full_match.start();
3121 let match_end = full_match.end();
3122
3123 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3125 continue;
3126 }
3127
3128 let opening_markers = cap.get(1).unwrap().as_str();
3129 let content_part = cap.get(2).unwrap().as_str();
3130 let closing_markers = cap.get(3).unwrap().as_str();
3131
3132 if opening_markers.chars().next() != closing_markers.chars().next()
3134 || opening_markers.len() != closing_markers.len()
3135 {
3136 continue;
3137 }
3138
3139 let marker = opening_markers.chars().next().unwrap();
3140 let marker_count = opening_markers.len();
3141
3142 let mut line_num = 1;
3144 let mut col_start = match_start;
3145 let mut col_end = match_end;
3146 for (idx, line_info) in lines.iter().enumerate() {
3147 if match_start >= line_info.byte_offset {
3148 line_num = idx + 1;
3149 col_start = match_start - line_info.byte_offset;
3150 col_end = match_end - line_info.byte_offset;
3151 } else {
3152 break;
3153 }
3154 }
3155
3156 emphasis_spans.push(EmphasisSpan {
3157 line: line_num,
3158 start_col: col_start,
3159 end_col: col_end,
3160 byte_offset: match_start,
3161 byte_end: match_end,
3162 marker,
3163 marker_count,
3164 content: content_part.to_string(),
3165 });
3166 }
3167
3168 emphasis_spans
3169 }
3170
3171 fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
3173 let mut table_rows = Vec::with_capacity(lines.len() / 20);
3174
3175 for (line_idx, line_info) in lines.iter().enumerate() {
3176 if line_info.in_code_block || line_info.is_blank {
3178 continue;
3179 }
3180
3181 let line = line_info.content(content);
3182 let line_num = line_idx + 1;
3183
3184 if !line.contains('|') {
3186 continue;
3187 }
3188
3189 let parts: Vec<&str> = line.split('|').collect();
3191 let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
3192
3193 let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
3195 let mut column_alignments = Vec::new();
3196
3197 if is_separator {
3198 for part in &parts[1..parts.len() - 1] {
3199 let trimmed = part.trim();
3201 let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
3202 "center".to_string()
3203 } else if trimmed.ends_with(':') {
3204 "right".to_string()
3205 } else if trimmed.starts_with(':') {
3206 "left".to_string()
3207 } else {
3208 "none".to_string()
3209 };
3210 column_alignments.push(alignment);
3211 }
3212 }
3213
3214 table_rows.push(TableRow {
3215 line: line_num,
3216 is_separator,
3217 column_count,
3218 column_alignments,
3219 });
3220 }
3221
3222 table_rows
3223 }
3224
3225 fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
3227 let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
3228
3229 for cap in BARE_URL_PATTERN.captures_iter(content) {
3231 let full_match = cap.get(0).unwrap();
3232 let match_start = full_match.start();
3233 let match_end = full_match.end();
3234
3235 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3237 continue;
3238 }
3239
3240 let preceding_char = if match_start > 0 {
3242 content.chars().nth(match_start - 1)
3243 } else {
3244 None
3245 };
3246 let following_char = content.chars().nth(match_end);
3247
3248 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3249 continue;
3250 }
3251 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3252 continue;
3253 }
3254
3255 let url = full_match.as_str();
3256 let url_type = if url.starts_with("https://") {
3257 "https"
3258 } else if url.starts_with("http://") {
3259 "http"
3260 } else if url.starts_with("ftp://") {
3261 "ftp"
3262 } else {
3263 "other"
3264 };
3265
3266 let mut line_num = 1;
3268 let mut col_start = match_start;
3269 let mut col_end = match_end;
3270 for (idx, line_info) in lines.iter().enumerate() {
3271 if match_start >= line_info.byte_offset {
3272 line_num = idx + 1;
3273 col_start = match_start - line_info.byte_offset;
3274 col_end = match_end - line_info.byte_offset;
3275 } else {
3276 break;
3277 }
3278 }
3279
3280 bare_urls.push(BareUrl {
3281 line: line_num,
3282 start_col: col_start,
3283 end_col: col_end,
3284 byte_offset: match_start,
3285 byte_end: match_end,
3286 url: url.to_string(),
3287 url_type: url_type.to_string(),
3288 });
3289 }
3290
3291 for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
3293 let full_match = cap.get(0).unwrap();
3294 let match_start = full_match.start();
3295 let match_end = full_match.end();
3296
3297 if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3299 continue;
3300 }
3301
3302 let preceding_char = if match_start > 0 {
3304 content.chars().nth(match_start - 1)
3305 } else {
3306 None
3307 };
3308 let following_char = content.chars().nth(match_end);
3309
3310 if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3311 continue;
3312 }
3313 if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3314 continue;
3315 }
3316
3317 let email = full_match.as_str();
3318
3319 let mut line_num = 1;
3321 let mut col_start = match_start;
3322 let mut col_end = match_end;
3323 for (idx, line_info) in lines.iter().enumerate() {
3324 if match_start >= line_info.byte_offset {
3325 line_num = idx + 1;
3326 col_start = match_start - line_info.byte_offset;
3327 col_end = match_end - line_info.byte_offset;
3328 } else {
3329 break;
3330 }
3331 }
3332
3333 bare_urls.push(BareUrl {
3334 line: line_num,
3335 start_col: col_start,
3336 end_col: col_end,
3337 byte_offset: match_start,
3338 byte_end: match_end,
3339 url: email.to_string(),
3340 url_type: "email".to_string(),
3341 });
3342 }
3343
3344 bare_urls
3345 }
3346
/// Returns an iterator over the headings of this document, built from `self.lines`.
///
/// The filtering itself lives in `ValidHeadingsIter`; presumably it yields only
/// headings whose `is_valid` flag is set — confirm against that type.
#[must_use]
pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
    ValidHeadingsIter::new(&self.lines)
}
3370
3371 #[must_use]
3375 pub fn has_valid_headings(&self) -> bool {
3376 self.lines
3377 .iter()
3378 .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
3379 }
3380}
3381
3382fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
3384 if list_blocks.len() < 2 {
3385 return;
3386 }
3387
3388 let mut merger = ListBlockMerger::new(content, lines);
3389 *list_blocks = merger.merge(list_blocks);
3390}
3391
/// Helper that decides which neighbouring `ListBlock`s belong together and merges them.
///
/// It only borrows the document; it owns no state of its own.
struct ListBlockMerger<'a> {
    /// Full document text; line contents are sliced out of it via `LineInfo::content`.
    content: &'a str,
    /// Per-line metadata for `content`, indexed by `line_number - 1`.
    lines: &'a [LineInfo],
}
3397
3398impl<'a> ListBlockMerger<'a> {
3399 fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
3400 Self { content, lines }
3401 }
3402
3403 fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
3404 let mut merged = Vec::with_capacity(list_blocks.len());
3405 let mut current = list_blocks[0].clone();
3406
3407 for next in list_blocks.iter().skip(1) {
3408 if self.should_merge_blocks(¤t, next) {
3409 current = self.merge_two_blocks(current, next);
3410 } else {
3411 merged.push(current);
3412 current = next.clone();
3413 }
3414 }
3415
3416 merged.push(current);
3417 merged
3418 }
3419
3420 fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
3422 if !self.blocks_are_compatible(current, next) {
3424 return false;
3425 }
3426
3427 let spacing = self.analyze_spacing_between(current, next);
3429 match spacing {
3430 BlockSpacing::Consecutive => true,
3431 BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
3432 BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
3433 self.can_merge_with_content_between(current, next)
3434 }
3435 }
3436 }
3437
3438 fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
3440 current.is_ordered == next.is_ordered
3441 && current.blockquote_prefix == next.blockquote_prefix
3442 && current.nesting_level == next.nesting_level
3443 }
3444
3445 fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
3447 let gap = next.start_line - current.end_line;
3448
3449 match gap {
3450 1 => BlockSpacing::Consecutive,
3451 2 => BlockSpacing::SingleBlank,
3452 _ if gap > 2 => {
3453 if self.has_only_blank_lines_between(current, next) {
3454 BlockSpacing::MultipleBlanks
3455 } else {
3456 BlockSpacing::ContentBetween
3457 }
3458 }
3459 _ => BlockSpacing::Consecutive, }
3461 }
3462
3463 fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3465 if has_meaningful_content_between(self.content, current, next, self.lines) {
3468 return false; }
3470
3471 !current.is_ordered && current.marker == next.marker
3473 }
3474
3475 fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3477 if has_meaningful_content_between(self.content, current, next, self.lines) {
3479 return false; }
3481
3482 current.is_ordered && next.is_ordered
3484 }
3485
3486 fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3488 for line_num in (current.end_line + 1)..next.start_line {
3489 if let Some(line_info) = self.lines.get(line_num - 1)
3490 && !line_info.content(self.content).trim().is_empty()
3491 {
3492 return false;
3493 }
3494 }
3495 true
3496 }
3497
3498 fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
3500 current.end_line = next.end_line;
3501 current.item_lines.extend_from_slice(&next.item_lines);
3502
3503 current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
3505
3506 if !current.is_ordered && self.markers_differ(¤t, next) {
3508 current.marker = None; }
3510
3511 current
3512 }
3513
3514 fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
3516 current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
3517 }
3518}
3519
/// How two list blocks are separated, as classified by
/// `ListBlockMerger::analyze_spacing_between` from `next.start_line - current.end_line`.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// The line gap is 0 or 1: `next` starts directly after `current` ends.
    Consecutive,
    /// The line gap is exactly 2: one line lies between the blocks. Its content is not
    /// inspected at classification time.
    SingleBlank,
    /// The line gap is larger than 2 and every line in between is blank.
    MultipleBlanks,
    /// The line gap is larger than 2 and at least one line in between is not blank.
    ContentBetween,
}
3528
3529fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
3531 for line_num in (current.end_line + 1)..next.start_line {
3533 if let Some(line_info) = lines.get(line_num - 1) {
3534 let trimmed = line_info.content(content).trim();
3536
3537 if trimmed.is_empty() {
3539 continue;
3540 }
3541
3542 if line_info.heading.is_some() {
3546 return true; }
3548
3549 if is_horizontal_rule(trimmed) {
3551 return true; }
3553
3554 if crate::utils::skip_context::is_table_line(trimmed) {
3556 return true; }
3558
3559 if trimmed.starts_with('>') {
3561 return true; }
3563
3564 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
3566 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3567
3568 let min_continuation_indent = if current.is_ordered {
3570 current.nesting_level + current.max_marker_width + 1 } else {
3572 current.nesting_level + 2
3573 };
3574
3575 if line_indent < min_continuation_indent {
3576 return true; }
3579 }
3580
3581 let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3583
3584 let min_indent = if current.is_ordered {
3586 current.nesting_level + current.max_marker_width
3587 } else {
3588 current.nesting_level + 2
3589 };
3590
3591 if line_indent < min_indent {
3593 return true; }
3595
3596 }
3599 }
3600
3601 false
3603}
3604
/// Reports whether `trimmed` is a Markdown horizontal rule (thematic break).
///
/// The line must start with `-`, `*` or `_`, contain at least three copies of that same
/// character, and otherwise consist only of spaces and tabs. The caller is expected to
/// pass the line with surrounding whitespace already removed; a leading space makes the
/// function return `false`.
///
/// # Examples
/// `"---"`, `"* * *"` and `"_____"` are rules; `"--"`, `"-*-"` and `"--- x"` are not.
fn is_horizontal_rule(trimmed: &str) -> bool {
    // Fewer than three bytes cannot hold three marker characters.
    if trimmed.len() < 3 {
        return false;
    }

    let marker = match trimmed.chars().next() {
        Some(c @ ('-' | '*' | '_')) => c,
        _ => return false,
    };

    // Single pass over the characters; no intermediate buffer needed.
    let mut marker_count = 0usize;
    for ch in trimmed.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' && ch != '\t' {
            // Anything other than the marker or inline whitespace disqualifies the line.
            return false;
        }
    }

    marker_count >= 3
}
3628
/// Unit tests for `LintContext` construction: offset mapping, per-line metadata,
/// list-item detection and MDX ESM-block detection.
#[cfg(test)]
mod tests {
    use super::*;

    /// Empty input has a single line offset, no `LineInfo` entries, and maps offset 0
    /// to line 1, column 1.
    #[test]
    fn test_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }

    /// A document without newlines is a single line starting at offset 0.
    #[test]
    fn test_single_line() {
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
    }

    /// Line offsets point at the first byte after each newline.
    #[test]
    fn test_multi_line() {
        let content = "# Title\n\nSecond line\nThird line";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
        // Start of line 1
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        // Start of line 2 (the empty line)
        assert_eq!(ctx.offset_to_line_col(8), (2, 1));
        // Start of line 3
        assert_eq!(ctx.offset_to_line_col(9), (3, 1));
        // Middle of line 3
        assert_eq!(ctx.offset_to_line_col(15), (3, 7));
        // Start of line 4
        assert_eq!(ctx.offset_to_line_col(21), (4, 1));
    }

    /// Per-line metadata: content, byte offset, indent, blank/code-block flags.
    #[test]
    fn test_line_info() {
        // The second line is indented by four spaces; the assertions below rely on it.
        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.lines.len(), 7);

        // Line 1: heading at column 0
        let line1 = &ctx.lines[0];
        assert_eq!(line1.content(ctx.content), "# Title");
        assert_eq!(line1.byte_offset, 0);
        assert_eq!(line1.indent, 0);
        assert!(!line1.is_blank);
        assert!(!line1.in_code_block);
        assert!(line1.list_item.is_none());

        // Line 2: indented text
        let line2 = &ctx.lines[1];
        assert_eq!(line2.content(ctx.content), "    indented");
        assert_eq!(line2.byte_offset, 8);
        assert_eq!(line2.indent, 4);
        assert!(!line2.is_blank);

        // Line 3: blank
        let line3 = &ctx.lines[2];
        assert_eq!(line3.content(ctx.content), "");
        assert!(line3.is_blank);

        // Accessors take 1-based line numbers
        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
    }

    /// Ordered and unordered list items are detected with the right marker and columns.
    #[test]
    fn test_list_item_detection() {
        // The nested bullet is indented by two spaces (asserted via `marker_column`).
        // The nested ordered item is indented by three spaces so that it sits under the
        // content of `1. `; no assertion depends on that line.
        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Line 1: top-level unordered item
        let line1 = &ctx.lines[0];
        assert!(line1.list_item.is_some());
        let list1 = line1.list_item.as_ref().unwrap();
        assert_eq!(list1.marker, "-");
        assert!(!list1.is_ordered);
        assert_eq!(list1.marker_column, 0);
        assert_eq!(list1.content_column, 2);

        // Line 2: nested unordered item
        let line2 = &ctx.lines[1];
        assert!(line2.list_item.is_some());
        let list2 = line2.list_item.as_ref().unwrap();
        assert_eq!(list2.marker, "*");
        assert_eq!(list2.marker_column, 2);

        // Line 3: ordered item
        let line3 = &ctx.lines[2];
        assert!(line3.list_item.is_some());
        let list3 = line3.list_item.as_ref().unwrap();
        assert_eq!(list3.marker, "1.");
        assert!(list3.is_ordered);
        assert_eq!(list3.number, Some(1));

        // Line 6: plain paragraph
        let line6 = &ctx.lines[5];
        assert!(line6.list_item.is_none());
    }

    /// Offsets on newlines and one past the end still map to a line/column pair.
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        let content = "a\nb\nc";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        // "a"
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        // newline ending line 1
        assert_eq!(ctx.offset_to_line_col(1), (1, 2));
        // "b"
        assert_eq!(ctx.offset_to_line_col(2), (2, 1));
        // newline ending line 2
        assert_eq!(ctx.offset_to_line_col(3), (2, 2));
        // "c"
        assert_eq!(ctx.offset_to_line_col(4), (3, 1));
        // one past the end of the content
        assert_eq!(ctx.offset_to_line_col(5), (3, 2));
    }

    /// In MDX, leading `import`/`export` lines are flagged as ESM; the rest is not.
    #[test]
    fn test_mdx_esm_blocks() {
        let content = r##"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall

In {year}, the snowfall was above average.
It was followed by a warm spring which caused
flood conditions in many of the nearby rivers.

<Chart color="#fcb32c" year={year} />
"##;

        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);

        // The trailing newline does not add an eleventh line.
        assert_eq!(ctx.lines.len(), 10);
        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
        assert!(
            !ctx.lines[3].in_esm_block,
            "Line 4 (heading) should NOT be in_esm_block"
        );
        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
    }

    /// The same `import`/`export` lines are ordinary text in the Standard flavor.
    #[test]
    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
        let content = r#"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert!(
            !ctx.lines[0].in_esm_block,
            "Line 1 should NOT be in_esm_block in Standard flavor"
        );
        assert!(
            !ctx.lines[1].in_esm_block,
            "Line 2 should NOT be in_esm_block in Standard flavor"
        );
    }
}