1pub mod types;
2pub use types::*;
3
4mod element_parsers;
5mod flavor_detection;
6mod heading_detection;
7mod line_computation;
8mod link_parser;
9mod list_blocks;
10#[cfg(test)]
11mod tests;
12
13use crate::config::MarkdownFlavor;
14use crate::inline_config::InlineConfig;
15use crate::rules::front_matter_utils::FrontMatterUtils;
16use crate::utils::code_block_utils::{CodeBlockDetail, CodeBlockUtils};
17use std::collections::HashMap;
18use std::path::PathBuf;
19
// Native (non-wasm32) variant of `profile_section!`.
//
// Evaluates `$code` exactly once and yields its value. The elapsed wall-clock
// time is always measured, but it is only printed to stderr (prefixed with
// `[PROFILE]` and the section `$name`) when `$profile` evaluates to true.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let timer = std::time::Instant::now();
        let value = $code;
        if $profile {
            let elapsed = timer.elapsed();
            eprintln!("[PROFILE] {}: {:?}", $name, elapsed);
        }
        value
    }};
}
32
// wasm32 variant of `profile_section!`: no timing is performed.
// `$name` and `$profile` are accepted so call sites look the same on every
// target, but they are never expanded; only `$code` is evaluated and returned.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
37
/// Borrowed bundle of byte-range lists that `LintContext::new` computes up
/// front and hands to `line_computation::compute_basic_line_info` as a single
/// argument.
pub(super) struct SkipByteRanges<'a> {
    /// Ranges returned by `skip_context::compute_html_comment_ranges`.
    pub(super) html_comment_ranges: &'a [crate::utils::skip_context::ByteRange],
    /// Ranges returned by `mkdocstrings_refs::detect_autodoc_block_ranges`.
    pub(super) autodoc_ranges: &'a [crate::utils::skip_context::ByteRange],
    /// Ranges returned by `pandoc::detect_div_block_ranges`
    /// (left empty by `LintContext::new` for non-Pandoc-compatible flavors).
    pub(super) pandoc_div_ranges: &'a [crate::utils::skip_context::ByteRange],
    /// Ranges returned by `pymdown_blocks::detect_block_ranges`
    /// (left empty by `LintContext::new` unless the flavor is MkDocs).
    pub(super) pymdown_block_ranges: &'a [crate::utils::skip_context::ByteRange],
}
46
47use std::sync::{Arc, OnceLock};
48
/// Map from a `usize` key to a per-list-item tuple, shared with the sibling modules.
// NOTE(review): presumably keyed by line number; the meaning of each tuple
// slot is defined by the code that builds the map — confirm there.
pub(super) type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;
51
/// List of `(usize, usize)` pairs, shared with the sibling modules.
// NOTE(review): presumably half-open `(start, end)` byte ranges, matching how
// the `(usize, usize)` range fields of `LintContext` are queried — confirm.
pub(super) type ByteRanges = Vec<(usize, usize)>;
54
55pub struct LintContext<'a> {
56 pub content: &'a str,
57 content_lines: Vec<&'a str>, pub line_offsets: Vec<usize>,
59 pub code_blocks: Vec<(usize, usize)>, pub code_block_details: Vec<CodeBlockDetail>, pub strong_spans: Vec<crate::utils::code_block_utils::StrongSpanDetail>, pub line_to_list: crate::utils::code_block_utils::LineToListMap, pub list_start_values: crate::utils::code_block_utils::ListStartValues, pub lines: Vec<LineInfo>, pub links: Vec<ParsedLink<'a>>, pub images: Vec<ParsedImage<'a>>, pub broken_links: Vec<BrokenLinkInfo>, pub footnote_refs: Vec<FootnoteRef>, pub reference_defs: Vec<ReferenceDef>, reference_defs_map: HashMap<String, usize>, code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>, math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>, pub list_blocks: Vec<ListBlock>, pub char_frequency: CharFrequency, html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>, emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>, table_rows_cache: OnceLock<Arc<Vec<TableRow>>>, bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>, has_mixed_list_nesting_cache: OnceLock<bool>, html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>, pub table_blocks: Vec<crate::utils::table_utils::TableBlock>, pub line_index: crate::utils::range_utils::LineIndex<'a>, jinja_ranges: Vec<(usize, usize)>, pub flavor: MarkdownFlavor, pub source_file: Option<PathBuf>, jsx_expression_ranges: Vec<(usize, usize)>, mdx_comment_ranges: Vec<(usize, usize)>, citation_ranges: Vec<crate::utils::skip_context::ByteRange>, pandoc_div_ranges: Vec<crate::utils::skip_context::ByteRange>, inline_footnote_ranges: Vec<crate::utils::skip_context::ByteRange>, pandoc_header_slugs: std::collections::HashSet<String>, example_list_marker_ranges: Vec<crate::utils::skip_context::ByteRange>, example_reference_ranges: Vec<crate::utils::skip_context::ByteRange>, sub_super_ranges: Vec<crate::utils::skip_context::ByteRange>, inline_code_attr_ranges: Vec<crate::utils::skip_context::ByteRange>, bracketed_span_ranges: Vec<crate::utils::skip_context::ByteRange>, line_block_ranges: Vec<crate::utils::skip_context::ByteRange>, pipe_table_caption_ranges: 
Vec<crate::utils::skip_context::ByteRange>, pandoc_metadata_ranges: Vec<crate::utils::skip_context::ByteRange>, grid_table_ranges: Vec<crate::utils::skip_context::ByteRange>, multi_line_table_ranges: Vec<crate::utils::skip_context::ByteRange>, shortcode_ranges: Vec<(usize, usize)>, link_title_ranges: Vec<(usize, usize)>, code_span_byte_ranges: Vec<(usize, usize)>, inline_config: InlineConfig, obsidian_comment_ranges: Vec<(usize, usize)>, lazy_cont_lines_cache: OnceLock<Arc<Vec<LazyContLine>>>, }
109
110impl<'a> LintContext<'a> {
111 pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
112 #[cfg(not(target_arch = "wasm32"))]
113 let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
114
115 let line_offsets = profile_section!("Line offsets", profile, {
116 let mut offsets = vec![0];
117 for (i, c) in content.char_indices() {
118 if c == '\n' {
119 offsets.push(i + 1);
120 }
121 }
122 offsets
123 });
124
125 let content_lines: Vec<&str> = content.lines().collect();
127
128 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
130
131 let parse_result = profile_section!(
133 "Code blocks",
134 profile,
135 CodeBlockUtils::detect_code_blocks_and_spans(content)
136 );
137 let mut code_blocks = parse_result.code_blocks;
138 let code_span_ranges = parse_result.code_spans;
139 let code_block_details = parse_result.code_block_details;
140 let strong_spans = parse_result.strong_spans;
141 let line_to_list = parse_result.line_to_list;
142 let list_start_values = parse_result.list_start_values;
143
144 let html_comment_ranges = profile_section!(
146 "HTML comment ranges",
147 profile,
148 crate::utils::skip_context::compute_html_comment_ranges(content)
149 );
150
151 let autodoc_ranges = profile_section!(
155 "Autodoc block ranges",
156 profile,
157 crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
158 );
159
160 let pandoc_div_ranges = profile_section!("Pandoc div ranges", profile, {
162 if flavor.is_pandoc_compatible() {
163 crate::utils::pandoc::detect_div_block_ranges(content)
164 } else {
165 Vec::new()
166 }
167 });
168
169 let pymdown_block_ranges = profile_section!("PyMdown block ranges", profile, {
171 if flavor == MarkdownFlavor::MkDocs {
172 crate::utils::pymdown_blocks::detect_block_ranges(content)
173 } else {
174 Vec::new()
175 }
176 });
177
178 let skip_ranges = SkipByteRanges {
181 html_comment_ranges: &html_comment_ranges,
182 autodoc_ranges: &autodoc_ranges,
183 pandoc_div_ranges: &pandoc_div_ranges,
184 pymdown_block_ranges: &pymdown_block_ranges,
185 };
186 let (mut lines, emphasis_spans) = profile_section!(
187 "Basic line info",
188 profile,
189 line_computation::compute_basic_line_info(
190 content,
191 &content_lines,
192 &line_offsets,
193 &code_blocks,
194 flavor,
195 &skip_ranges,
196 front_matter_end,
197 )
198 );
199
200 profile_section!(
202 "HTML blocks",
203 profile,
204 heading_detection::detect_html_blocks(content, &mut lines)
205 );
206
207 profile_section!(
209 "ESM blocks",
210 profile,
211 flavor_detection::detect_esm_blocks(content, &mut lines, flavor)
212 );
213
214 profile_section!(
216 "JSX block detection",
217 profile,
218 flavor_detection::detect_jsx_blocks(content, &mut lines, flavor)
219 );
220
221 let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
223 "JSX/MDX detection",
224 profile,
225 flavor_detection::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
226 );
227
228 profile_section!(
233 "Markdown-in-HTML blocks",
234 profile,
235 flavor_detection::detect_markdown_html_blocks(&content_lines, &mut lines)
236 );
237
238 profile_section!(
240 "MkDocs constructs",
241 profile,
242 flavor_detection::detect_mkdocs_line_info(&content_lines, &mut lines, flavor)
243 );
244
245 profile_section!(
250 "Footnote definitions",
251 profile,
252 detect_footnote_definitions(content, &mut lines, &line_offsets)
253 );
254
255 {
258 let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
259 for &(start, end) in &code_blocks {
260 let start_line = line_offsets
261 .partition_point(|&offset| offset <= start)
262 .saturating_sub(1);
263 let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
264
265 let mut sub_start: Option<usize> = None;
266 for (i, &offset) in line_offsets[start_line..end_line]
267 .iter()
268 .enumerate()
269 .map(|(j, o)| (j + start_line, o))
270 {
271 let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
272 if is_real_code && sub_start.is_none() {
273 let byte_start = if i == start_line { start } else { offset };
274 sub_start = Some(byte_start);
275 } else if !is_real_code && sub_start.is_some() {
276 new_code_blocks.push((sub_start.unwrap(), offset));
277 sub_start = None;
278 }
279 }
280 if let Some(s) = sub_start {
281 new_code_blocks.push((s, end));
282 }
283 }
284 code_blocks = new_code_blocks;
285 }
286
287 let has_markdown_html = lines.iter().any(|l| l.in_mkdocs_html_markdown);
295 if flavor == MarkdownFlavor::MkDocs || has_markdown_html {
296 let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
297 for &(start, end) in &code_blocks {
298 let start_line = line_offsets
299 .partition_point(|&offset| offset <= start)
300 .saturating_sub(1);
301 let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
302
303 let mut sub_start: Option<usize> = None;
305 for (i, &offset) in line_offsets[start_line..end_line]
306 .iter()
307 .enumerate()
308 .map(|(j, o)| (j + start_line, o))
309 {
310 let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
311 if is_real_code && sub_start.is_none() {
312 let byte_start = if i == start_line { start } else { offset };
313 sub_start = Some(byte_start);
314 } else if !is_real_code && sub_start.is_some() {
315 new_code_blocks.push((sub_start.unwrap(), offset));
316 sub_start = None;
317 }
318 }
319 if let Some(s) = sub_start {
320 new_code_blocks.push((s, end));
321 }
322 }
323 code_blocks = new_code_blocks;
324 }
325
326 if flavor.supports_jsx() {
330 let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
331 for &(start, end) in &code_blocks {
332 let start_line = line_offsets
333 .partition_point(|&offset| offset <= start)
334 .saturating_sub(1);
335 let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
336
337 let mut sub_start: Option<usize> = None;
338 for (i, &offset) in line_offsets[start_line..end_line]
339 .iter()
340 .enumerate()
341 .map(|(j, o)| (j + start_line, o))
342 {
343 let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
344 if is_real_code && sub_start.is_none() {
345 let byte_start = if i == start_line { start } else { offset };
346 sub_start = Some(byte_start);
347 } else if !is_real_code && sub_start.is_some() {
348 new_code_blocks.push((sub_start.unwrap(), offset));
349 sub_start = None;
350 }
351 }
352 if let Some(s) = sub_start {
353 new_code_blocks.push((s, end));
354 }
355 }
356 code_blocks = new_code_blocks;
357 }
358
359 profile_section!(
361 "Kramdown constructs",
362 profile,
363 flavor_detection::detect_kramdown_line_info(content, &mut lines, flavor)
364 );
365
366 for line in &mut lines {
371 if line.in_kramdown_extension_block {
372 line.list_item = None;
373 line.is_horizontal_rule = false;
374 line.blockquote = None;
375 line.is_kramdown_block_ial = false;
376 }
377 }
378
379 let obsidian_comment_ranges = profile_section!(
381 "Obsidian comments",
382 profile,
383 flavor_detection::detect_obsidian_comments(content, &mut lines, flavor, &code_span_ranges)
384 );
385
386 let pulldown_result = profile_section!(
390 "Links, images & link ranges",
391 profile,
392 link_parser::parse_links_images_pulldown(content, &lines, &code_blocks, flavor, &html_comment_ranges)
393 );
394
395 profile_section!(
397 "Headings & blockquotes",
398 profile,
399 heading_detection::detect_headings_and_blockquotes(
400 &content_lines,
401 &mut lines,
402 flavor,
403 &html_comment_ranges,
404 &pulldown_result.link_byte_ranges,
405 front_matter_end,
406 )
407 );
408
409 for line in &mut lines {
411 if line.in_kramdown_extension_block {
412 line.heading = None;
413 }
414 }
415
416 let mut code_spans = profile_section!(
418 "Code spans",
419 profile,
420 element_parsers::build_code_spans_from_ranges(content, &lines, &code_span_ranges)
421 );
422
423 if flavor == MarkdownFlavor::MkDocs {
427 let extra = profile_section!(
428 "MkDocs code spans",
429 profile,
430 element_parsers::scan_mkdocs_container_code_spans(content, &lines, &code_span_ranges,)
431 );
432 if !extra.is_empty() {
433 code_spans.extend(extra);
434 code_spans.sort_by_key(|span| span.byte_offset);
435 }
436 }
437
438 if flavor == MarkdownFlavor::MDX {
443 let extra = profile_section!(
444 "MDX JSX code spans",
445 profile,
446 element_parsers::scan_jsx_block_code_spans(content, &lines, &code_span_ranges)
447 );
448 if !extra.is_empty() {
449 code_spans.extend(extra);
450 code_spans.sort_by_key(|span| span.byte_offset);
451 }
452 }
453
454 for span in &code_spans {
457 if span.end_line > span.line {
458 for line_num in (span.line + 1)..=span.end_line {
460 if let Some(line_info) = lines.get_mut(line_num - 1) {
461 line_info.in_code_span_continuation = true;
462 }
463 }
464 }
465 }
466
467 let (links, images, broken_links, footnote_refs) = profile_section!(
469 "Links & images finalize",
470 profile,
471 link_parser::finalize_links_and_images(
472 content,
473 &lines,
474 &code_blocks,
475 &code_spans,
476 flavor,
477 &html_comment_ranges,
478 pulldown_result
479 )
480 );
481
482 let reference_defs = profile_section!(
483 "Reference defs",
484 profile,
485 link_parser::parse_reference_defs(content, &lines)
486 );
487
488 let list_blocks = profile_section!("List blocks", profile, list_blocks::parse_list_blocks(content, &lines));
489
490 let char_frequency = profile_section!(
492 "Char frequency",
493 profile,
494 line_computation::compute_char_frequency(content)
495 );
496
497 let table_blocks = profile_section!(
499 "Table blocks",
500 profile,
501 crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
502 content,
503 &code_blocks,
504 &code_spans,
505 &html_comment_ranges,
506 )
507 );
508
509 let links = links
512 .into_iter()
513 .filter(|link| !lines.get(link.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
514 .collect::<Vec<_>>();
515 let images = images
516 .into_iter()
517 .filter(|img| !lines.get(img.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
518 .collect::<Vec<_>>();
519 let broken_links = broken_links
520 .into_iter()
521 .filter(|bl| {
522 let line_idx = line_offsets
524 .partition_point(|&offset| offset <= bl.span.start)
525 .saturating_sub(1);
526 !lines.get(line_idx).is_some_and(|l| l.in_kramdown_extension_block)
527 })
528 .collect::<Vec<_>>();
529 let footnote_refs = footnote_refs
530 .into_iter()
531 .filter(|fr| !lines.get(fr.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
532 .collect::<Vec<_>>();
533 let reference_defs = reference_defs
534 .into_iter()
535 .filter(|def| !lines.get(def.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
536 .collect::<Vec<_>>();
537 let list_blocks = list_blocks
538 .into_iter()
539 .filter(|block| {
540 !lines
541 .get(block.start_line - 1)
542 .is_some_and(|l| l.in_kramdown_extension_block)
543 })
544 .collect::<Vec<_>>();
545 let table_blocks = table_blocks
546 .into_iter()
547 .filter(|block| {
548 !lines
550 .get(block.start_line)
551 .is_some_and(|l| l.in_kramdown_extension_block)
552 })
553 .collect::<Vec<_>>();
554 let emphasis_spans = emphasis_spans
555 .into_iter()
556 .filter(|span| !lines.get(span.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
557 .collect::<Vec<_>>();
558
559 let reference_defs_map: HashMap<String, usize> = reference_defs
561 .iter()
562 .enumerate()
563 .map(|(idx, def)| (def.id.to_lowercase(), idx))
564 .collect();
565
566 let link_title_ranges: Vec<(usize, usize)> = reference_defs
568 .iter()
569 .filter_map(|def| match (def.title_byte_start, def.title_byte_end) {
570 (Some(start), Some(end)) => Some((start, end)),
571 _ => None,
572 })
573 .collect();
574
575 let line_index = profile_section!(
577 "Line index",
578 profile,
579 crate::utils::range_utils::LineIndex::with_line_starts_and_code_blocks(
580 content,
581 line_offsets.clone(),
582 &code_blocks,
583 )
584 );
585
586 let jinja_ranges = profile_section!(
588 "Jinja ranges",
589 profile,
590 crate::utils::jinja_utils::find_jinja_ranges(content)
591 );
592
593 let citation_ranges = profile_section!("Citation ranges", profile, {
595 if flavor.is_pandoc_compatible() {
596 crate::utils::pandoc::find_citation_ranges(content)
597 } else {
598 Vec::new()
599 }
600 });
601
602 let inline_footnote_ranges = profile_section!("Inline footnote ranges", profile, {
604 if flavor.is_pandoc_compatible() {
605 crate::utils::pandoc::detect_inline_footnote_ranges(content)
606 } else {
607 Vec::new()
608 }
609 });
610
611 let pandoc_header_slugs = profile_section!("Pandoc header slugs", profile, {
613 if flavor.is_pandoc_compatible() {
614 crate::utils::pandoc::collect_pandoc_header_slugs(content)
615 } else {
616 std::collections::HashSet::new()
617 }
618 });
619
620 let example_list_marker_ranges = profile_section!("Example list markers", profile, {
622 if flavor.is_pandoc_compatible() {
623 crate::utils::pandoc::detect_example_list_marker_ranges(content)
624 } else {
625 Vec::new()
626 }
627 });
628
629 let example_reference_ranges = profile_section!("Example references", profile, {
631 if flavor.is_pandoc_compatible() {
632 crate::utils::pandoc::detect_example_reference_ranges(content, &example_list_marker_ranges)
633 } else {
634 Vec::new()
635 }
636 });
637
638 let sub_super_ranges = profile_section!("Subscript/superscript ranges", profile, {
640 if flavor.is_pandoc_compatible() {
641 crate::utils::pandoc::detect_subscript_superscript_ranges(content)
642 } else {
643 Vec::new()
644 }
645 });
646
647 let inline_code_attr_ranges = profile_section!("Inline code attribute ranges", profile, {
649 if flavor.is_pandoc_compatible() {
650 crate::utils::pandoc::detect_inline_code_attr_ranges(content)
651 } else {
652 Vec::new()
653 }
654 });
655
656 let bracketed_span_ranges = profile_section!("Bracketed span ranges", profile, {
658 if flavor.is_pandoc_compatible() {
659 crate::utils::pandoc::detect_bracketed_span_ranges(content)
660 } else {
661 Vec::new()
662 }
663 });
664
665 let line_block_ranges = profile_section!("Line block ranges", profile, {
667 if flavor.is_pandoc_compatible() {
668 crate::utils::pandoc::detect_line_block_ranges(content)
669 } else {
670 Vec::new()
671 }
672 });
673
674 let pipe_table_caption_ranges = profile_section!("Pipe-table caption ranges", profile, {
676 if flavor.is_pandoc_compatible() {
677 crate::utils::pandoc::detect_pipe_table_caption_ranges(content)
678 } else {
679 Vec::new()
680 }
681 });
682
683 let pandoc_metadata_ranges = profile_section!("Pandoc metadata ranges", profile, {
685 if flavor.is_pandoc_compatible() {
686 crate::utils::pandoc::detect_yaml_metadata_block_ranges(content)
687 } else {
688 Vec::new()
689 }
690 });
691
692 let grid_table_ranges = profile_section!("Grid table ranges", profile, {
694 if flavor.is_pandoc_compatible() {
695 crate::utils::pandoc::detect_grid_table_ranges(content)
696 } else {
697 Vec::new()
698 }
699 });
700
701 let multi_line_table_ranges = profile_section!("Multi-line table ranges", profile, {
703 if flavor.is_pandoc_compatible() {
704 crate::utils::pandoc::detect_multi_line_table_ranges(content)
705 } else {
706 Vec::new()
707 }
708 });
709
710 let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
712 use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
713 let mut ranges = Vec::new();
714 for mat in HUGO_SHORTCODE_REGEX.find_iter(content) {
715 ranges.push((mat.start(), mat.end()));
716 }
717 ranges
718 });
719
720 let inline_config = InlineConfig::from_content_with_code_blocks(content, &code_blocks);
721
722 Self {
723 content,
724 content_lines,
725 line_offsets,
726 code_blocks,
727 code_block_details,
728 strong_spans,
729 line_to_list,
730 list_start_values,
731 lines,
732 links,
733 images,
734 broken_links,
735 footnote_refs,
736 reference_defs,
737 reference_defs_map,
738 code_spans_cache: OnceLock::from(Arc::new(code_spans)),
739 math_spans_cache: OnceLock::new(), list_blocks,
741 char_frequency,
742 html_tags_cache: OnceLock::new(),
743 emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
744 table_rows_cache: OnceLock::new(),
745 bare_urls_cache: OnceLock::new(),
746 has_mixed_list_nesting_cache: OnceLock::new(),
747 html_comment_ranges,
748 table_blocks,
749 line_index,
750 jinja_ranges,
751 flavor,
752 source_file,
753 jsx_expression_ranges,
754 mdx_comment_ranges,
755 citation_ranges,
756 pandoc_div_ranges,
757 inline_footnote_ranges,
758 pandoc_header_slugs,
759 example_list_marker_ranges,
760 example_reference_ranges,
761 sub_super_ranges,
762 inline_code_attr_ranges,
763 bracketed_span_ranges,
764 line_block_ranges,
765 pipe_table_caption_ranges,
766 pandoc_metadata_ranges,
767 grid_table_ranges,
768 multi_line_table_ranges,
769 shortcode_ranges,
770 link_title_ranges,
771 code_span_byte_ranges: code_span_ranges,
772 inline_config,
773 obsidian_comment_ranges,
774 lazy_cont_lines_cache: OnceLock::new(),
775 }
776 }
777
778 #[inline]
781 fn binary_search_ranges(ranges: &[(usize, usize)], pos: usize) -> bool {
782 let idx = ranges.partition_point(|&(start, _)| start <= pos);
784 idx > 0 && pos < ranges[idx - 1].1
786 }
787
788 pub fn is_in_code_span_byte(&self, pos: usize) -> bool {
790 Self::binary_search_ranges(&self.code_span_byte_ranges, pos)
791 }
792
793 pub fn is_in_link(&self, pos: usize) -> bool {
795 let idx = self.links.partition_point(|link| link.byte_offset <= pos);
796 if idx > 0 && pos < self.links[idx - 1].byte_end {
797 return true;
798 }
799 let idx = self.images.partition_point(|img| img.byte_offset <= pos);
800 if idx > 0 && pos < self.images[idx - 1].byte_end {
801 return true;
802 }
803 self.is_in_reference_def(pos)
804 }
805
    /// Returns the inline configuration that `new` built from the content and
    /// its code blocks.
    pub fn inline_config(&self) -> &InlineConfig {
        &self.inline_config
    }
810
811 pub fn raw_lines(&self) -> &[&'a str] {
815 &self.content_lines
816 }
817
818 pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
823 self.inline_config.is_rule_disabled(rule_name, line_number)
824 }
825
826 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
828 Arc::clone(
829 self.code_spans_cache
830 .get_or_init(|| Arc::new(element_parsers::parse_code_spans(self.content, &self.lines))),
831 )
832 }
833
834 pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
836 Arc::clone(
837 self.math_spans_cache
838 .get_or_init(|| Arc::new(element_parsers::parse_math_spans(self.content, &self.lines))),
839 )
840 }
841
842 pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
844 let math_spans = self.math_spans();
845 let idx = math_spans.partition_point(|span| span.byte_offset <= byte_pos);
847 idx > 0 && byte_pos < math_spans[idx - 1].byte_end
848 }
849
850 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
852 &self.html_comment_ranges
853 }
854
855 pub fn is_in_obsidian_comment(&self, byte_pos: usize) -> bool {
859 Self::binary_search_ranges(&self.obsidian_comment_ranges, byte_pos)
860 }
861
862 pub fn is_position_in_obsidian_comment(&self, line_num: usize, col: usize) -> bool {
867 if self.obsidian_comment_ranges.is_empty() {
868 return false;
869 }
870
871 let byte_pos = self.line_index.line_col_to_byte_range(line_num, col).start;
873 self.is_in_obsidian_comment(byte_pos)
874 }
875
876 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
878 Arc::clone(self.html_tags_cache.get_or_init(|| {
879 let tags = element_parsers::parse_html_tags(self.content, &self.lines, &self.code_blocks, self.flavor);
880 Arc::new(
882 tags.into_iter()
883 .filter(|tag| {
884 !self
885 .lines
886 .get(tag.line - 1)
887 .is_some_and(|l| l.in_kramdown_extension_block)
888 })
889 .collect(),
890 )
891 }))
892 }
893
894 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
896 Arc::clone(
897 self.emphasis_spans_cache
898 .get()
899 .expect("emphasis_spans_cache initialized during construction"),
900 )
901 }
902
903 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
905 Arc::clone(
906 self.table_rows_cache
907 .get_or_init(|| Arc::new(element_parsers::parse_table_rows(self.content, &self.lines))),
908 )
909 }
910
911 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
913 Arc::clone(self.bare_urls_cache.get_or_init(|| {
914 Arc::new(element_parsers::parse_bare_urls(
915 self.content,
916 &self.lines,
917 &self.code_blocks,
918 ))
919 }))
920 }
921
922 pub fn lazy_continuation_lines(&self) -> Arc<Vec<LazyContLine>> {
924 Arc::clone(self.lazy_cont_lines_cache.get_or_init(|| {
925 Arc::new(element_parsers::detect_lazy_continuation_lines(
926 self.content,
927 &self.lines,
928 &self.line_offsets,
929 ))
930 }))
931 }
932
933 pub fn has_mixed_list_nesting(&self) -> bool {
937 *self
938 .has_mixed_list_nesting_cache
939 .get_or_init(|| self.compute_mixed_list_nesting())
940 }
941
942 fn compute_mixed_list_nesting(&self) -> bool {
944 let mut stack: Vec<(usize, bool)> = Vec::new();
949 let mut last_was_blank = false;
950
951 for line_info in &self.lines {
952 if line_info.in_code_block
954 || line_info.in_front_matter
955 || line_info.in_mkdocstrings
956 || line_info.in_html_comment
957 || line_info.in_mdx_comment
958 || line_info.in_esm_block
959 {
960 continue;
961 }
962
963 if line_info.is_blank {
965 last_was_blank = true;
966 continue;
967 }
968
969 if let Some(list_item) = &line_info.list_item {
970 let current_pos = if list_item.marker_column == 1 {
972 0
973 } else {
974 list_item.marker_column
975 };
976
977 if last_was_blank && current_pos == 0 {
979 stack.clear();
980 }
981 last_was_blank = false;
982
983 while let Some(&(pos, _)) = stack.last() {
985 if pos >= current_pos {
986 stack.pop();
987 } else {
988 break;
989 }
990 }
991
992 if let Some(&(_, parent_is_ordered)) = stack.last()
994 && parent_is_ordered != list_item.is_ordered
995 {
996 return true; }
998
999 stack.push((current_pos, list_item.is_ordered));
1000 } else {
1001 last_was_blank = false;
1003 }
1004 }
1005
1006 false
1007 }
1008
1009 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
1011 match self.line_offsets.binary_search(&offset) {
1012 Ok(line) => (line + 1, 1),
1013 Err(line) => {
1014 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
1015 (line, offset - line_start + 1)
1016 }
1017 }
1018 }
1019
1020 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
1022 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
1024 return true;
1025 }
1026
1027 self.is_byte_offset_in_code_span(pos)
1029 }
1030
1031 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
1033 if line_num > 0 {
1034 self.lines.get(line_num - 1)
1035 } else {
1036 None
1037 }
1038 }
1039
1040 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
1042 let normalized_id = ref_id.to_lowercase();
1043 self.reference_defs_map
1044 .get(&normalized_id)
1045 .map(|&idx| self.reference_defs[idx].url.as_str())
1046 }
1047
1048 pub fn is_in_list_block(&self, line_num: usize) -> bool {
1050 self.list_blocks
1051 .iter()
1052 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
1053 }
1054
1055 pub fn is_in_html_block(&self, line_num: usize) -> bool {
1057 if line_num == 0 || line_num > self.lines.len() {
1058 return false;
1059 }
1060 self.lines[line_num - 1].in_html_block
1061 }
1062
1063 pub fn is_in_table_block(&self, line_num: usize) -> bool {
1069 if line_num == 0 {
1070 return false;
1071 }
1072 let line_idx = line_num - 1;
1073 self.table_blocks
1074 .iter()
1075 .any(|block| line_idx >= block.start_line && line_idx <= block.end_line)
1076 }
1077
1078 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
1080 if line_num == 0 || line_num > self.lines.len() {
1081 return false;
1082 }
1083
1084 let col_0indexed = if col > 0 { col - 1 } else { 0 };
1088 let code_spans = self.code_spans();
1089 code_spans.iter().any(|span| {
1090 if line_num < span.line || line_num > span.end_line {
1092 return false;
1093 }
1094
1095 if span.line == span.end_line {
1096 col_0indexed >= span.start_col && col_0indexed < span.end_col
1098 } else if line_num == span.line {
1099 col_0indexed >= span.start_col
1101 } else if line_num == span.end_line {
1102 col_0indexed < span.end_col
1104 } else {
1105 true
1107 }
1108 })
1109 }
1110
1111 #[inline]
1113 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
1114 let code_spans = self.code_spans();
1115 let idx = code_spans.partition_point(|span| span.byte_offset <= byte_offset);
1116 idx > 0 && byte_offset < code_spans[idx - 1].byte_end
1117 }
1118
1119 #[inline]
1121 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
1122 let idx = self.reference_defs.partition_point(|rd| rd.byte_offset <= byte_pos);
1123 idx > 0 && byte_pos < self.reference_defs[idx - 1].byte_end
1124 }
1125
1126 #[inline]
1128 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
1129 let idx = self.html_comment_ranges.partition_point(|r| r.start <= byte_pos);
1130 idx > 0 && byte_pos < self.html_comment_ranges[idx - 1].end
1131 }
1132
1133 #[inline]
1136 pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1137 let tags = self.html_tags();
1138 let idx = tags.partition_point(|tag| tag.byte_offset <= byte_pos);
1139 idx > 0 && byte_pos < tags[idx - 1].byte_end
1140 }
1141
1142 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1144 Self::binary_search_ranges(&self.jinja_ranges, byte_pos)
1145 }
1146
1147 #[inline]
1149 pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
1150 Self::binary_search_ranges(&self.jsx_expression_ranges, byte_pos)
1151 }
1152
1153 #[inline]
1155 pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
1156 Self::binary_search_ranges(&self.mdx_comment_ranges, byte_pos)
1157 }
1158
1159 #[inline]
1162 pub fn is_in_citation(&self, byte_pos: usize) -> bool {
1163 let idx = self.citation_ranges.partition_point(|r| r.start <= byte_pos);
1164 idx > 0 && byte_pos < self.citation_ranges[idx - 1].end
1165 }
1166
1167 #[inline]
1169 pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
1170 &self.citation_ranges
1171 }
1172
1173 #[inline]
1176 pub fn is_in_div_block(&self, byte_pos: usize) -> bool {
1177 let idx = self.pandoc_div_ranges.partition_point(|r| r.start <= byte_pos);
1178 idx > 0 && byte_pos < self.pandoc_div_ranges[idx - 1].end
1179 }
1180
1181 #[inline]
1184 pub fn is_in_inline_footnote(&self, byte_pos: usize) -> bool {
1185 let idx = self.inline_footnote_ranges.partition_point(|r| r.start <= byte_pos);
1186 idx > 0 && byte_pos < self.inline_footnote_ranges[idx - 1].end
1187 }
1188
1189 #[inline]
1192 pub fn is_in_example_list_marker(&self, byte_pos: usize) -> bool {
1193 let idx = self.example_list_marker_ranges.partition_point(|r| r.start <= byte_pos);
1194 idx > 0 && byte_pos < self.example_list_marker_ranges[idx - 1].end
1195 }
1196
1197 #[inline]
1200 pub fn is_in_example_reference(&self, byte_pos: usize) -> bool {
1201 let idx = self.example_reference_ranges.partition_point(|r| r.start <= byte_pos);
1202 idx > 0 && byte_pos < self.example_reference_ranges[idx - 1].end
1203 }
1204
1205 #[inline]
1208 pub fn is_in_subscript_or_superscript(&self, byte_pos: usize) -> bool {
1209 let idx = self.sub_super_ranges.partition_point(|r| r.start <= byte_pos);
1210 idx > 0 && byte_pos < self.sub_super_ranges[idx - 1].end
1211 }
1212
1213 #[inline]
1217 pub fn is_in_inline_code_attr(&self, byte_pos: usize) -> bool {
1218 let idx = self.inline_code_attr_ranges.partition_point(|r| r.start <= byte_pos);
1219 idx > 0 && byte_pos < self.inline_code_attr_ranges[idx - 1].end
1220 }
1221
1222 #[inline]
1225 pub fn is_in_bracketed_span(&self, byte_pos: usize) -> bool {
1226 let idx = self.bracketed_span_ranges.partition_point(|r| r.start <= byte_pos);
1227 idx > 0 && byte_pos < self.bracketed_span_ranges[idx - 1].end
1228 }
1229
1230 #[inline]
1233 pub fn is_in_line_block(&self, byte_pos: usize) -> bool {
1234 let idx = self.line_block_ranges.partition_point(|r| r.start <= byte_pos);
1235 idx > 0 && byte_pos < self.line_block_ranges[idx - 1].end
1236 }
1237
1238 #[inline]
1242 pub fn is_in_pipe_table_caption(&self, byte_pos: usize) -> bool {
1243 let idx = self.pipe_table_caption_ranges.partition_point(|r| r.start <= byte_pos);
1244 idx > 0 && byte_pos < self.pipe_table_caption_ranges[idx - 1].end
1245 }
1246
1247 #[inline]
1250 pub fn is_in_pandoc_metadata(&self, byte_pos: usize) -> bool {
1251 let idx = self.pandoc_metadata_ranges.partition_point(|r| r.start <= byte_pos);
1252 idx > 0 && byte_pos < self.pandoc_metadata_ranges[idx - 1].end
1253 }
1254
1255 #[inline]
1258 pub fn is_in_grid_table(&self, byte_pos: usize) -> bool {
1259 let idx = self.grid_table_ranges.partition_point(|r| r.start <= byte_pos);
1260 idx > 0 && byte_pos < self.grid_table_ranges[idx - 1].end
1261 }
1262
1263 #[inline]
1266 pub fn is_in_multi_line_table(&self, byte_pos: usize) -> bool {
1267 let idx = self.multi_line_table_ranges.partition_point(|r| r.start <= byte_pos);
1268 idx > 0 && byte_pos < self.multi_line_table_ranges[idx - 1].end
1269 }
1270
1271 pub fn matches_implicit_header_reference(&self, link_text: &str) -> bool {
1276 let slug = crate::utils::pandoc::pandoc_header_slug(link_text);
1277 self.pandoc_header_slugs.contains(&slug)
1278 }
1279
1280 #[inline]
1286 pub fn has_pandoc_slug(&self, slug: &str) -> bool {
1287 self.pandoc_header_slugs.contains(slug)
1288 }
1289
1290 #[inline]
1292 pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
1293 Self::binary_search_ranges(&self.shortcode_ranges, byte_pos)
1294 }
1295
1296 #[inline]
1298 pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
1299 &self.shortcode_ranges
1300 }
1301
1302 pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1304 Self::binary_search_ranges(&self.link_title_ranges, byte_pos)
1305 }
1306
1307 pub fn has_char(&self, ch: char) -> bool {
1309 match ch {
1310 '#' => self.char_frequency.hash_count > 0,
1311 '*' => self.char_frequency.asterisk_count > 0,
1312 '_' => self.char_frequency.underscore_count > 0,
1313 '-' => self.char_frequency.hyphen_count > 0,
1314 '+' => self.char_frequency.plus_count > 0,
1315 '>' => self.char_frequency.gt_count > 0,
1316 '|' => self.char_frequency.pipe_count > 0,
1317 '[' => self.char_frequency.bracket_count > 0,
1318 '`' => self.char_frequency.backtick_count > 0,
1319 '<' => self.char_frequency.lt_count > 0,
1320 '!' => self.char_frequency.exclamation_count > 0,
1321 '\n' => self.char_frequency.newline_count > 0,
1322 _ => self.content.contains(ch), }
1324 }
1325
1326 pub fn char_count(&self, ch: char) -> usize {
1328 match ch {
1329 '#' => self.char_frequency.hash_count,
1330 '*' => self.char_frequency.asterisk_count,
1331 '_' => self.char_frequency.underscore_count,
1332 '-' => self.char_frequency.hyphen_count,
1333 '+' => self.char_frequency.plus_count,
1334 '>' => self.char_frequency.gt_count,
1335 '|' => self.char_frequency.pipe_count,
1336 '[' => self.char_frequency.bracket_count,
1337 '`' => self.char_frequency.backtick_count,
1338 '<' => self.char_frequency.lt_count,
1339 '!' => self.char_frequency.exclamation_count,
1340 '\n' => self.char_frequency.newline_count,
1341 _ => self.content.matches(ch).count(), }
1343 }
1344
1345 pub fn likely_has_headings(&self) -> bool {
1347 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 || self.content.contains('=') }
1349
1350 pub fn likely_has_lists(&self) -> bool {
1352 self.char_frequency.asterisk_count > 0
1353 || self.char_frequency.hyphen_count > 0
1354 || self.char_frequency.plus_count > 0
1355 }
1356
1357 pub fn likely_has_emphasis(&self) -> bool {
1359 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1360 }
1361
1362 pub fn likely_has_tables(&self) -> bool {
1364 self.char_frequency.pipe_count > 2
1365 }
1366
1367 pub fn likely_has_blockquotes(&self) -> bool {
1369 self.char_frequency.gt_count > 0
1370 }
1371
1372 pub fn likely_has_code(&self) -> bool {
1374 self.char_frequency.backtick_count > 0
1375 }
1376
1377 pub fn likely_has_links_or_images(&self) -> bool {
1379 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1380 }
1381
1382 pub fn likely_has_html(&self) -> bool {
1384 self.char_frequency.lt_count > 0
1385 }
1386
1387 pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1392 if let Some(line_info) = self.lines.get(line_idx)
1393 && let Some(ref bq) = line_info.blockquote
1394 {
1395 bq.prefix.trim_end().to_string()
1396 } else {
1397 String::new()
1398 }
1399 }
1400
1401 #[inline]
1407 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1408 let idx = match lines.binary_search_by(|line| {
1410 if byte_offset < line.byte_offset {
1411 std::cmp::Ordering::Greater
1412 } else if byte_offset > line.byte_offset + line.byte_len {
1413 std::cmp::Ordering::Less
1414 } else {
1415 std::cmp::Ordering::Equal
1416 }
1417 }) {
1418 Ok(idx) => idx,
1419 Err(idx) => idx.saturating_sub(1),
1420 };
1421
1422 let line = &lines[idx];
1423 let line_num = idx + 1;
1424 let col = byte_offset.saturating_sub(line.byte_offset);
1425
1426 (idx, line_num, col)
1427 }
1428
1429 #[inline]
1431 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1432 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1434
1435 if idx > 0 {
1437 let span = &code_spans[idx - 1];
1438 if offset >= span.byte_offset && offset < span.byte_end {
1439 return true;
1440 }
1441 }
1442
1443 false
1444 }
1445
1446 #[must_use]
1466 pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
1467 ValidHeadingsIter::new(&self.lines)
1468 }
1469
1470 #[must_use]
1474 pub fn has_valid_headings(&self) -> bool {
1475 self.lines
1476 .iter()
1477 .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
1478 }
1479}
1480
1481fn detect_footnote_definitions(content: &str, lines: &mut [types::LineInfo], line_offsets: &[usize]) {
1490 use pulldown_cmark::{CodeBlockKind, Event, Parser, Tag, TagEnd};
1491
1492 let options = crate::utils::rumdl_parser_options();
1493 let parser = Parser::new_ext(content, options).into_offset_iter();
1494
1495 let mut footnote_ranges: Vec<(usize, usize)> = Vec::new();
1497 let mut fenced_code_ranges: Vec<(usize, usize)> = Vec::new();
1498 let mut in_footnote = false;
1499
1500 for (event, range) in parser {
1501 match event {
1502 Event::Start(Tag::FootnoteDefinition(_)) => {
1503 in_footnote = true;
1504 footnote_ranges.push((range.start, range.end));
1505 }
1506 Event::End(TagEnd::FootnoteDefinition) => {
1507 in_footnote = false;
1508 }
1509 Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(_))) if in_footnote => {
1510 fenced_code_ranges.push((range.start, range.end));
1511 }
1512 _ => {}
1513 }
1514 }
1515
1516 let byte_to_line = |byte_offset: usize| -> usize {
1517 line_offsets
1518 .partition_point(|&offset| offset <= byte_offset)
1519 .saturating_sub(1)
1520 };
1521
1522 for &(start, end) in &footnote_ranges {
1524 let start_line = byte_to_line(start);
1525 let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
1526
1527 for line in &mut lines[start_line..end_line] {
1528 line.in_footnote_definition = true;
1529 line.in_code_block = false;
1530 }
1531 }
1532
1533 for &(start, end) in &fenced_code_ranges {
1535 let start_line = byte_to_line(start);
1536 let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
1537
1538 for line in &mut lines[start_line..end_line] {
1539 line.in_code_block = true;
1540 }
1541 }
1542}