1pub mod types;
2pub use types::*;
3
4mod element_parsers;
5mod flavor_detection;
6mod heading_detection;
7mod line_computation;
8mod link_parser;
9mod list_blocks;
10#[cfg(test)]
11mod tests;
12
13use crate::config::MarkdownFlavor;
14use crate::inline_config::InlineConfig;
15use crate::rules::front_matter_utils::FrontMatterUtils;
16use crate::utils::code_block_utils::{CodeBlockDetail, CodeBlockUtils};
17use std::collections::HashMap;
18use std::path::PathBuf;
19
/// Evaluates `$code` and yields its value, timing the evaluation on the way.
///
/// When `$profile` is true, the elapsed wall-clock time is written to stderr as
/// `[PROFILE] <name>: <duration>`. The clock is started unconditionally and `$profile`
/// is only inspected after `$code` has finished.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let timer = std::time::Instant::now();
        let value = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, timer.elapsed());
        }
        value
    }};
}
32
/// WebAssembly variant of `profile_section!`.
///
/// Expands to `$code` alone: `$name` and `$profile` are accepted so call sites stay
/// identical across targets, but they are never evaluated and nothing is timed or printed.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
37
/// Borrowed bundle of pre-computed byte ranges handed to
/// `line_computation::compute_basic_line_info` by `LintContext::new`.
pub(super) struct SkipByteRanges<'a> {
    /// Ranges returned by `skip_context::compute_html_comment_ranges`.
    pub(super) html_comment_ranges: &'a [crate::utils::skip_context::ByteRange],
    /// Ranges returned by `mkdocstrings_refs::detect_autodoc_block_ranges`
    /// (empty when the flavor supports colon code fences).
    pub(super) autodoc_ranges: &'a [crate::utils::skip_context::ByteRange],
    /// Ranges returned by `pandoc::detect_div_block_ranges`
    /// (empty unless the flavor is Pandoc-compatible).
    pub(super) pandoc_div_ranges: &'a [crate::utils::skip_context::ByteRange],
    /// Ranges returned by `pymdown_blocks::detect_block_ranges`
    /// (empty unless the flavor is MkDocs).
    pub(super) pymdown_block_ranges: &'a [crate::utils::skip_context::ByteRange],
}
46
47use std::sync::{Arc, OnceLock};
48
/// Map from a `usize` key to a five-element list-item tuple, shared with the submodules.
// NOTE(review): the key is presumably a line number and the tuple fields presumably describe
// the list marker (ordered flag, marker text, columns, optional number) — confirm against the
// code in the submodules that builds and reads this map; nothing in this file shows it.
pub(super) type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;
51
/// A list of `(usize, usize)` pairs, shared with the submodules.
// NOTE(review): presumably `(start, end)` byte offsets like the other range vectors in
// `LintContext` — confirm at the use sites.
pub(super) type ByteRanges = Vec<(usize, usize)>;
54
/// Pre-parsed view of one Markdown document, built once by [`LintContext::new`].
///
/// Public fields are filled eagerly by the constructor. The `*_cache` fields hold
/// results that are either seeded by the constructor or computed on first access
/// through the matching accessor method.
pub struct LintContext<'a> {
    /// The Markdown source text passed to `new`.
    pub content: &'a str,
    /// `content.lines()` collected once; exposed through `raw_lines`.
    content_lines: Vec<&'a str>,
    /// Byte offset at which each line starts; the first entry is always 0.
    pub line_offsets: Vec<usize>,
    /// `(start, end)` byte ranges of code blocks, after `new` has re-split them against the
    /// per-line `in_code_block` flags and merged in any colon-fence ranges.
    pub code_blocks: Vec<(usize, usize)>,
    /// `code_block_details` as returned by `CodeBlockUtils::detect_code_blocks_and_spans`.
    pub code_block_details: Vec<CodeBlockDetail>,
    /// `strong_spans` as returned by `CodeBlockUtils::detect_code_blocks_and_spans`.
    pub strong_spans: Vec<crate::utils::code_block_utils::StrongSpanDetail>,
    /// `line_to_list` as returned by `CodeBlockUtils::detect_code_blocks_and_spans`.
    pub line_to_list: crate::utils::code_block_utils::LineToListMap,
    /// `list_start_values` as returned by `CodeBlockUtils::detect_code_blocks_and_spans`.
    pub list_start_values: crate::utils::code_block_utils::ListStartValues,
    /// Per-line information; index `n - 1` describes 1-based line `n`.
    pub lines: Vec<LineInfo>,
    /// Parsed links, excluding those on lines inside a kramdown extension block.
    pub links: Vec<ParsedLink<'a>>,
    /// Parsed images, excluding those on lines inside a kramdown extension block.
    pub images: Vec<ParsedImage<'a>>,
    /// Broken links, excluding those starting inside a kramdown extension block.
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Footnote references, excluding those inside a kramdown extension block.
    pub footnote_refs: Vec<FootnoteRef>,
    /// Reference definitions, excluding those inside a kramdown extension block.
    pub reference_defs: Vec<ReferenceDef>,
    /// Lower-cased definition id -> index into `reference_defs`.
    reference_defs_map: HashMap<String, usize>,
    /// Code spans; seeded by `new`, read through `code_spans()`.
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>,
    /// Math spans; computed on first call to `math_spans()`.
    math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>,
    /// List blocks, excluding those starting inside a kramdown extension block.
    pub list_blocks: Vec<ListBlock>,
    /// Result of `line_computation::compute_char_frequency(content)`.
    pub char_frequency: CharFrequency,
    /// HTML tags; computed on first call to `html_tags()`.
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>,
    /// Emphasis spans; seeded by `new`, read through `emphasis_spans()`.
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>,
    /// Table rows; computed on first call to `table_rows()`.
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>,
    /// Bare URLs; computed on first call to `bare_urls()`.
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>,
    /// Memoised result of `compute_mixed_list_nesting()`.
    has_mixed_list_nesting_cache: OnceLock<bool>,
    /// Ranges returned by `skip_context::compute_html_comment_ranges`.
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Table blocks (their `start_line` is used as a 0-based index into `lines`), excluding
    /// those starting inside a kramdown extension block.
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Line index built from `content`, a copy of `line_offsets`, and `code_blocks`.
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    /// Ranges returned by `jinja_utils::find_jinja_ranges`.
    jinja_ranges: Vec<(usize, usize)>,
    /// The Markdown flavor passed to `new`.
    pub flavor: MarkdownFlavor,
    /// The source path passed to `new`, if any.
    pub source_file: Option<PathBuf>,
    /// First element returned by `flavor_detection::detect_jsx_and_mdx_comments`.
    jsx_expression_ranges: Vec<(usize, usize)>,
    /// Second element returned by `flavor_detection::detect_jsx_and_mdx_comments`.
    mdx_comment_ranges: Vec<(usize, usize)>,
    /// Pandoc citation ranges (empty unless the flavor is Pandoc-compatible).
    citation_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Pandoc div block ranges (empty unless the flavor is Pandoc-compatible).
    pandoc_div_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Ranges returned by `flavor_detection::detect_azure_colon_fences`.
    colon_fence_ranges: Vec<(usize, usize)>,
    /// Pandoc inline footnote ranges (empty unless the flavor is Pandoc-compatible).
    inline_footnote_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Pandoc header slugs (empty unless the flavor is Pandoc-compatible).
    pandoc_header_slugs: std::collections::HashSet<String>,
    /// Pandoc example-list marker ranges (empty unless the flavor is Pandoc-compatible).
    example_list_marker_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Pandoc example reference ranges (empty unless the flavor is Pandoc-compatible).
    example_reference_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Pandoc subscript/superscript ranges (empty unless the flavor is Pandoc-compatible).
    sub_super_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Pandoc inline code attribute ranges (empty unless the flavor is Pandoc-compatible).
    inline_code_attr_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Pandoc bracketed span ranges (empty unless the flavor is Pandoc-compatible).
    bracketed_span_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Pandoc line block ranges (empty unless the flavor is Pandoc-compatible).
    line_block_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Pandoc pipe-table caption ranges (empty unless the flavor is Pandoc-compatible).
    pipe_table_caption_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Pandoc YAML metadata block ranges (empty unless the flavor is Pandoc-compatible).
    pandoc_metadata_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Pandoc grid table ranges (empty unless the flavor is Pandoc-compatible).
    grid_table_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Pandoc multi-line table ranges (empty unless the flavor is Pandoc-compatible).
    multi_line_table_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// `(start, end)` of every `HUGO_SHORTCODE_REGEX` match in `content`.
    shortcode_ranges: Vec<(usize, usize)>,
    /// `(title_byte_start, title_byte_end)` of every reference definition that has both.
    link_title_ranges: Vec<(usize, usize)>,
    /// `code_spans` as returned by `CodeBlockUtils::detect_code_blocks_and_spans`.
    code_span_byte_ranges: Vec<(usize, usize)>,
    /// Inline configuration built by `InlineConfig::from_content_with_code_blocks`.
    inline_config: InlineConfig,
    /// Ranges returned by `flavor_detection::detect_obsidian_comments`.
    obsidian_comment_ranges: Vec<(usize, usize)>,
    /// Lazy continuation lines; computed on first call to `lazy_continuation_lines()`.
    lazy_cont_lines_cache: OnceLock<Arc<Vec<LazyContLine>>>,
}
110
111impl<'a> LintContext<'a> {
112 pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
113 #[cfg(not(target_arch = "wasm32"))]
114 let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
115
116 let line_offsets = profile_section!("Line offsets", profile, {
117 let mut offsets = vec![0];
118 for (i, c) in content.char_indices() {
119 if c == '\n' {
120 offsets.push(i + 1);
121 }
122 }
123 offsets
124 });
125
126 let content_lines: Vec<&str> = content.lines().collect();
128
129 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
131
132 let parse_result = profile_section!(
134 "Code blocks",
135 profile,
136 CodeBlockUtils::detect_code_blocks_and_spans(content)
137 );
138 let mut code_blocks = parse_result.code_blocks;
139 let code_span_ranges = parse_result.code_spans;
140 let code_block_details = parse_result.code_block_details;
141 let strong_spans = parse_result.strong_spans;
142 let line_to_list = parse_result.line_to_list;
143 let list_start_values = parse_result.list_start_values;
144
145 let html_comment_ranges = profile_section!(
147 "HTML comment ranges",
148 profile,
149 crate::utils::skip_context::compute_html_comment_ranges(content)
150 );
151
152 let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
156 if flavor.supports_colon_code_fences() {
157 Vec::new()
158 } else {
159 crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
160 }
161 });
162
163 let pandoc_div_ranges = profile_section!("Pandoc div ranges", profile, {
165 if flavor.is_pandoc_compatible() {
166 crate::utils::pandoc::detect_div_block_ranges(content)
167 } else {
168 Vec::new()
169 }
170 });
171
172 let pymdown_block_ranges = profile_section!("PyMdown block ranges", profile, {
174 if flavor == MarkdownFlavor::MkDocs {
175 crate::utils::pymdown_blocks::detect_block_ranges(content)
176 } else {
177 Vec::new()
178 }
179 });
180
181 let skip_ranges = SkipByteRanges {
184 html_comment_ranges: &html_comment_ranges,
185 autodoc_ranges: &autodoc_ranges,
186 pandoc_div_ranges: &pandoc_div_ranges,
187 pymdown_block_ranges: &pymdown_block_ranges,
188 };
189 let (mut lines, emphasis_spans) = profile_section!(
190 "Basic line info",
191 profile,
192 line_computation::compute_basic_line_info(
193 content,
194 &content_lines,
195 &line_offsets,
196 &code_blocks,
197 flavor,
198 &skip_ranges,
199 front_matter_end,
200 )
201 );
202
203 profile_section!(
205 "HTML blocks",
206 profile,
207 heading_detection::detect_html_blocks(content, &mut lines)
208 );
209
210 profile_section!(
212 "ESM blocks",
213 profile,
214 flavor_detection::detect_esm_blocks(content, &mut lines, flavor)
215 );
216
217 profile_section!(
219 "JSX block detection",
220 profile,
221 flavor_detection::detect_jsx_blocks(content, &mut lines, flavor)
222 );
223
224 let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
226 "JSX/MDX detection",
227 profile,
228 flavor_detection::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
229 );
230
231 profile_section!(
236 "Markdown-in-HTML blocks",
237 profile,
238 flavor_detection::detect_markdown_html_blocks(&content_lines, &mut lines)
239 );
240
241 profile_section!(
243 "MkDocs constructs",
244 profile,
245 flavor_detection::detect_mkdocs_line_info(&content_lines, &mut lines, flavor)
246 );
247
248 profile_section!(
253 "Footnote definitions",
254 profile,
255 detect_footnote_definitions(content, &mut lines, &line_offsets)
256 );
257
258 {
261 let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
262 for &(start, end) in &code_blocks {
263 let start_line = line_offsets
264 .partition_point(|&offset| offset <= start)
265 .saturating_sub(1);
266 let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
267
268 let mut sub_start: Option<usize> = None;
269 for (i, &offset) in line_offsets[start_line..end_line]
270 .iter()
271 .enumerate()
272 .map(|(j, o)| (j + start_line, o))
273 {
274 let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
275 if is_real_code && sub_start.is_none() {
276 let byte_start = if i == start_line { start } else { offset };
277 sub_start = Some(byte_start);
278 } else if !is_real_code && sub_start.is_some() {
279 new_code_blocks.push((sub_start.unwrap(), offset));
280 sub_start = None;
281 }
282 }
283 if let Some(s) = sub_start {
284 new_code_blocks.push((s, end));
285 }
286 }
287 code_blocks = new_code_blocks;
288 }
289
290 let has_markdown_html = lines.iter().any(|l| l.in_mkdocs_html_markdown);
298 if flavor == MarkdownFlavor::MkDocs || has_markdown_html {
299 let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
300 for &(start, end) in &code_blocks {
301 let start_line = line_offsets
302 .partition_point(|&offset| offset <= start)
303 .saturating_sub(1);
304 let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
305
306 let mut sub_start: Option<usize> = None;
308 for (i, &offset) in line_offsets[start_line..end_line]
309 .iter()
310 .enumerate()
311 .map(|(j, o)| (j + start_line, o))
312 {
313 let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
314 if is_real_code && sub_start.is_none() {
315 let byte_start = if i == start_line { start } else { offset };
316 sub_start = Some(byte_start);
317 } else if !is_real_code && sub_start.is_some() {
318 new_code_blocks.push((sub_start.unwrap(), offset));
319 sub_start = None;
320 }
321 }
322 if let Some(s) = sub_start {
323 new_code_blocks.push((s, end));
324 }
325 }
326 code_blocks = new_code_blocks;
327 }
328
329 if flavor.supports_jsx() {
333 let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
334 for &(start, end) in &code_blocks {
335 let start_line = line_offsets
336 .partition_point(|&offset| offset <= start)
337 .saturating_sub(1);
338 let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
339
340 let mut sub_start: Option<usize> = None;
341 for (i, &offset) in line_offsets[start_line..end_line]
342 .iter()
343 .enumerate()
344 .map(|(j, o)| (j + start_line, o))
345 {
346 let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
347 if is_real_code && sub_start.is_none() {
348 let byte_start = if i == start_line { start } else { offset };
349 sub_start = Some(byte_start);
350 } else if !is_real_code && sub_start.is_some() {
351 new_code_blocks.push((sub_start.unwrap(), offset));
352 sub_start = None;
353 }
354 }
355 if let Some(s) = sub_start {
356 new_code_blocks.push((s, end));
357 }
358 }
359 code_blocks = new_code_blocks;
360 }
361
362 let colon_fence_ranges = profile_section!(
365 "Azure colon fence detection",
366 profile,
367 flavor_detection::detect_azure_colon_fences(content, &mut lines, flavor)
368 );
369 if !colon_fence_ranges.is_empty() {
370 code_blocks.extend(colon_fence_ranges.iter().copied());
371 code_blocks.sort_by_key(|&(start, _)| start);
372 }
373
374 profile_section!(
376 "Kramdown constructs",
377 profile,
378 flavor_detection::detect_kramdown_line_info(content, &mut lines, flavor)
379 );
380
381 for line in &mut lines {
386 if line.in_kramdown_extension_block {
387 line.list_item = None;
388 line.is_horizontal_rule = false;
389 line.blockquote = None;
390 line.is_kramdown_block_ial = false;
391 }
392 }
393
394 let obsidian_comment_ranges = profile_section!(
396 "Obsidian comments",
397 profile,
398 flavor_detection::detect_obsidian_comments(content, &mut lines, flavor, &code_span_ranges)
399 );
400
401 let pulldown_result = profile_section!(
405 "Links, images & link ranges",
406 profile,
407 link_parser::parse_links_images_pulldown(content, &lines, &code_blocks, flavor, &html_comment_ranges)
408 );
409
410 profile_section!(
412 "Headings & blockquotes",
413 profile,
414 heading_detection::detect_headings_and_blockquotes(
415 &content_lines,
416 &mut lines,
417 flavor,
418 &html_comment_ranges,
419 &pulldown_result.link_byte_ranges,
420 front_matter_end,
421 )
422 );
423
424 for line in &mut lines {
426 if line.in_kramdown_extension_block {
427 line.heading = None;
428 }
429 }
430
431 let mut code_spans = profile_section!(
433 "Code spans",
434 profile,
435 element_parsers::build_code_spans_from_ranges(content, &lines, &code_span_ranges)
436 );
437
438 if flavor == MarkdownFlavor::MkDocs {
442 let extra = profile_section!(
443 "MkDocs code spans",
444 profile,
445 element_parsers::scan_mkdocs_container_code_spans(content, &lines, &code_span_ranges,)
446 );
447 if !extra.is_empty() {
448 code_spans.extend(extra);
449 code_spans.sort_by_key(|span| span.byte_offset);
450 }
451 }
452
453 if flavor == MarkdownFlavor::MDX {
458 let extra = profile_section!(
459 "MDX JSX code spans",
460 profile,
461 element_parsers::scan_jsx_block_code_spans(content, &lines, &code_span_ranges)
462 );
463 if !extra.is_empty() {
464 code_spans.extend(extra);
465 code_spans.sort_by_key(|span| span.byte_offset);
466 }
467 }
468
469 for span in &code_spans {
472 if span.end_line > span.line {
473 for line_num in (span.line + 1)..=span.end_line {
475 if let Some(line_info) = lines.get_mut(line_num - 1) {
476 line_info.in_code_span_continuation = true;
477 }
478 }
479 }
480 }
481
482 let (links, images, broken_links, footnote_refs) = profile_section!(
484 "Links & images finalize",
485 profile,
486 link_parser::finalize_links_and_images(
487 content,
488 &lines,
489 &code_blocks,
490 &code_spans,
491 flavor,
492 &html_comment_ranges,
493 pulldown_result
494 )
495 );
496
497 let reference_defs = profile_section!(
498 "Reference defs",
499 profile,
500 link_parser::parse_reference_defs(content, &lines)
501 );
502
503 let list_blocks = profile_section!("List blocks", profile, list_blocks::parse_list_blocks(content, &lines));
504
505 let char_frequency = profile_section!(
507 "Char frequency",
508 profile,
509 line_computation::compute_char_frequency(content)
510 );
511
512 let table_blocks = profile_section!(
514 "Table blocks",
515 profile,
516 crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
517 content,
518 &code_blocks,
519 &code_spans,
520 &html_comment_ranges,
521 )
522 );
523
524 let links = links
527 .into_iter()
528 .filter(|link| !lines.get(link.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
529 .collect::<Vec<_>>();
530 let images = images
531 .into_iter()
532 .filter(|img| !lines.get(img.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
533 .collect::<Vec<_>>();
534 let broken_links = broken_links
535 .into_iter()
536 .filter(|bl| {
537 let line_idx = line_offsets
539 .partition_point(|&offset| offset <= bl.span.start)
540 .saturating_sub(1);
541 !lines.get(line_idx).is_some_and(|l| l.in_kramdown_extension_block)
542 })
543 .collect::<Vec<_>>();
544 let footnote_refs = footnote_refs
545 .into_iter()
546 .filter(|fr| !lines.get(fr.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
547 .collect::<Vec<_>>();
548 let reference_defs = reference_defs
549 .into_iter()
550 .filter(|def| !lines.get(def.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
551 .collect::<Vec<_>>();
552 let list_blocks = list_blocks
553 .into_iter()
554 .filter(|block| {
555 !lines
556 .get(block.start_line - 1)
557 .is_some_and(|l| l.in_kramdown_extension_block)
558 })
559 .collect::<Vec<_>>();
560 let table_blocks = table_blocks
561 .into_iter()
562 .filter(|block| {
563 !lines
565 .get(block.start_line)
566 .is_some_and(|l| l.in_kramdown_extension_block)
567 })
568 .collect::<Vec<_>>();
569 let emphasis_spans = emphasis_spans
570 .into_iter()
571 .filter(|span| !lines.get(span.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
572 .collect::<Vec<_>>();
573
574 let reference_defs_map: HashMap<String, usize> = reference_defs
576 .iter()
577 .enumerate()
578 .map(|(idx, def)| (def.id.to_lowercase(), idx))
579 .collect();
580
581 let link_title_ranges: Vec<(usize, usize)> = reference_defs
583 .iter()
584 .filter_map(|def| match (def.title_byte_start, def.title_byte_end) {
585 (Some(start), Some(end)) => Some((start, end)),
586 _ => None,
587 })
588 .collect();
589
590 let line_index = profile_section!(
592 "Line index",
593 profile,
594 crate::utils::range_utils::LineIndex::with_line_starts_and_code_blocks(
595 content,
596 line_offsets.clone(),
597 &code_blocks,
598 )
599 );
600
601 let jinja_ranges = profile_section!(
603 "Jinja ranges",
604 profile,
605 crate::utils::jinja_utils::find_jinja_ranges(content)
606 );
607
608 let citation_ranges = profile_section!("Citation ranges", profile, {
610 if flavor.is_pandoc_compatible() {
611 crate::utils::pandoc::find_citation_ranges(content)
612 } else {
613 Vec::new()
614 }
615 });
616
617 let inline_footnote_ranges = profile_section!("Inline footnote ranges", profile, {
619 if flavor.is_pandoc_compatible() {
620 crate::utils::pandoc::detect_inline_footnote_ranges(content)
621 } else {
622 Vec::new()
623 }
624 });
625
626 let pandoc_header_slugs = profile_section!("Pandoc header slugs", profile, {
628 if flavor.is_pandoc_compatible() {
629 crate::utils::pandoc::collect_pandoc_header_slugs(content)
630 } else {
631 std::collections::HashSet::new()
632 }
633 });
634
635 let example_list_marker_ranges = profile_section!("Example list markers", profile, {
637 if flavor.is_pandoc_compatible() {
638 crate::utils::pandoc::detect_example_list_marker_ranges(content)
639 } else {
640 Vec::new()
641 }
642 });
643
644 let example_reference_ranges = profile_section!("Example references", profile, {
646 if flavor.is_pandoc_compatible() {
647 crate::utils::pandoc::detect_example_reference_ranges(content, &example_list_marker_ranges)
648 } else {
649 Vec::new()
650 }
651 });
652
653 let sub_super_ranges = profile_section!("Subscript/superscript ranges", profile, {
655 if flavor.is_pandoc_compatible() {
656 crate::utils::pandoc::detect_subscript_superscript_ranges(content)
657 } else {
658 Vec::new()
659 }
660 });
661
662 let inline_code_attr_ranges = profile_section!("Inline code attribute ranges", profile, {
664 if flavor.is_pandoc_compatible() {
665 crate::utils::pandoc::detect_inline_code_attr_ranges(content)
666 } else {
667 Vec::new()
668 }
669 });
670
671 let bracketed_span_ranges = profile_section!("Bracketed span ranges", profile, {
673 if flavor.is_pandoc_compatible() {
674 crate::utils::pandoc::detect_bracketed_span_ranges(content)
675 } else {
676 Vec::new()
677 }
678 });
679
680 let line_block_ranges = profile_section!("Line block ranges", profile, {
682 if flavor.is_pandoc_compatible() {
683 crate::utils::pandoc::detect_line_block_ranges(content)
684 } else {
685 Vec::new()
686 }
687 });
688
689 let pipe_table_caption_ranges = profile_section!("Pipe-table caption ranges", profile, {
691 if flavor.is_pandoc_compatible() {
692 crate::utils::pandoc::detect_pipe_table_caption_ranges(content)
693 } else {
694 Vec::new()
695 }
696 });
697
698 let pandoc_metadata_ranges = profile_section!("Pandoc metadata ranges", profile, {
700 if flavor.is_pandoc_compatible() {
701 crate::utils::pandoc::detect_yaml_metadata_block_ranges(content)
702 } else {
703 Vec::new()
704 }
705 });
706
707 let grid_table_ranges = profile_section!("Grid table ranges", profile, {
709 if flavor.is_pandoc_compatible() {
710 crate::utils::pandoc::detect_grid_table_ranges(content)
711 } else {
712 Vec::new()
713 }
714 });
715
716 let multi_line_table_ranges = profile_section!("Multi-line table ranges", profile, {
718 if flavor.is_pandoc_compatible() {
719 crate::utils::pandoc::detect_multi_line_table_ranges(content)
720 } else {
721 Vec::new()
722 }
723 });
724
725 let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
727 use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
728 let mut ranges = Vec::new();
729 for mat in HUGO_SHORTCODE_REGEX.find_iter(content) {
730 ranges.push((mat.start(), mat.end()));
731 }
732 ranges
733 });
734
735 let inline_config = InlineConfig::from_content_with_code_blocks(content, &code_blocks);
736
737 Self {
738 content,
739 content_lines,
740 line_offsets,
741 code_blocks,
742 code_block_details,
743 strong_spans,
744 line_to_list,
745 list_start_values,
746 lines,
747 links,
748 images,
749 broken_links,
750 footnote_refs,
751 reference_defs,
752 reference_defs_map,
753 code_spans_cache: OnceLock::from(Arc::new(code_spans)),
754 math_spans_cache: OnceLock::new(), list_blocks,
756 char_frequency,
757 html_tags_cache: OnceLock::new(),
758 emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
759 table_rows_cache: OnceLock::new(),
760 bare_urls_cache: OnceLock::new(),
761 has_mixed_list_nesting_cache: OnceLock::new(),
762 html_comment_ranges,
763 table_blocks,
764 line_index,
765 jinja_ranges,
766 flavor,
767 source_file,
768 jsx_expression_ranges,
769 mdx_comment_ranges,
770 citation_ranges,
771 pandoc_div_ranges,
772 colon_fence_ranges,
773 inline_footnote_ranges,
774 pandoc_header_slugs,
775 example_list_marker_ranges,
776 example_reference_ranges,
777 sub_super_ranges,
778 inline_code_attr_ranges,
779 bracketed_span_ranges,
780 line_block_ranges,
781 pipe_table_caption_ranges,
782 pandoc_metadata_ranges,
783 grid_table_ranges,
784 multi_line_table_ranges,
785 shortcode_ranges,
786 link_title_ranges,
787 code_span_byte_ranges: code_span_ranges,
788 inline_config,
789 obsidian_comment_ranges,
790 lazy_cont_lines_cache: OnceLock::new(),
791 }
792 }
793
794 #[inline]
797 fn binary_search_ranges(ranges: &[(usize, usize)], pos: usize) -> bool {
798 let idx = ranges.partition_point(|&(start, _)| start <= pos);
800 idx > 0 && pos < ranges[idx - 1].1
802 }
803
804 pub fn is_in_code_span_byte(&self, pos: usize) -> bool {
806 Self::binary_search_ranges(&self.code_span_byte_ranges, pos)
807 }
808
809 pub fn is_in_link(&self, pos: usize) -> bool {
811 let idx = self.links.partition_point(|link| link.byte_offset <= pos);
812 if idx > 0 && pos < self.links[idx - 1].byte_end {
813 return true;
814 }
815 let idx = self.images.partition_point(|img| img.byte_offset <= pos);
816 if idx > 0 && pos < self.images[idx - 1].byte_end {
817 return true;
818 }
819 self.is_in_reference_def(pos)
820 }
821
    /// Returns the inline configuration that `new` built from the document content.
    pub fn inline_config(&self) -> &InlineConfig {
        &self.inline_config
    }
826
827 pub fn colon_fence_ranges(&self) -> &[(usize, usize)] {
830 &self.colon_fence_ranges
831 }
832
833 pub fn raw_lines(&self) -> &[&'a str] {
837 &self.content_lines
838 }
839
840 pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
845 self.inline_config.is_rule_disabled(rule_name, line_number)
846 }
847
848 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
850 Arc::clone(
851 self.code_spans_cache
852 .get_or_init(|| Arc::new(element_parsers::parse_code_spans(self.content, &self.lines))),
853 )
854 }
855
856 pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
858 Arc::clone(
859 self.math_spans_cache
860 .get_or_init(|| Arc::new(element_parsers::parse_math_spans(self.content, &self.lines))),
861 )
862 }
863
864 pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
866 let math_spans = self.math_spans();
867 let idx = math_spans.partition_point(|span| span.byte_offset <= byte_pos);
869 idx > 0 && byte_pos < math_spans[idx - 1].byte_end
870 }
871
872 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
874 &self.html_comment_ranges
875 }
876
877 pub fn is_in_obsidian_comment(&self, byte_pos: usize) -> bool {
881 Self::binary_search_ranges(&self.obsidian_comment_ranges, byte_pos)
882 }
883
884 pub fn is_position_in_obsidian_comment(&self, line_num: usize, col: usize) -> bool {
889 if self.obsidian_comment_ranges.is_empty() {
890 return false;
891 }
892
893 let byte_pos = self.line_index.line_col_to_byte_range(line_num, col).start;
895 self.is_in_obsidian_comment(byte_pos)
896 }
897
898 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
900 Arc::clone(self.html_tags_cache.get_or_init(|| {
901 let tags = element_parsers::parse_html_tags(self.content, &self.lines, &self.code_blocks, self.flavor);
902 Arc::new(
904 tags.into_iter()
905 .filter(|tag| {
906 !self
907 .lines
908 .get(tag.line - 1)
909 .is_some_and(|l| l.in_kramdown_extension_block)
910 })
911 .collect(),
912 )
913 }))
914 }
915
916 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
918 Arc::clone(
919 self.emphasis_spans_cache
920 .get()
921 .expect("emphasis_spans_cache initialized during construction"),
922 )
923 }
924
925 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
927 Arc::clone(
928 self.table_rows_cache
929 .get_or_init(|| Arc::new(element_parsers::parse_table_rows(self.content, &self.lines))),
930 )
931 }
932
933 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
935 Arc::clone(self.bare_urls_cache.get_or_init(|| {
936 Arc::new(element_parsers::parse_bare_urls(
937 self.content,
938 &self.lines,
939 &self.code_blocks,
940 ))
941 }))
942 }
943
944 pub fn lazy_continuation_lines(&self) -> Arc<Vec<LazyContLine>> {
946 Arc::clone(self.lazy_cont_lines_cache.get_or_init(|| {
947 Arc::new(element_parsers::detect_lazy_continuation_lines(
948 self.content,
949 &self.lines,
950 &self.line_offsets,
951 ))
952 }))
953 }
954
955 pub fn has_mixed_list_nesting(&self) -> bool {
959 *self
960 .has_mixed_list_nesting_cache
961 .get_or_init(|| self.compute_mixed_list_nesting())
962 }
963
964 fn compute_mixed_list_nesting(&self) -> bool {
966 let mut stack: Vec<(usize, bool)> = Vec::new();
971 let mut last_was_blank = false;
972
973 for line_info in &self.lines {
974 if line_info.in_code_block
976 || line_info.in_front_matter
977 || line_info.in_mkdocstrings
978 || line_info.in_html_comment
979 || line_info.in_mdx_comment
980 || line_info.in_esm_block
981 {
982 continue;
983 }
984
985 if line_info.is_blank {
987 last_was_blank = true;
988 continue;
989 }
990
991 if let Some(list_item) = &line_info.list_item {
992 let current_pos = if list_item.marker_column == 1 {
994 0
995 } else {
996 list_item.marker_column
997 };
998
999 if last_was_blank && current_pos == 0 {
1001 stack.clear();
1002 }
1003 last_was_blank = false;
1004
1005 while let Some(&(pos, _)) = stack.last() {
1007 if pos >= current_pos {
1008 stack.pop();
1009 } else {
1010 break;
1011 }
1012 }
1013
1014 if let Some(&(_, parent_is_ordered)) = stack.last()
1016 && parent_is_ordered != list_item.is_ordered
1017 {
1018 return true; }
1020
1021 stack.push((current_pos, list_item.is_ordered));
1022 } else {
1023 last_was_blank = false;
1025 }
1026 }
1027
1028 false
1029 }
1030
1031 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
1033 match self.line_offsets.binary_search(&offset) {
1034 Ok(line) => (line + 1, 1),
1035 Err(line) => {
1036 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
1037 (line, offset - line_start + 1)
1038 }
1039 }
1040 }
1041
1042 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
1044 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
1046 return true;
1047 }
1048
1049 self.is_byte_offset_in_code_span(pos)
1051 }
1052
1053 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
1055 if line_num > 0 {
1056 self.lines.get(line_num - 1)
1057 } else {
1058 None
1059 }
1060 }
1061
1062 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
1064 let normalized_id = ref_id.to_lowercase();
1065 self.reference_defs_map
1066 .get(&normalized_id)
1067 .map(|&idx| self.reference_defs[idx].url.as_str())
1068 }
1069
1070 pub fn is_in_list_block(&self, line_num: usize) -> bool {
1072 self.list_blocks
1073 .iter()
1074 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
1075 }
1076
1077 pub fn is_in_html_block(&self, line_num: usize) -> bool {
1079 if line_num == 0 || line_num > self.lines.len() {
1080 return false;
1081 }
1082 self.lines[line_num - 1].in_html_block
1083 }
1084
1085 pub fn is_in_table_block(&self, line_num: usize) -> bool {
1091 if line_num == 0 {
1092 return false;
1093 }
1094 let line_idx = line_num - 1;
1095 self.table_blocks
1096 .iter()
1097 .any(|block| line_idx >= block.start_line && line_idx <= block.end_line)
1098 }
1099
1100 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
1102 if line_num == 0 || line_num > self.lines.len() {
1103 return false;
1104 }
1105
1106 let col_0indexed = if col > 0 { col - 1 } else { 0 };
1110 let code_spans = self.code_spans();
1111 code_spans.iter().any(|span| {
1112 if line_num < span.line || line_num > span.end_line {
1114 return false;
1115 }
1116
1117 if span.line == span.end_line {
1118 col_0indexed >= span.start_col && col_0indexed < span.end_col
1120 } else if line_num == span.line {
1121 col_0indexed >= span.start_col
1123 } else if line_num == span.end_line {
1124 col_0indexed < span.end_col
1126 } else {
1127 true
1129 }
1130 })
1131 }
1132
1133 #[inline]
1135 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
1136 let code_spans = self.code_spans();
1137 let idx = code_spans.partition_point(|span| span.byte_offset <= byte_offset);
1138 idx > 0 && byte_offset < code_spans[idx - 1].byte_end
1139 }
1140
1141 #[inline]
1143 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
1144 let idx = self.reference_defs.partition_point(|rd| rd.byte_offset <= byte_pos);
1145 idx > 0 && byte_pos < self.reference_defs[idx - 1].byte_end
1146 }
1147
1148 #[inline]
1150 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
1151 let idx = self.html_comment_ranges.partition_point(|r| r.start <= byte_pos);
1152 idx > 0 && byte_pos < self.html_comment_ranges[idx - 1].end
1153 }
1154
1155 #[inline]
1158 pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1159 let tags = self.html_tags();
1160 let idx = tags.partition_point(|tag| tag.byte_offset <= byte_pos);
1161 idx > 0 && byte_pos < tags[idx - 1].byte_end
1162 }
1163
1164 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1166 Self::binary_search_ranges(&self.jinja_ranges, byte_pos)
1167 }
1168
1169 #[inline]
1171 pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
1172 Self::binary_search_ranges(&self.jsx_expression_ranges, byte_pos)
1173 }
1174
1175 #[inline]
1177 pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
1178 Self::binary_search_ranges(&self.mdx_comment_ranges, byte_pos)
1179 }
1180
1181 #[inline]
1184 pub fn is_in_citation(&self, byte_pos: usize) -> bool {
1185 let idx = self.citation_ranges.partition_point(|r| r.start <= byte_pos);
1186 idx > 0 && byte_pos < self.citation_ranges[idx - 1].end
1187 }
1188
1189 #[inline]
1191 pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
1192 &self.citation_ranges
1193 }
1194
1195 #[inline]
1198 pub fn is_in_div_block(&self, byte_pos: usize) -> bool {
1199 let idx = self.pandoc_div_ranges.partition_point(|r| r.start <= byte_pos);
1200 idx > 0 && byte_pos < self.pandoc_div_ranges[idx - 1].end
1201 }
1202
1203 #[inline]
1206 pub fn is_in_inline_footnote(&self, byte_pos: usize) -> bool {
1207 let idx = self.inline_footnote_ranges.partition_point(|r| r.start <= byte_pos);
1208 idx > 0 && byte_pos < self.inline_footnote_ranges[idx - 1].end
1209 }
1210
1211 #[inline]
1214 pub fn is_in_example_list_marker(&self, byte_pos: usize) -> bool {
1215 let idx = self.example_list_marker_ranges.partition_point(|r| r.start <= byte_pos);
1216 idx > 0 && byte_pos < self.example_list_marker_ranges[idx - 1].end
1217 }
1218
1219 #[inline]
1222 pub fn is_in_example_reference(&self, byte_pos: usize) -> bool {
1223 let idx = self.example_reference_ranges.partition_point(|r| r.start <= byte_pos);
1224 idx > 0 && byte_pos < self.example_reference_ranges[idx - 1].end
1225 }
1226
1227 #[inline]
1230 pub fn is_in_subscript_or_superscript(&self, byte_pos: usize) -> bool {
1231 let idx = self.sub_super_ranges.partition_point(|r| r.start <= byte_pos);
1232 idx > 0 && byte_pos < self.sub_super_ranges[idx - 1].end
1233 }
1234
1235 #[inline]
1239 pub fn is_in_inline_code_attr(&self, byte_pos: usize) -> bool {
1240 let idx = self.inline_code_attr_ranges.partition_point(|r| r.start <= byte_pos);
1241 idx > 0 && byte_pos < self.inline_code_attr_ranges[idx - 1].end
1242 }
1243
1244 #[inline]
1247 pub fn is_in_bracketed_span(&self, byte_pos: usize) -> bool {
1248 let idx = self.bracketed_span_ranges.partition_point(|r| r.start <= byte_pos);
1249 idx > 0 && byte_pos < self.bracketed_span_ranges[idx - 1].end
1250 }
1251
1252 #[inline]
1255 pub fn is_in_line_block(&self, byte_pos: usize) -> bool {
1256 let idx = self.line_block_ranges.partition_point(|r| r.start <= byte_pos);
1257 idx > 0 && byte_pos < self.line_block_ranges[idx - 1].end
1258 }
1259
1260 #[inline]
1264 pub fn is_in_pipe_table_caption(&self, byte_pos: usize) -> bool {
1265 let idx = self.pipe_table_caption_ranges.partition_point(|r| r.start <= byte_pos);
1266 idx > 0 && byte_pos < self.pipe_table_caption_ranges[idx - 1].end
1267 }
1268
1269 #[inline]
1272 pub fn is_in_pandoc_metadata(&self, byte_pos: usize) -> bool {
1273 let idx = self.pandoc_metadata_ranges.partition_point(|r| r.start <= byte_pos);
1274 idx > 0 && byte_pos < self.pandoc_metadata_ranges[idx - 1].end
1275 }
1276
1277 #[inline]
1280 pub fn is_in_grid_table(&self, byte_pos: usize) -> bool {
1281 let idx = self.grid_table_ranges.partition_point(|r| r.start <= byte_pos);
1282 idx > 0 && byte_pos < self.grid_table_ranges[idx - 1].end
1283 }
1284
1285 #[inline]
1288 pub fn is_in_multi_line_table(&self, byte_pos: usize) -> bool {
1289 let idx = self.multi_line_table_ranges.partition_point(|r| r.start <= byte_pos);
1290 idx > 0 && byte_pos < self.multi_line_table_ranges[idx - 1].end
1291 }
1292
1293 pub fn matches_implicit_header_reference(&self, link_text: &str) -> bool {
1298 let slug = crate::utils::pandoc::pandoc_header_slug(link_text);
1299 self.pandoc_header_slugs.contains(&slug)
1300 }
1301
1302 #[inline]
1308 pub fn has_pandoc_slug(&self, slug: &str) -> bool {
1309 self.pandoc_header_slugs.contains(slug)
1310 }
1311
1312 #[inline]
1314 pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
1315 Self::binary_search_ranges(&self.shortcode_ranges, byte_pos)
1316 }
1317
1318 #[inline]
1320 pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
1321 &self.shortcode_ranges
1322 }
1323
1324 pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1326 Self::binary_search_ranges(&self.link_title_ranges, byte_pos)
1327 }
1328
1329 pub fn has_char(&self, ch: char) -> bool {
1331 match ch {
1332 '#' => self.char_frequency.hash_count > 0,
1333 '*' => self.char_frequency.asterisk_count > 0,
1334 '_' => self.char_frequency.underscore_count > 0,
1335 '-' => self.char_frequency.hyphen_count > 0,
1336 '+' => self.char_frequency.plus_count > 0,
1337 '>' => self.char_frequency.gt_count > 0,
1338 '|' => self.char_frequency.pipe_count > 0,
1339 '[' => self.char_frequency.bracket_count > 0,
1340 '`' => self.char_frequency.backtick_count > 0,
1341 '<' => self.char_frequency.lt_count > 0,
1342 '!' => self.char_frequency.exclamation_count > 0,
1343 '\n' => self.char_frequency.newline_count > 0,
1344 _ => self.content.contains(ch), }
1346 }
1347
1348 pub fn char_count(&self, ch: char) -> usize {
1350 match ch {
1351 '#' => self.char_frequency.hash_count,
1352 '*' => self.char_frequency.asterisk_count,
1353 '_' => self.char_frequency.underscore_count,
1354 '-' => self.char_frequency.hyphen_count,
1355 '+' => self.char_frequency.plus_count,
1356 '>' => self.char_frequency.gt_count,
1357 '|' => self.char_frequency.pipe_count,
1358 '[' => self.char_frequency.bracket_count,
1359 '`' => self.char_frequency.backtick_count,
1360 '<' => self.char_frequency.lt_count,
1361 '!' => self.char_frequency.exclamation_count,
1362 '\n' => self.char_frequency.newline_count,
1363 _ => self.content.matches(ch).count(), }
1365 }
1366
1367 pub fn likely_has_headings(&self) -> bool {
1369 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 || self.content.contains('=') }
1371
1372 pub fn likely_has_lists(&self) -> bool {
1374 self.char_frequency.asterisk_count > 0
1375 || self.char_frequency.hyphen_count > 0
1376 || self.char_frequency.plus_count > 0
1377 }
1378
1379 pub fn likely_has_emphasis(&self) -> bool {
1381 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1382 }
1383
1384 pub fn likely_has_tables(&self) -> bool {
1386 self.char_frequency.pipe_count > 2
1387 }
1388
1389 pub fn likely_has_blockquotes(&self) -> bool {
1391 self.char_frequency.gt_count > 0
1392 }
1393
1394 pub fn likely_has_code(&self) -> bool {
1396 self.char_frequency.backtick_count > 0
1397 }
1398
1399 pub fn likely_has_links_or_images(&self) -> bool {
1401 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1402 }
1403
1404 pub fn likely_has_html(&self) -> bool {
1406 self.char_frequency.lt_count > 0
1407 }
1408
1409 pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1414 if let Some(line_info) = self.lines.get(line_idx)
1415 && let Some(ref bq) = line_info.blockquote
1416 {
1417 bq.prefix.trim_end().to_string()
1418 } else {
1419 String::new()
1420 }
1421 }
1422
1423 #[inline]
1429 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1430 let idx = match lines.binary_search_by(|line| {
1432 if byte_offset < line.byte_offset {
1433 std::cmp::Ordering::Greater
1434 } else if byte_offset > line.byte_offset + line.byte_len {
1435 std::cmp::Ordering::Less
1436 } else {
1437 std::cmp::Ordering::Equal
1438 }
1439 }) {
1440 Ok(idx) => idx,
1441 Err(idx) => idx.saturating_sub(1),
1442 };
1443
1444 let line = &lines[idx];
1445 let line_num = idx + 1;
1446 let col = byte_offset.saturating_sub(line.byte_offset);
1447
1448 (idx, line_num, col)
1449 }
1450
1451 #[inline]
1453 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1454 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1456
1457 if idx > 0 {
1459 let span = &code_spans[idx - 1];
1460 if offset >= span.byte_offset && offset < span.byte_end {
1461 return true;
1462 }
1463 }
1464
1465 false
1466 }
1467
1468 #[must_use]
1488 pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
1489 ValidHeadingsIter::new(&self.lines)
1490 }
1491
1492 #[must_use]
1496 pub fn has_valid_headings(&self) -> bool {
1497 self.lines
1498 .iter()
1499 .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
1500 }
1501}
1502
1503fn detect_footnote_definitions(content: &str, lines: &mut [types::LineInfo], line_offsets: &[usize]) {
1512 use pulldown_cmark::{CodeBlockKind, Event, Parser, Tag, TagEnd};
1513
1514 let options = crate::utils::rumdl_parser_options();
1515 let parser = Parser::new_ext(content, options).into_offset_iter();
1516
1517 let mut footnote_ranges: Vec<(usize, usize)> = Vec::new();
1519 let mut fenced_code_ranges: Vec<(usize, usize)> = Vec::new();
1520 let mut in_footnote = false;
1521
1522 for (event, range) in parser {
1523 match event {
1524 Event::Start(Tag::FootnoteDefinition(_)) => {
1525 in_footnote = true;
1526 footnote_ranges.push((range.start, range.end));
1527 }
1528 Event::End(TagEnd::FootnoteDefinition) => {
1529 in_footnote = false;
1530 }
1531 Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(_))) if in_footnote => {
1532 fenced_code_ranges.push((range.start, range.end));
1533 }
1534 _ => {}
1535 }
1536 }
1537
1538 let byte_to_line = |byte_offset: usize| -> usize {
1539 line_offsets
1540 .partition_point(|&offset| offset <= byte_offset)
1541 .saturating_sub(1)
1542 };
1543
1544 for &(start, end) in &footnote_ranges {
1546 let start_line = byte_to_line(start);
1547 let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
1548
1549 for line in &mut lines[start_line..end_line] {
1550 line.in_footnote_definition = true;
1551 line.in_code_block = false;
1552 }
1553 }
1554
1555 for &(start, end) in &fenced_code_ranges {
1557 let start_line = byte_to_line(start);
1558 let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
1559
1560 for line in &mut lines[start_line..end_line] {
1561 line.in_code_block = true;
1562 }
1563 }
1564}