1pub mod types;
2pub use types::*;
3
4mod element_parsers;
5mod flavor_detection;
6mod heading_detection;
7mod line_computation;
8mod link_parser;
9mod list_blocks;
10#[cfg(test)]
11mod tests;
12
13use crate::config::MarkdownFlavor;
14use crate::inline_config::InlineConfig;
15use crate::rules::front_matter_utils::FrontMatterUtils;
16use crate::utils::code_block_utils::{CodeBlockDetail, CodeBlockUtils};
17use std::collections::HashMap;
18use std::path::PathBuf;
19
/// Evaluates `$code` and yields its value, optionally reporting how long it took.
///
/// Native builds: a timer is started before `$code` runs; when `$profile` is
/// true the elapsed time is written to stderr as `[PROFILE] <name>: <duration>`.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let timer = std::time::Instant::now();
        let value = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, timer.elapsed());
        }
        value
    }};
}

/// wasm32 variant: no timing is performed, `$code` is simply evaluated.
/// `$name` and `$profile` are accepted for call-site compatibility but unused.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
37
38pub(super) struct SkipByteRanges<'a> {
41 pub(super) html_comment_ranges: &'a [crate::utils::skip_context::ByteRange],
42 pub(super) autodoc_ranges: &'a [crate::utils::skip_context::ByteRange],
43 pub(super) quarto_div_ranges: &'a [crate::utils::skip_context::ByteRange],
44 pub(super) pymdown_block_ranges: &'a [crate::utils::skip_context::ByteRange],
45}
46
47use std::sync::{Arc, OnceLock};
48
49pub(super) type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;
51
52pub(super) type ByteRanges = Vec<(usize, usize)>;
54
55pub struct LintContext<'a> {
56 pub content: &'a str,
57 content_lines: Vec<&'a str>, pub line_offsets: Vec<usize>,
59 pub code_blocks: Vec<(usize, usize)>, pub code_block_details: Vec<CodeBlockDetail>, pub strong_spans: Vec<crate::utils::code_block_utils::StrongSpanDetail>, pub line_to_list: crate::utils::code_block_utils::LineToListMap, pub list_start_values: crate::utils::code_block_utils::ListStartValues, pub lines: Vec<LineInfo>, pub links: Vec<ParsedLink<'a>>, pub images: Vec<ParsedImage<'a>>, pub broken_links: Vec<BrokenLinkInfo>, pub footnote_refs: Vec<FootnoteRef>, pub reference_defs: Vec<ReferenceDef>, reference_defs_map: HashMap<String, usize>, code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>, math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>, pub list_blocks: Vec<ListBlock>, pub char_frequency: CharFrequency, html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>, emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>, table_rows_cache: OnceLock<Arc<Vec<TableRow>>>, bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>, has_mixed_list_nesting_cache: OnceLock<bool>, html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>, pub table_blocks: Vec<crate::utils::table_utils::TableBlock>, pub line_index: crate::utils::range_utils::LineIndex<'a>, jinja_ranges: Vec<(usize, usize)>, pub flavor: MarkdownFlavor, pub source_file: Option<PathBuf>, jsx_expression_ranges: Vec<(usize, usize)>, mdx_comment_ranges: Vec<(usize, usize)>, citation_ranges: Vec<crate::utils::skip_context::ByteRange>, shortcode_ranges: Vec<(usize, usize)>, link_title_ranges: Vec<(usize, usize)>, code_span_byte_ranges: Vec<(usize, usize)>, inline_config: InlineConfig, obsidian_comment_ranges: Vec<(usize, usize)>, lazy_cont_lines_cache: OnceLock<Arc<Vec<LazyContLine>>>, }
96
97impl<'a> LintContext<'a> {
98 pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
99 #[cfg(not(target_arch = "wasm32"))]
100 let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
101 #[cfg(target_arch = "wasm32")]
102 let profile = false;
103
104 let line_offsets = profile_section!("Line offsets", profile, {
105 let mut offsets = vec![0];
106 for (i, c) in content.char_indices() {
107 if c == '\n' {
108 offsets.push(i + 1);
109 }
110 }
111 offsets
112 });
113
114 let content_lines: Vec<&str> = content.lines().collect();
116
117 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
119
120 let parse_result = profile_section!(
122 "Code blocks",
123 profile,
124 CodeBlockUtils::detect_code_blocks_and_spans(content)
125 );
126 let mut code_blocks = parse_result.code_blocks;
127 let code_span_ranges = parse_result.code_spans;
128 let code_block_details = parse_result.code_block_details;
129 let strong_spans = parse_result.strong_spans;
130 let line_to_list = parse_result.line_to_list;
131 let list_start_values = parse_result.list_start_values;
132
133 let html_comment_ranges = profile_section!(
135 "HTML comment ranges",
136 profile,
137 crate::utils::skip_context::compute_html_comment_ranges(content)
138 );
139
140 let autodoc_ranges = profile_section!(
144 "Autodoc block ranges",
145 profile,
146 crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
147 );
148
149 let quarto_div_ranges = profile_section!("Quarto div ranges", profile, {
151 if flavor == MarkdownFlavor::Quarto {
152 crate::utils::quarto_divs::detect_div_block_ranges(content)
153 } else {
154 Vec::new()
155 }
156 });
157
158 let pymdown_block_ranges = profile_section!("PyMdown block ranges", profile, {
160 if flavor == MarkdownFlavor::MkDocs {
161 crate::utils::pymdown_blocks::detect_block_ranges(content)
162 } else {
163 Vec::new()
164 }
165 });
166
167 let skip_ranges = SkipByteRanges {
170 html_comment_ranges: &html_comment_ranges,
171 autodoc_ranges: &autodoc_ranges,
172 quarto_div_ranges: &quarto_div_ranges,
173 pymdown_block_ranges: &pymdown_block_ranges,
174 };
175 let (mut lines, emphasis_spans) = profile_section!(
176 "Basic line info",
177 profile,
178 line_computation::compute_basic_line_info(
179 content,
180 &content_lines,
181 &line_offsets,
182 &code_blocks,
183 flavor,
184 &skip_ranges,
185 front_matter_end,
186 )
187 );
188
189 profile_section!(
191 "HTML blocks",
192 profile,
193 heading_detection::detect_html_blocks(content, &mut lines)
194 );
195
196 profile_section!(
198 "ESM blocks",
199 profile,
200 flavor_detection::detect_esm_blocks(content, &mut lines, flavor)
201 );
202
203 profile_section!(
205 "JSX block detection",
206 profile,
207 flavor_detection::detect_jsx_blocks(content, &mut lines, flavor)
208 );
209
210 let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
212 "JSX/MDX detection",
213 profile,
214 flavor_detection::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
215 );
216
217 profile_section!(
219 "MkDocs constructs",
220 profile,
221 flavor_detection::detect_mkdocs_line_info(&content_lines, &mut lines, flavor)
222 );
223
224 profile_section!(
229 "Footnote definitions",
230 profile,
231 detect_footnote_definitions(content, &mut lines, &line_offsets)
232 );
233
234 {
237 let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
238 for &(start, end) in &code_blocks {
239 let start_line = line_offsets
240 .partition_point(|&offset| offset <= start)
241 .saturating_sub(1);
242 let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
243
244 let mut sub_start: Option<usize> = None;
245 for (i, &offset) in line_offsets[start_line..end_line]
246 .iter()
247 .enumerate()
248 .map(|(j, o)| (j + start_line, o))
249 {
250 let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
251 if is_real_code && sub_start.is_none() {
252 let byte_start = if i == start_line { start } else { offset };
253 sub_start = Some(byte_start);
254 } else if !is_real_code && sub_start.is_some() {
255 new_code_blocks.push((sub_start.unwrap(), offset));
256 sub_start = None;
257 }
258 }
259 if let Some(s) = sub_start {
260 new_code_blocks.push((s, end));
261 }
262 }
263 code_blocks = new_code_blocks;
264 }
265
266 if flavor == MarkdownFlavor::MkDocs {
273 let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
274 for &(start, end) in &code_blocks {
275 let start_line = line_offsets
276 .partition_point(|&offset| offset <= start)
277 .saturating_sub(1);
278 let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
279
280 let mut sub_start: Option<usize> = None;
282 for (i, &offset) in line_offsets[start_line..end_line]
283 .iter()
284 .enumerate()
285 .map(|(j, o)| (j + start_line, o))
286 {
287 let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
288 if is_real_code && sub_start.is_none() {
289 let byte_start = if i == start_line { start } else { offset };
290 sub_start = Some(byte_start);
291 } else if !is_real_code && sub_start.is_some() {
292 new_code_blocks.push((sub_start.unwrap(), offset));
293 sub_start = None;
294 }
295 }
296 if let Some(s) = sub_start {
297 new_code_blocks.push((s, end));
298 }
299 }
300 code_blocks = new_code_blocks;
301 }
302
303 if flavor.supports_jsx() {
307 let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
308 for &(start, end) in &code_blocks {
309 let start_line = line_offsets
310 .partition_point(|&offset| offset <= start)
311 .saturating_sub(1);
312 let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
313
314 let mut sub_start: Option<usize> = None;
315 for (i, &offset) in line_offsets[start_line..end_line]
316 .iter()
317 .enumerate()
318 .map(|(j, o)| (j + start_line, o))
319 {
320 let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
321 if is_real_code && sub_start.is_none() {
322 let byte_start = if i == start_line { start } else { offset };
323 sub_start = Some(byte_start);
324 } else if !is_real_code && sub_start.is_some() {
325 new_code_blocks.push((sub_start.unwrap(), offset));
326 sub_start = None;
327 }
328 }
329 if let Some(s) = sub_start {
330 new_code_blocks.push((s, end));
331 }
332 }
333 code_blocks = new_code_blocks;
334 }
335
336 profile_section!(
338 "Kramdown constructs",
339 profile,
340 flavor_detection::detect_kramdown_line_info(content, &mut lines, flavor)
341 );
342
343 for line in &mut lines {
348 if line.in_kramdown_extension_block {
349 line.list_item = None;
350 line.is_horizontal_rule = false;
351 line.blockquote = None;
352 line.is_kramdown_block_ial = false;
353 }
354 }
355
356 let obsidian_comment_ranges = profile_section!(
358 "Obsidian comments",
359 profile,
360 flavor_detection::detect_obsidian_comments(content, &mut lines, flavor, &code_span_ranges)
361 );
362
363 let pulldown_result = profile_section!(
367 "Links, images & link ranges",
368 profile,
369 link_parser::parse_links_images_pulldown(content, &lines, &code_blocks, flavor, &html_comment_ranges)
370 );
371
372 profile_section!(
374 "Headings & blockquotes",
375 profile,
376 heading_detection::detect_headings_and_blockquotes(
377 &content_lines,
378 &mut lines,
379 flavor,
380 &html_comment_ranges,
381 &pulldown_result.link_byte_ranges,
382 front_matter_end,
383 )
384 );
385
386 for line in &mut lines {
388 if line.in_kramdown_extension_block {
389 line.heading = None;
390 }
391 }
392
393 let mut code_spans = profile_section!(
395 "Code spans",
396 profile,
397 element_parsers::build_code_spans_from_ranges(content, &lines, &code_span_ranges)
398 );
399
400 if flavor == MarkdownFlavor::MkDocs {
404 let extra = profile_section!(
405 "MkDocs code spans",
406 profile,
407 element_parsers::scan_mkdocs_container_code_spans(content, &lines, &code_span_ranges,)
408 );
409 if !extra.is_empty() {
410 code_spans.extend(extra);
411 code_spans.sort_by_key(|span| span.byte_offset);
412 }
413 }
414
415 for span in &code_spans {
418 if span.end_line > span.line {
419 for line_num in (span.line + 1)..=span.end_line {
421 if let Some(line_info) = lines.get_mut(line_num - 1) {
422 line_info.in_code_span_continuation = true;
423 }
424 }
425 }
426 }
427
428 let (links, images, broken_links, footnote_refs) = profile_section!(
430 "Links & images finalize",
431 profile,
432 link_parser::finalize_links_and_images(
433 content,
434 &lines,
435 &code_blocks,
436 &code_spans,
437 flavor,
438 &html_comment_ranges,
439 pulldown_result
440 )
441 );
442
443 let reference_defs = profile_section!(
444 "Reference defs",
445 profile,
446 link_parser::parse_reference_defs(content, &lines)
447 );
448
449 let list_blocks = profile_section!("List blocks", profile, list_blocks::parse_list_blocks(content, &lines));
450
451 let char_frequency = profile_section!(
453 "Char frequency",
454 profile,
455 line_computation::compute_char_frequency(content)
456 );
457
458 let table_blocks = profile_section!(
460 "Table blocks",
461 profile,
462 crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
463 content,
464 &code_blocks,
465 &code_spans,
466 &html_comment_ranges,
467 )
468 );
469
470 let links = links
473 .into_iter()
474 .filter(|link| !lines.get(link.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
475 .collect::<Vec<_>>();
476 let images = images
477 .into_iter()
478 .filter(|img| !lines.get(img.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
479 .collect::<Vec<_>>();
480 let broken_links = broken_links
481 .into_iter()
482 .filter(|bl| {
483 let line_idx = line_offsets
485 .partition_point(|&offset| offset <= bl.span.start)
486 .saturating_sub(1);
487 !lines.get(line_idx).is_some_and(|l| l.in_kramdown_extension_block)
488 })
489 .collect::<Vec<_>>();
490 let footnote_refs = footnote_refs
491 .into_iter()
492 .filter(|fr| !lines.get(fr.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
493 .collect::<Vec<_>>();
494 let reference_defs = reference_defs
495 .into_iter()
496 .filter(|def| !lines.get(def.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
497 .collect::<Vec<_>>();
498 let list_blocks = list_blocks
499 .into_iter()
500 .filter(|block| {
501 !lines
502 .get(block.start_line - 1)
503 .is_some_and(|l| l.in_kramdown_extension_block)
504 })
505 .collect::<Vec<_>>();
506 let table_blocks = table_blocks
507 .into_iter()
508 .filter(|block| {
509 !lines
511 .get(block.start_line)
512 .is_some_and(|l| l.in_kramdown_extension_block)
513 })
514 .collect::<Vec<_>>();
515 let emphasis_spans = emphasis_spans
516 .into_iter()
517 .filter(|span| !lines.get(span.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
518 .collect::<Vec<_>>();
519
520 let reference_defs_map: HashMap<String, usize> = reference_defs
522 .iter()
523 .enumerate()
524 .map(|(idx, def)| (def.id.to_lowercase(), idx))
525 .collect();
526
527 let link_title_ranges: Vec<(usize, usize)> = reference_defs
529 .iter()
530 .filter_map(|def| match (def.title_byte_start, def.title_byte_end) {
531 (Some(start), Some(end)) => Some((start, end)),
532 _ => None,
533 })
534 .collect();
535
536 let line_index = profile_section!(
538 "Line index",
539 profile,
540 crate::utils::range_utils::LineIndex::with_line_starts_and_code_blocks(
541 content,
542 line_offsets.clone(),
543 &code_blocks,
544 )
545 );
546
547 let jinja_ranges = profile_section!(
549 "Jinja ranges",
550 profile,
551 crate::utils::jinja_utils::find_jinja_ranges(content)
552 );
553
554 let citation_ranges = profile_section!("Citation ranges", profile, {
556 if flavor == MarkdownFlavor::Quarto {
557 crate::utils::quarto_divs::find_citation_ranges(content)
558 } else {
559 Vec::new()
560 }
561 });
562
563 let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
565 use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
566 let mut ranges = Vec::new();
567 for mat in HUGO_SHORTCODE_REGEX.find_iter(content) {
568 ranges.push((mat.start(), mat.end()));
569 }
570 ranges
571 });
572
573 let inline_config = InlineConfig::from_content_with_code_blocks(content, &code_blocks);
574
575 Self {
576 content,
577 content_lines,
578 line_offsets,
579 code_blocks,
580 code_block_details,
581 strong_spans,
582 line_to_list,
583 list_start_values,
584 lines,
585 links,
586 images,
587 broken_links,
588 footnote_refs,
589 reference_defs,
590 reference_defs_map,
591 code_spans_cache: OnceLock::from(Arc::new(code_spans)),
592 math_spans_cache: OnceLock::new(), list_blocks,
594 char_frequency,
595 html_tags_cache: OnceLock::new(),
596 emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
597 table_rows_cache: OnceLock::new(),
598 bare_urls_cache: OnceLock::new(),
599 has_mixed_list_nesting_cache: OnceLock::new(),
600 html_comment_ranges,
601 table_blocks,
602 line_index,
603 jinja_ranges,
604 flavor,
605 source_file,
606 jsx_expression_ranges,
607 mdx_comment_ranges,
608 citation_ranges,
609 shortcode_ranges,
610 link_title_ranges,
611 code_span_byte_ranges: code_span_ranges,
612 inline_config,
613 obsidian_comment_ranges,
614 lazy_cont_lines_cache: OnceLock::new(),
615 }
616 }
617
618 #[inline]
621 fn binary_search_ranges(ranges: &[(usize, usize)], pos: usize) -> bool {
622 let idx = ranges.partition_point(|&(start, _)| start <= pos);
624 idx > 0 && pos < ranges[idx - 1].1
626 }
627
628 pub fn is_in_code_span_byte(&self, pos: usize) -> bool {
630 Self::binary_search_ranges(&self.code_span_byte_ranges, pos)
631 }
632
633 pub fn is_in_link(&self, pos: usize) -> bool {
635 let idx = self.links.partition_point(|link| link.byte_offset <= pos);
636 if idx > 0 && pos < self.links[idx - 1].byte_end {
637 return true;
638 }
639 let idx = self.images.partition_point(|img| img.byte_offset <= pos);
640 if idx > 0 && pos < self.images[idx - 1].byte_end {
641 return true;
642 }
643 self.is_in_reference_def(pos)
644 }
645
646 pub fn inline_config(&self) -> &InlineConfig {
648 &self.inline_config
649 }
650
651 pub fn raw_lines(&self) -> &[&'a str] {
655 &self.content_lines
656 }
657
658 pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
663 self.inline_config.is_rule_disabled(rule_name, line_number)
664 }
665
666 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
668 Arc::clone(
669 self.code_spans_cache
670 .get_or_init(|| Arc::new(element_parsers::parse_code_spans(self.content, &self.lines))),
671 )
672 }
673
674 pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
676 Arc::clone(
677 self.math_spans_cache
678 .get_or_init(|| Arc::new(element_parsers::parse_math_spans(self.content, &self.lines))),
679 )
680 }
681
682 pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
684 let math_spans = self.math_spans();
685 let idx = math_spans.partition_point(|span| span.byte_offset <= byte_pos);
687 idx > 0 && byte_pos < math_spans[idx - 1].byte_end
688 }
689
690 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
692 &self.html_comment_ranges
693 }
694
695 pub fn obsidian_comment_ranges(&self) -> &[(usize, usize)] {
698 &self.obsidian_comment_ranges
699 }
700
701 pub fn is_in_obsidian_comment(&self, byte_pos: usize) -> bool {
705 Self::binary_search_ranges(&self.obsidian_comment_ranges, byte_pos)
706 }
707
708 pub fn is_position_in_obsidian_comment(&self, line_num: usize, col: usize) -> bool {
713 if self.obsidian_comment_ranges.is_empty() {
714 return false;
715 }
716
717 let byte_pos = self.line_index.line_col_to_byte_range(line_num, col).start;
719 self.is_in_obsidian_comment(byte_pos)
720 }
721
722 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
724 Arc::clone(self.html_tags_cache.get_or_init(|| {
725 let tags = element_parsers::parse_html_tags(self.content, &self.lines, &self.code_blocks, self.flavor);
726 Arc::new(
728 tags.into_iter()
729 .filter(|tag| {
730 !self
731 .lines
732 .get(tag.line - 1)
733 .is_some_and(|l| l.in_kramdown_extension_block)
734 })
735 .collect(),
736 )
737 }))
738 }
739
740 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
742 Arc::clone(
743 self.emphasis_spans_cache
744 .get()
745 .expect("emphasis_spans_cache initialized during construction"),
746 )
747 }
748
749 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
751 Arc::clone(
752 self.table_rows_cache
753 .get_or_init(|| Arc::new(element_parsers::parse_table_rows(self.content, &self.lines))),
754 )
755 }
756
757 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
759 Arc::clone(self.bare_urls_cache.get_or_init(|| {
760 Arc::new(element_parsers::parse_bare_urls(
761 self.content,
762 &self.lines,
763 &self.code_blocks,
764 ))
765 }))
766 }
767
768 pub fn lazy_continuation_lines(&self) -> Arc<Vec<LazyContLine>> {
770 Arc::clone(self.lazy_cont_lines_cache.get_or_init(|| {
771 Arc::new(element_parsers::detect_lazy_continuation_lines(
772 self.content,
773 &self.lines,
774 &self.line_offsets,
775 ))
776 }))
777 }
778
779 pub fn has_mixed_list_nesting(&self) -> bool {
783 *self
784 .has_mixed_list_nesting_cache
785 .get_or_init(|| self.compute_mixed_list_nesting())
786 }
787
788 fn compute_mixed_list_nesting(&self) -> bool {
790 let mut stack: Vec<(usize, bool)> = Vec::new();
795 let mut last_was_blank = false;
796
797 for line_info in &self.lines {
798 if line_info.in_code_block
800 || line_info.in_front_matter
801 || line_info.in_mkdocstrings
802 || line_info.in_html_comment
803 || line_info.in_mdx_comment
804 || line_info.in_esm_block
805 {
806 continue;
807 }
808
809 if line_info.is_blank {
811 last_was_blank = true;
812 continue;
813 }
814
815 if let Some(list_item) = &line_info.list_item {
816 let current_pos = if list_item.marker_column == 1 {
818 0
819 } else {
820 list_item.marker_column
821 };
822
823 if last_was_blank && current_pos == 0 {
825 stack.clear();
826 }
827 last_was_blank = false;
828
829 while let Some(&(pos, _)) = stack.last() {
831 if pos >= current_pos {
832 stack.pop();
833 } else {
834 break;
835 }
836 }
837
838 if let Some(&(_, parent_is_ordered)) = stack.last()
840 && parent_is_ordered != list_item.is_ordered
841 {
842 return true; }
844
845 stack.push((current_pos, list_item.is_ordered));
846 } else {
847 last_was_blank = false;
849 }
850 }
851
852 false
853 }
854
855 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
857 match self.line_offsets.binary_search(&offset) {
858 Ok(line) => (line + 1, 1),
859 Err(line) => {
860 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
861 (line, offset - line_start + 1)
862 }
863 }
864 }
865
866 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
868 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
870 return true;
871 }
872
873 self.is_byte_offset_in_code_span(pos)
875 }
876
877 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
879 if line_num > 0 {
880 self.lines.get(line_num - 1)
881 } else {
882 None
883 }
884 }
885
886 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
888 self.line_info(line_num).map(|info| info.byte_offset)
889 }
890
891 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
893 let normalized_id = ref_id.to_lowercase();
894 self.reference_defs_map
895 .get(&normalized_id)
896 .map(|&idx| self.reference_defs[idx].url.as_str())
897 }
898
899 pub fn get_reference_def(&self, ref_id: &str) -> Option<&ReferenceDef> {
901 let normalized_id = ref_id.to_lowercase();
902 self.reference_defs_map
903 .get(&normalized_id)
904 .map(|&idx| &self.reference_defs[idx])
905 }
906
907 pub fn has_reference_def(&self, ref_id: &str) -> bool {
909 let normalized_id = ref_id.to_lowercase();
910 self.reference_defs_map.contains_key(&normalized_id)
911 }
912
913 pub fn is_in_list_block(&self, line_num: usize) -> bool {
915 self.list_blocks
916 .iter()
917 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
918 }
919
920 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
922 self.list_blocks
923 .iter()
924 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
925 }
926
927 pub fn is_in_code_block(&self, line_num: usize) -> bool {
931 if line_num == 0 || line_num > self.lines.len() {
932 return false;
933 }
934 self.lines[line_num - 1].in_code_block
935 }
936
937 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
939 if line_num == 0 || line_num > self.lines.len() {
940 return false;
941 }
942 self.lines[line_num - 1].in_front_matter
943 }
944
945 pub fn is_in_html_block(&self, line_num: usize) -> bool {
947 if line_num == 0 || line_num > self.lines.len() {
948 return false;
949 }
950 self.lines[line_num - 1].in_html_block
951 }
952
953 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
955 if line_num == 0 || line_num > self.lines.len() {
956 return false;
957 }
958
959 let col_0indexed = if col > 0 { col - 1 } else { 0 };
963 let code_spans = self.code_spans();
964 code_spans.iter().any(|span| {
965 if line_num < span.line || line_num > span.end_line {
967 return false;
968 }
969
970 if span.line == span.end_line {
971 col_0indexed >= span.start_col && col_0indexed < span.end_col
973 } else if line_num == span.line {
974 col_0indexed >= span.start_col
976 } else if line_num == span.end_line {
977 col_0indexed < span.end_col
979 } else {
980 true
982 }
983 })
984 }
985
986 #[inline]
988 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
989 let code_spans = self.code_spans();
990 let idx = code_spans.partition_point(|span| span.byte_offset <= byte_offset);
991 idx > 0 && byte_offset < code_spans[idx - 1].byte_end
992 }
993
994 #[inline]
996 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
997 let idx = self.reference_defs.partition_point(|rd| rd.byte_offset <= byte_pos);
998 idx > 0 && byte_pos < self.reference_defs[idx - 1].byte_end
999 }
1000
1001 #[inline]
1003 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
1004 let idx = self.html_comment_ranges.partition_point(|r| r.start <= byte_pos);
1005 idx > 0 && byte_pos < self.html_comment_ranges[idx - 1].end
1006 }
1007
1008 #[inline]
1011 pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1012 let tags = self.html_tags();
1013 let idx = tags.partition_point(|tag| tag.byte_offset <= byte_pos);
1014 idx > 0 && byte_pos < tags[idx - 1].byte_end
1015 }
1016
1017 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1019 Self::binary_search_ranges(&self.jinja_ranges, byte_pos)
1020 }
1021
1022 #[inline]
1024 pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
1025 Self::binary_search_ranges(&self.jsx_expression_ranges, byte_pos)
1026 }
1027
1028 #[inline]
1030 pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
1031 Self::binary_search_ranges(&self.mdx_comment_ranges, byte_pos)
1032 }
1033
1034 pub fn jsx_expression_ranges(&self) -> &[(usize, usize)] {
1036 &self.jsx_expression_ranges
1037 }
1038
1039 pub fn mdx_comment_ranges(&self) -> &[(usize, usize)] {
1041 &self.mdx_comment_ranges
1042 }
1043
1044 #[inline]
1047 pub fn is_in_citation(&self, byte_pos: usize) -> bool {
1048 let idx = self.citation_ranges.partition_point(|r| r.start <= byte_pos);
1049 idx > 0 && byte_pos < self.citation_ranges[idx - 1].end
1050 }
1051
1052 pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
1054 &self.citation_ranges
1055 }
1056
1057 #[inline]
1059 pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
1060 Self::binary_search_ranges(&self.shortcode_ranges, byte_pos)
1061 }
1062
1063 pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
1065 &self.shortcode_ranges
1066 }
1067
1068 pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1070 Self::binary_search_ranges(&self.link_title_ranges, byte_pos)
1071 }
1072
1073 pub fn has_char(&self, ch: char) -> bool {
1075 match ch {
1076 '#' => self.char_frequency.hash_count > 0,
1077 '*' => self.char_frequency.asterisk_count > 0,
1078 '_' => self.char_frequency.underscore_count > 0,
1079 '-' => self.char_frequency.hyphen_count > 0,
1080 '+' => self.char_frequency.plus_count > 0,
1081 '>' => self.char_frequency.gt_count > 0,
1082 '|' => self.char_frequency.pipe_count > 0,
1083 '[' => self.char_frequency.bracket_count > 0,
1084 '`' => self.char_frequency.backtick_count > 0,
1085 '<' => self.char_frequency.lt_count > 0,
1086 '!' => self.char_frequency.exclamation_count > 0,
1087 '\n' => self.char_frequency.newline_count > 0,
1088 _ => self.content.contains(ch), }
1090 }
1091
1092 pub fn char_count(&self, ch: char) -> usize {
1094 match ch {
1095 '#' => self.char_frequency.hash_count,
1096 '*' => self.char_frequency.asterisk_count,
1097 '_' => self.char_frequency.underscore_count,
1098 '-' => self.char_frequency.hyphen_count,
1099 '+' => self.char_frequency.plus_count,
1100 '>' => self.char_frequency.gt_count,
1101 '|' => self.char_frequency.pipe_count,
1102 '[' => self.char_frequency.bracket_count,
1103 '`' => self.char_frequency.backtick_count,
1104 '<' => self.char_frequency.lt_count,
1105 '!' => self.char_frequency.exclamation_count,
1106 '\n' => self.char_frequency.newline_count,
1107 _ => self.content.matches(ch).count(), }
1109 }
1110
1111 pub fn likely_has_headings(&self) -> bool {
1113 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 || self.content.contains('=') }
1115
1116 pub fn likely_has_lists(&self) -> bool {
1118 self.char_frequency.asterisk_count > 0
1119 || self.char_frequency.hyphen_count > 0
1120 || self.char_frequency.plus_count > 0
1121 }
1122
1123 pub fn likely_has_emphasis(&self) -> bool {
1125 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1126 }
1127
1128 pub fn likely_has_tables(&self) -> bool {
1130 self.char_frequency.pipe_count > 2
1131 }
1132
1133 pub fn likely_has_blockquotes(&self) -> bool {
1135 self.char_frequency.gt_count > 0
1136 }
1137
1138 pub fn likely_has_code(&self) -> bool {
1140 self.char_frequency.backtick_count > 0
1141 }
1142
1143 pub fn likely_has_links_or_images(&self) -> bool {
1145 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1146 }
1147
1148 pub fn likely_has_html(&self) -> bool {
1150 self.char_frequency.lt_count > 0
1151 }
1152
1153 pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1158 if let Some(line_info) = self.lines.get(line_idx)
1159 && let Some(ref bq) = line_info.blockquote
1160 {
1161 bq.prefix.trim_end().to_string()
1162 } else {
1163 String::new()
1164 }
1165 }
1166
1167 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1169 self.html_tags()
1170 .iter()
1171 .filter(|tag| tag.line == line_num)
1172 .cloned()
1173 .collect()
1174 }
1175
1176 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1178 self.emphasis_spans()
1179 .iter()
1180 .filter(|span| span.line == line_num)
1181 .cloned()
1182 .collect()
1183 }
1184
1185 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1187 self.table_rows()
1188 .iter()
1189 .filter(|row| row.line == line_num)
1190 .cloned()
1191 .collect()
1192 }
1193
1194 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1196 self.bare_urls()
1197 .iter()
1198 .filter(|url| url.line == line_num)
1199 .cloned()
1200 .collect()
1201 }
1202
1203 #[inline]
1209 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1210 let idx = match lines.binary_search_by(|line| {
1212 if byte_offset < line.byte_offset {
1213 std::cmp::Ordering::Greater
1214 } else if byte_offset > line.byte_offset + line.byte_len {
1215 std::cmp::Ordering::Less
1216 } else {
1217 std::cmp::Ordering::Equal
1218 }
1219 }) {
1220 Ok(idx) => idx,
1221 Err(idx) => idx.saturating_sub(1),
1222 };
1223
1224 let line = &lines[idx];
1225 let line_num = idx + 1;
1226 let col = byte_offset.saturating_sub(line.byte_offset);
1227
1228 (idx, line_num, col)
1229 }
1230
1231 #[inline]
1233 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1234 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1236
1237 if idx > 0 {
1239 let span = &code_spans[idx - 1];
1240 if offset >= span.byte_offset && offset < span.byte_end {
1241 return true;
1242 }
1243 }
1244
1245 false
1246 }
1247
1248 #[must_use]
1268 pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
1269 ValidHeadingsIter::new(&self.lines)
1270 }
1271
1272 #[must_use]
1276 pub fn has_valid_headings(&self) -> bool {
1277 self.lines
1278 .iter()
1279 .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
1280 }
1281}
1282
1283fn detect_footnote_definitions(content: &str, lines: &mut [types::LineInfo], line_offsets: &[usize]) {
1292 use pulldown_cmark::{CodeBlockKind, Event, Parser, Tag, TagEnd};
1293
1294 let options = crate::utils::rumdl_parser_options();
1295 let parser = Parser::new_ext(content, options).into_offset_iter();
1296
1297 let mut footnote_ranges: Vec<(usize, usize)> = Vec::new();
1299 let mut fenced_code_ranges: Vec<(usize, usize)> = Vec::new();
1300 let mut in_footnote = false;
1301
1302 for (event, range) in parser {
1303 match event {
1304 Event::Start(Tag::FootnoteDefinition(_)) => {
1305 in_footnote = true;
1306 footnote_ranges.push((range.start, range.end));
1307 }
1308 Event::End(TagEnd::FootnoteDefinition) => {
1309 in_footnote = false;
1310 }
1311 Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(_))) if in_footnote => {
1312 fenced_code_ranges.push((range.start, range.end));
1313 }
1314 _ => {}
1315 }
1316 }
1317
1318 let byte_to_line = |byte_offset: usize| -> usize {
1319 line_offsets
1320 .partition_point(|&offset| offset <= byte_offset)
1321 .saturating_sub(1)
1322 };
1323
1324 for &(start, end) in &footnote_ranges {
1326 let start_line = byte_to_line(start);
1327 let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
1328
1329 for line in &mut lines[start_line..end_line] {
1330 line.in_footnote_definition = true;
1331 line.in_code_block = false;
1332 }
1333 }
1334
1335 for &(start, end) in &fenced_code_ranges {
1337 let start_line = byte_to_line(start);
1338 let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
1339
1340 for line in &mut lines[start_line..end_line] {
1341 line.in_code_block = true;
1342 }
1343 }
1344}