1pub mod types;
2pub use types::*;
3
4mod element_parsers;
5mod flavor_detection;
6mod heading_detection;
7mod line_computation;
8mod link_parser;
9mod list_blocks;
10#[cfg(test)]
11mod tests;
12
13use crate::config::MarkdownFlavor;
14use crate::inline_config::InlineConfig;
15use crate::rules::front_matter_utils::FrontMatterUtils;
16use crate::utils::code_block_utils::{CodeBlockDetail, CodeBlockUtils};
17use std::collections::HashMap;
18use std::path::PathBuf;
19
/// Evaluates `$code`, optionally reporting how long it took.
///
/// The clock is started before `$code` runs; when `$profile` is true the elapsed
/// time is written to stderr, labelled with `$name`. The value of `$code` is the
/// value of the whole macro invocation.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let timer = std::time::Instant::now();
        let output = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, timer.elapsed());
        }
        output
    }};
}
32
/// wasm32 variant of `profile_section!`: evaluates `$code` and yields its value.
///
/// `$name` and `$profile` are accepted so call sites look the same on every target,
/// but they are never expanded: no timing is taken and nothing is printed.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
37
/// Borrowed bundle of byte-range lists handed to the basic line-info computation.
///
/// `LintContext::new` fills this from locally computed vectors and passes it to
/// `line_computation::compute_basic_line_info`.
pub(super) struct SkipByteRanges<'a> {
    /// Ranges returned by `skip_context::compute_html_comment_ranges`.
    pub(super) html_comment_ranges: &'a [crate::utils::skip_context::ByteRange],
    /// Ranges returned by `mkdocstrings_refs::detect_autodoc_block_ranges`.
    pub(super) autodoc_ranges: &'a [crate::utils::skip_context::ByteRange],
    /// Quarto div block ranges; `new` leaves this empty for non-Quarto flavors.
    pub(super) quarto_div_ranges: &'a [crate::utils::skip_context::ByteRange],
    /// PyMdown block ranges; `new` leaves this empty for non-MkDocs flavors.
    pub(super) pymdown_block_ranges: &'a [crate::utils::skip_context::ByteRange],
}
46
47use std::sync::{Arc, OnceLock};
48
/// Map from a `usize` key to a per-list-item tuple.
///
// NOTE(review): the key is presumably a line number and the tuple presumably describes
// one list item, but neither the key space nor the meaning of the individual tuple
// elements is visible in this file — confirm against the code that builds the map.
pub(super) type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;
51
/// A list of `(usize, usize)` pairs.
///
// NOTE(review): presumably `(start, end)` byte offsets into the document, matching the
// other range vectors in this module — confirm against the users of this alias.
pub(super) type ByteRanges = Vec<(usize, usize)>;
54
/// Pre-computed analysis of one Markdown document, built once by [`LintContext::new`].
///
/// Eagerly computed data lives in plain fields; data that is only needed by some
/// callers sits behind `OnceLock` caches and is exposed through accessor methods.
pub struct LintContext<'a> {
    /// The document text this context was built from.
    pub content: &'a str,
    /// `content.lines()` collected once; exposed through `raw_lines()`.
    content_lines: Vec<&'a str>,
    /// Byte offset at which each line starts: `0`, then the byte after every `'\n'`.
    pub line_offsets: Vec<usize>,
    /// `(start, end)` byte ranges of code blocks, narrowed in `new` to the lines that
    /// are still flagged `in_code_block` after all line detectors have run.
    pub code_blocks: Vec<(usize, usize)>,
    /// `code_block_details` from `CodeBlockUtils::detect_code_blocks_and_spans`.
    pub code_block_details: Vec<CodeBlockDetail>,
    /// `strong_spans` from `CodeBlockUtils::detect_code_blocks_and_spans`.
    pub strong_spans: Vec<crate::utils::code_block_utils::StrongSpanDetail>,
    /// `line_to_list` from `CodeBlockUtils::detect_code_blocks_and_spans`.
    pub line_to_list: crate::utils::code_block_utils::LineToListMap,
    /// `list_start_values` from `CodeBlockUtils::detect_code_blocks_and_spans`.
    pub list_start_values: crate::utils::code_block_utils::ListStartValues,
    /// Per-line information; index `n - 1` describes 1-based line `n` (see `line_info`).
    pub lines: Vec<LineInfo>,
    /// Parsed links, excluding those on lines inside a kramdown extension block.
    pub links: Vec<ParsedLink<'a>>,
    /// Parsed images, excluding those on lines inside a kramdown extension block.
    pub images: Vec<ParsedImage<'a>>,
    /// Broken links, excluding those that start inside a kramdown extension block.
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Footnote references, excluding those inside a kramdown extension block.
    pub footnote_refs: Vec<FootnoteRef>,
    /// Reference definitions, excluding those inside a kramdown extension block.
    pub reference_defs: Vec<ReferenceDef>,
    /// Lower-cased definition id -> index into `reference_defs`.
    reference_defs_map: HashMap<String, usize>,
    /// Code spans; pre-filled by `new`, read through `code_spans()`.
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>,
    /// Math spans; computed on first call to `math_spans()`.
    math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>,
    /// List blocks, excluding those that start inside a kramdown extension block.
    pub list_blocks: Vec<ListBlock>,
    /// Counts of selected characters in `content` (see `has_char` / `char_count`).
    pub char_frequency: CharFrequency,
    /// HTML tags; computed on first call to `html_tags()`.
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>,
    /// Emphasis spans; pre-filled by `new`, read through `emphasis_spans()`.
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>,
    /// Table rows; computed on first call to `table_rows()`.
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>,
    /// Bare URLs; computed on first call to `bare_urls()`.
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>,
    /// Result of `compute_mixed_list_nesting`; computed on first use.
    has_mixed_list_nesting_cache: OnceLock<bool>,
    /// Byte ranges of HTML comments; exposed through `html_comment_ranges()`.
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Table blocks; their `start_line` / `end_line` are compared against 0-based
    /// line indices (see `is_in_table_block`).
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Line index built from `content`, a copy of `line_offsets`, and `code_blocks`.
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    /// Ranges returned by `jinja_utils::find_jinja_ranges`.
    jinja_ranges: Vec<(usize, usize)>,
    /// The Markdown flavor this context was built for.
    pub flavor: MarkdownFlavor,
    /// The `source_file` argument given to `new`, stored unchanged.
    pub source_file: Option<PathBuf>,
    /// JSX expression ranges reported by `detect_jsx_and_mdx_comments`.
    jsx_expression_ranges: Vec<(usize, usize)>,
    /// MDX comment ranges reported by `detect_jsx_and_mdx_comments`.
    mdx_comment_ranges: Vec<(usize, usize)>,
    /// Citation ranges; `new` leaves this empty for non-Quarto flavors.
    citation_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// `(start, end)` of every `HUGO_SHORTCODE_REGEX` match in `content`.
    shortcode_ranges: Vec<(usize, usize)>,
    /// `(title_byte_start, title_byte_end)` of reference definitions that have both.
    link_title_ranges: Vec<(usize, usize)>,
    /// `code_spans` ranges from `CodeBlockUtils::detect_code_blocks_and_spans`.
    code_span_byte_ranges: Vec<(usize, usize)>,
    /// Inline configuration parsed from `content` and `code_blocks`.
    inline_config: InlineConfig,
    /// Ranges returned by `flavor_detection::detect_obsidian_comments`.
    obsidian_comment_ranges: Vec<(usize, usize)>,
    /// Lazy continuation lines; computed on first call to `lazy_continuation_lines()`.
    lazy_cont_lines_cache: OnceLock<Arc<Vec<LazyContLine>>>,
}
96
97impl<'a> LintContext<'a> {
98 pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
99 #[cfg(not(target_arch = "wasm32"))]
100 let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
101
102 let line_offsets = profile_section!("Line offsets", profile, {
103 let mut offsets = vec![0];
104 for (i, c) in content.char_indices() {
105 if c == '\n' {
106 offsets.push(i + 1);
107 }
108 }
109 offsets
110 });
111
112 let content_lines: Vec<&str> = content.lines().collect();
114
115 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
117
118 let parse_result = profile_section!(
120 "Code blocks",
121 profile,
122 CodeBlockUtils::detect_code_blocks_and_spans(content)
123 );
124 let mut code_blocks = parse_result.code_blocks;
125 let code_span_ranges = parse_result.code_spans;
126 let code_block_details = parse_result.code_block_details;
127 let strong_spans = parse_result.strong_spans;
128 let line_to_list = parse_result.line_to_list;
129 let list_start_values = parse_result.list_start_values;
130
131 let html_comment_ranges = profile_section!(
133 "HTML comment ranges",
134 profile,
135 crate::utils::skip_context::compute_html_comment_ranges(content)
136 );
137
138 let autodoc_ranges = profile_section!(
142 "Autodoc block ranges",
143 profile,
144 crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
145 );
146
147 let quarto_div_ranges = profile_section!("Quarto div ranges", profile, {
149 if flavor == MarkdownFlavor::Quarto {
150 crate::utils::quarto_divs::detect_div_block_ranges(content)
151 } else {
152 Vec::new()
153 }
154 });
155
156 let pymdown_block_ranges = profile_section!("PyMdown block ranges", profile, {
158 if flavor == MarkdownFlavor::MkDocs {
159 crate::utils::pymdown_blocks::detect_block_ranges(content)
160 } else {
161 Vec::new()
162 }
163 });
164
165 let skip_ranges = SkipByteRanges {
168 html_comment_ranges: &html_comment_ranges,
169 autodoc_ranges: &autodoc_ranges,
170 quarto_div_ranges: &quarto_div_ranges,
171 pymdown_block_ranges: &pymdown_block_ranges,
172 };
173 let (mut lines, emphasis_spans) = profile_section!(
174 "Basic line info",
175 profile,
176 line_computation::compute_basic_line_info(
177 content,
178 &content_lines,
179 &line_offsets,
180 &code_blocks,
181 flavor,
182 &skip_ranges,
183 front_matter_end,
184 )
185 );
186
187 profile_section!(
189 "HTML blocks",
190 profile,
191 heading_detection::detect_html_blocks(content, &mut lines)
192 );
193
194 profile_section!(
196 "ESM blocks",
197 profile,
198 flavor_detection::detect_esm_blocks(content, &mut lines, flavor)
199 );
200
201 profile_section!(
203 "JSX block detection",
204 profile,
205 flavor_detection::detect_jsx_blocks(content, &mut lines, flavor)
206 );
207
208 let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
210 "JSX/MDX detection",
211 profile,
212 flavor_detection::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
213 );
214
215 profile_section!(
220 "Markdown-in-HTML blocks",
221 profile,
222 flavor_detection::detect_markdown_html_blocks(&content_lines, &mut lines)
223 );
224
225 profile_section!(
227 "MkDocs constructs",
228 profile,
229 flavor_detection::detect_mkdocs_line_info(&content_lines, &mut lines, flavor)
230 );
231
232 profile_section!(
237 "Footnote definitions",
238 profile,
239 detect_footnote_definitions(content, &mut lines, &line_offsets)
240 );
241
242 {
245 let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
246 for &(start, end) in &code_blocks {
247 let start_line = line_offsets
248 .partition_point(|&offset| offset <= start)
249 .saturating_sub(1);
250 let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
251
252 let mut sub_start: Option<usize> = None;
253 for (i, &offset) in line_offsets[start_line..end_line]
254 .iter()
255 .enumerate()
256 .map(|(j, o)| (j + start_line, o))
257 {
258 let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
259 if is_real_code && sub_start.is_none() {
260 let byte_start = if i == start_line { start } else { offset };
261 sub_start = Some(byte_start);
262 } else if !is_real_code && sub_start.is_some() {
263 new_code_blocks.push((sub_start.unwrap(), offset));
264 sub_start = None;
265 }
266 }
267 if let Some(s) = sub_start {
268 new_code_blocks.push((s, end));
269 }
270 }
271 code_blocks = new_code_blocks;
272 }
273
274 let has_markdown_html = lines.iter().any(|l| l.in_mkdocs_html_markdown);
282 if flavor == MarkdownFlavor::MkDocs || has_markdown_html {
283 let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
284 for &(start, end) in &code_blocks {
285 let start_line = line_offsets
286 .partition_point(|&offset| offset <= start)
287 .saturating_sub(1);
288 let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
289
290 let mut sub_start: Option<usize> = None;
292 for (i, &offset) in line_offsets[start_line..end_line]
293 .iter()
294 .enumerate()
295 .map(|(j, o)| (j + start_line, o))
296 {
297 let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
298 if is_real_code && sub_start.is_none() {
299 let byte_start = if i == start_line { start } else { offset };
300 sub_start = Some(byte_start);
301 } else if !is_real_code && sub_start.is_some() {
302 new_code_blocks.push((sub_start.unwrap(), offset));
303 sub_start = None;
304 }
305 }
306 if let Some(s) = sub_start {
307 new_code_blocks.push((s, end));
308 }
309 }
310 code_blocks = new_code_blocks;
311 }
312
313 if flavor.supports_jsx() {
317 let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
318 for &(start, end) in &code_blocks {
319 let start_line = line_offsets
320 .partition_point(|&offset| offset <= start)
321 .saturating_sub(1);
322 let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
323
324 let mut sub_start: Option<usize> = None;
325 for (i, &offset) in line_offsets[start_line..end_line]
326 .iter()
327 .enumerate()
328 .map(|(j, o)| (j + start_line, o))
329 {
330 let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
331 if is_real_code && sub_start.is_none() {
332 let byte_start = if i == start_line { start } else { offset };
333 sub_start = Some(byte_start);
334 } else if !is_real_code && sub_start.is_some() {
335 new_code_blocks.push((sub_start.unwrap(), offset));
336 sub_start = None;
337 }
338 }
339 if let Some(s) = sub_start {
340 new_code_blocks.push((s, end));
341 }
342 }
343 code_blocks = new_code_blocks;
344 }
345
346 profile_section!(
348 "Kramdown constructs",
349 profile,
350 flavor_detection::detect_kramdown_line_info(content, &mut lines, flavor)
351 );
352
353 for line in &mut lines {
358 if line.in_kramdown_extension_block {
359 line.list_item = None;
360 line.is_horizontal_rule = false;
361 line.blockquote = None;
362 line.is_kramdown_block_ial = false;
363 }
364 }
365
366 let obsidian_comment_ranges = profile_section!(
368 "Obsidian comments",
369 profile,
370 flavor_detection::detect_obsidian_comments(content, &mut lines, flavor, &code_span_ranges)
371 );
372
373 let pulldown_result = profile_section!(
377 "Links, images & link ranges",
378 profile,
379 link_parser::parse_links_images_pulldown(content, &lines, &code_blocks, flavor, &html_comment_ranges)
380 );
381
382 profile_section!(
384 "Headings & blockquotes",
385 profile,
386 heading_detection::detect_headings_and_blockquotes(
387 &content_lines,
388 &mut lines,
389 flavor,
390 &html_comment_ranges,
391 &pulldown_result.link_byte_ranges,
392 front_matter_end,
393 )
394 );
395
396 for line in &mut lines {
398 if line.in_kramdown_extension_block {
399 line.heading = None;
400 }
401 }
402
403 let mut code_spans = profile_section!(
405 "Code spans",
406 profile,
407 element_parsers::build_code_spans_from_ranges(content, &lines, &code_span_ranges)
408 );
409
410 if flavor == MarkdownFlavor::MkDocs {
414 let extra = profile_section!(
415 "MkDocs code spans",
416 profile,
417 element_parsers::scan_mkdocs_container_code_spans(content, &lines, &code_span_ranges,)
418 );
419 if !extra.is_empty() {
420 code_spans.extend(extra);
421 code_spans.sort_by_key(|span| span.byte_offset);
422 }
423 }
424
425 if flavor == MarkdownFlavor::MDX {
430 let extra = profile_section!(
431 "MDX JSX code spans",
432 profile,
433 element_parsers::scan_jsx_block_code_spans(content, &lines, &code_span_ranges)
434 );
435 if !extra.is_empty() {
436 code_spans.extend(extra);
437 code_spans.sort_by_key(|span| span.byte_offset);
438 }
439 }
440
441 for span in &code_spans {
444 if span.end_line > span.line {
445 for line_num in (span.line + 1)..=span.end_line {
447 if let Some(line_info) = lines.get_mut(line_num - 1) {
448 line_info.in_code_span_continuation = true;
449 }
450 }
451 }
452 }
453
454 let (links, images, broken_links, footnote_refs) = profile_section!(
456 "Links & images finalize",
457 profile,
458 link_parser::finalize_links_and_images(
459 content,
460 &lines,
461 &code_blocks,
462 &code_spans,
463 flavor,
464 &html_comment_ranges,
465 pulldown_result
466 )
467 );
468
469 let reference_defs = profile_section!(
470 "Reference defs",
471 profile,
472 link_parser::parse_reference_defs(content, &lines)
473 );
474
475 let list_blocks = profile_section!("List blocks", profile, list_blocks::parse_list_blocks(content, &lines));
476
477 let char_frequency = profile_section!(
479 "Char frequency",
480 profile,
481 line_computation::compute_char_frequency(content)
482 );
483
484 let table_blocks = profile_section!(
486 "Table blocks",
487 profile,
488 crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
489 content,
490 &code_blocks,
491 &code_spans,
492 &html_comment_ranges,
493 )
494 );
495
496 let links = links
499 .into_iter()
500 .filter(|link| !lines.get(link.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
501 .collect::<Vec<_>>();
502 let images = images
503 .into_iter()
504 .filter(|img| !lines.get(img.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
505 .collect::<Vec<_>>();
506 let broken_links = broken_links
507 .into_iter()
508 .filter(|bl| {
509 let line_idx = line_offsets
511 .partition_point(|&offset| offset <= bl.span.start)
512 .saturating_sub(1);
513 !lines.get(line_idx).is_some_and(|l| l.in_kramdown_extension_block)
514 })
515 .collect::<Vec<_>>();
516 let footnote_refs = footnote_refs
517 .into_iter()
518 .filter(|fr| !lines.get(fr.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
519 .collect::<Vec<_>>();
520 let reference_defs = reference_defs
521 .into_iter()
522 .filter(|def| !lines.get(def.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
523 .collect::<Vec<_>>();
524 let list_blocks = list_blocks
525 .into_iter()
526 .filter(|block| {
527 !lines
528 .get(block.start_line - 1)
529 .is_some_and(|l| l.in_kramdown_extension_block)
530 })
531 .collect::<Vec<_>>();
532 let table_blocks = table_blocks
533 .into_iter()
534 .filter(|block| {
535 !lines
537 .get(block.start_line)
538 .is_some_and(|l| l.in_kramdown_extension_block)
539 })
540 .collect::<Vec<_>>();
541 let emphasis_spans = emphasis_spans
542 .into_iter()
543 .filter(|span| !lines.get(span.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
544 .collect::<Vec<_>>();
545
546 let reference_defs_map: HashMap<String, usize> = reference_defs
548 .iter()
549 .enumerate()
550 .map(|(idx, def)| (def.id.to_lowercase(), idx))
551 .collect();
552
553 let link_title_ranges: Vec<(usize, usize)> = reference_defs
555 .iter()
556 .filter_map(|def| match (def.title_byte_start, def.title_byte_end) {
557 (Some(start), Some(end)) => Some((start, end)),
558 _ => None,
559 })
560 .collect();
561
562 let line_index = profile_section!(
564 "Line index",
565 profile,
566 crate::utils::range_utils::LineIndex::with_line_starts_and_code_blocks(
567 content,
568 line_offsets.clone(),
569 &code_blocks,
570 )
571 );
572
573 let jinja_ranges = profile_section!(
575 "Jinja ranges",
576 profile,
577 crate::utils::jinja_utils::find_jinja_ranges(content)
578 );
579
580 let citation_ranges = profile_section!("Citation ranges", profile, {
582 if flavor == MarkdownFlavor::Quarto {
583 crate::utils::quarto_divs::find_citation_ranges(content)
584 } else {
585 Vec::new()
586 }
587 });
588
589 let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
591 use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
592 let mut ranges = Vec::new();
593 for mat in HUGO_SHORTCODE_REGEX.find_iter(content) {
594 ranges.push((mat.start(), mat.end()));
595 }
596 ranges
597 });
598
599 let inline_config = InlineConfig::from_content_with_code_blocks(content, &code_blocks);
600
601 Self {
602 content,
603 content_lines,
604 line_offsets,
605 code_blocks,
606 code_block_details,
607 strong_spans,
608 line_to_list,
609 list_start_values,
610 lines,
611 links,
612 images,
613 broken_links,
614 footnote_refs,
615 reference_defs,
616 reference_defs_map,
617 code_spans_cache: OnceLock::from(Arc::new(code_spans)),
618 math_spans_cache: OnceLock::new(), list_blocks,
620 char_frequency,
621 html_tags_cache: OnceLock::new(),
622 emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
623 table_rows_cache: OnceLock::new(),
624 bare_urls_cache: OnceLock::new(),
625 has_mixed_list_nesting_cache: OnceLock::new(),
626 html_comment_ranges,
627 table_blocks,
628 line_index,
629 jinja_ranges,
630 flavor,
631 source_file,
632 jsx_expression_ranges,
633 mdx_comment_ranges,
634 citation_ranges,
635 shortcode_ranges,
636 link_title_ranges,
637 code_span_byte_ranges: code_span_ranges,
638 inline_config,
639 obsidian_comment_ranges,
640 lazy_cont_lines_cache: OnceLock::new(),
641 }
642 }
643
644 #[inline]
647 fn binary_search_ranges(ranges: &[(usize, usize)], pos: usize) -> bool {
648 let idx = ranges.partition_point(|&(start, _)| start <= pos);
650 idx > 0 && pos < ranges[idx - 1].1
652 }
653
654 pub fn is_in_code_span_byte(&self, pos: usize) -> bool {
656 Self::binary_search_ranges(&self.code_span_byte_ranges, pos)
657 }
658
659 pub fn is_in_link(&self, pos: usize) -> bool {
661 let idx = self.links.partition_point(|link| link.byte_offset <= pos);
662 if idx > 0 && pos < self.links[idx - 1].byte_end {
663 return true;
664 }
665 let idx = self.images.partition_point(|img| img.byte_offset <= pos);
666 if idx > 0 && pos < self.images[idx - 1].byte_end {
667 return true;
668 }
669 self.is_in_reference_def(pos)
670 }
671
672 pub fn inline_config(&self) -> &InlineConfig {
674 &self.inline_config
675 }
676
677 pub fn raw_lines(&self) -> &[&'a str] {
681 &self.content_lines
682 }
683
684 pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
689 self.inline_config.is_rule_disabled(rule_name, line_number)
690 }
691
692 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
694 Arc::clone(
695 self.code_spans_cache
696 .get_or_init(|| Arc::new(element_parsers::parse_code_spans(self.content, &self.lines))),
697 )
698 }
699
700 pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
702 Arc::clone(
703 self.math_spans_cache
704 .get_or_init(|| Arc::new(element_parsers::parse_math_spans(self.content, &self.lines))),
705 )
706 }
707
708 pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
710 let math_spans = self.math_spans();
711 let idx = math_spans.partition_point(|span| span.byte_offset <= byte_pos);
713 idx > 0 && byte_pos < math_spans[idx - 1].byte_end
714 }
715
716 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
718 &self.html_comment_ranges
719 }
720
721 pub fn is_in_obsidian_comment(&self, byte_pos: usize) -> bool {
725 Self::binary_search_ranges(&self.obsidian_comment_ranges, byte_pos)
726 }
727
728 pub fn is_position_in_obsidian_comment(&self, line_num: usize, col: usize) -> bool {
733 if self.obsidian_comment_ranges.is_empty() {
734 return false;
735 }
736
737 let byte_pos = self.line_index.line_col_to_byte_range(line_num, col).start;
739 self.is_in_obsidian_comment(byte_pos)
740 }
741
742 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
744 Arc::clone(self.html_tags_cache.get_or_init(|| {
745 let tags = element_parsers::parse_html_tags(self.content, &self.lines, &self.code_blocks, self.flavor);
746 Arc::new(
748 tags.into_iter()
749 .filter(|tag| {
750 !self
751 .lines
752 .get(tag.line - 1)
753 .is_some_and(|l| l.in_kramdown_extension_block)
754 })
755 .collect(),
756 )
757 }))
758 }
759
760 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
762 Arc::clone(
763 self.emphasis_spans_cache
764 .get()
765 .expect("emphasis_spans_cache initialized during construction"),
766 )
767 }
768
769 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
771 Arc::clone(
772 self.table_rows_cache
773 .get_or_init(|| Arc::new(element_parsers::parse_table_rows(self.content, &self.lines))),
774 )
775 }
776
777 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
779 Arc::clone(self.bare_urls_cache.get_or_init(|| {
780 Arc::new(element_parsers::parse_bare_urls(
781 self.content,
782 &self.lines,
783 &self.code_blocks,
784 ))
785 }))
786 }
787
788 pub fn lazy_continuation_lines(&self) -> Arc<Vec<LazyContLine>> {
790 Arc::clone(self.lazy_cont_lines_cache.get_or_init(|| {
791 Arc::new(element_parsers::detect_lazy_continuation_lines(
792 self.content,
793 &self.lines,
794 &self.line_offsets,
795 ))
796 }))
797 }
798
799 pub fn has_mixed_list_nesting(&self) -> bool {
803 *self
804 .has_mixed_list_nesting_cache
805 .get_or_init(|| self.compute_mixed_list_nesting())
806 }
807
808 fn compute_mixed_list_nesting(&self) -> bool {
810 let mut stack: Vec<(usize, bool)> = Vec::new();
815 let mut last_was_blank = false;
816
817 for line_info in &self.lines {
818 if line_info.in_code_block
820 || line_info.in_front_matter
821 || line_info.in_mkdocstrings
822 || line_info.in_html_comment
823 || line_info.in_mdx_comment
824 || line_info.in_esm_block
825 {
826 continue;
827 }
828
829 if line_info.is_blank {
831 last_was_blank = true;
832 continue;
833 }
834
835 if let Some(list_item) = &line_info.list_item {
836 let current_pos = if list_item.marker_column == 1 {
838 0
839 } else {
840 list_item.marker_column
841 };
842
843 if last_was_blank && current_pos == 0 {
845 stack.clear();
846 }
847 last_was_blank = false;
848
849 while let Some(&(pos, _)) = stack.last() {
851 if pos >= current_pos {
852 stack.pop();
853 } else {
854 break;
855 }
856 }
857
858 if let Some(&(_, parent_is_ordered)) = stack.last()
860 && parent_is_ordered != list_item.is_ordered
861 {
862 return true; }
864
865 stack.push((current_pos, list_item.is_ordered));
866 } else {
867 last_was_blank = false;
869 }
870 }
871
872 false
873 }
874
875 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
877 match self.line_offsets.binary_search(&offset) {
878 Ok(line) => (line + 1, 1),
879 Err(line) => {
880 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
881 (line, offset - line_start + 1)
882 }
883 }
884 }
885
886 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
888 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
890 return true;
891 }
892
893 self.is_byte_offset_in_code_span(pos)
895 }
896
897 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
899 if line_num > 0 {
900 self.lines.get(line_num - 1)
901 } else {
902 None
903 }
904 }
905
906 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
908 let normalized_id = ref_id.to_lowercase();
909 self.reference_defs_map
910 .get(&normalized_id)
911 .map(|&idx| self.reference_defs[idx].url.as_str())
912 }
913
914 pub fn is_in_list_block(&self, line_num: usize) -> bool {
916 self.list_blocks
917 .iter()
918 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
919 }
920
921 pub fn is_in_html_block(&self, line_num: usize) -> bool {
923 if line_num == 0 || line_num > self.lines.len() {
924 return false;
925 }
926 self.lines[line_num - 1].in_html_block
927 }
928
929 pub fn is_in_table_block(&self, line_num: usize) -> bool {
935 if line_num == 0 {
936 return false;
937 }
938 let line_idx = line_num - 1;
939 self.table_blocks
940 .iter()
941 .any(|block| line_idx >= block.start_line && line_idx <= block.end_line)
942 }
943
944 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
946 if line_num == 0 || line_num > self.lines.len() {
947 return false;
948 }
949
950 let col_0indexed = if col > 0 { col - 1 } else { 0 };
954 let code_spans = self.code_spans();
955 code_spans.iter().any(|span| {
956 if line_num < span.line || line_num > span.end_line {
958 return false;
959 }
960
961 if span.line == span.end_line {
962 col_0indexed >= span.start_col && col_0indexed < span.end_col
964 } else if line_num == span.line {
965 col_0indexed >= span.start_col
967 } else if line_num == span.end_line {
968 col_0indexed < span.end_col
970 } else {
971 true
973 }
974 })
975 }
976
977 #[inline]
979 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
980 let code_spans = self.code_spans();
981 let idx = code_spans.partition_point(|span| span.byte_offset <= byte_offset);
982 idx > 0 && byte_offset < code_spans[idx - 1].byte_end
983 }
984
985 #[inline]
987 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
988 let idx = self.reference_defs.partition_point(|rd| rd.byte_offset <= byte_pos);
989 idx > 0 && byte_pos < self.reference_defs[idx - 1].byte_end
990 }
991
992 #[inline]
994 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
995 let idx = self.html_comment_ranges.partition_point(|r| r.start <= byte_pos);
996 idx > 0 && byte_pos < self.html_comment_ranges[idx - 1].end
997 }
998
999 #[inline]
1002 pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1003 let tags = self.html_tags();
1004 let idx = tags.partition_point(|tag| tag.byte_offset <= byte_pos);
1005 idx > 0 && byte_pos < tags[idx - 1].byte_end
1006 }
1007
1008 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1010 Self::binary_search_ranges(&self.jinja_ranges, byte_pos)
1011 }
1012
1013 #[inline]
1015 pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
1016 Self::binary_search_ranges(&self.jsx_expression_ranges, byte_pos)
1017 }
1018
1019 #[inline]
1021 pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
1022 Self::binary_search_ranges(&self.mdx_comment_ranges, byte_pos)
1023 }
1024
1025 #[inline]
1028 pub fn is_in_citation(&self, byte_pos: usize) -> bool {
1029 let idx = self.citation_ranges.partition_point(|r| r.start <= byte_pos);
1030 idx > 0 && byte_pos < self.citation_ranges[idx - 1].end
1031 }
1032
1033 #[inline]
1035 pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
1036 &self.citation_ranges
1037 }
1038
1039 #[inline]
1041 pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
1042 Self::binary_search_ranges(&self.shortcode_ranges, byte_pos)
1043 }
1044
1045 #[inline]
1047 pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
1048 &self.shortcode_ranges
1049 }
1050
1051 pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1053 Self::binary_search_ranges(&self.link_title_ranges, byte_pos)
1054 }
1055
1056 pub fn has_char(&self, ch: char) -> bool {
1058 match ch {
1059 '#' => self.char_frequency.hash_count > 0,
1060 '*' => self.char_frequency.asterisk_count > 0,
1061 '_' => self.char_frequency.underscore_count > 0,
1062 '-' => self.char_frequency.hyphen_count > 0,
1063 '+' => self.char_frequency.plus_count > 0,
1064 '>' => self.char_frequency.gt_count > 0,
1065 '|' => self.char_frequency.pipe_count > 0,
1066 '[' => self.char_frequency.bracket_count > 0,
1067 '`' => self.char_frequency.backtick_count > 0,
1068 '<' => self.char_frequency.lt_count > 0,
1069 '!' => self.char_frequency.exclamation_count > 0,
1070 '\n' => self.char_frequency.newline_count > 0,
1071 _ => self.content.contains(ch), }
1073 }
1074
1075 pub fn char_count(&self, ch: char) -> usize {
1077 match ch {
1078 '#' => self.char_frequency.hash_count,
1079 '*' => self.char_frequency.asterisk_count,
1080 '_' => self.char_frequency.underscore_count,
1081 '-' => self.char_frequency.hyphen_count,
1082 '+' => self.char_frequency.plus_count,
1083 '>' => self.char_frequency.gt_count,
1084 '|' => self.char_frequency.pipe_count,
1085 '[' => self.char_frequency.bracket_count,
1086 '`' => self.char_frequency.backtick_count,
1087 '<' => self.char_frequency.lt_count,
1088 '!' => self.char_frequency.exclamation_count,
1089 '\n' => self.char_frequency.newline_count,
1090 _ => self.content.matches(ch).count(), }
1092 }
1093
1094 pub fn likely_has_headings(&self) -> bool {
1096 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 || self.content.contains('=') }
1098
1099 pub fn likely_has_lists(&self) -> bool {
1101 self.char_frequency.asterisk_count > 0
1102 || self.char_frequency.hyphen_count > 0
1103 || self.char_frequency.plus_count > 0
1104 }
1105
1106 pub fn likely_has_emphasis(&self) -> bool {
1108 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1109 }
1110
1111 pub fn likely_has_tables(&self) -> bool {
1113 self.char_frequency.pipe_count > 2
1114 }
1115
1116 pub fn likely_has_blockquotes(&self) -> bool {
1118 self.char_frequency.gt_count > 0
1119 }
1120
1121 pub fn likely_has_code(&self) -> bool {
1123 self.char_frequency.backtick_count > 0
1124 }
1125
1126 pub fn likely_has_links_or_images(&self) -> bool {
1128 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1129 }
1130
1131 pub fn likely_has_html(&self) -> bool {
1133 self.char_frequency.lt_count > 0
1134 }
1135
1136 pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1141 if let Some(line_info) = self.lines.get(line_idx)
1142 && let Some(ref bq) = line_info.blockquote
1143 {
1144 bq.prefix.trim_end().to_string()
1145 } else {
1146 String::new()
1147 }
1148 }
1149
1150 #[inline]
1156 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1157 let idx = match lines.binary_search_by(|line| {
1159 if byte_offset < line.byte_offset {
1160 std::cmp::Ordering::Greater
1161 } else if byte_offset > line.byte_offset + line.byte_len {
1162 std::cmp::Ordering::Less
1163 } else {
1164 std::cmp::Ordering::Equal
1165 }
1166 }) {
1167 Ok(idx) => idx,
1168 Err(idx) => idx.saturating_sub(1),
1169 };
1170
1171 let line = &lines[idx];
1172 let line_num = idx + 1;
1173 let col = byte_offset.saturating_sub(line.byte_offset);
1174
1175 (idx, line_num, col)
1176 }
1177
1178 #[inline]
1180 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1181 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1183
1184 if idx > 0 {
1186 let span = &code_spans[idx - 1];
1187 if offset >= span.byte_offset && offset < span.byte_end {
1188 return true;
1189 }
1190 }
1191
1192 false
1193 }
1194
1195 #[must_use]
1215 pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
1216 ValidHeadingsIter::new(&self.lines)
1217 }
1218
1219 #[must_use]
1223 pub fn has_valid_headings(&self) -> bool {
1224 self.lines
1225 .iter()
1226 .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
1227 }
1228}
1229
1230fn detect_footnote_definitions(content: &str, lines: &mut [types::LineInfo], line_offsets: &[usize]) {
1239 use pulldown_cmark::{CodeBlockKind, Event, Parser, Tag, TagEnd};
1240
1241 let options = crate::utils::rumdl_parser_options();
1242 let parser = Parser::new_ext(content, options).into_offset_iter();
1243
1244 let mut footnote_ranges: Vec<(usize, usize)> = Vec::new();
1246 let mut fenced_code_ranges: Vec<(usize, usize)> = Vec::new();
1247 let mut in_footnote = false;
1248
1249 for (event, range) in parser {
1250 match event {
1251 Event::Start(Tag::FootnoteDefinition(_)) => {
1252 in_footnote = true;
1253 footnote_ranges.push((range.start, range.end));
1254 }
1255 Event::End(TagEnd::FootnoteDefinition) => {
1256 in_footnote = false;
1257 }
1258 Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(_))) if in_footnote => {
1259 fenced_code_ranges.push((range.start, range.end));
1260 }
1261 _ => {}
1262 }
1263 }
1264
1265 let byte_to_line = |byte_offset: usize| -> usize {
1266 line_offsets
1267 .partition_point(|&offset| offset <= byte_offset)
1268 .saturating_sub(1)
1269 };
1270
1271 for &(start, end) in &footnote_ranges {
1273 let start_line = byte_to_line(start);
1274 let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
1275
1276 for line in &mut lines[start_line..end_line] {
1277 line.in_footnote_definition = true;
1278 line.in_code_block = false;
1279 }
1280 }
1281
1282 for &(start, end) in &fenced_code_ranges {
1284 let start_line = byte_to_line(start);
1285 let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
1286
1287 for line in &mut lines[start_line..end_line] {
1288 line.in_code_block = true;
1289 }
1290 }
1291}