1pub mod types;
2pub use types::*;
3
4mod element_parsers;
5mod flavor_detection;
6mod heading_detection;
7mod line_computation;
8mod link_parser;
9mod list_blocks;
10#[cfg(test)]
11mod tests;
12
13use crate::config::MarkdownFlavor;
14use crate::inline_config::InlineConfig;
15use crate::rules::front_matter_utils::FrontMatterUtils;
16use crate::utils::code_block_utils::{CodeBlockDetail, CodeBlockUtils};
17use std::collections::HashMap;
18use std::path::PathBuf;
19
/// Evaluates `$code` and yields its value, optionally reporting the elapsed time.
///
/// On non-wasm targets a timer is started before `$code` runs; when `$profile`
/// is `true` the elapsed time is printed to stderr as `[PROFILE] <name>: <duration>`.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let timer = std::time::Instant::now();
        let output = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, timer.elapsed());
        }
        output
    }};
}

/// wasm32 variant of `profile_section!`: no timing is done, `$code` is simply evaluated.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
37
38pub(super) struct SkipByteRanges<'a> {
41 pub(super) html_comment_ranges: &'a [crate::utils::skip_context::ByteRange],
42 pub(super) autodoc_ranges: &'a [crate::utils::skip_context::ByteRange],
43 pub(super) quarto_div_ranges: &'a [crate::utils::skip_context::ByteRange],
44 pub(super) pymdown_block_ranges: &'a [crate::utils::skip_context::ByteRange],
45}
46
47use std::sync::{Arc, OnceLock};
48
49pub(super) type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;
51
52pub(super) type ByteRanges = Vec<(usize, usize)>;
54
55pub struct LintContext<'a> {
56 pub content: &'a str,
57 content_lines: Vec<&'a str>, pub line_offsets: Vec<usize>,
59 pub code_blocks: Vec<(usize, usize)>, pub code_block_details: Vec<CodeBlockDetail>, pub strong_spans: Vec<crate::utils::code_block_utils::StrongSpanDetail>, pub line_to_list: crate::utils::code_block_utils::LineToListMap, pub list_start_values: crate::utils::code_block_utils::ListStartValues, pub lines: Vec<LineInfo>, pub links: Vec<ParsedLink<'a>>, pub images: Vec<ParsedImage<'a>>, pub broken_links: Vec<BrokenLinkInfo>, pub footnote_refs: Vec<FootnoteRef>, pub reference_defs: Vec<ReferenceDef>, reference_defs_map: HashMap<String, usize>, code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>, math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>, pub list_blocks: Vec<ListBlock>, pub char_frequency: CharFrequency, html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>, emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>, table_rows_cache: OnceLock<Arc<Vec<TableRow>>>, bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>, has_mixed_list_nesting_cache: OnceLock<bool>, html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>, pub table_blocks: Vec<crate::utils::table_utils::TableBlock>, pub line_index: crate::utils::range_utils::LineIndex<'a>, jinja_ranges: Vec<(usize, usize)>, pub flavor: MarkdownFlavor, pub source_file: Option<PathBuf>, jsx_expression_ranges: Vec<(usize, usize)>, mdx_comment_ranges: Vec<(usize, usize)>, citation_ranges: Vec<crate::utils::skip_context::ByteRange>, shortcode_ranges: Vec<(usize, usize)>, link_title_ranges: Vec<(usize, usize)>, code_span_byte_ranges: Vec<(usize, usize)>, inline_config: InlineConfig, obsidian_comment_ranges: Vec<(usize, usize)>, lazy_cont_lines_cache: OnceLock<Arc<Vec<LazyContLine>>>, }
96
97impl<'a> LintContext<'a> {
98 pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
99 #[cfg(not(target_arch = "wasm32"))]
100 let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
101 #[cfg(target_arch = "wasm32")]
102 let profile = false;
103
104 let line_offsets = profile_section!("Line offsets", profile, {
105 let mut offsets = vec![0];
106 for (i, c) in content.char_indices() {
107 if c == '\n' {
108 offsets.push(i + 1);
109 }
110 }
111 offsets
112 });
113
114 let content_lines: Vec<&str> = content.lines().collect();
116
117 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
119
120 let parse_result = profile_section!(
122 "Code blocks",
123 profile,
124 CodeBlockUtils::detect_code_blocks_and_spans(content)
125 );
126 let mut code_blocks = parse_result.code_blocks;
127 let code_span_ranges = parse_result.code_spans;
128 let code_block_details = parse_result.code_block_details;
129 let strong_spans = parse_result.strong_spans;
130 let line_to_list = parse_result.line_to_list;
131 let list_start_values = parse_result.list_start_values;
132
133 let html_comment_ranges = profile_section!(
135 "HTML comment ranges",
136 profile,
137 crate::utils::skip_context::compute_html_comment_ranges(content)
138 );
139
140 let autodoc_ranges = profile_section!(
144 "Autodoc block ranges",
145 profile,
146 crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
147 );
148
149 let quarto_div_ranges = profile_section!("Quarto div ranges", profile, {
151 if flavor == MarkdownFlavor::Quarto {
152 crate::utils::quarto_divs::detect_div_block_ranges(content)
153 } else {
154 Vec::new()
155 }
156 });
157
158 let pymdown_block_ranges = profile_section!("PyMdown block ranges", profile, {
160 if flavor == MarkdownFlavor::MkDocs {
161 crate::utils::pymdown_blocks::detect_block_ranges(content)
162 } else {
163 Vec::new()
164 }
165 });
166
167 let skip_ranges = SkipByteRanges {
170 html_comment_ranges: &html_comment_ranges,
171 autodoc_ranges: &autodoc_ranges,
172 quarto_div_ranges: &quarto_div_ranges,
173 pymdown_block_ranges: &pymdown_block_ranges,
174 };
175 let (mut lines, emphasis_spans) = profile_section!(
176 "Basic line info",
177 profile,
178 line_computation::compute_basic_line_info(
179 content,
180 &content_lines,
181 &line_offsets,
182 &code_blocks,
183 flavor,
184 &skip_ranges,
185 front_matter_end,
186 )
187 );
188
189 profile_section!(
191 "HTML blocks",
192 profile,
193 heading_detection::detect_html_blocks(content, &mut lines)
194 );
195
196 profile_section!(
198 "ESM blocks",
199 profile,
200 flavor_detection::detect_esm_blocks(content, &mut lines, flavor)
201 );
202
203 profile_section!(
205 "JSX block detection",
206 profile,
207 flavor_detection::detect_jsx_blocks(content, &mut lines, flavor)
208 );
209
210 let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
212 "JSX/MDX detection",
213 profile,
214 flavor_detection::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
215 );
216
217 profile_section!(
219 "MkDocs constructs",
220 profile,
221 flavor_detection::detect_mkdocs_line_info(&content_lines, &mut lines, flavor)
222 );
223
224 profile_section!(
229 "Footnote definitions",
230 profile,
231 detect_footnote_definitions(content, &mut lines, &line_offsets)
232 );
233
234 {
237 let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
238 for &(start, end) in &code_blocks {
239 let start_line = line_offsets
240 .partition_point(|&offset| offset <= start)
241 .saturating_sub(1);
242 let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
243
244 let mut sub_start: Option<usize> = None;
245 for (i, &offset) in line_offsets[start_line..end_line]
246 .iter()
247 .enumerate()
248 .map(|(j, o)| (j + start_line, o))
249 {
250 let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
251 if is_real_code && sub_start.is_none() {
252 let byte_start = if i == start_line { start } else { offset };
253 sub_start = Some(byte_start);
254 } else if !is_real_code && sub_start.is_some() {
255 new_code_blocks.push((sub_start.unwrap(), offset));
256 sub_start = None;
257 }
258 }
259 if let Some(s) = sub_start {
260 new_code_blocks.push((s, end));
261 }
262 }
263 code_blocks = new_code_blocks;
264 }
265
266 if flavor == MarkdownFlavor::MkDocs {
273 let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
274 for &(start, end) in &code_blocks {
275 let start_line = line_offsets
276 .partition_point(|&offset| offset <= start)
277 .saturating_sub(1);
278 let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
279
280 let mut sub_start: Option<usize> = None;
282 for (i, &offset) in line_offsets[start_line..end_line]
283 .iter()
284 .enumerate()
285 .map(|(j, o)| (j + start_line, o))
286 {
287 let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
288 if is_real_code && sub_start.is_none() {
289 let byte_start = if i == start_line { start } else { offset };
290 sub_start = Some(byte_start);
291 } else if !is_real_code && sub_start.is_some() {
292 new_code_blocks.push((sub_start.unwrap(), offset));
293 sub_start = None;
294 }
295 }
296 if let Some(s) = sub_start {
297 new_code_blocks.push((s, end));
298 }
299 }
300 code_blocks = new_code_blocks;
301 }
302
303 if flavor.supports_jsx() {
307 let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
308 for &(start, end) in &code_blocks {
309 let start_line = line_offsets
310 .partition_point(|&offset| offset <= start)
311 .saturating_sub(1);
312 let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
313
314 let mut sub_start: Option<usize> = None;
315 for (i, &offset) in line_offsets[start_line..end_line]
316 .iter()
317 .enumerate()
318 .map(|(j, o)| (j + start_line, o))
319 {
320 let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
321 if is_real_code && sub_start.is_none() {
322 let byte_start = if i == start_line { start } else { offset };
323 sub_start = Some(byte_start);
324 } else if !is_real_code && sub_start.is_some() {
325 new_code_blocks.push((sub_start.unwrap(), offset));
326 sub_start = None;
327 }
328 }
329 if let Some(s) = sub_start {
330 new_code_blocks.push((s, end));
331 }
332 }
333 code_blocks = new_code_blocks;
334 }
335
336 profile_section!(
338 "Kramdown constructs",
339 profile,
340 flavor_detection::detect_kramdown_line_info(content, &mut lines, flavor)
341 );
342
343 for line in &mut lines {
348 if line.in_kramdown_extension_block {
349 line.list_item = None;
350 line.is_horizontal_rule = false;
351 line.blockquote = None;
352 line.is_kramdown_block_ial = false;
353 }
354 }
355
356 let obsidian_comment_ranges = profile_section!(
358 "Obsidian comments",
359 profile,
360 flavor_detection::detect_obsidian_comments(content, &mut lines, flavor, &code_span_ranges)
361 );
362
363 let pulldown_result = profile_section!(
367 "Links, images & link ranges",
368 profile,
369 link_parser::parse_links_images_pulldown(content, &lines, &code_blocks, flavor, &html_comment_ranges)
370 );
371
372 profile_section!(
374 "Headings & blockquotes",
375 profile,
376 heading_detection::detect_headings_and_blockquotes(
377 &content_lines,
378 &mut lines,
379 flavor,
380 &html_comment_ranges,
381 &pulldown_result.link_byte_ranges,
382 front_matter_end,
383 )
384 );
385
386 for line in &mut lines {
388 if line.in_kramdown_extension_block {
389 line.heading = None;
390 }
391 }
392
393 let mut code_spans = profile_section!(
395 "Code spans",
396 profile,
397 element_parsers::build_code_spans_from_ranges(content, &lines, &code_span_ranges)
398 );
399
400 if flavor == MarkdownFlavor::MkDocs {
404 let extra = profile_section!(
405 "MkDocs code spans",
406 profile,
407 element_parsers::scan_mkdocs_container_code_spans(content, &lines, &code_span_ranges,)
408 );
409 if !extra.is_empty() {
410 code_spans.extend(extra);
411 code_spans.sort_by_key(|span| span.byte_offset);
412 }
413 }
414
415 if flavor == MarkdownFlavor::MDX {
420 let extra = profile_section!(
421 "MDX JSX code spans",
422 profile,
423 element_parsers::scan_jsx_block_code_spans(content, &lines, &code_span_ranges)
424 );
425 if !extra.is_empty() {
426 code_spans.extend(extra);
427 code_spans.sort_by_key(|span| span.byte_offset);
428 }
429 }
430
431 for span in &code_spans {
434 if span.end_line > span.line {
435 for line_num in (span.line + 1)..=span.end_line {
437 if let Some(line_info) = lines.get_mut(line_num - 1) {
438 line_info.in_code_span_continuation = true;
439 }
440 }
441 }
442 }
443
444 let (links, images, broken_links, footnote_refs) = profile_section!(
446 "Links & images finalize",
447 profile,
448 link_parser::finalize_links_and_images(
449 content,
450 &lines,
451 &code_blocks,
452 &code_spans,
453 flavor,
454 &html_comment_ranges,
455 pulldown_result
456 )
457 );
458
459 let reference_defs = profile_section!(
460 "Reference defs",
461 profile,
462 link_parser::parse_reference_defs(content, &lines)
463 );
464
465 let list_blocks = profile_section!("List blocks", profile, list_blocks::parse_list_blocks(content, &lines));
466
467 let char_frequency = profile_section!(
469 "Char frequency",
470 profile,
471 line_computation::compute_char_frequency(content)
472 );
473
474 let table_blocks = profile_section!(
476 "Table blocks",
477 profile,
478 crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
479 content,
480 &code_blocks,
481 &code_spans,
482 &html_comment_ranges,
483 )
484 );
485
486 let links = links
489 .into_iter()
490 .filter(|link| !lines.get(link.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
491 .collect::<Vec<_>>();
492 let images = images
493 .into_iter()
494 .filter(|img| !lines.get(img.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
495 .collect::<Vec<_>>();
496 let broken_links = broken_links
497 .into_iter()
498 .filter(|bl| {
499 let line_idx = line_offsets
501 .partition_point(|&offset| offset <= bl.span.start)
502 .saturating_sub(1);
503 !lines.get(line_idx).is_some_and(|l| l.in_kramdown_extension_block)
504 })
505 .collect::<Vec<_>>();
506 let footnote_refs = footnote_refs
507 .into_iter()
508 .filter(|fr| !lines.get(fr.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
509 .collect::<Vec<_>>();
510 let reference_defs = reference_defs
511 .into_iter()
512 .filter(|def| !lines.get(def.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
513 .collect::<Vec<_>>();
514 let list_blocks = list_blocks
515 .into_iter()
516 .filter(|block| {
517 !lines
518 .get(block.start_line - 1)
519 .is_some_and(|l| l.in_kramdown_extension_block)
520 })
521 .collect::<Vec<_>>();
522 let table_blocks = table_blocks
523 .into_iter()
524 .filter(|block| {
525 !lines
527 .get(block.start_line)
528 .is_some_and(|l| l.in_kramdown_extension_block)
529 })
530 .collect::<Vec<_>>();
531 let emphasis_spans = emphasis_spans
532 .into_iter()
533 .filter(|span| !lines.get(span.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
534 .collect::<Vec<_>>();
535
536 let reference_defs_map: HashMap<String, usize> = reference_defs
538 .iter()
539 .enumerate()
540 .map(|(idx, def)| (def.id.to_lowercase(), idx))
541 .collect();
542
543 let link_title_ranges: Vec<(usize, usize)> = reference_defs
545 .iter()
546 .filter_map(|def| match (def.title_byte_start, def.title_byte_end) {
547 (Some(start), Some(end)) => Some((start, end)),
548 _ => None,
549 })
550 .collect();
551
552 let line_index = profile_section!(
554 "Line index",
555 profile,
556 crate::utils::range_utils::LineIndex::with_line_starts_and_code_blocks(
557 content,
558 line_offsets.clone(),
559 &code_blocks,
560 )
561 );
562
563 let jinja_ranges = profile_section!(
565 "Jinja ranges",
566 profile,
567 crate::utils::jinja_utils::find_jinja_ranges(content)
568 );
569
570 let citation_ranges = profile_section!("Citation ranges", profile, {
572 if flavor == MarkdownFlavor::Quarto {
573 crate::utils::quarto_divs::find_citation_ranges(content)
574 } else {
575 Vec::new()
576 }
577 });
578
579 let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
581 use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
582 let mut ranges = Vec::new();
583 for mat in HUGO_SHORTCODE_REGEX.find_iter(content) {
584 ranges.push((mat.start(), mat.end()));
585 }
586 ranges
587 });
588
589 let inline_config = InlineConfig::from_content_with_code_blocks(content, &code_blocks);
590
591 Self {
592 content,
593 content_lines,
594 line_offsets,
595 code_blocks,
596 code_block_details,
597 strong_spans,
598 line_to_list,
599 list_start_values,
600 lines,
601 links,
602 images,
603 broken_links,
604 footnote_refs,
605 reference_defs,
606 reference_defs_map,
607 code_spans_cache: OnceLock::from(Arc::new(code_spans)),
608 math_spans_cache: OnceLock::new(), list_blocks,
610 char_frequency,
611 html_tags_cache: OnceLock::new(),
612 emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
613 table_rows_cache: OnceLock::new(),
614 bare_urls_cache: OnceLock::new(),
615 has_mixed_list_nesting_cache: OnceLock::new(),
616 html_comment_ranges,
617 table_blocks,
618 line_index,
619 jinja_ranges,
620 flavor,
621 source_file,
622 jsx_expression_ranges,
623 mdx_comment_ranges,
624 citation_ranges,
625 shortcode_ranges,
626 link_title_ranges,
627 code_span_byte_ranges: code_span_ranges,
628 inline_config,
629 obsidian_comment_ranges,
630 lazy_cont_lines_cache: OnceLock::new(),
631 }
632 }
633
634 #[inline]
637 fn binary_search_ranges(ranges: &[(usize, usize)], pos: usize) -> bool {
638 let idx = ranges.partition_point(|&(start, _)| start <= pos);
640 idx > 0 && pos < ranges[idx - 1].1
642 }
643
644 pub fn is_in_code_span_byte(&self, pos: usize) -> bool {
646 Self::binary_search_ranges(&self.code_span_byte_ranges, pos)
647 }
648
649 pub fn is_in_link(&self, pos: usize) -> bool {
651 let idx = self.links.partition_point(|link| link.byte_offset <= pos);
652 if idx > 0 && pos < self.links[idx - 1].byte_end {
653 return true;
654 }
655 let idx = self.images.partition_point(|img| img.byte_offset <= pos);
656 if idx > 0 && pos < self.images[idx - 1].byte_end {
657 return true;
658 }
659 self.is_in_reference_def(pos)
660 }
661
662 pub fn inline_config(&self) -> &InlineConfig {
664 &self.inline_config
665 }
666
667 pub fn raw_lines(&self) -> &[&'a str] {
671 &self.content_lines
672 }
673
674 pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
679 self.inline_config.is_rule_disabled(rule_name, line_number)
680 }
681
682 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
684 Arc::clone(
685 self.code_spans_cache
686 .get_or_init(|| Arc::new(element_parsers::parse_code_spans(self.content, &self.lines))),
687 )
688 }
689
690 pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
692 Arc::clone(
693 self.math_spans_cache
694 .get_or_init(|| Arc::new(element_parsers::parse_math_spans(self.content, &self.lines))),
695 )
696 }
697
698 pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
700 let math_spans = self.math_spans();
701 let idx = math_spans.partition_point(|span| span.byte_offset <= byte_pos);
703 idx > 0 && byte_pos < math_spans[idx - 1].byte_end
704 }
705
706 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
708 &self.html_comment_ranges
709 }
710
711 pub fn obsidian_comment_ranges(&self) -> &[(usize, usize)] {
714 &self.obsidian_comment_ranges
715 }
716
717 pub fn is_in_obsidian_comment(&self, byte_pos: usize) -> bool {
721 Self::binary_search_ranges(&self.obsidian_comment_ranges, byte_pos)
722 }
723
724 pub fn is_position_in_obsidian_comment(&self, line_num: usize, col: usize) -> bool {
729 if self.obsidian_comment_ranges.is_empty() {
730 return false;
731 }
732
733 let byte_pos = self.line_index.line_col_to_byte_range(line_num, col).start;
735 self.is_in_obsidian_comment(byte_pos)
736 }
737
738 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
740 Arc::clone(self.html_tags_cache.get_or_init(|| {
741 let tags = element_parsers::parse_html_tags(self.content, &self.lines, &self.code_blocks, self.flavor);
742 Arc::new(
744 tags.into_iter()
745 .filter(|tag| {
746 !self
747 .lines
748 .get(tag.line - 1)
749 .is_some_and(|l| l.in_kramdown_extension_block)
750 })
751 .collect(),
752 )
753 }))
754 }
755
756 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
758 Arc::clone(
759 self.emphasis_spans_cache
760 .get()
761 .expect("emphasis_spans_cache initialized during construction"),
762 )
763 }
764
765 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
767 Arc::clone(
768 self.table_rows_cache
769 .get_or_init(|| Arc::new(element_parsers::parse_table_rows(self.content, &self.lines))),
770 )
771 }
772
773 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
775 Arc::clone(self.bare_urls_cache.get_or_init(|| {
776 Arc::new(element_parsers::parse_bare_urls(
777 self.content,
778 &self.lines,
779 &self.code_blocks,
780 ))
781 }))
782 }
783
784 pub fn lazy_continuation_lines(&self) -> Arc<Vec<LazyContLine>> {
786 Arc::clone(self.lazy_cont_lines_cache.get_or_init(|| {
787 Arc::new(element_parsers::detect_lazy_continuation_lines(
788 self.content,
789 &self.lines,
790 &self.line_offsets,
791 ))
792 }))
793 }
794
795 pub fn has_mixed_list_nesting(&self) -> bool {
799 *self
800 .has_mixed_list_nesting_cache
801 .get_or_init(|| self.compute_mixed_list_nesting())
802 }
803
804 fn compute_mixed_list_nesting(&self) -> bool {
806 let mut stack: Vec<(usize, bool)> = Vec::new();
811 let mut last_was_blank = false;
812
813 for line_info in &self.lines {
814 if line_info.in_code_block
816 || line_info.in_front_matter
817 || line_info.in_mkdocstrings
818 || line_info.in_html_comment
819 || line_info.in_mdx_comment
820 || line_info.in_esm_block
821 {
822 continue;
823 }
824
825 if line_info.is_blank {
827 last_was_blank = true;
828 continue;
829 }
830
831 if let Some(list_item) = &line_info.list_item {
832 let current_pos = if list_item.marker_column == 1 {
834 0
835 } else {
836 list_item.marker_column
837 };
838
839 if last_was_blank && current_pos == 0 {
841 stack.clear();
842 }
843 last_was_blank = false;
844
845 while let Some(&(pos, _)) = stack.last() {
847 if pos >= current_pos {
848 stack.pop();
849 } else {
850 break;
851 }
852 }
853
854 if let Some(&(_, parent_is_ordered)) = stack.last()
856 && parent_is_ordered != list_item.is_ordered
857 {
858 return true; }
860
861 stack.push((current_pos, list_item.is_ordered));
862 } else {
863 last_was_blank = false;
865 }
866 }
867
868 false
869 }
870
871 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
873 match self.line_offsets.binary_search(&offset) {
874 Ok(line) => (line + 1, 1),
875 Err(line) => {
876 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
877 (line, offset - line_start + 1)
878 }
879 }
880 }
881
882 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
884 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
886 return true;
887 }
888
889 self.is_byte_offset_in_code_span(pos)
891 }
892
893 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
895 if line_num > 0 {
896 self.lines.get(line_num - 1)
897 } else {
898 None
899 }
900 }
901
902 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
904 self.line_info(line_num).map(|info| info.byte_offset)
905 }
906
907 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
909 let normalized_id = ref_id.to_lowercase();
910 self.reference_defs_map
911 .get(&normalized_id)
912 .map(|&idx| self.reference_defs[idx].url.as_str())
913 }
914
915 pub fn get_reference_def(&self, ref_id: &str) -> Option<&ReferenceDef> {
917 let normalized_id = ref_id.to_lowercase();
918 self.reference_defs_map
919 .get(&normalized_id)
920 .map(|&idx| &self.reference_defs[idx])
921 }
922
923 pub fn has_reference_def(&self, ref_id: &str) -> bool {
925 let normalized_id = ref_id.to_lowercase();
926 self.reference_defs_map.contains_key(&normalized_id)
927 }
928
929 pub fn is_in_list_block(&self, line_num: usize) -> bool {
931 self.list_blocks
932 .iter()
933 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
934 }
935
936 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
938 self.list_blocks
939 .iter()
940 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
941 }
942
943 pub fn is_in_code_block(&self, line_num: usize) -> bool {
947 if line_num == 0 || line_num > self.lines.len() {
948 return false;
949 }
950 self.lines[line_num - 1].in_code_block
951 }
952
953 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
955 if line_num == 0 || line_num > self.lines.len() {
956 return false;
957 }
958 self.lines[line_num - 1].in_front_matter
959 }
960
961 pub fn is_in_html_block(&self, line_num: usize) -> bool {
963 if line_num == 0 || line_num > self.lines.len() {
964 return false;
965 }
966 self.lines[line_num - 1].in_html_block
967 }
968
969 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
971 if line_num == 0 || line_num > self.lines.len() {
972 return false;
973 }
974
975 let col_0indexed = if col > 0 { col - 1 } else { 0 };
979 let code_spans = self.code_spans();
980 code_spans.iter().any(|span| {
981 if line_num < span.line || line_num > span.end_line {
983 return false;
984 }
985
986 if span.line == span.end_line {
987 col_0indexed >= span.start_col && col_0indexed < span.end_col
989 } else if line_num == span.line {
990 col_0indexed >= span.start_col
992 } else if line_num == span.end_line {
993 col_0indexed < span.end_col
995 } else {
996 true
998 }
999 })
1000 }
1001
1002 #[inline]
1004 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
1005 let code_spans = self.code_spans();
1006 let idx = code_spans.partition_point(|span| span.byte_offset <= byte_offset);
1007 idx > 0 && byte_offset < code_spans[idx - 1].byte_end
1008 }
1009
1010 #[inline]
1012 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
1013 let idx = self.reference_defs.partition_point(|rd| rd.byte_offset <= byte_pos);
1014 idx > 0 && byte_pos < self.reference_defs[idx - 1].byte_end
1015 }
1016
1017 #[inline]
1019 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
1020 let idx = self.html_comment_ranges.partition_point(|r| r.start <= byte_pos);
1021 idx > 0 && byte_pos < self.html_comment_ranges[idx - 1].end
1022 }
1023
1024 #[inline]
1027 pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1028 let tags = self.html_tags();
1029 let idx = tags.partition_point(|tag| tag.byte_offset <= byte_pos);
1030 idx > 0 && byte_pos < tags[idx - 1].byte_end
1031 }
1032
1033 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1035 Self::binary_search_ranges(&self.jinja_ranges, byte_pos)
1036 }
1037
1038 #[inline]
1040 pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
1041 Self::binary_search_ranges(&self.jsx_expression_ranges, byte_pos)
1042 }
1043
1044 #[inline]
1046 pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
1047 Self::binary_search_ranges(&self.mdx_comment_ranges, byte_pos)
1048 }
1049
1050 pub fn jsx_expression_ranges(&self) -> &[(usize, usize)] {
1052 &self.jsx_expression_ranges
1053 }
1054
1055 pub fn mdx_comment_ranges(&self) -> &[(usize, usize)] {
1057 &self.mdx_comment_ranges
1058 }
1059
1060 #[inline]
1063 pub fn is_in_citation(&self, byte_pos: usize) -> bool {
1064 let idx = self.citation_ranges.partition_point(|r| r.start <= byte_pos);
1065 idx > 0 && byte_pos < self.citation_ranges[idx - 1].end
1066 }
1067
1068 pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
1070 &self.citation_ranges
1071 }
1072
1073 #[inline]
1075 pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
1076 Self::binary_search_ranges(&self.shortcode_ranges, byte_pos)
1077 }
1078
1079 pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
1081 &self.shortcode_ranges
1082 }
1083
1084 pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1086 Self::binary_search_ranges(&self.link_title_ranges, byte_pos)
1087 }
1088
1089 pub fn has_char(&self, ch: char) -> bool {
1091 match ch {
1092 '#' => self.char_frequency.hash_count > 0,
1093 '*' => self.char_frequency.asterisk_count > 0,
1094 '_' => self.char_frequency.underscore_count > 0,
1095 '-' => self.char_frequency.hyphen_count > 0,
1096 '+' => self.char_frequency.plus_count > 0,
1097 '>' => self.char_frequency.gt_count > 0,
1098 '|' => self.char_frequency.pipe_count > 0,
1099 '[' => self.char_frequency.bracket_count > 0,
1100 '`' => self.char_frequency.backtick_count > 0,
1101 '<' => self.char_frequency.lt_count > 0,
1102 '!' => self.char_frequency.exclamation_count > 0,
1103 '\n' => self.char_frequency.newline_count > 0,
1104 _ => self.content.contains(ch), }
1106 }
1107
1108 pub fn char_count(&self, ch: char) -> usize {
1110 match ch {
1111 '#' => self.char_frequency.hash_count,
1112 '*' => self.char_frequency.asterisk_count,
1113 '_' => self.char_frequency.underscore_count,
1114 '-' => self.char_frequency.hyphen_count,
1115 '+' => self.char_frequency.plus_count,
1116 '>' => self.char_frequency.gt_count,
1117 '|' => self.char_frequency.pipe_count,
1118 '[' => self.char_frequency.bracket_count,
1119 '`' => self.char_frequency.backtick_count,
1120 '<' => self.char_frequency.lt_count,
1121 '!' => self.char_frequency.exclamation_count,
1122 '\n' => self.char_frequency.newline_count,
1123 _ => self.content.matches(ch).count(), }
1125 }
1126
1127 pub fn likely_has_headings(&self) -> bool {
1129 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 || self.content.contains('=') }
1131
1132 pub fn likely_has_lists(&self) -> bool {
1134 self.char_frequency.asterisk_count > 0
1135 || self.char_frequency.hyphen_count > 0
1136 || self.char_frequency.plus_count > 0
1137 }
1138
1139 pub fn likely_has_emphasis(&self) -> bool {
1141 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1142 }
1143
1144 pub fn likely_has_tables(&self) -> bool {
1146 self.char_frequency.pipe_count > 2
1147 }
1148
1149 pub fn likely_has_blockquotes(&self) -> bool {
1151 self.char_frequency.gt_count > 0
1152 }
1153
1154 pub fn likely_has_code(&self) -> bool {
1156 self.char_frequency.backtick_count > 0
1157 }
1158
1159 pub fn likely_has_links_or_images(&self) -> bool {
1161 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1162 }
1163
1164 pub fn likely_has_html(&self) -> bool {
1166 self.char_frequency.lt_count > 0
1167 }
1168
1169 pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1174 if let Some(line_info) = self.lines.get(line_idx)
1175 && let Some(ref bq) = line_info.blockquote
1176 {
1177 bq.prefix.trim_end().to_string()
1178 } else {
1179 String::new()
1180 }
1181 }
1182
1183 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1185 self.html_tags()
1186 .iter()
1187 .filter(|tag| tag.line == line_num)
1188 .cloned()
1189 .collect()
1190 }
1191
1192 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1194 self.emphasis_spans()
1195 .iter()
1196 .filter(|span| span.line == line_num)
1197 .cloned()
1198 .collect()
1199 }
1200
1201 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1203 self.table_rows()
1204 .iter()
1205 .filter(|row| row.line == line_num)
1206 .cloned()
1207 .collect()
1208 }
1209
1210 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1212 self.bare_urls()
1213 .iter()
1214 .filter(|url| url.line == line_num)
1215 .cloned()
1216 .collect()
1217 }
1218
1219 #[inline]
1225 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1226 let idx = match lines.binary_search_by(|line| {
1228 if byte_offset < line.byte_offset {
1229 std::cmp::Ordering::Greater
1230 } else if byte_offset > line.byte_offset + line.byte_len {
1231 std::cmp::Ordering::Less
1232 } else {
1233 std::cmp::Ordering::Equal
1234 }
1235 }) {
1236 Ok(idx) => idx,
1237 Err(idx) => idx.saturating_sub(1),
1238 };
1239
1240 let line = &lines[idx];
1241 let line_num = idx + 1;
1242 let col = byte_offset.saturating_sub(line.byte_offset);
1243
1244 (idx, line_num, col)
1245 }
1246
1247 #[inline]
1249 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1250 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1252
1253 if idx > 0 {
1255 let span = &code_spans[idx - 1];
1256 if offset >= span.byte_offset && offset < span.byte_end {
1257 return true;
1258 }
1259 }
1260
1261 false
1262 }
1263
1264 #[must_use]
1284 pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
1285 ValidHeadingsIter::new(&self.lines)
1286 }
1287
1288 #[must_use]
1292 pub fn has_valid_headings(&self) -> bool {
1293 self.lines
1294 .iter()
1295 .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
1296 }
1297}
1298
1299fn detect_footnote_definitions(content: &str, lines: &mut [types::LineInfo], line_offsets: &[usize]) {
1308 use pulldown_cmark::{CodeBlockKind, Event, Parser, Tag, TagEnd};
1309
1310 let options = crate::utils::rumdl_parser_options();
1311 let parser = Parser::new_ext(content, options).into_offset_iter();
1312
1313 let mut footnote_ranges: Vec<(usize, usize)> = Vec::new();
1315 let mut fenced_code_ranges: Vec<(usize, usize)> = Vec::new();
1316 let mut in_footnote = false;
1317
1318 for (event, range) in parser {
1319 match event {
1320 Event::Start(Tag::FootnoteDefinition(_)) => {
1321 in_footnote = true;
1322 footnote_ranges.push((range.start, range.end));
1323 }
1324 Event::End(TagEnd::FootnoteDefinition) => {
1325 in_footnote = false;
1326 }
1327 Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(_))) if in_footnote => {
1328 fenced_code_ranges.push((range.start, range.end));
1329 }
1330 _ => {}
1331 }
1332 }
1333
1334 let byte_to_line = |byte_offset: usize| -> usize {
1335 line_offsets
1336 .partition_point(|&offset| offset <= byte_offset)
1337 .saturating_sub(1)
1338 };
1339
1340 for &(start, end) in &footnote_ranges {
1342 let start_line = byte_to_line(start);
1343 let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
1344
1345 for line in &mut lines[start_line..end_line] {
1346 line.in_footnote_definition = true;
1347 line.in_code_block = false;
1348 }
1349 }
1350
1351 for &(start, end) in &fenced_code_ranges {
1353 let start_line = byte_to_line(start);
1354 let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
1355
1356 for line in &mut lines[start_line..end_line] {
1357 line.in_code_block = true;
1358 }
1359 }
1360}