1pub mod types;
2pub use types::*;
3
4mod element_parsers;
5mod flavor_detection;
6mod heading_detection;
7mod line_computation;
8mod link_parser;
9mod list_blocks;
10#[cfg(test)]
11mod tests;
12
13use crate::config::MarkdownFlavor;
14use crate::inline_config::InlineConfig;
15use crate::rules::front_matter_utils::FrontMatterUtils;
16use crate::utils::code_block_utils::{CodeBlockDetail, CodeBlockUtils};
17use std::collections::HashMap;
18use std::path::PathBuf;
19
/// Evaluates `$code` and yields its value, optionally reporting the elapsed time.
///
/// * `$name`    - label printed in the timing line.
/// * `$profile` - boolean expression; when true, a `[PROFILE] <name>: <elapsed>`
///   line is written to stderr after `$code` has run.
/// * `$code`    - the expression (or block) to evaluate.
///
/// The clock is started unconditionally; only the report is gated on `$profile`.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let timer = std::time::Instant::now();
        let output = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, timer.elapsed());
        }
        output
    }};
}
32
// wasm32 variant of `profile_section!`: expands to `$code` alone. The `$name`
// and `$profile` arguments are accepted but never expanded, so no timing is
// performed and the profile expression is not evaluated.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
37
/// Borrowed byte-range lists bundled into a single argument.
///
/// `LintContext::new` builds one of these and passes it to
/// `line_computation::compute_basic_line_info`.
pub(super) struct SkipByteRanges<'a> {
    /// Ranges produced by `skip_context::compute_html_comment_ranges`.
    pub(super) html_comment_ranges: &'a [crate::utils::skip_context::ByteRange],
    /// Ranges produced by `mkdocstrings_refs::detect_autodoc_block_ranges`.
    pub(super) autodoc_ranges: &'a [crate::utils::skip_context::ByteRange],
    /// Ranges produced by `quarto_divs::detect_div_block_ranges`; the constructor
    /// leaves this empty unless the flavor is Quarto.
    pub(super) quarto_div_ranges: &'a [crate::utils::skip_context::ByteRange],
    /// Ranges produced by `pymdown_blocks::detect_block_ranges`; the constructor
    /// leaves this empty unless the flavor is MkDocs.
    pub(super) pymdown_block_ranges: &'a [crate::utils::skip_context::ByteRange],
}
46
47use std::sync::{Arc, OnceLock};
48
/// Map from a `usize` key to a five-element list-item tuple.
///
/// NOTE(review): the meaning of the key and of each tuple slot is not visible
/// in this file - confirm against the modules that build and read this map.
pub(super) type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;
51
/// A list of `(usize, usize)` pairs - presumably `(start, end)` byte offsets,
/// going by the name; confirm at the use sites.
pub(super) type ByteRanges = Vec<(usize, usize)>;
54
/// Pre-computed analysis of one Markdown document.
///
/// Everything is derived from `content` in [`LintContext::new`]. Fields named
/// `*_cache` are `OnceLock`s: some are filled during construction, the rest on
/// first access through the corresponding accessor method.
pub struct LintContext<'a> {
    /// The document text this context was built from.
    pub content: &'a str,
    /// `content` split with `str::lines`.
    content_lines: Vec<&'a str>,
    /// Byte offset at which each line starts: `0`, then the offset following every `'\n'`.
    pub line_offsets: Vec<usize>,
    /// `(start, end)` byte ranges of code blocks, after the constructor has
    /// re-split them against the per-line `in_code_block` flags.
    pub code_blocks: Vec<(usize, usize)>,
    /// Taken from the result of `CodeBlockUtils::detect_code_blocks_and_spans`.
    pub code_block_details: Vec<CodeBlockDetail>,
    /// Taken from the result of `CodeBlockUtils::detect_code_blocks_and_spans`.
    pub strong_spans: Vec<crate::utils::code_block_utils::StrongSpanDetail>,
    /// Taken from the result of `CodeBlockUtils::detect_code_blocks_and_spans`.
    pub line_to_list: crate::utils::code_block_utils::LineToListMap,
    /// Taken from the result of `CodeBlockUtils::detect_code_blocks_and_spans`.
    pub list_start_values: crate::utils::code_block_utils::ListStartValues,
    /// Per-line information; the accessors in this file index it with `line_number - 1`.
    pub lines: Vec<LineInfo>,
    /// Links, minus those on lines flagged `in_kramdown_extension_block`.
    pub links: Vec<ParsedLink<'a>>,
    /// Images, minus those on lines flagged `in_kramdown_extension_block`.
    pub images: Vec<ParsedImage<'a>>,
    /// Broken links, minus those starting on lines flagged `in_kramdown_extension_block`.
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Footnote references, minus those on lines flagged `in_kramdown_extension_block`.
    pub footnote_refs: Vec<FootnoteRef>,
    /// Reference definitions, minus those on lines flagged `in_kramdown_extension_block`.
    pub reference_defs: Vec<ReferenceDef>,
    /// Lower-cased reference id -> index into `reference_defs`.
    reference_defs_map: HashMap<String, usize>,
    /// Code spans; populated during construction.
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>,
    /// Math spans; populated on first call to `math_spans()`.
    math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>,
    /// List blocks, minus those starting on lines flagged `in_kramdown_extension_block`.
    pub list_blocks: Vec<ListBlock>,
    /// Character counts computed by `line_computation::compute_char_frequency`.
    pub char_frequency: CharFrequency,
    /// HTML tags; populated on first call to `html_tags()`.
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>,
    /// Emphasis spans; populated during construction.
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>,
    /// Table rows; populated on first call to `table_rows()`.
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>,
    /// Bare URLs; populated on first call to `bare_urls()`.
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>,
    /// Result of `compute_mixed_list_nesting`; populated on first call to `has_mixed_list_nesting()`.
    has_mixed_list_nesting_cache: OnceLock<bool>,
    /// Byte ranges from `skip_context::compute_html_comment_ranges`.
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Table blocks, minus those whose `start_line` entry in `lines` is flagged
    /// `in_kramdown_extension_block`.
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Line index built from `content`, a copy of `line_offsets` and `code_blocks`.
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    /// Byte ranges from `jinja_utils::find_jinja_ranges`.
    jinja_ranges: Vec<(usize, usize)>,
    /// Markdown flavor the document was analysed as.
    pub flavor: MarkdownFlavor,
    /// Source path supplied by the caller of `new`, if any.
    pub source_file: Option<PathBuf>,
    /// First element of the pair returned by `flavor_detection::detect_jsx_and_mdx_comments`.
    jsx_expression_ranges: Vec<(usize, usize)>,
    /// Second element of the pair returned by `flavor_detection::detect_jsx_and_mdx_comments`.
    mdx_comment_ranges: Vec<(usize, usize)>,
    /// Byte ranges from `quarto_divs::find_citation_ranges`; empty unless the flavor is Quarto.
    citation_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// `(start, end)` of every `HUGO_SHORTCODE_REGEX` match in `content`.
    shortcode_ranges: Vec<(usize, usize)>,
    /// `(title_byte_start, title_byte_end)` of every reference definition that has both.
    link_title_ranges: Vec<(usize, usize)>,
    /// Code-span ranges taken from the result of `CodeBlockUtils::detect_code_blocks_and_spans`.
    code_span_byte_ranges: Vec<(usize, usize)>,
    /// Inline configuration built by `InlineConfig::from_content_with_code_blocks`.
    inline_config: InlineConfig,
    /// Byte ranges returned by `flavor_detection::detect_obsidian_comments`.
    obsidian_comment_ranges: Vec<(usize, usize)>,
    /// Lazy continuation lines; populated on first call to `lazy_continuation_lines()`.
    lazy_cont_lines_cache: OnceLock<Arc<Vec<LazyContLine>>>,
}
96
97impl<'a> LintContext<'a> {
98 pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
99 #[cfg(not(target_arch = "wasm32"))]
100 let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
101
102 let line_offsets = profile_section!("Line offsets", profile, {
103 let mut offsets = vec![0];
104 for (i, c) in content.char_indices() {
105 if c == '\n' {
106 offsets.push(i + 1);
107 }
108 }
109 offsets
110 });
111
112 let content_lines: Vec<&str> = content.lines().collect();
114
115 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
117
118 let parse_result = profile_section!(
120 "Code blocks",
121 profile,
122 CodeBlockUtils::detect_code_blocks_and_spans(content)
123 );
124 let mut code_blocks = parse_result.code_blocks;
125 let code_span_ranges = parse_result.code_spans;
126 let code_block_details = parse_result.code_block_details;
127 let strong_spans = parse_result.strong_spans;
128 let line_to_list = parse_result.line_to_list;
129 let list_start_values = parse_result.list_start_values;
130
131 let html_comment_ranges = profile_section!(
133 "HTML comment ranges",
134 profile,
135 crate::utils::skip_context::compute_html_comment_ranges(content)
136 );
137
138 let autodoc_ranges = profile_section!(
142 "Autodoc block ranges",
143 profile,
144 crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
145 );
146
147 let quarto_div_ranges = profile_section!("Quarto div ranges", profile, {
149 if flavor == MarkdownFlavor::Quarto {
150 crate::utils::quarto_divs::detect_div_block_ranges(content)
151 } else {
152 Vec::new()
153 }
154 });
155
156 let pymdown_block_ranges = profile_section!("PyMdown block ranges", profile, {
158 if flavor == MarkdownFlavor::MkDocs {
159 crate::utils::pymdown_blocks::detect_block_ranges(content)
160 } else {
161 Vec::new()
162 }
163 });
164
165 let skip_ranges = SkipByteRanges {
168 html_comment_ranges: &html_comment_ranges,
169 autodoc_ranges: &autodoc_ranges,
170 quarto_div_ranges: &quarto_div_ranges,
171 pymdown_block_ranges: &pymdown_block_ranges,
172 };
173 let (mut lines, emphasis_spans) = profile_section!(
174 "Basic line info",
175 profile,
176 line_computation::compute_basic_line_info(
177 content,
178 &content_lines,
179 &line_offsets,
180 &code_blocks,
181 flavor,
182 &skip_ranges,
183 front_matter_end,
184 )
185 );
186
187 profile_section!(
189 "HTML blocks",
190 profile,
191 heading_detection::detect_html_blocks(content, &mut lines)
192 );
193
194 profile_section!(
196 "ESM blocks",
197 profile,
198 flavor_detection::detect_esm_blocks(content, &mut lines, flavor)
199 );
200
201 profile_section!(
203 "JSX block detection",
204 profile,
205 flavor_detection::detect_jsx_blocks(content, &mut lines, flavor)
206 );
207
208 let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
210 "JSX/MDX detection",
211 profile,
212 flavor_detection::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
213 );
214
215 profile_section!(
217 "MkDocs constructs",
218 profile,
219 flavor_detection::detect_mkdocs_line_info(&content_lines, &mut lines, flavor)
220 );
221
222 profile_section!(
227 "Footnote definitions",
228 profile,
229 detect_footnote_definitions(content, &mut lines, &line_offsets)
230 );
231
232 {
235 let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
236 for &(start, end) in &code_blocks {
237 let start_line = line_offsets
238 .partition_point(|&offset| offset <= start)
239 .saturating_sub(1);
240 let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
241
242 let mut sub_start: Option<usize> = None;
243 for (i, &offset) in line_offsets[start_line..end_line]
244 .iter()
245 .enumerate()
246 .map(|(j, o)| (j + start_line, o))
247 {
248 let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
249 if is_real_code && sub_start.is_none() {
250 let byte_start = if i == start_line { start } else { offset };
251 sub_start = Some(byte_start);
252 } else if !is_real_code && sub_start.is_some() {
253 new_code_blocks.push((sub_start.unwrap(), offset));
254 sub_start = None;
255 }
256 }
257 if let Some(s) = sub_start {
258 new_code_blocks.push((s, end));
259 }
260 }
261 code_blocks = new_code_blocks;
262 }
263
264 if flavor == MarkdownFlavor::MkDocs {
271 let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
272 for &(start, end) in &code_blocks {
273 let start_line = line_offsets
274 .partition_point(|&offset| offset <= start)
275 .saturating_sub(1);
276 let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
277
278 let mut sub_start: Option<usize> = None;
280 for (i, &offset) in line_offsets[start_line..end_line]
281 .iter()
282 .enumerate()
283 .map(|(j, o)| (j + start_line, o))
284 {
285 let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
286 if is_real_code && sub_start.is_none() {
287 let byte_start = if i == start_line { start } else { offset };
288 sub_start = Some(byte_start);
289 } else if !is_real_code && sub_start.is_some() {
290 new_code_blocks.push((sub_start.unwrap(), offset));
291 sub_start = None;
292 }
293 }
294 if let Some(s) = sub_start {
295 new_code_blocks.push((s, end));
296 }
297 }
298 code_blocks = new_code_blocks;
299 }
300
301 if flavor.supports_jsx() {
305 let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
306 for &(start, end) in &code_blocks {
307 let start_line = line_offsets
308 .partition_point(|&offset| offset <= start)
309 .saturating_sub(1);
310 let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
311
312 let mut sub_start: Option<usize> = None;
313 for (i, &offset) in line_offsets[start_line..end_line]
314 .iter()
315 .enumerate()
316 .map(|(j, o)| (j + start_line, o))
317 {
318 let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
319 if is_real_code && sub_start.is_none() {
320 let byte_start = if i == start_line { start } else { offset };
321 sub_start = Some(byte_start);
322 } else if !is_real_code && sub_start.is_some() {
323 new_code_blocks.push((sub_start.unwrap(), offset));
324 sub_start = None;
325 }
326 }
327 if let Some(s) = sub_start {
328 new_code_blocks.push((s, end));
329 }
330 }
331 code_blocks = new_code_blocks;
332 }
333
334 profile_section!(
336 "Kramdown constructs",
337 profile,
338 flavor_detection::detect_kramdown_line_info(content, &mut lines, flavor)
339 );
340
341 for line in &mut lines {
346 if line.in_kramdown_extension_block {
347 line.list_item = None;
348 line.is_horizontal_rule = false;
349 line.blockquote = None;
350 line.is_kramdown_block_ial = false;
351 }
352 }
353
354 let obsidian_comment_ranges = profile_section!(
356 "Obsidian comments",
357 profile,
358 flavor_detection::detect_obsidian_comments(content, &mut lines, flavor, &code_span_ranges)
359 );
360
361 let pulldown_result = profile_section!(
365 "Links, images & link ranges",
366 profile,
367 link_parser::parse_links_images_pulldown(content, &lines, &code_blocks, flavor, &html_comment_ranges)
368 );
369
370 profile_section!(
372 "Headings & blockquotes",
373 profile,
374 heading_detection::detect_headings_and_blockquotes(
375 &content_lines,
376 &mut lines,
377 flavor,
378 &html_comment_ranges,
379 &pulldown_result.link_byte_ranges,
380 front_matter_end,
381 )
382 );
383
384 for line in &mut lines {
386 if line.in_kramdown_extension_block {
387 line.heading = None;
388 }
389 }
390
391 let mut code_spans = profile_section!(
393 "Code spans",
394 profile,
395 element_parsers::build_code_spans_from_ranges(content, &lines, &code_span_ranges)
396 );
397
398 if flavor == MarkdownFlavor::MkDocs {
402 let extra = profile_section!(
403 "MkDocs code spans",
404 profile,
405 element_parsers::scan_mkdocs_container_code_spans(content, &lines, &code_span_ranges,)
406 );
407 if !extra.is_empty() {
408 code_spans.extend(extra);
409 code_spans.sort_by_key(|span| span.byte_offset);
410 }
411 }
412
413 if flavor == MarkdownFlavor::MDX {
418 let extra = profile_section!(
419 "MDX JSX code spans",
420 profile,
421 element_parsers::scan_jsx_block_code_spans(content, &lines, &code_span_ranges)
422 );
423 if !extra.is_empty() {
424 code_spans.extend(extra);
425 code_spans.sort_by_key(|span| span.byte_offset);
426 }
427 }
428
429 for span in &code_spans {
432 if span.end_line > span.line {
433 for line_num in (span.line + 1)..=span.end_line {
435 if let Some(line_info) = lines.get_mut(line_num - 1) {
436 line_info.in_code_span_continuation = true;
437 }
438 }
439 }
440 }
441
442 let (links, images, broken_links, footnote_refs) = profile_section!(
444 "Links & images finalize",
445 profile,
446 link_parser::finalize_links_and_images(
447 content,
448 &lines,
449 &code_blocks,
450 &code_spans,
451 flavor,
452 &html_comment_ranges,
453 pulldown_result
454 )
455 );
456
457 let reference_defs = profile_section!(
458 "Reference defs",
459 profile,
460 link_parser::parse_reference_defs(content, &lines)
461 );
462
463 let list_blocks = profile_section!("List blocks", profile, list_blocks::parse_list_blocks(content, &lines));
464
465 let char_frequency = profile_section!(
467 "Char frequency",
468 profile,
469 line_computation::compute_char_frequency(content)
470 );
471
472 let table_blocks = profile_section!(
474 "Table blocks",
475 profile,
476 crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
477 content,
478 &code_blocks,
479 &code_spans,
480 &html_comment_ranges,
481 )
482 );
483
484 let links = links
487 .into_iter()
488 .filter(|link| !lines.get(link.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
489 .collect::<Vec<_>>();
490 let images = images
491 .into_iter()
492 .filter(|img| !lines.get(img.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
493 .collect::<Vec<_>>();
494 let broken_links = broken_links
495 .into_iter()
496 .filter(|bl| {
497 let line_idx = line_offsets
499 .partition_point(|&offset| offset <= bl.span.start)
500 .saturating_sub(1);
501 !lines.get(line_idx).is_some_and(|l| l.in_kramdown_extension_block)
502 })
503 .collect::<Vec<_>>();
504 let footnote_refs = footnote_refs
505 .into_iter()
506 .filter(|fr| !lines.get(fr.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
507 .collect::<Vec<_>>();
508 let reference_defs = reference_defs
509 .into_iter()
510 .filter(|def| !lines.get(def.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
511 .collect::<Vec<_>>();
512 let list_blocks = list_blocks
513 .into_iter()
514 .filter(|block| {
515 !lines
516 .get(block.start_line - 1)
517 .is_some_and(|l| l.in_kramdown_extension_block)
518 })
519 .collect::<Vec<_>>();
520 let table_blocks = table_blocks
521 .into_iter()
522 .filter(|block| {
523 !lines
525 .get(block.start_line)
526 .is_some_and(|l| l.in_kramdown_extension_block)
527 })
528 .collect::<Vec<_>>();
529 let emphasis_spans = emphasis_spans
530 .into_iter()
531 .filter(|span| !lines.get(span.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
532 .collect::<Vec<_>>();
533
534 let reference_defs_map: HashMap<String, usize> = reference_defs
536 .iter()
537 .enumerate()
538 .map(|(idx, def)| (def.id.to_lowercase(), idx))
539 .collect();
540
541 let link_title_ranges: Vec<(usize, usize)> = reference_defs
543 .iter()
544 .filter_map(|def| match (def.title_byte_start, def.title_byte_end) {
545 (Some(start), Some(end)) => Some((start, end)),
546 _ => None,
547 })
548 .collect();
549
550 let line_index = profile_section!(
552 "Line index",
553 profile,
554 crate::utils::range_utils::LineIndex::with_line_starts_and_code_blocks(
555 content,
556 line_offsets.clone(),
557 &code_blocks,
558 )
559 );
560
561 let jinja_ranges = profile_section!(
563 "Jinja ranges",
564 profile,
565 crate::utils::jinja_utils::find_jinja_ranges(content)
566 );
567
568 let citation_ranges = profile_section!("Citation ranges", profile, {
570 if flavor == MarkdownFlavor::Quarto {
571 crate::utils::quarto_divs::find_citation_ranges(content)
572 } else {
573 Vec::new()
574 }
575 });
576
577 let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
579 use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
580 let mut ranges = Vec::new();
581 for mat in HUGO_SHORTCODE_REGEX.find_iter(content) {
582 ranges.push((mat.start(), mat.end()));
583 }
584 ranges
585 });
586
587 let inline_config = InlineConfig::from_content_with_code_blocks(content, &code_blocks);
588
589 Self {
590 content,
591 content_lines,
592 line_offsets,
593 code_blocks,
594 code_block_details,
595 strong_spans,
596 line_to_list,
597 list_start_values,
598 lines,
599 links,
600 images,
601 broken_links,
602 footnote_refs,
603 reference_defs,
604 reference_defs_map,
605 code_spans_cache: OnceLock::from(Arc::new(code_spans)),
606 math_spans_cache: OnceLock::new(), list_blocks,
608 char_frequency,
609 html_tags_cache: OnceLock::new(),
610 emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
611 table_rows_cache: OnceLock::new(),
612 bare_urls_cache: OnceLock::new(),
613 has_mixed_list_nesting_cache: OnceLock::new(),
614 html_comment_ranges,
615 table_blocks,
616 line_index,
617 jinja_ranges,
618 flavor,
619 source_file,
620 jsx_expression_ranges,
621 mdx_comment_ranges,
622 citation_ranges,
623 shortcode_ranges,
624 link_title_ranges,
625 code_span_byte_ranges: code_span_ranges,
626 inline_config,
627 obsidian_comment_ranges,
628 lazy_cont_lines_cache: OnceLock::new(),
629 }
630 }
631
632 #[inline]
635 fn binary_search_ranges(ranges: &[(usize, usize)], pos: usize) -> bool {
636 let idx = ranges.partition_point(|&(start, _)| start <= pos);
638 idx > 0 && pos < ranges[idx - 1].1
640 }
641
    /// Returns `true` if byte offset `pos` lies inside one of the stored
    /// `code_span_byte_ranges` (binary search; start inclusive, end exclusive).
    pub fn is_in_code_span_byte(&self, pos: usize) -> bool {
        Self::binary_search_ranges(&self.code_span_byte_ranges, pos)
    }
646
647 pub fn is_in_link(&self, pos: usize) -> bool {
649 let idx = self.links.partition_point(|link| link.byte_offset <= pos);
650 if idx > 0 && pos < self.links[idx - 1].byte_end {
651 return true;
652 }
653 let idx = self.images.partition_point(|img| img.byte_offset <= pos);
654 if idx > 0 && pos < self.images[idx - 1].byte_end {
655 return true;
656 }
657 self.is_in_reference_def(pos)
658 }
659
    /// Returns the inline configuration built for this document.
    pub fn inline_config(&self) -> &InlineConfig {
        &self.inline_config
    }
664
    /// Returns the document's lines as string slices borrowed from `content`.
    pub fn raw_lines(&self) -> &[&'a str] {
        &self.content_lines
    }
671
    /// Asks the inline configuration whether `rule_name` is disabled at
    /// `line_number`; the answer is forwarded unchanged.
    pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
        self.inline_config.is_rule_disabled(rule_name, line_number)
    }
679
680 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
682 Arc::clone(
683 self.code_spans_cache
684 .get_or_init(|| Arc::new(element_parsers::parse_code_spans(self.content, &self.lines))),
685 )
686 }
687
688 pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
690 Arc::clone(
691 self.math_spans_cache
692 .get_or_init(|| Arc::new(element_parsers::parse_math_spans(self.content, &self.lines))),
693 )
694 }
695
696 pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
698 let math_spans = self.math_spans();
699 let idx = math_spans.partition_point(|span| span.byte_offset <= byte_pos);
701 idx > 0 && byte_pos < math_spans[idx - 1].byte_end
702 }
703
    /// Returns the byte ranges of HTML comments computed during construction.
    pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
        &self.html_comment_ranges
    }
708
    /// Returns `true` if `byte_pos` lies inside one of the stored Obsidian
    /// comment ranges (binary search; start inclusive, end exclusive).
    pub fn is_in_obsidian_comment(&self, byte_pos: usize) -> bool {
        Self::binary_search_ranges(&self.obsidian_comment_ranges, byte_pos)
    }
715
716 pub fn is_position_in_obsidian_comment(&self, line_num: usize, col: usize) -> bool {
721 if self.obsidian_comment_ranges.is_empty() {
722 return false;
723 }
724
725 let byte_pos = self.line_index.line_col_to_byte_range(line_num, col).start;
727 self.is_in_obsidian_comment(byte_pos)
728 }
729
730 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
732 Arc::clone(self.html_tags_cache.get_or_init(|| {
733 let tags = element_parsers::parse_html_tags(self.content, &self.lines, &self.code_blocks, self.flavor);
734 Arc::new(
736 tags.into_iter()
737 .filter(|tag| {
738 !self
739 .lines
740 .get(tag.line - 1)
741 .is_some_and(|l| l.in_kramdown_extension_block)
742 })
743 .collect(),
744 )
745 }))
746 }
747
748 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
750 Arc::clone(
751 self.emphasis_spans_cache
752 .get()
753 .expect("emphasis_spans_cache initialized during construction"),
754 )
755 }
756
757 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
759 Arc::clone(
760 self.table_rows_cache
761 .get_or_init(|| Arc::new(element_parsers::parse_table_rows(self.content, &self.lines))),
762 )
763 }
764
765 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
767 Arc::clone(self.bare_urls_cache.get_or_init(|| {
768 Arc::new(element_parsers::parse_bare_urls(
769 self.content,
770 &self.lines,
771 &self.code_blocks,
772 ))
773 }))
774 }
775
776 pub fn lazy_continuation_lines(&self) -> Arc<Vec<LazyContLine>> {
778 Arc::clone(self.lazy_cont_lines_cache.get_or_init(|| {
779 Arc::new(element_parsers::detect_lazy_continuation_lines(
780 self.content,
781 &self.lines,
782 &self.line_offsets,
783 ))
784 }))
785 }
786
    /// Returns whether the document nests an ordered list item under an
    /// unordered one or vice versa.
    ///
    /// The answer is computed by `compute_mixed_list_nesting` on the first call
    /// and cached for later calls.
    pub fn has_mixed_list_nesting(&self) -> bool {
        *self
            .has_mixed_list_nesting_cache
            .get_or_init(|| self.compute_mixed_list_nesting())
    }
795
796 fn compute_mixed_list_nesting(&self) -> bool {
798 let mut stack: Vec<(usize, bool)> = Vec::new();
803 let mut last_was_blank = false;
804
805 for line_info in &self.lines {
806 if line_info.in_code_block
808 || line_info.in_front_matter
809 || line_info.in_mkdocstrings
810 || line_info.in_html_comment
811 || line_info.in_mdx_comment
812 || line_info.in_esm_block
813 {
814 continue;
815 }
816
817 if line_info.is_blank {
819 last_was_blank = true;
820 continue;
821 }
822
823 if let Some(list_item) = &line_info.list_item {
824 let current_pos = if list_item.marker_column == 1 {
826 0
827 } else {
828 list_item.marker_column
829 };
830
831 if last_was_blank && current_pos == 0 {
833 stack.clear();
834 }
835 last_was_blank = false;
836
837 while let Some(&(pos, _)) = stack.last() {
839 if pos >= current_pos {
840 stack.pop();
841 } else {
842 break;
843 }
844 }
845
846 if let Some(&(_, parent_is_ordered)) = stack.last()
848 && parent_is_ordered != list_item.is_ordered
849 {
850 return true; }
852
853 stack.push((current_pos, list_item.is_ordered));
854 } else {
855 last_was_blank = false;
857 }
858 }
859
860 false
861 }
862
863 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
865 match self.line_offsets.binary_search(&offset) {
866 Ok(line) => (line + 1, 1),
867 Err(line) => {
868 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
869 (line, offset - line_start + 1)
870 }
871 }
872 }
873
874 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
876 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
878 return true;
879 }
880
881 self.is_byte_offset_in_code_span(pos)
883 }
884
885 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
887 if line_num > 0 {
888 self.lines.get(line_num - 1)
889 } else {
890 None
891 }
892 }
893
894 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
896 let normalized_id = ref_id.to_lowercase();
897 self.reference_defs_map
898 .get(&normalized_id)
899 .map(|&idx| self.reference_defs[idx].url.as_str())
900 }
901
902 pub fn is_in_list_block(&self, line_num: usize) -> bool {
904 self.list_blocks
905 .iter()
906 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
907 }
908
909 pub fn is_in_html_block(&self, line_num: usize) -> bool {
911 if line_num == 0 || line_num > self.lines.len() {
912 return false;
913 }
914 self.lines[line_num - 1].in_html_block
915 }
916
917 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
919 if line_num == 0 || line_num > self.lines.len() {
920 return false;
921 }
922
923 let col_0indexed = if col > 0 { col - 1 } else { 0 };
927 let code_spans = self.code_spans();
928 code_spans.iter().any(|span| {
929 if line_num < span.line || line_num > span.end_line {
931 return false;
932 }
933
934 if span.line == span.end_line {
935 col_0indexed >= span.start_col && col_0indexed < span.end_col
937 } else if line_num == span.line {
938 col_0indexed >= span.start_col
940 } else if line_num == span.end_line {
941 col_0indexed < span.end_col
943 } else {
944 true
946 }
947 })
948 }
949
950 #[inline]
952 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
953 let code_spans = self.code_spans();
954 let idx = code_spans.partition_point(|span| span.byte_offset <= byte_offset);
955 idx > 0 && byte_offset < code_spans[idx - 1].byte_end
956 }
957
958 #[inline]
960 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
961 let idx = self.reference_defs.partition_point(|rd| rd.byte_offset <= byte_pos);
962 idx > 0 && byte_pos < self.reference_defs[idx - 1].byte_end
963 }
964
965 #[inline]
967 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
968 let idx = self.html_comment_ranges.partition_point(|r| r.start <= byte_pos);
969 idx > 0 && byte_pos < self.html_comment_ranges[idx - 1].end
970 }
971
972 #[inline]
975 pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
976 let tags = self.html_tags();
977 let idx = tags.partition_point(|tag| tag.byte_offset <= byte_pos);
978 idx > 0 && byte_pos < tags[idx - 1].byte_end
979 }
980
    /// Returns `true` if `byte_pos` lies inside one of the stored Jinja ranges
    /// (binary search; start inclusive, end exclusive).
    pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
        Self::binary_search_ranges(&self.jinja_ranges, byte_pos)
    }
985
    /// Returns `true` if `byte_pos` lies inside one of the stored JSX
    /// expression ranges (binary search; start inclusive, end exclusive).
    #[inline]
    pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
        Self::binary_search_ranges(&self.jsx_expression_ranges, byte_pos)
    }
991
    /// Returns `true` if `byte_pos` lies inside one of the stored MDX comment
    /// ranges (binary search; start inclusive, end exclusive).
    #[inline]
    pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
        Self::binary_search_ranges(&self.mdx_comment_ranges, byte_pos)
    }
997
998 #[inline]
1001 pub fn is_in_citation(&self, byte_pos: usize) -> bool {
1002 let idx = self.citation_ranges.partition_point(|r| r.start <= byte_pos);
1003 idx > 0 && byte_pos < self.citation_ranges[idx - 1].end
1004 }
1005
    /// Returns the stored citation byte ranges.
    #[inline]
    pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
        &self.citation_ranges
    }
1011
    /// Returns `true` if `byte_pos` lies inside one of the stored shortcode
    /// ranges (binary search; start inclusive, end exclusive).
    #[inline]
    pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
        Self::binary_search_ranges(&self.shortcode_ranges, byte_pos)
    }
1017
    /// Returns the stored shortcode `(start, end)` byte ranges.
    #[inline]
    pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
        &self.shortcode_ranges
    }
1023
    /// Returns `true` if `byte_pos` lies inside one of the stored link-title
    /// ranges (binary search; start inclusive, end exclusive).
    pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
        Self::binary_search_ranges(&self.link_title_ranges, byte_pos)
    }
1028
1029 pub fn has_char(&self, ch: char) -> bool {
1031 match ch {
1032 '#' => self.char_frequency.hash_count > 0,
1033 '*' => self.char_frequency.asterisk_count > 0,
1034 '_' => self.char_frequency.underscore_count > 0,
1035 '-' => self.char_frequency.hyphen_count > 0,
1036 '+' => self.char_frequency.plus_count > 0,
1037 '>' => self.char_frequency.gt_count > 0,
1038 '|' => self.char_frequency.pipe_count > 0,
1039 '[' => self.char_frequency.bracket_count > 0,
1040 '`' => self.char_frequency.backtick_count > 0,
1041 '<' => self.char_frequency.lt_count > 0,
1042 '!' => self.char_frequency.exclamation_count > 0,
1043 '\n' => self.char_frequency.newline_count > 0,
1044 _ => self.content.contains(ch), }
1046 }
1047
1048 pub fn char_count(&self, ch: char) -> usize {
1050 match ch {
1051 '#' => self.char_frequency.hash_count,
1052 '*' => self.char_frequency.asterisk_count,
1053 '_' => self.char_frequency.underscore_count,
1054 '-' => self.char_frequency.hyphen_count,
1055 '+' => self.char_frequency.plus_count,
1056 '>' => self.char_frequency.gt_count,
1057 '|' => self.char_frequency.pipe_count,
1058 '[' => self.char_frequency.bracket_count,
1059 '`' => self.char_frequency.backtick_count,
1060 '<' => self.char_frequency.lt_count,
1061 '!' => self.char_frequency.exclamation_count,
1062 '\n' => self.char_frequency.newline_count,
1063 _ => self.content.matches(ch).count(), }
1065 }
1066
    /// Cheap pre-check for headings: `true` when `hash_count` is non-zero,
    /// `hyphen_count` exceeds two, or the content contains `=`.
    pub fn likely_has_headings(&self) -> bool {
        self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 || self.content.contains('=')
    }
1071
    /// Cheap pre-check for lists: `true` when any of `asterisk_count`,
    /// `hyphen_count` or `plus_count` is non-zero.
    pub fn likely_has_lists(&self) -> bool {
        self.char_frequency.asterisk_count > 0
            || self.char_frequency.hyphen_count > 0
            || self.char_frequency.plus_count > 0
    }
1078
    /// Cheap pre-check for emphasis: `true` when `asterisk_count` or
    /// `underscore_count` is greater than one.
    pub fn likely_has_emphasis(&self) -> bool {
        self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
    }
1083
    /// Cheap pre-check for tables: `true` when `pipe_count` is greater than two.
    pub fn likely_has_tables(&self) -> bool {
        self.char_frequency.pipe_count > 2
    }
1088
    /// Cheap pre-check for blockquotes: `true` when `gt_count` is non-zero.
    pub fn likely_has_blockquotes(&self) -> bool {
        self.char_frequency.gt_count > 0
    }
1093
    /// Cheap pre-check for backtick code: `true` when `backtick_count` is non-zero.
    pub fn likely_has_code(&self) -> bool {
        self.char_frequency.backtick_count > 0
    }
1098
    /// Cheap pre-check for links or images: `true` when `bracket_count` or
    /// `exclamation_count` is non-zero.
    pub fn likely_has_links_or_images(&self) -> bool {
        self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
    }
1103
    /// Cheap pre-check for HTML: `true` when `lt_count` is non-zero.
    pub fn likely_has_html(&self) -> bool {
        self.char_frequency.lt_count > 0
    }
1108
1109 pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1114 if let Some(line_info) = self.lines.get(line_idx)
1115 && let Some(ref bq) = line_info.blockquote
1116 {
1117 bq.prefix.trim_end().to_string()
1118 } else {
1119 String::new()
1120 }
1121 }
1122
1123 #[inline]
1129 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1130 let idx = match lines.binary_search_by(|line| {
1132 if byte_offset < line.byte_offset {
1133 std::cmp::Ordering::Greater
1134 } else if byte_offset > line.byte_offset + line.byte_len {
1135 std::cmp::Ordering::Less
1136 } else {
1137 std::cmp::Ordering::Equal
1138 }
1139 }) {
1140 Ok(idx) => idx,
1141 Err(idx) => idx.saturating_sub(1),
1142 };
1143
1144 let line = &lines[idx];
1145 let line_num = idx + 1;
1146 let col = byte_offset.saturating_sub(line.byte_offset);
1147
1148 (idx, line_num, col)
1149 }
1150
1151 #[inline]
1153 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1154 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1156
1157 if idx > 0 {
1159 let span = &code_spans[idx - 1];
1160 if offset >= span.byte_offset && offset < span.byte_end {
1161 return true;
1162 }
1163 }
1164
1165 false
1166 }
1167
    /// Returns a [`ValidHeadingsIter`] constructed over this context's lines.
    #[must_use]
    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
        ValidHeadingsIter::new(&self.lines)
    }
1191
1192 #[must_use]
1196 pub fn has_valid_headings(&self) -> bool {
1197 self.lines
1198 .iter()
1199 .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
1200 }
1201}
1202
1203fn detect_footnote_definitions(content: &str, lines: &mut [types::LineInfo], line_offsets: &[usize]) {
1212 use pulldown_cmark::{CodeBlockKind, Event, Parser, Tag, TagEnd};
1213
1214 let options = crate::utils::rumdl_parser_options();
1215 let parser = Parser::new_ext(content, options).into_offset_iter();
1216
1217 let mut footnote_ranges: Vec<(usize, usize)> = Vec::new();
1219 let mut fenced_code_ranges: Vec<(usize, usize)> = Vec::new();
1220 let mut in_footnote = false;
1221
1222 for (event, range) in parser {
1223 match event {
1224 Event::Start(Tag::FootnoteDefinition(_)) => {
1225 in_footnote = true;
1226 footnote_ranges.push((range.start, range.end));
1227 }
1228 Event::End(TagEnd::FootnoteDefinition) => {
1229 in_footnote = false;
1230 }
1231 Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(_))) if in_footnote => {
1232 fenced_code_ranges.push((range.start, range.end));
1233 }
1234 _ => {}
1235 }
1236 }
1237
1238 let byte_to_line = |byte_offset: usize| -> usize {
1239 line_offsets
1240 .partition_point(|&offset| offset <= byte_offset)
1241 .saturating_sub(1)
1242 };
1243
1244 for &(start, end) in &footnote_ranges {
1246 let start_line = byte_to_line(start);
1247 let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
1248
1249 for line in &mut lines[start_line..end_line] {
1250 line.in_footnote_definition = true;
1251 line.in_code_block = false;
1252 }
1253 }
1254
1255 for &(start, end) in &fenced_code_ranges {
1257 let start_line = byte_to_line(start);
1258 let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
1259
1260 for line in &mut lines[start_line..end_line] {
1261 line.in_code_block = true;
1262 }
1263 }
1264}