1pub mod types;
2pub use types::*;
3
4mod element_parsers;
5mod flavor_detection;
6mod heading_detection;
7mod line_computation;
8mod link_parser;
9mod list_blocks;
10#[cfg(test)]
11mod tests;
12
13use crate::config::MarkdownFlavor;
14use crate::inline_config::InlineConfig;
15use crate::rules::front_matter_utils::FrontMatterUtils;
16use crate::utils::code_block_utils::{CodeBlockDetail, CodeBlockUtils};
17use std::collections::HashMap;
18use std::path::PathBuf;
19
20#[cfg(not(target_arch = "wasm32"))]
/// Evaluates `$body`, and when `$enabled` is true prints how long it took to stderr.
///
/// The macro expands to a block whose value is the value of `$body`, so it can be used
/// both as an expression (`let x = profile_section!(..)`) and as a statement.
/// `$label` is only evaluated when the timing line is actually printed.
macro_rules! profile_section {
    ($label:expr, $enabled:expr, $body:expr) => {{
        let timer = std::time::Instant::now();
        let output = $body;
        if $enabled {
            eprintln!("[PROFILE] {}: {:?}", $label, timer.elapsed());
        }
        output
    }};
}
32
33#[cfg(target_arch = "wasm32")]
/// wasm32 variant of `profile_section!`: no timing, the macro simply yields `$body`.
///
/// `$label` and `$enabled` are accepted for call-site compatibility but never evaluated.
macro_rules! profile_section {
    ($label:expr, $enabled:expr, $body:expr) => {{ $body }};
}
37
38pub(super) struct SkipByteRanges<'a> {
41 pub(super) html_comment_ranges: &'a [crate::utils::skip_context::ByteRange],
42 pub(super) autodoc_ranges: &'a [crate::utils::skip_context::ByteRange],
43 pub(super) quarto_div_ranges: &'a [crate::utils::skip_context::ByteRange],
44 pub(super) pymdown_block_ranges: &'a [crate::utils::skip_context::ByteRange],
45}
46
47use std::sync::{Arc, OnceLock};
48
49pub(super) type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;
51
52pub(super) type ByteRanges = Vec<(usize, usize)>;
54
55pub struct LintContext<'a> {
56 pub content: &'a str,
57 content_lines: Vec<&'a str>, pub line_offsets: Vec<usize>,
59 pub code_blocks: Vec<(usize, usize)>, pub code_block_details: Vec<CodeBlockDetail>, pub strong_spans: Vec<crate::utils::code_block_utils::StrongSpanDetail>, pub line_to_list: crate::utils::code_block_utils::LineToListMap, pub list_start_values: crate::utils::code_block_utils::ListStartValues, pub lines: Vec<LineInfo>, pub links: Vec<ParsedLink<'a>>, pub images: Vec<ParsedImage<'a>>, pub broken_links: Vec<BrokenLinkInfo>, pub footnote_refs: Vec<FootnoteRef>, pub reference_defs: Vec<ReferenceDef>, reference_defs_map: HashMap<String, usize>, code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>, math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>, pub list_blocks: Vec<ListBlock>, pub char_frequency: CharFrequency, html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>, emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>, table_rows_cache: OnceLock<Arc<Vec<TableRow>>>, bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>, has_mixed_list_nesting_cache: OnceLock<bool>, html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>, pub table_blocks: Vec<crate::utils::table_utils::TableBlock>, pub line_index: crate::utils::range_utils::LineIndex<'a>, jinja_ranges: Vec<(usize, usize)>, pub flavor: MarkdownFlavor, pub source_file: Option<PathBuf>, jsx_expression_ranges: Vec<(usize, usize)>, mdx_comment_ranges: Vec<(usize, usize)>, citation_ranges: Vec<crate::utils::skip_context::ByteRange>, shortcode_ranges: Vec<(usize, usize)>, link_title_ranges: Vec<(usize, usize)>, code_span_byte_ranges: Vec<(usize, usize)>, inline_config: InlineConfig, obsidian_comment_ranges: Vec<(usize, usize)>, lazy_cont_lines_cache: OnceLock<Arc<Vec<LazyContLine>>>, }
96
97impl<'a> LintContext<'a> {
98 pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
99 #[cfg(not(target_arch = "wasm32"))]
100 let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
101 #[cfg(target_arch = "wasm32")]
102 let profile = false;
103
104 let line_offsets = profile_section!("Line offsets", profile, {
105 let mut offsets = vec![0];
106 for (i, c) in content.char_indices() {
107 if c == '\n' {
108 offsets.push(i + 1);
109 }
110 }
111 offsets
112 });
113
114 let content_lines: Vec<&str> = content.lines().collect();
116
117 let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
119
120 let parse_result = profile_section!(
122 "Code blocks",
123 profile,
124 CodeBlockUtils::detect_code_blocks_and_spans(content)
125 );
126 let mut code_blocks = parse_result.code_blocks;
127 let code_span_ranges = parse_result.code_spans;
128 let code_block_details = parse_result.code_block_details;
129 let strong_spans = parse_result.strong_spans;
130 let line_to_list = parse_result.line_to_list;
131 let list_start_values = parse_result.list_start_values;
132
133 let html_comment_ranges = profile_section!(
135 "HTML comment ranges",
136 profile,
137 crate::utils::skip_context::compute_html_comment_ranges(content)
138 );
139
140 let autodoc_ranges = profile_section!(
144 "Autodoc block ranges",
145 profile,
146 crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
147 );
148
149 let quarto_div_ranges = profile_section!("Quarto div ranges", profile, {
151 if flavor == MarkdownFlavor::Quarto {
152 crate::utils::quarto_divs::detect_div_block_ranges(content)
153 } else {
154 Vec::new()
155 }
156 });
157
158 let pymdown_block_ranges = profile_section!("PyMdown block ranges", profile, {
160 if flavor == MarkdownFlavor::MkDocs {
161 crate::utils::pymdown_blocks::detect_block_ranges(content)
162 } else {
163 Vec::new()
164 }
165 });
166
167 let skip_ranges = SkipByteRanges {
170 html_comment_ranges: &html_comment_ranges,
171 autodoc_ranges: &autodoc_ranges,
172 quarto_div_ranges: &quarto_div_ranges,
173 pymdown_block_ranges: &pymdown_block_ranges,
174 };
175 let (mut lines, emphasis_spans) = profile_section!(
176 "Basic line info",
177 profile,
178 line_computation::compute_basic_line_info(
179 content,
180 &content_lines,
181 &line_offsets,
182 &code_blocks,
183 flavor,
184 &skip_ranges,
185 front_matter_end,
186 )
187 );
188
189 profile_section!(
191 "HTML blocks",
192 profile,
193 heading_detection::detect_html_blocks(content, &mut lines)
194 );
195
196 profile_section!(
198 "ESM blocks",
199 profile,
200 flavor_detection::detect_esm_blocks(content, &mut lines, flavor)
201 );
202
203 profile_section!(
205 "JSX block detection",
206 profile,
207 flavor_detection::detect_jsx_blocks(content, &mut lines, flavor)
208 );
209
210 let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
212 "JSX/MDX detection",
213 profile,
214 flavor_detection::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
215 );
216
217 profile_section!(
219 "MkDocs constructs",
220 profile,
221 flavor_detection::detect_mkdocs_line_info(&content_lines, &mut lines, flavor)
222 );
223
224 if flavor == MarkdownFlavor::MkDocs {
231 let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
232 for &(start, end) in &code_blocks {
233 let start_line = line_offsets
234 .partition_point(|&offset| offset <= start)
235 .saturating_sub(1);
236 let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
237
238 let mut sub_start: Option<usize> = None;
240 for (i, &offset) in line_offsets[start_line..end_line]
241 .iter()
242 .enumerate()
243 .map(|(j, o)| (j + start_line, o))
244 {
245 let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
246 if is_real_code && sub_start.is_none() {
247 let byte_start = if i == start_line { start } else { offset };
248 sub_start = Some(byte_start);
249 } else if !is_real_code && sub_start.is_some() {
250 new_code_blocks.push((sub_start.unwrap(), offset));
251 sub_start = None;
252 }
253 }
254 if let Some(s) = sub_start {
255 new_code_blocks.push((s, end));
256 }
257 }
258 code_blocks = new_code_blocks;
259 }
260
261 if flavor.supports_jsx() {
265 let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
266 for &(start, end) in &code_blocks {
267 let start_line = line_offsets
268 .partition_point(|&offset| offset <= start)
269 .saturating_sub(1);
270 let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
271
272 let mut sub_start: Option<usize> = None;
273 for (i, &offset) in line_offsets[start_line..end_line]
274 .iter()
275 .enumerate()
276 .map(|(j, o)| (j + start_line, o))
277 {
278 let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
279 if is_real_code && sub_start.is_none() {
280 let byte_start = if i == start_line { start } else { offset };
281 sub_start = Some(byte_start);
282 } else if !is_real_code && sub_start.is_some() {
283 new_code_blocks.push((sub_start.unwrap(), offset));
284 sub_start = None;
285 }
286 }
287 if let Some(s) = sub_start {
288 new_code_blocks.push((s, end));
289 }
290 }
291 code_blocks = new_code_blocks;
292 }
293
294 profile_section!(
296 "Kramdown constructs",
297 profile,
298 flavor_detection::detect_kramdown_line_info(content, &mut lines, flavor)
299 );
300
301 for line in &mut lines {
306 if line.in_kramdown_extension_block {
307 line.list_item = None;
308 line.is_horizontal_rule = false;
309 line.blockquote = None;
310 line.is_kramdown_block_ial = false;
311 }
312 }
313
314 let obsidian_comment_ranges = profile_section!(
316 "Obsidian comments",
317 profile,
318 flavor_detection::detect_obsidian_comments(content, &mut lines, flavor, &code_span_ranges)
319 );
320
321 let pulldown_result = profile_section!(
325 "Links, images & link ranges",
326 profile,
327 link_parser::parse_links_images_pulldown(content, &lines, &code_blocks, flavor, &html_comment_ranges)
328 );
329
330 profile_section!(
332 "Headings & blockquotes",
333 profile,
334 heading_detection::detect_headings_and_blockquotes(
335 &content_lines,
336 &mut lines,
337 flavor,
338 &html_comment_ranges,
339 &pulldown_result.link_byte_ranges,
340 front_matter_end,
341 )
342 );
343
344 for line in &mut lines {
346 if line.in_kramdown_extension_block {
347 line.heading = None;
348 }
349 }
350
351 let mut code_spans = profile_section!(
353 "Code spans",
354 profile,
355 element_parsers::build_code_spans_from_ranges(content, &lines, &code_span_ranges)
356 );
357
358 if flavor == MarkdownFlavor::MkDocs {
362 let extra = profile_section!(
363 "MkDocs code spans",
364 profile,
365 element_parsers::scan_mkdocs_container_code_spans(content, &lines, &code_span_ranges,)
366 );
367 if !extra.is_empty() {
368 code_spans.extend(extra);
369 code_spans.sort_by_key(|span| span.byte_offset);
370 }
371 }
372
373 for span in &code_spans {
376 if span.end_line > span.line {
377 for line_num in (span.line + 1)..=span.end_line {
379 if let Some(line_info) = lines.get_mut(line_num - 1) {
380 line_info.in_code_span_continuation = true;
381 }
382 }
383 }
384 }
385
386 let (links, images, broken_links, footnote_refs) = profile_section!(
388 "Links & images finalize",
389 profile,
390 link_parser::finalize_links_and_images(
391 content,
392 &lines,
393 &code_blocks,
394 &code_spans,
395 flavor,
396 &html_comment_ranges,
397 pulldown_result
398 )
399 );
400
401 let reference_defs = profile_section!(
402 "Reference defs",
403 profile,
404 link_parser::parse_reference_defs(content, &lines)
405 );
406
407 let list_blocks = profile_section!("List blocks", profile, list_blocks::parse_list_blocks(content, &lines));
408
409 let char_frequency = profile_section!(
411 "Char frequency",
412 profile,
413 line_computation::compute_char_frequency(content)
414 );
415
416 let table_blocks = profile_section!(
418 "Table blocks",
419 profile,
420 crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
421 content,
422 &code_blocks,
423 &code_spans,
424 &html_comment_ranges,
425 )
426 );
427
428 let links = links
431 .into_iter()
432 .filter(|link| !lines.get(link.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
433 .collect::<Vec<_>>();
434 let images = images
435 .into_iter()
436 .filter(|img| !lines.get(img.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
437 .collect::<Vec<_>>();
438 let broken_links = broken_links
439 .into_iter()
440 .filter(|bl| {
441 let line_idx = line_offsets
443 .partition_point(|&offset| offset <= bl.span.start)
444 .saturating_sub(1);
445 !lines.get(line_idx).is_some_and(|l| l.in_kramdown_extension_block)
446 })
447 .collect::<Vec<_>>();
448 let footnote_refs = footnote_refs
449 .into_iter()
450 .filter(|fr| !lines.get(fr.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
451 .collect::<Vec<_>>();
452 let reference_defs = reference_defs
453 .into_iter()
454 .filter(|def| !lines.get(def.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
455 .collect::<Vec<_>>();
456 let list_blocks = list_blocks
457 .into_iter()
458 .filter(|block| {
459 !lines
460 .get(block.start_line - 1)
461 .is_some_and(|l| l.in_kramdown_extension_block)
462 })
463 .collect::<Vec<_>>();
464 let table_blocks = table_blocks
465 .into_iter()
466 .filter(|block| {
467 !lines
469 .get(block.start_line)
470 .is_some_and(|l| l.in_kramdown_extension_block)
471 })
472 .collect::<Vec<_>>();
473 let emphasis_spans = emphasis_spans
474 .into_iter()
475 .filter(|span| !lines.get(span.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
476 .collect::<Vec<_>>();
477
478 let reference_defs_map: HashMap<String, usize> = reference_defs
480 .iter()
481 .enumerate()
482 .map(|(idx, def)| (def.id.to_lowercase(), idx))
483 .collect();
484
485 let link_title_ranges: Vec<(usize, usize)> = reference_defs
487 .iter()
488 .filter_map(|def| match (def.title_byte_start, def.title_byte_end) {
489 (Some(start), Some(end)) => Some((start, end)),
490 _ => None,
491 })
492 .collect();
493
494 let line_index = profile_section!(
496 "Line index",
497 profile,
498 crate::utils::range_utils::LineIndex::with_line_starts_and_code_blocks(
499 content,
500 line_offsets.clone(),
501 &code_blocks,
502 )
503 );
504
505 let jinja_ranges = profile_section!(
507 "Jinja ranges",
508 profile,
509 crate::utils::jinja_utils::find_jinja_ranges(content)
510 );
511
512 let citation_ranges = profile_section!("Citation ranges", profile, {
514 if flavor == MarkdownFlavor::Quarto {
515 crate::utils::quarto_divs::find_citation_ranges(content)
516 } else {
517 Vec::new()
518 }
519 });
520
521 let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
523 use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
524 let mut ranges = Vec::new();
525 for mat in HUGO_SHORTCODE_REGEX.find_iter(content) {
526 ranges.push((mat.start(), mat.end()));
527 }
528 ranges
529 });
530
531 let inline_config = InlineConfig::from_content_with_code_blocks(content, &code_blocks);
532
533 Self {
534 content,
535 content_lines,
536 line_offsets,
537 code_blocks,
538 code_block_details,
539 strong_spans,
540 line_to_list,
541 list_start_values,
542 lines,
543 links,
544 images,
545 broken_links,
546 footnote_refs,
547 reference_defs,
548 reference_defs_map,
549 code_spans_cache: OnceLock::from(Arc::new(code_spans)),
550 math_spans_cache: OnceLock::new(), list_blocks,
552 char_frequency,
553 html_tags_cache: OnceLock::new(),
554 emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
555 table_rows_cache: OnceLock::new(),
556 bare_urls_cache: OnceLock::new(),
557 has_mixed_list_nesting_cache: OnceLock::new(),
558 html_comment_ranges,
559 table_blocks,
560 line_index,
561 jinja_ranges,
562 flavor,
563 source_file,
564 jsx_expression_ranges,
565 mdx_comment_ranges,
566 citation_ranges,
567 shortcode_ranges,
568 link_title_ranges,
569 code_span_byte_ranges: code_span_ranges,
570 inline_config,
571 obsidian_comment_ranges,
572 lazy_cont_lines_cache: OnceLock::new(),
573 }
574 }
575
/// Returns `true` when `pos` lies inside one of the half-open `(start, end)` ranges.
///
/// `ranges` must be sorted by start offset, because the lookup uses `partition_point`.
/// Only the last range starting at or before `pos` is examined.
#[inline]
fn binary_search_ranges(ranges: &[(usize, usize)], pos: usize) -> bool {
    let started = ranges.partition_point(|range| range.0 <= pos);
    ranges[..started].last().is_some_and(|&(_, end)| pos < end)
}
585
586 pub fn is_in_code_span_byte(&self, pos: usize) -> bool {
588 Self::binary_search_ranges(&self.code_span_byte_ranges, pos)
589 }
590
591 pub fn is_in_link(&self, pos: usize) -> bool {
593 let idx = self.links.partition_point(|link| link.byte_offset <= pos);
594 if idx > 0 && pos < self.links[idx - 1].byte_end {
595 return true;
596 }
597 let idx = self.images.partition_point(|img| img.byte_offset <= pos);
598 if idx > 0 && pos < self.images[idx - 1].byte_end {
599 return true;
600 }
601 self.is_in_reference_def(pos)
602 }
603
604 pub fn inline_config(&self) -> &InlineConfig {
606 &self.inline_config
607 }
608
609 pub fn raw_lines(&self) -> &[&'a str] {
613 &self.content_lines
614 }
615
616 pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
621 self.inline_config.is_rule_disabled(rule_name, line_number)
622 }
623
624 pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
626 Arc::clone(
627 self.code_spans_cache
628 .get_or_init(|| Arc::new(element_parsers::parse_code_spans(self.content, &self.lines))),
629 )
630 }
631
632 pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
634 Arc::clone(
635 self.math_spans_cache
636 .get_or_init(|| Arc::new(element_parsers::parse_math_spans(self.content, &self.lines))),
637 )
638 }
639
640 pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
642 let math_spans = self.math_spans();
643 let idx = math_spans.partition_point(|span| span.byte_offset <= byte_pos);
645 idx > 0 && byte_pos < math_spans[idx - 1].byte_end
646 }
647
648 pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
650 &self.html_comment_ranges
651 }
652
653 pub fn obsidian_comment_ranges(&self) -> &[(usize, usize)] {
656 &self.obsidian_comment_ranges
657 }
658
659 pub fn is_in_obsidian_comment(&self, byte_pos: usize) -> bool {
663 Self::binary_search_ranges(&self.obsidian_comment_ranges, byte_pos)
664 }
665
666 pub fn is_position_in_obsidian_comment(&self, line_num: usize, col: usize) -> bool {
671 if self.obsidian_comment_ranges.is_empty() {
672 return false;
673 }
674
675 let byte_pos = self.line_index.line_col_to_byte_range(line_num, col).start;
677 self.is_in_obsidian_comment(byte_pos)
678 }
679
680 pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
682 Arc::clone(self.html_tags_cache.get_or_init(|| {
683 let tags = element_parsers::parse_html_tags(self.content, &self.lines, &self.code_blocks, self.flavor);
684 Arc::new(
686 tags.into_iter()
687 .filter(|tag| {
688 !self
689 .lines
690 .get(tag.line - 1)
691 .is_some_and(|l| l.in_kramdown_extension_block)
692 })
693 .collect(),
694 )
695 }))
696 }
697
698 pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
700 Arc::clone(
701 self.emphasis_spans_cache
702 .get()
703 .expect("emphasis_spans_cache initialized during construction"),
704 )
705 }
706
707 pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
709 Arc::clone(
710 self.table_rows_cache
711 .get_or_init(|| Arc::new(element_parsers::parse_table_rows(self.content, &self.lines))),
712 )
713 }
714
715 pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
717 Arc::clone(self.bare_urls_cache.get_or_init(|| {
718 Arc::new(element_parsers::parse_bare_urls(
719 self.content,
720 &self.lines,
721 &self.code_blocks,
722 ))
723 }))
724 }
725
726 pub fn lazy_continuation_lines(&self) -> Arc<Vec<LazyContLine>> {
728 Arc::clone(self.lazy_cont_lines_cache.get_or_init(|| {
729 Arc::new(element_parsers::detect_lazy_continuation_lines(
730 self.content,
731 &self.lines,
732 &self.line_offsets,
733 ))
734 }))
735 }
736
737 pub fn has_mixed_list_nesting(&self) -> bool {
741 *self
742 .has_mixed_list_nesting_cache
743 .get_or_init(|| self.compute_mixed_list_nesting())
744 }
745
746 fn compute_mixed_list_nesting(&self) -> bool {
748 let mut stack: Vec<(usize, bool)> = Vec::new();
753 let mut last_was_blank = false;
754
755 for line_info in &self.lines {
756 if line_info.in_code_block
758 || line_info.in_front_matter
759 || line_info.in_mkdocstrings
760 || line_info.in_html_comment
761 || line_info.in_esm_block
762 {
763 continue;
764 }
765
766 if line_info.is_blank {
768 last_was_blank = true;
769 continue;
770 }
771
772 if let Some(list_item) = &line_info.list_item {
773 let current_pos = if list_item.marker_column == 1 {
775 0
776 } else {
777 list_item.marker_column
778 };
779
780 if last_was_blank && current_pos == 0 {
782 stack.clear();
783 }
784 last_was_blank = false;
785
786 while let Some(&(pos, _)) = stack.last() {
788 if pos >= current_pos {
789 stack.pop();
790 } else {
791 break;
792 }
793 }
794
795 if let Some(&(_, parent_is_ordered)) = stack.last()
797 && parent_is_ordered != list_item.is_ordered
798 {
799 return true; }
801
802 stack.push((current_pos, list_item.is_ordered));
803 } else {
804 last_was_blank = false;
806 }
807 }
808
809 false
810 }
811
812 pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
814 match self.line_offsets.binary_search(&offset) {
815 Ok(line) => (line + 1, 1),
816 Err(line) => {
817 let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
818 (line, offset - line_start + 1)
819 }
820 }
821 }
822
823 pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
825 if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
827 return true;
828 }
829
830 self.is_byte_offset_in_code_span(pos)
832 }
833
834 pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
836 if line_num > 0 {
837 self.lines.get(line_num - 1)
838 } else {
839 None
840 }
841 }
842
843 pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
845 self.line_info(line_num).map(|info| info.byte_offset)
846 }
847
848 pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
850 let normalized_id = ref_id.to_lowercase();
851 self.reference_defs_map
852 .get(&normalized_id)
853 .map(|&idx| self.reference_defs[idx].url.as_str())
854 }
855
856 pub fn get_reference_def(&self, ref_id: &str) -> Option<&ReferenceDef> {
858 let normalized_id = ref_id.to_lowercase();
859 self.reference_defs_map
860 .get(&normalized_id)
861 .map(|&idx| &self.reference_defs[idx])
862 }
863
864 pub fn has_reference_def(&self, ref_id: &str) -> bool {
866 let normalized_id = ref_id.to_lowercase();
867 self.reference_defs_map.contains_key(&normalized_id)
868 }
869
870 pub fn is_in_list_block(&self, line_num: usize) -> bool {
872 self.list_blocks
873 .iter()
874 .any(|block| line_num >= block.start_line && line_num <= block.end_line)
875 }
876
877 pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
879 self.list_blocks
880 .iter()
881 .find(|block| line_num >= block.start_line && line_num <= block.end_line)
882 }
883
884 pub fn is_in_code_block(&self, line_num: usize) -> bool {
888 if line_num == 0 || line_num > self.lines.len() {
889 return false;
890 }
891 self.lines[line_num - 1].in_code_block
892 }
893
894 pub fn is_in_front_matter(&self, line_num: usize) -> bool {
896 if line_num == 0 || line_num > self.lines.len() {
897 return false;
898 }
899 self.lines[line_num - 1].in_front_matter
900 }
901
902 pub fn is_in_html_block(&self, line_num: usize) -> bool {
904 if line_num == 0 || line_num > self.lines.len() {
905 return false;
906 }
907 self.lines[line_num - 1].in_html_block
908 }
909
910 pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
912 if line_num == 0 || line_num > self.lines.len() {
913 return false;
914 }
915
916 let col_0indexed = if col > 0 { col - 1 } else { 0 };
920 let code_spans = self.code_spans();
921 code_spans.iter().any(|span| {
922 if line_num < span.line || line_num > span.end_line {
924 return false;
925 }
926
927 if span.line == span.end_line {
928 col_0indexed >= span.start_col && col_0indexed < span.end_col
930 } else if line_num == span.line {
931 col_0indexed >= span.start_col
933 } else if line_num == span.end_line {
934 col_0indexed < span.end_col
936 } else {
937 true
939 }
940 })
941 }
942
943 #[inline]
945 pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
946 let code_spans = self.code_spans();
947 let idx = code_spans.partition_point(|span| span.byte_offset <= byte_offset);
948 idx > 0 && byte_offset < code_spans[idx - 1].byte_end
949 }
950
951 #[inline]
953 pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
954 let idx = self.reference_defs.partition_point(|rd| rd.byte_offset <= byte_pos);
955 idx > 0 && byte_pos < self.reference_defs[idx - 1].byte_end
956 }
957
958 #[inline]
960 pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
961 let idx = self.html_comment_ranges.partition_point(|r| r.start <= byte_pos);
962 idx > 0 && byte_pos < self.html_comment_ranges[idx - 1].end
963 }
964
965 #[inline]
968 pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
969 let tags = self.html_tags();
970 let idx = tags.partition_point(|tag| tag.byte_offset <= byte_pos);
971 idx > 0 && byte_pos < tags[idx - 1].byte_end
972 }
973
974 pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
976 Self::binary_search_ranges(&self.jinja_ranges, byte_pos)
977 }
978
979 #[inline]
981 pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
982 Self::binary_search_ranges(&self.jsx_expression_ranges, byte_pos)
983 }
984
985 #[inline]
987 pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
988 Self::binary_search_ranges(&self.mdx_comment_ranges, byte_pos)
989 }
990
991 pub fn jsx_expression_ranges(&self) -> &[(usize, usize)] {
993 &self.jsx_expression_ranges
994 }
995
996 pub fn mdx_comment_ranges(&self) -> &[(usize, usize)] {
998 &self.mdx_comment_ranges
999 }
1000
1001 #[inline]
1004 pub fn is_in_citation(&self, byte_pos: usize) -> bool {
1005 let idx = self.citation_ranges.partition_point(|r| r.start <= byte_pos);
1006 idx > 0 && byte_pos < self.citation_ranges[idx - 1].end
1007 }
1008
1009 pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
1011 &self.citation_ranges
1012 }
1013
1014 #[inline]
1016 pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
1017 Self::binary_search_ranges(&self.shortcode_ranges, byte_pos)
1018 }
1019
1020 pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
1022 &self.shortcode_ranges
1023 }
1024
1025 pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1027 Self::binary_search_ranges(&self.link_title_ranges, byte_pos)
1028 }
1029
1030 pub fn has_char(&self, ch: char) -> bool {
1032 match ch {
1033 '#' => self.char_frequency.hash_count > 0,
1034 '*' => self.char_frequency.asterisk_count > 0,
1035 '_' => self.char_frequency.underscore_count > 0,
1036 '-' => self.char_frequency.hyphen_count > 0,
1037 '+' => self.char_frequency.plus_count > 0,
1038 '>' => self.char_frequency.gt_count > 0,
1039 '|' => self.char_frequency.pipe_count > 0,
1040 '[' => self.char_frequency.bracket_count > 0,
1041 '`' => self.char_frequency.backtick_count > 0,
1042 '<' => self.char_frequency.lt_count > 0,
1043 '!' => self.char_frequency.exclamation_count > 0,
1044 '\n' => self.char_frequency.newline_count > 0,
1045 _ => self.content.contains(ch), }
1047 }
1048
1049 pub fn char_count(&self, ch: char) -> usize {
1051 match ch {
1052 '#' => self.char_frequency.hash_count,
1053 '*' => self.char_frequency.asterisk_count,
1054 '_' => self.char_frequency.underscore_count,
1055 '-' => self.char_frequency.hyphen_count,
1056 '+' => self.char_frequency.plus_count,
1057 '>' => self.char_frequency.gt_count,
1058 '|' => self.char_frequency.pipe_count,
1059 '[' => self.char_frequency.bracket_count,
1060 '`' => self.char_frequency.backtick_count,
1061 '<' => self.char_frequency.lt_count,
1062 '!' => self.char_frequency.exclamation_count,
1063 '\n' => self.char_frequency.newline_count,
1064 _ => self.content.matches(ch).count(), }
1066 }
1067
1068 pub fn likely_has_headings(&self) -> bool {
1070 self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 }
1072
1073 pub fn likely_has_lists(&self) -> bool {
1075 self.char_frequency.asterisk_count > 0
1076 || self.char_frequency.hyphen_count > 0
1077 || self.char_frequency.plus_count > 0
1078 }
1079
1080 pub fn likely_has_emphasis(&self) -> bool {
1082 self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1083 }
1084
1085 pub fn likely_has_tables(&self) -> bool {
1087 self.char_frequency.pipe_count > 2
1088 }
1089
1090 pub fn likely_has_blockquotes(&self) -> bool {
1092 self.char_frequency.gt_count > 0
1093 }
1094
1095 pub fn likely_has_code(&self) -> bool {
1097 self.char_frequency.backtick_count > 0
1098 }
1099
1100 pub fn likely_has_links_or_images(&self) -> bool {
1102 self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1103 }
1104
1105 pub fn likely_has_html(&self) -> bool {
1107 self.char_frequency.lt_count > 0
1108 }
1109
1110 pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1115 if let Some(line_info) = self.lines.get(line_idx)
1116 && let Some(ref bq) = line_info.blockquote
1117 {
1118 bq.prefix.trim_end().to_string()
1119 } else {
1120 String::new()
1121 }
1122 }
1123
1124 pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1126 self.html_tags()
1127 .iter()
1128 .filter(|tag| tag.line == line_num)
1129 .cloned()
1130 .collect()
1131 }
1132
1133 pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1135 self.emphasis_spans()
1136 .iter()
1137 .filter(|span| span.line == line_num)
1138 .cloned()
1139 .collect()
1140 }
1141
1142 pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1144 self.table_rows()
1145 .iter()
1146 .filter(|row| row.line == line_num)
1147 .cloned()
1148 .collect()
1149 }
1150
1151 pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1153 self.bare_urls()
1154 .iter()
1155 .filter(|url| url.line == line_num)
1156 .cloned()
1157 .collect()
1158 }
1159
1160 #[inline]
1166 fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1167 let idx = match lines.binary_search_by(|line| {
1169 if byte_offset < line.byte_offset {
1170 std::cmp::Ordering::Greater
1171 } else if byte_offset > line.byte_offset + line.byte_len {
1172 std::cmp::Ordering::Less
1173 } else {
1174 std::cmp::Ordering::Equal
1175 }
1176 }) {
1177 Ok(idx) => idx,
1178 Err(idx) => idx.saturating_sub(1),
1179 };
1180
1181 let line = &lines[idx];
1182 let line_num = idx + 1;
1183 let col = byte_offset.saturating_sub(line.byte_offset);
1184
1185 (idx, line_num, col)
1186 }
1187
1188 #[inline]
1190 fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1191 let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1193
1194 if idx > 0 {
1196 let span = &code_spans[idx - 1];
1197 if offset >= span.byte_offset && offset < span.byte_end {
1198 return true;
1199 }
1200 }
1201
1202 false
1203 }
1204
1205 #[must_use]
1225 pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
1226 ValidHeadingsIter::new(&self.lines)
1227 }
1228
1229 #[must_use]
1233 pub fn has_valid_headings(&self) -> bool {
1234 self.lines
1235 .iter()
1236 .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
1237 }
1238}