rumdl_lib/utils/
skip_context.rs

1//! Utilities for determining if a position in markdown should be skipped from processing
2//!
3//! This module provides centralized context detection for various markdown constructs
4//! that should typically be skipped when processing rules.
5
6use crate::config::MarkdownFlavor;
7use crate::lint_context::{HtmlTag, LintContext};
8use crate::utils::mkdocs_admonitions;
9use crate::utils::mkdocs_critic;
10use crate::utils::mkdocs_extensions;
11use crate::utils::mkdocs_footnotes;
12use crate::utils::mkdocs_icons;
13use crate::utils::mkdocs_snippets;
14use crate::utils::mkdocs_tabs;
15use crate::utils::regex_cache::HTML_COMMENT_PATTERN;
16use regex::Regex;
17use std::sync::LazyLock;
18
/// Regex matching `$`-delimited math tokens, for both `$$` and single-`$` delimiters.
///
/// Alternatives, in priority order:
/// - Display math: `$$...$$` with zero or more non-`$` characters in between.
///   Only `$` is excluded from the body, so the body may contain newlines.
/// - Inline math: `$...$` with zero or more non-`$`, non-newline characters.
///
/// The display alternative is listed first so `$$content$$` is consumed as a
/// single token rather than being split into single-`$` pairs.
/// Critically, both alternatives allow ZERO characters between delimiters,
/// so empty math like `$$` or `$ $` is consumed and won't pair with other `$` signs.
static INLINE_MATH_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\$\$[^$]*\$\$|\$[^$\n]*\$").unwrap());
28
/// Range representing a span of bytes (start inclusive, end exclusive).
///
/// Used by the HTML comment helpers in this module: produced by
/// [`compute_html_comment_ranges`] and consumed by
/// [`is_in_html_comment_ranges`] and [`is_line_entirely_in_html_comment`].
#[derive(Debug, Clone, Copy)]
pub struct ByteRange {
    /// Byte offset of the first byte covered by the range.
    pub start: usize,
    /// Byte offset one past the last byte covered by the range.
    pub end: usize,
}
35
36/// Pre-compute all HTML comment ranges in the content
37/// Returns a sorted vector of byte ranges for efficient lookup
38pub fn compute_html_comment_ranges(content: &str) -> Vec<ByteRange> {
39    HTML_COMMENT_PATTERN
40        .find_iter(content)
41        .map(|m| ByteRange {
42            start: m.start(),
43            end: m.end(),
44        })
45        .collect()
46}
47
48/// Check if a byte position is within any of the pre-computed HTML comment ranges
49/// Uses binary search for O(log n) complexity
50pub fn is_in_html_comment_ranges(ranges: &[ByteRange], byte_pos: usize) -> bool {
51    // Binary search to find a range that might contain byte_pos
52    ranges
53        .binary_search_by(|range| {
54            if byte_pos < range.start {
55                std::cmp::Ordering::Greater
56            } else if byte_pos >= range.end {
57                std::cmp::Ordering::Less
58            } else {
59                std::cmp::Ordering::Equal
60            }
61        })
62        .is_ok()
63}
64
65/// Check if a line is ENTIRELY within a single HTML comment
66/// Returns true only if both the line start AND end are within the same comment range
67pub fn is_line_entirely_in_html_comment(ranges: &[ByteRange], line_start: usize, line_end: usize) -> bool {
68    for range in ranges {
69        // If line start is within this range, check if line end is also within it
70        if line_start >= range.start && line_start < range.end {
71            return line_end <= range.end;
72        }
73    }
74    false
75}
76
77/// Check if a byte position is within a JSX expression (MDX: {expression})
78#[inline]
79pub fn is_in_jsx_expression(ctx: &LintContext, byte_pos: usize) -> bool {
80    ctx.flavor == MarkdownFlavor::MDX && ctx.is_in_jsx_expression(byte_pos)
81}
82
83/// Check if a byte position is within an MDX comment ({/* ... */})
84#[inline]
85pub fn is_in_mdx_comment(ctx: &LintContext, byte_pos: usize) -> bool {
86    ctx.flavor == MarkdownFlavor::MDX && ctx.is_in_mdx_comment(byte_pos)
87}
88
89/// Check if a line should be skipped due to MkDocs snippet syntax
90pub fn is_mkdocs_snippet_line(line: &str, flavor: MarkdownFlavor) -> bool {
91    flavor == MarkdownFlavor::MkDocs && mkdocs_snippets::is_snippet_marker(line)
92}
93
94/// Check if a line is a MkDocs admonition marker
95pub fn is_mkdocs_admonition_line(line: &str, flavor: MarkdownFlavor) -> bool {
96    flavor == MarkdownFlavor::MkDocs && mkdocs_admonitions::is_admonition_marker(line)
97}
98
99/// Check if a line is a MkDocs footnote definition
100pub fn is_mkdocs_footnote_line(line: &str, flavor: MarkdownFlavor) -> bool {
101    flavor == MarkdownFlavor::MkDocs && mkdocs_footnotes::is_footnote_definition(line)
102}
103
104/// Check if a line is a MkDocs tab marker
105pub fn is_mkdocs_tab_line(line: &str, flavor: MarkdownFlavor) -> bool {
106    flavor == MarkdownFlavor::MkDocs && mkdocs_tabs::is_tab_marker(line)
107}
108
109/// Check if a line contains MkDocs Critic Markup
110pub fn is_mkdocs_critic_line(line: &str, flavor: MarkdownFlavor) -> bool {
111    flavor == MarkdownFlavor::MkDocs && mkdocs_critic::contains_critic_markup(line)
112}
113
114/// Check if a byte position is within an HTML comment
115pub fn is_in_html_comment(content: &str, byte_pos: usize) -> bool {
116    for m in HTML_COMMENT_PATTERN.find_iter(content) {
117        if m.start() <= byte_pos && byte_pos < m.end() {
118            return true;
119        }
120    }
121    false
122}
123
124/// Check if a byte position is within an HTML tag
125pub fn is_in_html_tag(ctx: &LintContext, byte_pos: usize) -> bool {
126    for html_tag in ctx.html_tags().iter() {
127        if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
128            return true;
129        }
130    }
131    false
132}
133
134/// Check if a byte position is within a math context.
135///
136/// `$$...$$` display math is recognized only when it begins its line, via
137/// [`math_block_ranges`]; a mid-line or stray-prose `$$...$$` is a literal,
138/// not math. Single-`$` inline spans are recognized anywhere. This keeps
139/// every math-aware rule agreeing on what is math.
140pub fn is_in_math_context(ctx: &LintContext, byte_pos: usize) -> bool {
141    math_byte_ranges(ctx.content)
142        .iter()
143        .any(|&(start, end)| byte_pos >= start && byte_pos < end)
144}
145
/// Paired `$$ ... $$` display-math byte ranges, half-open `[start, end)`.
///
/// A block only *opens* on a `$$` that begins its line, ignoring leading
/// whitespace and blockquote markers (`>`); a stray `$$` mid-prose is a
/// literal, not a block opener. This keeps the byte-level result consistent
/// with the line-level [`compute_math_block_line_map`] guard. Once open, the
/// block *closes* on the next `$$` anywhere - even when that closing `$$`
/// shares its line with LaTeX content (`\end{cases}$$`) or trailing Markdown
/// prose. An opener with no matching closer is dropped, not treated as an
/// unterminated block that swallows the rest of the document.
///
/// Each returned range starts at the first `$` of the opening fence and ends
/// one past the last `$` of the closing fence.
pub(crate) fn math_block_ranges(content: &str) -> Vec<(usize, usize)> {
    const FENCE: &str = "$$";

    let raw = content.as_bytes();
    let mut blocks = Vec::new();
    let mut pending_open: Option<usize> = None;
    // Start offset of the line holding the fence currently being examined.
    let mut current_line = 0usize;
    // Everything before this offset has already been checked for newlines.
    let mut scanned_to = 0usize;

    // `match_indices` yields non-overlapping fences left to right, so a run
    // such as `$$$` contributes one fence and a dangling literal `$`.
    for (fence_at, _) in content.match_indices(FENCE) {
        if let Some(newline) = raw[scanned_to..fence_at].iter().rposition(|&b| b == b'\n') {
            current_line = scanned_to + newline + 1;
        }
        scanned_to = fence_at + FENCE.len();

        if let Some(opened_at) = pending_open.take() {
            // Any fence closes an open block, wherever it sits on its line.
            blocks.push((opened_at, scanned_to));
        } else if raw[current_line..fence_at]
            .iter()
            .all(|&b| matches!(b, b' ' | b'\t' | b'>'))
        {
            // Only a fence that is the first non-blank, non-blockquote
            // content on its line may open a block.
            pending_open = Some(fence_at);
        }
    }
    blocks
}
192
193/// Check if a byte position is within a `$$ ... $$` display-math block.
194///
195/// A block opens only on a `$$` that begins its line (see [`math_block_ranges`])
196/// and closes on the next `$$` anywhere, so the closing fence ends the block
197/// even when it shares its line with LaTeX content (e.g. `\end{cases}$$`) or
198/// trailing Markdown prose; bytes after the closing `$$` are not math.
199pub fn is_in_math_block(content: &str, byte_pos: usize) -> bool {
200    math_block_ranges(content)
201        .iter()
202        .any(|&(start, end)| byte_pos >= start && byte_pos < end)
203}
204
205/// Check if a byte position is within inline math (`$...$`).
206///
207/// Only single-`$` spans count here. A `$$...$$` token is display-math
208/// syntax, and whether it is actually math depends solely on whether it
209/// begins its line - that decision belongs to [`math_block_ranges`]. The
210/// regex still consumes `$$...$$` tokens so a single-`$` span cannot straddle
211/// them, but a mid-line `$$...$$` is a literal here, not inline math, keeping
212/// this function consistent with the line-start-gated block model.
213pub fn is_in_inline_math(content: &str, byte_pos: usize) -> bool {
214    for m in INLINE_MATH_REGEX.find_iter(content) {
215        if content[m.start()..m.end()].starts_with("$$") {
216            continue;
217        }
218        if m.start() <= byte_pos && byte_pos < m.end() {
219            return true;
220        }
221    }
222    false
223}
224
225/// All math byte ranges in `content`: line-start `$$...$$` display blocks
226/// plus single-`$` inline spans. Ranges are half-open `[start, end)` and may
227/// be unordered relative to each other; membership is by `any`-containment.
228///
229/// Precompute this once when classifying many positions in one document
230/// (e.g. every emphasis span). [`is_in_math_context`] is the single-shot
231/// equivalent and is defined in terms of the same two sources.
232pub fn math_byte_ranges(content: &str) -> Vec<(usize, usize)> {
233    let mut ranges = math_block_ranges(content);
234    for m in INLINE_MATH_REGEX.find_iter(content) {
235        if content[m.start()..m.end()].starts_with("$$") {
236            continue;
237        }
238        ranges.push((m.start(), m.end()));
239    }
240    ranges
241}
242
243/// Check if a position is within a table cell
244pub fn is_in_table_cell(ctx: &LintContext, line_num: usize, _col: usize) -> bool {
245    // Check if this line is part of a table
246    for table_row in ctx.table_rows().iter() {
247        if table_row.line == line_num {
248            // This line is part of a table
249            // For now, we'll skip the entire table row
250            // Future enhancement: check specific column boundaries
251            return true;
252        }
253    }
254    false
255}
256
/// Check if a line contains table syntax.
///
/// After trimming, a line counts as a table line when it is either:
/// - a separator row: made only of `|`, `-`, `:` and whitespace, with at
///   least one `|` and one `-`; or
/// - a content row: starts and/or ends with `|` and holds at least two `|`.
pub fn is_table_line(line: &str) -> bool {
    let row = line.trim();

    // Both shapes need at least one pipe, so bail out early without one.
    let pipe_count = row.matches('|').count();
    if pipe_count == 0 {
        return false;
    }

    let is_separator = row.contains('-')
        && row
            .chars()
            .all(|c| matches!(c, '|' | '-' | ':') || c.is_whitespace());
    let is_content_row = pipe_count >= 2 && (row.starts_with('|') || row.ends_with('|'));

    is_separator || is_content_row
}
278
/// Check if a byte position is within an icon or emoji shortcode on `line`.
/// Icon shortcodes use format like `:material-check:`, `:octicons-mark-github-16:`
///
/// The check is delegated to `mkdocs_icons::is_in_any_shortcode`. The `_flavor`
/// argument is not consulted, so the result is the same for every flavor.
pub fn is_in_icon_shortcode(line: &str, position: usize, _flavor: MarkdownFlavor) -> bool {
    // Deliberately not gated on the MkDocs flavor: the pattern is checked for
    // all flavors since emoji shortcodes are universal
    mkdocs_icons::is_in_any_shortcode(line, position)
}
286
287/// Check if a byte position is within PyMdown extension markup
288/// Includes: Keys (++ctrl+alt++), Caret (^text^), Insert (^^text^^), Mark (==text==)
289///
290/// For MkDocs flavor: supports all PyMdown extensions
291/// For Obsidian flavor: only supports Mark (==highlight==) syntax
292pub fn is_in_pymdown_markup(line: &str, position: usize, flavor: MarkdownFlavor) -> bool {
293    match flavor {
294        MarkdownFlavor::MkDocs => mkdocs_extensions::is_in_pymdown_markup(line, position),
295        MarkdownFlavor::Obsidian => {
296            // Obsidian supports ==highlight== syntax (same as PyMdown Mark)
297            mkdocs_extensions::is_in_mark(line, position)
298        }
299        _ => false,
300    }
301}
302
303/// Check whether a position on a line falls inside an inline HTML code-like element.
304///
305/// Handles `<code>`, `<pre>`, `<samp>`, `<kbd>`, and `<var>` tags (case-insensitive).
306/// These are inline elements whose content should not be interpreted as markdown emphasis.
307pub fn is_in_inline_html_code(line: &str, position: usize) -> bool {
308    // Tags whose content should not be parsed as markdown
309    const TAGS: &[&str] = &["code", "pre", "samp", "kbd", "var"];
310
311    let bytes = line.as_bytes();
312
313    for tag in TAGS {
314        let open_bytes = format!("<{tag}").into_bytes();
315        let close_pattern = format!("</{tag}>").into_bytes();
316
317        let mut search_from = 0;
318        while search_from + open_bytes.len() <= bytes.len() {
319            // Find opening tag (case-insensitive byte search)
320            let Some(open_abs) = find_case_insensitive(bytes, &open_bytes, search_from) else {
321                break;
322            };
323
324            let after_tag = open_abs + open_bytes.len();
325
326            // Verify the character after the tag name is '>' or whitespace (not a longer tag name)
327            if after_tag < bytes.len() {
328                let next = bytes[after_tag];
329                if next != b'>' && next != b' ' && next != b'\t' {
330                    search_from = after_tag;
331                    continue;
332                }
333            }
334
335            // Find the end of the opening tag
336            let Some(tag_close) = bytes[after_tag..].iter().position(|&b| b == b'>') else {
337                break;
338            };
339            let content_start = after_tag + tag_close + 1;
340
341            // Find the closing tag (case-insensitive)
342            let Some(close_start) = find_case_insensitive(bytes, &close_pattern, content_start) else {
343                break;
344            };
345            let content_end = close_start;
346
347            if position >= content_start && position < content_end {
348                return true;
349            }
350
351            search_from = close_start + close_pattern.len();
352        }
353    }
354    false
355}
356
/// Case-insensitive (ASCII) byte search within a slice, starting at `from`.
///
/// Returns the absolute index of the first occurrence of `needle` at or after
/// `from`, or `None` when `needle` is empty, `from` is past the end of
/// `haystack`, or no occurrence exists.
fn find_case_insensitive(haystack: &[u8], needle: &[u8], from: usize) -> Option<usize> {
    if needle.is_empty() {
        return None;
    }
    let tail = haystack.get(from..)?;
    tail.windows(needle.len())
        .position(|candidate| candidate.eq_ignore_ascii_case(needle))
        .map(|offset| from + offset)
}
373
374/// Check if a byte position is within flavor-specific markup
375/// For MkDocs: icon shortcodes and PyMdown extensions
376/// For Obsidian: highlight syntax (==text==)
377pub fn is_in_mkdocs_markup(line: &str, position: usize, flavor: MarkdownFlavor) -> bool {
378    if is_in_icon_shortcode(line, position, flavor) {
379        return true;
380    }
381    if is_in_pymdown_markup(line, position, flavor) {
382        return true;
383    }
384    false
385}
386
/// Check if a byte position within a line is inside a backtick-delimited code span.
///
/// This is a line-level fallback for cases where pulldown-cmark's code span detection
/// misses spans due to table parsing interference (e.g., pipes inside code spans
/// in table rows cause pulldown-cmark to misidentify cell boundaries).
///
/// A code span opens with a run of N backticks and closes at the next run of
/// exactly N backticks; the span covers both delimiter runs. Following
/// CommonMark, an opening run with no equal-length closer on the line is
/// literal text: scanning resumes right after it, so later spans on the same
/// line are still found (in `` `foo``bar`` `` the span is `` ``bar`` ``).
fn is_in_inline_code_on_line(line: &str, byte_pos: usize) -> bool {
    let bytes = line.as_bytes();
    // Length of the backtick run starting at `from`.
    let backtick_run = |from: usize| bytes[from..].iter().take_while(|&&b| b == b'`').count();

    let mut i = 0;
    while i < bytes.len() {
        if bytes[i] != b'`' {
            i += 1;
            continue;
        }

        let open_start = i;
        let open_len = backtick_run(i);
        i += open_len;

        // Look for a closing run of exactly the same length.
        let mut j = i;
        while j < bytes.len() {
            if bytes[j] != b'`' {
                j += 1;
                continue;
            }
            let close_len = backtick_run(j);
            j += close_len;
            if close_len == open_len {
                // Found matching pair: code span covers open_start..j
                if (open_start..j).contains(&byte_pos) {
                    return true;
                }
                // Resume scanning after the closed span.
                i = j;
                break;
            }
        }
        // With no matching closer, `i` still points just past the opening run:
        // that run is literal, and shorter or longer runs after it may still
        // open a span, so the outer loop keeps going instead of giving up.
    }

    false
}
438
439/// Check if a byte position is within an HTML tag. O(log n) via binary search.
440fn is_byte_in_html_tag(html_tags: &[HtmlTag], byte_pos: usize) -> bool {
441    let idx = html_tags.partition_point(|tag| tag.byte_offset <= byte_pos);
442    idx > 0 && byte_pos < html_tags[idx - 1].byte_end
443}
444
/// Check if a byte position is within HTML code content (`<code>...</code>`).
/// Uses pre-computed code ranges for O(log n) lookup via binary search.
///
/// Relies on `code_ranges` being ordered by start offset: only the last range
/// starting at or before `byte_pos` is tested against its (exclusive) end.
fn is_byte_in_html_code_content(code_ranges: &[(usize, usize)], byte_pos: usize) -> bool {
    let started_before = code_ranges.partition_point(|&(start, _)| start <= byte_pos);
    code_ranges[..started_before]
        .last()
        .is_some_and(|&(_, end)| byte_pos < end)
}
451
452/// Pre-compute ranges covered by `<code>...</code>` HTML tags.
453/// Returns sorted Vec of (start, end) byte ranges.
454pub(crate) fn compute_html_code_ranges(html_tags: &[HtmlTag]) -> Vec<(usize, usize)> {
455    let mut ranges = Vec::new();
456    let mut open_code_end: Option<usize> = None;
457
458    for tag in html_tags {
459        if tag.tag_name == "code" {
460            if tag.is_self_closing {
461                continue;
462            } else if !tag.is_closing {
463                open_code_end = Some(tag.byte_end);
464            } else if tag.is_closing {
465                if let Some(start) = open_code_end {
466                    ranges.push((start, tag.byte_offset));
467                }
468                open_code_end = None;
469            }
470        }
471    }
472    // Handle unclosed <code> tag
473    if let Some(start) = open_code_end {
474        ranges.push((start, usize::MAX));
475    }
476    ranges
477}
478
479/// Determine whether an emphasis or strong span starting at `span_start` should be
480/// skipped because it falls inside a non-prose context: code blocks/spans, inline
481/// code, links, HTML tags or `<code>` content, MkDocs/PyMdown markup, math, JSX
482/// expressions, MDX comments, front matter, or mkdocstrings blocks.
483///
484/// `html_tags` and `html_code_ranges` are passed in so callers iterating many spans
485/// can compute them once via [`compute_html_code_ranges`].
486pub(crate) fn should_skip_emphasis_span(
487    ctx: &LintContext,
488    html_tags: &[HtmlTag],
489    html_code_ranges: &[(usize, usize)],
490    span_start: usize,
491) -> bool {
492    let lines = ctx.raw_lines();
493    let (line_num, col) = ctx.offset_to_line_col(span_start);
494
495    // Skip matches in front matter or mkdocstrings blocks
496    if ctx
497        .line_info(line_num)
498        .is_some_and(|info| info.in_front_matter || info.in_mkdocstrings)
499    {
500        return true;
501    }
502
503    // Check MkDocs markup
504    let in_mkdocs_markup = lines
505        .get(line_num.saturating_sub(1))
506        .is_some_and(|line| is_in_mkdocs_markup(line, col.saturating_sub(1), ctx.flavor));
507
508    // Line-level inline code fallback for cases pulldown-cmark misses
509    let in_inline_code = lines
510        .get(line_num.saturating_sub(1))
511        .is_some_and(|line| is_in_inline_code_on_line(line, col.saturating_sub(1)));
512
513    ctx.is_in_code_block_or_span(span_start)
514        || in_inline_code
515        || ctx.is_in_link(span_start)
516        || is_byte_in_html_tag(html_tags, span_start)
517        || is_byte_in_html_code_content(html_code_ranges, span_start)
518        || in_mkdocs_markup
519        || is_in_math_context(ctx, span_start)
520        || is_in_jsx_expression(ctx, span_start)
521        || is_in_mdx_comment(ctx, span_start)
522}
523
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_html_comment_detection() {
        let content = "Text <!-- comment --> more text";
        assert!(is_in_html_comment(content, 10)); // Inside comment
        assert!(!is_in_html_comment(content, 0)); // Before comment
        assert!(!is_in_html_comment(content, 25)); // After comment
    }

    #[test]
    fn test_is_line_entirely_in_html_comment() {
        // Test 1: Multi-line comment with content after closing
        let content = "<!--\ncomment\n--> Content after comment";
        let ranges = compute_html_comment_ranges(content);
        // Line 0: "<!--" (bytes 0-4) - entirely in comment
        assert!(is_line_entirely_in_html_comment(&ranges, 0, 4));
        // Line 1: "comment" (bytes 5-12) - entirely in comment
        assert!(is_line_entirely_in_html_comment(&ranges, 5, 12));
        // Line 2: "--> Content after comment" (bytes 13-38) - NOT entirely in comment
        assert!(!is_line_entirely_in_html_comment(&ranges, 13, 38));

        // Test 2: Single-line comment with content after
        let content2 = "<!-- comment --> Not a comment";
        let ranges2 = compute_html_comment_ranges(content2);
        // The entire line is NOT entirely in the comment
        assert!(!is_line_entirely_in_html_comment(&ranges2, 0, 30));

        // Test 3: Single-line comment alone
        let content3 = "<!-- comment -->";
        let ranges3 = compute_html_comment_ranges(content3);
        // The entire line IS entirely in the comment
        assert!(is_line_entirely_in_html_comment(&ranges3, 0, 16));

        // Test 4: Content before comment
        let content4 = "Text before <!-- comment -->";
        let ranges4 = compute_html_comment_ranges(content4);
        // Line start is NOT in the comment range
        assert!(!is_line_entirely_in_html_comment(&ranges4, 0, 28));
    }

    #[test]
    fn test_math_block_detection() {
        // The opening `$$` occupies bytes 5-6, the closing `$$` bytes 21-22.
        let content = "Text\n$$\nmath content\n$$\nmore text";
        assert!(is_in_math_block(content, 8)); // First byte of "math content"
        assert!(is_in_math_block(content, 15)); // Inside math block
        assert!(!is_in_math_block(content, 0)); // Before math block
        assert!(!is_in_math_block(content, 30)); // After math block
    }

    #[test]
    fn test_stray_double_dollar_in_prose_is_not_math() {
        // Two `$$` tokens inside a prose line must NOT pair into a math block:
        // a multi-line block only opens on a `$$` that begins its line. This
        // keeps the byte-level result consistent with the line-level map.
        let content = "Note: $$ is used for display math and $$ closes it";
        let between = content.find("is used").unwrap();
        assert!(
            !is_in_math_block(content, between),
            "stray paired `$$` in prose must not be treated as a math block"
        );
        assert!(math_block_ranges(content).is_empty());
    }

    #[test]
    fn test_blockquoted_double_dollar_opens_block() {
        // A `$$` opener is still recognized behind a blockquote prefix.
        let content = "> $$\n> x = y\n> $$\n";
        let inside = content.find("x = y").unwrap();
        assert!(is_in_math_block(content, inside), "blockquoted math interior");
    }

    #[test]
    fn test_self_contained_single_line_block_leaves_trailing_prose() {
        // `$$ a $$` at line start is math; prose after the closing `$$` is not.
        let content = "$$ a $$ and __not math__\n";
        let in_math = content.find('a').unwrap();
        assert!(is_in_math_block(content, in_math), "single-line math interior");
        let after = content.find("not math").unwrap();
        assert!(!is_in_math_block(content, after), "trailing prose is lintable");
    }

    #[test]
    fn test_math_block_closes_with_content_before_fence() {
        // A display-math block whose closing `$$` shares its line with
        // content (e.g. `\end{aligned}$$`) must still close the block.
        // Content after the block is prose, not math.
        let content = "$$\nx = y\n\\end{x}$$\nafter __text__ here";

        let inside = content.find("x = y").unwrap();
        assert!(is_in_math_block(content, inside), "interior must be math");

        let after = content.find("after").unwrap();
        assert!(
            !is_in_math_block(content, after),
            "content after a content-sharing closing fence must NOT be math"
        );
    }

    #[test]
    fn test_inline_math_detection() {
        let content = "Text $x + y$ and $$a^2 + b^2$$ here";
        assert!(is_in_inline_math(content, 7), "inside the single-`$` inline span");
        // The mid-line `$$a^2 + b^2$$` is display syntax, not a line-start
        // block, so it is a literal under the shared math model - neither the
        // inline path nor `math_block_ranges` treats it as math.
        assert!(!is_in_inline_math(content, 20), "mid-line $$...$$ is not inline math");
        assert!(
            !is_in_math_block(content, 20),
            "mid-line $$...$$ is not a line-start display block"
        );
        assert!(!is_in_inline_math(content, 0), "before any math");
        assert!(!is_in_inline_math(content, 35), "after the spans");
    }

    #[test]
    fn test_table_line_detection() {
        assert!(is_table_line("| Header | Column |"));
        assert!(is_table_line("|--------|--------|"));
        assert!(is_table_line("| Cell 1 | Cell 2 |"));
        assert!(!is_table_line("Regular text"));
        assert!(!is_table_line("Just a pipe | here"));
    }

    #[test]
    fn test_is_in_icon_shortcode() {
        let line = "Click :material-check: to confirm";
        // Position 0-5 is "Click"
        assert!(!is_in_icon_shortcode(line, 0, MarkdownFlavor::MkDocs));
        // Position 6-22 is ":material-check:"
        assert!(is_in_icon_shortcode(line, 6, MarkdownFlavor::MkDocs));
        assert!(is_in_icon_shortcode(line, 15, MarkdownFlavor::MkDocs));
        assert!(is_in_icon_shortcode(line, 21, MarkdownFlavor::MkDocs));
        // Position 22+ is " to confirm"
        assert!(!is_in_icon_shortcode(line, 22, MarkdownFlavor::MkDocs));
    }

    #[test]
    fn test_is_in_pymdown_markup() {
        // Test Keys notation
        let line = "Press ++ctrl+c++ to copy";
        assert!(!is_in_pymdown_markup(line, 0, MarkdownFlavor::MkDocs));
        assert!(is_in_pymdown_markup(line, 6, MarkdownFlavor::MkDocs));
        assert!(is_in_pymdown_markup(line, 10, MarkdownFlavor::MkDocs));
        assert!(!is_in_pymdown_markup(line, 17, MarkdownFlavor::MkDocs));

        // Test Mark notation
        let line2 = "This is ==highlighted== text";
        assert!(!is_in_pymdown_markup(line2, 0, MarkdownFlavor::MkDocs));
        assert!(is_in_pymdown_markup(line2, 8, MarkdownFlavor::MkDocs));
        assert!(is_in_pymdown_markup(line2, 15, MarkdownFlavor::MkDocs));
        assert!(!is_in_pymdown_markup(line2, 23, MarkdownFlavor::MkDocs));

        // Should not match for Standard flavor
        assert!(!is_in_pymdown_markup(line, 10, MarkdownFlavor::Standard));
    }

    #[test]
    fn test_is_in_mkdocs_markup() {
        // Should combine both icon and pymdown
        let line = ":material-check: and ++ctrl++";
        assert!(is_in_mkdocs_markup(line, 5, MarkdownFlavor::MkDocs)); // In icon
        assert!(is_in_mkdocs_markup(line, 23, MarkdownFlavor::MkDocs)); // In keys
        assert!(!is_in_mkdocs_markup(line, 17, MarkdownFlavor::MkDocs)); // In " and "
    }

    // ==================== Obsidian highlight tests ====================

    #[test]
    fn test_obsidian_highlight_basic() {
        // Obsidian flavor should recognize ==highlight== syntax
        let line = "This is ==highlighted== text";
        assert!(!is_in_pymdown_markup(line, 0, MarkdownFlavor::Obsidian)); // "T"
        assert!(is_in_pymdown_markup(line, 8, MarkdownFlavor::Obsidian)); // First "="
        assert!(is_in_pymdown_markup(line, 10, MarkdownFlavor::Obsidian)); // "h"
        assert!(is_in_pymdown_markup(line, 15, MarkdownFlavor::Obsidian)); // "i" (second one)
        assert!(is_in_pymdown_markup(line, 22, MarkdownFlavor::Obsidian)); // Last "="
        assert!(!is_in_pymdown_markup(line, 23, MarkdownFlavor::Obsidian)); // " "
    }

    #[test]
    fn test_obsidian_highlight_multiple() {
        // Multiple highlights on one line
        let line = "Both ==one== and ==two== here";
        assert!(is_in_pymdown_markup(line, 5, MarkdownFlavor::Obsidian)); // In first
        assert!(is_in_pymdown_markup(line, 8, MarkdownFlavor::Obsidian)); // "n"
        assert!(!is_in_pymdown_markup(line, 12, MarkdownFlavor::Obsidian)); // Space after
        assert!(is_in_pymdown_markup(line, 17, MarkdownFlavor::Obsidian)); // In second
    }

    #[test]
    fn test_obsidian_highlight_not_standard_flavor() {
        // Standard flavor should NOT recognize ==highlight== as special
        let line = "This is ==highlighted== text";
        assert!(!is_in_pymdown_markup(line, 8, MarkdownFlavor::Standard));
        assert!(!is_in_pymdown_markup(line, 15, MarkdownFlavor::Standard));
    }

    #[test]
    fn test_obsidian_highlight_with_spaces_inside() {
        // Highlights can have spaces inside the content
        let line = "This is ==text with spaces== here";
        assert!(is_in_pymdown_markup(line, 10, MarkdownFlavor::Obsidian)); // "t"
        assert!(is_in_pymdown_markup(line, 15, MarkdownFlavor::Obsidian)); // "w"
        assert!(is_in_pymdown_markup(line, 27, MarkdownFlavor::Obsidian)); // "="
    }

    #[test]
    fn test_obsidian_does_not_support_keys_notation() {
        // Obsidian flavor should NOT recognize ++keys++ syntax (that's MkDocs-specific)
        let line = "Press ++ctrl+c++ to copy";
        assert!(!is_in_pymdown_markup(line, 6, MarkdownFlavor::Obsidian));
        assert!(!is_in_pymdown_markup(line, 10, MarkdownFlavor::Obsidian));
    }

    #[test]
    fn test_obsidian_mkdocs_markup_function() {
        // is_in_mkdocs_markup should also work for Obsidian highlights
        let line = "This is ==highlighted== text";
        assert!(is_in_mkdocs_markup(line, 10, MarkdownFlavor::Obsidian)); // In highlight
        assert!(!is_in_mkdocs_markup(line, 0, MarkdownFlavor::Obsidian)); // Not in highlight
    }

    #[test]
    fn test_obsidian_highlight_edge_cases() {
        // Empty highlight (====) should not match
        let line = "Test ==== here";
        assert!(!is_in_pymdown_markup(line, 5, MarkdownFlavor::Obsidian)); // Position at first =
        assert!(!is_in_pymdown_markup(line, 6, MarkdownFlavor::Obsidian));

        // Single character highlight
        let line2 = "Test ==a== here";
        assert!(is_in_pymdown_markup(line2, 5, MarkdownFlavor::Obsidian));
        assert!(is_in_pymdown_markup(line2, 7, MarkdownFlavor::Obsidian)); // "a"
        assert!(is_in_pymdown_markup(line2, 9, MarkdownFlavor::Obsidian)); // last =

        // Triple equals (===) should not create highlight
        let line3 = "a === b";
        assert!(!is_in_pymdown_markup(line3, 3, MarkdownFlavor::Obsidian));
    }

    #[test]
    fn test_obsidian_highlight_unclosed() {
        // Unclosed highlight should not match
        let line = "This ==starts but never ends";
        assert!(!is_in_pymdown_markup(line, 5, MarkdownFlavor::Obsidian));
        assert!(!is_in_pymdown_markup(line, 10, MarkdownFlavor::Obsidian));
    }

    #[test]
    fn test_inline_html_code_basic() {
        let line = "The formula is <code>a * b * c</code> in math.";
        // Position inside <code> content
        assert!(is_in_inline_html_code(line, 21)); // 'a'
        assert!(is_in_inline_html_code(line, 25)); // 'b'
        // Position outside <code> content
        assert!(!is_in_inline_html_code(line, 0)); // 'T'
        assert!(!is_in_inline_html_code(line, 40)); // after </code>
    }

    #[test]
    fn test_inline_html_code_multiple_tags() {
        let line = "<kbd>Ctrl</kbd> + <samp>output</samp>";
        assert!(is_in_inline_html_code(line, 5)); // 'C' in Ctrl
        assert!(is_in_inline_html_code(line, 24)); // 'o' in output
        assert!(!is_in_inline_html_code(line, 16)); // '+'
    }

    #[test]
    fn test_inline_html_code_with_attributes() {
        let line = r#"<code class="lang">x * y</code>"#;
        assert!(is_in_inline_html_code(line, 19)); // 'x'
        assert!(is_in_inline_html_code(line, 23)); // 'y'
        assert!(!is_in_inline_html_code(line, 0)); // before tag
    }

    #[test]
    fn test_inline_html_code_case_insensitive() {
        let line = "<CODE>a * b</CODE>";
        assert!(is_in_inline_html_code(line, 6)); // 'a'
        assert!(is_in_inline_html_code(line, 8)); // '*'
    }

    #[test]
    fn test_inline_html_code_var_and_pre() {
        let line = "<var>x * y</var> and <pre>a * b</pre>";
        assert!(is_in_inline_html_code(line, 5)); // 'x' in var
        assert!(is_in_inline_html_code(line, 26)); // 'a' in pre
        assert!(!is_in_inline_html_code(line, 17)); // 'and'
    }

    #[test]
    fn test_inline_html_code_unclosed() {
        // Unclosed tag should not match
        let line = "<code>a * b without closing";
        assert!(!is_in_inline_html_code(line, 6));
    }

    #[test]
    fn test_inline_html_code_no_substring_match() {
        // <variable> should NOT be treated as <var>
        let line = "<variable>a * b</variable>";
        assert!(!is_in_inline_html_code(line, 11));

        // <keyboard> should NOT be treated as <kbd>
        let line2 = "<keyboard>x * y</keyboard>";
        assert!(!is_in_inline_html_code(line2, 11));
    }
}
rumdl_lib/utils/skip_context.rs

rumdl_lib/utils/
skip_context.rs