Skip to main content

rumdl_lib/utils/
mkdocs_critic.rs

1use regex::Regex;
2/// MkDocs Critic Markup detection utilities
3///
4/// Critic Markup is a PyMdown Extensions feature for tracking changes in documents.
5/// It uses special syntax to represent insertions, deletions, substitutions, highlights, and comments.
6///
7/// Patterns:
8/// - `{++addition++}` - Insert text
9/// - `{--deletion--}` - Delete text
10/// - `{~~old~>new~~}` - Substitution
11/// - `{==highlight==}` - Highlight
12/// - `{>>comment<<}` - Comment
13///
14/// These patterns should be skipped from processing by most rules to avoid false positives.
15use std::sync::LazyLock;
16
17/// Pattern to match Critic Markup syntax
18/// Matches: {++...++}, {--...--}, {~~...~~}, {==...==}, {>>...<<}
19/// Simplified without lookahead/lookbehind for compatibility
20static CRITIC_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
21    Regex::new(
22        r"(?x)
23        \{                          # Opening brace
24        (?:
25            \+\+                    # Addition marker
26            [^}]*?                  # Content (non-greedy)
27            \+\+                    # Closing addition marker
28        |
29            --                      # Deletion marker
30            [^}]*?                  # Content (non-greedy)
31            --                      # Closing deletion marker
32        |
33            ~~                      # Substitution start
34            [^}]*?                  # Content including ~> (non-greedy)
35            ~~                      # Substitution end
36        |
37            ==                      # Highlight marker
38            [^}]*?                  # Content (non-greedy)
39            ==                      # Closing highlight marker
40        |
41            >>                      # Comment start
42            [^}]*?                  # Content (non-greedy)
43            <<                      # Comment end
44        )
45        \}                          # Closing brace
46        ",
47    )
48    .unwrap()
49});
50
51/// Simple pattern to quickly check if a line might contain Critic Markup
52static CRITIC_QUICK_CHECK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\{(?:\+\+|--|~~|==|>>)").unwrap());
53
54/// Check if a line contains Critic Markup
55pub fn contains_critic_markup(line: &str) -> bool {
56    // Quick check first for performance
57    if !CRITIC_QUICK_CHECK.is_match(line) {
58        return false;
59    }
60
61    CRITIC_PATTERN.is_match(line)
62}
63
64/// Check if a byte position is within Critic Markup
65pub fn is_within_critic_markup(content: &str, byte_pos: usize) -> bool {
66    // Find all Critic Markup spans
67    for m in CRITIC_PATTERN.find_iter(content) {
68        if m.start() <= byte_pos && byte_pos < m.end() {
69            return true;
70        }
71    }
72    false
73}
74
75/// Get all Critic Markup spans in content
76pub fn get_critic_spans(content: &str) -> Vec<(usize, usize)> {
77    CRITIC_PATTERN
78        .find_iter(content)
79        .map(|m| (m.start(), m.end()))
80        .collect()
81}
82
83/// Check if a specific pattern might be Critic Markup
84pub fn is_critic_pattern(text: &str) -> bool {
85    // Check if the text matches a complete Critic Markup pattern
86    CRITIC_PATTERN.is_match(text)
87}
88
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every sample is reported as containing Critic Markup.
    fn assert_all_detected(samples: &[&str]) {
        for sample in samples {
            assert!(contains_critic_markup(sample), "expected Critic Markup in {sample:?}");
        }
    }

    /// Asserts that no sample is reported as containing Critic Markup.
    fn assert_none_detected(samples: &[&str]) {
        for sample in samples {
            assert!(!contains_critic_markup(sample), "unexpected Critic Markup in {sample:?}");
        }
    }

    #[test]
    fn test_critic_addition() {
        assert_all_detected(&["{++add this++}", "Text {++inserted here++} more text"]);
        assert!(is_critic_pattern("{++new content++}"));
    }

    #[test]
    fn test_critic_deletion() {
        assert_all_detected(&["{--remove this--}", "Text {--deleted--} more"]);
        assert!(is_critic_pattern("{--old content--}"));
    }

    #[test]
    fn test_critic_substitution() {
        assert_all_detected(&["{~~old~>new~~}", "Replace {~~this~>with that~~} text"]);
        assert!(is_critic_pattern("{~~original~>replacement~~}"));
    }

    #[test]
    fn test_critic_highlight() {
        assert_all_detected(&["{==highlight me==}", "Important {==text==} here"]);
        assert!(is_critic_pattern("{==emphasized==}"));
    }

    #[test]
    fn test_critic_comment() {
        assert_all_detected(&["{>>This is a comment<<}", "{==text==}{>>comment about it<<}"]);
        assert!(is_critic_pattern("{>>note<<}"));
    }

    #[test]
    fn test_multiline_critic() {
        // The span's content crosses two line breaks.
        assert_all_detected(&["Here is {++some\ntext that\nspans lines++} ok"]);
    }

    #[test]
    fn test_not_critic() {
        assert_none_detected(&[
            "Normal {text} here",
            "Just ++ symbols",
            "{+ incomplete +}",
            "{{ template }}",
        ]);
    }

    #[test]
    fn test_within_critic_markup() {
        let content = "Text {++added++} here";
        let opener = "{++";
        let closer = "++}";
        let span_start = content.find(opener).unwrap();
        let span_end = content.find(closer).unwrap() + closer.len();

        // First content byte and the closing brace are both inside the span.
        for inside in [span_start + opener.len(), span_end - 1] {
            assert!(is_within_critic_markup(content, inside));
        }
        // The first and last bytes of the text are plain prose.
        for outside in [0, content.len() - 1] {
            assert!(!is_within_critic_markup(content, outside));
        }
    }

    #[test]
    fn test_get_spans() {
        let content = "{++add++} text {--del--} more {==hi==}";
        let marked: Vec<&str> = get_critic_spans(content)
            .into_iter()
            .map(|(start, end)| &content[start..end])
            .collect();

        assert_eq!(marked, ["{++add++}", "{--del--}", "{==hi==}"]);
    }
}