rumdl_lib/utils/
emphasis_utils.rs1use regex::Regex;
2use std::sync::LazyLock;
3
/// Matches an inline code span: a run of one or more backticks, content that
/// neither starts nor ends with a backtick, and a closing run of one or more
/// backticks. The pattern does not require the two backtick runs to have the
/// same length.
static INLINE_CODE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(`+)([^`]|[^`].*?[^`])(`+)").unwrap());

/// Matches math delimited by dollar signs: either `$$...$$` whose body contains
/// no `$`, or `$...$` whose body contains neither `$` nor a newline.
static INLINE_MATH: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\$\$[^$]*\$\$|\$[^$\n]*\$").unwrap());

/// Matches a line that starts (after optional whitespace) with `*`, `+` or `-`
/// followed by at least one whitespace character.
static LIST_MARKER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*[*+-]\s+").unwrap());

/// Matches a line beginning with an optional `*`, then a `**bold**` key whose
/// text has no `*` inside and no whitespace directly inside the delimiters,
/// followed by optional whitespace and a colon (e.g. `**Key**: value`).
static DOC_METADATA_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^\s*\*?\s*\*\*(?:[^*\s][^*]*[^*\s]|[^*\s])\*\*\s*:").unwrap());

/// Matches `**bold**` text anywhere in the line: the text contains no `*` and
/// does not start or end with whitespace. The second alternative covers a
/// single-character body.
static BOLD_TEXT_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\*\*[^*\s][^*]*[^*\s]\*\*|\*\*[^*\s]\*\*").unwrap());

/// Cheap pre-filter: the line starts (after optional whitespace) with `*`,
/// at least one whitespace character, and another `*`.
static QUICK_DOC_CHECK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*\*\s+\*").unwrap());
/// Cheap pre-filter: the line contains `**` immediately followed by a character
/// that is neither `*` nor whitespace.
static QUICK_BOLD_CHECK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\*\*[^*\s]").unwrap());

/// Matches a `{* ... *}` template shortcode; `.` does not match a newline, so
/// the opening and closing delimiters must be on the same line.
static TEMPLATE_SHORTCODE_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\{\*.*\*\}").unwrap());
29
/// A run of emphasis marker characters found in a line.
#[derive(Debug, Clone, PartialEq)]
pub struct EmphasisMarker {
    /// The marker byte itself (`b'*'` or `b'_'` when produced by
    /// `find_emphasis_markers`).
    pub marker_type: u8,
    /// Number of consecutive marker bytes in the run.
    pub count: u8,
    /// Byte offset of the first marker byte within the line.
    pub start_pos: usize,
}

impl EmphasisMarker {
    /// Returns the byte offset just past the last marker byte of the run.
    #[inline]
    pub fn end_pos(&self) -> usize {
        let width = usize::from(self.count);
        self.start_pos + width
    }

    /// Returns the marker byte converted to a `char`.
    #[inline]
    pub fn as_char(&self) -> char {
        char::from(self.marker_type)
    }
}
49
/// A matched pair of emphasis markers together with the text between them.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Marker that opens the span.
    pub opening: EmphasisMarker,
    /// Marker that closes the span.
    pub closing: EmphasisMarker,
    /// Text between the end of `opening` and the start of `closing`.
    pub content: String,
    /// Whether `content` starts with a space or tab.
    pub has_leading_space: bool,
    /// Whether `content` ends with a space or tab.
    pub has_trailing_space: bool,
}
59
60#[inline]
63pub fn replace_inline_code(line: &str) -> String {
64 if !line.contains('`') {
66 return line.to_string();
67 }
68
69 let mut result = line.to_string();
70 let mut offset = 0;
71
72 for cap in INLINE_CODE.captures_iter(line) {
73 if let (Some(full_match), Some(_opening), Some(_content), Some(_closing)) =
74 (cap.get(0), cap.get(1), cap.get(2), cap.get(3))
75 {
76 let match_start = full_match.start();
77 let match_end = full_match.end();
78 let placeholder = "X".repeat(match_end - match_start);
80
81 result.replace_range(match_start + offset..match_end + offset, &placeholder);
82 offset += placeholder.len() - (match_end - match_start);
83 }
84 }
85
86 result
87}
88
89pub fn replace_inline_math(line: &str) -> String {
92 if !line.contains('$') {
94 return line.to_string();
95 }
96
97 let mut result = line.to_string();
98 let mut offset: isize = 0;
99
100 for m in INLINE_MATH.find_iter(line) {
101 let match_start = m.start();
102 let match_end = m.end();
103 let placeholder = "M".repeat(match_end - match_start);
105
106 let adjusted_start = (match_start as isize + offset) as usize;
107 let adjusted_end = (match_end as isize + offset) as usize;
108 result.replace_range(adjusted_start..adjusted_end, &placeholder);
109 offset += placeholder.len() as isize - (match_end - match_start) as isize;
110 }
111
112 result
113}
114
115#[inline]
117pub fn find_emphasis_markers(line: &str) -> Vec<EmphasisMarker> {
118 if !line.contains('*') && !line.contains('_') {
120 return Vec::new();
121 }
122
123 let mut markers = Vec::new();
124 let bytes = line.as_bytes();
125 let mut i = 0;
126
127 while i < bytes.len() {
128 let byte = bytes[i];
129 if byte == b'*' || byte == b'_' {
130 let start_pos = i;
131 let mut count = 1u8;
132
133 while i + (count as usize) < bytes.len() && bytes[i + (count as usize)] == byte && count < 3 {
135 count += 1;
136 }
137
138 if count == 1 || count == 2 {
140 markers.push(EmphasisMarker {
141 marker_type: byte,
142 count,
143 start_pos,
144 });
145 }
146
147 i += count as usize;
148 } else {
149 i += 1;
150 }
151 }
152
153 markers
154}
155
156pub fn find_single_emphasis_spans(line: &str, markers: Vec<EmphasisMarker>) -> Vec<EmphasisSpan> {
158 if markers.len() < 2 {
160 return Vec::new();
161 }
162
163 let mut spans = Vec::new();
164 let mut used_markers = vec![false; markers.len()];
165
166 for i in 0..markers.len() {
168 if used_markers[i] || markers[i].count != 1 {
169 continue;
170 }
171
172 let opening = &markers[i];
173
174 for j in (i + 1)..markers.len() {
176 if used_markers[j] {
177 continue;
178 }
179
180 let closing = &markers[j];
181
182 if closing.marker_type == opening.marker_type && closing.count == 1 {
184 let content_start = opening.end_pos();
185 let content_end = closing.start_pos;
186
187 if content_end > content_start {
188 let content = &line[content_start..content_end];
189
190 if is_valid_emphasis_content_fast(content) && is_valid_emphasis_span_fast(line, opening, closing) {
192 let crosses_markers = markers[i + 1..j]
194 .iter()
195 .any(|marker| marker.marker_type == opening.marker_type && marker.count == 1);
196
197 if !crosses_markers {
198 let has_leading_space = content.starts_with(' ') || content.starts_with('\t');
199 let has_trailing_space = content.ends_with(' ') || content.ends_with('\t');
200
201 spans.push(EmphasisSpan {
202 opening: opening.clone(),
203 closing: closing.clone(),
204 content: content.to_string(),
205 has_leading_space,
206 has_trailing_space,
207 });
208
209 used_markers[i] = true;
211 used_markers[j] = true;
212 break;
213 }
214 }
215 }
216 }
217 }
218 }
219
220 spans
221}
222
223pub fn find_emphasis_spans(line: &str, markers: Vec<EmphasisMarker>) -> Vec<EmphasisSpan> {
225 if markers.len() < 2 {
227 return Vec::new();
228 }
229
230 let mut spans = Vec::new();
231 let mut used_markers = vec![false; markers.len()];
232
233 for i in 0..markers.len() {
235 if used_markers[i] {
236 continue;
237 }
238
239 let opening = &markers[i];
240
241 for j in (i + 1)..markers.len() {
243 if used_markers[j] {
244 continue;
245 }
246
247 let closing = &markers[j];
248
249 if closing.marker_type == opening.marker_type && closing.count == opening.count {
251 let content_start = opening.end_pos();
252 let content_end = closing.start_pos;
253
254 if content_end > content_start {
255 let content = &line[content_start..content_end];
256
257 if is_valid_emphasis_content_fast(content) && is_valid_emphasis_span_fast(line, opening, closing) {
259 let crosses_markers = markers[i + 1..j]
261 .iter()
262 .any(|marker| marker.marker_type == opening.marker_type);
263
264 if !crosses_markers {
265 let has_leading_space = content.starts_with(' ') || content.starts_with('\t');
266 let has_trailing_space = content.ends_with(' ') || content.ends_with('\t');
267
268 spans.push(EmphasisSpan {
269 opening: opening.clone(),
270 closing: closing.clone(),
271 content: content.to_string(),
272 has_leading_space,
273 has_trailing_space,
274 });
275
276 used_markers[i] = true;
278 used_markers[j] = true;
279 break;
280 }
281 }
282 }
283 }
284 }
285 }
286
287 spans
288}
289
290#[inline]
292pub fn is_valid_emphasis_span_fast(line: &str, opening: &EmphasisMarker, closing: &EmphasisMarker) -> bool {
293 let content_start = opening.end_pos();
294 let content_end = closing.start_pos;
295
296 if content_end <= content_start {
298 return false;
299 }
300
301 let content = &line[content_start..content_end];
302 if content.trim().is_empty() {
303 return false;
304 }
305
306 let bytes = line.as_bytes();
308
309 let valid_opening = opening.start_pos == 0
311 || matches!(
312 bytes.get(opening.start_pos.saturating_sub(1)),
313 Some(&b' ')
314 | Some(&b'\t')
315 | Some(&b'(')
316 | Some(&b'[')
317 | Some(&b'{')
318 | Some(&b'"')
319 | Some(&b'\'')
320 | Some(&b'>')
321 );
322
323 let valid_closing = closing.end_pos() >= bytes.len()
325 || matches!(
326 bytes.get(closing.end_pos()),
327 Some(&b' ')
328 | Some(&b'\t')
329 | Some(&b')')
330 | Some(&b']')
331 | Some(&b'}')
332 | Some(&b'"')
333 | Some(&b'\'')
334 | Some(&b'.')
335 | Some(&b',')
336 | Some(&b'!')
337 | Some(&b'?')
338 | Some(&b';')
339 | Some(&b':')
340 | Some(&b'<')
341 );
342
343 valid_opening && valid_closing && !content.contains('\n')
344}
345
/// Returns `true` when `content` contains at least one non-whitespace
/// character.
#[inline]
pub fn is_valid_emphasis_content_fast(content: &str) -> bool {
    content.chars().any(|ch| !ch.is_whitespace())
}
351
/// Returns `true` when `line` matches `LIST_MARKER`, i.e. it starts (after
/// optional whitespace) with `*`, `+` or `-` followed by whitespace.
pub fn is_likely_list_line(line: &str) -> bool {
    LIST_MARKER.is_match(line)
}
356
357pub fn has_doc_patterns(line: &str) -> bool {
359 if line.contains("{*") && TEMPLATE_SHORTCODE_PATTERN.is_match(line) {
362 return true;
363 }
364
365 (QUICK_DOC_CHECK.is_match(line) || QUICK_BOLD_CHECK.is_match(line))
366 && (DOC_METADATA_PATTERN.is_match(line) || BOLD_TEXT_PATTERN.is_match(line))
367}
368
/// Unit tests for marker scanning, span pairing and doc-pattern detection.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_emphasis_marker_parsing() {
        // One `*...*` pair plus one `**...**` pair: four markers.
        let markers = find_emphasis_markers("This has *single* and **double** emphasis");
        assert_eq!(markers.len(), 4);
        // Two `*...*` pairs, including markers at the very start and end of the line.
        let markers = find_emphasis_markers("*start* and *end*");
        assert_eq!(markers.len(), 4);
    }

    #[test]
    fn test_single_emphasis_span_detection() {
        let markers = find_emphasis_markers("This has *valid* emphasis and **strong** too");
        let spans = find_single_emphasis_spans("This has *valid* emphasis and **strong** too", markers);
        // Only the single-marker span is reported; the `**strong**` pair is not.
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0].content, "valid");
        assert!(!spans[0].has_leading_space);
        assert!(!spans[0].has_trailing_space);
    }

    #[test]
    fn test_emphasis_with_spaces() {
        let markers = find_emphasis_markers("This has * invalid * emphasis");
        let spans = find_emphasis_spans("This has * invalid * emphasis", markers);
        // The span is still found; the space flags record the padding inside it.
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0].content, " invalid ");
        assert!(spans[0].has_leading_space);
        assert!(spans[0].has_trailing_space);
    }

    #[test]
    fn test_mixed_markers() {
        let markers = find_emphasis_markers("This has *asterisk* and _underscore_ emphasis");
        let spans = find_single_emphasis_spans("This has *asterisk* and _underscore_ emphasis", markers);
        // Spans come back in line order, one per marker character.
        assert_eq!(spans.len(), 2);
        assert_eq!(spans[0].opening.as_char(), '*');
        assert_eq!(spans[1].opening.as_char(), '_');
    }

    #[test]
    fn test_template_shortcode_detection() {
        // Complete `{* ... *}` shortcodes are detected.
        assert!(has_doc_patterns(
            "{* ../../docs_src/cookie_param_models/tutorial001.py hl[9:12,16] *}"
        ));
        assert!(has_doc_patterns(
            "{* ../../docs_src/conditional_openapi/tutorial001.py hl[6,11] *}"
        ));
        assert!(has_doc_patterns("{* file.py *}"));
        assert!(has_doc_patterns("{* ../path/to/file.py ln[1-10] *}"));

        // Plain emphasis and an unterminated shortcode are not doc patterns.
        assert!(!has_doc_patterns("This has *emphasis* text"));
        assert!(!has_doc_patterns("This has * spaces * in emphasis"));
        assert!(!has_doc_patterns("{* incomplete"));
    }

    #[test]
    fn test_doc_pattern_rejects_spaced_bold_metadata() {
        // Well-formed bold keys followed by a colon are detected.
        assert!(has_doc_patterns("**Key**: value"));
        assert!(has_doc_patterns("**Name**: another value"));
        assert!(has_doc_patterns("**X**: single char"));
        assert!(has_doc_patterns("* **Key**: list item with bold key"));

        // Whitespace directly inside the `**` delimiters disqualifies the line.
        assert!(!has_doc_patterns("** Key**: value"));
        assert!(!has_doc_patterns("**Key **: value"));
        assert!(!has_doc_patterns("** Key **: value"));
        assert!(!has_doc_patterns(
            "** Explicit Import**: Convert markdownlint configs to rumdl format:"
        ));
    }
}