rumdl_lib/utils/
emphasis_utils.rs1use regex::Regex;
2use std::sync::LazyLock;
3
4static INLINE_CODE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(`+)([^`]|[^`].*?[^`])(`+)").unwrap());
6
7static INLINE_MATH: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\$\$[^$]*\$\$|\$[^$\n]*\$").unwrap());
10
11static DOC_METADATA_PATTERN: LazyLock<Regex> =
13 LazyLock::new(|| Regex::new(r"^\s*\*?\s*\*\*(?:[^*\s][^*]*[^*\s]|[^*\s])\*\*\s*:").unwrap());
14
15static BOLD_TEXT_PATTERN: LazyLock<Regex> =
17 LazyLock::new(|| Regex::new(r"\*\*[^*\s][^*]*[^*\s]\*\*|\*\*[^*\s]\*\*").unwrap());
18
19static QUICK_DOC_CHECK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*\*\s+\*").unwrap());
21static QUICK_BOLD_CHECK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\*\*[^*\s]").unwrap());
22
23static TEMPLATE_SHORTCODE_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\{\*.*\*\}").unwrap());
26
/// A run of identical emphasis characters located within a line.
#[derive(Debug, Clone, PartialEq)]
pub struct EmphasisMarker {
    /// The marker character stored as a byte (e.g. `b'*'` or `b'_'`).
    pub marker_type: u8,
    /// Number of consecutive marker bytes in the run.
    pub count: u8,
    /// Byte offset of the first marker byte within the line.
    pub start_pos: usize,
}

impl EmphasisMarker {
    /// Byte offset just past the last marker byte (`start_pos + count`).
    #[inline]
    pub fn end_pos(&self) -> usize {
        self.start_pos + usize::from(self.count)
    }

    /// The marker byte converted to a `char`.
    #[inline]
    pub fn as_char(&self) -> char {
        char::from(self.marker_type)
    }
}
46
47#[derive(Debug, Clone)]
49pub struct EmphasisSpan {
50 pub opening: EmphasisMarker,
51 pub closing: EmphasisMarker,
52 pub content: String,
53 pub has_leading_space: bool,
54 pub has_trailing_space: bool,
55}
56
57#[inline]
60pub fn replace_inline_code(line: &str) -> String {
61 if !line.contains('`') {
63 return line.to_string();
64 }
65
66 let mut result = line.to_string();
67 let mut offset = 0;
68
69 for cap in INLINE_CODE.captures_iter(line) {
70 if let (Some(full_match), Some(_opening), Some(_content), Some(_closing)) =
71 (cap.get(0), cap.get(1), cap.get(2), cap.get(3))
72 {
73 let match_start = full_match.start();
74 let match_end = full_match.end();
75 let placeholder = "X".repeat(match_end - match_start);
77
78 result.replace_range(match_start + offset..match_end + offset, &placeholder);
79 offset += placeholder.len() - (match_end - match_start);
80 }
81 }
82
83 result
84}
85
86pub fn replace_inline_math(line: &str) -> String {
89 if !line.contains('$') {
91 return line.to_string();
92 }
93
94 let mut result = line.to_string();
95 let mut offset: isize = 0;
96
97 for m in INLINE_MATH.find_iter(line) {
98 let match_start = m.start();
99 let match_end = m.end();
100 let placeholder = "M".repeat(match_end - match_start);
102
103 let adjusted_start = (match_start as isize + offset) as usize;
104 let adjusted_end = (match_end as isize + offset) as usize;
105 result.replace_range(adjusted_start..adjusted_end, &placeholder);
106 offset += placeholder.len() as isize - (match_end - match_start) as isize;
107 }
108
109 result
110}
111
112#[inline]
114pub fn find_emphasis_markers(line: &str) -> Vec<EmphasisMarker> {
115 if !line.contains('*') && !line.contains('_') {
117 return Vec::new();
118 }
119
120 let mut markers = Vec::new();
121 let bytes = line.as_bytes();
122 let mut i = 0;
123
124 while i < bytes.len() {
125 let byte = bytes[i];
126 if byte == b'*' || byte == b'_' {
127 let start_pos = i;
128 let mut count = 1u8;
129
130 while i + (count as usize) < bytes.len() && bytes[i + (count as usize)] == byte && count < 3 {
132 count += 1;
133 }
134
135 if count == 1 || count == 2 {
137 markers.push(EmphasisMarker {
138 marker_type: byte,
139 count,
140 start_pos,
141 });
142 }
143
144 i += count as usize;
145 } else {
146 i += 1;
147 }
148 }
149
150 markers
151}
152
153pub fn find_single_emphasis_spans(line: &str, markers: &[EmphasisMarker]) -> Vec<EmphasisSpan> {
155 if markers.len() < 2 {
157 return Vec::new();
158 }
159
160 let mut spans = Vec::new();
161 let mut used_markers = vec![false; markers.len()];
162
163 for i in 0..markers.len() {
165 if used_markers[i] || markers[i].count != 1 {
166 continue;
167 }
168
169 let opening = &markers[i];
170
171 for j in (i + 1)..markers.len() {
173 if used_markers[j] {
174 continue;
175 }
176
177 let closing = &markers[j];
178
179 if closing.marker_type == opening.marker_type && closing.count == 1 {
181 let content_start = opening.end_pos();
182 let content_end = closing.start_pos;
183
184 if content_end > content_start {
185 let content = &line[content_start..content_end];
186
187 if is_valid_emphasis_content_fast(content) && is_valid_emphasis_span_fast(line, opening, closing) {
189 let crosses_markers = markers[i + 1..j]
191 .iter()
192 .any(|marker| marker.marker_type == opening.marker_type && marker.count == 1);
193
194 if !crosses_markers {
195 let has_leading_space = content.starts_with(' ') || content.starts_with('\t');
196 let has_trailing_space = content.ends_with(' ') || content.ends_with('\t');
197
198 spans.push(EmphasisSpan {
199 opening: opening.clone(),
200 closing: closing.clone(),
201 content: content.to_string(),
202 has_leading_space,
203 has_trailing_space,
204 });
205
206 used_markers[i] = true;
208 used_markers[j] = true;
209 break;
210 }
211 }
212 }
213 }
214 }
215 }
216
217 spans
218}
219
220pub fn find_emphasis_spans(line: &str, markers: &[EmphasisMarker]) -> Vec<EmphasisSpan> {
222 if markers.len() < 2 {
224 return Vec::new();
225 }
226
227 let mut spans = Vec::new();
228 let mut used_markers = vec![false; markers.len()];
229
230 for i in 0..markers.len() {
232 if used_markers[i] {
233 continue;
234 }
235
236 let opening = &markers[i];
237
238 for j in (i + 1)..markers.len() {
240 if used_markers[j] {
241 continue;
242 }
243
244 let closing = &markers[j];
245
246 if closing.marker_type == opening.marker_type && closing.count == opening.count {
248 let content_start = opening.end_pos();
249 let content_end = closing.start_pos;
250
251 if content_end > content_start {
252 let content = &line[content_start..content_end];
253
254 if is_valid_emphasis_content_fast(content) && is_valid_emphasis_span_fast(line, opening, closing) {
256 let crosses_markers = markers[i + 1..j]
258 .iter()
259 .any(|marker| marker.marker_type == opening.marker_type);
260
261 if !crosses_markers {
262 let has_leading_space = content.starts_with(' ') || content.starts_with('\t');
263 let has_trailing_space = content.ends_with(' ') || content.ends_with('\t');
264
265 spans.push(EmphasisSpan {
266 opening: opening.clone(),
267 closing: closing.clone(),
268 content: content.to_string(),
269 has_leading_space,
270 has_trailing_space,
271 });
272
273 used_markers[i] = true;
275 used_markers[j] = true;
276 break;
277 }
278 }
279 }
280 }
281 }
282 }
283
284 spans
285}
286
287#[inline]
289fn is_valid_emphasis_span_fast(line: &str, opening: &EmphasisMarker, closing: &EmphasisMarker) -> bool {
290 let content_start = opening.end_pos();
291 let content_end = closing.start_pos;
292
293 if content_end <= content_start {
295 return false;
296 }
297
298 let content = &line[content_start..content_end];
299 if content.trim().is_empty() {
300 return false;
301 }
302
303 let bytes = line.as_bytes();
305
306 let valid_opening = opening.start_pos == 0
308 || matches!(
309 bytes.get(opening.start_pos.saturating_sub(1)),
310 Some(&b' ')
311 | Some(&b'\t')
312 | Some(&b'(')
313 | Some(&b'[')
314 | Some(&b'{')
315 | Some(&b'"')
316 | Some(&b'\'')
317 | Some(&b'>')
318 );
319
320 let valid_closing = closing.end_pos() >= bytes.len()
322 || matches!(
323 bytes.get(closing.end_pos()),
324 Some(&b' ')
325 | Some(&b'\t')
326 | Some(&b')')
327 | Some(&b']')
328 | Some(&b'}')
329 | Some(&b'"')
330 | Some(&b'\'')
331 | Some(&b'.')
332 | Some(&b',')
333 | Some(&b'!')
334 | Some(&b'?')
335 | Some(&b';')
336 | Some(&b':')
337 | Some(&b'<')
338 );
339
340 valid_opening && valid_closing && !content.contains('\n')
341}
342
/// Returns `true` when `content` contains at least one non-whitespace
/// character, i.e. it is neither empty nor whitespace-only.
#[inline]
fn is_valid_emphasis_content_fast(content: &str) -> bool {
    content.chars().any(|c| !c.is_whitespace())
}
348
349pub fn has_doc_patterns(line: &str) -> bool {
351 if line.contains("{*") && TEMPLATE_SHORTCODE_PATTERN.is_match(line) {
354 return true;
355 }
356
357 (QUICK_DOC_CHECK.is_match(line) || QUICK_BOLD_CHECK.is_match(line))
358 && (DOC_METADATA_PATTERN.is_match(line) || BOLD_TEXT_PATTERN.is_match(line))
359}
360
#[cfg(test)]
mod tests {
    use super::*;

    /// Single and double marker runs are both reported as markers.
    #[test]
    fn test_emphasis_marker_parsing() {
        let mixed = "This has *single* and **double** emphasis";
        assert_eq!(find_emphasis_markers(mixed).len(), 4);

        let singles_only = "*start* and *end*";
        assert_eq!(find_emphasis_markers(singles_only).len(), 4);
    }

    /// Only the single-marker pair forms a span; the `**strong**` pair does not.
    #[test]
    fn test_single_emphasis_span_detection() {
        let line = "This has *valid* emphasis and **strong** too";
        let spans = find_single_emphasis_spans(line, &find_emphasis_markers(line));

        assert_eq!(spans.len(), 1);
        let span = &spans[0];
        assert_eq!(span.content, "valid");
        assert!(!span.has_leading_space);
        assert!(!span.has_trailing_space);
    }

    /// Padded content is still reported, with both space flags set.
    #[test]
    fn test_emphasis_with_spaces() {
        let line = "This has * invalid * emphasis";
        let spans = find_emphasis_spans(line, &find_emphasis_markers(line));

        assert_eq!(spans.len(), 1);
        let span = &spans[0];
        assert_eq!(span.content, " invalid ");
        assert!(span.has_leading_space);
        assert!(span.has_trailing_space);
    }

    /// Asterisk and underscore spans are paired independently of each other.
    #[test]
    fn test_mixed_markers() {
        let line = "This has *asterisk* and _underscore_ emphasis";
        let spans = find_single_emphasis_spans(line, &find_emphasis_markers(line));

        assert_eq!(spans.len(), 2);
        assert_eq!(spans[0].opening.as_char(), '*');
        assert_eq!(spans[1].opening.as_char(), '_');
    }

    /// `{* ... *}` shortcodes are detected; ordinary emphasis is not.
    #[test]
    fn test_template_shortcode_detection() {
        // Complete shortcodes.
        assert!(has_doc_patterns(
            "{* ../../docs_src/cookie_param_models/tutorial001.py hl[9:12,16] *}"
        ));
        assert!(has_doc_patterns(
            "{* ../../docs_src/conditional_openapi/tutorial001.py hl[6,11] *}"
        ));
        assert!(has_doc_patterns("{* file.py *}"));
        assert!(has_doc_patterns("{* ../path/to/file.py ln[1-10] *}"));

        // Plain emphasis and an unterminated shortcode.
        assert!(!has_doc_patterns("This has *emphasis* text"));
        assert!(!has_doc_patterns("This has * spaces * in emphasis"));
        assert!(!has_doc_patterns("{* incomplete"));
    }

    /// Bold metadata keys match only when the bold text has no inner padding.
    #[test]
    fn test_doc_pattern_rejects_spaced_bold_metadata() {
        // Well-formed bold keys.
        assert!(has_doc_patterns("**Key**: value"));
        assert!(has_doc_patterns("**Name**: another value"));
        assert!(has_doc_patterns("**X**: single char"));
        assert!(has_doc_patterns("* **Key**: list item with bold key"));

        // Whitespace just inside the bold markers.
        assert!(!has_doc_patterns("** Key**: value"));
        assert!(!has_doc_patterns("**Key **: value"));
        assert!(!has_doc_patterns("** Key **: value"));
        assert!(!has_doc_patterns(
            "** Explicit Import**: Convert markdownlint configs to rumdl format:"
        ));
    }
}