rumdl_lib/utils/
emphasis_utils.rs1use regex::Regex;
2use std::sync::LazyLock;
3
4static INLINE_CODE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(`+)([^`]|[^`].*?[^`])(`+)").unwrap());
6
7static INLINE_MATH: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\$\$[^$]*\$\$|\$[^$\n]*\$").unwrap());
10
11static LIST_MARKER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*[*+-]\s+").unwrap());
13
14static DOC_METADATA_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*\*?\s*\*\*[^*]+\*\*\s*:").unwrap());
16
17static BOLD_TEXT_PATTERN: LazyLock<Regex> =
19 LazyLock::new(|| Regex::new(r"\*\*[^*\s][^*]*[^*\s]\*\*|\*\*[^*\s]\*\*").unwrap());
20
21static QUICK_DOC_CHECK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*\*\s+\*").unwrap());
23static QUICK_BOLD_CHECK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\*\*[^*\s]").unwrap());
24
25static TEMPLATE_SHORTCODE_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\{\*.*\*\}").unwrap());
28
/// A run of one or two identical emphasis characters found in a line.
#[derive(Debug, Clone, PartialEq)]
pub struct EmphasisMarker {
    /// The marker byte itself (for example `b'*'` or `b'_'`).
    pub marker_type: u8,
    /// Number of consecutive marker bytes in this run.
    pub count: u8,
    /// Byte offset of the first marker byte within the line.
    pub start_pos: usize,
}

impl EmphasisMarker {
    /// Byte offset one past the last marker byte of this run.
    #[inline]
    pub fn end_pos(&self) -> usize {
        let width = usize::from(self.count);
        self.start_pos + width
    }

    /// The marker byte viewed as a `char`.
    #[inline]
    pub fn as_char(&self) -> char {
        char::from(self.marker_type)
    }
}
48
49#[derive(Debug, Clone)]
51pub struct EmphasisSpan {
52 pub opening: EmphasisMarker,
53 pub closing: EmphasisMarker,
54 pub content: String,
55 pub has_leading_space: bool,
56 pub has_trailing_space: bool,
57}
58
59#[inline]
62pub fn replace_inline_code(line: &str) -> String {
63 if !line.contains('`') {
65 return line.to_string();
66 }
67
68 let mut result = line.to_string();
69 let mut offset = 0;
70
71 for cap in INLINE_CODE.captures_iter(line) {
72 if let (Some(full_match), Some(_opening), Some(_content), Some(_closing)) =
73 (cap.get(0), cap.get(1), cap.get(2), cap.get(3))
74 {
75 let match_start = full_match.start();
76 let match_end = full_match.end();
77 let placeholder = "X".repeat(match_end - match_start);
79
80 result.replace_range(match_start + offset..match_end + offset, &placeholder);
81 offset += placeholder.len() - (match_end - match_start);
82 }
83 }
84
85 result
86}
87
88pub fn replace_inline_math(line: &str) -> String {
91 if !line.contains('$') {
93 return line.to_string();
94 }
95
96 let mut result = line.to_string();
97 let mut offset: isize = 0;
98
99 for m in INLINE_MATH.find_iter(line) {
100 let match_start = m.start();
101 let match_end = m.end();
102 let placeholder = "M".repeat(match_end - match_start);
104
105 let adjusted_start = (match_start as isize + offset) as usize;
106 let adjusted_end = (match_end as isize + offset) as usize;
107 result.replace_range(adjusted_start..adjusted_end, &placeholder);
108 offset += placeholder.len() as isize - (match_end - match_start) as isize;
109 }
110
111 result
112}
113
114#[inline]
116pub fn find_emphasis_markers(line: &str) -> Vec<EmphasisMarker> {
117 if !line.contains('*') && !line.contains('_') {
119 return Vec::new();
120 }
121
122 let mut markers = Vec::new();
123 let bytes = line.as_bytes();
124 let mut i = 0;
125
126 while i < bytes.len() {
127 let byte = bytes[i];
128 if byte == b'*' || byte == b'_' {
129 let start_pos = i;
130 let mut count = 1u8;
131
132 while i + (count as usize) < bytes.len() && bytes[i + (count as usize)] == byte && count < 3 {
134 count += 1;
135 }
136
137 if count == 1 || count == 2 {
139 markers.push(EmphasisMarker {
140 marker_type: byte,
141 count,
142 start_pos,
143 });
144 }
145
146 i += count as usize;
147 } else {
148 i += 1;
149 }
150 }
151
152 markers
153}
154
155pub fn find_single_emphasis_spans(line: &str, markers: Vec<EmphasisMarker>) -> Vec<EmphasisSpan> {
157 if markers.len() < 2 {
159 return Vec::new();
160 }
161
162 let mut spans = Vec::new();
163 let mut used_markers = vec![false; markers.len()];
164
165 for i in 0..markers.len() {
167 if used_markers[i] || markers[i].count != 1 {
168 continue;
169 }
170
171 let opening = &markers[i];
172
173 for j in (i + 1)..markers.len() {
175 if used_markers[j] {
176 continue;
177 }
178
179 let closing = &markers[j];
180
181 if closing.marker_type == opening.marker_type && closing.count == 1 {
183 let content_start = opening.end_pos();
184 let content_end = closing.start_pos;
185
186 if content_end > content_start {
187 let content = &line[content_start..content_end];
188
189 if is_valid_emphasis_content_fast(content) && is_valid_emphasis_span_fast(line, opening, closing) {
191 let crosses_markers = markers[i + 1..j]
193 .iter()
194 .any(|marker| marker.marker_type == opening.marker_type && marker.count == 1);
195
196 if !crosses_markers {
197 let has_leading_space = content.starts_with(' ') || content.starts_with('\t');
198 let has_trailing_space = content.ends_with(' ') || content.ends_with('\t');
199
200 spans.push(EmphasisSpan {
201 opening: opening.clone(),
202 closing: closing.clone(),
203 content: content.to_string(),
204 has_leading_space,
205 has_trailing_space,
206 });
207
208 used_markers[i] = true;
210 used_markers[j] = true;
211 break;
212 }
213 }
214 }
215 }
216 }
217 }
218
219 spans
220}
221
222pub fn find_emphasis_spans(line: &str, markers: Vec<EmphasisMarker>) -> Vec<EmphasisSpan> {
224 if markers.len() < 2 {
226 return Vec::new();
227 }
228
229 let mut spans = Vec::new();
230 let mut used_markers = vec![false; markers.len()];
231
232 for i in 0..markers.len() {
234 if used_markers[i] {
235 continue;
236 }
237
238 let opening = &markers[i];
239
240 for j in (i + 1)..markers.len() {
242 if used_markers[j] {
243 continue;
244 }
245
246 let closing = &markers[j];
247
248 if closing.marker_type == opening.marker_type && closing.count == opening.count {
250 let content_start = opening.end_pos();
251 let content_end = closing.start_pos;
252
253 if content_end > content_start {
254 let content = &line[content_start..content_end];
255
256 if is_valid_emphasis_content_fast(content) && is_valid_emphasis_span_fast(line, opening, closing) {
258 let crosses_markers = markers[i + 1..j]
260 .iter()
261 .any(|marker| marker.marker_type == opening.marker_type);
262
263 if !crosses_markers {
264 let has_leading_space = content.starts_with(' ') || content.starts_with('\t');
265 let has_trailing_space = content.ends_with(' ') || content.ends_with('\t');
266
267 spans.push(EmphasisSpan {
268 opening: opening.clone(),
269 closing: closing.clone(),
270 content: content.to_string(),
271 has_leading_space,
272 has_trailing_space,
273 });
274
275 used_markers[i] = true;
277 used_markers[j] = true;
278 break;
279 }
280 }
281 }
282 }
283 }
284 }
285
286 spans
287}
288
289#[inline]
291pub fn is_valid_emphasis_span_fast(line: &str, opening: &EmphasisMarker, closing: &EmphasisMarker) -> bool {
292 let content_start = opening.end_pos();
293 let content_end = closing.start_pos;
294
295 if content_end <= content_start {
297 return false;
298 }
299
300 let content = &line[content_start..content_end];
301 if content.trim().is_empty() {
302 return false;
303 }
304
305 let bytes = line.as_bytes();
307
308 let valid_opening = opening.start_pos == 0
310 || matches!(
311 bytes.get(opening.start_pos.saturating_sub(1)),
312 Some(&b' ')
313 | Some(&b'\t')
314 | Some(&b'(')
315 | Some(&b'[')
316 | Some(&b'{')
317 | Some(&b'"')
318 | Some(&b'\'')
319 | Some(&b'>')
320 );
321
322 let valid_closing = closing.end_pos() >= bytes.len()
324 || matches!(
325 bytes.get(closing.end_pos()),
326 Some(&b' ')
327 | Some(&b'\t')
328 | Some(&b')')
329 | Some(&b']')
330 | Some(&b'}')
331 | Some(&b'"')
332 | Some(&b'\'')
333 | Some(&b'.')
334 | Some(&b',')
335 | Some(&b'!')
336 | Some(&b'?')
337 | Some(&b';')
338 | Some(&b':')
339 | Some(&b'<')
340 );
341
342 valid_opening && valid_closing && !content.contains('\n')
343}
344
/// Returns `true` when `content` contains at least one non-whitespace character.
#[inline]
pub fn is_valid_emphasis_content_fast(content: &str) -> bool {
    content.chars().any(|ch| !ch.is_whitespace())
}
350
351pub fn is_likely_list_line(line: &str) -> bool {
353 LIST_MARKER.is_match(line)
354}
355
356pub fn has_doc_patterns(line: &str) -> bool {
358 if line.contains("{*") && TEMPLATE_SHORTCODE_PATTERN.is_match(line) {
361 return true;
362 }
363
364 (QUICK_DOC_CHECK.is_match(line) || QUICK_BOLD_CHECK.is_match(line))
365 && (DOC_METADATA_PATTERN.is_match(line) || BOLD_TEXT_PATTERN.is_match(line))
366}
367
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_emphasis_marker_parsing() {
        // One `*…*` pair plus one `**…**` pair.
        let mixed = find_emphasis_markers("This has *single* and **double** emphasis");
        assert_eq!(mixed.len(), 4);

        // Markers at the very start and very end of the line.
        let at_edges = find_emphasis_markers("*start* and *end*");
        assert_eq!(at_edges.len(), 4);
    }

    #[test]
    fn test_single_emphasis_span_detection() {
        let line = "This has *valid* emphasis and **strong** too";
        let spans = find_single_emphasis_spans(line, find_emphasis_markers(line));

        // Only the single-marker pair is reported; the `**` pair is ignored.
        assert_eq!(spans.len(), 1);
        let span = &spans[0];
        assert_eq!(span.content, "valid");
        assert!(!span.has_leading_space);
        assert!(!span.has_trailing_space);
    }

    #[test]
    fn test_emphasis_with_spaces() {
        let line = "This has * invalid * emphasis";
        let spans = find_emphasis_spans(line, find_emphasis_markers(line));

        assert_eq!(spans.len(), 1);
        let span = &spans[0];
        assert_eq!(span.content, " invalid ");
        assert!(span.has_leading_space);
        assert!(span.has_trailing_space);
    }

    #[test]
    fn test_mixed_markers() {
        let line = "This has *asterisk* and _underscore_ emphasis";
        let spans = find_single_emphasis_spans(line, find_emphasis_markers(line));

        assert_eq!(spans.len(), 2);
        assert_eq!(spans[0].opening.as_char(), '*');
        assert_eq!(spans[1].opening.as_char(), '_');
    }

    #[test]
    fn test_template_shortcode_detection() {
        // Lines containing a complete `{* … *}` shortcode are detected.
        let shortcodes = [
            "{* ../../docs_src/cookie_param_models/tutorial001.py hl[9:12,16] *}",
            "{* ../../docs_src/conditional_openapi/tutorial001.py hl[6,11] *}",
            "{* file.py *}",
            "{* ../path/to/file.py ln[1-10] *}",
        ];
        for line in shortcodes {
            assert!(has_doc_patterns(line));
        }

        // Ordinary emphasis and an unterminated shortcode are not.
        let ordinary = [
            "This has *emphasis* text",
            "This has * spaces * in emphasis",
            "{* incomplete",
        ];
        for line in ordinary {
            assert!(!has_doc_patterns(line));
        }
    }
}