rumdl_lib/utils/
emphasis_utils.rs1use regex::Regex;
2use std::sync::LazyLock;
3
4static INLINE_CODE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(`+)([^`]|[^`].*?[^`])(`+)").unwrap());
6
7static LIST_MARKER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*[*+-]\s+").unwrap());
9
10static DOC_METADATA_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*\*?\s*\*\*[^*]+\*\*\s*:").unwrap());
12
13static BOLD_TEXT_PATTERN: LazyLock<Regex> =
15 LazyLock::new(|| Regex::new(r"\*\*[^*\s][^*]*[^*\s]\*\*|\*\*[^*\s]\*\*").unwrap());
16
17static QUICK_DOC_CHECK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*\*\s+\*").unwrap());
19static QUICK_BOLD_CHECK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\*\*[^*\s]").unwrap());
20
21static TEMPLATE_SHORTCODE_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\{\*.*\*\}").unwrap());
24
/// A run of identical emphasis marker bytes located inside a line.
#[derive(Debug, Clone, PartialEq)]
pub struct EmphasisMarker {
    /// The marker byte itself (`b'*'` or `b'_'` when produced by `find_emphasis_markers`).
    pub marker_type: u8,
    /// How many consecutive marker bytes make up the run.
    pub count: u8,
    /// Byte offset of the first marker byte within the line.
    pub start_pos: usize,
}

impl EmphasisMarker {
    /// Byte offset immediately after the last marker byte of the run.
    #[inline]
    pub fn end_pos(&self) -> usize {
        self.start_pos + usize::from(self.count)
    }

    /// The marker byte converted to a `char`.
    #[inline]
    pub fn as_char(&self) -> char {
        char::from(self.marker_type)
    }
}
44
45#[derive(Debug, Clone)]
47pub struct EmphasisSpan {
48 pub opening: EmphasisMarker,
49 pub closing: EmphasisMarker,
50 pub content: String,
51 pub has_leading_space: bool,
52 pub has_trailing_space: bool,
53}
54
55#[inline]
58pub fn replace_inline_code(line: &str) -> String {
59 if !line.contains('`') {
61 return line.to_string();
62 }
63
64 let mut result = line.to_string();
65 let mut offset = 0;
66
67 for cap in INLINE_CODE.captures_iter(line) {
68 if let (Some(full_match), Some(_opening), Some(_content), Some(_closing)) =
69 (cap.get(0), cap.get(1), cap.get(2), cap.get(3))
70 {
71 let match_start = full_match.start();
72 let match_end = full_match.end();
73 let placeholder = "X".repeat(match_end - match_start);
75
76 result.replace_range(match_start + offset..match_end + offset, &placeholder);
77 offset += placeholder.len() - (match_end - match_start);
78 }
79 }
80
81 result
82}
83
84#[inline]
86pub fn find_emphasis_markers(line: &str) -> Vec<EmphasisMarker> {
87 if !line.contains('*') && !line.contains('_') {
89 return Vec::new();
90 }
91
92 let mut markers = Vec::new();
93 let bytes = line.as_bytes();
94 let mut i = 0;
95
96 while i < bytes.len() {
97 let byte = bytes[i];
98 if byte == b'*' || byte == b'_' {
99 let start_pos = i;
100 let mut count = 1u8;
101
102 while i + (count as usize) < bytes.len() && bytes[i + (count as usize)] == byte && count < 3 {
104 count += 1;
105 }
106
107 if count == 1 || count == 2 {
109 markers.push(EmphasisMarker {
110 marker_type: byte,
111 count,
112 start_pos,
113 });
114 }
115
116 i += count as usize;
117 } else {
118 i += 1;
119 }
120 }
121
122 markers
123}
124
125pub fn find_single_emphasis_spans(line: &str, markers: Vec<EmphasisMarker>) -> Vec<EmphasisSpan> {
127 if markers.len() < 2 {
129 return Vec::new();
130 }
131
132 let mut spans = Vec::new();
133 let mut used_markers = vec![false; markers.len()];
134
135 for i in 0..markers.len() {
137 if used_markers[i] || markers[i].count != 1 {
138 continue;
139 }
140
141 let opening = &markers[i];
142
143 for j in (i + 1)..markers.len() {
145 if used_markers[j] {
146 continue;
147 }
148
149 let closing = &markers[j];
150
151 if closing.marker_type == opening.marker_type && closing.count == 1 {
153 let content_start = opening.end_pos();
154 let content_end = closing.start_pos;
155
156 if content_end > content_start {
157 let content = &line[content_start..content_end];
158
159 if is_valid_emphasis_content_fast(content) && is_valid_emphasis_span_fast(line, opening, closing) {
161 let crosses_markers = markers[i + 1..j]
163 .iter()
164 .any(|marker| marker.marker_type == opening.marker_type && marker.count == 1);
165
166 if !crosses_markers {
167 let has_leading_space = content.starts_with(' ') || content.starts_with('\t');
168 let has_trailing_space = content.ends_with(' ') || content.ends_with('\t');
169
170 spans.push(EmphasisSpan {
171 opening: opening.clone(),
172 closing: closing.clone(),
173 content: content.to_string(),
174 has_leading_space,
175 has_trailing_space,
176 });
177
178 used_markers[i] = true;
180 used_markers[j] = true;
181 break;
182 }
183 }
184 }
185 }
186 }
187 }
188
189 spans
190}
191
192pub fn find_emphasis_spans(line: &str, markers: Vec<EmphasisMarker>) -> Vec<EmphasisSpan> {
194 if markers.len() < 2 {
196 return Vec::new();
197 }
198
199 let mut spans = Vec::new();
200 let mut used_markers = vec![false; markers.len()];
201
202 for i in 0..markers.len() {
204 if used_markers[i] {
205 continue;
206 }
207
208 let opening = &markers[i];
209
210 for j in (i + 1)..markers.len() {
212 if used_markers[j] {
213 continue;
214 }
215
216 let closing = &markers[j];
217
218 if closing.marker_type == opening.marker_type && closing.count == opening.count {
220 let content_start = opening.end_pos();
221 let content_end = closing.start_pos;
222
223 if content_end > content_start {
224 let content = &line[content_start..content_end];
225
226 if is_valid_emphasis_content_fast(content) && is_valid_emphasis_span_fast(line, opening, closing) {
228 let crosses_markers = markers[i + 1..j]
230 .iter()
231 .any(|marker| marker.marker_type == opening.marker_type);
232
233 if !crosses_markers {
234 let has_leading_space = content.starts_with(' ') || content.starts_with('\t');
235 let has_trailing_space = content.ends_with(' ') || content.ends_with('\t');
236
237 spans.push(EmphasisSpan {
238 opening: opening.clone(),
239 closing: closing.clone(),
240 content: content.to_string(),
241 has_leading_space,
242 has_trailing_space,
243 });
244
245 used_markers[i] = true;
247 used_markers[j] = true;
248 break;
249 }
250 }
251 }
252 }
253 }
254 }
255
256 spans
257}
258
259#[inline]
261pub fn is_valid_emphasis_span_fast(line: &str, opening: &EmphasisMarker, closing: &EmphasisMarker) -> bool {
262 let content_start = opening.end_pos();
263 let content_end = closing.start_pos;
264
265 if content_end <= content_start {
267 return false;
268 }
269
270 let content = &line[content_start..content_end];
271 if content.trim().is_empty() {
272 return false;
273 }
274
275 let bytes = line.as_bytes();
277
278 let valid_opening = opening.start_pos == 0
280 || matches!(
281 bytes.get(opening.start_pos.saturating_sub(1)),
282 Some(&b' ')
283 | Some(&b'\t')
284 | Some(&b'(')
285 | Some(&b'[')
286 | Some(&b'{')
287 | Some(&b'"')
288 | Some(&b'\'')
289 | Some(&b'>')
290 );
291
292 let valid_closing = closing.end_pos() >= bytes.len()
294 || matches!(
295 bytes.get(closing.end_pos()),
296 Some(&b' ')
297 | Some(&b'\t')
298 | Some(&b')')
299 | Some(&b']')
300 | Some(&b'}')
301 | Some(&b'"')
302 | Some(&b'\'')
303 | Some(&b'.')
304 | Some(&b',')
305 | Some(&b'!')
306 | Some(&b'?')
307 | Some(&b';')
308 | Some(&b':')
309 | Some(&b'<')
310 );
311
312 valid_opening && valid_closing && !content.contains('\n')
313}
314
/// Returns `true` when `content` contains at least one non-whitespace character.
#[inline]
pub fn is_valid_emphasis_content_fast(content: &str) -> bool {
    content.chars().any(|ch| !ch.is_whitespace())
}
320
321pub fn is_likely_list_line(line: &str) -> bool {
323 LIST_MARKER.is_match(line)
324}
325
326pub fn has_doc_patterns(line: &str) -> bool {
328 if line.contains("{*") && TEMPLATE_SHORTCODE_PATTERN.is_match(line) {
331 return true;
332 }
333
334 (QUICK_DOC_CHECK.is_match(line) || QUICK_BOLD_CHECK.is_match(line))
335 && (DOC_METADATA_PATTERN.is_match(line) || BOLD_TEXT_PATTERN.is_match(line))
336}
337
#[cfg(test)]
mod tests {
    use super::*;

    /// Marker scanning reports one marker per single or double run.
    #[test]
    fn test_emphasis_marker_parsing() {
        let mixed = find_emphasis_markers("This has *single* and **double** emphasis");
        assert_eq!(mixed.len(), 4);

        let singles = find_emphasis_markers("*start* and *end*");
        assert_eq!(singles.len(), 4);
    }

    /// Only the single-marker span is returned; the `**strong**` pair is ignored.
    #[test]
    fn test_single_emphasis_span_detection() {
        let line = "This has *valid* emphasis and **strong** too";
        let spans = find_single_emphasis_spans(line, find_emphasis_markers(line));

        assert_eq!(spans.len(), 1);
        let span = &spans[0];
        assert_eq!(span.content, "valid");
        assert!(!span.has_leading_space);
        assert!(!span.has_trailing_space);
    }

    /// Padding inside the markers is kept in the content and flagged on both sides.
    #[test]
    fn test_emphasis_with_spaces() {
        let line = "This has * invalid * emphasis";
        let spans = find_emphasis_spans(line, find_emphasis_markers(line));

        assert_eq!(spans.len(), 1);
        let span = &spans[0];
        assert_eq!(span.content, " invalid ");
        assert!(span.has_leading_space);
        assert!(span.has_trailing_space);
    }

    /// Asterisk and underscore spans are paired independently of each other.
    #[test]
    fn test_mixed_markers() {
        let line = "This has *asterisk* and _underscore_ emphasis";
        let spans = find_single_emphasis_spans(line, find_emphasis_markers(line));

        assert_eq!(spans.len(), 2);
        assert_eq!(spans[0].opening.as_char(), '*');
        assert_eq!(spans[1].opening.as_char(), '_');
    }

    /// `{* ... *}` shortcodes count as doc patterns; plain emphasis does not.
    #[test]
    fn test_template_shortcode_detection() {
        assert!(has_doc_patterns(
            "{* ../../docs_src/cookie_param_models/tutorial001.py hl[9:12,16] *}"
        ));
        assert!(has_doc_patterns(
            "{* ../../docs_src/conditional_openapi/tutorial001.py hl[6,11] *}"
        ));
        assert!(has_doc_patterns("{* file.py *}"));
        assert!(has_doc_patterns("{* ../path/to/file.py ln[1-10] *}"));

        assert!(!has_doc_patterns("This has *emphasis* text"));
        assert!(!has_doc_patterns("This has * spaces * in emphasis"));
        assert!(!has_doc_patterns("{* incomplete"));
    }
}