// rumdl_lib/utils/emphasis_utils.rs
use regex::Regex;
use std::sync::LazyLock;
3
4static INLINE_CODE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(`+)([^`]|[^`].*?[^`])(`+)").unwrap());
6
7static LIST_MARKER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*[*+-]\s+").unwrap());
9
10static DOC_METADATA_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*\*?\s*\*\*[^*]+\*\*\s*:").unwrap());
12
13static BOLD_TEXT_PATTERN: LazyLock<Regex> =
15 LazyLock::new(|| Regex::new(r"\*\*[^*\s][^*]*[^*\s]\*\*|\*\*[^*\s]\*\*").unwrap());
16
17static QUICK_DOC_CHECK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*\*\s+\*").unwrap());
19static QUICK_BOLD_CHECK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\*\*[^*\s]").unwrap());
20
/// A run of emphasis delimiter bytes located in a line.
#[derive(Debug, Clone, PartialEq)]
pub struct EmphasisMarker {
    /// The delimiter byte the run is made of (`find_emphasis_markers` stores
    /// `b'*'` or `b'_'` here).
    pub marker_type: u8,
    /// Number of consecutive delimiter bytes in the run.
    pub count: u8,
    /// Byte offset of the first delimiter within the line.
    pub start_pos: usize,
}

impl EmphasisMarker {
    /// Byte offset one past the last delimiter of this run.
    #[inline]
    pub fn end_pos(&self) -> usize {
        let width = usize::from(self.count);
        self.start_pos + width
    }

    /// The delimiter byte converted to a `char`.
    #[inline]
    pub fn as_char(&self) -> char {
        char::from(self.marker_type)
    }
}
40
41#[derive(Debug, Clone)]
43pub struct EmphasisSpan {
44 pub opening: EmphasisMarker,
45 pub closing: EmphasisMarker,
46 pub content: String,
47 pub has_leading_space: bool,
48 pub has_trailing_space: bool,
49}
50
51#[inline]
54pub fn replace_inline_code(line: &str) -> String {
55 if !line.contains('`') {
57 return line.to_string();
58 }
59
60 let mut result = line.to_string();
61 let mut offset = 0;
62
63 for cap in INLINE_CODE.captures_iter(line) {
64 if let (Some(full_match), Some(_opening), Some(_content), Some(_closing)) =
65 (cap.get(0), cap.get(1), cap.get(2), cap.get(3))
66 {
67 let match_start = full_match.start();
68 let match_end = full_match.end();
69 let placeholder = "X".repeat(match_end - match_start);
71
72 result.replace_range(match_start + offset..match_end + offset, &placeholder);
73 offset += placeholder.len() - (match_end - match_start);
74 }
75 }
76
77 result
78}
79
80#[inline]
82pub fn find_emphasis_markers(line: &str) -> Vec<EmphasisMarker> {
83 if !line.contains('*') && !line.contains('_') {
85 return Vec::new();
86 }
87
88 let mut markers = Vec::new();
89 let bytes = line.as_bytes();
90 let mut i = 0;
91
92 while i < bytes.len() {
93 let byte = bytes[i];
94 if byte == b'*' || byte == b'_' {
95 let start_pos = i;
96 let mut count = 1u8;
97
98 while i + (count as usize) < bytes.len() && bytes[i + (count as usize)] == byte && count < 3 {
100 count += 1;
101 }
102
103 if count == 1 || count == 2 {
105 markers.push(EmphasisMarker {
106 marker_type: byte,
107 count,
108 start_pos,
109 });
110 }
111
112 i += count as usize;
113 } else {
114 i += 1;
115 }
116 }
117
118 markers
119}
120
121pub fn find_single_emphasis_spans(line: &str, markers: Vec<EmphasisMarker>) -> Vec<EmphasisSpan> {
123 if markers.len() < 2 {
125 return Vec::new();
126 }
127
128 let mut spans = Vec::new();
129 let mut used_markers = vec![false; markers.len()];
130
131 for i in 0..markers.len() {
133 if used_markers[i] || markers[i].count != 1 {
134 continue;
135 }
136
137 let opening = &markers[i];
138
139 for j in (i + 1)..markers.len() {
141 if used_markers[j] {
142 continue;
143 }
144
145 let closing = &markers[j];
146
147 if closing.marker_type == opening.marker_type && closing.count == 1 {
149 let content_start = opening.end_pos();
150 let content_end = closing.start_pos;
151
152 if content_end > content_start {
153 let content = &line[content_start..content_end];
154
155 if is_valid_emphasis_content_fast(content) && is_valid_emphasis_span_fast(line, opening, closing) {
157 let crosses_markers = markers[i + 1..j]
159 .iter()
160 .any(|marker| marker.marker_type == opening.marker_type && marker.count == 1);
161
162 if !crosses_markers {
163 let has_leading_space = content.starts_with(' ') || content.starts_with('\t');
164 let has_trailing_space = content.ends_with(' ') || content.ends_with('\t');
165
166 spans.push(EmphasisSpan {
167 opening: opening.clone(),
168 closing: closing.clone(),
169 content: content.to_string(),
170 has_leading_space,
171 has_trailing_space,
172 });
173
174 used_markers[i] = true;
176 used_markers[j] = true;
177 break;
178 }
179 }
180 }
181 }
182 }
183 }
184
185 spans
186}
187
188pub fn find_emphasis_spans(line: &str, markers: Vec<EmphasisMarker>) -> Vec<EmphasisSpan> {
190 if markers.len() < 2 {
192 return Vec::new();
193 }
194
195 let mut spans = Vec::new();
196 let mut used_markers = vec![false; markers.len()];
197
198 for i in 0..markers.len() {
200 if used_markers[i] {
201 continue;
202 }
203
204 let opening = &markers[i];
205
206 for j in (i + 1)..markers.len() {
208 if used_markers[j] {
209 continue;
210 }
211
212 let closing = &markers[j];
213
214 if closing.marker_type == opening.marker_type && closing.count == opening.count {
216 let content_start = opening.end_pos();
217 let content_end = closing.start_pos;
218
219 if content_end > content_start {
220 let content = &line[content_start..content_end];
221
222 if is_valid_emphasis_content_fast(content) && is_valid_emphasis_span_fast(line, opening, closing) {
224 let crosses_markers = markers[i + 1..j]
226 .iter()
227 .any(|marker| marker.marker_type == opening.marker_type);
228
229 if !crosses_markers {
230 let has_leading_space = content.starts_with(' ') || content.starts_with('\t');
231 let has_trailing_space = content.ends_with(' ') || content.ends_with('\t');
232
233 spans.push(EmphasisSpan {
234 opening: opening.clone(),
235 closing: closing.clone(),
236 content: content.to_string(),
237 has_leading_space,
238 has_trailing_space,
239 });
240
241 used_markers[i] = true;
243 used_markers[j] = true;
244 break;
245 }
246 }
247 }
248 }
249 }
250 }
251
252 spans
253}
254
255#[inline]
257pub fn is_valid_emphasis_span_fast(line: &str, opening: &EmphasisMarker, closing: &EmphasisMarker) -> bool {
258 let content_start = opening.end_pos();
259 let content_end = closing.start_pos;
260
261 if content_end <= content_start {
263 return false;
264 }
265
266 let content = &line[content_start..content_end];
267 if content.trim().is_empty() {
268 return false;
269 }
270
271 let bytes = line.as_bytes();
273
274 let valid_opening = opening.start_pos == 0
276 || matches!(
277 bytes.get(opening.start_pos.saturating_sub(1)),
278 Some(&b' ')
279 | Some(&b'\t')
280 | Some(&b'(')
281 | Some(&b'[')
282 | Some(&b'{')
283 | Some(&b'"')
284 | Some(&b'\'')
285 | Some(&b'>')
286 );
287
288 let valid_closing = closing.end_pos() >= bytes.len()
290 || matches!(
291 bytes.get(closing.end_pos()),
292 Some(&b' ')
293 | Some(&b'\t')
294 | Some(&b')')
295 | Some(&b']')
296 | Some(&b'}')
297 | Some(&b'"')
298 | Some(&b'\'')
299 | Some(&b'.')
300 | Some(&b',')
301 | Some(&b'!')
302 | Some(&b'?')
303 | Some(&b';')
304 | Some(&b':')
305 | Some(&b'<')
306 );
307
308 valid_opening && valid_closing && !content.contains('\n')
309}
310
/// Returns `true` when `content` contains at least one non-whitespace
/// character, i.e. it is neither empty nor whitespace-only.
#[inline]
pub fn is_valid_emphasis_content_fast(content: &str) -> bool {
    content.chars().any(|ch| !ch.is_whitespace())
}
316
317pub fn is_likely_list_line(line: &str) -> bool {
319 LIST_MARKER.is_match(line)
320}
321
322pub fn has_doc_patterns(line: &str) -> bool {
324 (QUICK_DOC_CHECK.is_match(line) || QUICK_BOLD_CHECK.is_match(line))
325 && (DOC_METADATA_PATTERN.is_match(line) || BOLD_TEXT_PATTERN.is_match(line))
326}
327
#[cfg(test)]
mod tests {
    use super::*;

    /// Both sample lines contain exactly four delimiter runs.
    #[test]
    fn test_emphasis_marker_parsing() {
        let mixed = find_emphasis_markers("This has *single* and **double** emphasis");
        assert_eq!(mixed.len(), 4);

        let singles = find_emphasis_markers("*start* and *end*");
        assert_eq!(singles.len(), 4);
    }

    /// Only the single-delimiter pair is reported; the `**strong**` pair is
    /// ignored by the single-span finder.
    #[test]
    fn test_single_emphasis_span_detection() {
        let line = "This has *valid* emphasis and **strong** too";
        let spans = find_single_emphasis_spans(line, find_emphasis_markers(line));
        assert_eq!(spans.len(), 1);

        let span = &spans[0];
        assert_eq!(span.content, "valid");
        assert!(!span.has_leading_space);
        assert!(!span.has_trailing_space);
    }

    /// Padding inside the delimiters is kept in `content` and flagged.
    #[test]
    fn test_emphasis_with_spaces() {
        let line = "This has * invalid * emphasis";
        let spans = find_emphasis_spans(line, find_emphasis_markers(line));
        assert_eq!(spans.len(), 1);

        let span = &spans[0];
        assert_eq!(span.content, " invalid ");
        assert!(span.has_leading_space);
        assert!(span.has_trailing_space);
    }

    /// Asterisk and underscore spans are detected independently, in order.
    #[test]
    fn test_mixed_markers() {
        let line = "This has *asterisk* and _underscore_ emphasis";
        let spans = find_single_emphasis_spans(line, find_emphasis_markers(line));
        assert_eq!(spans.len(), 2);
        assert_eq!(spans[0].opening.as_char(), '*');
        assert_eq!(spans[1].opening.as_char(), '_');
    }
}