rumdl_lib/utils/
emphasis_utils.rs1use lazy_static::lazy_static;
2use regex::Regex;
3
4lazy_static! {
5 static ref FRONT_MATTER_DELIM: Regex = Regex::new(r"^---\s*$").unwrap();
7
8 static ref INLINE_CODE: Regex = Regex::new(r"(`+)([^`]|[^`].*?[^`])(`+)").unwrap();
10
11 static ref LIST_MARKER: Regex = Regex::new(r"^\s*[*+-]\s+").unwrap();
13
14 static ref VALID_START_EMPHASIS: Regex = Regex::new(r"^(\*\*[^*\s]|\*[^*\s]|__[^_\s]|_[^_\s])").unwrap();
16
17 static ref DOC_METADATA_PATTERN: Regex = Regex::new(r"^\s*\*?\s*\*\*[^*]+\*\*\s*:").unwrap();
19
20 static ref BOLD_TEXT_PATTERN: Regex = Regex::new(r"\*\*[^*\s][^*]*[^*\s]\*\*|\*\*[^*\s]\*\*").unwrap();
22
23 static ref QUICK_DOC_CHECK: Regex = Regex::new(r"^\s*\*\s+\*").unwrap();
25 static ref QUICK_BOLD_CHECK: Regex = Regex::new(r"\*\*[^*\s]").unwrap();
26}
27
/// A run of identical emphasis delimiter bytes located within a line.
#[derive(Debug, Clone, PartialEq)]
pub struct EmphasisMarker {
    /// The delimiter byte itself (`b'*'` or `b'_'` when produced by `find_emphasis_markers`).
    pub marker_type: u8,
    /// How many consecutive delimiter bytes make up the run.
    pub count: u8,
    /// Byte offset of the first delimiter byte within the line.
    pub start_pos: usize,
}
35
36impl EmphasisMarker {
37 #[inline]
38 pub fn end_pos(&self) -> usize {
39 self.start_pos + self.count as usize
40 }
41
42 #[inline]
43 pub fn as_char(&self) -> char {
44 self.marker_type as char
45 }
46}
47
48#[derive(Debug, Clone)]
50pub struct EmphasisSpan {
51 pub opening: EmphasisMarker,
52 pub closing: EmphasisMarker,
53 pub content: String,
54 pub has_leading_space: bool,
55 pub has_trailing_space: bool,
56}
57
58#[inline]
61pub fn replace_inline_code(line: &str) -> String {
62 if !line.contains('`') {
64 return line.to_string();
65 }
66
67 let mut result = line.to_string();
68 let mut offset = 0;
69
70 for cap in INLINE_CODE.captures_iter(line) {
71 if let (Some(full_match), Some(_opening), Some(_content), Some(_closing)) =
72 (cap.get(0), cap.get(1), cap.get(2), cap.get(3))
73 {
74 let match_start = full_match.start();
75 let match_end = full_match.end();
76 let placeholder = "X".repeat(match_end - match_start);
78
79 result.replace_range(match_start + offset..match_end + offset, &placeholder);
80 offset += placeholder.len() - (match_end - match_start);
81 }
82 }
83
84 result
85}
86
87#[inline]
89pub fn find_emphasis_markers(line: &str) -> Vec<EmphasisMarker> {
90 if !line.contains('*') && !line.contains('_') {
92 return Vec::new();
93 }
94
95 let mut markers = Vec::new();
96 let bytes = line.as_bytes();
97 let mut i = 0;
98
99 while i < bytes.len() {
100 let byte = bytes[i];
101 if byte == b'*' || byte == b'_' {
102 let start_pos = i;
103 let mut count = 1u8;
104
105 while i + (count as usize) < bytes.len() && bytes[i + (count as usize)] == byte && count < 3 {
107 count += 1;
108 }
109
110 if count == 1 || count == 2 {
112 markers.push(EmphasisMarker {
113 marker_type: byte,
114 count,
115 start_pos,
116 });
117 }
118
119 i += count as usize;
120 } else {
121 i += 1;
122 }
123 }
124
125 markers
126}
127
128pub fn find_single_emphasis_spans(line: &str, markers: Vec<EmphasisMarker>) -> Vec<EmphasisSpan> {
130 if markers.len() < 2 {
132 return Vec::new();
133 }
134
135 let mut spans = Vec::new();
136 let mut used_markers = vec![false; markers.len()];
137
138 for i in 0..markers.len() {
140 if used_markers[i] || markers[i].count != 1 {
141 continue;
142 }
143
144 let opening = &markers[i];
145
146 for j in (i + 1)..markers.len() {
148 if used_markers[j] {
149 continue;
150 }
151
152 let closing = &markers[j];
153
154 if closing.marker_type == opening.marker_type && closing.count == 1 {
156 let content_start = opening.end_pos();
157 let content_end = closing.start_pos;
158
159 if content_end > content_start {
160 let content = &line[content_start..content_end];
161
162 if is_valid_emphasis_content_fast(content) && is_valid_emphasis_span_fast(line, opening, closing) {
164 let crosses_markers = markers[i + 1..j]
166 .iter()
167 .any(|marker| marker.marker_type == opening.marker_type && marker.count == 1);
168
169 if !crosses_markers {
170 let has_leading_space = content.starts_with(' ') || content.starts_with('\t');
171 let has_trailing_space = content.ends_with(' ') || content.ends_with('\t');
172
173 spans.push(EmphasisSpan {
174 opening: opening.clone(),
175 closing: closing.clone(),
176 content: content.to_string(),
177 has_leading_space,
178 has_trailing_space,
179 });
180
181 used_markers[i] = true;
183 used_markers[j] = true;
184 break;
185 }
186 }
187 }
188 }
189 }
190 }
191
192 spans
193}
194
195pub fn find_emphasis_spans(line: &str, markers: Vec<EmphasisMarker>) -> Vec<EmphasisSpan> {
197 if markers.len() < 2 {
199 return Vec::new();
200 }
201
202 let mut spans = Vec::new();
203 let mut used_markers = vec![false; markers.len()];
204
205 for i in 0..markers.len() {
207 if used_markers[i] {
208 continue;
209 }
210
211 let opening = &markers[i];
212
213 for j in (i + 1)..markers.len() {
215 if used_markers[j] {
216 continue;
217 }
218
219 let closing = &markers[j];
220
221 if closing.marker_type == opening.marker_type && closing.count == opening.count {
223 let content_start = opening.end_pos();
224 let content_end = closing.start_pos;
225
226 if content_end > content_start {
227 let content = &line[content_start..content_end];
228
229 if is_valid_emphasis_content_fast(content) && is_valid_emphasis_span_fast(line, opening, closing) {
231 let crosses_markers = markers[i + 1..j]
233 .iter()
234 .any(|marker| marker.marker_type == opening.marker_type);
235
236 if !crosses_markers {
237 let has_leading_space = content.starts_with(' ') || content.starts_with('\t');
238 let has_trailing_space = content.ends_with(' ') || content.ends_with('\t');
239
240 spans.push(EmphasisSpan {
241 opening: opening.clone(),
242 closing: closing.clone(),
243 content: content.to_string(),
244 has_leading_space,
245 has_trailing_space,
246 });
247
248 used_markers[i] = true;
250 used_markers[j] = true;
251 break;
252 }
253 }
254 }
255 }
256 }
257 }
258
259 spans
260}
261
262#[inline]
264pub fn is_valid_emphasis_span_fast(line: &str, opening: &EmphasisMarker, closing: &EmphasisMarker) -> bool {
265 let content_start = opening.end_pos();
266 let content_end = closing.start_pos;
267
268 if content_end <= content_start {
270 return false;
271 }
272
273 let content = &line[content_start..content_end];
274 if content.trim().is_empty() {
275 return false;
276 }
277
278 let bytes = line.as_bytes();
280
281 let valid_opening = opening.start_pos == 0
283 || matches!(
284 bytes.get(opening.start_pos.saturating_sub(1)),
285 Some(&b' ')
286 | Some(&b'\t')
287 | Some(&b'(')
288 | Some(&b'[')
289 | Some(&b'{')
290 | Some(&b'"')
291 | Some(&b'\'')
292 | Some(&b'>')
293 );
294
295 let valid_closing = closing.end_pos() >= bytes.len()
297 || matches!(
298 bytes.get(closing.end_pos()),
299 Some(&b' ')
300 | Some(&b'\t')
301 | Some(&b')')
302 | Some(&b']')
303 | Some(&b'}')
304 | Some(&b'"')
305 | Some(&b'\'')
306 | Some(&b'.')
307 | Some(&b',')
308 | Some(&b'!')
309 | Some(&b'?')
310 | Some(&b';')
311 | Some(&b':')
312 | Some(&b'<')
313 );
314
315 valid_opening && valid_closing && !content.contains('\n')
316}
317
/// Reports whether `content` holds at least one non-whitespace character.
///
/// Empty and whitespace-only strings are rejected.
#[inline]
pub fn is_valid_emphasis_content_fast(content: &str) -> bool {
    content.chars().any(|c| !c.is_whitespace())
}
323
324pub fn is_likely_list_line(line: &str) -> bool {
326 LIST_MARKER.is_match(line)
327}
328
329pub fn has_doc_patterns(line: &str) -> bool {
331 (QUICK_DOC_CHECK.is_match(line) || QUICK_BOLD_CHECK.is_match(line))
332 && (DOC_METADATA_PATTERN.is_match(line) || BOLD_TEXT_PATTERN.is_match(line))
333}
334
#[cfg(test)]
mod tests {
    use super::*;

    /// Every delimiter run, single or double, is reported as exactly one marker.
    #[test]
    fn test_emphasis_marker_parsing() {
        let lines = ["This has *single* and **double** emphasis", "*start* and *end*"];
        for line in lines.iter() {
            assert_eq!(find_emphasis_markers(line).len(), 4);
        }
    }

    /// Only the single-asterisk pair is returned; the double-asterisk pair is ignored.
    #[test]
    fn test_single_emphasis_span_detection() {
        let line = "This has *valid* emphasis and **strong** too";
        let spans = find_single_emphasis_spans(line, find_emphasis_markers(line));

        assert_eq!(spans.len(), 1);
        let span = &spans[0];
        assert_eq!(span.content, "valid");
        assert!(!span.has_leading_space);
        assert!(!span.has_trailing_space);
    }

    /// Padding inside the markers is kept in the content and reported through the flags.
    #[test]
    fn test_emphasis_with_spaces() {
        let line = "This has * invalid * emphasis";
        let spans = find_emphasis_spans(line, find_emphasis_markers(line));

        assert_eq!(spans.len(), 1);
        let span = &spans[0];
        assert_eq!(span.content, " invalid ");
        assert!(span.has_leading_space);
        assert!(span.has_trailing_space);
    }

    /// Asterisk and underscore spans on the same line are both found, in order.
    #[test]
    fn test_mixed_markers() {
        let line = "This has *asterisk* and _underscore_ emphasis";
        let spans = find_single_emphasis_spans(line, find_emphasis_markers(line));

        assert_eq!(spans.len(), 2);
        assert_eq!(spans[0].opening.as_char(), '*');
        assert_eq!(spans[1].opening.as_char(), '_');
    }
}