1pub mod fancy;
2#[cfg(feature = "pcre2-engine")]
3pub mod pcre2;
4pub mod rust_regex;
5
6use std::fmt;
7
/// Identifies one of the regex engine backends this module can construct.
///
/// The `Pcre2` variant only exists when the crate is built with the
/// `pcre2-engine` feature.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum EngineKind {
    /// Backend provided by the `rust_regex` submodule.
    RustRegex,
    /// Backend provided by the `fancy` submodule.
    FancyRegex,
    /// Backend provided by the feature-gated `pcre2` submodule.
    #[cfg(feature = "pcre2-engine")]
    Pcre2,
}
15
16impl EngineKind {
17 pub fn all() -> Vec<EngineKind> {
18 vec![
19 EngineKind::RustRegex,
20 EngineKind::FancyRegex,
21 #[cfg(feature = "pcre2-engine")]
22 EngineKind::Pcre2,
23 ]
24 }
25
26 pub fn next(self) -> EngineKind {
27 match self {
28 EngineKind::RustRegex => EngineKind::FancyRegex,
29 #[cfg(feature = "pcre2-engine")]
30 EngineKind::FancyRegex => EngineKind::Pcre2,
31 #[cfg(not(feature = "pcre2-engine"))]
32 EngineKind::FancyRegex => EngineKind::RustRegex,
33 #[cfg(feature = "pcre2-engine")]
34 EngineKind::Pcre2 => EngineKind::RustRegex,
35 }
36 }
37}
38
39impl fmt::Display for EngineKind {
40 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
41 match self {
42 EngineKind::RustRegex => write!(f, "Rust regex"),
43 EngineKind::FancyRegex => write!(f, "fancy-regex"),
44 #[cfg(feature = "pcre2-engine")]
45 EngineKind::Pcre2 => write!(f, "PCRE2"),
46 }
47 }
48}
49
/// Boolean switches that can be rendered as inline regex flag letters.
///
/// Every switch is `false` in the `Default` value.
#[derive(Clone, Copy, Debug, Default)]
pub struct EngineFlags {
    /// Rendered as the inline flag letter `i`.
    pub case_insensitive: bool,
    /// Rendered as the inline flag letter `m`.
    pub multi_line: bool,
    /// Rendered as the inline flag letter `s`.
    pub dot_matches_newline: bool,
    /// Rendered as the inline flag letter `u`.
    pub unicode: bool,
    /// Rendered as the inline flag letter `x`.
    pub extended: bool,
}
58
59impl EngineFlags {
60 pub fn to_inline_prefix(&self) -> String {
61 let mut s = String::new();
62 if self.case_insensitive {
63 s.push('i');
64 }
65 if self.multi_line {
66 s.push('m');
67 }
68 if self.dot_matches_newline {
69 s.push('s');
70 }
71 if self.unicode {
72 s.push('u');
73 }
74 if self.extended {
75 s.push('x');
76 }
77 s
78 }
79
80 pub fn wrap_pattern(&self, pattern: &str) -> String {
81 let prefix = self.to_inline_prefix();
82 if prefix.is_empty() {
83 pattern.to_string()
84 } else {
85 format!("(?{prefix}){pattern}")
86 }
87 }
88
89 pub fn toggle_case_insensitive(&mut self) {
90 self.case_insensitive = !self.case_insensitive;
91 }
92 pub fn toggle_multi_line(&mut self) {
93 self.multi_line = !self.multi_line;
94 }
95 pub fn toggle_dot_matches_newline(&mut self) {
96 self.dot_matches_newline = !self.dot_matches_newline;
97 }
98 pub fn toggle_unicode(&mut self) {
99 self.unicode = !self.unicode;
100 }
101 pub fn toggle_extended(&mut self) {
102 self.extended = !self.extended;
103 }
104}
105
106#[derive(Debug, Clone)]
107pub struct Match {
108 pub start: usize,
109 pub end: usize,
110 pub text: String,
111 pub captures: Vec<CaptureGroup>,
112}
113
/// A single capture group belonging to a `Match`.
#[derive(Clone, Debug)]
pub struct CaptureGroup {
    /// Group number; numeric references such as `$1` or `${1}` in a
    /// replacement template are resolved against this value.
    pub index: usize,
    /// Group name, if the group has one; `${name}` references are resolved
    /// against this value.
    pub name: Option<String>,
    /// Start of the group. Presumably a byte offset into the searched text
    /// like `Match::start`; nothing in this file reads it, so confirm with
    /// the engine implementations.
    pub start: usize,
    /// End of the group; same caveat as `start`.
    pub end: usize,
    /// Text captured by the group; this is what replacement templates insert.
    pub text: String,
}
122
/// Error type used by the engine traits in this module (see `EngineResult`).
#[derive(Debug)]
pub enum EngineError {
    /// Carries a message that `Display` renders as `Compile error: <message>`.
    CompileError(String),
    /// Carries a message that `Display` renders as `Match error: <message>`.
    MatchError(String),
}
128
129impl fmt::Display for EngineError {
130 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
131 match self {
132 EngineError::CompileError(msg) => write!(f, "Compile error: {msg}"),
133 EngineError::MatchError(msg) => write!(f, "Match error: {msg}"),
134 }
135 }
136}
137
138impl std::error::Error for EngineError {}
139
140pub type EngineResult<T> = Result<T, EngineError>;
141
142pub trait RegexEngine: Send + Sync {
143 fn kind(&self) -> EngineKind;
144 fn compile(&self, pattern: &str, flags: &EngineFlags) -> EngineResult<Box<dyn CompiledRegex>>;
145}
146
147pub trait CompiledRegex: Send + Sync {
148 fn find_matches(&self, text: &str) -> EngineResult<Vec<Match>>;
149}
150
151pub fn create_engine(kind: EngineKind) -> Box<dyn RegexEngine> {
152 match kind {
153 EngineKind::RustRegex => Box::new(rust_regex::RustRegexEngine),
154 EngineKind::FancyRegex => Box::new(fancy::FancyRegexEngine),
155 #[cfg(feature = "pcre2-engine")]
156 EngineKind::Pcre2 => Box::new(pcre2::Pcre2Engine),
157 }
158}
159
/// A contiguous span of a `ReplaceResult`'s output, tagged with its origin.
#[derive(Clone, Debug)]
pub struct ReplaceSegment {
    /// Byte offset in the output where the span begins.
    pub start: usize,
    /// Byte offset in the output just past the span.
    pub end: usize,
    /// `true` when the span came from expanding the replacement template,
    /// `false` when it was copied unchanged from the input text.
    pub is_replacement: bool,
}
168
169#[derive(Debug, Clone)]
170pub struct ReplaceResult {
171 pub output: String,
172 pub segments: Vec<ReplaceSegment>,
173}
174
175fn expand_replacement(template: &str, m: &Match) -> String {
180 let mut result = String::new();
181 let mut chars = template.char_indices().peekable();
182
183 while let Some((_i, c)) = chars.next() {
184 if c == '$' {
185 match chars.peek() {
186 None => {
187 result.push('$');
188 }
189 Some(&(_, '$')) => {
190 chars.next();
191 result.push('$');
192 }
193 Some(&(_, '&')) => {
194 chars.next();
195 result.push_str(&m.text);
196 }
197 Some(&(_, '{')) => {
198 chars.next(); let brace_start = chars.peek().map(|&(idx, _)| idx).unwrap_or(template.len());
200 if let Some(close) = template[brace_start..].find('}') {
201 let ref_name = &template[brace_start..brace_start + close];
202 if let Some(text) = lookup_capture(m, ref_name) {
203 result.push_str(text);
204 }
205 let end_byte = brace_start + close + 1;
207 while chars.peek().is_some_and(|&(idx, _)| idx < end_byte) {
208 chars.next();
209 }
210 } else {
211 result.push('$');
212 result.push('{');
213 }
214 }
215 Some(&(_, next_c)) if next_c.is_ascii_digit() => {
216 let (_, d1) = chars.next().unwrap();
217 let mut num_str = String::from(d1);
218 if let Some(&(_, d2)) = chars.peek() {
220 if d2.is_ascii_digit() {
221 chars.next();
222 num_str.push(d2);
223 }
224 }
225 let idx: usize = num_str.parse().unwrap_or(0);
226 if idx == 0 {
227 result.push_str(&m.text);
228 } else if let Some(cap) = m.captures.iter().find(|c| c.index == idx) {
229 result.push_str(&cap.text);
230 }
231 }
232 Some(_) => {
233 result.push('$');
234 }
235 }
236 } else {
237 result.push(c);
238 }
239 }
240
241 result
242}
243
244pub fn lookup_capture<'a>(m: &'a Match, key: &str) -> Option<&'a str> {
246 if let Ok(idx) = key.parse::<usize>() {
248 if idx == 0 {
249 return Some(&m.text);
250 }
251 return m
252 .captures
253 .iter()
254 .find(|c| c.index == idx)
255 .map(|c| c.text.as_str());
256 }
257 m.captures
259 .iter()
260 .find(|c| c.name.as_deref() == Some(key))
261 .map(|c| c.text.as_str())
262}
263
264pub fn replace_all(text: &str, matches: &[Match], template: &str) -> ReplaceResult {
266 let mut output = String::new();
267 let mut segments = Vec::new();
268 let mut pos = 0;
269
270 for m in matches {
271 if m.start > pos {
273 let seg_start = output.len();
274 output.push_str(&text[pos..m.start]);
275 segments.push(ReplaceSegment {
276 start: seg_start,
277 end: output.len(),
278 is_replacement: false,
279 });
280 }
281 let expanded = expand_replacement(template, m);
283 if !expanded.is_empty() {
284 let seg_start = output.len();
285 output.push_str(&expanded);
286 segments.push(ReplaceSegment {
287 start: seg_start,
288 end: output.len(),
289 is_replacement: true,
290 });
291 }
292 pos = m.end;
293 }
294
295 if pos < text.len() {
297 let seg_start = output.len();
298 output.push_str(&text[pos..]);
299 segments.push(ReplaceSegment {
300 start: seg_start,
301 end: output.len(),
302 is_replacement: false,
303 });
304 }
305
306 ReplaceResult { output, segments }
307}
308
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `Match` fixture, copying `text` into an owned `String`.
    fn make_match(start: usize, end: usize, text: &str, captures: Vec<CaptureGroup>) -> Match {
        Match {
            start,
            end,
            text: text.to_string(),
            captures,
        }
    }

    /// Builds a `CaptureGroup` fixture with an optional group name.
    fn make_cap(
        index: usize,
        name: Option<&str>,
        start: usize,
        end: usize,
        text: &str,
    ) -> CaptureGroup {
        CaptureGroup {
            index,
            name: name.map(|s| s.to_string()),
            start,
            end,
            text: text.to_string(),
        }
    }

    /// Numbered references (`$1`, `$2`) pull in the corresponding groups.
    #[test]
    fn test_replace_all_basic() {
        let matches = vec![make_match(
            0,
            12,
            "user@example",
            vec![
                make_cap(1, None, 0, 4, "user"),
                make_cap(2, None, 5, 12, "example"),
            ],
        )];
        let result = replace_all("user@example", &matches, "$2=$1");
        assert_eq!(result.output, "example=user");
    }

    /// With no matches the input is copied through as one unreplaced segment.
    #[test]
    fn test_replace_all_no_matches() {
        let result = replace_all("hello world", &[], "replacement");
        assert_eq!(result.output, "hello world");
        assert_eq!(result.segments.len(), 1);
        assert!(!result.segments[0].is_replacement);
    }

    /// An empty template deletes the matched text.
    #[test]
    fn test_replace_all_empty_template() {
        let matches = vec![
            make_match(4, 7, "123", vec![]),
            make_match(12, 15, "456", vec![]),
        ];
        let result = replace_all("abc 123 def 456 ghi", &matches, "");
        // Only the digits are removed; the space on each side of a removed
        // number stays, so two adjacent spaces remain at each site.
        assert_eq!(result.output, "abc  def  ghi");
    }

    /// `$$` in a template produces a single literal `$`.
    #[test]
    fn test_replace_all_literal_dollar() {
        let matches = vec![make_match(0, 3, "foo", vec![])];
        let result = replace_all("foo", &matches, "$$bar");
        assert_eq!(result.output, "$bar");
    }

    /// `${name}` references resolve against capture-group names.
    #[test]
    fn test_replace_all_named_groups() {
        let matches = vec![make_match(
            0,
            7,
            "2024-01",
            vec![
                make_cap(1, Some("y"), 0, 4, "2024"),
                make_cap(2, Some("m"), 5, 7, "01"),
            ],
        )];
        let result = replace_all("2024-01", &matches, "${m}/${y}");
        assert_eq!(result.output, "01/2024");
    }

    /// Both `$0` and `$&` expand to the whole match.
    #[test]
    fn test_expand_replacement_whole_match() {
        let m = make_match(0, 5, "hello", vec![]);
        assert_eq!(expand_replacement("$0", &m), "hello");
        assert_eq!(expand_replacement("$&", &m), "hello");
        assert_eq!(expand_replacement("[$0]", &m), "[hello]");
    }

    /// Multi-byte characters in the template are copied through intact.
    #[test]
    fn test_expand_replacement_non_ascii() {
        let m = make_match(0, 5, "hello", vec![]);
        assert_eq!(expand_replacement("café $0", &m), "café hello");
        assert_eq!(expand_replacement("→$0←", &m), "→hello←");
        assert_eq!(expand_replacement("日本語", &m), "日本語");
        assert_eq!(expand_replacement("über $& cool", &m), "über hello cool");
    }

    /// Segment offsets index into the output and flag replaced spans.
    #[test]
    fn test_replace_segments_tracking() {
        let matches = vec![make_match(6, 9, "123", vec![])];
        let result = replace_all("hello 123 world", &matches, "NUM");
        assert_eq!(result.output, "hello NUM world");
        assert_eq!(result.segments.len(), 3);

        assert!(!result.segments[0].is_replacement);
        assert_eq!(
            &result.output[result.segments[0].start..result.segments[0].end],
            "hello "
        );

        assert!(result.segments[1].is_replacement);
        assert_eq!(
            &result.output[result.segments[1].start..result.segments[1].end],
            "NUM"
        );

        assert!(!result.segments[2].is_replacement);
        assert_eq!(
            &result.output[result.segments[2].start..result.segments[2].end],
            " world"
        );
    }
}