1pub mod fancy;
2#[cfg(feature = "pcre2-engine")]
3pub mod pcre2;
4pub mod rust_regex;
5
6use std::fmt;
7
/// Identifies one of the regex backends this module can construct.
///
/// The `Pcre2` variant only exists when the `pcre2-engine` Cargo feature is
/// enabled.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EngineKind {
    /// Backend provided by the `rust_regex` submodule.
    RustRegex,
    /// Backend provided by the `fancy` submodule.
    FancyRegex,
    /// Backend provided by the `pcre2` submodule.
    #[cfg(feature = "pcre2-engine")]
    Pcre2,
}

impl EngineKind {
    /// Returns every engine kind compiled into this build, in declaration
    /// order.
    pub fn all() -> Vec<EngineKind> {
        vec![
            Self::RustRegex,
            Self::FancyRegex,
            #[cfg(feature = "pcre2-engine")]
            Self::Pcre2,
        ]
    }

    /// Returns the kind that follows `self` in the order given by
    /// [`EngineKind::all`], wrapping from the last kind back to the first.
    pub fn next(self) -> EngineKind {
        let cycle = Self::all();
        // `self` is always present in `all()`, so the fallback is never taken.
        let here = cycle.iter().position(|&kind| kind == self).unwrap_or(0);
        cycle[(here + 1) % cycle.len()]
    }
}

impl fmt::Display for EngineKind {
    /// Writes the human-readable label of the engine.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self {
            Self::RustRegex => "Rust regex",
            Self::FancyRegex => "fancy-regex",
            #[cfg(feature = "pcre2-engine")]
            Self::Pcre2 => "PCRE2",
        };
        f.write_str(label)
    }
}
49
/// Boolean switches passed to [`RegexEngine::compile`] alongside the pattern.
///
/// Every flag defaults to `false`. How each flag is applied is decided by the
/// individual engine implementations, which live outside this file.
#[derive(Debug, Clone, Default)]
pub struct EngineFlags {
    /// Case-insensitive matching flag.
    pub case_insensitive: bool,
    /// Multi-line mode flag.
    pub multi_line: bool,
    /// Flag requesting that `.` also match newlines.
    pub dot_matches_newline: bool,
    /// Unicode mode flag.
    pub unicode: bool,
    /// Extended mode flag.
    pub extended: bool,
}

impl EngineFlags {
    /// Flips [`EngineFlags::case_insensitive`].
    pub fn toggle_case_insensitive(&mut self) {
        self.case_insensitive ^= true;
    }

    /// Flips [`EngineFlags::multi_line`].
    pub fn toggle_multi_line(&mut self) {
        self.multi_line ^= true;
    }

    /// Flips [`EngineFlags::dot_matches_newline`].
    pub fn toggle_dot_matches_newline(&mut self) {
        self.dot_matches_newline ^= true;
    }

    /// Flips [`EngineFlags::unicode`].
    pub fn toggle_unicode(&mut self) {
        self.unicode ^= true;
    }

    /// Flips [`EngineFlags::extended`].
    pub fn toggle_extended(&mut self) {
        self.extended ^= true;
    }
}
76
77#[derive(Debug, Clone)]
78pub struct Match {
79 pub start: usize,
80 pub end: usize,
81 pub text: String,
82 pub captures: Vec<CaptureGroup>,
83}
84
/// A single capture group belonging to a [`Match`].
#[derive(Debug, Clone)]
pub struct CaptureGroup {
    /// Group number; `$N` placeholders and numeric `${N}` keys in a
    /// replacement template are compared against this value.
    pub index: usize,
    /// Optional group name; non-numeric `${name}` keys are compared against
    /// this value.
    pub name: Option<String>,
    /// Start offset of the group. Not read by code in this file —
    /// presumably a byte offset into the searched text, like `Match::start`.
    pub start: usize,
    /// End offset of the group. Not read by code in this file —
    /// presumably exclusive, like `Match::end`.
    pub end: usize,
    /// Text captured by the group; this is what a reference to the group
    /// expands to.
    pub text: String,
}
93
/// Error type shared by every regex engine in this module.
#[derive(Debug)]
pub enum EngineError {
    /// Carries a message rendered with the `Compile error: ` prefix.
    CompileError(String),
    /// Carries a message rendered with the `Match error: ` prefix.
    MatchError(String),
}

impl fmt::Display for EngineError {
    /// Writes the variant's prefix followed by the carried message.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let (prefix, message) = match self {
            Self::CompileError(message) => ("Compile error: ", message),
            Self::MatchError(message) => ("Match error: ", message),
        };
        f.write_str(prefix)?;
        f.write_str(message)
    }
}

impl std::error::Error for EngineError {}

/// Shorthand for a `Result` whose error type is [`EngineError`].
pub type EngineResult<T> = Result<T, EngineError>;
112
113pub trait RegexEngine: Send + Sync {
114 fn kind(&self) -> EngineKind;
115 fn compile(&self, pattern: &str, flags: &EngineFlags) -> EngineResult<Box<dyn CompiledRegex>>;
116}
117
118pub trait CompiledRegex: Send + Sync {
119 fn find_matches(&self, text: &str) -> EngineResult<Vec<Match>>;
120}
121
122pub fn create_engine(kind: EngineKind) -> Box<dyn RegexEngine> {
123 match kind {
124 EngineKind::RustRegex => Box::new(rust_regex::RustRegexEngine),
125 EngineKind::FancyRegex => Box::new(fancy::FancyRegexEngine),
126 #[cfg(feature = "pcre2-engine")]
127 EngineKind::Pcre2 => Box::new(pcre2::Pcre2Engine),
128 }
129}
130
/// A contiguous slice of the string produced by `replace_all`.
///
/// `start` and `end` are byte offsets into the replaced output (they are
/// taken from `String::len` while the output is being built).
#[derive(Debug, Clone)]
pub struct ReplaceSegment {
    /// Offset in the output where the segment begins.
    pub start: usize,
    /// Offset in the output just past the segment.
    pub end: usize,
    /// `true` when the segment came from expanding the replacement template,
    /// `false` when it was copied unchanged from the input text.
    pub is_replacement: bool,
}
139
140#[derive(Debug, Clone)]
141pub struct ReplaceResult {
142 pub output: String,
143 pub segments: Vec<ReplaceSegment>,
144}
145
146fn expand_replacement(template: &str, m: &Match) -> String {
151 let mut result = String::new();
152 let mut chars = template.char_indices().peekable();
153
154 while let Some((_i, c)) = chars.next() {
155 if c == '$' {
156 match chars.peek() {
157 None => {
158 result.push('$');
159 }
160 Some(&(_, '$')) => {
161 chars.next();
162 result.push('$');
163 }
164 Some(&(_, '&')) => {
165 chars.next();
166 result.push_str(&m.text);
167 }
168 Some(&(_, '{')) => {
169 chars.next(); let brace_start = chars.peek().map(|&(idx, _)| idx).unwrap_or(template.len());
171 if let Some(close) = template[brace_start..].find('}') {
172 let ref_name = &template[brace_start..brace_start + close];
173 if let Some(text) = lookup_capture(m, ref_name) {
174 result.push_str(text);
175 }
176 let end_byte = brace_start + close + 1;
178 while chars.peek().is_some_and(|&(idx, _)| idx < end_byte) {
179 chars.next();
180 }
181 } else {
182 result.push('$');
183 result.push('{');
184 }
185 }
186 Some(&(_, next_c)) if next_c.is_ascii_digit() => {
187 let (_, d1) = chars.next().unwrap();
188 let mut num_str = String::from(d1);
189 if let Some(&(_, d2)) = chars.peek() {
191 if d2.is_ascii_digit() {
192 chars.next();
193 num_str.push(d2);
194 }
195 }
196 let idx: usize = num_str.parse().unwrap_or(0);
197 if idx == 0 {
198 result.push_str(&m.text);
199 } else if let Some(cap) = m.captures.iter().find(|c| c.index == idx) {
200 result.push_str(&cap.text);
201 }
202 }
203 Some(_) => {
204 result.push('$');
205 }
206 }
207 } else {
208 result.push(c);
209 }
210 }
211
212 result
213}
214
215fn lookup_capture<'a>(m: &'a Match, key: &str) -> Option<&'a str> {
217 if let Ok(idx) = key.parse::<usize>() {
219 if idx == 0 {
220 return Some(&m.text);
221 }
222 return m
223 .captures
224 .iter()
225 .find(|c| c.index == idx)
226 .map(|c| c.text.as_str());
227 }
228 m.captures
230 .iter()
231 .find(|c| c.name.as_deref() == Some(key))
232 .map(|c| c.text.as_str())
233}
234
235pub fn replace_all(text: &str, matches: &[Match], template: &str) -> ReplaceResult {
237 let mut output = String::new();
238 let mut segments = Vec::new();
239 let mut pos = 0;
240
241 for m in matches {
242 if m.start > pos {
244 let seg_start = output.len();
245 output.push_str(&text[pos..m.start]);
246 segments.push(ReplaceSegment {
247 start: seg_start,
248 end: output.len(),
249 is_replacement: false,
250 });
251 }
252 let expanded = expand_replacement(template, m);
254 if !expanded.is_empty() {
255 let seg_start = output.len();
256 output.push_str(&expanded);
257 segments.push(ReplaceSegment {
258 start: seg_start,
259 end: output.len(),
260 is_replacement: true,
261 });
262 }
263 pos = m.end;
264 }
265
266 if pos < text.len() {
268 let seg_start = output.len();
269 output.push_str(&text[pos..]);
270 segments.push(ReplaceSegment {
271 start: seg_start,
272 end: output.len(),
273 is_replacement: false,
274 });
275 }
276
277 ReplaceResult { output, segments }
278}
279
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `Match` from borrowed text so test bodies stay compact.
    fn make_match(start: usize, end: usize, text: &str, captures: Vec<CaptureGroup>) -> Match {
        Match {
            start,
            end,
            text: text.to_string(),
            captures,
        }
    }

    /// Builds a `CaptureGroup` from borrowed name and text.
    fn make_cap(
        index: usize,
        name: Option<&str>,
        start: usize,
        end: usize,
        text: &str,
    ) -> CaptureGroup {
        CaptureGroup {
            index,
            name: name.map(|s| s.to_string()),
            start,
            end,
            text: text.to_string(),
        }
    }

    /// Numbered references (`$1`, `$2`) are replaced by the matching groups.
    #[test]
    fn test_replace_all_basic() {
        let matches = vec![make_match(
            0,
            12,
            "user@example",
            vec![
                make_cap(1, None, 0, 4, "user"),
                make_cap(2, None, 5, 12, "example"),
            ],
        )];
        let result = replace_all("user@example", &matches, "$2=$1");
        assert_eq!(result.output, "example=user");
    }

    /// With no matches the input is returned as one non-replacement segment.
    #[test]
    fn test_replace_all_no_matches() {
        let result = replace_all("hello world", &[], "replacement");
        assert_eq!(result.output, "hello world");
        assert_eq!(result.segments.len(), 1);
        assert!(!result.segments[0].is_replacement);
    }

    /// An empty template deletes the matched spans and nothing else.
    #[test]
    fn test_replace_all_empty_template() {
        let matches = vec![
            make_match(4, 7, "123", vec![]),
            make_match(12, 15, "456", vec![]),
        ];
        let result = replace_all("abc 123 def 456 ghi", &matches, "");
        // Only the digits are removed; the space on each side of a deleted
        // match stays, so every gap contains two spaces.
        assert_eq!(result.output, "abc  def  ghi");
    }

    /// `$$` yields a single literal dollar sign.
    #[test]
    fn test_replace_all_literal_dollar() {
        let matches = vec![make_match(0, 3, "foo", vec![])];
        let result = replace_all("foo", &matches, "$$bar");
        assert_eq!(result.output, "$bar");
    }

    /// `${name}` references resolve through the groups' names.
    #[test]
    fn test_replace_all_named_groups() {
        let matches = vec![make_match(
            0,
            7,
            "2024-01",
            vec![
                make_cap(1, Some("y"), 0, 4, "2024"),
                make_cap(2, Some("m"), 5, 7, "01"),
            ],
        )];
        let result = replace_all("2024-01", &matches, "${m}/${y}");
        assert_eq!(result.output, "01/2024");
    }

    /// `$0` and `$&` both stand for the whole matched text.
    #[test]
    fn test_expand_replacement_whole_match() {
        let m = make_match(0, 5, "hello", vec![]);
        assert_eq!(expand_replacement("$0", &m), "hello");
        assert_eq!(expand_replacement("$&", &m), "hello");
        assert_eq!(expand_replacement("[$0]", &m), "[hello]");
    }

    /// Multi-byte characters around placeholders are copied intact.
    #[test]
    fn test_expand_replacement_non_ascii() {
        let m = make_match(0, 5, "hello", vec![]);
        assert_eq!(expand_replacement("café $0", &m), "café hello");
        assert_eq!(expand_replacement("→$0←", &m), "→hello←");
        assert_eq!(expand_replacement("日本語", &m), "日本語");
        assert_eq!(expand_replacement("über $& cool", &m), "über hello cool");
    }

    /// Segment offsets index into the output and alternate between copied
    /// input and replacement text.
    #[test]
    fn test_replace_segments_tracking() {
        let matches = vec![make_match(6, 9, "123", vec![])];
        let result = replace_all("hello 123 world", &matches, "NUM");
        assert_eq!(result.output, "hello NUM world");
        assert_eq!(result.segments.len(), 3);
        assert!(!result.segments[0].is_replacement);
        assert_eq!(
            &result.output[result.segments[0].start..result.segments[0].end],
            "hello "
        );
        assert!(result.segments[1].is_replacement);
        assert_eq!(
            &result.output[result.segments[1].start..result.segments[1].end],
            "NUM"
        );
        assert!(!result.segments[2].is_replacement);
        assert_eq!(
            &result.output[result.segments[2].start..result.segments[2].end],
            " world"
        );
    }
}