1use crate::{Captures, EncodedChars, Error, Regex, RegexOptions, Region, SearchOptions};
2
3use std::os::raw::c_int;
4use std::ptr::null_mut;
5
/// Search strategy passed to the Oniguruma regset search.
///
/// Each variant maps one-to-one onto a raw `OnigRegSetLead` constant.
// NOTE(review): the precise matching semantics of each lead are defined by
// Oniguruma, not by this file — consult the Oniguruma API notes before
// documenting them in more detail.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RegSetLead {
    /// Maps to `ONIG_REGSET_POSITION_LEAD`; this is the lead used by
    /// `RegSet::find` and `RegSet::captures`.
    Position,
    /// Maps to `ONIG_REGSET_REGEX_LEAD`.
    Regex,
    /// Maps to `ONIG_REGSET_PRIORITY_TO_REGEX_ORDER`.
    PriorityToRegexOrder,
}
16
17impl RegSetLead {
18 fn to_onig_lead(self) -> onig_sys::OnigRegSetLead {
19 match self {
20 RegSetLead::Position => onig_sys::OnigRegSetLead_ONIG_REGSET_POSITION_LEAD,
21 RegSetLead::Regex => onig_sys::OnigRegSetLead_ONIG_REGSET_REGEX_LEAD,
22 RegSetLead::PriorityToRegexOrder => {
23 onig_sys::OnigRegSetLead_ONIG_REGSET_PRIORITY_TO_REGEX_ORDER
24 }
25 }
26 }
27}
28
/// A set of compiled regular expressions that are searched together through
/// Oniguruma's regset API.
#[derive(Debug)]
pub struct RegSet {
    /// Raw handle to the Oniguruma regset; released with `onig_regset_free`
    /// when the `RegSet` is dropped.
    raw: *mut onig_sys::OnigRegSet,
    /// Options used to compile every pattern passed to `add_pattern` and
    /// `replace_pattern`.
    options: RegexOptions,
}
37
// SAFETY: NOTE(review): `Send` presumably relies on the `RegSet` being the sole
// owner of its raw handle (nothing in this file clones or aliases `raw`) —
// confirm.
unsafe impl Send for RegSet {}
// NOTE(review): `captures_with_options` reads the match region back out of the
// set after a `&self` search, which suggests a search writes into state held
// by the set itself. If so, concurrent searches through a shared `&RegSet`
// would race on that state. Confirm against the Oniguruma regset
// implementation before relying on `Sync`.
unsafe impl Sync for RegSet {}
40
41impl RegSet {
    /// Builds a `RegSet` from `patterns`, compiling each one with
    /// `RegexOptions::REGEX_OPTION_NONE`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `with_options`.
    pub fn new(patterns: &[&str]) -> Result<RegSet, Error> {
        Self::with_options(patterns, RegexOptions::REGEX_OPTION_NONE)
    }
56
57 pub fn with_options(patterns: &[&str], options: RegexOptions) -> Result<RegSet, Error> {
69 let mut regset = Self::empty_with_options(options)?;
70
71 for pat in patterns {
72 regset.add_pattern(pat)?;
73 }
74
75 Ok(regset)
76 }
77
    /// Creates a `RegSet` with no patterns, using
    /// `RegexOptions::REGEX_OPTION_NONE` for patterns added later.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `empty_with_options`.
    pub fn empty() -> Result<RegSet, Error> {
        Self::empty_with_options(RegexOptions::REGEX_OPTION_NONE)
    }
95
96 pub fn empty_with_options(options: RegexOptions) -> Result<RegSet, Error> {
111 let mut raw_set: *mut onig_sys::OnigRegSet = null_mut();
112 let raw_set_ptr = &mut raw_set as *mut *mut onig_sys::OnigRegSet;
113
114 let err = unsafe { onig_sys::onig_regset_new(raw_set_ptr, 0, null_mut()) };
115
116 if err != onig_sys::ONIG_NORMAL as i32 {
117 return Err(Error::from_code(err));
118 }
119
120 if raw_set.is_null() {
121 return Err(Error::custom("Failed to create RegSet"));
122 }
123
124 Ok(RegSet {
125 raw: raw_set,
126 options,
127 })
128 }
129
130 pub fn add_pattern(&mut self, pattern: &str) -> Result<usize, Error> {
147 let new_regex = Regex::with_options(pattern, self.options, crate::Syntax::default())?;
149
150 let new_index = self.len();
152
153 let err = unsafe { onig_sys::onig_regset_add(self.raw, new_regex.as_raw()) };
155
156 if err != onig_sys::ONIG_NORMAL as i32 {
157 return Err(Error::from_code(err));
158 }
159
160 std::mem::forget(new_regex);
162
163 Ok(new_index)
164 }
165
166 pub fn replace_pattern(&mut self, index: usize, pattern: &str) -> Result<(), Error> {
180 let regset_len = self.len();
181 if index >= regset_len {
182 return Err(Error::custom(format!(
183 "Index {} is out of bounds for RegSet with {} regexes",
184 index, regset_len
185 )));
186 }
187
188 let new_regex = Regex::with_options(pattern, self.options, crate::Syntax::default())?;
189
190 let err =
192 unsafe { onig_sys::onig_regset_replace(self.raw, index as c_int, new_regex.as_raw()) };
193
194 if err != onig_sys::ONIG_NORMAL as i32 {
195 return Err(Error::from_code(err));
196 }
197
198 std::mem::forget(new_regex);
200
201 Ok(())
202 }
203
204 pub fn len(&self) -> usize {
206 unsafe { onig_sys::onig_regset_number_of_regex(self.raw) as usize }
207 }
208
    /// Returns `true` when the set contains no regexes.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
213
    /// Searches all of `text` using `RegSetLead::Position` and
    /// `SearchOptions::SEARCH_OPTION_NONE`.
    ///
    /// Returns `Some((regex_index, match_position))` on a match, where
    /// `regex_index` is the index of the matching pattern in the set and
    /// `match_position` is the position reported by Oniguruma. Returns `None`
    /// otherwise.
    pub fn find(&self, text: &str) -> Option<(usize, usize)> {
        self.find_with_options(
            text,
            RegSetLead::Position,
            SearchOptions::SEARCH_OPTION_NONE,
        )
    }
235
236 pub fn find_with_options(
253 &self,
254 text: &str,
255 lead: RegSetLead,
256 options: SearchOptions,
257 ) -> Option<(usize, usize)> {
258 self.search_with_encoding(text, 0, text.len(), lead, options)
259 }
260
    /// Searches all of `text` using `RegSetLead::Position` and
    /// `SearchOptions::SEARCH_OPTION_NONE`, returning capture groups.
    ///
    /// Returns `Some((regex_index, captures))` for the matching pattern, or
    /// `None` when nothing matches.
    pub fn captures<'t>(&self, text: &'t str) -> Option<(usize, Captures<'t>)> {
        self.captures_with_options(
            text,
            0,
            text.len(),
            RegSetLead::Position,
            SearchOptions::SEARCH_OPTION_NONE,
        )
    }
287
288 pub fn captures_with_options<'t>(
312 &self,
313 text: &'t str,
314 from: usize,
315 to: usize,
316 lead: RegSetLead,
317 options: SearchOptions,
318 ) -> Option<(usize, Captures<'t>)> {
319 if let Some((regex_index, match_pos)) =
320 self.do_search_with_encoding(&text, from, to, lead, options)
321 {
322 let region_ptr =
323 unsafe { onig_sys::onig_regset_get_region(self.raw, regex_index as c_int) };
324
325 if !region_ptr.is_null() {
326 let mut region = Region::with_capacity(10);
330 unsafe {
331 onig_sys::onig_region_copy(&mut region.raw, region_ptr);
332 }
333
334 let captures = Captures::new(text, region, match_pos);
335 return Some((regex_index, captures));
336 }
337 }
338 None
339 }
340
341 fn do_search_with_encoding<T>(
342 &self,
343 chars: &T,
344 from: usize,
345 to: usize,
346 lead: RegSetLead,
347 options: SearchOptions,
348 ) -> Option<(usize, usize)>
349 where
350 T: EncodedChars,
351 {
352 if from > chars.len() || to > chars.len() || from > to {
353 return None;
354 }
355
356 let mut rmatch_pos: c_int = 0;
357 let rmatch_pos_ptr = &mut rmatch_pos as *mut c_int;
358
359 let (beg, end) = (chars.start_ptr(), chars.limit_ptr());
360
361 let result = unsafe {
362 let start = beg.add(from);
363 let range = beg.add(to);
364
365 onig_sys::onig_regset_search(
366 self.raw,
367 beg,
368 end,
369 start,
370 range,
371 lead.to_onig_lead(),
372 options.bits(),
373 rmatch_pos_ptr,
374 )
375 };
376
377 if result >= 0 {
378 Some((result as usize, rmatch_pos as usize))
379 } else {
380 None
381 }
382 }
383
    /// By-value convenience wrapper around `do_search_with_encoding`: takes
    /// ownership of `chars` and forwards a reference to it with the same
    /// range, lead and options.
    fn search_with_encoding<T>(
        &self,
        chars: T,
        from: usize,
        to: usize,
        lead: RegSetLead,
        options: SearchOptions,
    ) -> Option<(usize, usize)>
    where
        T: EncodedChars,
    {
        self.do_search_with_encoding(&chars, from, to, lead, options)
    }
397}
398
/// Releases the underlying Oniguruma regset when the `RegSet` goes out of
/// scope.
impl Drop for RegSet {
    fn drop(&mut self) {
        // SAFETY: `raw` is only ever set by `empty_with_options`, which
        // rejects null handles, and nothing else in this file frees it.
        // NOTE(review): presumably `onig_regset_free` also frees the regexes
        // handed over in `add_pattern` / `replace_pattern` — confirm.
        unsafe {
            onig_sys::onig_regset_free(self.raw);
        }
    }
}
406
407#[cfg(test)]
408mod tests {
409 use super::*;
410
411 #[test]
412 fn test_regset_empty_patterns() {
413 let set = RegSet::new(&[]).unwrap();
414 assert_eq!(set.len(), 0);
415 assert!(set.is_empty());
416 }
417
418 #[test]
419 fn test_regset_new() {
420 let set = RegSet::new(&[r"\d+"]).unwrap();
421 assert_eq!(set.len(), 1);
422 assert!(!set.is_empty());
423 }
424
425 #[test]
426 fn test_regset_find_with_options() {
427 let set = RegSet::new(&[r"\d+", r"[a-z]+"]).unwrap();
428
429 let result = set.find_with_options(
430 "hello123",
431 RegSetLead::Position,
432 SearchOptions::SEARCH_OPTION_NONE,
433 );
434 assert!(result.is_some());
435
436 let result = set.find_with_options(
437 "hello123",
438 RegSetLead::Regex,
439 SearchOptions::SEARCH_OPTION_NONE,
440 );
441 assert!(result.is_some());
442
443 let result = set.find_with_options(
444 "!@#$%",
445 RegSetLead::Regex,
446 SearchOptions::SEARCH_OPTION_NONE,
447 );
448 assert!(result.is_none());
449 }
450
451 #[test]
452 fn test_regset_captures() {
453 let set = RegSet::new(&[r"(\d+)-(\d+)", r"([a-z]+)"]).unwrap();
454
455 if let Some((regex_index, captures)) = set.captures("hello123") {
456 assert_eq!(regex_index, 1); assert_eq!(captures.at(0), Some("hello"));
458 assert_eq!(captures.pos(0), Some((0, 5)));
459 } else {
460 panic!("Expected to find a match");
461 }
462
463 if let Some((regex_index, captures)) = set.captures("123-456") {
464 assert_eq!(regex_index, 0); assert_eq!(captures.len(), 3); assert_eq!(captures.at(0), Some("123-456"));
467 assert_eq!(captures.at(1), Some("123"));
468 assert_eq!(captures.at(2), Some("456"));
469 } else {
470 panic!("Expected to find a match");
471 }
472
473 assert!(set.captures("!@#$%").is_none());
474 }
475
476 #[test]
477 fn test_regset_replace_pattern() {
478 let mut set = RegSet::new(&[r"\d+", r"[a-z]+"]).unwrap();
479
480 assert!(set.find("123").is_some());
481 set.replace_pattern(0, r"[A-Z]+").unwrap();
482 assert!(set.replace_pattern(100, r"[A-Z]+").is_err());
483
484 assert!(set.find("123").is_none());
485 assert!(set.find("ABC").is_some());
486 assert!(set.find("hello").is_some());
487 assert_eq!(set.len(), 2);
488 }
489
490 #[test]
491 fn test_regset_add_pattern() {
492 let mut set = RegSet::empty().unwrap();
493
494 let idx1 = set.add_pattern(r"\d+").unwrap();
495 assert_eq!(idx1, 0);
496 assert_eq!(set.len(), 1);
497 assert_eq!(set.find("hello123"), Some((0, 5)));
498
499 let idx2 = set.add_pattern(r"[a-z]+").unwrap();
500 assert_eq!(idx2, 1);
501 assert_eq!(set.len(), 2);
502 assert_eq!(set.find("hello123"), Some((1, 0)));
503 }
504
505 #[test]
506 fn test_regset_add_pattern_captures() {
507 let mut set = RegSet::empty().unwrap();
508 set.add_pattern(r"(\d{4})-(\d{2})-(\d{2})").unwrap();
509
510 let (idx, caps) = set.captures("2023-12-25").unwrap();
511 assert_eq!(idx, 0);
512 assert_eq!(caps.at(1), Some("2023"));
513 assert_eq!(caps.at(2), Some("12"));
514 assert_eq!(caps.at(3), Some("25"));
515 }
516
517 #[test]
518 fn test_regset_add_pattern_errors() {
519 let mut set = RegSet::empty().unwrap();
520
521 assert!(set.add_pattern(r"[").is_err());
522 assert_eq!(set.len(), 0);
523
524 assert!(set.replace_pattern(0, r"\d+").is_err());
525
526 set.add_pattern(r"\d+").unwrap();
527 assert_eq!(set.len(), 1);
528 }
529
530 #[test]
531 fn test_regset_captures_with_options() {
532 let set = RegSet::new(&[r"(\d+)", r"([a-z]+)"]).unwrap();
533
534 if let Some((regex_index, captures)) = set.captures_with_options(
535 "hello123",
536 0,
537 8,
538 RegSetLead::Position,
539 SearchOptions::SEARCH_OPTION_NONE,
540 ) {
541 assert_eq!(regex_index, 1); assert_eq!(captures.at(0), Some("hello"));
543 assert_eq!(captures.at(1), Some("hello"));
544 } else {
545 panic!("Expected to find a match");
546 }
547 }
548}