onig_regset/lib.rs
1//! This crate provides a safe wrapper around the
2//! [Oniguruma](https://github.com/kkos/oniguruma) regular expression library.
3//!
4//! # Examples
5//!
6//! ## Single Regex Usage
7//!
8//! ```rust
9//! use onig::Regex;
10//!
11//! let regex = Regex::new("e(l+)").unwrap();
12//! for (i, pos) in regex.captures("hello").unwrap().iter_pos().enumerate() {
13//! match pos {
14//! Some((beg, end)) =>
15//! println!("Group {} captured in position {}:{}", i, beg, end),
16//! None =>
17//! println!("Group {} is not captured", i)
18//! }
19//! }
20//! ```
21//!
22//! ## Multiple Regex Usage with RegSet
23//!
24//! ```rust
25//! use onig::RegSet;
26//!
27//! let set = RegSet::new(&[r"\d+", r"[a-z]+", r"[A-Z]+"]).unwrap();
28//! if let Some((regex_index, pos)) = set.find("hello123WORLD") {
29//! println!("Regex {} matched at position {}", regex_index, pos);
30//! }
31//! ```
32//!
33//! # Match vs Search
34//!
//! There are two basic things you can do with a `Regex` pattern: test
//! if the pattern matches the whole of a given string, and search for
//! occurrences of the pattern within a string. Oniguruma exposes these
//! two concepts with the *match* and *search* APIs.
39//!
//! In addition to these two base Oniguruma APIs this crate exposes a
//! third *find* API, built on top of the *search* API.
42//!
43//! ```
44//! # use onig::Regex;
45//! let pattern = Regex::new("hello").unwrap();
46//! assert_eq!(true, pattern.find("hello world").is_some());
47//! assert_eq!(false, pattern.is_match("hello world"));
48//! ```
49//!
50//! ## The *Match* API
51//!
//! Functions in the match API check if a pattern matches the entire
//! string. The simplest of these is `Regex::is_match`. This returns
//! `true` if the pattern matches the string. For more complex usage,
//! `Regex::match_with_options` and `Regex::match_with_encoding`
//! can be used. These allow the capture groups to be inspected,
//! matching with different options, and matching sub-sections of a
//! given text.
59//!
60//! ## The *Search* API
61//!
//! Functions in the search API search for a pattern anywhere within a
//! string. The simplest of these is `Regex::find`. This returns the
//! offset of the first occurrence of the pattern within the string.
//! For more complex usage, `Regex::search_with_options` and
//! `Regex::search_with_encoding` can be used. These allow capture
//! groups to be inspected, searching with different options and
//! searching within subsections of a given text.
69//!
70//! ## The *Find* API
71//!
72//! The find API is built on top of the search API. Functions in this
73//! API allow iteration across all matches of the pattern within a
74//! string, not just the first one. The functions deal with some of
75//! the complexities of this, such as zero-length matches.
76//!
77//! The simplest step-up from the basic search API `Regex::find` is
78//! getting the captures relating to a match with the
79//! `Regex::captures` method. To find capture information for all
80//! matches within a string `Regex::find_iter` and
81//! `Regex::captures_iter` can be used. The former exposes the start
82//! and end of the match as `Regex::find` does, the latter exposes the
83//! whole capture group information as `Regex::captures` does.
84//!
85//! # The `std::pattern` API
86//!
87//! In addition to the main Oniguruma API it is possible to use the
88//! `Regex` object with the
89//! [`std::pattern`](https://doc.rust-lang.org/std/str/pattern/)
90//! API. To enable support compile with the `std-pattern` feature. If
91//! you're using Cargo you can do this by adding the following to your
92//! Cargo.toml:
93//!
94//! ```toml
95//! [dependencies.onig]
96//! version = "1.2"
97//! features = ["std-pattern"]
98//! ```
99
100#![cfg_attr(feature = "std-pattern", feature(pattern))]
101#![deny(missing_docs)]
102
103use once_cell::sync::Lazy;
104
105mod buffers;
106mod find;
107mod flags;
108mod match_param;
109mod names;
110mod region;
111mod regset;
112mod replace;
113mod syntax;
114mod tree;
115mod utils;
116
117#[cfg(feature = "std-pattern")]
118mod pattern;
119
// re-export the onig types publicly
121pub use crate::buffers::{EncodedBytes, EncodedChars};
122pub use crate::find::{
123 Captures, FindCaptures, FindMatches, RegexSplits, RegexSplitsN, SubCaptures, SubCapturesPos,
124};
125pub use crate::flags::*;
126pub use crate::match_param::MatchParam;
127pub use crate::region::Region;
128pub use crate::regset::{RegSet, RegSetLead};
129pub use crate::replace::Replacer;
130pub use crate::syntax::{MetaChar, Syntax};
131pub use crate::tree::{CaptureTreeNode, CaptureTreeNodeIter};
132pub use crate::utils::{copyright, define_user_property, version};
133
134use std::os::raw::c_int;
135use std::ptr::{null, null_mut};
136use std::sync::Mutex;
137use std::{error, fmt, str};
138
/// Internal tag recording where an `Error` came from.
#[derive(Debug)]
enum ErrorData {
    /// The error was reported by Oniguruma; carries the raw status code.
    OnigError(c_int),
    /// The error was raised by this crate and has no Oniguruma code.
    Custom,
}
144
145/// This struture represents an error from the underlying Oniguruma libray.
146pub struct Error {
147 data: ErrorData,
148 description: String,
149}
150
151/// This struct is a wrapper around an Oniguruma regular expression
152/// pointer. This represents a compiled regex which can be used in
153/// search and match operations.
154#[derive(Debug, Eq, PartialEq)]
155pub struct Regex {
156 raw: onig_sys::OnigRegex,
157}
158
// NOTE(review): these impls assert that the raw regex handle may be moved
// to, and shared between, threads. Nothing visible in this file proves
// that; presumably it relies on Oniguruma's own guarantees for compiled
// regexes -- TODO confirm against the Oniguruma documentation.
unsafe impl Send for Regex {}
unsafe impl Sync for Regex {}
161
162impl Error {
163 fn from_code_and_info(code: c_int, info: &onig_sys::OnigErrorInfo) -> Self {
164 Error::new(code, info)
165 }
166
167 fn from_code(code: c_int) -> Self {
168 Error::new(code, null())
169 }
170
171 fn custom<T: Into<String>>(message: T) -> Self {
172 Error {
173 data: ErrorData::Custom,
174 description: message.into(),
175 }
176 }
177
178 fn new(code: c_int, info: *const onig_sys::OnigErrorInfo) -> Self {
179 let buff = &mut [0; onig_sys::ONIG_MAX_ERROR_MESSAGE_LEN as usize];
180 let len = unsafe { onig_sys::onig_error_code_to_str(buff.as_mut_ptr(), code, info) };
181 let description = if let Ok(description) = str::from_utf8(&buff[..len as usize]) {
182 description
183 } else {
184 return Self::custom("Onig error string was invalid UTF-8");
185 };
186 Error {
187 data: ErrorData::OnigError(code),
188 description: description.to_owned(),
189 }
190 }
191
192 /// Return Oniguruma engine error code.
193 pub fn code(&self) -> i32 {
194 match self.data {
195 ErrorData::OnigError(code) => code,
196 _ => -1,
197 }
198 }
199
200 /// Return error description provided by Oniguruma engine.
201 pub fn description(&self) -> &str {
202 &self.description
203 }
204}
205
206impl error::Error for Error {
207 fn description(&self) -> &str {
208 &self.description
209 }
210}
211
212impl fmt::Display for Error {
213 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
214 write!(f, "Oniguruma error: {}", self.description())
215 }
216}
217
218impl fmt::Debug for Error {
219 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
220 write!(f, "Error({:?}, {})", self.data, self.description())
221 }
222}
223
// Process-wide lock held while a pattern is compiled, so that `onig_new`
// is never called by more than one thread at a time. It guards no data of
// its own, hence the `()` payload.
static REGEX_NEW_MUTEX: Lazy<Mutex<()>> = Lazy::new(|| Mutex::new(()));
225
226impl Regex {
227 /// Create a Regex
228 ///
229 /// Simple regular expression constructor. Compiles a new regular
230 /// expression with the default options using the ruby syntax.
231 /// Once compiled, it can be used repeatedly to search in a string. If an
232 /// invalid expression is given, then an error is returned.
233 ///
234 /// # Arguments
235 ///
236 /// * `pattern` - The regex pattern to compile
237 ///
238 /// # Examples
239 ///
240 /// ```
241 /// use onig::Regex;
242 /// let r = Regex::new(r#"hello (\w+)"#);
243 /// assert!(r.is_ok());
244 /// ```
245 pub fn new(pattern: &str) -> Result<Self, Error> {
246 Regex::with_encoding(pattern)
247 }
248
249 /// Create a Regex, Specifying an Encoding
250 ///
251 /// Attempts to compile `pattern` into a new `Regex`
252 /// instance. Instead of assuming UTF-8 as the encoding scheme the
253 /// encoding is inferred from the `pattern` buffer.
254 ///
255 /// # Arguments
256 ///
257 /// * `pattern` - The regex pattern to compile
258 ///
259 /// # Examples
260 ///
261 /// ```
262 /// use onig::{Regex, EncodedBytes};
263 /// let utf8 = Regex::with_encoding("hello");
264 /// assert!(utf8.is_ok());
265 /// let ascii = Regex::with_encoding(EncodedBytes::ascii(b"world"));
266 /// assert!(ascii.is_ok());
267 /// ```
268 pub fn with_encoding<T>(pattern: T) -> Result<Regex, Error>
269 where
270 T: EncodedChars,
271 {
272 Regex::with_options_and_encoding(
273 pattern,
274 RegexOptions::REGEX_OPTION_NONE,
275 Syntax::default(),
276 )
277 }
278
279 /// Create a new Regex
280 ///
281 /// Attempts to compile a pattern into a new `Regex` instance.
282 /// Once compiled, it can be used repeatedly to search in a string. If an
283 /// invalid expression is given, then an error is returned.
284 /// See [`onig_sys::onig_new`][regex_new] for more information.
285 ///
286 /// # Arguments
287 ///
288 /// * `pattern` - The regex pattern to compile.
289 /// * `options` - The regex compilation options.
290 /// * `syntax` - The syntax which the regex is written in.
291 ///
292 /// # Examples
293 ///
294 /// ```
295 /// use onig::{Regex, Syntax, RegexOptions};
296 /// let r = Regex::with_options("hello.*world",
297 /// RegexOptions::REGEX_OPTION_NONE,
298 /// Syntax::default());
299 /// assert!(r.is_ok());
300 /// ```
301 ///
302 /// [regex_new]: ./onig_sys/fn.onig_new.html
303 pub fn with_options(
304 pattern: &str,
305 option: RegexOptions,
306 syntax: &Syntax,
307 ) -> Result<Regex, Error> {
308 Regex::with_options_and_encoding(pattern, option, syntax)
309 }
310
311 /// Create a new Regex, Specifying Options and Ecoding
312 ///
313 /// Attempts to comile the given `pattern` into a new `Regex`
314 /// instance. Instead of assuming UTF-8 as the encoding scheme the
315 /// encoding is inferred from the `pattern` buffer. If the regex
316 /// fails to compile the returned `Error` value from
317 /// [`onig_new`][regex_new] contains more information.
318 ///
319 /// [regex_new]: ./onig_sys/fn.onig_new.html
320 ///
321 /// # Arguments
322 ///
323 /// * `pattern` - The regex pattern to compile.
324 /// * `options` - The regex compilation options.
325 /// * `syntax` - The syntax which the regex is written in.
326 ///
327 /// # Examples
328 /// ```
329 /// use onig::{Regex, Syntax, EncodedBytes, RegexOptions};
330 /// let pattern = EncodedBytes::ascii(b"hello");
331 /// let r = Regex::with_options_and_encoding(pattern,
332 /// RegexOptions::REGEX_OPTION_SINGLELINE,
333 /// Syntax::default());
334 /// assert!(r.is_ok());
335 /// ```
336 pub fn with_options_and_encoding<T>(
337 pattern: T,
338 option: RegexOptions,
339 syntax: &Syntax,
340 ) -> Result<Self, Error>
341 where
342 T: EncodedChars,
343 {
344 // Convert the rust types to those required for the call to
345 // `onig_new`.
346 let mut reg: onig_sys::OnigRegex = null_mut();
347 let reg_ptr = &mut reg as *mut onig_sys::OnigRegex;
348
349 // We can use this later to get an error message to pass back
350 // if regex creation fails.
351 let mut error = onig_sys::OnigErrorInfo {
352 enc: null_mut(),
353 par: null_mut(),
354 par_end: null_mut(),
355 };
356
357 let err = unsafe {
358 // Grab a lock to make sure that `onig_new` isn't called by
359 // more than one thread at a time.
360 let _guard = REGEX_NEW_MUTEX.lock().unwrap();
361 onig_sys::onig_new(
362 reg_ptr,
363 pattern.start_ptr(),
364 pattern.limit_ptr(),
365 option.bits(),
366 pattern.encoding(),
367 syntax as *const Syntax as *mut Syntax as *mut onig_sys::OnigSyntaxType,
368 &mut error,
369 )
370 };
371
372 if err == onig_sys::ONIG_NORMAL as i32 {
373 Ok(Regex { raw: reg })
374 } else {
375 Err(Error::from_code_and_info(err, &error))
376 }
377 }
378
379 /// Match String
380 ///
381 /// Try to match the regex against the given string slice,
382 /// starting at a given offset. This method works the same way as
383 /// `match_with_encoding`, but the encoding is always utf-8.
384 ///
385 /// For more information see [Match vs
386 /// Search](index.html#match-vs-search)
387 ///
388 /// # Arguments
389 ///
390 /// * `str` - The string slice to match against.
391 /// * `at` - The byte index in the passed slice to start matching
392 /// * `options` - The regex match options.
393 /// * `region` - The region for return group match range info
394 ///
395 /// # Returns
396 ///
397 /// `Some(len)` if the regex matched, with `len` being the number
398 /// of bytes matched. `None` if the regex doesn't match.
399 ///
400 /// # Examples
401 ///
402 /// ```
403 /// use onig::{Regex, SearchOptions};
404 ///
405 /// let r = Regex::new(".*").unwrap();
406 /// let res = r.match_with_options("hello", 0, SearchOptions::SEARCH_OPTION_NONE, None);
407 /// assert!(res.is_some()); // it matches
408 /// assert!(res.unwrap() == 5); // 5 characters matched
409 /// ```
410 pub fn match_with_options(
411 &self,
412 str: &str,
413 at: usize,
414 options: SearchOptions,
415 region: Option<&mut Region>,
416 ) -> Option<usize> {
417 self.match_with_encoding(str, at, options, region)
418 }
419
420 /// Match String with Encoding
421 ///
422 /// Match the regex against a string. This method will start at
423 /// the offset `at` into the string and try and match the
424 /// regex. If the regex matches then the return value is the
425 /// number of characters which matched. If the regex doesn't match
426 /// the return is `None`.
427 ///
428 /// For more information see [Match vs
429 /// Search](index.html#match-vs-search)
430 ///
431 /// The contents of `chars` must have the same encoding that was
432 /// used to construct the regex.
433 ///
434 /// # Arguments
435 ///
436 /// * `chars` - The buffer to match against.
437 /// * `at` - The byte index in the passed buffer to start matching
438 /// * `options` - The regex match options.
439 /// * `region` - The region for return group match range info
440 ///
441 /// # Returns
442 ///
443 /// `Some(len)` if the regex matched, with `len` being the number
444 /// of bytes matched. `None` if the regex doesn't match.
445 ///
446 /// # Examples
447 ///
448 /// ```
449 /// use onig::{Regex, EncodedBytes, SearchOptions};
450 ///
451 /// let r = Regex::with_encoding(EncodedBytes::ascii(b".*")).unwrap();
452 /// let res = r.match_with_encoding(EncodedBytes::ascii(b"world"),
453 /// 0, SearchOptions::SEARCH_OPTION_NONE, None);
454 /// assert!(res.is_some()); // it matches
455 /// assert!(res.unwrap() == 5); // 5 characters matched
456 /// ```
457 pub fn match_with_encoding<T>(
458 &self,
459 chars: T,
460 at: usize,
461 options: SearchOptions,
462 region: Option<&mut Region>,
463 ) -> Option<usize>
464 where
465 T: EncodedChars,
466 {
467 let match_param = MatchParam::default();
468 let result = self.match_with_param(chars, at, options, region, match_param);
469
470 match result {
471 Ok(r) => r,
472 Err(e) => panic!("Onig: Regex match error: {}", e.description()),
473 }
474 }
475
476 /// Match string with encoding and match param
477 ///
478 /// Match the regex against a string. This method will start at
479 /// the offset `at` into the string and try and match the
480 /// regex. If the regex matches then the return value is the
481 /// number of characters which matched. If the regex doesn't match
482 /// the return is `None`.
483 ///
484 /// For more information see [Match vs
485 /// Search](index.html#match-vs-search)
486 ///
487 /// The contents of `chars` must have the same encoding that was
488 /// used to construct the regex.
489 ///
490 /// # Arguments
491 ///
492 /// * `chars` - The buffer to match against.
493 /// * `at` - The byte index in the passed buffer to start matching
494 /// * `options` - The regex match options.
495 /// * `region` - The region for return group match range info
496 /// * `match_param` - The match parameters
497 ///
498 /// # Returns
499 ///
500 /// `Ok(Some(len))` if the regex matched, with `len` being the number
501 /// of bytes matched. `Ok(None)` if the regex doesn't match. `Err` with an
502 /// `Error` if an error occurred (e.g. retry-limit-in-match exceeded).
503 ///
504 /// # Examples
505 ///
506 /// ```
507 /// use onig::{Regex, EncodedBytes, MatchParam, SearchOptions};
508 ///
509 /// let r = Regex::with_encoding(EncodedBytes::ascii(b".*")).unwrap();
510 /// let res = r.match_with_param(EncodedBytes::ascii(b"world"),
511 /// 0, SearchOptions::SEARCH_OPTION_NONE,
512 /// None, MatchParam::default());
513 /// assert!(res.is_ok()); // matching did not error
514 /// assert!(res.unwrap() == Some(5)); // 5 characters matched
515 /// ```
516 pub fn match_with_param<T>(
517 &self,
518 chars: T,
519 at: usize,
520 options: SearchOptions,
521 region: Option<&mut Region>,
522 match_param: MatchParam,
523 ) -> Result<Option<usize>, Error>
524 where
525 T: EncodedChars,
526 {
527 if chars.encoding() != self.encoding() {
528 return Err(Error::custom(format!(
529 "Regex encoding does not match haystack encoding ({0:?}, {1:?})",
530 chars.encoding(),
531 self.encoding()
532 )));
533 }
534 let r = unsafe {
535 let offset = chars.start_ptr().add(at);
536 if offset > chars.limit_ptr() {
537 return Err(Error::custom(format!("Offset {} is too large", at)));
538 }
539 onig_sys::onig_match_with_param(
540 self.raw,
541 chars.start_ptr(),
542 chars.limit_ptr(),
543 offset,
544 match region {
545 Some(region) => region as *mut Region as *mut onig_sys::OnigRegion,
546 None => std::ptr::null_mut(),
547 },
548 options.bits(),
549 match_param.as_raw(),
550 )
551 };
552
553 if r >= 0 {
554 Ok(Some(r as usize))
555 } else if r == onig_sys::ONIG_MISMATCH {
556 Ok(None)
557 } else {
558 Err(Error::from_code(r))
559 }
560 }
561
562 /// Search pattern in string
563 ///
564 /// Search for matches the regex in a string. This method will return the
565 /// index of the first match of the regex within the string, if
566 /// there is one. If `from` is less than `to`, then search is performed
567 /// in forward order, otherwise – in backward order.
568 ///
569 /// For more information see [Match vs
570 /// Search](index.html#match-vs-search)
571 ///
572 /// # Arguments
573 ///
574 /// * `str` - The string to search in.
575 /// * `from` - The byte index in the passed slice to start search
576 /// * `to` - The byte index in the passed slice to finish search
577 /// * `options` - The options for the search.
578 /// * `region` - The region for return group match range info
579 ///
580 /// # Returns
581 ///
582 /// `Some(pos)` if the regex matches, where `pos` is the
583 /// byte-position of the start of the match. `None` if the regex
584 /// doesn't match anywhere in `str`.
585 ///
586 /// # Examples
587 ///
588 /// ```
589 /// use onig::{Regex, SearchOptions};
590 ///
591 /// let r = Regex::new("l{1,2}").unwrap();
592 /// let res = r.search_with_options("hello", 0, 5, SearchOptions::SEARCH_OPTION_NONE, None);
593 /// assert!(res.is_some()); // it matches
594 /// assert!(res.unwrap() == 2); // match starts at character 3
595 /// ```
596 pub fn search_with_options(
597 &self,
598 str: &str,
599 from: usize,
600 to: usize,
601 options: SearchOptions,
602 region: Option<&mut Region>,
603 ) -> Option<usize> {
604 self.search_with_encoding(str, from, to, options, region)
605 }
606
607 /// Search for a Pattern in a String with an Encoding
608 ///
609 /// Search for matches the regex in a string. This method will
610 /// return the index of the first match of the regex within the
611 /// string, if there is one. If `from` is less than `to`, then
612 /// search is performed in forward order, otherwise – in backward
613 /// order.
614 ///
615 /// For more information see [Match vs
616 /// Search](index.html#match-vs-search)
617 ///
618 /// The encoding of the buffer passed to search in must match the
619 /// encoding of the regex.
620 ///
621 /// # Arguments
622 ///
623 /// * `chars` - The character buffer to search in.
624 /// * `from` - The byte index in the passed slice to start search
625 /// * `to` - The byte index in the passed slice to finish search
626 /// * `options` - The options for the search.
627 /// * `region` - The region for return group match range info
628 ///
629 /// # Returns
630 ///
631 /// `Some(pos)` if the regex matches, where `pos` is the
632 /// byte-position of the start of the match. `None` if the regex
633 /// doesn't match anywhere in `chars`.
634 ///
635 /// # Examples
636 ///
637 /// ```
638 /// use onig::{Regex, EncodedBytes, SearchOptions};
639 ///
640 /// let r = Regex::with_encoding(EncodedBytes::ascii(b"l{1,2}")).unwrap();
641 /// let res = r.search_with_encoding(EncodedBytes::ascii(b"hello"),
642 /// 0, 5, SearchOptions::SEARCH_OPTION_NONE, None);
643 /// assert!(res.is_some()); // it matches
644 /// assert!(res.unwrap() == 2); // match starts at character 3
645 /// ```
646 pub fn search_with_encoding<T>(
647 &self,
648 chars: T,
649 from: usize,
650 to: usize,
651 options: SearchOptions,
652 region: Option<&mut Region>,
653 ) -> Option<usize>
654 where
655 T: EncodedChars,
656 {
657 let match_param = MatchParam::default();
658 let result = self.search_with_param(chars, from, to, options, region, match_param);
659
660 match result {
661 Ok(r) => r,
662 Err(e) => panic!("Onig: Regex search error: {}", e.description()),
663 }
664 }
665
666 /// Search pattern in string with encoding and match param
667 ///
668 /// Search for matches the regex in a string. This method will
669 /// return the index of the first match of the regex within the
670 /// string, if there is one. If `from` is less than `to`, then
671 /// search is performed in forward order, otherwise – in backward
672 /// order.
673 ///
674 /// For more information see [Match vs
675 /// Search](index.html#match-vs-search)
676 ///
677 /// The encoding of the buffer passed to search in must match the
678 /// encoding of the regex.
679 ///
680 /// # Arguments
681 ///
682 /// * `chars` - The character buffer to search in.
683 /// * `from` - The byte index in the passed slice to start search
684 /// * `to` - The byte index in the passed slice to finish search
685 /// * `options` - The options for the search.
686 /// * `region` - The region for return group match range info
687 /// * `match_param` - The match parameters
688 ///
689 /// # Returns
690 ///
691 /// `Ok(Some(pos))` if the regex matches, where `pos` is the
692 /// byte-position of the start of the match. `Ok(None)` if the regex
693 /// doesn't match anywhere in `chars`. `Err` with an `Error` if an error
694 /// occurred (e.g. retry-limit-in-match exceeded).
695 ///
696 /// # Examples
697 ///
698 /// ```
699 /// use onig::{Regex, EncodedBytes, MatchParam, SearchOptions};
700 ///
701 /// let r = Regex::with_encoding(EncodedBytes::ascii(b"l{1,2}")).unwrap();
702 /// let res = r.search_with_param(EncodedBytes::ascii(b"hello"),
703 /// 0, 5, SearchOptions::SEARCH_OPTION_NONE,
704 /// None, MatchParam::default());
705 /// assert!(res.is_ok()); // matching did not error
706 /// assert!(res.unwrap() == Some(2)); // match starts at character 3
707 /// ```
708 pub fn search_with_param<T>(
709 &self,
710 chars: T,
711 from: usize,
712 to: usize,
713 options: SearchOptions,
714 region: Option<&mut Region>,
715 match_param: MatchParam,
716 ) -> Result<Option<usize>, Error>
717 where
718 T: EncodedChars,
719 {
720 let (beg, end) = (chars.start_ptr(), chars.limit_ptr());
721 if chars.encoding() != self.encoding() {
722 return Err(Error::custom(format!(
723 "Regex encoding does not match haystack encoding ({0:?}, {1:?})",
724 chars.encoding(),
725 self.encoding()
726 )));
727 }
728 let r = unsafe {
729 let start = beg.add(from);
730 let range = beg.add(to);
731 if start > end {
732 return Err(Error::custom("Start of match should be before end"));
733 }
734 if range > end {
735 return Err(Error::custom("Limit of match should be before end"));
736 }
737 onig_sys::onig_search_with_param(
738 self.raw,
739 beg,
740 end,
741 start,
742 range,
743 match region {
744 Some(region) => region as *mut Region as *mut onig_sys::OnigRegion,
745 None => std::ptr::null_mut(),
746 },
747 options.bits(),
748 match_param.as_raw(),
749 )
750 };
751
752 if r >= 0 {
753 Ok(Some(r as usize))
754 } else if r == onig_sys::ONIG_MISMATCH {
755 Ok(None)
756 } else {
757 Err(Error::from_code(r))
758 }
759 }
760
761 /// Returns true if and only if the regex matches the string given.
762 ///
763 /// For more information see [Match vs
764 /// Search](index.html#match-vs-search)
765 ///
766 /// # Arguments
767 /// * `text` - The string slice to test against the pattern.
768 ///
769 /// # Returns
770 ///
771 /// `true` if the pattern matches the whole of `text`, `false` otherwise.
772 pub fn is_match(&self, text: &str) -> bool {
773 self.match_with_options(text, 0, SearchOptions::SEARCH_OPTION_WHOLE_STRING, None)
774 .map(|r| r == text.len())
775 .unwrap_or(false)
776 }
777
778 /// Find a Match in a Buffer, With Encoding
779 ///
780 /// Finds the first match of the regular expression within the
781 /// buffer.
782 ///
783 /// Note that this should only be used if you want to discover the
784 /// position of the match within a string. Testing if a pattern
785 /// matches the whole string is faster if you use `is_match`. For
786 /// more information see [Match vs
787 /// Search](index.html#match-vs-search)
788 ///
789 /// # Arguments
790 /// * `text` - The text to search in.
791 ///
792 /// # Returns
793 ///
794 /// The offset of the start and end of the first match. If no
795 /// match exists `None` is returned.
796 pub fn find(&self, text: &str) -> Option<(usize, usize)> {
797 self.find_with_encoding(text)
798 }
799
800 /// Find a Match in a Buffer, With Encoding
801 ///
802 /// Finds the first match of the regular expression within the
803 /// buffer.
804 ///
805 /// For more information see [Match vs
806 /// Search](index.html#match-vs-search)
807 ///
808 /// # Arguments
809 /// * `text` - The text to search in.
810 ///
811 /// # Returns
812 ///
813 /// The offset of the start and end of the first match. If no
814 /// match exists `None` is returned.
815 pub fn find_with_encoding<T>(&self, text: T) -> Option<(usize, usize)>
816 where
817 T: EncodedChars,
818 {
819 let mut region = Region::new();
820 let len = text.len();
821 self.search_with_encoding(
822 text,
823 0,
824 len,
825 SearchOptions::SEARCH_OPTION_NONE,
826 Some(&mut region),
827 )
828 .and_then(|_| region.pos(0))
829 }
830
831 /// Get the Encoding of the Regex
832 ///
833 /// # Returns
834 ///
835 /// Returns a reference to an oniguruma encoding which was used
836 /// when this regex was created.
837 pub fn encoding(&self) -> onig_sys::OnigEncoding {
838 unsafe { onig_sys::onig_get_encoding(self.raw) }
839 }
840
841 /// Get the Number of Capture Groups in this Pattern
842 pub fn captures_len(&self) -> usize {
843 unsafe { onig_sys::onig_number_of_captures(self.raw) as usize }
844 }
845
846 /// Get the Size of the Capture Histories for this Pattern
847 pub fn capture_histories_len(&self) -> usize {
848 unsafe { onig_sys::onig_number_of_capture_histories(self.raw) as usize }
849 }
850
851 /// Get the raw Oniguruma regex pointer
852 pub(crate) fn as_raw(&self) -> onig_sys::OnigRegex {
853 self.raw
854 }
855}
856
857impl Drop for Regex {
858 fn drop(&mut self) {
859 unsafe {
860 onig_sys::onig_free(self.raw);
861 }
862 }
863}
864
#[cfg(test)]
mod tests {
    use super::*;
    use std::panic;

    // Pattern/haystack pair which the tests below expect to fail with
    // "retry-limit-in-match over" under the default `MatchParam`.
    const BACKTRACKING_PATTERN: &str = "(a|b|ab)*bc";
    const BACKTRACKING_HAYSTACK: &str =
        "ababababababababababababababababababababababababababababacbc";

    // Haystack shared by the invalid-range tests.
    const RUBY: &str = "Ruby";

    /// Regex used by the invalid-range tests.
    fn four_char_regex() -> Regex {
        Regex::with_options("R...", RegexOptions::REGEX_OPTION_NONE, Syntax::default())
            .expect("regex")
    }

    #[test]
    fn test_regex_create() {
        let with_options =
            Regex::with_options(".*", RegexOptions::REGEX_OPTION_NONE, Syntax::default());
        with_options.unwrap();

        let plain = Regex::new(r#"a \w+ word"#);
        plain.unwrap();
    }

    #[test]
    fn test_regex_invalid() {
        let error = Regex::new("\\p{foo}").unwrap_err();
        assert_eq!(error.code(), -223);
        assert_eq!(
            error.description(),
            "invalid character property name {foo}"
        );
    }

    #[test]
    fn test_failed_match() {
        let regex = Regex::new("foo").unwrap();
        let outcome = regex.match_with_options("bar", 0, SearchOptions::SEARCH_OPTION_NONE, None);
        assert!(outcome.is_none());
    }

    #[test]
    fn test_regex_search_with_options() {
        let regex = Regex::new("e(l+)").unwrap();
        let mut region = Region::new();

        let found = regex.search_with_options(
            "hello",
            0,
            5,
            SearchOptions::SEARCH_OPTION_NONE,
            Some(&mut region),
        );

        assert!(region.tree().is_none());
        assert_eq!(found, Some(1));
        assert_eq!(region.len(), 2);
        let whole = region.pos(0).unwrap();
        let group = region.pos(1).unwrap();
        assert_eq!(whole, (1, 4));
        assert_eq!(group, (2, 4));

        // test cloning here since we already have a filled region
        let copy = region.clone();
        assert_eq!(copy.pos(0).unwrap(), whole);
    }

    #[test]
    fn test_regex_match_with_options() {
        let regex = Regex::new("he(l+)").unwrap();
        let mut region = Region::new();

        let matched = regex.match_with_options(
            "hello",
            0,
            SearchOptions::SEARCH_OPTION_NONE,
            Some(&mut region),
        );

        assert!(region.tree().is_none());
        assert_eq!(matched, Some(4));
        assert_eq!(region.len(), 2);
        let whole = region.pos(0).unwrap();
        let group = region.pos(1).unwrap();
        assert_eq!(whole, (0, 4));
        assert_eq!(group, (2, 4));
    }

    #[test]
    fn test_regex_is_match() {
        let regex = Regex::new("he(l+)o").unwrap();
        assert!(regex.is_match("hello"));
        assert!(!regex.is_match("hello 2.0"));
    }

    #[test]
    fn test_is_match_chooses_longest_alternation() {
        let regex = Regex::new("Greater|GreaterOrEqual").unwrap();
        for text in ["Greater", "GreaterOrEqual"] {
            assert!(regex.is_match(text));
        }
    }

    #[test]
    fn test_regex_find() {
        let regex = Regex::new("he(l+)o").unwrap();
        assert_eq!(regex.find("hey, hello!"), Some((5, 10)));
        assert_eq!(regex.find("hey, honey!"), None);
    }

    #[test]
    fn test_regex_captures_len() {
        let regex = Regex::new("(he)(l+)(o)").unwrap();
        assert_eq!(regex.captures_len(), 3);
    }

    #[test]
    fn test_regex_error_is_match() {
        let regex = Regex::new(BACKTRACKING_PATTERN).unwrap();
        let outcome = regex.match_with_param(
            BACKTRACKING_HAYSTACK,
            0,
            SearchOptions::SEARCH_OPTION_NONE,
            None,
            MatchParam::default(),
        );

        let error = outcome.unwrap_err();
        assert_eq!("retry-limit-in-match over", error.description());
    }

    #[test]
    fn test_regex_panic_is_match() {
        let regex = Regex::new(BACKTRACKING_PATTERN).unwrap();
        let outcome = panic::catch_unwind(|| regex.is_match(BACKTRACKING_HAYSTACK));
        let payload = outcome.unwrap_err();
        let message = payload.downcast_ref::<String>().unwrap();
        assert_eq!(
            message.as_str(),
            "Onig: Regex match error: retry-limit-in-match over"
        );
    }

    #[test]
    fn test_regex_error_find() {
        let regex = Regex::new(BACKTRACKING_PATTERN).unwrap();
        let outcome = regex.search_with_param(
            BACKTRACKING_HAYSTACK,
            0,
            BACKTRACKING_HAYSTACK.len(),
            SearchOptions::SEARCH_OPTION_NONE,
            None,
            MatchParam::default(),
        );

        let error = outcome.unwrap_err();
        assert_eq!("retry-limit-in-match over", error.description());
    }

    #[test]
    fn test_regex_panic_find() {
        let regex = Regex::new(BACKTRACKING_PATTERN).unwrap();
        let outcome = panic::catch_unwind(|| regex.find(BACKTRACKING_HAYSTACK));
        let payload = outcome.unwrap_err();
        let message = payload.downcast_ref::<String>().unwrap();
        assert_eq!(
            message.as_str(),
            "Onig: Regex search error: retry-limit-in-match over"
        );
    }

    #[test]
    fn test_search_with_invalid_range() {
        let regex = four_char_regex();

        // `from` beyond the end of the haystack.
        let start_too_far = regex.search_with_param(
            RUBY,
            5,
            RUBY.len(),
            SearchOptions::SEARCH_OPTION_NONE,
            None,
            MatchParam::default(),
        );
        assert!(start_too_far.is_err());

        // `to` beyond the end of the haystack.
        let limit_too_far = regex.search_with_param(
            RUBY,
            2,
            RUBY.len() + 1,
            SearchOptions::SEARCH_OPTION_NONE,
            None,
            MatchParam::default(),
        );
        assert!(limit_too_far.is_err());
    }

    #[test]
    fn test_search_with_invalid_range_panic() {
        let regex = four_char_regex();
        let outcome = panic::catch_unwind(|| {
            regex.search_with_encoding(
                RUBY,
                5,
                RUBY.len(),
                SearchOptions::SEARCH_OPTION_NONE,
                None,
            )
        });
        assert!(outcome.is_err());
    }

    #[test]
    fn test_match_with_invalid_range() {
        let regex = four_char_regex();
        let outcome = regex.match_with_param(
            RUBY,
            5,
            SearchOptions::SEARCH_OPTION_NONE,
            None,
            MatchParam::default(),
        );
        assert!(outcome.is_err());
    }

    #[test]
    fn test_match_with_invalid_range_panic() {
        let regex = four_char_regex();
        let outcome = panic::catch_unwind(|| {
            regex.match_with_encoding(RUBY, 5, SearchOptions::SEARCH_OPTION_NONE, None)
        });
        assert!(outcome.is_err());
    }
}