onig_regset/
lib.rs

1//! This crate provides a safe wrapper around the
2//! [Oniguruma](https://github.com/kkos/oniguruma) regular expression library.
3//!
4//! # Examples
5//!
6//! ## Single Regex Usage
7//!
8//! ```rust
9//! use onig::Regex;
10//!
11//! let regex = Regex::new("e(l+)").unwrap();
12//! for (i, pos) in regex.captures("hello").unwrap().iter_pos().enumerate() {
13//!     match pos {
14//!          Some((beg, end)) =>
15//!              println!("Group {} captured in position {}:{}", i, beg, end),
16//!          None =>
17//!              println!("Group {} is not captured", i)
18//!     }
19//! }
20//! ```
21//!
22//! ## Multiple Regex Usage with RegSet
23//!
24//! ```rust
25//! use onig::RegSet;
26//!
27//! let set = RegSet::new(&[r"\d+", r"[a-z]+", r"[A-Z]+"]).unwrap();
28//! if let Some((regex_index, pos)) = set.find("hello123WORLD") {
29//!     println!("Regex {} matched at position {}", regex_index, pos);
30//! }
31//! ```
32//!
33//! # Match vs Search
34//!
//! There are two basic things you can do with a `Regex` pattern; test
//! if the pattern matches the whole of a given string, and search for
//! occurrences of the pattern within a string. Oniguruma exposes these
//! two concepts with the *match* and *search* APIs.
39//!
//! In addition to these two base Oniguruma APIs this crate exposes a
//! third *find* API, built on top of the *search* API.
42//!
43//! ```
44//! # use onig::Regex;
45//! let pattern = Regex::new("hello").unwrap();
46//! assert_eq!(true, pattern.find("hello world").is_some());
47//! assert_eq!(false, pattern.is_match("hello world"));
48//! ```
49//!
50//! ## The *Match* API
51//!
//! Functions in the match API check if a pattern matches the entire
//! string. The simplest of these is `Regex::is_match`. This returns
//! `true` if the pattern matches the string. For more complex usage,
//! `Regex::match_with_options` and `Regex::match_with_encoding`
//! can be used. These allow the capture groups to be inspected,
//! matching with different options, and matching sub-sections of a
//! given text.
59//!
60//! ## The *Search* API
61//!
//! Functions in the search API search for a pattern anywhere within a
//! string. The simplest of these is `Regex::find`. This returns the
//! offset of the first occurrence of the pattern within the string.
//! For more complex usage `Regex::search_with_options` and
//! `Regex::search_with_encoding` can be used. These allow capture
//! groups to be inspected, searching with different options and
//! searching within subsections of a given text.
69//!
70//! ## The *Find* API
71//!
72//! The find API is built on top of the search API. Functions in this
73//! API allow iteration across all matches of the pattern within a
74//! string, not just the first one. The functions deal with some of
75//! the complexities of this, such as zero-length matches.
76//!
77//! The simplest step-up from the basic search API `Regex::find` is
78//! getting the captures relating to a match with the
79//! `Regex::captures` method. To find capture information for all
80//! matches within a string `Regex::find_iter` and
81//! `Regex::captures_iter` can be used. The former exposes the start
82//! and end of the match as `Regex::find` does, the latter exposes the
83//! whole capture group information as `Regex::captures` does.
84//!
85//! # The `std::pattern` API
86//!
87//! In addition to the main Oniguruma API it is possible to use the
88//! `Regex` object with the
89//! [`std::pattern`](https://doc.rust-lang.org/std/str/pattern/)
90//! API. To enable support compile with the `std-pattern` feature. If
91//! you're using Cargo you can do this by adding the following to your
92//! Cargo.toml:
93//!
94//! ```toml
95//! [dependencies.onig]
96//! version = "1.2"
97//! features = ["std-pattern"]
98//! ```
99
100#![cfg_attr(feature = "std-pattern", feature(pattern))]
101#![deny(missing_docs)]
102
103use once_cell::sync::Lazy;
104
105mod buffers;
106mod find;
107mod flags;
108mod match_param;
109mod names;
110mod region;
111mod regset;
112mod replace;
113mod syntax;
114mod tree;
115mod utils;
116
117#[cfg(feature = "std-pattern")]
118mod pattern;
119
// re-export the onig types publicly
121pub use crate::buffers::{EncodedBytes, EncodedChars};
122pub use crate::find::{
123    Captures, FindCaptures, FindMatches, RegexSplits, RegexSplitsN, SubCaptures, SubCapturesPos,
124};
125pub use crate::flags::*;
126pub use crate::match_param::MatchParam;
127pub use crate::region::Region;
128pub use crate::regset::{RegSet, RegSetLead};
129pub use crate::replace::Replacer;
130pub use crate::syntax::{MetaChar, Syntax};
131pub use crate::tree::{CaptureTreeNode, CaptureTreeNodeIter};
132pub use crate::utils::{copyright, define_user_property, version};
133
134use std::os::raw::c_int;
135use std::ptr::{null, null_mut};
136use std::sync::Mutex;
137use std::{error, fmt, str};
138
/// Origin of an error value: either a status code reported by Oniguruma or a
/// failure raised by this crate itself.
#[derive(Debug)]
enum ErrorData {
    /// Error reported by Oniguruma; carries the raw status code.
    OnigError(c_int),
    /// Error raised by this crate, with no associated Oniguruma status code.
    Custom,
}
144
/// This structure represents an error from the underlying Oniguruma library.
pub struct Error {
    // Where the error came from (an Oniguruma status code or this crate).
    data: ErrorData,
    // Human-readable message describing the error.
    description: String,
}
150
/// This struct is a wrapper around an Oniguruma regular expression
/// pointer. This represents a compiled regex which can be used in
/// search and match operations.
///
/// The derived `PartialEq`/`Eq` compare the wrapped raw pointer, so two
/// values are equal only when they wrap the same underlying regex object,
/// not when they were compiled from the same pattern.
#[derive(Debug, Eq, PartialEq)]
pub struct Regex {
    // Handle to the compiled Oniguruma regex.
    raw: onig_sys::OnigRegex,
}
158
// NOTE(review): these impls assert that the wrapped Oniguruma regex handle may
// be moved to, and shared between, threads. Presumably a compiled regex is not
// mutated by match/search calls — confirm against the Oniguruma documentation.
unsafe impl Send for Regex {}
unsafe impl Sync for Regex {}
161
162impl Error {
163    fn from_code_and_info(code: c_int, info: &onig_sys::OnigErrorInfo) -> Self {
164        Error::new(code, info)
165    }
166
167    fn from_code(code: c_int) -> Self {
168        Error::new(code, null())
169    }
170
171    fn custom<T: Into<String>>(message: T) -> Self {
172        Error {
173            data: ErrorData::Custom,
174            description: message.into(),
175        }
176    }
177
178    fn new(code: c_int, info: *const onig_sys::OnigErrorInfo) -> Self {
179        let buff = &mut [0; onig_sys::ONIG_MAX_ERROR_MESSAGE_LEN as usize];
180        let len = unsafe { onig_sys::onig_error_code_to_str(buff.as_mut_ptr(), code, info) };
181        let description = if let Ok(description) = str::from_utf8(&buff[..len as usize]) {
182            description
183        } else {
184            return Self::custom("Onig error string was invalid UTF-8");
185        };
186        Error {
187            data: ErrorData::OnigError(code),
188            description: description.to_owned(),
189        }
190    }
191
192    /// Return Oniguruma engine error code.
193    pub fn code(&self) -> i32 {
194        match self.data {
195            ErrorData::OnigError(code) => code,
196            _ => -1,
197        }
198    }
199
200    /// Return error description provided by Oniguruma engine.
201    pub fn description(&self) -> &str {
202        &self.description
203    }
204}
205
206impl error::Error for Error {
207    fn description(&self) -> &str {
208        &self.description
209    }
210}
211
212impl fmt::Display for Error {
213    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
214        write!(f, "Oniguruma error: {}", self.description())
215    }
216}
217
218impl fmt::Debug for Error {
219    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
220        write!(f, "Error({:?}, {})", self.data, self.description())
221    }
222}
223
// Process-wide lock, created lazily on first use. It is held around the call
// to `onig_new` so that only one thread compiles a regex at a time.
static REGEX_NEW_MUTEX: Lazy<Mutex<()>> = Lazy::new(|| Mutex::new(()));
225
226impl Regex {
227    /// Create a Regex
228    ///
229    /// Simple regular expression constructor. Compiles a new regular
230    /// expression with the default options using the ruby syntax.
231    /// Once compiled, it can be used repeatedly to search in a string. If an
232    /// invalid expression is given, then an error is returned.
233    ///
234    /// # Arguments
235    ///
236    /// * `pattern` - The regex pattern to compile
237    ///
238    /// # Examples
239    ///
240    /// ```
241    /// use onig::Regex;
242    /// let r = Regex::new(r#"hello (\w+)"#);
243    /// assert!(r.is_ok());
244    /// ```
245    pub fn new(pattern: &str) -> Result<Self, Error> {
246        Regex::with_encoding(pattern)
247    }
248
249    /// Create a Regex, Specifying an Encoding
250    ///
251    /// Attempts to compile `pattern` into a new `Regex`
252    /// instance. Instead of assuming UTF-8 as the encoding scheme the
253    /// encoding is inferred from the `pattern` buffer.
254    ///
255    /// # Arguments
256    ///
257    /// * `pattern` - The regex pattern to compile
258    ///
259    /// # Examples
260    ///
261    /// ```
262    /// use onig::{Regex, EncodedBytes};
263    /// let utf8 = Regex::with_encoding("hello");
264    /// assert!(utf8.is_ok());
265    /// let ascii = Regex::with_encoding(EncodedBytes::ascii(b"world"));
266    /// assert!(ascii.is_ok());
267    /// ```
268    pub fn with_encoding<T>(pattern: T) -> Result<Regex, Error>
269    where
270        T: EncodedChars,
271    {
272        Regex::with_options_and_encoding(
273            pattern,
274            RegexOptions::REGEX_OPTION_NONE,
275            Syntax::default(),
276        )
277    }
278
279    /// Create a new Regex
280    ///
281    /// Attempts to compile a pattern into a new `Regex` instance.
282    /// Once compiled, it can be used repeatedly to search in a string. If an
283    /// invalid expression is given, then an error is returned.
284    /// See [`onig_sys::onig_new`][regex_new] for more information.
285    ///
286    /// # Arguments
287    ///
288    ///  * `pattern` - The regex pattern to compile.
289    ///  * `options` - The regex compilation options.
290    ///  * `syntax`  - The syntax which the regex is written in.
291    ///
292    /// # Examples
293    ///
294    /// ```
295    /// use onig::{Regex, Syntax, RegexOptions};
296    /// let r = Regex::with_options("hello.*world",
297    ///                             RegexOptions::REGEX_OPTION_NONE,
298    ///                             Syntax::default());
299    /// assert!(r.is_ok());
300    /// ```
301    ///
302    /// [regex_new]: ./onig_sys/fn.onig_new.html
303    pub fn with_options(
304        pattern: &str,
305        option: RegexOptions,
306        syntax: &Syntax,
307    ) -> Result<Regex, Error> {
308        Regex::with_options_and_encoding(pattern, option, syntax)
309    }
310
    /// Create a new Regex, Specifying Options and Encoding
    ///
    /// Attempts to compile the given `pattern` into a new `Regex`
    /// instance. Instead of assuming UTF-8 as the encoding scheme the
315    /// encoding is inferred from the `pattern` buffer. If the regex
316    /// fails to compile the returned `Error` value from
317    /// [`onig_new`][regex_new] contains more information.
318    ///
319    /// [regex_new]: ./onig_sys/fn.onig_new.html
320    ///
321    /// # Arguments
322    ///
323    ///  * `pattern` - The regex pattern to compile.
324    ///  * `options` - The regex compilation options.
325    ///  * `syntax`  - The syntax which the regex is written in.
326    ///
327    /// # Examples
328    /// ```
329    /// use onig::{Regex, Syntax, EncodedBytes, RegexOptions};
330    /// let pattern = EncodedBytes::ascii(b"hello");
331    /// let r = Regex::with_options_and_encoding(pattern,
332    ///                                          RegexOptions::REGEX_OPTION_SINGLELINE,
333    ///                                          Syntax::default());
334    /// assert!(r.is_ok());
335    /// ```
336    pub fn with_options_and_encoding<T>(
337        pattern: T,
338        option: RegexOptions,
339        syntax: &Syntax,
340    ) -> Result<Self, Error>
341    where
342        T: EncodedChars,
343    {
344        // Convert the rust types to those required for the call to
345        // `onig_new`.
346        let mut reg: onig_sys::OnigRegex = null_mut();
347        let reg_ptr = &mut reg as *mut onig_sys::OnigRegex;
348
349        // We can use this later to get an error message to pass back
350        // if regex creation fails.
351        let mut error = onig_sys::OnigErrorInfo {
352            enc: null_mut(),
353            par: null_mut(),
354            par_end: null_mut(),
355        };
356
357        let err = unsafe {
358            // Grab a lock to make sure that `onig_new` isn't called by
359            // more than one thread at a time.
360            let _guard = REGEX_NEW_MUTEX.lock().unwrap();
361            onig_sys::onig_new(
362                reg_ptr,
363                pattern.start_ptr(),
364                pattern.limit_ptr(),
365                option.bits(),
366                pattern.encoding(),
367                syntax as *const Syntax as *mut Syntax as *mut onig_sys::OnigSyntaxType,
368                &mut error,
369            )
370        };
371
372        if err == onig_sys::ONIG_NORMAL as i32 {
373            Ok(Regex { raw: reg })
374        } else {
375            Err(Error::from_code_and_info(err, &error))
376        }
377    }
378
379    /// Match String
380    ///
381    /// Try to match the regex against the given string slice,
382    /// starting at a given offset. This method works the same way as
383    /// `match_with_encoding`, but the encoding is always utf-8.
384    ///
385    /// For more information see [Match vs
386    /// Search](index.html#match-vs-search)
387    ///
388    /// # Arguments
389    ///
390    /// * `str` - The string slice to match against.
391    /// * `at` - The byte index in the passed slice to start matching
392    /// * `options` - The regex match options.
393    /// * `region` - The region for return group match range info
394    ///
395    /// # Returns
396    ///
397    /// `Some(len)` if the regex matched, with `len` being the number
398    /// of bytes matched. `None` if the regex doesn't match.
399    ///
400    /// # Examples
401    ///
402    /// ```
403    /// use onig::{Regex, SearchOptions};
404    ///
405    /// let r = Regex::new(".*").unwrap();
406    /// let res = r.match_with_options("hello", 0, SearchOptions::SEARCH_OPTION_NONE, None);
407    /// assert!(res.is_some()); // it matches
408    /// assert!(res.unwrap() == 5); // 5 characters matched
409    /// ```
410    pub fn match_with_options(
411        &self,
412        str: &str,
413        at: usize,
414        options: SearchOptions,
415        region: Option<&mut Region>,
416    ) -> Option<usize> {
417        self.match_with_encoding(str, at, options, region)
418    }
419
420    /// Match String with Encoding
421    ///
422    /// Match the regex against a string. This method will start at
423    /// the offset `at` into the string and try and match the
424    /// regex. If the regex matches then the return value is the
425    /// number of characters which matched. If the regex doesn't match
426    /// the return is `None`.
427    ///
428    /// For more information see [Match vs
429    /// Search](index.html#match-vs-search)
430    ///
431    /// The contents of `chars` must have the same encoding that was
432    /// used to construct the regex.
433    ///
434    /// # Arguments
435    ///
436    /// * `chars` - The buffer to match against.
437    /// * `at` - The byte index in the passed buffer to start matching
438    /// * `options` - The regex match options.
439    /// * `region` - The region for return group match range info
440    ///
441    /// # Returns
442    ///
443    /// `Some(len)` if the regex matched, with `len` being the number
444    /// of bytes matched. `None` if the regex doesn't match.
445    ///
446    /// # Examples
447    ///
448    /// ```
449    /// use onig::{Regex, EncodedBytes, SearchOptions};
450    ///
451    /// let r = Regex::with_encoding(EncodedBytes::ascii(b".*")).unwrap();
452    /// let res = r.match_with_encoding(EncodedBytes::ascii(b"world"),
453    ///                                 0, SearchOptions::SEARCH_OPTION_NONE, None);
454    /// assert!(res.is_some()); // it matches
455    /// assert!(res.unwrap() == 5); // 5 characters matched
456    /// ```
457    pub fn match_with_encoding<T>(
458        &self,
459        chars: T,
460        at: usize,
461        options: SearchOptions,
462        region: Option<&mut Region>,
463    ) -> Option<usize>
464    where
465        T: EncodedChars,
466    {
467        let match_param = MatchParam::default();
468        let result = self.match_with_param(chars, at, options, region, match_param);
469
470        match result {
471            Ok(r) => r,
472            Err(e) => panic!("Onig: Regex match error: {}", e.description()),
473        }
474    }
475
476    /// Match string with encoding and match param
477    ///
478    /// Match the regex against a string. This method will start at
479    /// the offset `at` into the string and try and match the
480    /// regex. If the regex matches then the return value is the
481    /// number of characters which matched. If the regex doesn't match
482    /// the return is `None`.
483    ///
484    /// For more information see [Match vs
485    /// Search](index.html#match-vs-search)
486    ///
487    /// The contents of `chars` must have the same encoding that was
488    /// used to construct the regex.
489    ///
490    /// # Arguments
491    ///
492    /// * `chars` - The buffer to match against.
493    /// * `at` - The byte index in the passed buffer to start matching
494    /// * `options` - The regex match options.
495    /// * `region` - The region for return group match range info
496    /// * `match_param` - The match parameters
497    ///
498    /// # Returns
499    ///
500    /// `Ok(Some(len))` if the regex matched, with `len` being the number
501    /// of bytes matched. `Ok(None)` if the regex doesn't match. `Err` with an
502    /// `Error` if an error occurred (e.g. retry-limit-in-match exceeded).
503    ///
504    /// # Examples
505    ///
506    /// ```
507    /// use onig::{Regex, EncodedBytes, MatchParam, SearchOptions};
508    ///
509    /// let r = Regex::with_encoding(EncodedBytes::ascii(b".*")).unwrap();
510    /// let res = r.match_with_param(EncodedBytes::ascii(b"world"),
511    ///                              0, SearchOptions::SEARCH_OPTION_NONE,
512    ///                              None, MatchParam::default());
513    /// assert!(res.is_ok()); // matching did not error
514    /// assert!(res.unwrap() == Some(5)); // 5 characters matched
515    /// ```
516    pub fn match_with_param<T>(
517        &self,
518        chars: T,
519        at: usize,
520        options: SearchOptions,
521        region: Option<&mut Region>,
522        match_param: MatchParam,
523    ) -> Result<Option<usize>, Error>
524    where
525        T: EncodedChars,
526    {
527        if chars.encoding() != self.encoding() {
528            return Err(Error::custom(format!(
529                "Regex encoding does not match haystack encoding ({0:?}, {1:?})",
530                chars.encoding(),
531                self.encoding()
532            )));
533        }
534        let r = unsafe {
535            let offset = chars.start_ptr().add(at);
536            if offset > chars.limit_ptr() {
537                return Err(Error::custom(format!("Offset {} is too large", at)));
538            }
539            onig_sys::onig_match_with_param(
540                self.raw,
541                chars.start_ptr(),
542                chars.limit_ptr(),
543                offset,
544                match region {
545                    Some(region) => region as *mut Region as *mut onig_sys::OnigRegion,
546                    None => std::ptr::null_mut(),
547                },
548                options.bits(),
549                match_param.as_raw(),
550            )
551        };
552
553        if r >= 0 {
554            Ok(Some(r as usize))
555        } else if r == onig_sys::ONIG_MISMATCH {
556            Ok(None)
557        } else {
558            Err(Error::from_code(r))
559        }
560    }
561
562    /// Search pattern in string
563    ///
    /// Search for a match of the regex in a string. This method will return the
    /// index of the first match of the regex within the string, if
    /// there is one. If `from` is less than `to`, then search is performed
    /// in forward order, otherwise – in backward order.
568    ///
569    /// For more information see [Match vs
570    /// Search](index.html#match-vs-search)
571    ///
572    /// # Arguments
573    ///
574    ///  * `str` - The string to search in.
575    ///  * `from` - The byte index in the passed slice to start search
576    ///  * `to` - The byte index in the passed slice to finish search
577    ///  * `options` - The options for the search.
578    ///  * `region` - The region for return group match range info
579    ///
580    /// # Returns
581    ///
582    /// `Some(pos)` if the regex matches, where `pos` is the
583    /// byte-position of the start of the match. `None` if the regex
584    /// doesn't match anywhere in `str`.
585    ///
586    /// # Examples
587    ///
588    /// ```
589    /// use onig::{Regex, SearchOptions};
590    ///
591    /// let r = Regex::new("l{1,2}").unwrap();
592    /// let res = r.search_with_options("hello", 0, 5, SearchOptions::SEARCH_OPTION_NONE, None);
593    /// assert!(res.is_some()); // it matches
594    /// assert!(res.unwrap() == 2); // match starts at character 3
595    /// ```
596    pub fn search_with_options(
597        &self,
598        str: &str,
599        from: usize,
600        to: usize,
601        options: SearchOptions,
602        region: Option<&mut Region>,
603    ) -> Option<usize> {
604        self.search_with_encoding(str, from, to, options, region)
605    }
606
607    /// Search for a Pattern in a String with an Encoding
608    ///
    /// Search for a match of the regex in a string. This method will
    /// return the index of the first match of the regex within the
    /// string, if there is one. If `from` is less than `to`, then
    /// search is performed in forward order, otherwise – in backward
    /// order.
614    ///
615    /// For more information see [Match vs
616    /// Search](index.html#match-vs-search)
617    ///
618    /// The encoding of the buffer passed to search in must match the
619    /// encoding of the regex.
620    ///
621    /// # Arguments
622    ///
623    ///  * `chars` - The character buffer to search in.
624    ///  * `from` - The byte index in the passed slice to start search
625    ///  * `to` - The byte index in the passed slice to finish search
626    ///  * `options` - The options for the search.
627    ///  * `region` - The region for return group match range info
628    ///
629    /// # Returns
630    ///
631    /// `Some(pos)` if the regex matches, where `pos` is the
632    /// byte-position of the start of the match. `None` if the regex
633    /// doesn't match anywhere in `chars`.
634    ///
635    /// # Examples
636    ///
637    /// ```
638    /// use onig::{Regex, EncodedBytes, SearchOptions};
639    ///
640    /// let r = Regex::with_encoding(EncodedBytes::ascii(b"l{1,2}")).unwrap();
641    /// let res = r.search_with_encoding(EncodedBytes::ascii(b"hello"),
642    ///                                  0, 5, SearchOptions::SEARCH_OPTION_NONE, None);
643    /// assert!(res.is_some()); // it matches
644    /// assert!(res.unwrap() == 2); // match starts at character 3
645    /// ```
646    pub fn search_with_encoding<T>(
647        &self,
648        chars: T,
649        from: usize,
650        to: usize,
651        options: SearchOptions,
652        region: Option<&mut Region>,
653    ) -> Option<usize>
654    where
655        T: EncodedChars,
656    {
657        let match_param = MatchParam::default();
658        let result = self.search_with_param(chars, from, to, options, region, match_param);
659
660        match result {
661            Ok(r) => r,
662            Err(e) => panic!("Onig: Regex search error: {}", e.description()),
663        }
664    }
665
666    /// Search pattern in string with encoding and match param
667    ///
    /// Search for a match of the regex in a string. This method will
    /// return the index of the first match of the regex within the
    /// string, if there is one. If `from` is less than `to`, then
    /// search is performed in forward order, otherwise – in backward
    /// order.
673    ///
674    /// For more information see [Match vs
675    /// Search](index.html#match-vs-search)
676    ///
677    /// The encoding of the buffer passed to search in must match the
678    /// encoding of the regex.
679    ///
680    /// # Arguments
681    ///
682    ///  * `chars` - The character buffer to search in.
683    ///  * `from` - The byte index in the passed slice to start search
684    ///  * `to` - The byte index in the passed slice to finish search
685    ///  * `options` - The options for the search.
686    ///  * `region` - The region for return group match range info
687    ///  * `match_param` - The match parameters
688    ///
689    /// # Returns
690    ///
691    /// `Ok(Some(pos))` if the regex matches, where `pos` is the
692    /// byte-position of the start of the match. `Ok(None)` if the regex
693    /// doesn't match anywhere in `chars`. `Err` with an `Error` if an error
694    /// occurred (e.g. retry-limit-in-match exceeded).
695    ///
696    /// # Examples
697    ///
698    /// ```
699    /// use onig::{Regex, EncodedBytes, MatchParam, SearchOptions};
700    ///
701    /// let r = Regex::with_encoding(EncodedBytes::ascii(b"l{1,2}")).unwrap();
702    /// let res = r.search_with_param(EncodedBytes::ascii(b"hello"),
703    ///                               0, 5, SearchOptions::SEARCH_OPTION_NONE,
704    ///                               None, MatchParam::default());
705    /// assert!(res.is_ok()); // matching did not error
706    /// assert!(res.unwrap() == Some(2)); // match starts at character 3
707    /// ```
708    pub fn search_with_param<T>(
709        &self,
710        chars: T,
711        from: usize,
712        to: usize,
713        options: SearchOptions,
714        region: Option<&mut Region>,
715        match_param: MatchParam,
716    ) -> Result<Option<usize>, Error>
717    where
718        T: EncodedChars,
719    {
720        let (beg, end) = (chars.start_ptr(), chars.limit_ptr());
721        if chars.encoding() != self.encoding() {
722            return Err(Error::custom(format!(
723                "Regex encoding does not match haystack encoding ({0:?}, {1:?})",
724                chars.encoding(),
725                self.encoding()
726            )));
727        }
728        let r = unsafe {
729            let start = beg.add(from);
730            let range = beg.add(to);
731            if start > end {
732                return Err(Error::custom("Start of match should be before end"));
733            }
734            if range > end {
735                return Err(Error::custom("Limit of match should be before end"));
736            }
737            onig_sys::onig_search_with_param(
738                self.raw,
739                beg,
740                end,
741                start,
742                range,
743                match region {
744                    Some(region) => region as *mut Region as *mut onig_sys::OnigRegion,
745                    None => std::ptr::null_mut(),
746                },
747                options.bits(),
748                match_param.as_raw(),
749            )
750        };
751
752        if r >= 0 {
753            Ok(Some(r as usize))
754        } else if r == onig_sys::ONIG_MISMATCH {
755            Ok(None)
756        } else {
757            Err(Error::from_code(r))
758        }
759    }
760
761    /// Returns true if and only if the regex matches the string given.
762    ///
763    /// For more information see [Match vs
764    /// Search](index.html#match-vs-search)
765    ///
766    /// # Arguments
767    ///  * `text` - The string slice to test against the pattern.
768    ///
769    /// # Returns
770    ///
771    /// `true` if the pattern matches the whole of `text`, `false` otherwise.
772    pub fn is_match(&self, text: &str) -> bool {
773        self.match_with_options(text, 0, SearchOptions::SEARCH_OPTION_WHOLE_STRING, None)
774            .map(|r| r == text.len())
775            .unwrap_or(false)
776    }
777
    /// Find a Match in a String
779    ///
780    /// Finds the first match of the regular expression within the
781    /// buffer.
782    ///
783    /// Note that this should only be used if you want to discover the
784    /// position of the match within a string. Testing if a pattern
785    /// matches the whole string is faster if you use `is_match`.  For
786    /// more information see [Match vs
787    /// Search](index.html#match-vs-search)
788    ///
789    /// # Arguments
790    ///  * `text` - The text to search in.
791    ///
792    /// # Returns
793    ///
794    ///  The offset of the start and end of the first match. If no
795    ///  match exists `None` is returned.
796    pub fn find(&self, text: &str) -> Option<(usize, usize)> {
797        self.find_with_encoding(text)
798    }
799
800    /// Find a Match in a Buffer, With Encoding
801    ///
802    /// Finds the first match of the regular expression within the
803    /// buffer.
804    ///
805    /// For more information see [Match vs
806    /// Search](index.html#match-vs-search)
807    ///
808    /// # Arguments
809    ///  * `text` - The text to search in.
810    ///
811    /// # Returns
812    ///
813    ///  The offset of the start and end of the first match. If no
814    ///  match exists `None` is returned.
815    pub fn find_with_encoding<T>(&self, text: T) -> Option<(usize, usize)>
816    where
817        T: EncodedChars,
818    {
819        let mut region = Region::new();
820        let len = text.len();
821        self.search_with_encoding(
822            text,
823            0,
824            len,
825            SearchOptions::SEARCH_OPTION_NONE,
826            Some(&mut region),
827        )
828        .and_then(|_| region.pos(0))
829    }
830
831    /// Get the Encoding of the Regex
832    ///
833    /// # Returns
834    ///
835    /// Returns a reference to an oniguruma encoding which was used
836    /// when this regex was created.
837    pub fn encoding(&self) -> onig_sys::OnigEncoding {
838        unsafe { onig_sys::onig_get_encoding(self.raw) }
839    }
840
841    /// Get the Number of Capture Groups in this Pattern
842    pub fn captures_len(&self) -> usize {
843        unsafe { onig_sys::onig_number_of_captures(self.raw) as usize }
844    }
845
846    /// Get the Size of the Capture Histories for this Pattern
847    pub fn capture_histories_len(&self) -> usize {
848        unsafe { onig_sys::onig_number_of_capture_histories(self.raw) as usize }
849    }
850
851    /// Get the raw Oniguruma regex pointer
852    pub(crate) fn as_raw(&self) -> onig_sys::OnigRegex {
853        self.raw
854    }
855}
856
857impl Drop for Regex {
858    fn drop(&mut self) {
859        unsafe {
860            onig_sys::onig_free(self.raw);
861        }
862    }
863}
864
865#[cfg(test)]
866mod tests {
867    use super::*;
868    use std::panic;
869
870    #[test]
871    fn test_regex_create() {
872        Regex::with_options(".*", RegexOptions::REGEX_OPTION_NONE, Syntax::default()).unwrap();
873
874        Regex::new(r#"a \w+ word"#).unwrap();
875    }
876
877    #[test]
878    fn test_regex_invalid() {
879        let e = Regex::new("\\p{foo}").unwrap_err();
880        assert_eq!(e.code(), -223);
881        assert_eq!(e.description(), "invalid character property name {foo}");
882    }
883
884    #[test]
885    fn test_failed_match() {
886        let regex = Regex::new("foo").unwrap();
887        let res = regex.match_with_options("bar", 0, SearchOptions::SEARCH_OPTION_NONE, None);
888        assert!(res.is_none());
889    }
890
891    #[test]
892    fn test_regex_search_with_options() {
893        let mut region = Region::new();
894        let regex = Regex::new("e(l+)").unwrap();
895
896        let r = regex.search_with_options(
897            "hello",
898            0,
899            5,
900            SearchOptions::SEARCH_OPTION_NONE,
901            Some(&mut region),
902        );
903
904        assert!(region.tree().is_none());
905        assert_eq!(r, Some(1));
906        assert_eq!(region.len(), 2);
907        let pos1 = region.pos(0).unwrap();
908        let pos2 = region.pos(1).unwrap();
909        assert_eq!(pos1, (1, 4));
910        assert_eq!(pos2, (2, 4));
911
912        // test cloning here since we already have a filled region
913        let cloned_region = region.clone();
914        let pos1_clone = cloned_region.pos(0).unwrap();
915        assert_eq!(pos1_clone, pos1);
916    }
917
918    #[test]
919    fn test_regex_match_with_options() {
920        let mut region = Region::new();
921        let regex = Regex::new("he(l+)").unwrap();
922
923        let r = regex.match_with_options(
924            "hello",
925            0,
926            SearchOptions::SEARCH_OPTION_NONE,
927            Some(&mut region),
928        );
929
930        assert!(region.tree().is_none());
931        assert_eq!(r, Some(4));
932        assert_eq!(region.len(), 2);
933        let pos1 = region.pos(0).unwrap();
934        let pos2 = region.pos(1).unwrap();
935        assert_eq!(pos1, (0, 4));
936        assert_eq!(pos2, (2, 4));
937    }
938
939    #[test]
940    fn test_regex_is_match() {
941        let regex = Regex::new("he(l+)o").unwrap();
942        assert!(regex.is_match("hello"));
943        assert!(!regex.is_match("hello 2.0"));
944    }
945
946    #[test]
947    fn test_is_match_chooses_longest_alternation() {
948        let regex = Regex::new("Greater|GreaterOrEqual").unwrap();
949        assert!(regex.is_match("Greater"));
950        assert!(regex.is_match("GreaterOrEqual"));
951    }
952
953    #[test]
954    fn test_regex_find() {
955        let regex = Regex::new("he(l+)o").unwrap();
956        assert_eq!(regex.find("hey, hello!"), Some((5, 10)));
957        assert_eq!(regex.find("hey, honey!"), None);
958    }
959
960    #[test]
961    fn test_regex_captures_len() {
962        let regex = Regex::new("(he)(l+)(o)").unwrap();
963        assert_eq!(regex.captures_len(), 3);
964    }
965
966    #[test]
967    fn test_regex_error_is_match() {
968        let regex = Regex::new("(a|b|ab)*bc").unwrap();
969        let result = regex.match_with_param(
970            "ababababababababababababababababababababababababababababacbc",
971            0,
972            SearchOptions::SEARCH_OPTION_NONE,
973            None,
974            MatchParam::default(),
975        );
976
977        let e = result.err().unwrap();
978        assert_eq!("retry-limit-in-match over", e.description());
979    }
980
981    #[test]
982    fn test_regex_panic_is_match() {
983        let regex = Regex::new("(a|b|ab)*bc").unwrap();
984        let result = panic::catch_unwind(|| {
985            regex.is_match("ababababababababababababababababababababababababababababacbc")
986        });
987        let e = result.err().unwrap();
988        let message = e.downcast_ref::<String>().unwrap();
989        assert_eq!(
990            message.as_str(),
991            "Onig: Regex match error: retry-limit-in-match over"
992        );
993    }
994
995    #[test]
996    fn test_regex_error_find() {
997        let regex = Regex::new("(a|b|ab)*bc").unwrap();
998        let s = "ababababababababababababababababababababababababababababacbc";
999        let result = regex.search_with_param(
1000            s,
1001            0,
1002            s.len(),
1003            SearchOptions::SEARCH_OPTION_NONE,
1004            None,
1005            MatchParam::default(),
1006        );
1007
1008        let e = result.err().unwrap();
1009        assert_eq!("retry-limit-in-match over", e.description());
1010    }
1011
1012    #[test]
1013    fn test_regex_panic_find() {
1014        let regex = Regex::new("(a|b|ab)*bc").unwrap();
1015        let result = panic::catch_unwind(|| {
1016            regex.find("ababababababababababababababababababababababababababababacbc")
1017        });
1018        let e = result.err().unwrap();
1019        let message = e.downcast_ref::<String>().unwrap();
1020        assert_eq!(
1021            message.as_str(),
1022            "Onig: Regex search error: retry-limit-in-match over"
1023        );
1024    }
1025
1026    #[test]
1027    fn test_search_with_invalid_range() {
1028        let regex = Regex::with_options("R...", RegexOptions::REGEX_OPTION_NONE, Syntax::default())
1029            .expect("regex");
1030        let string = "Ruby";
1031        let is_match = regex.search_with_param(
1032            string,
1033            5,
1034            string.len(),
1035            SearchOptions::SEARCH_OPTION_NONE,
1036            None,
1037            MatchParam::default(),
1038        );
1039        assert!(is_match.is_err());
1040
1041        let is_match = regex.search_with_param(
1042            string,
1043            2,
1044            string.len() + 1,
1045            SearchOptions::SEARCH_OPTION_NONE,
1046            None,
1047            MatchParam::default(),
1048        );
1049        assert!(is_match.is_err());
1050    }
1051
1052    #[test]
1053    fn test_search_with_invalid_range_panic() {
1054        let regex = Regex::with_options("R...", RegexOptions::REGEX_OPTION_NONE, Syntax::default())
1055            .expect("regex");
1056        let string = "Ruby";
1057        let is_match = panic::catch_unwind(|| {
1058            regex.search_with_encoding(
1059                string,
1060                5,
1061                string.len(),
1062                SearchOptions::SEARCH_OPTION_NONE,
1063                None,
1064            )
1065        });
1066        assert!(is_match.is_err());
1067    }
1068
1069    #[test]
1070    fn test_match_with_invalid_range() {
1071        let regex = Regex::with_options("R...", RegexOptions::REGEX_OPTION_NONE, Syntax::default())
1072            .expect("regex");
1073        let string = "Ruby";
1074        let is_match = regex.match_with_param(
1075            string,
1076            5,
1077            SearchOptions::SEARCH_OPTION_NONE,
1078            None,
1079            MatchParam::default(),
1080        );
1081        assert!(is_match.is_err());
1082    }
1083
1084    #[test]
1085    fn test_match_with_invalid_range_panic() {
1086        let regex = Regex::with_options("R...", RegexOptions::REGEX_OPTION_NONE, Syntax::default())
1087            .expect("regex");
1088        let string = "Ruby";
1089        let is_match = panic::catch_unwind(|| {
1090            regex.match_with_encoding(string, 5, SearchOptions::SEARCH_OPTION_NONE, None)
1091        });
1092        assert!(is_match.is_err());
1093    }
1094}