tre_regex/
lib.rs

1//! These are safe bindings to the [`tre_regex_sys`] module.
2//!
//! These bindings are designed to provide an idiomatic Rust-like API to the [TRE library] as much
//! as possible. Most of the TRE API is supported, except [`reguexec`] from TRE; that is tricky to
//! implement, although it should be fairly simple to use yourself.
6//!
7//! This library uses Rust [`std::borrow::Cow`] strings to enable zero-copy of regex matches.
8//!
9//! # Examples
//! Two APIs are presented: the function API, and the object API. Whichever one you choose to use
//! is up to you, although the function API is implemented as a thin wrapper around the object API.
12//!
13//! ## Object API
14//! ```
15//! # use tre_regex::Result;
16//! # fn main() -> Result<()> {
17//! use tre_regex::{RegcompFlags, RegexecFlags, Regex};
18//!
19//! let regcomp_flags = RegcompFlags::new().add(RegcompFlags::EXTENDED);
20//! let regexec_flags = RegexecFlags::new().add(RegexecFlags::NONE);
21//!
22//! let compiled_reg = Regex::new("^([[:alnum:]]+)[[:space:]]*([[:alnum:]]+)$", regcomp_flags)?;
23//! let matches = compiled_reg.regexec("hello world", 2, regexec_flags)?;
24//!
25//! for (i, matched) in matches.into_iter().enumerate() {
26//!     match matched {
27//!         Some(res) => {
28//!             match res {
29//!                 Ok(substr) => println!("Match {i}: '{}'", substr),
30//!                 Err(e) => println!("Match {i}: <Error: {e}>"),
31//!             }
32//!         },
33//!         None => println!("Match {i}: <None>"),
34//!     }
35//! }
36//! # Ok(())
37//! # }
38//! ```
39//!
40//! ## Function API
41//! ```
42//! # use tre_regex::Result;
43//! # fn main() -> Result<()> {
44//! use tre_regex::{RegcompFlags, RegexecFlags, regcomp, regexec};
45//!
46//! let regcomp_flags = RegcompFlags::new().add(RegcompFlags::EXTENDED);
47//! let regexec_flags = RegexecFlags::new().add(RegexecFlags::NONE);
48//!
49//! let compiled_reg = regcomp("^([[:alnum:]]+)[[:space:]]*([[:alnum:]]+)$", regcomp_flags)?;
50//! let matches = regexec(&compiled_reg, "hello world", 2, regexec_flags)?;
51//!
52//! for (i, matched) in matches.into_iter().enumerate() {
53//!     match matched {
54//!         Some(res) => {
55//!             match res {
56//!                 Ok(substr) => println!("Match {i}: '{}'", substr),
57//!                 Err(e) => println!("Match {i}: <Error: {e}>"),
58//!             }
59//!         },
60//!         None => println!("Match {i}: <None>"),
61//!     }
62//! }
63//! # Ok(())
64//! # }
65//! ```
66//!
67//! [TRE library]: <https://laurikari.net/tre/>
68//! [`reguexec`]: tre_regex_sys::tre_reguexec
69
70#![deny(clippy::all)]
71#![warn(clippy::pedantic)]
72#![warn(clippy::nursery)]
73
74/// Public re-export of the [`tre_regex_sys`] module.
75pub use tre_regex_sys as tre;
76
77#[cfg(feature = "approx")]
78mod approx;
79mod comp;
80mod err;
81mod exec;
82mod flags;
83#[cfg(test)]
84mod tests;
85#[cfg(feature = "wchar")]
86mod wchar;
87
88#[cfg(feature = "approx")]
89pub use crate::approx::*;
90pub use crate::comp::*;
91pub use crate::err::*;
92pub use crate::exec::*;
93pub use crate::flags::*;
94#[cfg(feature = "wchar")]
95pub use crate::wchar::*;
96
97/// The base regex object.
98///
99/// This object takes care of freeing itself upon dropping, so you don't have to call
100/// [`tre_regfree`](tre_regex_sys::tre_regfree) yourself.
101///
102/// This object provides an API similar to the function API. See the documentation on the
103/// individual functions for more information.
104#[derive(Debug)]
105pub struct Regex(Option<tre::regex_t>);
106
107impl Regex {
108    /// Create a new [`Regex`] object from the given [`regex_t`](tre_regex_sys::regex_t).
109    ///
110    /// This function is for advanced use only. Don't mess with it unless you know exactly what you
111    /// are doing.
112    ///
113    /// **WARNING**: Do **NOT** place a [`regex_t`](tre_regex_sys::regex_t) here that you didn't
114    /// get from [`regcomp`] or [`tre_regcomp`](tre_regex_sys::tre_regcomp). Otherwise, when the
115    /// [`Regex`] object drops, it will call [`tre_regfree`](tre_regex_sys::tre_regfree`) on memory
116    /// not allocated by TRE itself. This is **undefined behaviour** and will likely cause a
117    /// segfault. This is why the function is marked `unsafe`.
118    ///
119    /// # Arguments
120    /// * `regex`: A [`regex_t`](tre_regex_sys::regex_t) to wrap.
121    ///
122    /// # Returns
123    /// A new [`Regex`] object, containing the passed-in [`regex_t`](tre_regex_sys::regex_t).
124    ///
125    /// # Safety
126    /// The `regex` parameter must have been initalised by [`tre_regcomp`](tre_regex_sys::tre_regcomp)
127    /// or taken from another [`Regex`] object.
128    ///
129    /// [`regcomp`]: crate::regcomp
130    #[must_use]
131    #[inline]
132    pub const unsafe fn new_from(regex: tre::regex_t) -> Self {
133        Self(Some(regex))
134    }
135
136    /// Relinquish the underlying [`regex_t`](tre_regex_sys::regex_t) object.
137    ///
138    /// This is an advanced function and should not be used unless you know what you are doing.
139    ///
140    /// # Returns
141    /// `None` if the object is vacant, otherwise `Some(`[`regex_t`](tre_regex_sys::regex_t)`)`.
142    ///
143    /// # Safety
144    /// A leak could result if the object is not properly freed with
145    /// [`tre_regfree`](tre_regex_sys::tre_regfree) if the object was initalised by the TRE API.
146    #[must_use]
147    #[inline]
148    pub unsafe fn release(&mut self) -> Option<tre::regex_t> {
149        let regex = self.0;
150        self.0 = None;
151        regex
152    }
153
154    /// Gets an immutable reference to the underlying [`regex_t`](tre_regex_sys::regex_t) object.
155    #[must_use]
156    #[inline]
157    pub const fn get(&self) -> &Option<tre::regex_t> {
158        &self.0
159    }
160
161    /// Gets a mutable reference to the underlying [`regex_t`](tre_regex_sys::regex_t) object.
162    #[must_use]
163    #[inline]
164    pub fn get_mut(&mut self) -> &mut Option<tre::regex_t> {
165        &mut self.0
166    }
167}
168
169impl Drop for Regex {
170    /// Executes the destructor for this type.
171    ///
172    /// The destructor will call [`tre_regfree`](tre_regex_sys::tre_regfree) on the internal
173    /// [`regex_t`](tre_regex_sys::regex_t).
174    #[inline]
175    fn drop(&mut self) {
176        let Some(compiled_reg) = self.get_mut() else {
177            return;
178        };
179
180        // SAFETY: freeing data passed into the struct previously.
181        // If the data came from our API, this is safe. Otherwise, the user must opt into storing
182        // the regex here.
183        unsafe {
184            tre::tre_regfree(compiled_reg);
185        }
186    }
187}