tre_regex/lib.rs
1//! These are safe bindings to the [`tre_regex_sys`] module.
2//!
3//! These bindings are designed to provide an idiomatic Rust-like API to the [TRE library] as much
4//! as possible. Most of the TRE API is supported, except [`reguexec`] from TRE; that is tricky to
5//! implement, although it should be fairly simple to use yourself.
6//!
7//! This library uses Rust [`std::borrow::Cow`] strings to enable zero-copy of regex matches.
8//!
9//! # Examples
10//! Two APIs are presented: the function API, and the object API. Whichever one you choose to use
11//! is up to you, although the function API is implemented as a thin wrapper around the object API.
12//!
13//! ## Object API
14//! ```
15//! # use tre_regex::Result;
16//! # fn main() -> Result<()> {
17//! use tre_regex::{RegcompFlags, RegexecFlags, Regex};
18//!
19//! let regcomp_flags = RegcompFlags::new().add(RegcompFlags::EXTENDED);
20//! let regexec_flags = RegexecFlags::new().add(RegexecFlags::NONE);
21//!
22//! let compiled_reg = Regex::new("^([[:alnum:]]+)[[:space:]]*([[:alnum:]]+)$", regcomp_flags)?;
23//! let matches = compiled_reg.regexec("hello world", 2, regexec_flags)?;
24//!
25//! for (i, matched) in matches.into_iter().enumerate() {
26//! match matched {
27//! Some(res) => {
28//! match res {
29//! Ok(substr) => println!("Match {i}: '{}'", substr),
30//! Err(e) => println!("Match {i}: <Error: {e}>"),
31//! }
32//! },
33//! None => println!("Match {i}: <None>"),
34//! }
35//! }
36//! # Ok(())
37//! # }
38//! ```
39//!
40//! ## Function API
41//! ```
42//! # use tre_regex::Result;
43//! # fn main() -> Result<()> {
44//! use tre_regex::{RegcompFlags, RegexecFlags, regcomp, regexec};
45//!
46//! let regcomp_flags = RegcompFlags::new().add(RegcompFlags::EXTENDED);
47//! let regexec_flags = RegexecFlags::new().add(RegexecFlags::NONE);
48//!
49//! let compiled_reg = regcomp("^([[:alnum:]]+)[[:space:]]*([[:alnum:]]+)$", regcomp_flags)?;
50//! let matches = regexec(&compiled_reg, "hello world", 2, regexec_flags)?;
51//!
52//! for (i, matched) in matches.into_iter().enumerate() {
53//! match matched {
54//! Some(res) => {
55//! match res {
56//! Ok(substr) => println!("Match {i}: '{}'", substr),
57//! Err(e) => println!("Match {i}: <Error: {e}>"),
58//! }
59//! },
60//! None => println!("Match {i}: <None>"),
61//! }
62//! }
63//! # Ok(())
64//! # }
65//! ```
66//!
67//! [TRE library]: <https://laurikari.net/tre/>
68//! [`reguexec`]: tre_regex_sys::tre_reguexec
69
70#![deny(clippy::all)]
71#![warn(clippy::pedantic)]
72#![warn(clippy::nursery)]
73
74/// Public re-export of the [`tre_regex_sys`] module.
75pub use tre_regex_sys as tre;
76
77#[cfg(feature = "approx")]
78mod approx;
79mod comp;
80mod err;
81mod exec;
82mod flags;
83#[cfg(test)]
84mod tests;
85#[cfg(feature = "wchar")]
86mod wchar;
87
88#[cfg(feature = "approx")]
89pub use crate::approx::*;
90pub use crate::comp::*;
91pub use crate::err::*;
92pub use crate::exec::*;
93pub use crate::flags::*;
94#[cfg(feature = "wchar")]
95pub use crate::wchar::*;
96
97/// The base regex object.
98///
99/// This object takes care of freeing itself upon dropping, so you don't have to call
100/// [`tre_regfree`](tre_regex_sys::tre_regfree) yourself.
101///
102/// This object provides an API similar to the function API. See the documentation on the
103/// individual functions for more information.
104#[derive(Debug)]
105pub struct Regex(Option<tre::regex_t>);
106
107impl Regex {
108 /// Create a new [`Regex`] object from the given [`regex_t`](tre_regex_sys::regex_t).
109 ///
110 /// This function is for advanced use only. Don't mess with it unless you know exactly what you
111 /// are doing.
112 ///
113 /// **WARNING**: Do **NOT** place a [`regex_t`](tre_regex_sys::regex_t) here that you didn't
114 /// get from [`regcomp`] or [`tre_regcomp`](tre_regex_sys::tre_regcomp). Otherwise, when the
115 /// [`Regex`] object drops, it will call [`tre_regfree`](tre_regex_sys::tre_regfree`) on memory
116 /// not allocated by TRE itself. This is **undefined behaviour** and will likely cause a
117 /// segfault. This is why the function is marked `unsafe`.
118 ///
119 /// # Arguments
120 /// * `regex`: A [`regex_t`](tre_regex_sys::regex_t) to wrap.
121 ///
122 /// # Returns
123 /// A new [`Regex`] object, containing the passed-in [`regex_t`](tre_regex_sys::regex_t).
124 ///
125 /// # Safety
126 /// The `regex` parameter must have been initalised by [`tre_regcomp`](tre_regex_sys::tre_regcomp)
127 /// or taken from another [`Regex`] object.
128 ///
129 /// [`regcomp`]: crate::regcomp
130 #[must_use]
131 #[inline]
132 pub const unsafe fn new_from(regex: tre::regex_t) -> Self {
133 Self(Some(regex))
134 }
135
136 /// Relinquish the underlying [`regex_t`](tre_regex_sys::regex_t) object.
137 ///
138 /// This is an advanced function and should not be used unless you know what you are doing.
139 ///
140 /// # Returns
141 /// `None` if the object is vacant, otherwise `Some(`[`regex_t`](tre_regex_sys::regex_t)`)`.
142 ///
143 /// # Safety
144 /// A leak could result if the object is not properly freed with
145 /// [`tre_regfree`](tre_regex_sys::tre_regfree) if the object was initalised by the TRE API.
146 #[must_use]
147 #[inline]
148 pub unsafe fn release(&mut self) -> Option<tre::regex_t> {
149 let regex = self.0;
150 self.0 = None;
151 regex
152 }
153
154 /// Gets an immutable reference to the underlying [`regex_t`](tre_regex_sys::regex_t) object.
155 #[must_use]
156 #[inline]
157 pub const fn get(&self) -> &Option<tre::regex_t> {
158 &self.0
159 }
160
161 /// Gets a mutable reference to the underlying [`regex_t`](tre_regex_sys::regex_t) object.
162 #[must_use]
163 #[inline]
164 pub fn get_mut(&mut self) -> &mut Option<tre::regex_t> {
165 &mut self.0
166 }
167}
168
169impl Drop for Regex {
170 /// Executes the destructor for this type.
171 ///
172 /// The destructor will call [`tre_regfree`](tre_regex_sys::tre_regfree) on the internal
173 /// [`regex_t`](tre_regex_sys::regex_t).
174 #[inline]
175 fn drop(&mut self) {
176 let Some(compiled_reg) = self.get_mut() else {
177 return;
178 };
179
180 // SAFETY: freeing data passed into the struct previously.
181 // If the data came from our API, this is safe. Otherwise, the user must opt into storing
182 // the regex here.
183 unsafe {
184 tre::tre_regfree(compiled_reg);
185 }
186 }
187}