regex_cache/
lazy.rs

1// Copyright 2017 1aim GmbH
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy of
4// this software and associated documentation files (the "Software"), to deal in
5// the Software without restriction, including without limitation the rights to
6// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
7// of the Software, and to permit persons to whom the Software is furnished to do
8// so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in all
11// copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19// SOFTWARE.
20
21use std::ops::Deref;
22use std::fmt;
23use std::str;
24
25use std::sync::Arc;
26use oncemutex::OnceMutex;
27
28use regex::{Regex, RegexBuilder, Error};
29use crate::syntax;
30use crate::options::Options;
31
32/// A lazily created `Regex`.
33///
34/// At the first `Deref` the given source will be compiled and saved in the
35/// Local Thread Storage, thus avoiding locking.
36///
37/// # Example
38///
39/// Find the location of a US phone number:
40///
41/// ```
42/// # use regex_cache::LazyRegex;
43/// let re = LazyRegex::new("[0-9]{3}-[0-9]{3}-[0-9]{4}").unwrap();
44/// let m  = re.find("phone: 111-222-3333").unwrap();
45/// assert_eq!((m.start(), m.end()), (7, 19));
46/// ```
47#[derive(Clone)]
48pub struct LazyRegex {
49	builder: LazyRegexBuilder,
50	regex:   Arc<OnceMutex<Option<Regex>>>
51}
52
53impl LazyRegex {
54	/// Create a new lazy `Regex` for the given source, checking the syntax is
55	/// valid.
56	pub fn new(source: &str) -> Result<LazyRegex, Error> {
57		if let Err(err) = syntax::Parser::new().parse(source) {
58			return Err(Error::Syntax(err.to_string()));
59		}
60
61		Ok(LazyRegex::from(LazyRegexBuilder::new(source)))
62	}
63
64	fn from(builder: LazyRegexBuilder) -> Self {
65		LazyRegex {
66			builder: builder,
67			regex:   Arc::new(OnceMutex::new(None)),
68		}
69	}
70
71	fn create(builder: &LazyRegexBuilder) -> Regex {
72		builder.options.define(&mut RegexBuilder::new(&builder.source))
73			.build().unwrap()
74	}
75}
76
77impl Deref for LazyRegex {
78	type Target = Regex;
79
80	fn deref(&self) -> &Regex {
81		self.as_ref()
82	}
83}
84
85impl AsRef<Regex> for LazyRegex {
86	fn as_ref(&self) -> &Regex {
87		if let Some(mut guard) = self.regex.lock() {
88			*guard = Some(LazyRegex::create(&self.builder));
89		}
90
91		(*self.regex).as_ref().unwrap()
92	}
93}
94
95impl Into<Regex> for LazyRegex {
96	fn into(self) -> Regex {
97		let (regex, builder) = (self.regex, self.builder);
98
99		Arc::try_unwrap(regex).ok().and_then(|m| m.into_inner()).unwrap_or_else(||
100			LazyRegex::create(&builder))
101	}
102}
103
104impl fmt::Debug for LazyRegex {
105	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
106		fmt::Debug::fmt(&**self, f)
107	}
108}
109
110impl fmt::Display for LazyRegex {
111	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
112		fmt::Display::fmt(&**self, f)
113	}
114}
115
116impl str::FromStr for LazyRegex {
117	type Err = Error;
118
119	fn from_str(s: &str) -> Result<LazyRegex, Error> {
120		LazyRegex::new(s)
121	}
122}
123
124/// A configurable builder for a lazy `Regex`.
125#[derive(Clone, Eq, PartialEq, Debug)]
126pub struct LazyRegexBuilder {
127	source: String,
128	options: Options,
129}
130
131impl LazyRegexBuilder {
132	/// Create a new regular expression builder with the given pattern.
133	///
134	/// If the pattern is invalid, then an error will be returned when
135	/// `compile` is called.
136	pub fn new(source: &str) -> LazyRegexBuilder {
137		LazyRegexBuilder {
138			source: source.to_owned(),
139			options: Default::default(),
140		}
141	}
142
143	/// Consume the builder and compile the regular expression.
144	///
145	/// Note that calling `as_str` on the resulting `Regex` will produce the
146	/// pattern given to `new` verbatim. Notably, it will not incorporate any
147	/// of the flags set on this builder.
148	pub fn build(&self) -> Result<LazyRegex, Error> {
149		if let Err(err) = syntax::Parser::new().parse(&self.source) {
150			return Err(Error::Syntax(err.to_string()));
151		}
152
153		Ok(LazyRegex::from(self.clone()))
154	}
155
156	/// Set the value for the case insensitive (`i`) flag.
157	pub fn case_insensitive(&mut self, yes: bool) -> &mut LazyRegexBuilder {
158		self.options.case_insensitive = yes;
159		self
160	}
161
162	/// Set the value for the multi-line matching (`m`) flag.
163	pub fn multi_line(&mut self, yes: bool) -> &mut LazyRegexBuilder {
164		self.options.multi_line = yes;
165		self
166	}
167
168	/// Set the value for the any character (`s`) flag, where in `.` matches
169	/// anything when `s` is set and matches anything except for new line when
170	/// it is not set (the default).
171	///
172	/// N.B. "matches anything" means "any byte" for `regex::bytes::Regex`
173	/// expressions and means "any Unicode scalar value" for `regex::Regex`
174	/// expressions.
175	pub fn dot_matches_new_line(&mut self, yes: bool) -> &mut LazyRegexBuilder {
176		self.options.dot_matches_new_line = yes;
177		self
178	}
179
180	/// Set the value for the greedy swap (`U`) flag.
181	pub fn swap_greed(&mut self, yes: bool) -> &mut LazyRegexBuilder {
182		self.options.swap_greed = yes;
183		self
184	}
185
186	/// Set the value for the ignore whitespace (`x`) flag.
187	pub fn ignore_whitespace(&mut self, yes: bool) -> &mut LazyRegexBuilder {
188		self.options.ignore_whitespace = yes;
189		self
190	}
191
192	/// Set the value for the Unicode (`u`) flag.
193	pub fn unicode(&mut self, yes: bool) -> &mut LazyRegexBuilder {
194		self.options.unicode = yes;
195		self
196	}
197
198	/// Set the approximate size limit of the compiled regular expression.
199	///
200	/// This roughly corresponds to the number of bytes occupied by a single
201	/// compiled program. If the program exceeds this number, then a
202	/// compilation error is returned.
203	pub fn size_limit(&mut self, limit: usize) -> &mut LazyRegexBuilder {
204		self.options.size_limit = limit;
205		self
206	}
207
208	/// Set the approximate size of the cache used by the DFA.
209	///
210	/// This roughly corresponds to the number of bytes that the DFA will
211	/// use while searching.
212	///
213	/// Note that this is a *per thread* limit. There is no way to set a global
214	/// limit. In particular, if a regex is used from multiple threads
215	/// simulanteously, then each thread may use up to the number of bytes
216	/// specified here.
217	pub fn dfa_size_limit(&mut self, limit: usize) -> &mut LazyRegexBuilder {
218		self.options.dfa_size_limit = limit;
219		self
220	}
221}
222
#[cfg(test)]
mod test {
	use crate::{LazyRegex, LazyRegexBuilder};

	// A regex made with `LazyRegex::new` matches, and fails to match, as the
	// pattern dictates.
	#[test]
	fn new() {
		let digits = LazyRegex::new(r"^\d+$").unwrap();
		assert!(digits.is_match("2345"));

		let letters = LazyRegex::new(r"^[a-z]+$").unwrap();
		assert!(!letters.is_match("2345"));
	}

	// Flags set on the builder are honoured by the resulting regex.
	#[test]
	fn build() {
		let insensitive = LazyRegexBuilder::new(r"^abc$")
			.case_insensitive(true)
			.build()
			.unwrap();
		assert!(insensitive.is_match("ABC"));

		let sensitive = LazyRegexBuilder::new(r"^abc$")
			.case_insensitive(false)
			.build()
			.unwrap();
		assert!(!sensitive.is_match("ABC"));
	}

	// Using the same lazy regex repeatedly keeps giving the same answer.
	#[test]
	fn same() {
		let re = LazyRegex::new(r"^\d+$").unwrap();

		for _ in 0..4 {
			assert!(re.is_match("1234"));
		}
	}
}
254		assert!(re.is_match("1234"));
255	}
256}