1use crate::{
2 ParseResult, Parser, ParserExt, ParserHandle, ParserNoValue, ParserOutput, ParserRegistry,
3};
4use std::{cell::RefCell, collections::HashMap, sync::Arc};
5
6pub mod shorthand {
7 use super::*;
8 use crate::shorthand::map;
9
10 pub fn regex(pattern: impl AsRef<str>) -> ParserHandle {
11 RegexParser::new(pattern).into_handle()
12 }
13
14 pub fn regex_capture(pattern: impl AsRef<str>, capture: impl ToString) -> ParserHandle {
15 RegexParser::new_capture(pattern, capture).into_handle()
16 }
17
18 pub fn any() -> ParserHandle {
19 regex(r".")
20 }
21
22 pub fn nl() -> ParserHandle {
23 regex(r"[\r\n]")
24 }
25
26 pub fn digit_hex() -> ParserHandle {
27 regex(r"[0-9a-fA-F]&")
28 }
29
30 pub fn digit() -> ParserHandle {
31 regex(r"\d")
32 }
33
34 pub fn number_int_pos() -> ParserHandle {
35 regex(r"\d+")
36 }
37
38 pub fn number_int() -> ParserHandle {
39 regex(r"-?\d+")
40 }
41
42 pub fn number_float() -> ParserHandle {
43 regex(r"-?\d+(\.\d+(e-?\d+)?)?")
44 }
45
46 pub fn alphanum() -> ParserHandle {
47 regex(r"\w")
48 }
49
50 pub fn alpha_low() -> ParserHandle {
51 regex(r"[a-z]")
52 }
53
54 pub fn alpha_up() -> ParserHandle {
55 regex(r"[A-Z]")
56 }
57
58 pub fn alpha() -> ParserHandle {
59 regex(r"[a-zA-Z]")
60 }
61
62 pub fn word() -> ParserHandle {
63 regex(r"\w+")
64 }
65
66 pub fn string(open: &str, close: &str) -> ParserHandle {
67 let open = open.escape_unicode().to_string();
68 let close = close.escape_unicode().to_string();
69 let pattern = format!("{open}(?<content>[^{close}]*){close}");
70 map(regex_capture(pattern, "content"), move |value: String| {
71 snailquote::unescape(&value).unwrap()
72 })
73 }
74
75 pub fn id_start() -> ParserHandle {
76 regex(r"[a-zA-Z_]")
77 }
78
79 pub fn id_continue() -> ParserHandle {
80 regex(r"[0-9a-zA-Z_]*")
81 }
82
83 pub fn id() -> ParserHandle {
84 regex(r"[a-zA-Z_][0-9a-zA-Z_]*")
85 }
86
87 pub fn ws() -> ParserHandle {
88 WhiteSpaceParser::default().into_handle()
89 }
90
91 pub fn ows() -> ParserHandle {
92 OptionalWhiteSpaceParser::default().into_handle()
93 }
94}
95
96thread_local! {
97 static REGEX_CACHE: RefCell<HashMap<String, Arc<regex::Regex>>> = Default::default();
98}
99
100#[derive(Clone)]
101pub struct RegexParser {
102 regex: Arc<regex::Regex>,
103 capture: Option<String>,
104}
105
106impl RegexParser {
107 pub fn new(pattern: impl AsRef<str>) -> Self {
108 let pattern = pattern.as_ref();
109 REGEX_CACHE.with_borrow_mut(|cache| {
110 if let Some(cached) = cache.get(pattern) {
111 return Self {
112 regex: cached.clone(),
113 capture: None,
114 };
115 }
116 let regex = Arc::new(
117 regex::Regex::new(&format!(r"^{}", pattern)).expect("Expected valid regex"),
118 );
119 cache.insert(pattern.to_string(), regex.clone());
120 Self {
121 regex,
122 capture: None,
123 }
124 })
125 }
126
127 pub fn new_capture(pattern: impl AsRef<str>, capture: impl ToString) -> Self {
128 let pattern = pattern.as_ref();
129 let capture = capture.to_string();
130 REGEX_CACHE.with_borrow_mut(|cache| {
131 if let Some(cached) = cache.get(pattern) {
132 return Self {
133 regex: cached.clone(),
134 capture: Some(capture),
135 };
136 }
137 let regex = Arc::new(
138 regex::Regex::new(&format!(r"^{}", pattern)).expect("Expected valid regex"),
139 );
140 cache.insert(pattern.to_string(), regex.clone());
141 Self {
142 regex,
143 capture: Some(capture),
144 }
145 })
146 }
147}
148
149impl Parser for RegexParser {
150 fn parse<'a>(&self, _: &ParserRegistry, input: &'a str) -> ParseResult<'a> {
151 if let Some(capture) = self.capture.as_deref() {
152 if let Some(cap) = self.regex.captures(input) {
153 Ok((
154 &input[cap.get(0).unwrap().end()..],
155 ParserOutput::new(
156 cap.name(capture)
157 .map(|mat| mat.as_str())
158 .unwrap_or("")
159 .to_owned(),
160 )
161 .ok()
162 .unwrap(),
163 ))
164 } else {
165 Err(format!(
166 "Expected regex match '{}' with capture: '{}'",
167 self.regex, capture
168 )
169 .into())
170 }
171 } else if let Some(mat) = self.regex.find(input) {
172 Ok((
173 &input[mat.end()..],
174 ParserOutput::new(mat.as_str().to_owned()).ok().unwrap(),
175 ))
176 } else {
177 Err(format!("Expected regex match '{}'", self.regex).into())
178 }
179 }
180}
181
182#[derive(Clone)]
183pub struct WhiteSpaceParser(RegexParser);
184
185impl Default for WhiteSpaceParser {
186 fn default() -> Self {
187 Self(RegexParser::new(r"\s+"))
188 }
189}
190
191impl Parser for WhiteSpaceParser {
192 fn parse<'a>(&self, registry: &ParserRegistry, input: &'a str) -> ParseResult<'a> {
193 match self.0.parse(registry, input) {
194 Ok((rest, _)) => Ok((rest, ParserOutput::new(ParserNoValue).ok().unwrap())),
195 Err(error) => Err(error),
196 }
197 }
198}
199
200#[derive(Clone)]
201pub struct OptionalWhiteSpaceParser(RegexParser);
202
203impl Default for OptionalWhiteSpaceParser {
204 fn default() -> Self {
205 Self(RegexParser::new(r"\s*"))
206 }
207}
208
209impl Parser for OptionalWhiteSpaceParser {
210 fn parse<'a>(&self, registry: &ParserRegistry, input: &'a str) -> ParseResult<'a> {
211 match self.0.parse(registry, input) {
212 Ok((rest, _)) => Ok((rest, ParserOutput::new(ParserNoValue).ok().unwrap())),
213 Err(error) => Err(error),
214 }
215 }
216}
217
#[cfg(test)]
mod tests {
    use crate::{
        ParserRegistry,
        regex::{OptionalWhiteSpaceParser, RegexParser, WhiteSpaceParser},
        shorthand::{ows, regex, regex_capture, string, ws},
    };

    /// Compile-time check that `T` is both `Send` and `Sync`.
    fn is_async<T: Send + Sync>() {}

    /// Exercises capture groups, delimited strings, plain regex matching and
    /// both whitespace parsers.
    #[test]
    fn test_regex() {
        is_async::<RegexParser>();
        is_async::<WhiteSpaceParser>();
        is_async::<OptionalWhiteSpaceParser>();

        let registry = ParserRegistry::default();

        // The whole match is consumed, but only the named group is returned.
        let keyword = regex_capture(r"\s+(?<name>\w+)\s+", "name");
        let (rest, result) = keyword.parse(&registry, " foo ").unwrap();
        assert_eq!(rest, "");
        assert_eq!(result.read::<String>().unwrap().as_str(), "foo");

        let keyword = string("`", "`");
        let (rest, result) = keyword.parse(&registry, "`Hello World!`").unwrap();
        assert_eq!(rest, "");
        assert_eq!(result.read::<String>().unwrap().as_str(), "Hello World!");

        // Delimiters that are regex metacharacters must be escaped correctly.
        let keyword = string("(", ")");
        let (rest, result) = keyword.parse(&registry, "(Hello World!)").unwrap();
        assert_eq!(rest, "");
        assert_eq!(result.read::<String>().unwrap().as_str(), "Hello World!");

        let keyword = regex(r"\w+");
        assert_eq!(keyword.parse(&registry, "foo bar").unwrap().0, " bar");

        let ws = ws();
        assert_eq!(ws.parse(&registry, " \t \n").unwrap().0, "");
        assert_eq!(
            format!("{}", ws.parse(&registry, "a").err().unwrap()),
            "Expected regex match '^\\s+'"
        );

        let ows = ows();
        assert_eq!(ows.parse(&registry, " \t \n").unwrap().0, "");
        assert_eq!(ows.parse(&registry, "foo").unwrap().0, "foo");
    }
}