1use std::{cell::RefCell, collections::HashMap, sync::Arc};
2
3use crate::{
4 ParseResult, Parser, ParserExt, ParserHandle, ParserNoValue, ParserOutput, ParserRegistry,
5};
6
7pub mod shorthand {
8 use super::*;
9 use crate::shorthand::map;
10
11 pub fn regex(pattern: impl AsRef<str>) -> ParserHandle {
12 RegexParser::new(pattern).into_handle()
13 }
14
15 pub fn regex_capture(pattern: impl AsRef<str>, capture: impl ToString) -> ParserHandle {
16 RegexParser::new_capture(pattern, capture).into_handle()
17 }
18
19 pub fn any() -> ParserHandle {
20 regex(r".")
21 }
22
23 pub fn nl() -> ParserHandle {
24 regex(r"[\r\n]")
25 }
26
27 pub fn digit_hex() -> ParserHandle {
28 regex(r"[0-9a-fA-F]&")
29 }
30
31 pub fn digit() -> ParserHandle {
32 regex(r"\d")
33 }
34
35 pub fn number_int_pos() -> ParserHandle {
36 regex(r"\d+")
37 }
38
39 pub fn number_int() -> ParserHandle {
40 regex(r"-?\d+")
41 }
42
43 pub fn number_float() -> ParserHandle {
44 regex(r"-?\d+(\.\d+(e-?\d+)?)?")
45 }
46
47 pub fn alphanum() -> ParserHandle {
48 regex(r"\w")
49 }
50
51 pub fn alpha_low() -> ParserHandle {
52 regex(r"[a-z]")
53 }
54
55 pub fn alpha_up() -> ParserHandle {
56 regex(r"[A-Z]")
57 }
58
59 pub fn alpha() -> ParserHandle {
60 regex(r"[a-zA-Z]")
61 }
62
63 pub fn word() -> ParserHandle {
64 regex(r"\w+")
65 }
66
67 pub fn string(open: &str, close: &str) -> ParserHandle {
68 let open = open.escape_unicode().to_string();
69 let close = close.escape_unicode().to_string();
70 let pattern = format!("{open}(?<content>[^{close}]*){close}");
71 map(regex_capture(pattern, "content"), move |value: String| {
72 snailquote::unescape(&value).unwrap()
73 })
74 }
75
76 pub fn id_start() -> ParserHandle {
77 regex(r"[a-zA-Z_]")
78 }
79
80 pub fn id_continue() -> ParserHandle {
81 regex(r"[0-9a-zA-Z_]*")
82 }
83
84 pub fn id() -> ParserHandle {
85 regex(r"[a-zA-Z_][0-9a-zA-Z_]*")
86 }
87
88 pub fn ws() -> ParserHandle {
89 WhiteSpaceParser::default().into_handle()
90 }
91
92 pub fn ows() -> ParserHandle {
93 OptionalWhiteSpaceParser::default().into_handle()
94 }
95}
96
97thread_local! {
98 static REGEX_CACHE: RefCell<HashMap<String, Arc<regex::Regex>>> = Default::default();
99}
100
101#[derive(Clone)]
102pub struct RegexParser {
103 regex: Arc<regex::Regex>,
104 capture: Option<String>,
105}
106
107impl RegexParser {
108 pub fn new(pattern: impl AsRef<str>) -> Self {
109 let pattern = pattern.as_ref();
110 REGEX_CACHE.with_borrow_mut(|cache| {
111 if let Some(cached) = cache.get(pattern) {
112 return Self {
113 regex: cached.clone(),
114 capture: None,
115 };
116 }
117 let regex = Arc::new(
118 regex::Regex::new(&format!(r"^{}", pattern)).expect("Expected valid regex"),
119 );
120 cache.insert(pattern.to_string(), regex.clone());
121 Self {
122 regex,
123 capture: None,
124 }
125 })
126 }
127
128 pub fn new_capture(pattern: impl AsRef<str>, capture: impl ToString) -> Self {
129 let pattern = pattern.as_ref();
130 let capture = capture.to_string();
131 REGEX_CACHE.with_borrow_mut(|cache| {
132 if let Some(cached) = cache.get(pattern) {
133 return Self {
134 regex: cached.clone(),
135 capture: Some(capture),
136 };
137 }
138 let regex = Arc::new(
139 regex::Regex::new(&format!(r"^{}", pattern)).expect("Expected valid regex"),
140 );
141 cache.insert(pattern.to_string(), regex.clone());
142 Self {
143 regex,
144 capture: Some(capture),
145 }
146 })
147 }
148}
149
150impl Parser for RegexParser {
151 fn parse<'a>(&self, _: &ParserRegistry, input: &'a str) -> ParseResult<'a> {
152 if let Some(capture) = self.capture.as_deref() {
153 if let Some(cap) = self.regex.captures(input) {
154 Ok((
155 &input[cap.get(0).unwrap().end()..],
156 ParserOutput::new(
157 cap.name(capture)
158 .map(|mat| mat.as_str())
159 .unwrap_or("")
160 .to_owned(),
161 )
162 .ok()
163 .unwrap(),
164 ))
165 } else {
166 Err(format!(
167 "Expected regex match '{}' with capture: '{}'",
168 self.regex, capture
169 )
170 .into())
171 }
172 } else if let Some(mat) = self.regex.find(input) {
173 Ok((
174 &input[mat.end()..],
175 ParserOutput::new(mat.as_str().to_owned()).ok().unwrap(),
176 ))
177 } else {
178 Err(format!("Expected regex match '{}'", self.regex).into())
179 }
180 }
181}
182
183#[derive(Clone)]
184pub struct WhiteSpaceParser(RegexParser);
185
186impl Default for WhiteSpaceParser {
187 fn default() -> Self {
188 Self(RegexParser::new(r"\s+"))
189 }
190}
191
192impl Parser for WhiteSpaceParser {
193 fn parse<'a>(&self, registry: &ParserRegistry, input: &'a str) -> ParseResult<'a> {
194 match self.0.parse(registry, input) {
195 Ok((rest, _)) => Ok((rest, ParserOutput::new(ParserNoValue).ok().unwrap())),
196 Err(error) => Err(error),
197 }
198 }
199}
200
201#[derive(Clone)]
202pub struct OptionalWhiteSpaceParser(RegexParser);
203
204impl Default for OptionalWhiteSpaceParser {
205 fn default() -> Self {
206 Self(RegexParser::new(r"\s*"))
207 }
208}
209
210impl Parser for OptionalWhiteSpaceParser {
211 fn parse<'a>(&self, registry: &ParserRegistry, input: &'a str) -> ParseResult<'a> {
212 match self.0.parse(registry, input) {
213 Ok((rest, _)) => Ok((rest, ParserOutput::new(ParserNoValue).ok().unwrap())),
214 Err(error) => Err(error),
215 }
216 }
217}
218
#[cfg(test)]
mod tests {
    use crate::{
        ParserRegistry,
        regex::{OptionalWhiteSpaceParser, RegexParser, WhiteSpaceParser},
        shorthand::{ows, regex, regex_capture, string, ws},
    };

    /// Compile-time assertion that `T` is both `Send` and `Sync`.
    fn is_async<T: Send + Sync>() {}

    #[test]
    fn test_regex() {
        is_async::<RegexParser>();
        is_async::<WhiteSpaceParser>();
        is_async::<OptionalWhiteSpaceParser>();

        let registry = ParserRegistry::default();

        // Named capture: the whole match is consumed, only the group is returned.
        let keyword = regex_capture(r"\s+(?<name>\w+)\s+", "name");
        let (rest, result) = keyword.parse(&registry, " foo ").unwrap();
        assert_eq!(rest, "");
        assert_eq!(result.read::<String>().unwrap().as_str(), "foo");

        // Delimited strings with identical open/close delimiters.
        let keyword = string("`", "`");
        let (rest, result) = keyword.parse(&registry, "`Hello World!`").unwrap();
        assert_eq!(rest, "");
        assert_eq!(result.read::<String>().unwrap().as_str(), "Hello World!");

        // Delimiters that are regex metacharacters must be taken literally.
        let keyword = string("(", ")");
        let (rest, result) = keyword.parse(&registry, "(Hello World!)").unwrap();
        assert_eq!(rest, "");
        assert_eq!(result.read::<String>().unwrap().as_str(), "Hello World!");

        // Plain regex: only the matched prefix is consumed.
        let keyword = regex(r"\w+");
        assert_eq!(keyword.parse(&registry, "foo bar").unwrap().0, " bar");

        // Mandatory whitespace fails on non-whitespace input.
        let ws = ws();
        assert_eq!(ws.parse(&registry, " \t \n").unwrap().0, "");
        assert_eq!(
            format!("{}", ws.parse(&registry, "a").err().unwrap()),
            "Expected regex match '^\\s+'"
        );

        // Optional whitespace succeeds even when nothing is consumed.
        let ows = ows();
        assert_eq!(ows.parse(&registry, " \t \n").unwrap().0, "");
        assert_eq!(ows.parse(&registry, "foo").unwrap().0, "foo");
    }
}