// noa_parser/bytes/components/groups.rs

//! Group components
use crate::bytes::token::Token;
use crate::errors::ParseResult;
use crate::peek::{PeekResult, Peekable};
use crate::recognizer::Recognizable;
use crate::scanner::Scanner;

9/// Try to recognize either a start group or an end group token.
10///
11/// If the start group token is recognized, increment the balancing counter.
12/// If the end group token is recognized, decrement the balancing counter.
13/// If neither is recognized, move the tokenizer by one byte.
14///
15/// # Arguments
16///
17/// * `tokenizer` - The tokenizer to use
18/// * `balance` - A mutable reference to the balancing counter
19/// * `start` - The start group token to recognize
20/// * `end` - The end group token to recognize
21///
22/// # Errors
23///
24/// Returns `Err(ParseError)` if the tokenizer encounters an error.
25///
26/// # Examples
27///
28///
29pub fn match_for_balanced_group<'a, V1, T1, V2, T2>(
30 tokenizer: &mut Scanner<'a, u8>,
31 balance: &mut usize,
32 start: T1,
33 end: T2,
34) -> ParseResult<()>
35where
36 T1: Recognizable<'a, u8, V1> + Copy,
37 T2: Recognizable<'a, u8, V2> + Copy,
38{
39 // try to recognize start group token
40 match start.recognize(tokenizer)? {
41 // if not start token try to recognize end group token
42 None => match end.recognize(tokenizer)? {
43 // if end group token decrement balancing counter
44 Some(_end_token) => *balance -= 1,
45 // if neither, move by one byte
46 None => {
47 tokenizer.bump_by(1);
48 return Ok(());
49 }
50 },
51 // if start group token increment balancing counter
52 Some(_start_token) => *balance += 1,
53 };
54
55 Ok(())
56}
57
58/// A closure that takes a slice of bytes and returns a `PeekResult` indicating
59/// whether the slice matches a balanced group.
60///
61/// A balanced group is a sequence of bytes that has the same number of start
62/// and end group tokens. The start group token is recognized by the `start`
63/// parameter and the end group token is recognized by the `end` parameter.
64///
65/// The closure returns `Ok(PeekResult::Found { end_slice, start, end })` if the
66/// slice matches a balanced group, `Ok(PeekResult::NotFound)` if the slice
67/// does not match a balanced group, and `Err(ParseError)` if there is an error
68/// recognizing the tokens.
69///
70/// # Arguments
71///
72/// * `start` - The start group token to recognize
73/// * `end` - The end group token to recognize
74///
75/// # Returns
76///
77/// A closure that takes a slice of bytes and returns a `PeekResult` indicating
78/// whether the slice matches a balanced group.
79pub fn match_group<'a, V1, T1, V2, T2>(
80 start: T1,
81 end: T2,
82) -> impl Fn(&'a [u8]) -> ParseResult<PeekResult<T1, T2>> + 'a
83where
84 T1: Recognizable<'a, u8, V1> + Copy + 'a,
85 T2: Recognizable<'a, u8, V2> + Copy + 'a,
86{
87 move |input: &'a [u8]| {
88 // 0 if number of start token equals number of end token
89 // i.e: ( 5 + 3 - ( 10 * 8 ) ) => 2 "(" and 2 ")" => balanced
90 // ( 5 + 3 - ( 10 * 8 ) => 2 "(" and 1 ")" => unbalanced
91 let mut balance = 0;
92
93 let mut tokenizer = Scanner::new(input);
94
95 loop {
96 match_for_balanced_group(&mut tokenizer, &mut balance, start, end)?;
97 // if balancing is 0 then either there is no group at all or is balanced
98 if balance == 0 {
99 break;
100 }
101 }
102
103 // not enough bytes to create a group
104 if tokenizer.current_position() == 1 {
105 return Ok(PeekResult::NotFound);
106 }
107
108 Ok(PeekResult::Found {
109 end_slice: tokenizer.current_position(),
110 start,
111 end,
112 })
113 }
114}
115
116/// A closure that takes a slice of bytes and returns a `PeekResult` indicating
117/// whether the slice matches a delimited group.
118///
119/// A delimited group is a sequence of bytes that starts and ends with the same
120/// token and has no other occurrence of that token in between. The token is
121/// recognized by the `token` parameter and the escape token is recognized by the
122/// `escape_token` parameter.
123///
124/// The closure returns `Ok(PeekResult::Found { end_slice, start, end })` if the
125/// slice matches a delimited group, `Ok(PeekResult::NotFound)` if the slice
126/// does not match a delimited group, and `Err(ParseError)` if there is an error
127/// recognizing the tokens.
128///
129/// # Arguments
130///
131/// * `token` - The token to recognize at the start and end of the group
132/// * `escape_token` - The escape token to recognize and ignore in the group
133///
134/// # Returns
135///
136/// A closure that takes a slice of bytes and returns a `PeekResult` indicating
137/// whether the slice matches a delimited group.
138pub fn match_for_delimited_group<'a, V, T, V2, T2>(
139 token: T,
140 escape_token: T2,
141) -> impl Fn(&'a [u8]) -> ParseResult<PeekResult<T, T>> + 'a
142where
143 T: Recognizable<'a, u8, V> + Copy + 'a,
144 T2: Recognizable<'a, u8, V2> + Copy + 'a,
145{
146 move |input: &'a [u8]| {
147 // le groupe doit au moins faire 2 tokens de taille
148 if input.len() < token.size() * 2 {
149 return Ok(PeekResult::NotFound);
150 }
151
152 // on créé un scanner à partir des données
153 let mut tokenizer = Scanner::new(input);
154
155 // le groupe doit obligatoirement débuter par le token
156 if token.recognize(&mut tokenizer)?.is_none() {
157 return Ok(PeekResult::NotFound);
158 }
159 // on avance de la taille du token reconnu
160 tokenizer.bump_by(token.size());
161
162 // ce flag permet de savoir si la prédiction a été un succès
163 let mut found = false;
164
165 // tant que la slice contient des bytes, on essaie de reconnaître le token
166 while !tokenizer.remaining().is_empty() {
167 // si le token est reconnu quelque part dans la slice
168 if token.recognize(&mut tokenizer)?.is_some() {
169 // on créé un nouveau scanner qui est un token et un \ en arrière
170 let mut rewind_tokenizer = Scanner::new(
171 &tokenizer.data()
172 [tokenizer.current_position() - token.size() - escape_token.size()..],
173 );
174 // on tente de reconnaître le \
175 if escape_token.recognize(&mut rewind_tokenizer)?.is_some() {
176 // s'il est présent, le token est échappé
177 continue;
178 }
179 // sinon on a atteint la fin du groupe et la prédiction est un succès
180 found = true;
181 break;
182 }
183 // sinon on avance d'un byte
184 tokenizer.bump_by(1);
185 }
186
187 // Si la prédiction est un échec
188 if !found {
189 return Ok(PeekResult::NotFound);
190 }
191
192 Ok(PeekResult::Found {
193 end_slice: tokenizer.current_position(),
194 start: token,
195 end: token,
196 })
197 }
198}
199
/// Types of groups
///
/// This enum is used to specify the type of a group in a matcher.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GroupKind {
    /// A group enclosed in parentheses
    Parenthesis,
    /// A group enclosed in single quotes
    Quotes,
    /// A group enclosed in double quotes
    DoubleQuotes,
}
211
212impl GroupKind {
213 fn matcher<'a>(&self) -> Box<dyn Fn(&'a [u8]) -> ParseResult<PeekResult<Token, Token>> + 'a>
214where {
215 match self {
216 GroupKind::Parenthesis => Box::new(match_group(Token::OpenParen, Token::CloseParen)),
217 GroupKind::Quotes => {
218 Box::new(match_for_delimited_group(Token::Quote, Token::Backslash))
219 }
220 GroupKind::DoubleQuotes => Box::new(match_for_delimited_group(
221 Token::DoubleQuote,
222 Token::Backslash,
223 )),
224 }
225 }
226}
227
impl<'a> Peekable<'a, u8, Token, Token> for GroupKind {
    /// Peek at the scanner's remaining bytes and report whether they start
    /// with a group of this kind, delegating to the closure built by
    /// `GroupKind::matcher`. The scanner itself is not advanced here.
    fn peek(&self, data: &Scanner<'a, u8>) -> ParseResult<PeekResult<Token, Token>> {
        self.matcher()(data.remaining())
    }
}
233
234#[cfg(test)]
235mod tests {
236 use crate::bytes::components::groups::{GroupKind, match_for_delimited_group, match_group};
237 use crate::bytes::token::Token;
238 use crate::peek::{PeekResult, Peeking, peek};
239 use crate::scanner::Scanner;
240
    #[test]
    fn test_match_group() {
        // A balanced parenthesis group at the start of the input: the matcher
        // should report the span of the outermost group only, ignoring the
        // trailing " + 54".
        let data = b"( 5 + 3 - ( 10 * 8 ) ) + 54";
        let result =
            match_group(Token::OpenParen, Token::CloseParen)(data).expect("failed to parse");
        assert_eq!(
            result,
            PeekResult::Found {
                end_slice: 22,
                start: Token::OpenParen,
                end: Token::CloseParen
            }
        );
        // `end_slice` is the byte length of the matched group
        assert_eq!(&data[..22], b"( 5 + 3 - ( 10 * 8 ) )");
    }
256
    #[test]
    fn test_match_group_delimited() {
        // NOTE(review): despite its name, this test exercises the balanced
        // `GroupKind::Parenthesis` matcher through the `Peekable`/`peek`
        // interface — not the delimited-group matcher.
        let data = b"( 5 + 3 - ( 10 * 8 ) ) + 54";
        let mut tokenizer = Scanner::new(data);
        let result = peek(GroupKind::Parenthesis, &mut tokenizer).expect("failed to parse");
        assert_eq!(
            result,
            Some(Peeking {
                start: Token::OpenParen,
                end: Token::CloseParen,
                data: &data[0..22],
                end_slice: 22
            })
        );
        // the peeked slice covers exactly the outermost balanced group
        assert_eq!(&data[..22], b"( 5 + 3 - ( 10 * 8 ) )");
    }
273
    #[test]
    fn test_match_quotes() {
        // Simple single-quoted group followed by trailing data
        let data = b"'hello world' data";
        let result = match_for_delimited_group(Token::Quote, Token::Backslash)(data)
            .expect("failed to parse");
        assert_eq!(
            result,
            PeekResult::Found {
                end_slice: 13,
                start: Token::Quote,
                end: Token::Quote
            }
        );
        assert_eq!(&data[..13], b"'hello world'");

        // A backslash-escaped quote inside the group must not close it.
        // Note: `é` is two bytes in UTF-8, so end_slice counts bytes, not chars.
        let data = r#"'hello world l\'éléphant' data"#;
        let result = match_for_delimited_group(Token::Quote, Token::Backslash)(data.as_bytes())
            .expect("failed to parse");
        assert_eq!(
            result,
            PeekResult::Found {
                end_slice: 27,
                start: Token::Quote,
                end: Token::Quote
            }
        );
        assert_eq!(&data[..27], r#"'hello world l\'éléphant'"#);

        // Double-quoted group, escaped-string literal form
        let data = "\"hello world\" data";
        let result =
            match_for_delimited_group(Token::DoubleQuote, Token::Backslash)(data.as_bytes())
                .expect("failed to parse");
        assert_eq!(
            result,
            PeekResult::Found {
                end_slice: 13,
                start: Token::DoubleQuote,
                end: Token::DoubleQuote
            }
        );
        assert_eq!(&data[..13], "\"hello world\"");

        // Same input expressed as a raw string literal
        let data = r#""hello world" data"#;
        let result =
            match_for_delimited_group(Token::DoubleQuote, Token::Backslash)(data.as_bytes())
                .expect("failed to parse");
        assert_eq!(
            result,
            PeekResult::Found {
                end_slice: 13,
                start: Token::DoubleQuote,
                end: Token::DoubleQuote
            }
        );
        assert_eq!(&data[..13], r#""hello world""#);
    }
330}