1use super::core::Lexer;
20use super::core::Token;
21use super::core::TokenId;
22use crate::parser::core::Result;
23use crate::syntax::Literal;
24use crate::syntax::Unquoted;
25use crate::syntax::Word;
26use std::fmt;
27use std::pin::Pin;
28use thiserror::Error;
29
30#[derive(Clone, Copy, Debug, Eq, PartialEq)]
32pub enum Operator {
33 Newline,
35 And,
37 AndAnd,
39 OpenParen,
41 CloseParen,
43 Semicolon,
45 SemicolonAnd,
47 SemicolonSemicolon,
49 SemicolonSemicolonAnd,
51 SemicolonBar,
53 Less,
55 LessAnd,
57 LessOpenParen,
59 LessLess,
61 LessLessDash,
63 LessLessLess,
65 LessGreater,
67 Greater,
69 GreaterAnd,
71 GreaterOpenParen,
73 GreaterGreater,
75 GreaterGreaterBar,
77 GreaterBar,
79 Bar,
81 BarBar,
83}
84
85impl Operator {
86 #[must_use]
88 pub const fn as_str(&self) -> &'static str {
89 use Operator::*;
90 match self {
91 Newline => "\n",
92 And => "&",
93 AndAnd => "&&",
94 OpenParen => "(",
95 CloseParen => ")",
96 Semicolon => ";",
97 SemicolonAnd => ";&",
98 SemicolonSemicolon => ";;",
99 SemicolonSemicolonAnd => ";;&",
100 SemicolonBar => ";|",
101 Less => "<",
102 LessAnd => "<&",
103 LessOpenParen => "<(",
104 LessLess => "<<",
105 LessLessDash => "<<-",
106 LessLessLess => "<<<",
107 LessGreater => "<>",
108 Greater => ">",
109 GreaterAnd => ">&",
110 GreaterOpenParen => ">(",
111 GreaterGreater => ">>",
112 GreaterGreaterBar => ">>|",
113 GreaterBar => ">|",
114 Bar => "|",
115 BarBar => "||",
116 }
117 }
118
119 #[must_use]
129 pub const fn is_clause_delimiter(self) -> bool {
130 use Operator::*;
131 match self {
132 CloseParen
133 | SemicolonAnd
134 | SemicolonSemicolon
135 | SemicolonSemicolonAnd
136 | SemicolonBar => true,
137
138 Newline | And | AndAnd | OpenParen | Semicolon | Less | LessAnd | LessOpenParen
139 | LessLess | LessLessDash | LessLessLess | LessGreater | Greater | GreaterAnd
140 | GreaterOpenParen | GreaterGreater | GreaterGreaterBar | GreaterBar | Bar | BarBar => {
141 false
142 }
143 }
144 }
145}
146
147impl fmt::Display for Operator {
148 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
149 f.write_str(self.as_str())
150 }
151}
152
153#[derive(Copy, Clone, Debug)]
157pub struct Trie(&'static [Edge]);
158
159#[derive(Copy, Clone, Debug)]
161pub struct Edge {
162 pub key: char,
164 pub value: Option<Operator>,
167 pub next: Trie,
169}
170
171impl Trie {
172 pub fn is_empty(&self) -> bool {
174 self.0.is_empty()
175 }
176
177 pub fn edge(&self, key: char) -> Option<&Edge> {
179 self.0
180 .binary_search_by_key(&key, |edge| edge.key)
181 .ok()
182 .map(|i| &self.0[i])
183 }
184}
185
186pub const OPERATORS: Trie = Trie(&[
188 Edge {
189 key: '\n',
190 value: Some(Operator::Newline),
191 next: NONE,
192 },
193 Edge {
194 key: '&',
195 value: Some(Operator::And),
196 next: AND,
197 },
198 Edge {
199 key: '(',
200 value: Some(Operator::OpenParen),
201 next: NONE,
202 },
203 Edge {
204 key: ')',
205 value: Some(Operator::CloseParen),
206 next: NONE,
207 },
208 Edge {
209 key: ';',
210 value: Some(Operator::Semicolon),
211 next: SEMICOLON,
212 },
213 Edge {
214 key: '<',
215 value: Some(Operator::Less),
216 next: LESS,
217 },
218 Edge {
219 key: '>',
220 value: Some(Operator::Greater),
221 next: GREATER,
222 },
223 Edge {
224 key: '|',
225 value: Some(Operator::Bar),
226 next: BAR,
227 },
228]);
229
230const AND: Trie = Trie(&[Edge {
232 key: '&',
233 value: Some(Operator::AndAnd),
234 next: NONE,
235}]);
236
237const SEMICOLON: Trie = Trie(&[
239 Edge {
240 key: '&',
241 value: Some(Operator::SemicolonAnd),
242 next: NONE,
243 },
244 Edge {
245 key: ';',
246 value: Some(Operator::SemicolonSemicolon),
247 next: SEMICOLON_SEMICOLON,
248 },
249 Edge {
250 key: '|',
251 value: Some(Operator::SemicolonBar),
252 next: NONE,
253 },
254]);
255
256const SEMICOLON_SEMICOLON: Trie = Trie(&[Edge {
258 key: '&',
259 value: Some(Operator::SemicolonSemicolonAnd),
260 next: NONE,
261}]);
262
263const LESS: Trie = Trie(&[
265 Edge {
266 key: '&',
267 value: Some(Operator::LessAnd),
268 next: NONE,
269 },
270 Edge {
271 key: '(',
272 value: Some(Operator::LessOpenParen),
273 next: NONE,
274 },
275 Edge {
276 key: '<',
277 value: Some(Operator::LessLess),
278 next: LESS_LESS,
279 },
280 Edge {
281 key: '>',
282 value: Some(Operator::LessGreater),
283 next: NONE,
284 },
285]);
286
287const LESS_LESS: Trie = Trie(&[
289 Edge {
290 key: '-',
291 value: Some(Operator::LessLessDash),
292 next: NONE,
293 },
294 Edge {
295 key: '<',
296 value: Some(Operator::LessLessLess),
297 next: NONE,
298 },
299]);
300
301const GREATER: Trie = Trie(&[
303 Edge {
304 key: '&',
305 value: Some(Operator::GreaterAnd),
306 next: NONE,
307 },
308 Edge {
309 key: '(',
310 value: Some(Operator::GreaterOpenParen),
311 next: NONE,
312 },
313 Edge {
314 key: '>',
315 value: Some(Operator::GreaterGreater),
316 next: GREATER_GREATER,
317 },
318 Edge {
319 key: '|',
320 value: Some(Operator::GreaterBar),
321 next: NONE,
322 },
323]);
324
325const GREATER_GREATER: Trie = Trie(&[Edge {
327 key: '|',
328 value: Some(Operator::GreaterGreaterBar),
329 next: NONE,
330}]);
331
332const BAR: Trie = Trie(&[Edge {
334 key: '|',
335 value: Some(Operator::BarBar),
336 next: NONE,
337}]);
338
339const NONE: Trie = Trie(&[]);
341
342pub fn is_operator_char(c: char) -> bool {
344 OPERATORS.edge(c).is_some()
345}
346
347struct OperatorTail {
349 pub operator: Operator,
350 pub reversed_key: Vec<char>,
351}
352
353impl Lexer<'_> {
354 fn operator_tail(
356 &mut self,
357 trie: Trie,
358 ) -> Pin<Box<dyn Future<Output = Result<Option<OperatorTail>>> + '_>> {
359 Box::pin(async move {
360 if trie.is_empty() {
361 return Ok(None);
362 }
363
364 let c = match self.peek_char().await? {
365 None => return Ok(None),
366 Some(c) => c,
367 };
368 let edge = match trie.edge(c) {
369 None => return Ok(None),
370 Some(edge) => edge,
371 };
372
373 let old_index = self.index();
374 self.consume_char();
375
376 if let Some(mut operator_tail) = self.operator_tail(edge.next).await? {
377 operator_tail.reversed_key.push(c);
378 return Ok(Some(operator_tail));
379 }
380
381 match edge.value {
382 None => {
383 self.rewind(old_index);
384 Ok(None)
385 }
386 Some(operator) => Ok(Some(OperatorTail {
387 operator,
388 reversed_key: vec![c],
389 })),
390 }
391 })
392 }
393
394 pub async fn operator(&mut self) -> Result<Option<Token>> {
396 let index = self.index();
397 self.operator_tail(OPERATORS).await.map(|o| {
398 o.map(|ot| {
399 let OperatorTail {
400 operator,
401 reversed_key,
402 } = ot;
403 let units = reversed_key
404 .into_iter()
405 .rev()
406 .map(|c| Unquoted(Literal(c)))
407 .collect::<Vec<_>>();
408 let location = self.location_range(index..self.index());
409 let word = Word { units, location };
410 let id = TokenId::Operator(operator);
411 Token { word, id, index }
412 })
413 })
414 }
415}
416
417#[derive(Clone, Debug, Eq, Error, PartialEq)]
422pub struct ParseOperatorError;
423
424impl std::fmt::Display for ParseOperatorError {
425 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
426 f.write_str("not a valid operator")
427 }
428}
429
430#[derive(Clone, Debug, Eq, Error, PartialEq)]
432pub struct TryFromOperatorError;
433
434impl fmt::Display for TryFromOperatorError {
435 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
436 f.write_str("inconvertible operator")
437 }
438}
439
440#[cfg(test)]
441mod tests {
442 use super::*;
443 use crate::input::Context;
444 use crate::input::Input;
445 use crate::source::Source;
446 use crate::syntax::TextUnit;
447 use crate::syntax::WordUnit;
448 use futures_util::FutureExt;
449
450 fn ensure_sorted(trie: &Trie) {
451 assert!(
452 trie.0.windows(2).all(|pair| pair[0].key < pair[1].key),
453 "trie should be sorted: {trie:?}"
454 );
455
456 for edge in trie.0 {
457 ensure_sorted(&edge.next);
458 }
459 }
460
461 #[test]
462 fn tries_are_sorted() {
463 ensure_sorted(&OPERATORS);
464 }
465
466 #[test]
467 fn lexer_operator_longest_match() {
468 let mut lexer = Lexer::with_code("<<-");
469
470 let t = lexer.operator().now_or_never().unwrap().unwrap().unwrap();
471 assert_eq!(t.word.units.len(), 3);
472 assert_eq!(t.word.units[0], WordUnit::Unquoted(TextUnit::Literal('<')));
473 assert_eq!(t.word.units[1], WordUnit::Unquoted(TextUnit::Literal('<')));
474 assert_eq!(t.word.units[2], WordUnit::Unquoted(TextUnit::Literal('-')));
475 assert_eq!(*t.word.location.code.value.borrow(), "<<-");
476 assert_eq!(t.word.location.code.start_line_number.get(), 1);
477 assert_eq!(*t.word.location.code.source, Source::Unknown);
478 assert_eq!(t.word.location.range, 0..3);
479 assert_eq!(t.id, TokenId::Operator(Operator::LessLessDash));
480
481 assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
482 }
483
484 #[test]
485 fn lexer_operator_delimited_by_another_operator() {
486 let mut lexer = Lexer::with_code("<<>");
487
488 let t = lexer.operator().now_or_never().unwrap().unwrap().unwrap();
489 assert_eq!(t.word.units.len(), 2);
490 assert_eq!(t.word.units[0], WordUnit::Unquoted(TextUnit::Literal('<')));
491 assert_eq!(t.word.units[1], WordUnit::Unquoted(TextUnit::Literal('<')));
492 assert_eq!(*t.word.location.code.value.borrow(), "<<>");
493 assert_eq!(t.word.location.code.start_line_number.get(), 1);
494 assert_eq!(*t.word.location.code.source, Source::Unknown);
495 assert_eq!(t.word.location.range, 0..2);
496 assert_eq!(t.id, TokenId::Operator(Operator::LessLess));
497
498 assert_eq!(
499 lexer.location().now_or_never().unwrap().unwrap().range,
500 2..3
501 );
502 }
503
504 #[test]
505 fn lexer_operator_delimited_by_eof() {
506 let mut lexer = Lexer::with_code("<<");
507
508 let t = lexer.operator().now_or_never().unwrap().unwrap().unwrap();
509 assert_eq!(t.word.units.len(), 2);
510 assert_eq!(t.word.units[0], WordUnit::Unquoted(TextUnit::Literal('<')));
511 assert_eq!(t.word.units[1], WordUnit::Unquoted(TextUnit::Literal('<')));
512 assert_eq!(*t.word.location.code.value.borrow(), "<<");
513 assert_eq!(t.word.location.code.start_line_number.get(), 1);
514 assert_eq!(*t.word.location.code.source, Source::Unknown);
515 assert_eq!(t.word.location.range, 0..2);
516 assert_eq!(t.id, TokenId::Operator(Operator::LessLess));
517
518 assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
519 }
520
521 #[test]
522 fn lexer_operator_containing_line_continuations() {
523 let mut lexer = Lexer::with_code("\\\n\\\n<\\\n<\\\n>");
524
525 let t = lexer.operator().now_or_never().unwrap().unwrap().unwrap();
526 assert_eq!(t.word.units.len(), 2);
527 assert_eq!(t.word.units[0], WordUnit::Unquoted(TextUnit::Literal('<')));
528 assert_eq!(t.word.units[1], WordUnit::Unquoted(TextUnit::Literal('<')));
529 assert_eq!(*t.word.location.code.value.borrow(), "\\\n\\\n<\\\n<\\\n>");
530 assert_eq!(t.word.location.code.start_line_number.get(), 1);
531 assert_eq!(*t.word.location.code.source, Source::Unknown);
532 assert_eq!(t.word.location.range, 0..10);
533 assert_eq!(t.id, TokenId::Operator(Operator::LessLess));
534
535 assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(Some('>')));
536 }
537
538 #[test]
539 fn lexer_operator_none() {
540 let mut lexer = Lexer::with_code("\\\n ");
541
542 let r = lexer.operator().now_or_never().unwrap().unwrap();
543 assert!(r.is_none(), "unexpected success: {r:?}");
544 }
545
546 #[test]
547 fn lexer_operator_should_not_peek_beyond_newline() {
548 struct OneLineInput(Option<String>);
549 impl Input for OneLineInput {
550 async fn next_line(&mut self, _: &Context) -> crate::input::Result {
551 if let Some(line) = self.0.take() {
552 Ok(line)
553 } else {
554 panic!("The second line should not be read")
555 }
556 }
557 }
558
559 let mut lexer = Lexer::new(Box::new(OneLineInput(Some("\n".to_owned()))));
560
561 let t = lexer.operator().now_or_never().unwrap().unwrap().unwrap();
562 assert_eq!(t.word.units, [WordUnit::Unquoted(TextUnit::Literal('\n'))]);
563 assert_eq!(*t.word.location.code.value.borrow(), "\n");
564 assert_eq!(t.word.location.code.start_line_number.get(), 1);
565 assert_eq!(*t.word.location.code.source, Source::Unknown);
566 assert_eq!(t.word.location.range, 0..1);
567 assert_eq!(t.id, TokenId::Operator(Operator::Newline));
568 }
569}