yash_syntax/parser/lex/
op.rs

1// This file is part of yash, an extended POSIX shell.
2// Copyright (C) 2020 WATANABE Yuki
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17//! Part of the lexer that parses operators
18
19use super::core::Lexer;
20use super::core::Token;
21use super::core::TokenId;
22use crate::parser::core::Result;
23use crate::syntax::Literal;
24use crate::syntax::Unquoted;
25use crate::syntax::Word;
26use std::fmt;
27use std::pin::Pin;
28use thiserror::Error;
29
30/// Operator token identifier
31#[derive(Clone, Copy, Debug, Eq, PartialEq)]
32pub enum Operator {
33    /// Newline
34    Newline,
35    /// `&`
36    And,
37    /// `&&`
38    AndAnd,
39    /// `(`
40    OpenParen,
41    /// `)`
42    CloseParen,
43    /// `;`
44    Semicolon,
45    /// `;&`
46    SemicolonAnd,
47    /// `;;`
48    SemicolonSemicolon,
49    /// `;;&`
50    SemicolonSemicolonAnd,
51    /// `;|`
52    SemicolonBar,
53    /// `<`
54    Less,
55    /// `<&`
56    LessAnd,
57    /// `<(`
58    LessOpenParen,
59    /// `<<`
60    LessLess,
61    /// `<<-`
62    LessLessDash,
63    /// `<<<`
64    LessLessLess,
65    /// `<>`
66    LessGreater,
67    /// `>`
68    Greater,
69    /// `>&`
70    GreaterAnd,
71    /// `>(`
72    GreaterOpenParen,
73    /// `>>`
74    GreaterGreater,
75    /// `>>|`
76    GreaterGreaterBar,
77    /// `>|`
78    GreaterBar,
79    /// `|`
80    Bar,
81    /// `||`
82    BarBar,
83}
84
85impl Operator {
86    /// Returns the literal string representation of the operator.
87    #[must_use]
88    pub const fn as_str(&self) -> &'static str {
89        use Operator::*;
90        match self {
91            Newline => "\n",
92            And => "&",
93            AndAnd => "&&",
94            OpenParen => "(",
95            CloseParen => ")",
96            Semicolon => ";",
97            SemicolonAnd => ";&",
98            SemicolonSemicolon => ";;",
99            SemicolonSemicolonAnd => ";;&",
100            SemicolonBar => ";|",
101            Less => "<",
102            LessAnd => "<&",
103            LessOpenParen => "<(",
104            LessLess => "<<",
105            LessLessDash => "<<-",
106            LessLessLess => "<<<",
107            LessGreater => "<>",
108            Greater => ">",
109            GreaterAnd => ">&",
110            GreaterOpenParen => ">(",
111            GreaterGreater => ">>",
112            GreaterGreaterBar => ">>|",
113            GreaterBar => ">|",
114            Bar => "|",
115            BarBar => "||",
116        }
117    }
118
119    /// Determines if this token can be a delimiter of a clause.
120    ///
121    /// This function returns `true` for the following operators:
122    ///
123    /// - `CloseParen` (`)`)
124    /// - `SemicolonAnd` (`;&`)
125    /// - `SemicolonSemicolon` (`;;`)
126    /// - `SemicolonSemicolonAnd` (`;;&`)
127    /// - `SemicolonBar` (`;|`)
128    #[must_use]
129    pub const fn is_clause_delimiter(self) -> bool {
130        use Operator::*;
131        match self {
132            CloseParen
133            | SemicolonAnd
134            | SemicolonSemicolon
135            | SemicolonSemicolonAnd
136            | SemicolonBar => true,
137
138            Newline | And | AndAnd | OpenParen | Semicolon | Less | LessAnd | LessOpenParen
139            | LessLess | LessLessDash | LessLessLess | LessGreater | Greater | GreaterAnd
140            | GreaterOpenParen | GreaterGreater | GreaterGreaterBar | GreaterBar | Bar | BarBar => {
141                false
142            }
143        }
144    }
145}
146
147impl fmt::Display for Operator {
148    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
149        f.write_str(self.as_str())
150    }
151}
152
153/// Trie data structure that defines a set of operator tokens.
154///
155/// This struct represents a node of the trie. A node is a sorted array of [`Edge`]s.
156#[derive(Copy, Clone, Debug)]
157pub struct Trie(&'static [Edge]);
158
159/// Edge of a [`Trie`]
160#[derive(Copy, Clone, Debug)]
161pub struct Edge {
162    /// Character value of this edge
163    pub key: char,
164    /// Final operator token that is delimited after taking this edge if there are no longer
165    /// matches
166    pub value: Option<Operator>,
167    /// Sub-trie containing values for keys that have the common prefix
168    pub next: Trie,
169}
170
171impl Trie {
172    /// Tests if this trie is empty.
173    pub fn is_empty(&self) -> bool {
174        self.0.is_empty()
175    }
176
177    /// Finds an edge for the given key.
178    pub fn edge(&self, key: char) -> Option<&Edge> {
179        self.0
180            .binary_search_by_key(&key, |edge| edge.key)
181            .ok()
182            .map(|i| &self.0[i])
183    }
184}
185
186/// Trie containing all the operators
187pub const OPERATORS: Trie = Trie(&[
188    Edge {
189        key: '\n',
190        value: Some(Operator::Newline),
191        next: NONE,
192    },
193    Edge {
194        key: '&',
195        value: Some(Operator::And),
196        next: AND,
197    },
198    Edge {
199        key: '(',
200        value: Some(Operator::OpenParen),
201        next: NONE,
202    },
203    Edge {
204        key: ')',
205        value: Some(Operator::CloseParen),
206        next: NONE,
207    },
208    Edge {
209        key: ';',
210        value: Some(Operator::Semicolon),
211        next: SEMICOLON,
212    },
213    Edge {
214        key: '<',
215        value: Some(Operator::Less),
216        next: LESS,
217    },
218    Edge {
219        key: '>',
220        value: Some(Operator::Greater),
221        next: GREATER,
222    },
223    Edge {
224        key: '|',
225        value: Some(Operator::Bar),
226        next: BAR,
227    },
228]);
229
230/// Trie of the operators that start with `&`
231const AND: Trie = Trie(&[Edge {
232    key: '&',
233    value: Some(Operator::AndAnd),
234    next: NONE,
235}]);
236
237/// Trie of the operators that start with `;`
238const SEMICOLON: Trie = Trie(&[
239    Edge {
240        key: '&',
241        value: Some(Operator::SemicolonAnd),
242        next: NONE,
243    },
244    Edge {
245        key: ';',
246        value: Some(Operator::SemicolonSemicolon),
247        next: SEMICOLON_SEMICOLON,
248    },
249    Edge {
250        key: '|',
251        value: Some(Operator::SemicolonBar),
252        next: NONE,
253    },
254]);
255
256/// Trie of the operators that start with `;;`
257const SEMICOLON_SEMICOLON: Trie = Trie(&[Edge {
258    key: '&',
259    value: Some(Operator::SemicolonSemicolonAnd),
260    next: NONE,
261}]);
262
263/// Trie of the operators that start with `<`
264const LESS: Trie = Trie(&[
265    Edge {
266        key: '&',
267        value: Some(Operator::LessAnd),
268        next: NONE,
269    },
270    Edge {
271        key: '(',
272        value: Some(Operator::LessOpenParen),
273        next: NONE,
274    },
275    Edge {
276        key: '<',
277        value: Some(Operator::LessLess),
278        next: LESS_LESS,
279    },
280    Edge {
281        key: '>',
282        value: Some(Operator::LessGreater),
283        next: NONE,
284    },
285]);
286
287/// Trie of the operators that start with `<<`
288const LESS_LESS: Trie = Trie(&[
289    Edge {
290        key: '-',
291        value: Some(Operator::LessLessDash),
292        next: NONE,
293    },
294    Edge {
295        key: '<',
296        value: Some(Operator::LessLessLess),
297        next: NONE,
298    },
299]);
300
301/// Trie of the operators that start with `>`
302const GREATER: Trie = Trie(&[
303    Edge {
304        key: '&',
305        value: Some(Operator::GreaterAnd),
306        next: NONE,
307    },
308    Edge {
309        key: '(',
310        value: Some(Operator::GreaterOpenParen),
311        next: NONE,
312    },
313    Edge {
314        key: '>',
315        value: Some(Operator::GreaterGreater),
316        next: GREATER_GREATER,
317    },
318    Edge {
319        key: '|',
320        value: Some(Operator::GreaterBar),
321        next: NONE,
322    },
323]);
324
325/// Trie of the operators that start with `>>`
326const GREATER_GREATER: Trie = Trie(&[Edge {
327    key: '|',
328    value: Some(Operator::GreaterGreaterBar),
329    next: NONE,
330}]);
331
332/// Trie of the operators that start with `|`
333const BAR: Trie = Trie(&[Edge {
334    key: '|',
335    value: Some(Operator::BarBar),
336    next: NONE,
337}]);
338
339/// Trie containing nothing
340const NONE: Trie = Trie(&[]);
341
342/// Tests whether the given character is the first character of an operator.
343pub fn is_operator_char(c: char) -> bool {
344    OPERATORS.edge(c).is_some()
345}
346
347/// Return type for [`Lexer::operator_tail`]
348struct OperatorTail {
349    pub operator: Operator,
350    pub reversed_key: Vec<char>,
351}
352
353impl Lexer<'_> {
354    /// Parses an operator that matches a key in the given trie, if any.
355    fn operator_tail(
356        &mut self,
357        trie: Trie,
358    ) -> Pin<Box<dyn Future<Output = Result<Option<OperatorTail>>> + '_>> {
359        Box::pin(async move {
360            if trie.is_empty() {
361                return Ok(None);
362            }
363
364            let c = match self.peek_char().await? {
365                None => return Ok(None),
366                Some(c) => c,
367            };
368            let edge = match trie.edge(c) {
369                None => return Ok(None),
370                Some(edge) => edge,
371            };
372
373            let old_index = self.index();
374            self.consume_char();
375
376            if let Some(mut operator_tail) = self.operator_tail(edge.next).await? {
377                operator_tail.reversed_key.push(c);
378                return Ok(Some(operator_tail));
379            }
380
381            match edge.value {
382                None => {
383                    self.rewind(old_index);
384                    Ok(None)
385                }
386                Some(operator) => Ok(Some(OperatorTail {
387                    operator,
388                    reversed_key: vec![c],
389                })),
390            }
391        })
392    }
393
394    /// Parses an operator token.
395    pub async fn operator(&mut self) -> Result<Option<Token>> {
396        let index = self.index();
397        self.operator_tail(OPERATORS).await.map(|o| {
398            o.map(|ot| {
399                let OperatorTail {
400                    operator,
401                    reversed_key,
402                } = ot;
403                let units = reversed_key
404                    .into_iter()
405                    .rev()
406                    .map(|c| Unquoted(Literal(c)))
407                    .collect::<Vec<_>>();
408                let location = self.location_range(index..self.index());
409                let word = Word { units, location };
410                let id = TokenId::Operator(operator);
411                Token { word, id, index }
412            })
413        })
414    }
415}
416
417/// Error value indicating that the input string is not a valid operator
418///
419/// This error is returned by [`FromStr`](std::str::FromStr) when the input
420/// string is not a valid operator.
421#[derive(Clone, Debug, Eq, Error, PartialEq)]
422pub struct ParseOperatorError;
423
424impl std::fmt::Display for ParseOperatorError {
425    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
426        f.write_str("not a valid operator")
427    }
428}
429
430/// Error value indicating an operand conversion failure
431#[derive(Clone, Debug, Eq, Error, PartialEq)]
432pub struct TryFromOperatorError;
433
434impl fmt::Display for TryFromOperatorError {
435    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
436        f.write_str("inconvertible operator")
437    }
438}
439
440#[cfg(test)]
441mod tests {
442    use super::*;
443    use crate::input::Context;
444    use crate::input::Input;
445    use crate::source::Source;
446    use crate::syntax::TextUnit;
447    use crate::syntax::WordUnit;
448    use futures_util::FutureExt;
449
450    fn ensure_sorted(trie: &Trie) {
451        assert!(
452            trie.0.windows(2).all(|pair| pair[0].key < pair[1].key),
453            "trie should be sorted: {trie:?}"
454        );
455
456        for edge in trie.0 {
457            ensure_sorted(&edge.next);
458        }
459    }
460
461    #[test]
462    fn tries_are_sorted() {
463        ensure_sorted(&OPERATORS);
464    }
465
466    #[test]
467    fn lexer_operator_longest_match() {
468        let mut lexer = Lexer::with_code("<<-");
469
470        let t = lexer.operator().now_or_never().unwrap().unwrap().unwrap();
471        assert_eq!(t.word.units.len(), 3);
472        assert_eq!(t.word.units[0], WordUnit::Unquoted(TextUnit::Literal('<')));
473        assert_eq!(t.word.units[1], WordUnit::Unquoted(TextUnit::Literal('<')));
474        assert_eq!(t.word.units[2], WordUnit::Unquoted(TextUnit::Literal('-')));
475        assert_eq!(*t.word.location.code.value.borrow(), "<<-");
476        assert_eq!(t.word.location.code.start_line_number.get(), 1);
477        assert_eq!(*t.word.location.code.source, Source::Unknown);
478        assert_eq!(t.word.location.range, 0..3);
479        assert_eq!(t.id, TokenId::Operator(Operator::LessLessDash));
480
481        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
482    }
483
484    #[test]
485    fn lexer_operator_delimited_by_another_operator() {
486        let mut lexer = Lexer::with_code("<<>");
487
488        let t = lexer.operator().now_or_never().unwrap().unwrap().unwrap();
489        assert_eq!(t.word.units.len(), 2);
490        assert_eq!(t.word.units[0], WordUnit::Unquoted(TextUnit::Literal('<')));
491        assert_eq!(t.word.units[1], WordUnit::Unquoted(TextUnit::Literal('<')));
492        assert_eq!(*t.word.location.code.value.borrow(), "<<>");
493        assert_eq!(t.word.location.code.start_line_number.get(), 1);
494        assert_eq!(*t.word.location.code.source, Source::Unknown);
495        assert_eq!(t.word.location.range, 0..2);
496        assert_eq!(t.id, TokenId::Operator(Operator::LessLess));
497
498        assert_eq!(
499            lexer.location().now_or_never().unwrap().unwrap().range,
500            2..3
501        );
502    }
503
504    #[test]
505    fn lexer_operator_delimited_by_eof() {
506        let mut lexer = Lexer::with_code("<<");
507
508        let t = lexer.operator().now_or_never().unwrap().unwrap().unwrap();
509        assert_eq!(t.word.units.len(), 2);
510        assert_eq!(t.word.units[0], WordUnit::Unquoted(TextUnit::Literal('<')));
511        assert_eq!(t.word.units[1], WordUnit::Unquoted(TextUnit::Literal('<')));
512        assert_eq!(*t.word.location.code.value.borrow(), "<<");
513        assert_eq!(t.word.location.code.start_line_number.get(), 1);
514        assert_eq!(*t.word.location.code.source, Source::Unknown);
515        assert_eq!(t.word.location.range, 0..2);
516        assert_eq!(t.id, TokenId::Operator(Operator::LessLess));
517
518        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
519    }
520
521    #[test]
522    fn lexer_operator_containing_line_continuations() {
523        let mut lexer = Lexer::with_code("\\\n\\\n<\\\n<\\\n>");
524
525        let t = lexer.operator().now_or_never().unwrap().unwrap().unwrap();
526        assert_eq!(t.word.units.len(), 2);
527        assert_eq!(t.word.units[0], WordUnit::Unquoted(TextUnit::Literal('<')));
528        assert_eq!(t.word.units[1], WordUnit::Unquoted(TextUnit::Literal('<')));
529        assert_eq!(*t.word.location.code.value.borrow(), "\\\n\\\n<\\\n<\\\n>");
530        assert_eq!(t.word.location.code.start_line_number.get(), 1);
531        assert_eq!(*t.word.location.code.source, Source::Unknown);
532        assert_eq!(t.word.location.range, 0..10);
533        assert_eq!(t.id, TokenId::Operator(Operator::LessLess));
534
535        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(Some('>')));
536    }
537
538    #[test]
539    fn lexer_operator_none() {
540        let mut lexer = Lexer::with_code("\\\n ");
541
542        let r = lexer.operator().now_or_never().unwrap().unwrap();
543        assert!(r.is_none(), "unexpected success: {r:?}");
544    }
545
546    #[test]
547    fn lexer_operator_should_not_peek_beyond_newline() {
548        struct OneLineInput(Option<String>);
549        impl Input for OneLineInput {
550            async fn next_line(&mut self, _: &Context) -> crate::input::Result {
551                if let Some(line) = self.0.take() {
552                    Ok(line)
553                } else {
554                    panic!("The second line should not be read")
555                }
556            }
557        }
558
559        let mut lexer = Lexer::new(Box::new(OneLineInput(Some("\n".to_owned()))));
560
561        let t = lexer.operator().now_or_never().unwrap().unwrap().unwrap();
562        assert_eq!(t.word.units, [WordUnit::Unquoted(TextUnit::Literal('\n'))]);
563        assert_eq!(*t.word.location.code.value.borrow(), "\n");
564        assert_eq!(t.word.location.code.start_line_number.get(), 1);
565        assert_eq!(*t.word.location.code.source, Source::Unknown);
566        assert_eq!(t.word.location.range, 0..1);
567        assert_eq!(t.id, TokenId::Operator(Operator::Newline));
568    }
569}