yash_syntax/parser/lex/
modifier.rs

1// This file is part of yash, an extended POSIX shell.
2// Copyright (C) 2021 WATANABE Yuki
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17//! Part of the lexer that parses suffix modifiers
18
19use super::core::Lexer;
20use super::core::WordContext;
21use super::core::WordLexer;
22use crate::parser::core::Result;
23use crate::parser::error::Error;
24use crate::parser::error::SyntaxError;
25use crate::syntax::Modifier;
26use crate::syntax::Switch;
27use crate::syntax::SwitchCondition;
28use crate::syntax::SwitchType;
29use crate::syntax::Trim;
30use crate::syntax::TrimLength;
31use crate::syntax::TrimSide;
32
33impl Lexer<'_> {
34    /// Returns an invalid modifier error.
35    ///
36    /// The `start_index` must be the index of the first character of the modifier.
37    fn invalid_modifier(&mut self, start_index: usize) -> Result<Modifier> {
38        let cause = SyntaxError::InvalidModifier.into();
39        let location = self.location_range(start_index..self.index());
40        Err(Error { cause, location })
41    }
42
43    fn suffix_modifier_not_found(&mut self, start_index: usize, colon: bool) -> Result<Modifier> {
44        if colon {
45            self.invalid_modifier(start_index)
46        } else {
47            Ok(Modifier::None)
48        }
49    }
50
51    /// Parses a [trim](Trim).
52    ///
53    /// This function blindly consumes the current character, which must be
54    /// `symbol`.
55    async fn trim(&mut self, start_index: usize, colon: bool, symbol: char) -> Result<Modifier> {
56        self.consume_char();
57        if colon {
58            return self.invalid_modifier(start_index);
59        }
60
61        let side = match symbol {
62            '#' => TrimSide::Prefix,
63            '%' => TrimSide::Suffix,
64            _ => unreachable!(),
65        };
66
67        let length = if self.skip_if(|c| c == symbol).await? {
68            TrimLength::Longest
69        } else {
70            TrimLength::Shortest
71        };
72
73        let mut lexer = WordLexer {
74            lexer: self,
75            context: WordContext::Word,
76        };
77        // Boxing needed for recursion
78        let mut pattern = Box::pin(lexer.word(|c| c == '}')).await?;
79        pattern.parse_tilde_front();
80
81        Ok(Modifier::Trim(Trim {
82            side,
83            length,
84            pattern,
85        }))
86    }
87}
88
89impl WordLexer<'_, '_> {
90    /// Parses a [switch](Switch), except the optional initial colon.
91    ///
92    /// This function blindly consumes the current character, which must be
93    /// `symbol`.
94    async fn switch(&mut self, colon: bool, symbol: char) -> Result<Modifier> {
95        self.consume_char();
96        let r#type = match symbol {
97            '+' => SwitchType::Alter,
98            '-' => SwitchType::Default,
99            '=' => SwitchType::Assign,
100            '?' => SwitchType::Error,
101            _ => unreachable!(),
102        };
103
104        let condition = if colon {
105            SwitchCondition::UnsetOrEmpty
106        } else {
107            SwitchCondition::Unset
108        };
109
110        // Boxing needed for recursion
111        let mut word = Box::pin(self.word(|c| c == '}')).await?;
112        match self.context {
113            WordContext::Text => (),
114            WordContext::Word => word.parse_tilde_front(),
115        }
116
117        Ok(Modifier::Switch(Switch {
118            r#type,
119            condition,
120            word,
121        }))
122    }
123
124    /// Parses a suffix modifier, i.e., a modifier other than the length prefix.
125    ///
126    /// If there is a [switch](Switch), [`self.context`](Self::context) affects
127    /// how the word of the switch is parsed: If the context is `Word`, a tilde
128    /// expansion is recognized at the beginning of the word and any character
129    /// can be escaped by a backslash. If the context is `Text`, only `$`, `"`,
130    /// `` ` ``, `\` and `}` can be escaped and single quotes are not recognized
131    /// in the word.
132    pub async fn suffix_modifier(&mut self) -> Result<Modifier> {
133        let start_index = self.index();
134        let colon = self.skip_if(|c| c == ':').await?;
135
136        if let Some(symbol) = self.peek_char().await? {
137            match symbol {
138                '+' | '-' | '=' | '?' => self.switch(colon, symbol).await,
139                '#' | '%' => self.trim(start_index, colon, symbol).await,
140                _ => self.suffix_modifier_not_found(start_index, colon),
141            }
142        } else {
143            self.suffix_modifier_not_found(start_index, colon)
144        }
145    }
146}
147
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::error::ErrorCause;
    use crate::syntax::Text;
    use crate::syntax::TextUnit;
    use crate::syntax::WordUnit;
    use assert_matches::assert_matches;
    use futures_util::FutureExt;

    #[test]
    fn lexer_suffix_modifier_eof() {
        // Empty input yields no modifier.
        let mut base = Lexer::with_code("");
        let mut word_lexer = WordLexer {
            context: WordContext::Word,
            lexer: &mut base,
        };

        let outcome = word_lexer.suffix_modifier().now_or_never().unwrap();
        assert_eq!(outcome, Ok(Modifier::None));
    }

    #[test]
    fn lexer_suffix_modifier_none() {
        // A closing brace right away yields no modifier and is left unconsumed.
        let mut base = Lexer::with_code("}");
        let mut word_lexer = WordLexer {
            context: WordContext::Word,
            lexer: &mut base,
        };

        let outcome = word_lexer.suffix_modifier().now_or_never().unwrap();
        assert_eq!(outcome, Ok(Modifier::None));

        let next = word_lexer.peek_char().now_or_never().unwrap();
        assert_eq!(next, Ok(Some('}')));
    }

    #[test]
    fn lexer_suffix_modifier_alter_empty() {
        // `+` with an empty word.
        let mut base = Lexer::with_code("+}");
        let mut word_lexer = WordLexer {
            context: WordContext::Word,
            lexer: &mut base,
        };

        let modifier = word_lexer
            .suffix_modifier()
            .now_or_never()
            .unwrap()
            .unwrap();
        assert_matches!(modifier, Modifier::Switch(switch) => {
            assert_eq!(switch.r#type, SwitchType::Alter);
            assert_eq!(switch.condition, SwitchCondition::Unset);
            let word = &switch.word;
            assert_eq!(word.units, []);
            assert_eq!(*word.location.code.value.borrow(), "+}");
            assert_eq!(word.location.range, 1..1);
        });

        let next = word_lexer.peek_char().now_or_never().unwrap();
        assert_eq!(next, Ok(Some('}')));
    }

    #[test]
    fn lexer_suffix_modifier_alter_word() {
        // `+` with a word that contains blanks.
        let mut base = Lexer::with_code(r"+a  z}");
        let mut word_lexer = WordLexer {
            context: WordContext::Word,
            lexer: &mut base,
        };

        let modifier = word_lexer
            .suffix_modifier()
            .now_or_never()
            .unwrap()
            .unwrap();
        assert_matches!(modifier, Modifier::Switch(switch) => {
            assert_eq!(switch.r#type, SwitchType::Alter);
            assert_eq!(switch.condition, SwitchCondition::Unset);
            let word = &switch.word;
            assert_eq!(
                word.units,
                [
                    WordUnit::Unquoted(TextUnit::Literal('a')),
                    WordUnit::Unquoted(TextUnit::Literal(' ')),
                    WordUnit::Unquoted(TextUnit::Literal(' ')),
                    WordUnit::Unquoted(TextUnit::Literal('z')),
                ]
            );
            assert_eq!(*word.location.code.value.borrow(), "+a  z}");
            assert_eq!(word.location.range, 1..5);
        });

        let next = word_lexer.peek_char().now_or_never().unwrap();
        assert_eq!(next, Ok(Some('}')));
    }

    #[test]
    fn lexer_suffix_modifier_colon_alter_empty() {
        // `:+` with an empty word.
        let mut base = Lexer::with_code(":+}");
        let mut word_lexer = WordLexer {
            context: WordContext::Word,
            lexer: &mut base,
        };

        let modifier = word_lexer
            .suffix_modifier()
            .now_or_never()
            .unwrap()
            .unwrap();
        assert_matches!(modifier, Modifier::Switch(switch) => {
            assert_eq!(switch.r#type, SwitchType::Alter);
            assert_eq!(switch.condition, SwitchCondition::UnsetOrEmpty);
            let word = &switch.word;
            assert_eq!(word.units, []);
            assert_eq!(*word.location.code.value.borrow(), ":+}");
            assert_eq!(word.location.range, 2..2);
        });

        let next = word_lexer.peek_char().now_or_never().unwrap();
        assert_eq!(next, Ok(Some('}')));
    }

    #[test]
    fn lexer_suffix_modifier_default_empty() {
        // `-` with an empty word.
        let mut base = Lexer::with_code("-}");
        let mut word_lexer = WordLexer {
            context: WordContext::Word,
            lexer: &mut base,
        };

        let modifier = word_lexer
            .suffix_modifier()
            .now_or_never()
            .unwrap()
            .unwrap();
        assert_matches!(modifier, Modifier::Switch(switch) => {
            assert_eq!(switch.r#type, SwitchType::Default);
            assert_eq!(switch.condition, SwitchCondition::Unset);
            let word = &switch.word;
            assert_eq!(word.units, []);
            assert_eq!(*word.location.code.value.borrow(), "-}");
            assert_eq!(word.location.range, 1..1);
        });

        let next = word_lexer.peek_char().now_or_never().unwrap();
        assert_eq!(next, Ok(Some('}')));
    }

    #[test]
    fn lexer_suffix_modifier_colon_default_word() {
        // `:-` with a non-empty word.
        let mut base = Lexer::with_code(r":-cool}");
        let mut word_lexer = WordLexer {
            context: WordContext::Word,
            lexer: &mut base,
        };

        let modifier = word_lexer
            .suffix_modifier()
            .now_or_never()
            .unwrap()
            .unwrap();
        assert_matches!(modifier, Modifier::Switch(switch) => {
            assert_eq!(switch.r#type, SwitchType::Default);
            assert_eq!(switch.condition, SwitchCondition::UnsetOrEmpty);
            let word = &switch.word;
            assert_eq!(
                word.units,
                [
                    WordUnit::Unquoted(TextUnit::Literal('c')),
                    WordUnit::Unquoted(TextUnit::Literal('o')),
                    WordUnit::Unquoted(TextUnit::Literal('o')),
                    WordUnit::Unquoted(TextUnit::Literal('l')),
                ]
            );
            assert_eq!(*word.location.code.value.borrow(), ":-cool}");
            assert_eq!(word.location.range, 2..6);
        });

        let next = word_lexer.peek_char().now_or_never().unwrap();
        assert_eq!(next, Ok(Some('}')));
    }

    #[test]
    fn lexer_suffix_modifier_colon_assign_empty() {
        // `:=` with an empty word.
        let mut base = Lexer::with_code(":=}");
        let mut word_lexer = WordLexer {
            context: WordContext::Word,
            lexer: &mut base,
        };

        let modifier = word_lexer
            .suffix_modifier()
            .now_or_never()
            .unwrap()
            .unwrap();
        assert_matches!(modifier, Modifier::Switch(switch) => {
            assert_eq!(switch.r#type, SwitchType::Assign);
            assert_eq!(switch.condition, SwitchCondition::UnsetOrEmpty);
            let word = &switch.word;
            assert_eq!(word.units, []);
            assert_eq!(*word.location.code.value.borrow(), ":=}");
            assert_eq!(word.location.range, 2..2);
        });

        let next = word_lexer.peek_char().now_or_never().unwrap();
        assert_eq!(next, Ok(Some('}')));
    }

    #[test]
    fn lexer_suffix_modifier_assign_word() {
        // `=` with a non-empty word.
        let mut base = Lexer::with_code(r"=Yes}");
        let mut word_lexer = WordLexer {
            context: WordContext::Word,
            lexer: &mut base,
        };

        let modifier = word_lexer
            .suffix_modifier()
            .now_or_never()
            .unwrap()
            .unwrap();
        assert_matches!(modifier, Modifier::Switch(switch) => {
            assert_eq!(switch.r#type, SwitchType::Assign);
            assert_eq!(switch.condition, SwitchCondition::Unset);
            let word = &switch.word;
            assert_eq!(
                word.units,
                [
                    WordUnit::Unquoted(TextUnit::Literal('Y')),
                    WordUnit::Unquoted(TextUnit::Literal('e')),
                    WordUnit::Unquoted(TextUnit::Literal('s')),
                ]
            );
            assert_eq!(*word.location.code.value.borrow(), "=Yes}");
            assert_eq!(word.location.range, 1..4);
        });

        let next = word_lexer.peek_char().now_or_never().unwrap();
        assert_eq!(next, Ok(Some('}')));
    }

    #[test]
    fn lexer_suffix_modifier_error_empty() {
        // `?` with an empty word.
        let mut base = Lexer::with_code("?}");
        let mut word_lexer = WordLexer {
            context: WordContext::Word,
            lexer: &mut base,
        };

        let modifier = word_lexer
            .suffix_modifier()
            .now_or_never()
            .unwrap()
            .unwrap();
        assert_matches!(modifier, Modifier::Switch(switch) => {
            assert_eq!(switch.r#type, SwitchType::Error);
            assert_eq!(switch.condition, SwitchCondition::Unset);
            let word = &switch.word;
            assert_eq!(word.units, []);
            assert_eq!(*word.location.code.value.borrow(), "?}");
            assert_eq!(word.location.range, 1..1);
        });

        let next = word_lexer.peek_char().now_or_never().unwrap();
        assert_eq!(next, Ok(Some('}')));
    }

    #[test]
    fn lexer_suffix_modifier_colon_error_word() {
        // `:?` with a non-empty word.
        let mut base = Lexer::with_code(r":?No}");
        let mut word_lexer = WordLexer {
            context: WordContext::Word,
            lexer: &mut base,
        };

        let modifier = word_lexer
            .suffix_modifier()
            .now_or_never()
            .unwrap()
            .unwrap();
        assert_matches!(modifier, Modifier::Switch(switch) => {
            assert_eq!(switch.r#type, SwitchType::Error);
            assert_eq!(switch.condition, SwitchCondition::UnsetOrEmpty);
            let word = &switch.word;
            assert_eq!(
                word.units,
                [
                    WordUnit::Unquoted(TextUnit::Literal('N')),
                    WordUnit::Unquoted(TextUnit::Literal('o')),
                ]
            );
            assert_eq!(*word.location.code.value.borrow(), ":?No}");
            assert_eq!(word.location.range, 2..4);
        });

        let next = word_lexer.peek_char().now_or_never().unwrap();
        assert_eq!(next, Ok(Some('}')));
    }

    #[test]
    fn lexer_suffix_modifier_tilde_expansion_in_switch_word_in_word_context() {
        // In the `Word` context a leading tilde becomes a tilde expansion.
        let mut base = Lexer::with_code(r"-~}");
        let mut word_lexer = WordLexer {
            context: WordContext::Word,
            lexer: &mut base,
        };

        let modifier = word_lexer
            .suffix_modifier()
            .now_or_never()
            .unwrap()
            .unwrap();
        assert_matches!(modifier, Modifier::Switch(switch) => {
            assert_eq!(switch.word.units, [WordUnit::Tilde("".to_string())]);
        });
    }

    #[test]
    fn lexer_suffix_modifier_tilde_expansion_in_switch_word_in_text_context() {
        // In the `Text` context a leading tilde stays a literal character.
        let mut base = Lexer::with_code(r"-~}");
        let mut word_lexer = WordLexer {
            context: WordContext::Text,
            lexer: &mut base,
        };

        let modifier = word_lexer
            .suffix_modifier()
            .now_or_never()
            .unwrap()
            .unwrap();
        assert_matches!(modifier, Modifier::Switch(switch) => {
            assert_eq!(
                switch.word.units,
                [WordUnit::Unquoted(TextUnit::Literal('~'))]
            );
        });
    }

    #[test]
    fn lexer_suffix_modifier_trim_shortest_prefix_in_word_context() {
        // `#` with a single-quoted pattern.
        let mut base = Lexer::with_code("#'*'}");
        let mut word_lexer = WordLexer {
            context: WordContext::Word,
            lexer: &mut base,
        };

        let modifier = word_lexer
            .suffix_modifier()
            .now_or_never()
            .unwrap()
            .unwrap();
        assert_matches!(modifier, Modifier::Trim(trim) => {
            assert_eq!(trim.side, TrimSide::Prefix);
            assert_eq!(trim.length, TrimLength::Shortest);
            let pattern = &trim.pattern;
            assert_eq!(pattern.units, [WordUnit::SingleQuote("*".to_string())]);
            assert_eq!(*pattern.location.code.value.borrow(), "#'*'}");
            assert_eq!(pattern.location.range, 1..4);
        });

        let next = word_lexer.peek_char().now_or_never().unwrap();
        assert_eq!(next, Ok(Some('}')));
    }

    #[test]
    fn lexer_suffix_modifier_trim_shortest_prefix_in_text_context() {
        // The trim pattern recognizes single quotes even in the `Text` context.
        let mut base = Lexer::with_code("#'*'}");
        let mut word_lexer = WordLexer {
            context: WordContext::Text,
            lexer: &mut base,
        };

        let modifier = word_lexer
            .suffix_modifier()
            .now_or_never()
            .unwrap()
            .unwrap();
        assert_matches!(modifier, Modifier::Trim(trim) => {
            assert_eq!(trim.side, TrimSide::Prefix);
            assert_eq!(trim.length, TrimLength::Shortest);
            let pattern = &trim.pattern;
            assert_eq!(pattern.units, [WordUnit::SingleQuote("*".to_string())]);
            assert_eq!(*pattern.location.code.value.borrow(), "#'*'}");
            assert_eq!(pattern.location.range, 1..4);
        });

        let next = word_lexer.peek_char().now_or_never().unwrap();
        assert_eq!(next, Ok(Some('}')));
    }

    #[test]
    fn lexer_suffix_modifier_trim_longest_prefix() {
        // `##` with a double-quoted pattern.
        let mut base = Lexer::with_code(r#"##"?"}"#);
        let mut word_lexer = WordLexer {
            context: WordContext::Word,
            lexer: &mut base,
        };

        let modifier = word_lexer
            .suffix_modifier()
            .now_or_never()
            .unwrap()
            .unwrap();
        assert_matches!(modifier, Modifier::Trim(trim) => {
            assert_eq!(trim.side, TrimSide::Prefix);
            assert_eq!(trim.length, TrimLength::Longest);
            let pattern = &trim.pattern;
            assert_eq!(pattern.units.len(), 1, "{:?}", pattern);
            assert_matches!(&pattern.units[0], WordUnit::DoubleQuote(Text(units)) => {
                assert_eq!(units[..], [TextUnit::Literal('?')]);
            });
            assert_eq!(*pattern.location.code.value.borrow(), r#"##"?"}"#);
            assert_eq!(pattern.location.range, 2..5);
        });

        let next = word_lexer.peek_char().now_or_never().unwrap();
        assert_eq!(next, Ok(Some('}')));
    }

    #[test]
    fn lexer_suffix_modifier_trim_shortest_suffix() {
        // `%` followed by a backslash-escaped `%` is a shortest-suffix trim.
        let mut base = Lexer::with_code(r"%\%}");
        let mut word_lexer = WordLexer {
            context: WordContext::Word,
            lexer: &mut base,
        };

        let modifier = word_lexer
            .suffix_modifier()
            .now_or_never()
            .unwrap()
            .unwrap();
        assert_matches!(modifier, Modifier::Trim(trim) => {
            assert_eq!(trim.side, TrimSide::Suffix);
            assert_eq!(trim.length, TrimLength::Shortest);
            let pattern = &trim.pattern;
            assert_eq!(
                pattern.units,
                [WordUnit::Unquoted(TextUnit::Backslashed('%'))]
            );
            assert_eq!(*pattern.location.code.value.borrow(), r"%\%}");
            assert_eq!(pattern.location.range, 1..3);
        });

        let next = word_lexer.peek_char().now_or_never().unwrap();
        assert_eq!(next, Ok(Some('}')));
    }

    #[test]
    fn lexer_suffix_modifier_trim_longest_suffix() {
        // `%%%`: the first two select the longest suffix, the third is the pattern.
        let mut base = Lexer::with_code("%%%}");
        let mut word_lexer = WordLexer {
            context: WordContext::Word,
            lexer: &mut base,
        };

        let modifier = word_lexer
            .suffix_modifier()
            .now_or_never()
            .unwrap()
            .unwrap();
        assert_matches!(modifier, Modifier::Trim(trim) => {
            assert_eq!(trim.side, TrimSide::Suffix);
            assert_eq!(trim.length, TrimLength::Longest);
            let pattern = &trim.pattern;
            assert_eq!(
                pattern.units,
                [WordUnit::Unquoted(TextUnit::Literal('%'))]
            );
            assert_eq!(*pattern.location.code.value.borrow(), "%%%}");
            assert_eq!(pattern.location.range, 2..3);
        });

        let next = word_lexer.peek_char().now_or_never().unwrap();
        assert_eq!(next, Ok(Some('}')));
    }

    #[test]
    fn lexer_suffix_modifier_tilde_expansion_in_trim_word() {
        // A leading tilde in a trim pattern becomes a tilde expansion.
        let mut base = Lexer::with_code(r"#~}");
        let mut word_lexer = WordLexer {
            context: WordContext::Word,
            lexer: &mut base,
        };

        let modifier = word_lexer
            .suffix_modifier()
            .now_or_never()
            .unwrap()
            .unwrap();
        assert_matches!(modifier, Modifier::Trim(trim) => {
            assert_eq!(trim.pattern.units, [WordUnit::Tilde("".to_string())]);
        });
    }

    #[test]
    fn lexer_suffix_modifier_orphan_colon_eof() {
        // A colon followed by the end of input is an invalid modifier.
        let mut base = Lexer::with_code(r":");
        let mut word_lexer = WordLexer {
            context: WordContext::Word,
            lexer: &mut base,
        };

        let error = word_lexer
            .suffix_modifier()
            .now_or_never()
            .unwrap()
            .unwrap_err();
        assert_eq!(error.cause, ErrorCause::Syntax(SyntaxError::InvalidModifier));
        assert_eq!(*error.location.code.value.borrow(), ":");
        assert_eq!(error.location.range, 0..1);
    }

    #[test]
    fn lexer_suffix_modifier_orphan_colon_followed_by_letter() {
        // A colon followed by a non-modifier character: only the colon is reported.
        let mut base = Lexer::with_code(r":x}");
        let mut word_lexer = WordLexer {
            context: WordContext::Word,
            lexer: &mut base,
        };

        let error = word_lexer
            .suffix_modifier()
            .now_or_never()
            .unwrap()
            .unwrap_err();
        assert_eq!(error.cause, ErrorCause::Syntax(SyntaxError::InvalidModifier));
        assert_eq!(*error.location.code.value.borrow(), ":x}");
        assert_eq!(error.location.range, 0..1);
    }

    #[test]
    fn lexer_suffix_modifier_orphan_colon_followed_by_symbol() {
        // A colon before a trim symbol is invalid; the range covers both characters.
        let mut base = Lexer::with_code(r":#}");
        let mut word_lexer = WordLexer {
            context: WordContext::Word,
            lexer: &mut base,
        };

        let error = word_lexer
            .suffix_modifier()
            .now_or_never()
            .unwrap()
            .unwrap_err();
        assert_eq!(error.cause, ErrorCause::Syntax(SyntaxError::InvalidModifier));
        assert_eq!(*error.location.code.value.borrow(), ":#}");
        assert_eq!(error.location.range, 0..2);
    }
}