yash_syntax/parser/lex/
tilde.rs

1// This file is part of yash, an extended POSIX shell.
2// Copyright (C) 2021 WATANABE Yuki
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17//! Tilde expansion parser
18//!
19//! This module defines additional functions to parse tilde expansions in a word.
20
21use crate::syntax::TextUnit::Literal;
22use crate::syntax::Word;
23use crate::syntax::WordUnit::{self, Tilde, Unquoted};
24
25/// Parses a tilde expansion.
26///
27/// This function expects the first word unit to be an unquoted tilde character.
28/// Following the tilde character, a sequence of unquoted literal characters is
29/// parsed as the name of the tilde expansion. The sequence is terminated by a
30/// slash character (or a colon character if `delimit_at_colon` is `true`).
31///
32/// If successful, this function returns a tuple of the length of the parsed
33/// word units (including the tilde character) and the name of the tilde
34/// expansion (excluding the tilde character and the delimiter). Note that the
35/// name may be empty.
36///
37/// If the first word unit is not an unquoted tilde character or the name is
38/// delimited by a word unit other than an unquoted literal character, this
39/// function returns `None`.
40fn parse_tilde<'a, I>(units: I, delimit_at_colon: bool) -> Option<(usize, String)>
41where
42    I: IntoIterator<Item = &'a WordUnit>,
43{
44    let mut units = units.into_iter();
45    if units.next() != Some(&Unquoted(Literal('~'))) {
46        return None;
47    }
48
49    let mut name = String::new();
50    let mut count = 1;
51
52    for unit in units {
53        match unit {
54            Unquoted(Literal('/')) => break,
55            Unquoted(Literal(':')) if delimit_at_colon => break,
56            Unquoted(Literal(c)) => {
57                name.push(*c);
58                count += 1;
59            }
60            _ => return None,
61        }
62    }
63
64    Some((count, name))
65}
66
67impl Word {
68    /// Parses a tilde expansion at the beginning of the word.
69    ///
70    /// This function checks if `self.units` begins with an unquoted tilde
71    /// character, i.e., `WordUnit::Unquoted(TextUnit::Literal('~'))`. If so, the
72    /// word unit is replaced with a `WordUnit::Tilde` value. Other unquoted
73    /// characters that follow the tilde are together replaced to produce the
74    /// value of the `WordUnit::Tilde`.
75    ///
76    /// ```
77    /// # use std::str::FromStr;
78    /// # use yash_syntax::syntax::{Word, WordUnit::Tilde};
79    /// let mut word = Word::from_str("~").unwrap();
80    /// word.parse_tilde_front();
81    /// assert_eq!(word.units, [Tilde("".to_string())]);
82    /// ```
83    ///
84    /// ```
85    /// # use std::str::FromStr;
86    /// # use yash_syntax::syntax::{Word, WordUnit::Tilde};
87    /// let mut word = Word::from_str("~foo").unwrap();
88    /// word.parse_tilde_front();
89    /// assert_eq!(word.units, [Tilde("foo".to_string())]);
90    /// ```
91    ///
92    /// If there is no leading tilde, `self.units` will have the same content
93    /// when this function returns.
94    ///
95    /// ```
96    /// # use std::str::FromStr;
97    /// # use yash_syntax::syntax::{TextUnit::Literal, Word, WordUnit::Unquoted};
98    /// let mut word = Word::from_str("X").unwrap();
99    /// assert_eq!(word.units, [Unquoted(Literal('X'))]);
100    /// word.parse_tilde_front();
101    /// assert_eq!(word.units, [Unquoted(Literal('X'))]);
102    /// ```
103    ///
104    /// This function parses a literal word units only, which differs from the
105    /// strictly POSIX-conforming behavior. For example, POSIX requires the word
106    /// `~$()` to be regarded as a tilde expansion, but this function does not
107    /// convert it to `WordUnit::Tilde("$()".to_string())`.
108    ///
109    /// This function only parses a tilde expansion at the beginning of the word.
110    /// If the word is a colon-separated list of paths, you might want to use
111    /// [`parse_tilde_everywhere`](Self::parse_tilde_everywhere) instead.
112    ///
113    /// The tilde expansion is delimited by an unquoted slash. Unlike
114    /// `parse_tilde_everywhere`, unquoted colons are not considered as
115    /// delimiters.
116    #[inline]
117    pub fn parse_tilde_front(&mut self) {
118        if let Some((len, name)) = parse_tilde(&self.units, false) {
119            self.units.splice(..len, std::iter::once(Tilde(name)));
120        }
121    }
122
123    /// Parses tilde expansions in the word.
124    ///
125    /// This function works the same as
126    /// [`parse_tilde_front`](Self::parse_tilde_front) except that it parses
127    /// tilde expansions not only at the beginning of the word but also after
128    /// each unquoted colon.
129    ///
130    /// ```
131    /// # use std::str::FromStr;
132    /// # use yash_syntax::syntax::{TextUnit::Literal, Word, WordUnit::{Tilde, Unquoted}};
133    /// let mut word = Word::from_str("~:~a/b:~c").unwrap();
134    /// word.parse_tilde_everywhere();
135    /// assert_eq!(
136    ///     word.units,
137    ///     [
138    ///         Tilde("".to_string()),
139    ///         Unquoted(Literal(':')),
140    ///         Tilde("a".to_string()),
141    ///         Unquoted(Literal('/')),
142    ///         Unquoted(Literal('b')),
143    ///         Unquoted(Literal(':')),
144    ///         Tilde("c".to_string()),
145    ///     ]
146    /// );
147    /// ```
148    #[inline]
149    pub fn parse_tilde_everywhere(&mut self) {
150        let mut i = 0;
151        loop {
152            // Parse a tilde expansion at index `i`.
153            if let Some((len, name)) = parse_tilde(&self.units[i..], true) {
154                self.units.splice(i..i + len, std::iter::once(Tilde(name)));
155                i += 1;
156            }
157
158            // Find the next colon separator.
159            let Some(colon) = self.units[i..]
160                .iter()
161                .position(|unit| unit == &Unquoted(Literal(':')))
162            else {
163                break;
164            };
165            i += colon + 1;
166        }
167    }
168}
169
#[cfg(test)]
mod tests {
    use super::*;
    use crate::syntax::Text;
    use crate::syntax::TextUnit::Backslashed;
    use crate::syntax::WordUnit::{DoubleQuote, SingleQuote};
    use std::str::FromStr;

    /// Parses `source` into a word, panicking if it is not a valid word.
    fn word(source: &str) -> Word {
        Word::from_str(source).unwrap()
    }

    /// Returns the word parsed from `source` paired with a copy of it that
    /// went through `Word::parse_tilde_front`.
    fn front(source: &str) -> (Word, Word) {
        let original = word(source);
        let mut parsed = original.clone();
        parsed.parse_tilde_front();
        (original, parsed)
    }

    /// Returns the word parsed from `source` paired with a copy of it that
    /// went through `Word::parse_tilde_everywhere`.
    fn everywhere(source: &str) -> (Word, Word) {
        let original = word(source);
        let mut parsed = original.clone();
        parsed.parse_tilde_everywhere();
        (original, parsed)
    }

    /// Builds one unquoted literal unit per character of `chars`.
    fn literals(chars: &str) -> Vec<WordUnit> {
        chars.chars().map(|c| Unquoted(Literal(c))).collect()
    }

    /// Builds a tilde unit with the given name.
    fn tilde(name: &str) -> WordUnit {
        Tilde(name.to_string())
    }

    #[test]
    fn word_parse_tilde_front_not_starting_with_tilde() {
        for source in ["", "a", "''"] {
            let (original, parsed) = front(source);
            assert_eq!(parsed, original);
        }
    }

    #[test]
    fn word_parse_tilde_front_only_tilde() {
        let (original, parsed) = front("~");
        assert_eq!(parsed.location, original.location);
        assert_eq!(parsed.units, [tilde("")]);
    }

    #[test]
    fn word_parse_tilde_front_with_name() {
        let (original, parsed) = front("~foo");
        assert_eq!(parsed.location, original.location);
        assert_eq!(parsed.units, [tilde("foo")]);
    }

    #[test]
    fn word_parse_tilde_front_ending_with_slash() {
        let (original, parsed) = front("~bar/''");
        assert_eq!(parsed.location, original.location);
        assert_eq!(
            parsed.units,
            [
                tilde("bar"),
                Unquoted(Literal('/')),
                SingleQuote(String::new()),
            ]
        );
    }

    #[test]
    fn word_parse_tilde_front_including_colon() {
        let (original, parsed) = front("~bar:baz");
        assert_eq!(parsed.location, original.location);
        assert_eq!(parsed.units, [tilde("bar:baz")]);
    }

    #[test]
    fn word_parse_tilde_front_interrupted_by_non_literal() {
        let (original, parsed) = front(r"~foo\/");
        let mut expected = literals("~foo");
        expected.push(Unquoted(Backslashed('/')));
        assert_eq!(parsed.location, original.location);
        assert_eq!(parsed.units, expected);

        let (original, parsed) = front("~bar''");
        let mut expected = literals("~bar");
        expected.push(SingleQuote(String::new()));
        assert_eq!(parsed.location, original.location);
        assert_eq!(parsed.units, expected);
    }

    #[test]
    fn word_parse_tilde_front_not_after_colon() {
        for source in ["a~", "/~a", "''~/"] {
            let (original, parsed) = front(source);
            assert_eq!(parsed, original);
        }
    }

    #[test]
    fn word_parse_tilde_front_after_colon() {
        // A tilde that follows a colon is not at the front of the word.
        for source in [":~", ":~foo/a:~bar"] {
            let (original, parsed) = front(source);
            assert_eq!(parsed.location, original.location);
            assert_eq!(parsed.units, literals(source));
        }

        // Only the first of the two tildes is converted.
        let (original, parsed) = front("~a/b:~c/d");
        let expected = [vec![tilde("a")], literals("/b:~c/d")].concat();
        assert_eq!(parsed.location, original.location);
        assert_eq!(parsed.units, expected);
    }

    #[test]
    fn word_parse_tilde_everywhere_not_starting_with_tilde() {
        for source in ["", "a", "''"] {
            let (original, parsed) = everywhere(source);
            assert_eq!(parsed, original);
        }
    }

    #[test]
    fn word_parse_tilde_everywhere_only_tilde() {
        let (original, parsed) = everywhere("~");
        assert_eq!(parsed.location, original.location);
        assert_eq!(parsed.units, [tilde("")]);
    }

    #[test]
    fn word_parse_tilde_everywhere_with_name() {
        let (original, parsed) = everywhere("~foo");
        assert_eq!(parsed.location, original.location);
        assert_eq!(parsed.units, [tilde("foo")]);
    }

    #[test]
    fn word_parse_tilde_everywhere_ending_with_slash() {
        let (original, parsed) = everywhere("~bar/''");
        assert_eq!(parsed.location, original.location);
        assert_eq!(
            parsed.units,
            [
                tilde("bar"),
                Unquoted(Literal('/')),
                SingleQuote(String::new()),
            ]
        );
    }

    #[test]
    fn word_parse_tilde_everywhere_ending_with_colon() {
        let (original, parsed) = everywhere("~bar:\"\"");
        assert_eq!(parsed.location, original.location);
        assert_eq!(
            parsed.units,
            [
                tilde("bar"),
                Unquoted(Literal(':')),
                DoubleQuote(Text(vec![])),
            ]
        );
    }

    #[test]
    fn word_parse_tilde_everywhere_interrupted_by_non_literal() {
        let (original, parsed) = everywhere(r"~foo\/");
        let mut expected = literals("~foo");
        expected.push(Unquoted(Backslashed('/')));
        assert_eq!(parsed.location, original.location);
        assert_eq!(parsed.units, expected);

        let (original, parsed) = everywhere("~bar''");
        let mut expected = literals("~bar");
        expected.push(SingleQuote(String::new()));
        assert_eq!(parsed.location, original.location);
        assert_eq!(parsed.units, expected);
    }

    #[test]
    fn word_parse_tilde_everywhere_not_after_colon() {
        for source in ["a~", "/~a", "''~/"] {
            let (original, parsed) = everywhere(source);
            assert_eq!(parsed, original);
        }
    }

    #[test]
    fn word_parse_tilde_everywhere_after_colon() {
        let (original, parsed) = everywhere(":~");
        assert_eq!(parsed.location, original.location);
        assert_eq!(parsed.units, [Unquoted(Literal(':')), tilde("")]);

        let (original, parsed) = everywhere(":~foo/a:~bar");
        let expected = [
            literals(":"),
            vec![tilde("foo")],
            literals("/a:"),
            vec![tilde("bar")],
        ]
        .concat();
        assert_eq!(parsed.location, original.location);
        assert_eq!(parsed.units, expected);

        let (original, parsed) = everywhere("~a/b:~c/d::~");
        let expected = [
            vec![tilde("a")],
            literals("/b:"),
            vec![tilde("c")],
            literals("/d::"),
            vec![tilde("")],
        ]
        .concat();
        assert_eq!(parsed.location, original.location);
        assert_eq!(parsed.units, expected);
    }
}