yash_syntax/parser/lex/
tilde.rs

1// This file is part of yash, an extended POSIX shell.
2// Copyright (C) 2021 WATANABE Yuki
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17//! Tilde expansion parser
18//!
19//! This module defines additional functions to parse tilde expansions in a word.
20
21use crate::syntax::TextUnit::Literal;
22use crate::syntax::Word;
23use crate::syntax::WordUnit::{self, Tilde, Unquoted};
24
25/// Parses a tilde expansion.
26///
27/// This function expects the first word unit to be an unquoted tilde character.
28/// Following the tilde character, a sequence of unquoted literal characters is
29/// parsed as the name of the tilde expansion. The sequence is terminated by a
30/// slash character (or a colon character if `delimit_at_colon` is `true`).
31///
32/// If successful, this function returns a tuple of the length of the parsed
33/// word units (including the tilde character), the name of the tilde
34/// expansion (excluding the tilde character and the delimiter), and a Boolean
35/// indicating whether the name is followed by a slash character.
36/// Note that the name may be empty.
37///
38/// If the first word unit is not an unquoted tilde character or the name is
39/// delimited by a word unit other than an unquoted literal character, this
40/// function returns `None`.
41fn parse_tilde<'a, I>(units: I, delimit_at_colon: bool) -> Option<(usize, String, bool)>
42where
43    I: IntoIterator<Item = &'a WordUnit>,
44{
45    let mut units = units.into_iter();
46    if units.next() != Some(&Unquoted(Literal('~'))) {
47        return None;
48    }
49
50    let mut name = String::new();
51    let mut count = 1;
52
53    for unit in units {
54        match unit {
55            Unquoted(Literal('/')) => return Some((count, name, true)),
56            Unquoted(Literal(':')) if delimit_at_colon => break,
57            Unquoted(Literal(c)) => {
58                name.push(*c);
59                count += 1;
60            }
61            _ => return None,
62        }
63    }
64
65    Some((count, name, false))
66}
67
68impl Word {
69    /// Parses a tilde expansion at the beginning of the word.
70    ///
71    /// This function checks if `self.units` begins with an unquoted tilde
72    /// character, i.e., `WordUnit::Unquoted(TextUnit::Literal('~'))`. If so, the
73    /// word unit is replaced with a `WordUnit::Tilde` value. Other unquoted
74    /// characters that follow the tilde are together replaced to produce the
75    /// value of the `WordUnit::Tilde`.
76    ///
77    /// ```
78    /// # use std::str::FromStr;
79    /// # use yash_syntax::syntax::{Word, WordUnit::Tilde};
80    /// let mut word = Word::from_str("~").unwrap();
81    /// word.parse_tilde_front();
82    /// assert_eq!(word.units, [Tilde { name: "".to_string(), followed_by_slash: false }]);
83    /// ```
84    ///
85    /// ```
86    /// # use std::str::FromStr;
87    /// # use yash_syntax::syntax::{Word, WordUnit::Tilde};
88    /// let mut word = Word::from_str("~foo").unwrap();
89    /// word.parse_tilde_front();
90    /// assert_eq!(word.units, [Tilde { name: "foo".to_string(), followed_by_slash: false }]);
91    /// ```
92    ///
93    /// If there is no leading tilde, `self.units` will have the same content
94    /// when this function returns.
95    ///
96    /// ```
97    /// # use std::str::FromStr;
98    /// # use yash_syntax::syntax::{TextUnit::Literal, Word, WordUnit::Unquoted};
99    /// let mut word = Word::from_str("X").unwrap();
100    /// assert_eq!(word.units, [Unquoted(Literal('X'))]);
101    /// word.parse_tilde_front();
102    /// assert_eq!(word.units, [Unquoted(Literal('X'))]);
103    /// ```
104    ///
105    /// This function parses a literal word units only, which differs from the
106    /// strictly POSIX-conforming behavior. For example, POSIX requires the word
107    /// `~$()` to be regarded as a tilde expansion, but this function does not
108    /// convert it to `WordUnit::Tilde("$()".to_string())`.
109    ///
110    /// This function only parses a tilde expansion at the beginning of the word.
111    /// If the word is a colon-separated list of paths, you might want to use
112    /// [`parse_tilde_everywhere`](Self::parse_tilde_everywhere) instead.
113    ///
114    /// The tilde expansion is delimited by an unquoted slash. Unlike
115    /// `parse_tilde_everywhere`, unquoted colons are not considered as
116    /// delimiters.
117    #[inline]
118    pub fn parse_tilde_front(&mut self) {
119        if let Some((len, name, followed_by_slash)) = parse_tilde(&self.units, false) {
120            self.units.splice(
121                ..len,
122                std::iter::once(Tilde {
123                    name,
124                    followed_by_slash,
125                }),
126            );
127        }
128    }
129
130    /// Parses tilde expansions in the word.
131    ///
132    /// This function works the same as
133    /// [`parse_tilde_front`](Self::parse_tilde_front) except that it parses
134    /// tilde expansions not only at the beginning of the word but also after
135    /// each unquoted colon.
136    ///
137    /// ```
138    /// # use std::str::FromStr;
139    /// # use yash_syntax::syntax::{TextUnit::Literal, Word, WordUnit::{Tilde, Unquoted}};
140    /// let mut word = Word::from_str("~:~a/b:~c").unwrap();
141    /// word.parse_tilde_everywhere();
142    /// assert_eq!(
143    ///     word.units,
144    ///     [
145    ///         Tilde { name: "".to_string(), followed_by_slash: false },
146    ///         Unquoted(Literal(':')),
147    ///         Tilde { name: "a".to_string(), followed_by_slash: true },
148    ///         Unquoted(Literal('/')),
149    ///         Unquoted(Literal('b')),
150    ///         Unquoted(Literal(':')),
151    ///         Tilde { name: "c".to_string(), followed_by_slash: false },
152    ///     ]
153    /// );
154    /// ```
155    ///
156    /// See also
157    /// [`parse_tilde_everywhere_after`](Self::parse_tilde_everywhere_after),
158    /// which allows you to parse tilde expansions only after a specified index.
159    #[inline]
160    pub fn parse_tilde_everywhere(&mut self) {
161        self.parse_tilde_everywhere_after(0);
162    }
163
164    /// Parses tilde expansions in the word after the specified index.
165    ///
166    /// This function works the same as
167    /// [`parse_tilde_everywhere`](Self::parse_tilde_everywhere) except that it
168    /// starts parsing tilde expansions after the specified index of
169    /// `self.units`. Tilde expansions are parsed at the specified index and
170    /// after each unquoted colon.
171    ///
172    /// ```
173    /// # use std::str::FromStr;
174    /// # use yash_syntax::syntax::{TextUnit::Literal, Word, WordUnit::{Tilde, Unquoted}};
175    /// let mut word = Word::from_str("~=~a/b:~c").unwrap();
176    /// word.parse_tilde_everywhere_after(2);
177    /// assert_eq!(
178    ///     word.units,
179    ///     [
180    ///         // The initial tilde is not parsed because it is before index 2.
181    ///         Unquoted(Literal('~')),
182    ///         Unquoted(Literal('=')),
183    ///         // This tilde is parsed because it is at index 2,
184    ///         // even though it is not after a colon.
185    ///         Tilde { name: "a".to_string(), followed_by_slash: true },
186    ///         Unquoted(Literal('/')),
187    ///         Unquoted(Literal('b')),
188    ///         Unquoted(Literal(':')),
189    ///         Tilde { name: "c".to_string(), followed_by_slash: false },
190    ///     ]
191    /// );
192    /// ```
193    ///
194    /// Compare [`parse_tilde_everywhere`](Self::parse_tilde_everywhere), which
195    /// is equivalent to `parse_tilde_everywhere_after(0)`.
196    pub fn parse_tilde_everywhere_after(&mut self, index: usize) {
197        let mut i = index;
198        loop {
199            // Parse a tilde expansion at index `i`.
200            if let Some((len, name, followed_by_slash)) = parse_tilde(&self.units[i..], true) {
201                self.units.splice(
202                    i..i + len,
203                    std::iter::once(Tilde {
204                        name,
205                        followed_by_slash,
206                    }),
207                );
208                i += 1;
209            }
210
211            // Find the next colon separator.
212            let Some(colon) = self.units[i..]
213                .iter()
214                .position(|unit| unit == &Unquoted(Literal(':')))
215            else {
216                break;
217            };
218            i += colon + 1;
219        }
220    }
221}
222
#[cfg(test)]
mod tests {
    use super::*;
    use crate::syntax::Text;
    use crate::syntax::TextUnit::Backslashed;
    use crate::syntax::WordUnit::{DoubleQuote, SingleQuote};
    use std::str::FromStr;

    /// Builds a `Word` from `source`, panicking if it cannot be parsed.
    fn word_from(source: &str) -> Word {
        Word::from_str(source).unwrap()
    }

    /// Returns a copy of `word` with `Word::parse_tilde_front` applied.
    fn parse_tilde_front(word: &Word) -> Word {
        let mut parsed = word.clone();
        parsed.parse_tilde_front();
        parsed
    }

    /// Returns a copy of `word` with `Word::parse_tilde_everywhere` applied.
    fn parse_tilde_everywhere(word: &Word) -> Word {
        let mut parsed = word.clone();
        parsed.parse_tilde_everywhere();
        parsed
    }

    /// Shorthand for an unquoted literal character.
    fn lit(c: char) -> WordUnit {
        Unquoted(Literal(c))
    }

    /// Shorthand for a run of unquoted literal characters.
    fn lits(s: &str) -> Vec<WordUnit> {
        s.chars().map(lit).collect()
    }

    /// Shorthand for a parsed tilde expansion.
    fn tilde(name: &str, followed_by_slash: bool) -> WordUnit {
        Tilde {
            name: name.to_string(),
            followed_by_slash,
        }
    }

    /// Asserts that `parse` turns `source` into `expected` units while keeping
    /// the word's location.
    #[track_caller]
    fn assert_parsed(parse: fn(&Word) -> Word, source: &str, expected: &[WordUnit]) {
        let input = word_from(source);
        let result = parse(&input);
        assert_eq!(result.location, input.location);
        assert_eq!(result.units, expected);
    }

    /// Asserts that `parse` leaves each of `sources` completely unchanged.
    #[track_caller]
    fn assert_unchanged(parse: fn(&Word) -> Word, sources: &[&str]) {
        for &source in sources {
            let input = word_from(source);
            let result = parse(&input);
            assert_eq!(result, input);
        }
    }

    #[test]
    fn word_parse_tilde_front_not_starting_with_tilde() {
        assert_unchanged(parse_tilde_front, &["", "a", "''"]);
    }

    #[test]
    fn word_parse_tilde_front_only_tilde() {
        assert_parsed(parse_tilde_front, "~", &[tilde("", false)]);
    }

    #[test]
    fn word_parse_tilde_front_with_name() {
        assert_parsed(parse_tilde_front, "~foo", &[tilde("foo", false)]);
    }

    #[test]
    fn word_parse_tilde_front_ending_with_slash() {
        assert_parsed(
            parse_tilde_front,
            "~bar/''",
            &[tilde("bar", true), lit('/'), SingleQuote("".to_string())],
        );
    }

    #[test]
    fn word_parse_tilde_front_including_colon() {
        // A colon is not a delimiter here, so it becomes part of the name.
        assert_parsed(parse_tilde_front, "~bar:baz", &[tilde("bar:baz", false)]);
    }

    #[test]
    fn word_parse_tilde_front_interrupted_by_non_literal() {
        let mut expected = lits("~foo");
        expected.push(Unquoted(Backslashed('/')));
        assert_parsed(parse_tilde_front, r"~foo\/", &expected);

        let mut expected = lits("~bar");
        expected.push(SingleQuote("".to_string()));
        assert_parsed(parse_tilde_front, "~bar''", &expected);
    }

    #[test]
    fn word_parse_tilde_front_not_after_colon() {
        assert_unchanged(parse_tilde_front, &["a~", "/~a", "''~/"]);
    }

    #[test]
    fn word_parse_tilde_front_after_colon() {
        // Tildes after a colon are not parsed by `parse_tilde_front`.
        assert_parsed(parse_tilde_front, ":~", &lits(":~"));
        assert_parsed(parse_tilde_front, ":~foo/a:~bar", &lits(":~foo/a:~bar"));

        let mut expected = vec![tilde("a", true)];
        expected.extend(lits("/b:~c/d"));
        assert_parsed(parse_tilde_front, "~a/b:~c/d", &expected);
    }

    #[test]
    fn word_parse_tilde_everywhere_not_starting_with_tilde() {
        assert_unchanged(parse_tilde_everywhere, &["", "a", "''"]);
    }

    #[test]
    fn word_parse_tilde_everywhere_only_tilde() {
        assert_parsed(parse_tilde_everywhere, "~", &[tilde("", false)]);
    }

    #[test]
    fn word_parse_tilde_everywhere_with_name() {
        assert_parsed(parse_tilde_everywhere, "~foo", &[tilde("foo", false)]);
    }

    #[test]
    fn word_parse_tilde_everywhere_ending_with_slash() {
        assert_parsed(
            parse_tilde_everywhere,
            "~bar/''",
            &[tilde("bar", true), lit('/'), SingleQuote("".to_string())],
        );
    }

    #[test]
    fn word_parse_tilde_everywhere_ending_with_colon() {
        assert_parsed(
            parse_tilde_everywhere,
            "~bar:\"\"",
            &[tilde("bar", false), lit(':'), DoubleQuote(Text(vec![]))],
        );
    }

    #[test]
    fn word_parse_tilde_everywhere_interrupted_by_non_literal() {
        let mut expected = lits("~foo");
        expected.push(Unquoted(Backslashed('/')));
        assert_parsed(parse_tilde_everywhere, r"~foo\/", &expected);

        let mut expected = lits("~bar");
        expected.push(SingleQuote("".to_string()));
        assert_parsed(parse_tilde_everywhere, "~bar''", &expected);
    }

    #[test]
    fn word_parse_tilde_everywhere_not_after_colon() {
        assert_unchanged(parse_tilde_everywhere, &["a~", "/~a", "''~/"]);
    }

    #[test]
    fn word_parse_tilde_everywhere_after_colon() {
        assert_parsed(parse_tilde_everywhere, ":~", &[lit(':'), tilde("", false)]);

        assert_parsed(
            parse_tilde_everywhere,
            ":~foo/a:~bar",
            &[
                lit(':'),
                tilde("foo", true),
                lit('/'),
                lit('a'),
                lit(':'),
                tilde("bar", false),
            ],
        );

        assert_parsed(
            parse_tilde_everywhere,
            "~a/b:~c/d::~",
            &[
                tilde("a", true),
                lit('/'),
                lit('b'),
                lit(':'),
                tilde("c", true),
                lit('/'),
                lit('d'),
                lit(':'),
                lit(':'),
                tilde("", false),
            ],
        );
    }
}