yash_syntax/parser/lex/
tilde.rs

1// This file is part of yash, an extended POSIX shell.
2// Copyright (C) 2021 WATANABE Yuki
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17//! Tilde expansion parser
18//!
19//! This module defines additional functions to parse tilde expansions in a word.
20
21use crate::syntax::TextUnit::Literal;
22use crate::syntax::Word;
23use crate::syntax::WordUnit::{self, Tilde, Unquoted};
24
25/// Parses a tilde expansion.
26///
27/// This function expects the first word unit to be an unquoted tilde character.
28/// Following the tilde character, a sequence of unquoted literal characters is
29/// parsed as the name of the tilde expansion. The sequence is terminated by a
30/// slash character (or a colon character if `delimit_at_colon` is `true`).
31///
32/// If successful, this function returns a tuple of the length of the parsed
33/// word units (including the tilde character) and the name of the tilde
34/// expansion (excluding the tilde character and the delimiter). Note that the
35/// name may be empty.
36///
37/// If the first word unit is not an unquoted tilde character or the name is
38/// delimited by a word unit other than an unquoted literal character, this
39/// function returns `None`.
40fn parse_tilde<'a, I>(units: I, delimit_at_colon: bool) -> Option<(usize, String)>
41where
42    I: IntoIterator<Item = &'a WordUnit>,
43{
44    let mut units = units.into_iter();
45    if units.next() != Some(&Unquoted(Literal('~'))) {
46        return None;
47    }
48
49    let mut name = String::new();
50    let mut count = 1;
51
52    for unit in units {
53        match unit {
54            Unquoted(Literal('/')) => break,
55            Unquoted(Literal(':')) if delimit_at_colon => break,
56            Unquoted(Literal(c)) => {
57                name.push(*c);
58                count += 1;
59            }
60            _ => return None,
61        }
62    }
63
64    Some((count, name))
65}
66
67impl Word {
68    /// Parses a tilde expansion at the beginning of the word.
69    ///
70    /// This function checks if `self.units` begins with an unquoted tilde
71    /// character, i.e., `WordUnit::Unquoted(TextUnit::Literal('~'))`. If so, the
72    /// word unit is replaced with a `WordUnit::Tilde` value. Other unquoted
73    /// characters that follow the tilde are together replaced to produce the
74    /// value of the `WordUnit::Tilde`.
75    ///
76    /// ```
77    /// # use std::str::FromStr;
78    /// # use yash_syntax::syntax::{Word, WordUnit::Tilde};
79    /// let mut word = Word::from_str("~").unwrap();
80    /// word.parse_tilde_front();
81    /// assert_eq!(word.units, [Tilde("".to_string())]);
82    /// ```
83    ///
84    /// ```
85    /// # use std::str::FromStr;
86    /// # use yash_syntax::syntax::{Word, WordUnit::Tilde};
87    /// let mut word = Word::from_str("~foo").unwrap();
88    /// word.parse_tilde_front();
89    /// assert_eq!(word.units, [Tilde("foo".to_string())]);
90    /// ```
91    ///
92    /// If there is no leading tilde, `self.units` will have the same content
93    /// when this function returns.
94    ///
95    /// ```
96    /// # use std::str::FromStr;
97    /// # use yash_syntax::syntax::{TextUnit::Literal, Word, WordUnit::Unquoted};
98    /// let mut word = Word::from_str("X").unwrap();
99    /// assert_eq!(word.units, [Unquoted(Literal('X'))]);
100    /// word.parse_tilde_front();
101    /// assert_eq!(word.units, [Unquoted(Literal('X'))]);
102    /// ```
103    ///
104    /// This function parses a literal word units only, which differs from the
105    /// strictly POSIX-conforming behavior. For example, POSIX requires the word
106    /// `~$()` to be regarded as a tilde expansion, but this function does not
107    /// convert it to `WordUnit::Tilde("$()".to_string())`.
108    ///
109    /// This function only parses a tilde expansion at the beginning of the word.
110    /// If the word is a colon-separated list of paths, you might want to use
111    /// [`parse_tilde_everywhere`](Self::parse_tilde_everywhere) instead.
112    ///
113    /// The tilde expansion is delimited by an unquoted slash. Unlike
114    /// `parse_tilde_everywhere`, unquoted colons are not considered as
115    /// delimiters.
116    #[inline]
117    pub fn parse_tilde_front(&mut self) {
118        if let Some((len, name)) = parse_tilde(&self.units, false) {
119            self.units.splice(..len, std::iter::once(Tilde(name)));
120        }
121    }
122
123    /// Parses tilde expansions in the word.
124    ///
125    /// This function works the same as
126    /// [`parse_tilde_front`](Self::parse_tilde_front) except that it parses
127    /// tilde expansions not only at the beginning of the word but also after
128    /// each unquoted colon.
129    ///
130    /// ```
131    /// # use std::str::FromStr;
132    /// # use yash_syntax::syntax::{TextUnit::Literal, Word, WordUnit::{Tilde, Unquoted}};
133    /// let mut word = Word::from_str("~:~a/b:~c").unwrap();
134    /// word.parse_tilde_everywhere();
135    /// assert_eq!(
136    ///     word.units,
137    ///     [
138    ///         Tilde("".to_string()),
139    ///         Unquoted(Literal(':')),
140    ///         Tilde("a".to_string()),
141    ///         Unquoted(Literal('/')),
142    ///         Unquoted(Literal('b')),
143    ///         Unquoted(Literal(':')),
144    ///         Tilde("c".to_string()),
145    ///     ]
146    /// );
147    /// ```
148    ///
149    /// See also
150    /// [`parse_tilde_everywhere_after`](Self::parse_tilde_everywhere_after),
151    /// which allows you to parse tilde expansions only after a specified index.
152    #[inline]
153    pub fn parse_tilde_everywhere(&mut self) {
154        self.parse_tilde_everywhere_after(0);
155    }
156
157    /// Parses tilde expansions in the word after the specified index.
158    ///
159    /// This function works the same as
160    /// [`parse_tilde_everywhere`](Self::parse_tilde_everywhere) except that it
161    /// starts parsing tilde expansions after the specified index of
162    /// `self.units`. Tilde expansions are parsed at the specified index and
163    /// after each unquoted colon.
164    ///
165    /// ```
166    /// # use std::str::FromStr;
167    /// # use yash_syntax::syntax::{TextUnit::Literal, Word, WordUnit::{Tilde, Unquoted}};
168    /// let mut word = Word::from_str("~=~a/b:~c").unwrap();
169    /// word.parse_tilde_everywhere_after(2);
170    /// assert_eq!(
171    ///     word.units,
172    ///     [
173    ///         // The initial tilde is not parsed because it is before index 2.
174    ///         Unquoted(Literal('~')),
175    ///         Unquoted(Literal('=')),
176    ///         // This tilde is parsed because it is at index 2,
177    ///         // even though it is not after a colon.
178    ///         Tilde("a".to_string()),
179    ///         Unquoted(Literal('/')),
180    ///         Unquoted(Literal('b')),
181    ///         Unquoted(Literal(':')),
182    ///         Tilde("c".to_string()),
183    ///     ]
184    /// );
185    /// ```
186    ///
187    /// Compare [`parse_tilde_everywhere`](Self::parse_tilde_everywhere), which
188    /// is equivalent to `parse_tilde_everywhere_after(0)`.
189    pub fn parse_tilde_everywhere_after(&mut self, index: usize) {
190        let mut i = index;
191        loop {
192            // Parse a tilde expansion at index `i`.
193            if let Some((len, name)) = parse_tilde(&self.units[i..], true) {
194                self.units.splice(i..i + len, std::iter::once(Tilde(name)));
195                i += 1;
196            }
197
198            // Find the next colon separator.
199            let Some(colon) = self.units[i..]
200                .iter()
201                .position(|unit| unit == &Unquoted(Literal(':')))
202            else {
203                break;
204            };
205            i += colon + 1;
206        }
207    }
208}
209
#[cfg(test)]
mod tests {
    use super::*;
    use crate::syntax::Text;
    use crate::syntax::TextUnit::Backslashed;
    use crate::syntax::WordUnit::{DoubleQuote, SingleQuote};
    use std::str::FromStr;

    /// Parses `source` into a word, panicking if it is not a valid word.
    fn parse_word(source: &str) -> Word {
        Word::from_str(source).unwrap()
    }

    /// Returns an unquoted literal word unit for `c`.
    fn lit(c: char) -> WordUnit {
        Unquoted(Literal(c))
    }

    /// Returns one unquoted literal word unit per character of `chars`.
    fn literals(chars: &str) -> Vec<WordUnit> {
        chars.chars().map(lit).collect()
    }

    /// Returns a tilde expansion word unit with the given name.
    fn tilde(name: &str) -> WordUnit {
        Tilde(name.to_string())
    }

    /// Applies `Word::parse_tilde_front` to a copy of `word`.
    fn parse_tilde_front(word: &Word) -> Word {
        let mut word = word.clone();
        word.parse_tilde_front();
        word
    }

    /// Applies `Word::parse_tilde_everywhere` to a copy of `word`.
    fn parse_tilde_everywhere(word: &Word) -> Word {
        let mut word = word.clone();
        word.parse_tilde_everywhere();
        word
    }

    /// Applies `Word::parse_tilde_everywhere_after` to a copy of `word`.
    fn parse_tilde_everywhere_after(word: &Word, index: usize) -> Word {
        let mut word = word.clone();
        word.parse_tilde_everywhere_after(index);
        word
    }

    #[test]
    fn parse_tilde_requires_leading_tilde() {
        assert_eq!(parse_tilde(&literals(""), false), None);
        assert_eq!(parse_tilde(&literals("a~"), false), None);
        assert_eq!(parse_tilde(&literals(":~"), true), None);
    }

    #[test]
    fn parse_tilde_returns_length_and_name() {
        assert_eq!(parse_tilde(&literals("~"), false), Some((1, "".to_string())));
        assert_eq!(
            parse_tilde(&literals("~foo/bar"), false),
            Some((4, "foo".to_string()))
        );
        assert_eq!(
            parse_tilde(&literals("~a:b"), false),
            Some((4, "a:b".to_string()))
        );
        assert_eq!(
            parse_tilde(&literals("~a:b"), true),
            Some((2, "a".to_string()))
        );
    }

    #[test]
    fn word_parse_tilde_front_not_starting_with_tilde() {
        for source in ["", "a", "''"] {
            let input = parse_word(source);
            let result = parse_tilde_front(&input);
            assert_eq!(result, input);
        }
    }

    #[test]
    fn word_parse_tilde_front_only_tilde() {
        let input = parse_word("~");
        let result = parse_tilde_front(&input);
        assert_eq!(result.location, input.location);
        assert_eq!(result.units, [tilde("")]);
    }

    #[test]
    fn word_parse_tilde_front_with_name() {
        let input = parse_word("~foo");
        let result = parse_tilde_front(&input);
        assert_eq!(result.location, input.location);
        assert_eq!(result.units, [tilde("foo")]);
    }

    #[test]
    fn word_parse_tilde_front_ending_with_slash() {
        let input = parse_word("~bar/''");
        let result = parse_tilde_front(&input);
        assert_eq!(result.location, input.location);
        assert_eq!(
            result.units,
            [tilde("bar"), lit('/'), SingleQuote("".to_string())]
        );
    }

    #[test]
    fn word_parse_tilde_front_including_colon() {
        let input = parse_word("~bar:baz");
        let result = parse_tilde_front(&input);
        assert_eq!(result.location, input.location);
        assert_eq!(result.units, [tilde("bar:baz")]);
    }

    #[test]
    fn word_parse_tilde_front_interrupted_by_non_literal() {
        let input = parse_word(r"~foo\/");
        let result = parse_tilde_front(&input);
        assert_eq!(result.location, input.location);
        let mut expected = literals("~foo");
        expected.push(Unquoted(Backslashed('/')));
        assert_eq!(result.units, expected);

        let input = parse_word("~bar''");
        let result = parse_tilde_front(&input);
        assert_eq!(result.location, input.location);
        let mut expected = literals("~bar");
        expected.push(SingleQuote("".to_string()));
        assert_eq!(result.units, expected);
    }

    #[test]
    fn word_parse_tilde_front_not_after_colon() {
        for source in ["a~", "/~a", "''~/"] {
            let input = parse_word(source);
            let result = parse_tilde_front(&input);
            assert_eq!(result, input);
        }
    }

    #[test]
    fn word_parse_tilde_front_after_colon() {
        let input = parse_word(":~");
        let result = parse_tilde_front(&input);
        assert_eq!(result.location, input.location);
        assert_eq!(result.units, literals(":~"));

        let input = parse_word(":~foo/a:~bar");
        let result = parse_tilde_front(&input);
        assert_eq!(result.location, input.location);
        assert_eq!(result.units, literals(":~foo/a:~bar"));

        let input = parse_word("~a/b:~c/d");
        let result = parse_tilde_front(&input);
        assert_eq!(result.location, input.location);
        // Only the leading tilde is parsed; the one after the colon stays literal.
        let mut expected = vec![tilde("a")];
        expected.extend(literals("/b:~c/d"));
        assert_eq!(result.units, expected);
    }

    #[test]
    fn word_parse_tilde_everywhere_not_starting_with_tilde() {
        for source in ["", "a", "''"] {
            let input = parse_word(source);
            let result = parse_tilde_everywhere(&input);
            assert_eq!(result, input);
        }
    }

    #[test]
    fn word_parse_tilde_everywhere_only_tilde() {
        let input = parse_word("~");
        let result = parse_tilde_everywhere(&input);
        assert_eq!(result.location, input.location);
        assert_eq!(result.units, [tilde("")]);
    }

    #[test]
    fn word_parse_tilde_everywhere_with_name() {
        let input = parse_word("~foo");
        let result = parse_tilde_everywhere(&input);
        assert_eq!(result.location, input.location);
        assert_eq!(result.units, [tilde("foo")]);
    }

    #[test]
    fn word_parse_tilde_everywhere_ending_with_slash() {
        let input = parse_word("~bar/''");
        let result = parse_tilde_everywhere(&input);
        assert_eq!(result.location, input.location);
        assert_eq!(
            result.units,
            [tilde("bar"), lit('/'), SingleQuote("".to_string())]
        );
    }

    #[test]
    fn word_parse_tilde_everywhere_ending_with_colon() {
        let input = parse_word("~bar:\"\"");
        let result = parse_tilde_everywhere(&input);
        assert_eq!(result.location, input.location);
        assert_eq!(
            result.units,
            [tilde("bar"), lit(':'), DoubleQuote(Text(vec![]))]
        );
    }

    #[test]
    fn word_parse_tilde_everywhere_interrupted_by_non_literal() {
        let input = parse_word(r"~foo\/");
        let result = parse_tilde_everywhere(&input);
        assert_eq!(result.location, input.location);
        let mut expected = literals("~foo");
        expected.push(Unquoted(Backslashed('/')));
        assert_eq!(result.units, expected);

        let input = parse_word("~bar''");
        let result = parse_tilde_everywhere(&input);
        assert_eq!(result.location, input.location);
        let mut expected = literals("~bar");
        expected.push(SingleQuote("".to_string()));
        assert_eq!(result.units, expected);
    }

    #[test]
    fn word_parse_tilde_everywhere_not_after_colon() {
        for source in ["a~", "/~a", "''~/"] {
            let input = parse_word(source);
            let result = parse_tilde_everywhere(&input);
            assert_eq!(result, input);
        }
    }

    #[test]
    fn word_parse_tilde_everywhere_after_colon() {
        let input = parse_word(":~");
        let result = parse_tilde_everywhere(&input);
        assert_eq!(result.location, input.location);
        assert_eq!(result.units, [lit(':'), tilde("")]);

        let input = parse_word(":~foo/a:~bar");
        let result = parse_tilde_everywhere(&input);
        assert_eq!(result.location, input.location);
        assert_eq!(
            result.units,
            [
                lit(':'),
                tilde("foo"),
                lit('/'),
                lit('a'),
                lit(':'),
                tilde("bar"),
            ]
        );

        let input = parse_word("~a/b:~c/d::~");
        let result = parse_tilde_everywhere(&input);
        assert_eq!(result.location, input.location);
        assert_eq!(
            result.units,
            [
                tilde("a"),
                lit('/'),
                lit('b'),
                lit(':'),
                tilde("c"),
                lit('/'),
                lit('d'),
                lit(':'),
                lit(':'),
                tilde(""),
            ]
        );
    }

    #[test]
    fn word_parse_tilde_everywhere_after_zero_is_everywhere() {
        for source in ["", "~", "~a:~b", ":~foo/a:~bar", "a~"] {
            let input = parse_word(source);
            assert_eq!(
                parse_tilde_everywhere_after(&input, 0),
                parse_tilde_everywhere(&input)
            );
        }
    }

    #[test]
    fn word_parse_tilde_everywhere_after_skips_units_before_index() {
        let input = parse_word("~=~a/b:~c");
        let result = parse_tilde_everywhere_after(&input, 2);
        assert_eq!(result.location, input.location);
        assert_eq!(
            result.units,
            [
                lit('~'),
                lit('='),
                tilde("a"),
                lit('/'),
                lit('b'),
                lit(':'),
                tilde("c"),
            ]
        );
    }

    #[test]
    fn word_parse_tilde_everywhere_after_index_not_at_tilde() {
        // Index 1 is the colon itself; the tilde after it is still parsed.
        let input = parse_word("a:~b");
        let result = parse_tilde_everywhere_after(&input, 1);
        assert_eq!(result.location, input.location);
        assert_eq!(result.units, [lit('a'), lit(':'), tilde("b")]);

        // Index 2 is past both the colon and the tilde, so nothing is parsed.
        let input = parse_word(":~x");
        let result = parse_tilde_everywhere_after(&input, 2);
        assert_eq!(result, input);
    }

    #[test]
    fn word_parse_tilde_everywhere_after_index_at_end() {
        let input = parse_word("~a");
        let result = parse_tilde_everywhere_after(&input, 2);
        assert_eq!(result, input);

        let input = parse_word("");
        let result = parse_tilde_everywhere_after(&input, 0);
        assert_eq!(result, input);
    }
}