yash_semantics/expansion/
glob.rs

1// This file is part of yash, an extended POSIX shell.
2// Copyright (C) 2022 WATANABE Yuki
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17//! Pathname expansion
18//!
19//! Pathname expansion (a.k.a. globbing) scans directories and produces
20//! pathnames matching the input field.
21//!
22//! # Pattern syntax
23//!
24//! An input field is split by `/`, and each component is parsed as a pattern
25//! that may contain the following non-literal elements:
26//!
27//! - `?`
28//! - `*`
29//! - Bracket expression (a set of characters enclosed in brackets)
30//! - `\`
31//!
32//! Refer to the [`yash-fnmatch`](yash_fnmatch) crate for pattern syntax and
33//! semantics details.
34//!
35//! # Directory scanning
36//!
37//! The expansion scans directories corresponding to components containing any
38//! non-literal elements above. The scan requires read permission for the
39//! directory. For components that have only literal characters, no scan is
40//! performed. Search permissions for all ancestor directories are needed to
41//! check if the file exists referred to by the resulting pathname.
42//!
43//! # Results
44//!
45//! Pathname expansion returns pathnames that have matched the input pattern,
46//! sorted alphabetically. Any errors are silently ignored. If directory
47//! scanning produces no pathnames, the input pattern is returned intact. (TODO:
48//! the null-glob option)
49//!
50//! If the input field contains no non-literal elements subject to pattern
51//! matching at all, the result is the input intact.
52
53use super::attr::AttrChar;
54use super::attr::AttrField;
55use super::attr::Origin;
56use std::ffi::CString;
57use std::iter::Once;
58use std::marker::PhantomData;
59use yash_env::Env;
60use yash_env::System;
61use yash_env::option::State::Off;
62use yash_env::semantics::Field;
63use yash_env::system::AT_FDCWD;
64use yash_fnmatch::Config;
65use yash_fnmatch::Pattern;
66use yash_fnmatch::PatternChar;
67use yash_syntax::source::Location;
68
69#[derive(Debug)]
70enum Inner {
71    One(Once<Field>),
72    Many(std::vec::IntoIter<Field>),
73}
74
75impl From<Field> for Inner {
76    fn from(field: Field) -> Self {
77        Inner::One(std::iter::once(field))
78    }
79}
80
81/// Iterator that provides results of parameter expansion
82///
83/// This iterator is created with the [`glob`] function.
84#[derive(Debug)]
85pub struct Glob<'a> {
86    /// Dummy to allow retaining a mutable reference to `Env` in the future
87    ///
88    /// The current [`glob`] implementation pre-computes all results before
89    /// returning a `Glob`. The future implementation may optimize by using a
90    /// [generator], which will need a real reference to `Env`.
91    ///
92    /// [generator]: https://github.com/rust-lang/rust/issues/43122
93    env: PhantomData<&'a mut Env>,
94
95    inner: Inner,
96}
97
98impl From<Inner> for Glob<'_> {
99    fn from(inner: Inner) -> Self {
100        Glob {
101            env: PhantomData,
102            inner,
103        }
104    }
105}
106
107impl Iterator for Glob<'_> {
108    type Item = Field;
109    fn next(&mut self) -> Option<Field> {
110        match &mut self.inner {
111            Inner::One(once) => once.next(),
112            Inner::Many(many) => many.next(),
113        }
114    }
115}
116
117/// Converts a field to a glob pattern.
118fn to_pattern(field: &[AttrChar]) -> Option<Pattern> {
119    #[derive(Clone, Debug)]
120    struct Chars<'a> {
121        inner: std::slice::Iter<'a, AttrChar>,
122        next_quoted: bool,
123    }
124    impl Iterator for Chars<'_> {
125        type Item = PatternChar;
126        fn next(&mut self) -> Option<PatternChar> {
127            for c in &mut self.inner {
128                let quoted = std::mem::replace(&mut self.next_quoted, false);
129                if c.is_quoting {
130                    continue;
131                } else if quoted || c.is_quoted || c.origin == Origin::HardExpansion {
132                    return Some(PatternChar::Literal(c.value));
133                } else {
134                    self.next_quoted = c.value == '\\';
135                    return Some(PatternChar::Normal(c.value));
136                }
137            }
138            None
139        }
140    }
141
142    let chars = Chars {
143        inner: field.iter(),
144        next_quoted: false,
145    };
146    let mut config = Config::default();
147    config.anchor_begin = true;
148    config.anchor_end = true;
149    config.literal_period = true;
150    Pattern::parse_with_config(chars, config).ok()
151}
152
153fn remove_quotes_and_strip(chars: &[AttrChar]) -> impl Iterator<Item = char> + '_ {
154    use super::attr_strip::Strip;
155    use super::quote_removal::skip_quotes;
156    skip_quotes(chars.iter().copied()).strip()
157}
158
159#[derive(Debug)]
160struct SearchEnv<'e> {
161    env: &'e mut Env,
162    prefix: String,
163    origin: Location,
164    results: Vec<Field>,
165}
166
167impl SearchEnv<'_> {
168    /// Recursively searches directories for matching pathnames.
169    fn search_dir(&mut self, suffix: &[AttrChar]) {
170        let (this, new_suffix) = match suffix.iter().position(|c| c.value == '/') {
171            None => (suffix, None),
172            Some(index) => (&suffix[..index], Some(&suffix[index + 1..])),
173        };
174
175        match to_pattern(this).map(Pattern::into_literal) {
176            None => {
177                self.push_component(new_suffix, false, |prefix| {
178                    prefix.extend(remove_quotes_and_strip(this))
179                });
180            }
181            Some(Ok(literal)) => {
182                self.push_component(new_suffix, false, |prefix| prefix.push_str(&literal));
183            }
184            Some(Err(pattern)) => {
185                let dir_path = if self.prefix.is_empty() {
186                    c".".to_owned()
187                } else if let Ok(dir_path) = CString::new(self.prefix.as_str()) {
188                    dir_path
189                } else {
190                    return;
191                };
192
193                if let Ok(mut dir) = self.env.system.opendir(&dir_path) {
194                    while let Ok(Some(entry)) = dir.next() {
195                        if let Some(name) = entry.name.to_str() {
196                            if name != "." && name != ".." && pattern.is_match(name) {
197                                self.push_component(new_suffix, true, |prefix| {
198                                    prefix.push_str(name)
199                                });
200                            }
201                        }
202                    }
203                }
204            }
205        }
206    }
207
208    fn file_exists(&mut self) -> bool {
209        let Ok(path) = CString::new(self.prefix.as_str()) else {
210            return false;
211        };
212        self.env
213            .system
214            .fstatat(AT_FDCWD, &path, /* follow symlinks */ true)
215            .is_ok()
216    }
217
218    /// Pushes a pathname component to `prefix` and starts processing the next
219    /// suffix component.
220    ///
221    /// If `suffix` is `None`, the pathname is checked for existence and added
222    /// to the results if it exists. If `file_exists` is `true`, the pathname is
223    /// assumed to exist.
224    ///
225    /// `push` is a closure that appends the suffix to the prefix.
226    fn push_component<F>(&mut self, suffix: Option<&[AttrChar]>, file_exists: bool, push: F)
227    where
228        F: FnOnce(&mut String),
229    {
230        let old_prefix_len = self.prefix.len();
231        push(&mut self.prefix);
232
233        match suffix {
234            None => {
235                if file_exists || self.file_exists() {
236                    self.results.push(Field {
237                        value: self.prefix.clone(),
238                        origin: self.origin.clone(),
239                    });
240                }
241            }
242            Some(suffix) => {
243                self.prefix.push('/');
244                self.search_dir(suffix);
245            }
246        }
247
248        self.prefix.truncate(old_prefix_len);
249    }
250}
251
252/// Performs parameter expansion.
253///
254/// This function returns an iterator that yields fields resulting from the
255/// expansion.
256///
257/// If the `Glob` option is `Off` in `env.options`, the expansion is skipped.
258pub fn glob(env: &mut Env, field: AttrField) -> Glob<'_> {
259    if env.options.get(yash_env::option::Option::Glob) == Off {
260        return Glob::from(Inner::from(field.remove_quotes_and_strip()));
261    }
262
263    // TODO Quick check for *, ?, [ containment
264
265    let mut search_env = SearchEnv {
266        env,
267        prefix: String::with_capacity(1024 /*nix::libc::PATH_MAX*/),
268        origin: field.origin,
269        results: Vec::new(),
270    };
271    search_env.search_dir(&field.chars);
272
273    let mut results = search_env.results;
274    Glob::from(if results.is_empty() {
275        let field = AttrField {
276            chars: field.chars,
277            origin: search_env.origin,
278        };
279        Inner::from(field.remove_quotes_and_strip())
280    } else {
281        results.sort_unstable_by(|a, b| a.value.cmp(&b.value));
282        Inner::Many(results.into_iter())
283    })
284}
285
286#[cfg(test)]
287mod tests {
288    use super::*;
289    use crate::expansion::AttrChar;
290    use crate::expansion::Origin;
291    use std::rc::Rc;
292    use yash_env::VirtualSystem;
293    use yash_env::path::Path;
294    use yash_env::str::UnixStr;
295    use yash_env::system::Mode;
296    use yash_syntax::source::Location;
297
298    fn dummy_attr_field(s: &str) -> AttrField {
299        let chars = s
300            .chars()
301            .map(|c| AttrChar {
302                value: c,
303                origin: Origin::SoftExpansion,
304                is_quoted: false,
305                is_quoting: false,
306            })
307            .collect();
308        let origin = Location::dummy("");
309        AttrField { chars, origin }
310    }
311
312    fn env_with_dummy_files<I, P>(paths: I) -> Env
313    where
314        I: IntoIterator<Item = P>,
315        P: AsRef<Path>,
316    {
317        let system = VirtualSystem::new();
318        let mut state = system.state.borrow_mut();
319        for path in paths {
320            state.file_system.save(path, Rc::default()).unwrap();
321        }
322        drop(state);
323        Env::with_system(Box::new(system))
324    }
325
326    #[test]
327    fn literal_field() {
328        let mut env = Env::new_virtual();
329        let f = dummy_attr_field("abc");
330        let mut i = glob(&mut env, f);
331        assert_eq!(i.next().unwrap().value, "abc");
332        assert_eq!(i.next(), None);
333    }
334
335    #[test]
336    fn backslash_escapes_next_char() {
337        let mut env = env_with_dummy_files(["a", r"\a"]);
338        // The backslash escapes the '?', so this is not a pattern.
339        let f = dummy_attr_field(r"\?");
340        let mut i = glob(&mut env, f);
341        assert_eq!(i.next().unwrap().value, r"\?");
342        assert_eq!(i.next(), None);
343    }
344
345    #[test]
346    fn quoting_characters_are_removed() {
347        let mut env = Env::new_virtual();
348        let mut f = dummy_attr_field("aXbcYde");
349        f.chars[1].is_quoting = true;
350        f.chars[4].is_quoting = true;
351        let mut i = glob(&mut env, f);
352        assert_eq!(i.next().unwrap().value, "abcde");
353        assert_eq!(i.next(), None);
354    }
355
356    #[test]
357    fn quoted_characters_do_not_expand() {
358        let mut env = env_with_dummy_files(["foo.exe"]);
359        let mut f = dummy_attr_field("foo.*");
360        f.chars[4].is_quoted = true;
361        let mut i = glob(&mut env, f);
362        assert_eq!(i.next().unwrap().value, "foo.*");
363        assert_eq!(i.next(), None);
364    }
365
366    #[test]
367    fn characters_from_hard_expansion_do_not_expand() {
368        let mut env = env_with_dummy_files(["foo.exe"]);
369        let mut f = dummy_attr_field("foo.*");
370        f.chars[4].origin = Origin::HardExpansion;
371        let mut i = glob(&mut env, f);
372        assert_eq!(i.next().unwrap().value, "foo.*");
373        assert_eq!(i.next(), None);
374    }
375
376    #[test]
377    fn single_component_pattern_no_match() {
378        let mut env = env_with_dummy_files(["foo.exe"]);
379        let f = dummy_attr_field("*.txt");
380        let mut i = glob(&mut env, f);
381        assert_eq!(i.next().unwrap().value, "*.txt");
382        assert_eq!(i.next(), None);
383    }
384
385    #[test]
386    fn single_component_pattern_single_match() {
387        let mut env = env_with_dummy_files(["foo.exe", "foo.txt"]);
388        let f = dummy_attr_field("*.txt");
389        let mut i = glob(&mut env, f);
390        assert_eq!(i.next().unwrap().value, "foo.txt");
391        assert_eq!(i.next(), None);
392    }
393
394    #[test]
395    fn single_component_pattern_many_matches() {
396        let mut env = env_with_dummy_files(["foo.exe", "foo.txt"]);
397        let f = dummy_attr_field("foo.*");
398        let mut i = glob(&mut env, f);
399        assert_eq!(i.next().unwrap().value, "foo.exe");
400        assert_eq!(i.next().unwrap().value, "foo.txt");
401        assert_eq!(i.next(), None);
402    }
403
404    #[test]
405    fn no_pattern_matches_dot_or_dot_dot() {
406        let mut env = env_with_dummy_files([".foo"]);
407        let f = dummy_attr_field(".*");
408        let mut i = glob(&mut env, f);
409        assert_eq!(i.next().unwrap().value, ".foo");
410        assert_eq!(i.next(), None);
411    }
412
413    #[test]
414    fn absolute_path_single_component_pattern_many_matches() {
415        let mut env = env_with_dummy_files(["/foo.exe", "/foo.txt"]);
416        let f = dummy_attr_field("/foo.*");
417        let mut i = glob(&mut env, f);
418        assert_eq!(i.next().unwrap().value, "/foo.exe");
419        assert_eq!(i.next().unwrap().value, "/foo.txt");
420        assert_eq!(i.next(), None);
421    }
422
423    #[test]
424    fn multi_component_pattern_ending_with_pattern() {
425        let mut env = env_with_dummy_files([
426            "a/a/a/a", "a/a/a/b", "a/a/a/no", "a/a/b/a", "a/b/a/a", "a/b/a/b", "a/b/a/no",
427            "a/no/a/a", "b/a/a/a",
428        ]);
429        let f = dummy_attr_field("a/?/a/?");
430        let mut i = glob(&mut env, f);
431        assert_eq!(i.next().unwrap().value, "a/a/a/a");
432        assert_eq!(i.next().unwrap().value, "a/a/a/b");
433        assert_eq!(i.next().unwrap().value, "a/b/a/a");
434        assert_eq!(i.next().unwrap().value, "a/b/a/b");
435        assert_eq!(i.next(), None);
436    }
437
438    #[test]
439    fn multi_component_pattern_ending_with_literal() {
440        let mut env = env_with_dummy_files([
441            "/a/a/a/a",
442            "/a/a/a/b",
443            "/a/a/b/a",
444            "/a/a/no/a",
445            "/a/b/a/a",
446            "/b/a/a/a",
447            "/b/a/b/b",
448            "/b/a/c",
449            "/b/a/no",
450            "/c/a",
451            "/no/a",
452        ]);
453        let f = dummy_attr_field("/?/a/?/a");
454        let mut i = glob(&mut env, f);
455        assert_eq!(i.next().unwrap().value, "/a/a/a/a");
456        assert_eq!(i.next().unwrap().value, "/a/a/b/a");
457        assert_eq!(i.next().unwrap().value, "/b/a/a/a");
458        assert_eq!(i.next(), None);
459    }
460
461    #[test]
462    fn multi_component_pattern_ending_with_slash() {
463        let mut env = env_with_dummy_files(["a/a/_", "a/b/_", "a/c"]);
464        let f = dummy_attr_field("a/*/");
465        let mut i = glob(&mut env, f);
466        assert_eq!(i.next().unwrap().value, "a/a/");
467        assert_eq!(i.next().unwrap().value, "a/b/");
468        assert_eq!(i.next(), None);
469    }
470
471    #[test]
472    fn multi_component_pattern_with_adjacent_slashes() {
473        let mut env = env_with_dummy_files(["a/b", "b/a"]);
474        let f = dummy_attr_field("?//?");
475        let mut i = glob(&mut env, f);
476        assert_eq!(i.next().unwrap().value, "a//b");
477        assert_eq!(i.next().unwrap().value, "b//a");
478        assert_eq!(i.next(), None);
479    }
480
481    #[test]
482    fn no_search_permission_needed_if_last_component_is_pattern() {
483        let system = VirtualSystem::new();
484        {
485            let mut state = system.state.borrow_mut();
486            state
487                .file_system
488                .save("foo/bar", Default::default())
489                .unwrap();
490            let dir = state.file_system.get("foo").unwrap();
491            dir.borrow_mut().permissions = Mode::ALL_READ | Mode::ALL_WRITE;
492        }
493        let mut env = Env::with_system(Box::new(system));
494        let f = dummy_attr_field("foo/*");
495        let mut i = glob(&mut env, f);
496        assert_eq!(i.next().unwrap().value, "foo/bar");
497        assert_eq!(i.next(), None);
498    }
499
500    #[test]
501    fn invalid_pattern_remains_intact() {
502        let mut env = env_with_dummy_files(["foo.txt"]);
503        let f = dummy_attr_field("*[[:wrong:]]*");
504        let mut i = glob(&mut env, f);
505        assert_eq!(i.next().unwrap().value, "*[[:wrong:]]*");
506        assert_eq!(i.next(), None);
507    }
508
509    #[test]
510    fn slash_between_brackets() {
511        let mut env = env_with_dummy_files(["abd", "a/d"]);
512        let f = dummy_attr_field("a[b/c]d");
513        let mut i = glob(&mut env, f);
514        assert_eq!(i.next().unwrap().value, "a[b/c]d");
515        assert_eq!(i.next(), None);
516    }
517
518    #[test]
519    fn nul_byte_in_literal_followed_by_pattern() {
520        let mut env = env_with_dummy_files(["x", "y/y"]);
521        let f = dummy_attr_field("\0/*");
522        let mut i = glob(&mut env, f);
523        assert_eq!(i.next().unwrap().value, "\0/*");
524        assert_eq!(i.next(), None);
525    }
526
527    #[test]
528    fn broken_utf8_byte_in_directory_entry_name() {
529        let mut env = env_with_dummy_files([UnixStr::from_bytes(b"foo/\xFF")]);
530        let f = dummy_attr_field("foo/*");
531        let mut i = glob(&mut env, f);
532        assert_eq!(i.next().unwrap().value, "foo/*");
533        assert_eq!(i.next(), None);
534    }
535
536    #[test]
537    fn noglob_option() {
538        let mut env = env_with_dummy_files(["foo.exe"]);
539        env.options.set(yash_env::option::Option::Glob, Off);
540        let f = dummy_attr_field("foo.*");
541        let mut i = glob(&mut env, f);
542        assert_eq!(i.next().unwrap().value, "foo.*");
543        assert_eq!(i.next(), None);
544    }
545}