Skip to main content

langcodec_cli/
path_glob.rs

1use std::collections::HashSet;
2use std::path::{Path, PathBuf};
3
4use globset::{GlobBuilder, GlobSetBuilder};
5use ignore::WalkBuilder;
6use rayon::prelude::*;
7
8/// Expand possible glob patterns in a list of input strings into concrete file paths.
9/// Uses ignore + globset for fast, parallel, .gitignore-aware traversal.
10pub fn expand_input_globs(inputs: &Vec<String>) -> Result<Vec<String>, String> {
11    fn has_glob_meta(s: &str) -> bool {
12        s.bytes().any(|b| matches!(b, b'*' | b'?' | b'[' | b'{'))
13    }
14
15    // Extract a static directory prefix before the first glob meta-character
16    fn static_prefix_dir(pattern: &str) -> PathBuf {
17        let bytes = pattern.as_bytes();
18        let mut idx = 0usize;
19        while idx < bytes.len() {
20            match bytes[idx] {
21                b'*' | b'?' | b'[' | b'{' => break,
22                _ => idx += 1,
23            }
24        }
25        let prefix = &pattern[..idx];
26        let p = Path::new(prefix);
27        if p.is_dir() {
28            p.to_path_buf()
29        } else {
30            p.parent()
31                .map(|pp| pp.to_path_buf())
32                .unwrap_or_else(|| PathBuf::from("."))
33        }
34    }
35
36    // Build one GlobSet for all patterns (literal_separator to avoid '/' matching)
37    let mut builder = GlobSetBuilder::new();
38    for pat in inputs {
39        let glob = GlobBuilder::new(pat)
40            .literal_separator(true)
41            .build()
42            .map_err(|e| format!("Invalid glob pattern '{}': {}", pat, e))?;
43        builder.add(glob);
44    }
45    let set = builder
46        .build()
47        .map_err(|e| format!("Failed to build glob set: {}", e))?;
48
49    // Collect unique roots to minimize directory walks
50    let mut roots: Vec<PathBuf> = Vec::new();
51    for pat in inputs {
52        let root = if has_glob_meta(pat) {
53            static_prefix_dir(pat)
54        } else {
55            Path::new(pat)
56                .parent()
57                .map(|p| p.to_path_buf())
58                .unwrap_or_else(|| PathBuf::from("."))
59        };
60        if !roots.iter().any(|r| r == &root) {
61            roots.push(root);
62        }
63    }
64
65    // Walk roots in parallel and match files against the GlobSet
66    let collected: Vec<String> = roots
67        .par_iter()
68        .map(|root| {
69            let mut out: Vec<String> = Vec::new();
70            let walker = WalkBuilder::new(root)
71                .git_ignore(true)
72                .git_global(true)
73                .git_exclude(true)
74                .hidden(false)
75                .ignore(true)
76                .parents(true)
77                .build();
78
79            for dent in walker {
80                let dent = match dent {
81                    Ok(d) => d,
82                    Err(_e) => continue,
83                };
84                let ftype = match dent.file_type() {
85                    Some(t) => t,
86                    None => continue,
87                };
88                if !ftype.is_file() {
89                    continue;
90                }
91                let s = dent.path().to_string_lossy();
92                if set.is_match(s.as_ref()) {
93                    out.push(s.to_string());
94                }
95            }
96            out
97        })
98        .flatten()
99        .collect();
100
101    // If nothing matched, preserve original inputs to surface errors later
102    if collected.is_empty() {
103        return Ok(inputs.clone());
104    }
105
106    // Deduplicate while preserving order
107    let mut seen: HashSet<String> = HashSet::new();
108    let mut results: Vec<String> = Vec::with_capacity(collected.len());
109    for s in collected {
110        if seen.insert(s.clone()) {
111            results.push(s);
112        }
113    }
114    Ok(results)
115}