vortex_io/filesystem/
glob.rs1use futures::StreamExt;
5use futures::TryStreamExt;
6use futures::stream;
7use futures::stream::BoxStream;
8use vortex_error::VortexResult;
9use vortex_error::vortex_bail;
10use vortex_error::vortex_err;
11
12use crate::filesystem::FileListing;
13use crate::filesystem::FileSystem;
14
15impl dyn FileSystem + '_ {
16 pub fn glob(&self, pattern: &str) -> VortexResult<BoxStream<'_, VortexResult<FileListing>>> {
24 validate_glob(pattern)?;
25
26 if !pattern.contains(['*', '?', '[']) {
33 let pattern = pattern.to_string();
34 let stream = stream::once(async move { self.head(&pattern).await })
35 .try_filter_map(|listing| async move { Ok(listing) })
36 .boxed();
37 return Ok(stream);
38 }
39
40 let glob_pattern = glob::Pattern::new(pattern)
41 .map_err(|e| vortex_err!("Invalid glob pattern '{}': {}", pattern, e))?;
42
43 let listing_prefix = glob_list_prefix(pattern).trim_end_matches('/');
44
45 tracing::debug!(
46 "Performing glob with pattern '{}' and listing prefix '{}'",
47 pattern,
48 listing_prefix
49 );
50 let stream = self
51 .list(listing_prefix)
52 .try_filter(move |listing| {
53 let matches = glob_pattern.matches(&listing.path);
54 async move { matches }
55 })
56 .into_stream()
57 .boxed();
58
59 Ok(stream)
60 }
61}
62
/// Returns the literal directory prefix of a glob `pattern`.
///
/// The prefix is everything up to and including the last `/` that appears before the first
/// glob metacharacter (`*`, `?` or `[`). When the pattern has no metacharacter the whole
/// pattern is searched, so the result is the parent directory of the path. When no `/`
/// precedes the first metacharacter the result is the empty string.
///
/// The returned slice borrows from `pattern`; nothing is allocated.
fn glob_list_prefix(pattern: &str) -> &str {
    // End of the literal (metacharacter-free) head of the pattern.
    let literal_end = pattern
        .find(['*', '?', '['])
        .unwrap_or(pattern.len());

    // Keep the separator itself so that the prefix always names a directory.
    pattern[..literal_end]
        .rfind('/')
        .map_or("", |slash_pos| &pattern[..=slash_pos])
}
74
75fn validate_glob(pattern: &str) -> VortexResult<()> {
77 for escape_pattern in ["\\*", "\\?", "\\["] {
78 if pattern.contains(escape_pattern) {
79 vortex_bail!(
80 "Escaped glob characters are not allowed in patterns. Found '{}' in: {}",
81 escape_pattern,
82 pattern
83 );
84 }
85 }
86 Ok(())
87}
88
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use async_trait::async_trait;
    use futures::TryStreamExt;
    use vortex_error::vortex_panic;

    use super::*;
    use crate::VortexReadAt;
    use crate::filesystem::FileSystem;

    /// Test double that answers `head` from a fixed list of files.
    ///
    /// Every other operation panics, so a test fails loudly if `glob` falls back to `list`
    /// (or touches anything else) while resolving an exact path.
    #[derive(Debug)]
    struct HeadFileSystem {
        files: Vec<FileListing>,
    }

    impl HeadFileSystem {
        /// Builds the file system from `(path, size)` pairs.
        fn new(files: &[(&str, u64)]) -> Self {
            Self {
                files: files
                    .iter()
                    .map(|&(path, size)| FileListing {
                        path: path.to_string(),
                        size: Some(size),
                    })
                    .collect(),
            }
        }
    }

    #[async_trait]
    impl FileSystem for HeadFileSystem {
        fn list(&self, _prefix: &str) -> BoxStream<'_, VortexResult<FileListing>> {
            vortex_panic!("list() must not be called for an exact path; glob should use head()")
        }

        async fn head(&self, path: &str) -> VortexResult<Option<FileListing>> {
            // Exact string comparison: a path that is only a prefix of a stored file is absent.
            Ok(self
                .files
                .iter()
                .find(|listing| listing.path == path)
                .cloned())
        }

        async fn open_read(&self, _path: &str) -> VortexResult<Arc<dyn VortexReadAt>> {
            vortex_panic!("open_read() should not be called")
        }

        async fn delete(&self, _path: &str) -> VortexResult<()> {
            vortex_panic!("delete() should not be called")
        }
    }

    #[tokio::test]
    async fn test_glob_exact_path_existing_returns_listing_with_size() -> VortexResult<()> {
        let fs = HeadFileSystem::new(&[("data/file.vortex", 1024)]);
        let fs_dyn: &dyn FileSystem = &fs;
        let results: Vec<FileListing> = fs_dyn.glob("data/file.vortex")?.try_collect().await?;
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].path, "data/file.vortex");
        assert_eq!(
            results[0].size,
            Some(1024),
            "exact-path glob should propagate the size reported by head"
        );
        Ok(())
    }

    #[tokio::test]
    async fn test_glob_exact_path_missing_returns_empty_stream() -> VortexResult<()> {
        let fs = HeadFileSystem::new(&[]);
        let fs_dyn: &dyn FileSystem = &fs;
        let results: Vec<FileListing> = fs_dyn.glob("data/missing.vortex")?.try_collect().await?;
        assert!(
            results.is_empty(),
            "missing exact path should yield an empty stream"
        );
        Ok(())
    }

    #[tokio::test]
    async fn test_glob_exact_path_ignores_prefix_siblings() -> VortexResult<()> {
        // `foo.vortex.backup` shares the prefix `foo.vortex`; an exact-path glob must return
        // only the file whose path equals the pattern.
        let fs = HeadFileSystem::new(&[("foo.vortex", 10), ("foo.vortex.backup", 20)]);
        let fs_dyn: &dyn FileSystem = &fs;
        let results: Vec<FileListing> = fs_dyn.glob("foo.vortex")?.try_collect().await?;
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].path, "foo.vortex");
        assert_eq!(results[0].size, Some(10));
        Ok(())
    }

    #[test]
    fn test_glob_list_prefix_with_wildcard_in_filename() {
        assert_eq!(glob_list_prefix("folder/file*.txt"), "folder/");
    }

    #[test]
    fn test_glob_list_prefix_with_wildcard_in_directory() {
        assert_eq!(glob_list_prefix("folder/*/file.txt"), "folder/");
    }

    #[test]
    fn test_glob_list_prefix_nested_directories() {
        assert_eq!(glob_list_prefix("data/2023/*/logs/*.log"), "data/2023/");
    }

    #[test]
    fn test_glob_list_prefix_wildcard_at_root() {
        assert_eq!(glob_list_prefix("*.txt"), "");
    }

    #[test]
    fn test_glob_list_prefix_no_wildcards() {
        assert_eq!(
            glob_list_prefix("folder/subfolder/file.txt"),
            "folder/subfolder/"
        );
    }

    #[test]
    fn test_glob_list_prefix_question_mark() {
        assert_eq!(glob_list_prefix("folder/file?.txt"), "folder/");
    }

    #[test]
    fn test_glob_list_prefix_bracket() {
        assert_eq!(glob_list_prefix("folder/file[abc].txt"), "folder/");
    }

    #[test]
    fn test_glob_list_prefix_empty() {
        assert_eq!(glob_list_prefix(""), "");
    }

    #[test]
    fn test_validate_glob_valid() -> VortexResult<()> {
        validate_glob("path/*.txt")?;
        validate_glob("path/to/**/*.vortex")?;
        Ok(())
    }

    #[test]
    fn test_validate_glob_escaped_asterisk() {
        assert!(validate_glob("path\\*.txt").is_err());
    }

    #[test]
    fn test_validate_glob_escaped_question() {
        assert!(validate_glob("path\\?.txt").is_err());
    }

    #[test]
    fn test_validate_glob_escaped_bracket() {
        assert!(validate_glob("path\\[test].txt").is_err());
    }
}