vortex_io/filesystem/
glob.rs1use futures::StreamExt;
5use futures::TryStreamExt;
6use futures::stream::BoxStream;
7use vortex_error::VortexResult;
8use vortex_error::vortex_bail;
9use vortex_error::vortex_err;
10
11use crate::filesystem::FileListing;
12use crate::filesystem::FileSystem;
13
14impl dyn FileSystem + '_ {
15 pub fn glob(&self, pattern: &str) -> VortexResult<BoxStream<'_, VortexResult<FileListing>>> {
23 validate_glob(pattern)?;
24
25 if !pattern.contains(['*', '?', '[']) {
28 let listing = FileListing {
29 path: pattern.to_string(),
30 size: None,
31 };
32 return Ok(futures::stream::once(async { Ok(listing) }).boxed());
33 }
34
35 let glob_pattern = glob::Pattern::new(pattern)
36 .map_err(|e| vortex_err!("Invalid glob pattern '{}': {}", pattern, e))?;
37
38 let listing_prefix = glob_list_prefix(pattern).trim_end_matches('/');
39
40 tracing::debug!(
41 "Performing glob with pattern '{}' and listing prefix '{}'",
42 pattern,
43 listing_prefix
44 );
45 let stream = self
46 .list(listing_prefix)
47 .try_filter(move |listing| {
48 let matches = glob_pattern.matches(&listing.path);
49 async move { matches }
50 })
51 .into_stream()
52 .boxed();
53
54 Ok(stream)
55 }
56}
57
/// Returns the literal directory prefix of a glob `pattern`.
///
/// The prefix is everything up to and including the last `/` that occurs
/// before the first wildcard character (`*`, `?` or `[`). When the pattern has
/// no wildcard, the whole pattern is searched for that last `/`. If no `/` is
/// found, the empty string is returned.
fn glob_list_prefix(pattern: &str) -> &str {
    // Portion of the pattern that contains no wildcard characters.
    let literal_head = match pattern.find(['*', '?', '[']) {
        Some(wildcard_at) => &pattern[..wildcard_at],
        None => pattern,
    };

    // Keep the trailing slash so the result is always a directory-like prefix.
    literal_head
        .rfind('/')
        .map_or("", |slash_at| &literal_head[..=slash_at])
}
69
70fn validate_glob(pattern: &str) -> VortexResult<()> {
72 for escape_pattern in ["\\*", "\\?", "\\["] {
73 if pattern.contains(escape_pattern) {
74 vortex_bail!(
75 "Escaped glob characters are not allowed in patterns. Found '{}' in: {}",
76 escape_pattern,
77 pattern
78 );
79 }
80 }
81 Ok(())
82}
83
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use async_trait::async_trait;
    use futures::TryStreamExt;
    use vortex_error::vortex_panic;

    use super::*;
    use crate::VortexReadAt;
    use crate::filesystem::FileSystem;

    /// File system stub whose every operation panics, used to prove that a
    /// given code path never touches the backing store.
    #[derive(Debug)]
    struct NoListFileSystem;

    #[async_trait]
    impl FileSystem for NoListFileSystem {
        fn list(&self, _prefix: &str) -> BoxStream<'_, VortexResult<FileListing>> {
            vortex_panic!("list() should not be called for exact paths")
        }

        async fn open_read(&self, _path: &str) -> VortexResult<Arc<dyn VortexReadAt>> {
            vortex_panic!("open_read() should not be called")
        }

        async fn delete(&self, _path: &str) -> VortexResult<()> {
            vortex_panic!("delete() should not be called")
        }
    }

    /// A wildcard-free pattern must yield exactly one listing without calling
    /// `list` (the stub would panic if it were called).
    #[tokio::test]
    async fn test_glob_exact_path_skips_list() -> VortexResult<()> {
        let fs: &dyn FileSystem = &NoListFileSystem;

        let stream = fs.glob("data/file.vortex")?;
        let listings = stream.try_collect::<Vec<FileListing>>().await?;

        assert_eq!(listings.len(), 1);
        let only = &listings[0];
        assert_eq!(only.path, "data/file.vortex");
        assert!(only.size.is_none());
        Ok(())
    }

    #[test]
    fn test_glob_list_prefix_with_wildcard_in_filename() {
        let prefix = glob_list_prefix("folder/file*.txt");
        assert_eq!(prefix, "folder/");
    }

    #[test]
    fn test_glob_list_prefix_with_wildcard_in_directory() {
        let prefix = glob_list_prefix("folder/*/file.txt");
        assert_eq!(prefix, "folder/");
    }

    #[test]
    fn test_glob_list_prefix_nested_directories() {
        let prefix = glob_list_prefix("data/2023/*/logs/*.log");
        assert_eq!(prefix, "data/2023/");
    }

    #[test]
    fn test_glob_list_prefix_wildcard_at_root() {
        let prefix = glob_list_prefix("*.txt");
        assert_eq!(prefix, "");
    }

    #[test]
    fn test_glob_list_prefix_no_wildcards() {
        let prefix = glob_list_prefix("folder/subfolder/file.txt");
        assert_eq!(prefix, "folder/subfolder/");
    }

    #[test]
    fn test_glob_list_prefix_question_mark() {
        let prefix = glob_list_prefix("folder/file?.txt");
        assert_eq!(prefix, "folder/");
    }

    #[test]
    fn test_glob_list_prefix_bracket() {
        let prefix = glob_list_prefix("folder/file[abc].txt");
        assert_eq!(prefix, "folder/");
    }

    #[test]
    fn test_glob_list_prefix_empty() {
        let prefix = glob_list_prefix("");
        assert_eq!(prefix, "");
    }

    /// Unescaped wildcards, including `**`, pass validation.
    #[test]
    fn test_validate_glob_valid() -> VortexResult<()> {
        for accepted in ["path/*.txt", "path/to/**/*.vortex"] {
            validate_glob(accepted)?;
        }
        Ok(())
    }

    #[test]
    fn test_validate_glob_escaped_asterisk() {
        let outcome = validate_glob("path\\*.txt");
        assert!(outcome.is_err());
    }

    #[test]
    fn test_validate_glob_escaped_question() {
        let outcome = validate_glob("path\\?.txt");
        assert!(outcome.is_err());
    }

    #[test]
    fn test_validate_glob_escaped_bracket() {
        let outcome = validate_glob("path\\[test].txt");
        assert!(outcome.is_err());
    }
}