vortex_io/filesystem/
glob.rs1use futures::StreamExt;
5use futures::TryStreamExt;
6use futures::stream::BoxStream;
7use vortex_error::VortexResult;
8use vortex_error::vortex_bail;
9use vortex_error::vortex_err;
10
11use crate::filesystem::FileListing;
12use crate::filesystem::FileSystem;
13
14impl dyn FileSystem + '_ {
15 pub fn glob(&self, pattern: &str) -> VortexResult<BoxStream<'_, VortexResult<FileListing>>> {
23 validate_glob(pattern)?;
24
25 if !pattern.contains(['*', '?', '[']) {
28 let listing = FileListing {
29 path: pattern.to_string(),
30 size: None,
31 };
32 return Ok(futures::stream::once(async { Ok(listing) }).boxed());
33 }
34
35 let glob_pattern = glob::Pattern::new(pattern)
36 .map_err(|e| vortex_err!("Invalid glob pattern '{}': {}", pattern, e))?;
37
38 let listing_prefix = glob_list_prefix(pattern).trim_end_matches('/');
39
40 tracing::debug!(
41 "Performing glob with pattern '{}' and listing prefix '{}'",
42 pattern,
43 listing_prefix
44 );
45 let stream = self
46 .list(listing_prefix)
47 .try_filter(move |listing| {
48 let matches = glob_pattern.matches(&listing.path);
49 async move { matches }
50 })
51 .into_stream()
52 .boxed();
53
54 Ok(stream)
55 }
56}
57
/// Returns the leading directory portion of `pattern` that contains no glob
/// characters.
///
/// The result is the slice of `pattern` up to and including the last `/` that
/// appears before the first `*`, `?` or `[`. If the pattern has no glob
/// character, the whole pattern is searched for that last `/`. An empty
/// string is returned when no such `/` exists.
fn glob_list_prefix(pattern: &str) -> &str {
    // The first piece of the split is everything ahead of the first glob
    // character, or the entire pattern when there is none.
    let literal_head = pattern.split(['*', '?', '[']).next().unwrap_or(pattern);

    literal_head
        .rfind('/')
        .map_or("", |slash_idx| &pattern[..=slash_idx])
}
69
70fn validate_glob(pattern: &str) -> VortexResult<()> {
72 for escape_pattern in ["\\*", "\\?", "\\["] {
73 if pattern.contains(escape_pattern) {
74 vortex_bail!(
75 "Escaped glob characters are not allowed in patterns. Found '{}' in: {}",
76 escape_pattern,
77 pattern
78 );
79 }
80 }
81 Ok(())
82}
83
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use async_trait::async_trait;
    use futures::TryStreamExt;
    use vortex_error::vortex_panic;

    use super::*;
    use crate::VortexReadAt;
    use crate::filesystem::FileSystem;

    /// File system stub whose methods all panic, used to prove that a code
    /// path never reaches the underlying file system.
    #[derive(Debug)]
    struct NoListFileSystem;

    #[async_trait]
    impl FileSystem for NoListFileSystem {
        fn list(&self, _prefix: &str) -> BoxStream<'_, VortexResult<FileListing>> {
            vortex_panic!("list() should not be called for exact paths")
        }

        async fn open_read(&self, _path: &str) -> VortexResult<Arc<dyn VortexReadAt>> {
            vortex_panic!("open_read() should not be called")
        }
    }

    /// A pattern without glob characters must yield exactly itself, with no
    /// size, and without calling `list()`.
    #[tokio::test]
    async fn test_glob_exact_path_skips_list() -> VortexResult<()> {
        let fs: &dyn FileSystem = &NoListFileSystem;

        let found = fs
            .glob("data/file.vortex")?
            .try_collect::<Vec<FileListing>>()
            .await?;

        assert_eq!(found.len(), 1);
        let only = &found[0];
        assert_eq!(only.path, "data/file.vortex");
        assert_eq!(only.size, None);
        Ok(())
    }

    #[test]
    fn test_glob_list_prefix_with_wildcard_in_filename() {
        let prefix = glob_list_prefix("folder/file*.txt");
        assert_eq!(prefix, "folder/");
    }

    #[test]
    fn test_glob_list_prefix_with_wildcard_in_directory() {
        let prefix = glob_list_prefix("folder/*/file.txt");
        assert_eq!(prefix, "folder/");
    }

    #[test]
    fn test_glob_list_prefix_nested_directories() {
        let prefix = glob_list_prefix("data/2023/*/logs/*.log");
        assert_eq!(prefix, "data/2023/");
    }

    #[test]
    fn test_glob_list_prefix_wildcard_at_root() {
        let prefix = glob_list_prefix("*.txt");
        assert_eq!(prefix, "");
    }

    #[test]
    fn test_glob_list_prefix_no_wildcards() {
        let prefix = glob_list_prefix("folder/subfolder/file.txt");
        assert_eq!(prefix, "folder/subfolder/");
    }

    #[test]
    fn test_glob_list_prefix_question_mark() {
        let prefix = glob_list_prefix("folder/file?.txt");
        assert_eq!(prefix, "folder/");
    }

    #[test]
    fn test_glob_list_prefix_bracket() {
        let prefix = glob_list_prefix("folder/file[abc].txt");
        assert_eq!(prefix, "folder/");
    }

    #[test]
    fn test_glob_list_prefix_empty() {
        let prefix = glob_list_prefix("");
        assert_eq!(prefix, "");
    }

    /// Unescaped wildcards, including `**`, pass validation.
    #[test]
    fn test_validate_glob_valid() -> VortexResult<()> {
        for pattern in ["path/*.txt", "path/to/**/*.vortex"] {
            validate_glob(pattern)?;
        }
        Ok(())
    }

    #[test]
    fn test_validate_glob_escaped_asterisk() {
        let outcome = validate_glob("path\\*.txt");
        assert!(outcome.is_err());
    }

    #[test]
    fn test_validate_glob_escaped_question() {
        let outcome = validate_glob("path\\?.txt");
        assert!(outcome.is_err());
    }

    #[test]
    fn test_validate_glob_escaped_bracket() {
        let outcome = validate_glob("path\\[test].txt");
        assert!(outcome.is_err());
    }
}