1use globset::{Glob, GlobBuilder};
5use owo_colors::OwoColorize;
6use thiserror::Error;
7
8#[derive(Debug, Error)]
9pub enum PortableGlobError {
10 #[error(transparent)]
12 GlobError(#[from] globset::Error),
13 #[error(
14 "The parent directory operator (`..`) at position {pos} is not allowed in glob: `{glob}`"
15 )]
16 ParentDirectory { glob: String, pos: usize },
17 #[error("Invalid character `{invalid}` at position {pos} in glob: `{glob}`")]
18 InvalidCharacter {
19 glob: String,
20 pos: usize,
21 invalid: char,
22 },
23 #[error(
24 "Invalid character `{invalid}` at position {pos} in glob: `{glob}`. {}{} Characters can be escaped with a backslash",
25 "hint".bold().cyan(),
26 ":".bold()
27 )]
28 InvalidCharacterUv {
29 glob: String,
30 pos: usize,
31 invalid: char,
32 },
33 #[error(
34 "Only forward slashes are allowed as path separator, invalid character at position {pos} in glob: `{glob}`"
35 )]
36 InvalidBackslash { glob: String, pos: usize },
37 #[error(
38 "Path separators can't be escaped, invalid character at position {pos} in glob: `{glob}`"
39 )]
40 InvalidEscapee { glob: String, pos: usize },
41 #[error("Invalid character `{invalid}` in range at position {pos} in glob: `{glob}`")]
42 InvalidCharacterRange {
43 glob: String,
44 pos: usize,
45 invalid: char,
46 },
47 #[error("Too many at stars at position {pos} in glob: `{glob}`")]
48 TooManyStars { glob: String, pos: usize },
49 #[error("Trailing backslash at position {pos} in glob: `{glob}`")]
50 TrailingEscape { glob: String, pos: usize },
51}
52
/// The glob dialect used when validating and compiling a pattern.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum PortableGlobParser {
    /// Dialect in which backslashes are not treated as escapes.
    Pep639,
    /// Dialect in which a backslash escapes the character that follows it.
    Uv,
}
68
69impl PortableGlobParser {
70 fn backslash_escape(self) -> bool {
71 match self {
72 Self::Pep639 => false,
73 Self::Uv => true,
74 }
75 }
76
77 pub fn parse(&self, glob: &str) -> Result<Glob, PortableGlobError> {
94 self.check(glob)?;
95 Ok(GlobBuilder::new(glob)
96 .literal_separator(true)
97 .backslash_escape(self.backslash_escape())
99 .build()?)
100 }
101
102 pub fn check(&self, glob: &str) -> Result<(), PortableGlobError> {
104 let mut chars = glob.chars().enumerate().peekable();
105 let mut start_or_slash = true;
108 while let Some((pos, c)) = chars.next() {
110 if c == '*' {
114 let mut star_run = 1;
115 while let Some((_, c)) = chars.peek() {
116 if *c == '*' {
117 star_run += 1;
118 chars.next();
119 } else {
120 break;
121 }
122 }
123 if star_run >= 3 {
124 return Err(PortableGlobError::TooManyStars {
125 glob: glob.to_string(),
126 pos,
128 });
129 } else if star_run == 2 {
130 if chars.peek().is_some_and(|(_, c)| *c != '/') {
131 return Err(PortableGlobError::TooManyStars {
132 glob: glob.to_string(),
133 pos,
135 });
136 }
137 }
138 start_or_slash = false;
139 } else if c.is_alphanumeric() || matches!(c, '_' | '-' | '?') {
140 start_or_slash = false;
141 } else if c == '.' {
142 if start_or_slash && matches!(chars.peek(), Some((_, '.'))) {
143 return Err(PortableGlobError::ParentDirectory {
144 pos,
145 glob: glob.to_string(),
146 });
147 }
148 start_or_slash = false;
149 } else if c == '/' {
150 start_or_slash = true;
151 } else if c == '[' {
152 for (pos, c) in chars.by_ref() {
153 if c.is_alphanumeric() || matches!(c, '_' | '-' | '.') {
154 } else if c == ']' {
156 break;
157 } else {
158 return Err(PortableGlobError::InvalidCharacterRange {
159 glob: glob.to_string(),
160 pos,
161 invalid: c,
162 });
163 }
164 }
165 start_or_slash = false;
166 } else if c == '\\' {
167 match self {
168 Self::Pep639 => {
169 return Err(PortableGlobError::InvalidBackslash {
170 glob: glob.to_string(),
171 pos,
172 });
173 }
174 Self::Uv => {
175 match chars.next() {
176 Some((pos, '/' | '\\')) => {
177 return Err(PortableGlobError::InvalidEscapee {
180 glob: glob.to_string(),
181 pos,
182 });
183 }
184 Some(_) => {
185 }
187 None => {
188 return Err(PortableGlobError::TrailingEscape {
189 glob: glob.to_string(),
190 pos,
191 });
192 }
193 }
194 }
195 }
196 } else {
197 let err = match self {
198 Self::Pep639 => PortableGlobError::InvalidCharacter {
199 glob: glob.to_string(),
200 pos,
201 invalid: c,
202 },
203 Self::Uv => PortableGlobError::InvalidCharacterUv {
204 glob: glob.to_string(),
205 pos,
206 invalid: c,
207 },
208 };
209 return Err(err);
210 }
211 }
212 Ok(())
213 }
214}
215
#[cfg(test)]
mod tests {
    use super::*;
    use insta::assert_snapshot;

    /// Parses `glob` with `parser`, expects a failure and returns the error message after
    /// passing it through `anstream::adapter::strip_str`, so styling does not end up in the
    /// snapshots.
    fn failure_message(parser: PortableGlobParser, glob: &str) -> String {
        let err = parser.parse(glob).unwrap_err();
        anstream::adapter::strip_str(&err.to_string()).to_string()
    }

    /// Each rejected glob renders the expected error message.
    #[test]
    fn test_error() {
        let pep639 = |glob: &str| failure_message(PortableGlobParser::Pep639, glob);
        let uv = |glob: &str| failure_message(PortableGlobParser::Uv, glob);

        // Parent directory operators.
        assert_snapshot!(
            pep639(".."),
            @"The parent directory operator (`..`) at position 0 is not allowed in glob: `..`"
        );
        assert_snapshot!(
            pep639("licenses/.."),
            @"The parent directory operator (`..`) at position 9 is not allowed in glob: `licenses/..`"
        );

        // Characters outside the allowed set, in plain text and inside ranges.
        assert_snapshot!(
            pep639("licenses/LICEN!E.txt"),
            @"Invalid character `!` at position 14 in glob: `licenses/LICEN!E.txt`"
        );
        assert_snapshot!(
            pep639("licenses/LICEN[!C]E.txt"),
            @"Invalid character `!` in range at position 15 in glob: `licenses/LICEN[!C]E.txt`"
        );
        assert_snapshot!(
            pep639("licenses/LICEN[C?]E.txt"),
            @"Invalid character `?` in range at position 16 in glob: `licenses/LICEN[C?]E.txt`"
        );

        // Star runs.
        assert_snapshot!(
            pep639("******"),
            @"Too many at stars at position 0 in glob: `******`"
        );
        assert_snapshot!(
            pep639("licenses/**license"),
            @"Too many at stars at position 9 in glob: `licenses/**license`"
        );
        assert_snapshot!(
            pep639("licenses/***/licenses.csv"),
            @"Too many at stars at position 9 in glob: `licenses/***/licenses.csv`"
        );

        // Backslashes and other invalid characters in the PEP 639 dialect.
        assert_snapshot!(
            pep639(r"licenses\eula.txt"),
            @r"Only forward slashes are allowed as path separator, invalid character at position 8 in glob: `licenses\eula.txt`"
        );
        assert_snapshot!(
            pep639(r"**/@test"),
            @"Invalid character `@` at position 3 in glob: `**/@test`"
        );
        assert_snapshot!(
            pep639(r"public domain/Gulliver\\’s Travels.txt"),
            @r"Invalid character ` ` at position 6 in glob: `public domain/Gulliver\\’s Travels.txt`"
        );

        // The uv dialect adds a hint and rejects escaped path separators.
        assert_snapshot!(
            uv(r"**/@test"),
            @"Invalid character `@` at position 3 in glob: `**/@test`. hint: Characters can be escaped with a backslash"
        );
        assert_snapshot!(
            uv(r"licenses\\MIT.txt"),
            @r"Path separators can't be escaped, invalid character at position 9 in glob: `licenses\\MIT.txt`"
        );
        assert_snapshot!(
            uv(r"licenses\/MIT.txt"),
            @r"Path separators can't be escaped, invalid character at position 9 in glob: `licenses\/MIT.txt`"
        );
    }

    /// Globs that must be accepted: the first list by both dialects, the second by uv only.
    #[test]
    fn test_valid() {
        let portable = [
            r"licenses/*.txt",
            r"licenses/**/*.txt",
            r"LICEN[CS]E.txt",
            r"LICEN?E.txt",
            r"[a-z].txt",
            r"[a-z._-].txt",
            r"*/**",
            r"LICENSE..txt",
            r"LICENSE_file-1.txt",
            r"licenses/라이센스*.txt",
            r"licenses/ライセンス*.txt",
            r"licenses/执照*.txt",
            r"src/**",
        ];
        let uv_only = [
            r"public-domain/Gulliver\’s\ Travels.txt",
            r"**/\@test",
        ];

        for glob in &portable {
            PortableGlobParser::Pep639.parse(glob).unwrap();
        }
        for glob in portable.iter().chain(&uv_only) {
            PortableGlobParser::Uv.parse(glob).unwrap();
        }
    }
}