use glob::{Pattern, PatternError};
use thiserror::Error;
/// Errors returned by [`parse_pep639_glob`] and [`check_pep639_glob`].
///
/// In the variants produced by [`check_pep639_glob`], `glob` is a copy of the full input and
/// `pos` is the index of the offending character as counted by `glob.chars().enumerate()`
/// (i.e. a character index, not a byte offset).
#[derive(Debug, Error)]
pub enum Pep639GlobError {
/// Error reported by the `glob` crate when compiling the pattern; displayed transparently.
#[error(transparent)]
PatternError(#[from] PatternError),
/// A `.` immediately followed by another `.` was found at the very start of the glob or
/// directly after a `/`. `pos` is the index of the first dot.
#[error(
"The parent directory operator (`..`) at position {pos} is not allowed in glob: `{glob}`"
)]
ParentDirectory { glob: String, pos: usize },
/// A character outside of a `[...]` range that is neither alphanumeric nor one of
/// `_`, `-`, `?`, `.`, `*`, `/`, `[` (a backslash is reported as
/// [`Pep639GlobError::InvalidBackslash`] instead).
#[error("Invalid character `{invalid}` at position {pos} in glob: `{glob}`")]
InvalidCharacter {
glob: String,
pos: usize,
invalid: char,
},
/// A backslash was found outside of a `[...]` range.
#[error("Only forward slashes are allowed as path separator, invalid character at position {pos} in glob: `{glob}`")]
InvalidBackslash { glob: String, pos: usize },
/// A character inside a `[...]` range that is neither alphanumeric nor one of `_`, `-`, `.`.
#[error("Invalid character `{invalid}` in range at position {pos} in glob: `{glob}`")]
InvalidCharacterRange {
glob: String,
pos: usize,
invalid: char,
},
/// Either a run of three or more `*`, or a `**` that is directly followed by a character
/// other than `/`. `pos` is the index of the first star of the run.
#[error("Too many at stars at position {pos} in glob: `{glob}`")]
TooManyStars { glob: String, pos: usize },
}
pub fn parse_pep639_glob(glob: &str) -> Result<Pattern, Pep639GlobError> {
check_pep639_glob(glob)?;
Ok(Pattern::new(glob)?)
}
/// Checks that `glob` only uses the restricted glob syntax accepted by this module, without
/// compiling it.
///
/// Accepted outside of a character range: alphanumeric characters (per
/// [`char::is_alphanumeric`]), `_`, `-`, `?`, `.`, `/`, a single `*`, and `**` when it is
/// followed by `/` or ends the glob. Inside `[...]` only alphanumeric characters, `_`, `-`
/// and `.` are accepted.
///
/// Note that a `[` without a closing `]` is not rejected here: the scan simply reaches the
/// end of the input and the function returns `Ok(())`.
///
/// # Errors
///
/// * [`Pep639GlobError::TooManyStars`] for three or more consecutive `*`, or for `**`
///   directly followed by a character other than `/`.
/// * [`Pep639GlobError::ParentDirectory`] for `..` at the start of the glob or right after
///   a `/`.
/// * [`Pep639GlobError::InvalidCharacterRange`] for a disallowed character inside `[...]`.
/// * [`Pep639GlobError::InvalidBackslash`] for a backslash outside of `[...]`.
/// * [`Pep639GlobError::InvalidCharacter`] for any other disallowed character.
///
/// All reported positions are character indices, not byte offsets.
pub fn check_pep639_glob(glob: &str) -> Result<(), Pep639GlobError> {
    let mut iter = glob.chars().enumerate().peekable();
    // True at the very beginning of the glob and immediately after a `/`; this is the only
    // place where `..` is treated as a parent directory reference.
    let mut at_component_start = true;

    while let Some((pos, ch)) = iter.next() {
        match ch {
            '*' => {
                // Swallow the rest of the star run; `pos` stays on the first star.
                let mut stars = 1;
                while iter.next_if(|&(_, next)| next == '*').is_some() {
                    stars += 1;
                }
                // `**` is only allowed right before a `/` or at the end of the glob.
                let followed_by_non_slash =
                    matches!(iter.peek(), Some(&(_, next)) if next != '/');
                if stars >= 3 || (stars == 2 && followed_by_non_slash) {
                    return Err(Pep639GlobError::TooManyStars {
                        glob: glob.to_string(),
                        pos,
                    });
                }
                at_component_start = false;
            }
            '/' => at_component_start = true,
            '.' => {
                let next_is_dot = matches!(iter.peek(), Some((_, '.')));
                if at_component_start && next_is_dot {
                    return Err(Pep639GlobError::ParentDirectory {
                        pos,
                        glob: glob.to_string(),
                    });
                }
                at_component_start = false;
            }
            '[' => {
                // Consume up to and including the closing `]`, validating every member.
                for (pos, member) in iter.by_ref() {
                    match member {
                        ']' => break,
                        '_' | '-' | '.' => {}
                        m if m.is_alphanumeric() => {}
                        invalid => {
                            return Err(Pep639GlobError::InvalidCharacterRange {
                                glob: glob.to_string(),
                                pos,
                                invalid,
                            });
                        }
                    }
                }
                at_component_start = false;
            }
            '\\' => {
                return Err(Pep639GlobError::InvalidBackslash {
                    glob: glob.to_string(),
                    pos,
                });
            }
            '_' | '-' | '?' => at_component_start = false,
            literal if literal.is_alphanumeric() => at_component_start = false,
            invalid => {
                return Err(Pep639GlobError::InvalidCharacter {
                    glob: glob.to_string(),
                    pos,
                    invalid,
                });
            }
        }
    }

    Ok(())
}
// Unit tests for the glob validation: exact error messages for rejected globs and a list of
// globs that must be accepted.
#[cfg(test)]
mod tests {
use super::*;
use insta::assert_snapshot;
/// Every rejected glob must render exactly the error message stored in its inline snapshot.
#[test]
fn test_error() {
// Helper: parse a glob that is expected to fail and return the rendered error message.
let parse_err = |glob| parse_pep639_glob(glob).unwrap_err().to_string();
// `..` at the start of the glob and directly after a `/`.
assert_snapshot!(
parse_err(".."),
@"The parent directory operator (`..`) at position 0 is not allowed in glob: `..`"
);
assert_snapshot!(
parse_err("licenses/.."),
@"The parent directory operator (`..`) at position 9 is not allowed in glob: `licenses/..`"
);
// A disallowed character outside of a character range.
assert_snapshot!(
parse_err("licenses/LICEN!E.txt"),
@"Invalid character `!` at position 14 in glob: `licenses/LICEN!E.txt`"
);
// Disallowed characters inside a character range (`?` is only valid outside of one).
assert_snapshot!(
parse_err("licenses/LICEN[!C]E.txt"),
@"Invalid character `!` in range at position 15 in glob: `licenses/LICEN[!C]E.txt`"
);
assert_snapshot!(
parse_err("licenses/LICEN[C?]E.txt"),
@"Invalid character `?` in range at position 16 in glob: `licenses/LICEN[C?]E.txt`"
);
// Star runs: three or more stars, and `**` not followed by `/`. The reported position is
// that of the first star in the run.
assert_snapshot!(
parse_err("******"),
@"Too many at stars at position 0 in glob: `******`"
);
assert_snapshot!(
parse_err("licenses/**license"),
@"Too many at stars at position 9 in glob: `licenses/**license`"
);
assert_snapshot!(
parse_err("licenses/***/licenses.csv"),
@"Too many at stars at position 9 in glob: `licenses/***/licenses.csv`"
);
// Backslashes get their own error message.
assert_snapshot!(
parse_err(r"licenses\eula.txt"),
@r"Only forward slashes are allowed as path separator, invalid character at position 8 in glob: `licenses\eula.txt`"
);
assert_snapshot!(
parse_err(r"**/@test"),
@"Invalid character `@` at position 3 in glob: `**/@test`"
);
// Only the first offending character is reported: the space at index 6 comes before the
// backslashes and the quote.
assert_snapshot!(
parse_err(r"public domain/Gulliver\\'s Travels.txt"),
@r"Invalid character ` ` at position 6 in glob: `public domain/Gulliver\\'s Travels.txt`"
);
}
/// Globs that must pass both the syntax check and pattern compilation.
#[test]
fn test_valid() {
let cases = [
"licenses/*.txt",
"licenses/**/*.txt",
"LICEN[CS]E.txt",
"LICEN?E.txt",
"[a-z].txt",
"[a-z._-].txt",
"*/**",
// `..` in the middle of a path component is not a parent directory reference.
"LICENSE..txt",
"LICENSE_file-1.txt",
// Non-ASCII alphanumeric characters are accepted.
"licenses/라이센스*.txt",
"licenses/ライセンス*.txt",
"licenses/执照*.txt",
// A trailing `**` is allowed.
"src/**",
];
for case in cases {
parse_pep639_glob(case).unwrap();
}
}
}